lmous
06-21-2005, 10:24 AM
Hello to everybody,
I am trying to parse the A tags and the FRAME tags from HTML pages. I have written the code below, which works for the A tags but does not work for the FRAME tags. Does anyone have an idea why?
/**
 * Parses the HTML file named by {@code diskName} and fills {@code frames} with the
 * SRC value of every FRAME element and {@code links} with a {@code Link} (HREF plus
 * anchor text) for every A element that has an HREF.
 *
 * Both lists are re-created on every call. If the parser throws a
 * RuntimeException the method returns early and both lists stay empty.
 *
 * @throws Exception if the file cannot be opened, read or closed, or if the
 *         anchor text cannot be fetched from the document
 */
private void getLinks() throws Exception {
    System.out.println(diskName);
    links = new ArrayList();
    frames = new ArrayList();

    // Parse the HTML
    EditorKit kit = new HTMLEditorKit();
    HTMLDocument doc = (HTMLDocument) kit.createDefaultDocument();
    doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);

    BufferedReader rd = new BufferedReader(new FileReader(diskName));
    try {
        kit.read(rd, doc, 0);
    } catch (RuntimeException e) {
        // Best effort: a page the parser chokes on simply yields no links/frames.
        return;
    } finally {
        // The reader is only needed while parsing; close it on every path.
        rd.close();
    }

    // Find all the FRAME elements in the HTML document.
    // NOTE(review): if this still finds nothing for frameset pages, the document
    // model may not expose FRAME elements; an HTMLEditorKit.ParserCallback that
    // overrides handleSimpleTag would see the raw tags instead -- TODO confirm.
    HTMLDocument.Iterator it = doc.getIterator(HTML.Tag.FRAME);
    while (it.isValid()) {
        // Use the AttributeSet interface (fully qualified so no new import is
        // needed): getAttributes() is not guaranteed to return a SimpleAttributeSet,
        // so the concrete cast could fail with a ClassCastException.
        javax.swing.text.AttributeSet attrs = it.getAttributes();
        String frameSrc = (String) attrs.getAttribute(HTML.Attribute.SRC);
        if (frameSrc != null) {
            frames.add(frameSrc);
        }
        // Advance the iterator; without this the loop never terminates once a
        // FRAME element has been found.
        it.next();
    }

    // Find all the A elements in the HTML document.
    it = doc.getIterator(HTML.Tag.A);
    while (it.isValid()) {
        javax.swing.text.AttributeSet attrs = it.getAttributes();
        String link = (String) attrs.getAttribute(HTML.Attribute.HREF);
        int startOfSet = it.getStartOffset();
        int endOfSet = it.getEndOffset();
        // Text covered by the anchor element.
        String text = doc.getText(startOfSet, endOfSet - startOfSet);
        if (link != null) {
            links.add(new Link(link, text));
        }
        it.next();
    }
}
I am trying to parse the A tags and the FRAME tags from HTML pages. I have written the code below, which works for the A tags but does not work for the FRAME tags. Does anyone have an idea why?
/**
 * Parses the HTML file named by {@code diskName} and fills {@code frames} with the
 * SRC value of every FRAME element and {@code links} with a {@code Link} (HREF plus
 * anchor text) for every A element that has an HREF.
 *
 * Both lists are re-created on every call. If the parser throws a
 * RuntimeException the method returns early and both lists stay empty.
 *
 * @throws Exception if the file cannot be opened, read or closed, or if the
 *         anchor text cannot be fetched from the document
 */
private void getLinks() throws Exception {
    System.out.println(diskName);
    links = new ArrayList();
    frames = new ArrayList();

    // Parse the HTML
    EditorKit kit = new HTMLEditorKit();
    HTMLDocument doc = (HTMLDocument) kit.createDefaultDocument();
    doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);

    BufferedReader rd = new BufferedReader(new FileReader(diskName));
    try {
        kit.read(rd, doc, 0);
    } catch (RuntimeException e) {
        // Best effort: a page the parser chokes on simply yields no links/frames.
        return;
    } finally {
        // The reader is only needed while parsing; close it on every path.
        rd.close();
    }

    // Find all the FRAME elements in the HTML document.
    // NOTE(review): if this still finds nothing for frameset pages, the document
    // model may not expose FRAME elements; an HTMLEditorKit.ParserCallback that
    // overrides handleSimpleTag would see the raw tags instead -- TODO confirm.
    HTMLDocument.Iterator it = doc.getIterator(HTML.Tag.FRAME);
    while (it.isValid()) {
        // Use the AttributeSet interface (fully qualified so no new import is
        // needed): getAttributes() is not guaranteed to return a SimpleAttributeSet,
        // so the concrete cast could fail with a ClassCastException.
        javax.swing.text.AttributeSet attrs = it.getAttributes();
        String frameSrc = (String) attrs.getAttribute(HTML.Attribute.SRC);
        if (frameSrc != null) {
            frames.add(frameSrc);
        }
        // Advance the iterator; without this the loop never terminates once a
        // FRAME element has been found.
        it.next();
    }

    // Find all the A elements in the HTML document.
    it = doc.getIterator(HTML.Tag.A);
    while (it.isValid()) {
        javax.swing.text.AttributeSet attrs = it.getAttributes();
        String link = (String) attrs.getAttribute(HTML.Attribute.HREF);
        int startOfSet = it.getStartOffset();
        int endOfSet = it.getEndOffset();
        // Text covered by the anchor element.
        String text = doc.getText(startOfSet, endOfSet - startOfSet);
        if (link != null) {
            links.add(new Link(link, text));
        }
        it.next();
    }
}