// A modified version of the main method of class Parser.
import java.io.Serializable;
import java.net.HttpURLConnection;
import java.net.URLConnection;
import org.htmlparser.Parser;
import org.htmlparser.NodeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.http.ConnectionManager;
import org.htmlparser.http.ConnectionMonitor;
import org.htmlparser.http.HttpHeader;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
import org.htmlparser.util.DefaultParserFeedback;
import org.htmlparser.util.IteratorImpl;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.util.ParserFeedback;
import org.htmlparser.util.EncodingChangeException;
import org.htmlparser.visitors.NodeVisitor;
/**
 * Command-line driver that parses an HTML resource and prints the result.
 * Adapted from the main method of class Parser; this variant additionally
 * retries the parse once when an EncodingChangeException is raised.
 */
public class SimpleParser {

    /**
     * Usage text printed after the version banner, one entry per output line.
     * Empty entries produce blank lines.
     */
    private static final String [] USAGE_LINES = {
        "",
        "Syntax : java -jar htmlparser.jar <file/page> [type]",
        " <file/page> the URL or file to be parsed",
        " type the node type, for example:",
        " A - Show only the link tags",
        " IMG - Show only the image tags",
        " TITLE - Show only the title tag",
        "",
        "Example : java -jar htmlparser.jar http://www.yahoo.com",
        "",
    };

    /**
     * Parses the resource named by the first argument and prints the parse
     * result to standard output.
     *
     * <p>With no arguments, or with "-help" as the first argument, only the
     * usage text is printed. A second argument, when present, is used as a
     * tag name to filter the output. Without one, the parser is switched to
     * more verbose settings (feedback on Parser.STDOUT, and the parser
     * installed as the connection manager's monitor).
     *
     * <p>Parser failures are not propagated; their stack traces are printed.
     *
     * @param args args[0] is the URL or file to parse; optional args[1] is a
     *             tag name such as A, IMG or TITLE
     */
    public static void main (String [] args) {
        if (args.length < 1 || args[0].equals ("-help")) {
            printUsage ();
            return;
        }

        Parser htmlParser = null;
        NodeFilter tagFilter = null;
        try {
            htmlParser = new Parser ();
            final ConnectionManager connections = Parser.getConnectionManager ();
            if (args.length > 1) {
                tagFilter = new TagNameFilter (args[1]);
            } else {
                // No filter requested: for a simple dump, use more verbose settings.
                htmlParser.setFeedback (Parser.STDOUT);
                connections.setMonitor (htmlParser);
            }
            connections.setRedirectionProcessingEnabled (true);
            connections.setCookieProcessingEnabled (true);
            htmlParser.setResource (args[0]);
            System.out.println (htmlParser.parse (tagFilter));
        } catch (EncodingChangeException encodingChanged) {
            // NOTE(review): presumably raised when the page turns out to use a
            // different character encoding than the one in effect - confirm
            // against the library docs. Reset the parser and try exactly once more.
            try {
                htmlParser.reset ();
                System.out.println (htmlParser.parse (tagFilter));
            } catch (ParserException retryFailure) {
                retryFailure.printStackTrace ();
            }
        } catch (ParserException failure) {
            failure.printStackTrace ();
        }
    }

    /** Prints the version banner followed by the command-line usage text. */
    private static void printUsage () {
        System.out.println ("HTML Parser v" + Parser.getVersion () + "\n");
        for (String line : USAGE_LINES) {
            System.out.println (line);
        }
    }
}