SimpleParser

class Parser の main を改変したもの。

import java.io.Serializable;
import java.net.HttpURLConnection;
import java.net.URLConnection;

import org.htmlparser.Parser;
import org.htmlparser.NodeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.http.ConnectionManager;
import org.htmlparser.http.ConnectionMonitor;
import org.htmlparser.http.HttpHeader;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
import org.htmlparser.util.DefaultParserFeedback;
import org.htmlparser.util.IteratorImpl;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.util.ParserFeedback;
import org.htmlparser.util.EncodingChangeException;
import org.htmlparser.visitors.NodeVisitor;

/**
 * Minimal command-line front end for the HTML Parser library.
 *
 * <p>Invoked with a resource (URL or file) and an optional tag name, it
 * parses the resource and prints the resulting node list to standard output.
 * Invoked without arguments, or with {@code -help} as the first argument, it
 * prints a usage summary instead.
 */
public class SimpleParser {

    /**
     * Program entry point.
     *
     * @param args {@code args[0]} is the resource handed to the parser;
     *             the optional {@code args[1]} is a tag name used to build a
     *             {@link TagNameFilter}. When {@code args[1]} is absent the
     *             parse is unfiltered, parser feedback is sent to
     *             {@code Parser.STDOUT} and the parser is installed as the
     *             connection monitor.
     */
    public static void main (String [] args) {
        if (args.length < 1 || args[0].equals ("-help")) {
            printUsage ();
            return;
        }

        Parser htmlParser = null;
        NodeFilter tagFilter = null;
        try {
            htmlParser = new Parser ();
            ConnectionManager manager = Parser.getConnectionManager ();

            if (args.length > 1) {
                tagFilter = new TagNameFilter (args[1]);
            } else {
                // for a simple dump, use more verbose settings
                htmlParser.setFeedback (Parser.STDOUT);
                manager.setMonitor (htmlParser);
            }

            manager.setRedirectionProcessingEnabled (true);
            manager.setCookieProcessingEnabled (true);

            htmlParser.setResource (args[0]);
            System.out.println (htmlParser.parse (tagFilter));
        } catch (EncodingChangeException encodingChanged) {
            // The encoding changed while parsing: start over once.
            reparse (htmlParser, tagFilter);
        } catch (ParserException failure) {
            failure.printStackTrace ();
        }
    }

    /**
     * Writes the usage summary, one entry per line, to standard output.
     * Empty entries produce the blank separator lines.
     */
    private static void printUsage () {
        String[] usageLines = {
            "HTML Parser v" + Parser.getVersion () + "\n",
            "",
            "Syntax : java -jar htmlparser.jar"
                + " <file/page> [type]",
            "   <file/page> the URL or file to be parsed",
            "   type the node type, for example:",
            "     A - Show only the link tags",
            "     IMG - Show only the image tags",
            "     TITLE - Show only the title tag",
            "",
            "Example : java -jar htmlparser.jar"
                + " http://www.yahoo.com",
            ""
        };
        for (String usageLine : usageLines) {
            System.out.println (usageLine);
        }
    }

    /**
     * Resets the parser and parses again, printing the result to standard
     * output. A {@link ParserException} raised by this second attempt is
     * reported with a stack trace and not retried.
     *
     * @param htmlParser the parser to reset and re-run
     * @param tagFilter  the filter to apply, or {@code null} for none
     */
    private static void reparse (Parser htmlParser, NodeFilter tagFilter) {
        try {
            htmlParser.reset ();
            System.out.println (htmlParser.parse (tagFilter));
        } catch (ParserException failure) {
            failure.printStackTrace ();
        }
    }
}

SimpleParser.java