[转][htmlparser]htmlparser应用例子(全)
[转][htmlparser]htmlparser使用例子(全)
from : http://gcgmh.iteye.com/blog/474093
- import java.net.URL;
- import junit.framework.TestCase;
- import org.apache.log4j.Logger;
- import org.htmlparser.Node;
- import org.htmlparser.NodeFilter;
- import org.htmlparser.Parser;
- import org.htmlparser.Tag;
- import org.htmlparser.beans.LinkBean;
- import org.htmlparser.filters.NodeClassFilter;
- import org.htmlparser.filters.OrFilter;
- import org.htmlparser.filters.TagNameFilter;
- import org.htmlparser.tags.HeadTag;
- import org.htmlparser.tags.ImageTag;
- import org.htmlparser.tags.InputTag;
- import org.htmlparser.tags.LinkTag;
- import org.htmlparser.tags.OptionTag;
- import org.htmlparser.tags.SelectTag;
- import org.htmlparser.tags.TableColumn;
- import org.htmlparser.tags.TableRow;
- import org.htmlparser.tags.TableTag;
- import org.htmlparser.tags.TitleTag;
- import org.htmlparser.util.NodeIterator;
- import org.htmlparser.util.NodeList;
- import org.htmlparser.util.ParserException;
- import org.htmlparser.visitors.HtmlPage;
- import org.htmlparser.visitors.NodeVisitor;
- import org.htmlparser.visitors.ObjectFindingVisitor;
- public class T extends TestCase {
- private static final Logger logger = Logger.getLogger(T.class); // log4j logger for class T; the test methods below print their results through it
- /**
-  * Creates a test case with the given name.
-  *
-  * @param name the name handed to the superclass constructor
-  */
- public T(final String name) {
-     super(name);
- }
- /**
-  * Demonstrates {@code ObjectFindingVisitor}.
-  *
-  * Points a parser at http://www.google.com, visits all nodes with a visitor
-  * configured for {@code ImageTag}, and logs the image URL, the extracted image
-  * location and the SRC attribute of every tag the visitor collected.
-  * Any exception is logged and reported as a test failure.
-  */
- public void testImageVisitor() {
-     try {
-         ObjectFindingVisitor visitor = new ObjectFindingVisitor(ImageTag.class);
-         Parser parser = new Parser();
-         parser.setURL("http://www.google.com");
-         // NOTE(review): re-applies the parser's current encoding; looks like a no-op - confirm before removing.
-         parser.setEncoding(parser.getEncoding());
-         parser.visitAllNodesWith(visitor);
-         Node[] nodes = visitor.getTags();
-         for (int i = 0; i < nodes.length; i++) {
-             ImageTag imgLink = (ImageTag) nodes[i];
-             logger.fatal("testImageVisitor() ImageURL = " + imgLink.getImageURL());
-             logger.fatal("testImageVisitor() ImageLocation = " + imgLink.extractImageLocn());
-             logger.fatal("testImageVisitor() SRC = " + imgLink.getAttribute("SRC"));
-         }
-     } catch (Exception e) {
-         // Do not swallow the problem: keep the stack trace in the log and make the test fail.
-         logger.error("testImageVisitor() failed", e);
-         fail("testImageVisitor() failed: " + e);
-     }
- }
- /**
-  * Demonstrates {@code TagNameFilter}.
-  *
-  * Points a parser at http://www.google.com, extracts every node matching a
-  * tag-name filter for "IMG", and logs the HTML of each match.
-  * Any exception is logged and reported as a test failure.
-  */
- public void testNodeFilter() {
-     try {
-         NodeFilter filter = new TagNameFilter("IMG");
-         Parser parser = new Parser();
-         parser.setURL("http://www.google.com");
-         // NOTE(review): re-applies the parser's current encoding; looks like a no-op - confirm before removing.
-         parser.setEncoding(parser.getEncoding());
-         NodeList list = parser.extractAllNodesThatMatch(filter);
-         for (int i = 0; i < list.size(); i++) {
-             logger.fatal("testNodeFilter() " + list.elementAt(i).toHtml());
-         }
-     } catch (Exception e) {
-         // Do not swallow the problem: keep the stack trace in the log and make the test fail.
-         logger.error("testNodeFilter() failed", e);
-         fail("testNodeFilter() failed: " + e);
-     }
- }
- /**
-  * Demonstrates {@code NodeClassFilter}.
-  *
-  * Points a parser at http://www.google.com, extracts every node matching a
-  * class filter for {@code LinkTag}, and logs the link extracted from each one.
-  * Any exception is logged and reported as a test failure.
-  */
- public void testLinkTag() {
-     try {
-         NodeFilter filter = new NodeClassFilter(LinkTag.class);
-         Parser parser = new Parser();
-         parser.setURL("http://www.google.com");
-         // NOTE(review): re-applies the parser's current encoding; looks like a no-op - confirm before removing.
-         parser.setEncoding(parser.getEncoding());
-         NodeList list = parser.extractAllNodesThatMatch(filter);
-         for (int i = 0; i < list.size(); i++) {
-             LinkTag node = (LinkTag) list.elementAt(i);
-             logger.fatal("testLinkTag() Link is :" + node.extractLink());
-         }
-     } catch (Exception e) {
-         // Do not swallow the problem: keep the stack trace in the log and make the test fail.
-         logger.error("testLinkTag() failed", e);
-         fail("testLinkTag() failed: " + e);
-     }
- }
- /**
-  * Demonstrates parsing of stylesheet tags of the form
-  * {@code <link href='' text='text/css' rel='stylesheet' />}.
-  *
-  * Feeds an inline HTML fragment containing two such tags to the parser,
-  * iterates over the nodes returned by {@code parser.elements()}, and logs
-  * the text and class of each node.
-  * Any exception is logged and reported as a test failure.
-  */
- public void testLinkCSS() {
-     try {
-         Parser parser = new Parser();
-         // Both href values use plain single quotes on both sides; a typographic
-         // opening quote would leave the first attribute value malformed.
-         parser.setInputHTML("<head><title>Link Test</title>"
-                 + "<link href='/test01/css.css' text='text/css' rel='stylesheet' />"
-                 + "<link href='/test02/css.css' text='text/css' rel='stylesheet' />" + "</head>"
-                 + "<body>");
-         // NOTE(review): re-applies the parser's current encoding; looks like a no-op - confirm before removing.
-         parser.setEncoding(parser.getEncoding());
-         for (NodeIterator it = parser.elements(); it.hasMoreNodes();) {
-             Node node = it.nextNode();
-             logger.fatal("testLinkCSS()" + node.getText() + node.getClass());
-         }
-     } catch (Exception e) {
-         // Do not swallow the problem: keep the stack trace in the log and make the test fail.
-         logger.error("testLinkCSS() failed", e);
-         fail("testLinkCSS() failed: " + e);
-     }
- }
- /*
- * 测试OrFilter的用法
- */
- public void testOrFilter() {
- NodeFilter inputFilter = new NodeClassFilter(InputTag.class);
- NodeFilter selectFilter = new NodeClassFilter(SelectTag.class);
- NodeList nodeList = null;
- try {
- Parser parser = new Parser();
- parser
- .setInputHTML("<head><title>OrFilter Test</title>"
- + "<link href='/test01/css.css' text='text/css' rel='stylesheet' />"
- + "<link href='/test02/css.css' text='text/css' rel='stylesheet' />"
- font-size: 1em; margin-top: 0px; margin-righ
1 楼 满月无双 2011-08-10
testLinkCSS() 方法怎么取出 <link> 标签的 href 属性的值呢?
2 楼 zxhDaniel 2011-08-13
满月无双 写道:testLinkCSS() 方法怎么取出 <link> 标签的 href 属性的值呢?
你可以先拿到 link 这个节点的 Node 对象,然后再用 getAttribute() 这类方法把属性值取出来。