发布网友 发布时间:2022-05-10 16:00
共3个回答
热心网友 时间:2022-04-19 06:10
/**
 * Reads a local HTML file and prints the text of every table cell it contains.
 *
 * <p>Relies on the Parser, NodeList, NodeFilter, NodeClassFilter, OrFilter,
 * TableTag, TableRow, TableColumn and ParserException classes. NOTE(review):
 * these appear to come from the HTML Parser library (org.htmlparser) -- confirm
 * against your classpath and add the matching imports.
 */
public class ReadHtml2 {

    /**
     * Loads {@code fortest.htm} from the working directory, extracts all
     * {@code <table>} tags and prints each cell's content to standard output,
     * preceded by a separator line per table.
     *
     * @param args unused
     * @throws IOException if the input file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // To read from a URL instead of a local file, build the reader like this:
        //   URL url = new URL("https://passport.baidu.com/?reg&tpl=mn");
        //   InputStreamReader isr = new InputStreamReader(url.openStream());
        File f = new File("fortest.htm");

        // Read the whole HTML document into a single string. Line terminators
        // are dropped, exactly as in the original line-by-line concatenation.
        StringBuilder content = new StringBuilder();
        // Decode with the same charset that is handed to the parser below, and
        // close the reader (and the underlying stream) when done.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(f), "utf-8"))) {
            String s;
            while ((s = br.readLine()) != null) {
                content.append(s);
            }
        }
        String allContent = content.toString();

        // Set up the HTML parser with a filter that only accepts table tags.
        Parser myParser = Parser.createParser(allContent, "utf-8");
        NodeFilter tableFilter = new NodeClassFilter(TableTag.class);
        // The OrFilter wrapper makes it easy to add further tag filters later.
        OrFilter lastFilter = new OrFilter();
        lastFilter.setPredicates(new NodeFilter[] { tableFilter });

        try {
            // Collect the nodes that matched the table filter.
            NodeList nodeList = myParser.parse(lastFilter);

            // Visit each table. The bound is strictly less than size(): the
            // original "<=" read one element past the end of the list.
            for (int i = 0; i < nodeList.size(); i++) {
                if (!(nodeList.elementAt(i) instanceof TableTag)) {
                    continue;
                }
                TableTag tag = (TableTag) nodeList.elementAt(i);
                System.out.println("----------------------table " + i + "--------------------------------");

                // Visit each row of the table, then each cell of the row.
                for (TableRow tr : tag.getRows()) {
                    for (TableColumn td : tr.getColumns()) {
                        // Print the cell content; adapt the output format as needed.
                        System.out.println(td.getStringText());
                    }
                }
            }
        } catch (ParserException e) {
            e.printStackTrace();
        }
    }
}
热心网友 时间:2022-04-19 07:28
http://www.oschina.net/code/snippet_222150_7984
热心网友 时间:2022-04-19 09:02
是需要例子?