• 大小: 2.26MB
    文件类型: .rar
    金币: 1
    下载: 0 次
    发布日期: 2023-10-27
  • 语言: 其他
  • 标签: Jsoup  网络爬虫  

资源简介

Jsoup网络爬虫

资源截图

代码片段和文件信息

package com.github.webcrawder;

import java.io.IOException;

import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

public class CrawderDemo {
public static void main(String[] args) throws ClientProtocolException IOException {
// 创建httpClient客户端
HttpClient hClient = new DefaultHttpClient();
// 创建http发送请求对象,Httpget
HttpGet hget = new HttpGet(“http://www.itcast.cn“);
// 发送请求
HttpResponse response = hClient.execute(hget);
// 获取网页内容
String content = EntityUtils.toString(response.getEntity() “utf-8“);
// 使用Jsoup解析网页内容
Document document = Jsoup.parse(content);
// 使用元素选择器选择网页的内容
Elements elements = document.select(“ul.nav_li a“);
System.out.println(elements.text());
System.out.println(elements);

}

}

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----

     文件        859  2017-03-15 17:08  WebCrawler\.classpath

     文件        386  2017-03-15 17:04  WebCrawler\.project

     文件        670  2017-03-15 17:05  WebCrawler\.settings\org.eclipse.jdt.core.prefs

     文件       1887  2017-03-15 19:16  WebCrawler\bin\com\github\webcrawder\CrawderDemo.class

     文件       2679  2017-03-15 18:04  WebCrawler\bin\com\github\webcrawder\HttpClientCrawder.class

     文件       2718  2017-03-15 18:27  WebCrawler\bin\com\github\webcrawder\HttpClientJsoup.class

     文件       1786  2017-03-15 17:31  WebCrawler\bin\com\github\webcrawder\JsoupCrawder.class

     文件       1963  2017-03-15 19:06  WebCrawler\bin\com\github\webcrawder\Jsouptest.class

     文件       1891  2017-03-15 17:47  WebCrawler\bin\com\github\webcrawder\MyHttpClient.class

     文件       1707  2017-03-15 17:32  WebCrawler\bin\com\github\webcrawder\MyJsoup.class

     文件     345035  2017-03-15 17:07  WebCrawler\lib\apache-mime4j-0.6.jar

     文件      58160  2017-03-15 17:07  WebCrawler\lib\commons-codec-1.4.jar

     文件      60841  2017-03-15 17:07  WebCrawler\lib\commons-logging-1.1.1.jar

     文件     291039  2017-03-15 17:07  WebCrawler\lib\httpclient-4.0.1.jar

     文件     172888  2017-03-15 17:07  WebCrawler\lib\httpcore-4.0.1.jar

     文件      25443  2017-03-15 17:07  WebCrawler\lib\httpmime-4.0.1.jar

     文件     119888  2017-03-15 17:07  WebCrawler\lib\json.jar

     文件     293672  2017-03-15 17:07  WebCrawler\lib\jsoup-1.7.2.jar

     文件     489884  2017-03-15 17:07  WebCrawler\lib\log4j-1.2.17.jar

     文件     724225  2017-03-15 17:07  WebCrawler\lib\mysql-connector-java-5.1.10-bin.jar

     文件       1175  2017-03-15 19:16  WebCrawler\src\com\github\webcrawder\CrawderDemo.java

     文件       2213  2017-03-15 18:04  WebCrawler\src\com\github\webcrawder\HttpClientCrawder.java

     文件       2214  2017-03-15 18:27  WebCrawler\src\com\github\webcrawder\HttpClientJsoup.java

     文件       1595  2017-03-15 17:31  WebCrawler\src\com\github\webcrawder\JsoupCrawder.java

     文件       1282  2017-03-15 19:06  WebCrawler\src\com\github\webcrawder\Jsouptest.java

     文件       1546  2017-03-15 17:47  WebCrawler\src\com\github\webcrawder\MyHttpClient.java

     文件        824  2017-03-15 17:32  WebCrawler\src\com\github\webcrawder\MyJsoup.java

     目录          0  2017-03-15 19:09  WebCrawler\bin\com\github\webcrawder

     目录          0  2017-03-15 19:09  WebCrawler\src\com\github\webcrawder

     目录          0  2017-03-15 17:09  WebCrawler\bin\com\github

............此处省略11个文件信息

评论

共有 条评论