当前位置:首页 > 新闻动态 > 网站文章

Java爬虫获取网页源码的2种方式(纯净版)

来源: 浏览:136 时间:2023-10-13

第一种:URL(HttpURLConnection)

package InternetTest;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Downloads the body of a web page with the JDK's built-in
 * {@link HttpURLConnection} and prints it to standard output.
 */
public class a44 {
    /** Timeout applied to both connecting and reading, in milliseconds (5 seconds). */
    private static final int TIMEOUT_MILLIS = 5 * 1000;

    /**
     * Issues a GET request to {@code http://www.baidu.com}, reads the whole
     * response body into memory and prints it.
     *
     * @param args ignored
     * @throws Exception if the connection cannot be opened or the body cannot be read
     */
    public static void main(String[] args) throws Exception {
        URL url = new URL("http://www.baidu.com");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            // Without a read timeout a stalled server would block read() forever.
            conn.setReadTimeout(TIMEOUT_MILLIS);

            ByteArrayOutputStream outStream = new ByteArrayOutputStream();
            // try-with-resources closes the stream even when read() throws.
            try (InputStream inStream = conn.getInputStream()) {
                byte[] buffer = new byte[1024];
                int len;
                while ((len = inStream.read(buffer)) != -1) {
                    outStream.write(buffer, 0, len);
                }
            }

            // Decode explicitly instead of relying on the platform default charset.
            // NOTE(review): assumes the page is served as UTF-8; the Content-Type
            // header is not inspected here.
            String htmlSource = new String(outStream.toByteArray(),
                    java.nio.charset.StandardCharsets.UTF_8);
            System.out.println(htmlSource);
        } finally {
            conn.disconnect();
        }
    }
}

第二种:HttpClient

package InternetTest;
import org.apache.http.HttpEntity;
import org.apache.http.HttpStatus;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.HttpClientUtils;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
/**
 * Downloads the body of a web page with Apache HttpClient and prints it to
 * standard output.
 */
public class a45 {
    /** Charset used to decode the body when the response does not declare one. */
    private static final String DEFAULT_CHARSET = "utf-8";

    /**
     * Issues a GET request to {@code http://www.baidu.com} and prints the
     * response body. A status other than 200 is reported on standard error;
     * the body is still printed to standard output.
     *
     * @param args ignored
     * @throws Exception if the request fails or the body cannot be read
     */
    public static void main(String[] args) throws Exception{
        String url1 = "http://www.baidu.com";
        HttpGet request = new HttpGet(url1);
        // try-with-resources releases the response and then the client even
        // when execute() or the body decoding throws.
        try (CloseableHttpClient closeableHttpClient = HttpClients.createDefault();
             CloseableHttpResponse closeableHttpResponse = closeableHttpClient.execute(request)) {
            int statusCode = closeableHttpResponse.getStatusLine().getStatusCode();
            if (statusCode != HttpStatus.SC_OK) {
                System.err.println("Unexpected HTTP status: " + statusCode);
            }
            // getEntity() returns null for responses without a body.
            HttpEntity httpEntity = closeableHttpResponse.getEntity();
            String html = (httpEntity == null)
                    ? ""
                    : EntityUtils.toString(httpEntity, DEFAULT_CHARSET);
            System.out.println(html);
        }
    }
}

地址 · ADDRESS

地址:建邺区新城科技园嘉陵江东街18号2层

邮箱:309474043@qq.com

点击查看更多案例

联系 · CALL TEL

400-8793-956

售后专线:025-65016872

业务QQ:309474043    售后QQ:1850555641

©南京安优网络科技有限公司 版权所有   苏ICP备12071769号-4  网站地图