Java 模仿网络爬虫的简单案例，直接看代码

发布时间: 2023-04-06 12:05:03 作者: 华科爬虫

Java 模仿网络爬虫的简单案例，直接看代码

package com.example.demo1;

import java.io.*;
import java.net.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Minimal web-crawler demo: downloads one page, scans it line by line for
 * 11-digit numbers that start with {@code 1}, and writes every match to a
 * local text file (one match per line).
 *
 * @author: YinLei
 * Package:  com.example.demo1
 * @date: 2021/9/7 20:23
 * @Description: Java crawler test
 * @version: 1.0
 */
public class Crawler {
    /** Page to fetch: a dangdang.com search request. */
    private static final String TARGET_URL =
            "http://search.dangdang.com/?key=%BB%FA%D0%B5%B1%ED&act=input";

    /** File that receives the extracted matches. */
    private static final String OUTPUT_FILE = "D:/SiteURL.txt";

    /**
     * A literal '1' followed by exactly ten digits. The pattern is unanchored,
     * so it also matches inside longer digit runs. Compiled once and reused.
     */
    private static final Pattern NUMBER_PATTERN = Pattern.compile("1\\d{10}");

    /**
     * Fetches {@link #TARGET_URL}, extracts every {@link #NUMBER_PATTERN} match
     * and writes the matches to {@link #OUTPUT_FILE}. Progress markers are
     * printed to standard output; I/O failures are reported via a stack trace.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            URL url = new URL(TARGET_URL);
            URLConnection urlConnection = url.openConnection();

            // try-with-resources guarantees the file and the network stream are
            // closed on every path; the original never closed either of them.
            try (PrintWriter pw = new PrintWriter(new FileWriter(OUTPUT_FILE), true)) {
                System.out.println("Stay Here1!!");

                // NOTE(review): the response is decoded as UTF-8; confirm that this
                // matches the charset the server actually sends.
                try (BufferedReader br = new BufferedReader(new InputStreamReader(
                        urlConnection.getInputStream(), "UTF-8"))) {
                    System.out.println("Stay Here2!!");

                    String line;
                    while ((line = br.readLine()) != null) {
                        Matcher m = NUMBER_PATTERN.matcher(line);
                        while (m.find()) {
                            pw.println(m.group());
                        }
                    }
                }

                // PrintWriter never throws on write errors; surface them explicitly
                // so that "success!" is not reported after a failed write.
                if (pw.checkError()) {
                    throw new IOException("Failed writing to " + OUTPUT_FILE);
                }
            }
            System.out.println("success!");
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}