爬虫之短信验证码

发布时间 2024-01-10 16:53:00作者: 没有梦想的java菜鸟

处理短信验证码的思路是：在手机端安装一个短信转发工具，把收到的短信转发到程序可以读取的地方。目前我的处理方式是将短信转发到邮箱，再读取邮箱中的邮件来提取验证码。

首先需要下载短信转发工具 SmsForwarder：https://github.com/pppscn/SmsForwarder/releases/tag/v3.2.0

具体使用参考官方文档。

以下是12306 短信验证码测试案例

爬取类。访问方式有两种：一种是携带 cookie 直接访问，另一种是走登录流程，两者也可以结合使用。

import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.jsoup.Jsoup;
import org.openqa.selenium.By;
import org.openqa.selenium.Keys;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.firefox.FirefoxOptions;
import org.openqa.selenium.firefox.FirefoxProfile;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;

import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.Properties;

/**
 * Selenium demo that logs in to 12306 with an SMS verification code (read back from a mailbox
 * via {@code EmailListener}) and then tries to book a ticket.
 *
 * <p>All user-specific values are read from {@code config.properties} on the classpath.
 *
 * @author 没有梦想的java菜鸟
 * @version 1.0
 */
@Slf4j
public class Selenium12306 {
    public static final String LOGIN_URL = "https://kyfw.12306.cn/otn/resources/login.html";
    public static final String URL = "https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc";

    /** Earliest departure hour (inclusive) a train may have to be booked. */
    private static final int MIN_DEPARTURE_HOUR = 1;
    /** Latest departure hour (inclusive) a train may have to be booked. */
    private static final int MAX_DEPARTURE_HOUR = 19;
    /** Index of the {@code td} whose text must be "有" for a row to be booked. */
    private static final int AVAILABILITY_COLUMN = 3;

    public static void main(String[] args) throws Exception {
        doPurchase();
    }

    /**
     * Loads the configuration, logs in and runs the booking flow.
     *
     * @throws Exception if the configuration cannot be read or any browser step fails
     */
    public static void doPurchase() throws Exception {
        Properties pro = new Properties();
        // try-with-resources so the classpath stream is closed once the properties are loaded.
        try (InputStreamReader reader = new InputStreamReader(
                Objects.requireNonNull(
                        Selenium12306.class.getClassLoader().getResourceAsStream("config.properties"),
                        "config.properties not found on the classpath"),
                StandardCharsets.UTF_8)) {
            pro.load(reader);
        }
        String date = pro.getProperty("date");
        String fromStation = pro.getProperty("startStation");
        String toStation = pro.getProperty("endStation");
        String lUsername = pro.getProperty("username");
        String lPassword = pro.getProperty("password");
        String passenger = pro.getProperty("passenger");
        String idCard = pro.getProperty("idCard");
        String email = pro.getProperty("email");
        String authCode = pro.getProperty("authCode");

        // NOTE(review): the driver is never quit; presumably the window is left open on purpose
        // so the order can be paid manually — confirm.
        FirefoxDriver driver = catchConfig();
        WebDriverWait wait = new WebDriverWait(driver, 5);
        driver.get(LOGIN_URL);
        //addCookie(driver);
        // Username / password / SMS code login.
        doLogin(lUsername, lPassword, idCard, email, authCode, driver, wait);
        // Open the ticket query page and book.
        buyTicket(driver, date, fromStation, toStation, passenger);
    }

    /**
     * Adds every {@code name=value} pair of a {@code ;}-separated cookie header to the driver.
     *
     * <p>Blank or malformed pairs are skipped, and only the first {@code =} separates name from
     * value so that values containing {@code =} survive intact.
     *
     * @param driver browser session that receives the cookies
     */
    private static void addCookie(FirefoxDriver driver) {
        String cookie = "";
        for (String pair : cookie.split(";")) {
            String[] kv = pair.split("=", 2);
            if (kv.length < 2 || kv[0].trim().isEmpty()) {
                continue;
            }
            driver.manage().addCookie(new org.openqa.selenium.Cookie(kv[0].trim(), kv[1].trim()));
        }
    }

    /**
     * Fills in the login form, requests an SMS code, reads it back through
     * {@code EmailListener.getCode} and confirms the login.
     *
     * @param lUsername account name typed into the login form
     * @param lPassword account password typed into the login form
     * @param idCard4   value typed into the {@code id_card} field of the verification dialog
     * @param email     mailbox address passed to {@code EmailListener.getCode}
     * @param authCode  mailbox credential passed to {@code EmailListener.getCode}
     * @param driver    browser session, already on the login page
     * @param wait      wait helper used for the verification dialog
     * @throws InterruptedException  if interrupted while pausing
     * @throws IllegalStateException if no verification code could be obtained
     */
    private static void doLogin(String lUsername, String lPassword,String idCard4,String email,String authCode,FirefoxDriver driver, WebDriverWait wait) throws InterruptedException {
        driver.findElement(By.id("J-userName")).sendKeys(lUsername);
        driver.findElement(By.id("J-password")).sendKeys(lPassword);
        driver.findElement(By.id("J-login")).click();

        // Verification dialog.
        WebElement idCard = wait.until(ExpectedConditions.visibilityOfElementLocated(By.id("id_card")));
        Thread.sleep(200L);
        idCard.sendKeys(idCard4);
        driver.findElement(By.id("verification_code")).click();

        String code = EmailListener.getCode(email, authCode);
        if (code == null || code.isEmpty()) {
            // getCode signals failure with an empty string; submitting it would only fail later.
            throw new IllegalStateException("No SMS verification code could be read from the mailbox");
        }
        driver.findElement(By.id("code")).sendKeys(code);
        driver.findElement(By.id("sureClick")).click();
    }

    /**
     * Queries trains for the given route and date and books the first matching row for
     * {@code passenger}.
     *
     * <p>The flow stops after the first booking attempt: clicking the booking button leaves the
     * result table, so the remaining row elements can no longer be used.
     *
     * @param driver      logged-in browser session
     * @param date        value typed into the {@code train_date} field
     * @param fromStation value typed into the departure station field
     * @param toStation   value typed into the arrival station field
     * @param passenger   label text of the passenger to select
     * @throws InterruptedException if interrupted while pausing between steps
     */
    private static void buyTicket (FirefoxDriver driver, String date, String fromStation, String toStation, String passenger) throws InterruptedException {
        driver.get(URL);

        val startStation = driver.findElement(By.id("fromStationText"));
        startStation.click();
        startStation.sendKeys(fromStation);
        startStation.sendKeys(Keys.ENTER);
        Thread.sleep(500L);

        val endStation = driver.findElement(By.id("toStationText"));
        endStation.clear();
        endStation.click();
        endStation.sendKeys(toStation);
        endStation.sendKeys(Keys.ENTER);
        Thread.sleep(500L);

        val startDate = driver.findElement(By.id("train_date"));
        startDate.click();
        startDate.clear();
        startDate.sendKeys(date);
        Thread.sleep(500L);

        driver.findElement(By.id("cc_train_type_btn_all")).findElement(By.xpath("//input[@value='G']")).click();
        driver.findElement(By.id("query_ticket")).click();
        driver.findElement(By.id("avail_ticket")).click();
        driver.findElement(By.id("query_ticket")).click();

        if (Objects.equals(driver.findElement(By.id("show_all_query_result")).getText(), "显示全部车次")) {
            return;
        }

        for (WebElement row : driver.findElement(By.id("queryLeftTable")).findElements(By.tagName("tr"))) {
            if (!isBookable(row)) {
                continue;
            }
            row.findElement(By.className("btn72")).click();
            Thread.sleep(1500);

            // Dismiss the warning dialog when the page contains one.
            val warningButtons = driver.findElements(By.id("qd_closeDefaultWarningWindowDialog_id"));
            if (!warningButtons.isEmpty()) {
                warningButtons.get(0).click();
            }

            if (!submitOrderFor(driver, passenger)) {
                log.warn("Passenger '{}' was not found in the passenger list", passenger);
            }
            // The result table is gone after the booking click, so stop here.
            return;
        }
        log.info("No bookable train found for {} -> {} on {}", fromStation, toStation, date);
    }

    /**
     * Tells whether a result row departs within the accepted hour window and shows "有" in the
     * availability column. Rows lacking the expected cells are treated as not bookable.
     */
    private static boolean isBookable(WebElement row) {
        int hour = departureHour(row);
        if (hour < MIN_DEPARTURE_HOUR || hour > MAX_DEPARTURE_HOUR) {
            return false;
        }
        // NOTE(review): column 3 is presumably one particular seat class — confirm.
        val cells = row.findElements(By.tagName("td"));
        return cells.size() > AVAILABILITY_COLUMN && "有".equals(cells.get(AVAILABILITY_COLUMN).getText());
    }

    /**
     * Reads the two-digit departure hour from the row's {@code start-t} cell.
     *
     * @return the hour, or {@code -1} when the cell is missing or does not start with a number
     */
    private static int departureHour(WebElement row) {
        val timeCells = row.findElements(By.className("start-t"));
        if (timeCells.isEmpty()) {
            return -1;
        }
        String text = timeCells.get(0).getText().trim();
        if (text.length() < 2) {
            return -1;
        }
        try {
            return Integer.parseInt(text.substring(0, 2));
        } catch (NumberFormatException e) {
            return -1;
        }
    }

    /**
     * Ticks the passenger whose label equals {@code passenger} and submits and confirms the order.
     *
     * @return {@code true} if the passenger was found and the order was submitted
     */
    private static boolean submitOrderFor(FirefoxDriver driver, String passenger) throws InterruptedException {
        for (WebElement item : driver.findElement(By.id("normal_passenger_id")).findElements(By.tagName("li"))) {
            WebElement label = item.findElement(By.tagName("label"));
            if (!Objects.equals(label.getText(), passenger)) {
                continue;
            }
            label.click();
            Thread.sleep(500);
            driver.findElement(By.id("submitOrder_id")).click();
            Thread.sleep(2000);
            driver.findElement(By.id("qr_submit_id")).click();
            return true;
        }
        return false;
    }

    /**
     * Builds the Firefox driver used by the demo.
     *
     * @return a started {@link FirefoxDriver}
     */
    private static FirefoxDriver catchConfig() throws InterruptedException {
        System.setProperty("webdriver.gecko.driver", "D:\\app\\WebDriver\\geckodriver-v0.31.0-win64\\geckodriver.exe");
        FirefoxOptions options = new FirefoxOptions();

        // Location of the Firefox binary.
        options.setBinary("D:\\app\\firefox\\firefox.exe");
        // NOTE(review): the three arguments below look like Chrome switches; verify that Firefox
        // honours them.
        // Disable GPU rendering.
        options.addArguments("--disable-gpu");
//        options.addArguments("--headless");
        // Ignore certificate errors.
        options.addArguments("ignore-certificate-errors");
        // Hide the "controlled by automation" hint.
        options.addArguments("--disable-infobars");
        // Anti-bot measure: turn off the dom.webdriver.enabled preference.
        options.addPreference("dom.webdriver.enabled", false);
        // User-agent override. It used to be set on a FirefoxProfile that was never handed to the
        // driver, so it had no effect; setting it on the options makes it apply.
        // NOTE(review): this is a mobile UA — confirm the site still serves the expected page.
        options.addPreference(
                "general.useragent.override",
                "Mozilla/5.0(iPhone;CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML,like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
        );

        return new FirefoxDriver(options);
    }
}

读取邮件

import cn.hutool.core.date.DateUtil;

import javax.mail.*;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMultipart;
import javax.mail.search.FromTerm;
import javax.mail.search.SearchTerm;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static java.util.stream.Collectors.toList;

/**
 * Reads the most recent forwarded 12306 SMS from a QQ IMAP mailbox and extracts the
 * verification code from it.
 */
public class EmailListener {

  /** "验证码" followed by an ASCII or full-width colon and the digits of the code. */
  private static final Pattern CODE_PATTERN = Pattern.compile("验证码[:：]\\s*(\\d+)");

  /**
   * Waits 30 seconds for the forwarded SMS to arrive, then returns the verification code from
   * the newest matching mail.
   *
   * <p>A mail matches when it was sent after 2024-01-01, its subject is exactly "验证", and its
   * first body part is text containing "12306".
   *
   * @param email    mailbox login name
   * @param password mailbox credential used for the IMAP login
   * @return the code, or an empty string when the wait is interrupted, the mailbox cannot be
   *     read, or no matching mail or code is found
   */
  public static String getCode(String email, String password) {
      // Give the forwarder 30 seconds to deliver the mail.
      try {
          Thread.sleep(30000L);
      } catch (InterruptedException e) {
          // Keep the interrupt visible to the caller instead of swallowing it.
          Thread.currentThread().interrupt();
          return "";
      }
      Properties properties = new Properties();
      properties.setProperty("mail.store.protocol", "imaps");
      properties.setProperty("mail.imaps.host", "imap.qq.com");
      properties.setProperty("mail.imaps.port", "993");

      // getInstance gives this call its own session instead of the JVM-wide default one.
      Session session = Session.getInstance(properties);

      Store store = null;
      Folder inbox = null;
      try {
          // Connect to the mailbox.
          store = session.getStore();
          store.connect(email, password);
          // Open the inbox.
          inbox = store.getFolder("INBOX");
          inbox.open(Folder.READ_ONLY);

          // NOTE(review): the sender address is empty here; presumably the forwarder's address
          // is meant to be filled in — confirm.
          SearchTerm sender = new FromTerm(new InternetAddress());
          Message[] messages = inbox.search(sender);

          Date cutoff = DateUtil.parse("2024-01-01 00:00:00");
          String newestText = null;
          Date newestDate = null;
          for (Message message : messages) {
              String text = matchingBody(message, cutoff);
              if (text == null) {
                  continue;
              }
              Date sent = message.getSentDate();
              // Strictly newer only, so the first of several equally dated mails wins.
              if (newestDate == null || sent.after(newestDate)) {
                  newestDate = sent;
                  newestText = text;
              }
          }
          return newestText == null ? "" : extractCode(newestText);
      } catch (MessagingException | RuntimeException e) {
          e.printStackTrace();
          return "";
      } finally {
          closeQuietly(inbox, store);
      }
  }

  /**
   * Returns the text of the message's first body part when the message matches the date,
   * subject and content criteria, otherwise {@code null}.
   *
   * <p>The date and subject are checked before the content is requested.
   */
  private static String matchingBody(Message message, Date cutoff) {
      try {
          Date sentDate = message.getSentDate();
          if (sentDate == null || !sentDate.after(cutoff) || !"验证".equals(message.getSubject())) {
              return null;
          }
          Object content = message.getContent();
          if (!(content instanceof MimeMultipart)) {
              return null;
          }
          MimeMultipart multipart = (MimeMultipart) content;
          if (multipart.getCount() == 0) {
              return null;
          }
          Object first = multipart.getBodyPart(0).getContent();
          if (!(first instanceof String)) {
              return null;
          }
          String text = (String) first;
          return text.contains("12306") ? text : null;
      } catch (MessagingException | IOException e) {
          // An unreadable message is simply not a candidate.
          return null;
      }
  }

  /** Extracts the digits that follow "验证码:" (or "验证码：") or returns an empty string. */
  private static String extractCode(String text) {
      Matcher matcher = CODE_PATTERN.matcher(text);
      return matcher.find() ? matcher.group(1) : "";
  }

  /** Closes the folder and the store, ignoring failures because the result is already known. */
  private static void closeQuietly(Folder inbox, Store store) {
      try {
          if (inbox != null && inbox.isOpen()) {
              inbox.close(false);
          }
      } catch (MessagingException ignored) {
          // Best effort: nothing useful can be done if closing fails.
      }
      try {
          if (store != null) {
              store.close();
          }
      } catch (MessagingException ignored) {
          // Best effort: nothing useful can be done if closing fails.
      }
  }
}

配置文件内容

# Sample config.properties, loaded from the classpath as UTF-8 by Selenium12306.doPurchase.
# Travel date typed into the "train_date" field.
date=2024-01-11
# Departure station typed into the "fromStationText" field.
startStation=起始站
# Arrival station typed into the "toStationText" field.
endStation=终点站
# Account name typed into the login form.
username=用户名
# Account password typed into the login form.
password=密码
# Passenger label to select on the order page (compared for exact equality).
passenger=乘车人
# Value typed into the "id_card" field of the verification dialog.
idCard=身份证后四位
# Mailbox login passed to EmailListener.getCode.
email=邮箱
# Mailbox credential passed to EmailListener.getCode for the IMAP login.
authCode=邮箱授权码

需要的依赖

<!-- jsoup: used to parse the page source in Selenium12306.buyTicket. -->
<!-- NOTE(review): 1.5.2 is a very old jsoup release; consider upgrading. -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.5.2</version>
</dependency>
<!-- Selenium: FirefoxDriver, FirefoxOptions, WebDriverWait and the By locators. -->
<dependency>
    <groupId>org.seleniumhq.selenium</groupId>
    <artifactId>selenium-java</artifactId>
    <version>3.141.0</version>
</dependency>
<!-- Lombok: provides @Slf4j and val. -->
<!-- NOTE(review): @Slf4j also needs an SLF4J API on the classpath, which is not listed here; -->
<!-- presumably it comes from the surrounding project - confirm. -->
<dependency>
    <groupId>org.projectlombok</groupId>
    <artifactId>lombok</artifactId>
    <version>1.18.10</version>
</dependency>
<!-- JavaMail: IMAP access to the mailbox in EmailListener. -->
<dependency>
    <groupId>com.sun.mail</groupId>
    <artifactId>javax.mail</artifactId>
    <version>1.6.2</version>
    <scope>compile</scope>
</dependency>
<!-- Hutool: DateUtil.parse in EmailListener. -->
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-all</artifactId>
    <version>5.8.16</version>
</dependency>