基于Java+selenium+Chrome,实现截取html页面内容并保存为图片

发布时间 2023-06-26 17:58:36作者: _天青色烟雨

1、需求

需要用 Java 程序发送邮件,并将输入的多个页面转为 PDF 附件一同发送出去。那么页面如何转为 PDF 呢?其中一个方案是先将 HTML 页面转为图片,再将图片合并为 PDF。本文记录的是 HTML => PNG 的过程。

2、开发

主要依赖

<!-- html2image -->
<dependency>
    <groupId>org.seleniumhq.selenium</groupId>
    <artifactId>selenium-java</artifactId>
    <version>4.2.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/net.coobird/thumbnailator -->
<dependency>
    <groupId>net.coobird</groupId>
    <artifactId>thumbnailator</artifactId>
    <version>0.4.8</version>
</dependency>
<!-- html2image end -->
<dependency>
    <groupId>commons-io</groupId>
    <artifactId>commons-io</artifactId>
    <version>2.11.0</version>
</dependency>

处理逻辑

package com.sinby.screenshot.utils.html;

import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.openqa.selenium.*;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.support.ui.WebDriverWait;
import org.springframework.util.StringUtils;

import java.io.File;
import java.io.IOException;
import java.time.Duration;
import java.util.concurrent.TimeUnit;

/**
 * @author:sinby
 * @Date: 2023/6/20 22:55
 * @Description: HTML-to-PNG utility. Loads a page in headless Chrome through Selenium,
 * takes a screenshot and copies it to a caller-supplied path.
 */
@Slf4j
public class ToImageUtil {

    /**
     * Renders a page in a fixed 1920x6000 headless window and saves the screenshot.
     *
     * <p>The method waits a fixed five seconds after navigation before capturing, so that
     * content loaded after the initial document has a chance to render.
     *
     * @param url           page address to load
     * @param targetPath    destination path including the file name, e.g. {@code /images/test.png}
     * @return the {@code targetPath} argument, unchanged
     * @throws RuntimeException if either argument is null/empty, the target path cannot be
     *                          normalized, the thread is interrupted, or the file copy fails
     */
    public static String htmlToImage(String url, String targetPath) {

        requireUrlAndTarget(url, targetPath);

        // Fixed delay (ms) between navigation and capture.
        long sleepTime = 5 * 1000L;

        // Headless mode for AWT
        System.setProperty("java.awt.headless", "true");

        long startTm = System.currentTimeMillis();
        log.info(">>>>>>>> 开始截图:{} <<<<<<<<",startTm);

        ChromeOptions chromeOptions = getChromeOptions();
        // Window size used for the capture
        chromeOptions.addArguments("--window-size=1920,6000");

        WebDriver driver = new ChromeDriver(chromeOptions);
        try {
            log.info(">>>>>>>> 开始加载{}页面 <<<<<<<<",url);
            driver.get(url);
            Thread.sleep(sleepTime);
            log.info(">>>>>>>> 开始截图 <<<<<<<<");
            File srcFile = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
            log.info(">>>>>>>> 保存截图 <<<<<<<<");
            copyToTarget(srcFile, targetPath);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can observe the interruption.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e.getMessage(), e);
        } catch (IOException e) {
            // Keep the original exception as the cause instead of printing the stack trace.
            throw new RuntimeException(e.getMessage(), e);
        } finally {
            // Always shut the browser down, even when the capture failed.
            driver.quit();
        }

        long endTm = System.currentTimeMillis();
        log.info(">>>>>>>> 结束截图:{},操作时长:{} <<<<<<<<", endTm, endTm-startTm);

        return targetPath;
    }

    /**
     * Renders a page, scrolls through it step by step (to trigger content that loads on
     * scroll), resizes the window to the measured page size and saves the screenshot.
     *
     * @param url       page address to load
     * @param targetPath    destination path including the file name, e.g. {@code /images/test.png}
     * @param rollingDistance   distance in px scrolled per step (e.g. 1000); a value
     *                          {@code <= 0} disables the scrolling phase
     * @param waitTime  pause in ms before each scroll step (e.g. 1000)
     * @param implicitlyWait    WebDriver implicit wait in ms
     * @return the {@code targetPath} argument, unchanged
     * @throws RuntimeException if either string argument is null/empty, the target path cannot
     *                          be normalized, the thread is interrupted, or the file copy fails
     */
    public static String htmlToImage(String url, String targetPath,
                                     int rollingDistance, long waitTime, long implicitlyWait) {

        requireUrlAndTarget(url, targetPath);

        long startTm = System.currentTimeMillis();
        log.info(">>>>>>>> 开始截图:{} <<<<<<<<",startTm);

        // Headless mode for AWT
        System.setProperty("java.awt.headless", "true");

        WebDriver driver = new ChromeDriver(getChromeOptions());
        try {

            // The parameter is documented in milliseconds, so pass it as such
            // (the Duration overload also replaces the deprecated long/TimeUnit one).
            driver.manage().timeouts().implicitlyWait(Duration.ofMillis(implicitlyWait));
            // Maximize the window
            driver.manage().window().maximize();
            // Load the page
            driver.get(url);
            // The page-load strategy is EAGER, so explicitly wait for readyState "complete".
            new WebDriverWait(driver, Duration.ofSeconds(300)).until(drive -> "complete"
                    .equals(((JavascriptExecutor) drive).executeScript("return document.readyState")));
            // Start from a small, fixed resolution before measuring the page
            driver.manage().window().setSize(new Dimension(1920, 1080));

            JavascriptExecutor javascriptExecutor = (JavascriptExecutor) driver;
            // Largest width reported by the document, for pages wider than the viewport
            int maxWidth = toInt(javascriptExecutor
                    .executeScript("return Math.max(document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth);"));
            // Largest height reported by the document, for pages taller than the viewport
            int maxHeight = toInt(javascriptExecutor
                    .executeScript("return Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);"));

            log.info(">>>>>>>> 最大宽度maxWidth:{}, 最大高度maxHeight:{} <<<<<<<<", maxWidth, maxHeight);
            // Number of scroll steps = ceil(maxHeight / rollingDistance); a non-positive
            // distance means "do not scroll" instead of dividing by zero.
            int frequ = 0;
            if (rollingDistance > 0) {
                frequ = maxHeight % rollingDistance == 0
                        ? maxHeight / rollingDistance
                        : maxHeight / rollingDistance + 1;
            }
            log.info(">>>>>>>> 开始模拟页面滚动 <<<<<<<<");
            for (int i = 0; i < frequ; i++) {
                int length = i*rollingDistance;
                Thread.sleep(waitTime);
                // The page may have grown since the last step; track the largest height seen.
                int nowHeight = toInt(javascriptExecutor.executeScript("return document.body.scrollHeight;"));
                log.info(">>>>>>>> 滚动第{}次时的页面最大高度: {} <<<<<<<<", (i+1), nowHeight);
                maxHeight = Math.max(maxHeight, nowHeight );
                javascriptExecutor.executeScript("window.scrollTo(0, "+length+")");
            }
            log.info(">>>>>>>> 模拟页面滚动结束 <<<<<<<<");
            log.info(">>>>>>>> 最大宽度maxWidth:{}, 最大高度maxHeight:{} <<<<<<<<", maxWidth, maxHeight);

            // Resize the window to the measured page size so one screenshot covers it
            driver.manage().window().setSize(new Dimension(maxWidth, maxHeight));
            // Back to the top-left corner: scrollTo takes (x, y), so both must be 0.
            javascriptExecutor.executeScript("window.scrollTo(0, 0);");

            File srcFile = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
            copyToTarget(srcFile, targetPath);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can observe the interruption.
            Thread.currentThread().interrupt();
            throw new RuntimeException("截图失败!"+e.getMessage(), e);
        } catch (IOException e) {
            // Keep the original exception as the cause instead of printing the stack trace.
            throw new RuntimeException("截图失败!"+e.getMessage(), e);
        } finally {
            // Always shut the browser down, even when the capture failed.
            driver.quit();
        }


        long endTm = System.currentTimeMillis();
        log.info(">>>>>>>> 结束截图:{},操作时长:{} <<<<<<<<", endTm, endTm-startTm);

        return targetPath;
    }

    /**
     * Builds the Chrome options used for screenshots and registers the chromedriver location.
     *
     * <p>Side effect: sets the {@code webdriver.chrome.driver} system property. The browser and
     * driver paths are resolved relative to the {@code user.dir} system property and depend on
     * the {@code os.name} system property.
     *
     * @return options configured for headless operation with an explicit Chrome binary
     */
    public static ChromeOptions getChromeOptions() {
        log.debug(">>>>>>>> 开始获取chrome配置信息 <<<<<<<<");
        ChromeOptions options = new ChromeOptions();

        // Pick the chromedriver / chrome locations according to the operating system
        String chromeDriver;
        String chrome;
        String os = System.getProperty("os.name");
        log.info("当前系统版本是:{}", os);
        String osName = os == null ? "" : os.toLowerCase();
        // Normalize Windows separators so the concatenated paths use '/' throughout
        String sysPath = System.getProperty("user.dir").replace('\\', '/');
        if (osName.startsWith("windows")) { // Windows
            chromeDriver = sysPath + "/chrome/windows/chromedriver.exe";
            chrome = sysPath + "/chrome/windows/chrome.exe";
        } else if (osName.startsWith("linux")) {    // Linux
            chromeDriver = sysPath + "/.local-browser/linux-1000022/chrome-linux/chromedriver";
            chrome = sysPath + "/.local-browser/linux-1000022/chrome-linux/chrome";
        } else {    // Any other operating system
            // NOTE(review): these are directories, not executables; this branch looks
            // unfinished and presumably will not start a browser as-is. Confirm before use.
            chromeDriver = sysPath + "/chrome/mac/";
            chrome = sysPath + "/chrome/mac/";
        }

        options.addArguments("disable-infobars");
        options.addArguments("--headless");
        options.addArguments("--dns-prefetch-disable");
        options.addArguments("--no-referrers");
        options.addArguments("--disable-gpu");
        options.addArguments("--disable-audio");
        options.addArguments("--no-sandbox");
        options.addArguments("--ignore-certificate-errors");
        options.addArguments("--allow-insecure-localhost");

        // EAGER: navigation returns before all sub-resources have finished loading
        options.setPageLoadStrategy(PageLoadStrategy.EAGER);
        // Use the bundled Chrome binary
        options.setBinary(chrome);

        // Register the driver executable
        System.setProperty("webdriver.chrome.driver", chromeDriver);

        log.debug(">>>>>>>> 结束获取chrome配置信息 <<<<<<<<");

        return options;
    }

    /** Rejects a null or empty URL / target path with the shared error message. */
    private static void requireUrlAndTarget(String url, String targetPath) {
        if (!StringUtils.hasLength(url) || !StringUtils.hasLength(targetPath)) {
            throw new RuntimeException("截图失败!页面URL:"+url+",保存位置:"+targetPath+"。不可为空!");
        }
    }

    /** Copies a captured screenshot file to the normalized target path. */
    private static void copyToTarget(File srcFile, String targetPath) throws IOException {
        String normalized = FilenameUtils.normalize(targetPath);
        if (normalized == null) {
            // normalize() yields null for paths it cannot resolve; fail with a clear
            // message rather than an NPE from new File(null).
            throw new IllegalArgumentException("Invalid target path: " + targetPath);
        }
        FileUtils.copyFile(srcFile, new File(normalized));
    }

    /** Converts a numeric script result to int, tolerating non-integral numbers. */
    private static int toInt(Object scriptResult) {
        if (scriptResult instanceof Number) {
            return ((Number) scriptResult).intValue();
        }
        return Integer.parseInt(String.valueOf(scriptResult));
    }
}

3、其他

免安装chrome驱动
https://www.aliyundrive.com/s/c5UscpfU9KR
提取码: m82x
点击链接保存,或者复制本段内容,打开「阿里云盘」APP ,无需下载极速在线查看,视频原画倍速播放。