ztf.net.cn
七牛云
您当前的位置: 首页 >  个人博客

selenium自动批量快照截图为webp

漫画网站不提供下载链接,自己又想保存下来看,怎么办?我们可以用 selenium 写一个自动化工具,自动打开页面并把整页截图保存下来。

环境配置

1、安装selenium

pip install selenium

2、下载对应版本的ChromeDriver


安装Chrome,查看对应的版本号



下载对应版本的ChromeDriver

国内镜像下载地址,点击访问

3、测试安装环境是否OK

from selenium import webdriver

# Smoke test for the installation: start Chrome through the given ChromeDriver
# binary and open a page. The variable is named `driver` so that it does not
# shadow the builtin `chr`.
driver = webdriver.Chrome(r'/Usr/local/bin/ChromeDriver')
driver.get('https://www.csdn.net')


完整Demo

功能:

1、selenium 模拟浏览器行为,并滚动到底部
2、产生快照

3、转为webp格式图片

import io
import time
import os.path
import tempfile
import multiprocessing as mp
from PIL import Image
from selenium import webdriver


def webshot(link="", filename=""):
    """Capture a full-page screenshot of ``link`` and save it as WebP.

    A headless Chrome session is started, the page is scrolled down in
    500-pixel steps (sleeping one second after each step and re-reading the
    page height, presumably so lazily loaded content can extend the page),
    the window is resized to the document's scroll width/height, and the PNG
    screenshot is passed to ``to_webp``.

    Exceptions raised while loading, scrolling, or saving are caught and
    reported on stdout. The browser is always shut down, even on failure.

    Args:
        link: URL of the page to capture.
        filename: Destination path handed to ``to_webp``.

    Returns:
        None.
    """
    print("当前进程%d已启动" % os.getpid())

    options = webdriver.ChromeOptions()
    # The original author found that full-page capture only works in
    # headless mode (reason unknown).
    options.add_argument('--headless')
    # NOTE(review): the driver path is passed positionally; newer Selenium
    # releases may require a Service object instead -- confirm against the
    # installed Selenium version.
    driver = webdriver.Chrome("./chromedriver", options=options)

    # JavaScript that returns the current height of the page body.
    js_height = "return document.body.clientHeight"
    scroll_step = 500
    print(link)

    try:
        driver.set_window_size(1920, 1080)
        driver.get(link)

        # Scroll down step by step; the height is re-read after every step
        # because it may change while scrolling.
        k = 1
        height = driver.execute_script(js_height)
        while k * scroll_step < height:
            js_move = "window.scrollTo(0,{})".format(k * scroll_step)
            print(js_move)
            driver.execute_script(js_move)
            time.sleep(1)
            height = driver.execute_script(js_height)
            k += 1

        # Grow the window to the whole document so one screenshot covers it.
        scroll_width = driver.execute_script('return document.body.parentNode.scrollWidth')
        scroll_height = driver.execute_script('return document.body.parentNode.scrollHeight')
        driver.set_window_size(scroll_width, scroll_height)

        png = driver.get_screenshot_as_png()
        to_webp(filename, png)
        print(f"Process {link} get one pic !!!")
    except Exception as e:
        # Best-effort: report the failure and let the pool continue.
        print(filename, e)
        print(f"Process {link} error !!!")
    finally:
        # Always release the browser; otherwise every failed page leaks a
        # Chrome/chromedriver process.
        driver.quit()


# Convert PNG data to a WebP image file.
def to_webp(save_path, png):
    """Decode PNG bytes and write them to ``save_path`` in WebP format.

    Args:
        save_path: Destination path passed to ``Image.save``.
        png: Raw PNG data as bytes.
    """
    png_buffer = io.BytesIO(png)
    image = Image.open(png_buffer)
    image.save(save_path, "WEBP")


def parse_url_lines(lines):
    """Turn raw task lines into ``(link, filename)`` pairs.

    Each non-blank line must contain a URL followed by whitespace and an
    output file name. Surrounding whitespace (including the trailing newline)
    is stripped; blank lines are skipped.

    Args:
        lines: Iterable of text lines, e.g. an open text file.

    Returns:
        A list of ``(link, filename)`` tuples in input order.

    Raises:
        ValueError: If a non-blank line has no file name after the URL.
    """
    tasks = []
    for line_no, line in enumerate(lines, start=1):
        # Split once on the first run of whitespace: URL, then the rest.
        fields = line.strip().split(None, 1)
        if not fields:
            continue
        if len(fields) != 2:
            raise ValueError(
                "line %d: expected '<url>  <filename>', got %r" % (line_no, line)
            )
        tasks.append((fields[0], fields[1].strip()))
    return tasks


def run__pool(data, workers=3):
    """Run ``webshot`` over all tasks using a process pool.

    Args:
        data: Iterable of ``(link, filename)`` pairs; each pair is unpacked
            into the two positional arguments of ``webshot``.
        workers: Number of worker processes in the pool.
    """
    from multiprocessing import Pool
    start_time = time.time()
    with Pool(workers) as p:
        # starmap unpacks each pair, so webshot receives both the link and
        # the filename (plain map would pass a single argument only).
        outputs = p.starmap(webshot, data)
    print(f'| outputs: {outputs}    TimeUsed: {time.time() - start_time:.1f}    \n')


if __name__ == '__main__':
    # The task file is only read, so open it read-only.
    with open("urls.txt", "r", encoding="utf-8") as f:
        tasks = parse_url_lines(f)

    run__pool(tasks)

控制浏览器的窗口

# coding=utf-8
# Demo: maximize, minimize and explicitly resize the browser window,
# printing the reported window size after each step.
import time

from selenium import webdriver

driver = webdriver.Chrome()
try:
    driver.implicitly_wait(5)
    # The scheme must be followed by "//" to form a well-formed URL.
    driver.get("http://www.baidu.com")

    try:
        print("测试全屏模式")
        driver.maximize_window()
        time.sleep(3)
        print(driver.get_window_size())
        print("测试最小化")
        driver.minimize_window()
        time.sleep(3)
        print(driver.get_window_size())
        print("测试设置浏览器宽1024、高768显示")
        driver.set_window_size(1024, 768)
        time.sleep(3)
        print(driver.get_window_size())
    except Exception as e:
        print("控制浏览器失败", e)
finally:
    # Close the browser even if loading the page or resizing fails.
    driver.quit()


ZTF

ZTF|时间:2022-10-17

如果缘分安排我们相遇,请不要让她擦肩而过。扫一扫二维码,加我为好友吧!
标签云