关联仓库: https://github.com/yiyungent/WebScreenshot-python
main.py
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
import sys
import os
def getUrls():
    """Collect the URLs to capture, one URL per line.

    The list is read from the ``URLS`` environment variable. A non-empty
    first command-line argument, when present, overrides the environment.

    Returns:
        list[str]: The URLs in input order, stripped of surrounding
        whitespace, with blank lines skipped. Empty when neither source
        provides any text.
    """
    # os.getenv returns None when URLS is unset; fall back to "" so that
    # splitlines() below cannot raise AttributeError.
    urlsStr = os.getenv('URLS') or ""
    # A non-empty command-line argument takes precedence over the environment.
    if len(sys.argv) >= 2 and sys.argv[1]:
        urlsStr = sys.argv[1]
    # Blank or whitespace-only lines would otherwise be handed to the
    # browser as URLs, so drop them here.
    return [line.strip() for line in urlsStr.splitlines() if line.strip()]
def save_screenshot(url):
    """Open *url* in the module-level ``driver`` and save a PNG capture.

    The browser window is resized to the document's scroll width and height
    before capturing. The file is written to
    ``./screenshots/<YYYY-MM-DD HH:MM:SS>.png`` using the local time.

    Args:
        url: Address of the page to capture.
    """
    driver.get(url)
    width = driver.execute_script("return document.documentElement.scrollWidth")
    height = driver.execute_script("return document.documentElement.scrollHeight")
    driver.set_window_size(width, height)
    # Make sure the output directory exists; without it the capture cannot
    # be written and no file is produced.
    os.makedirs('./screenshots', exist_ok=True)
    # strftime without an explicit time tuple formats the current local time.
    timestamp = time.strftime('%Y-%m-%d %H:%M:%S')
    driver.save_screenshot('./screenshots/' + timestamp + '.png')
# Start a headless Chrome session using the chromedriver binary located in
# the current directory.
chrome_options = Options()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument('--headless')
driver = webdriver.Chrome('./chromedriver', options=chrome_options)
try:
    urls = getUrls()
    urlsLen = len(urls)
    print('一共有: ' + str(urlsLen) + ' 条URL')
    for url in urls:
        # Pause five seconds before each capture.
        time.sleep(5)
        save_screenshot(url)
        print('截图成功: ' + url)
    print('运行完成')
finally:
    # Always shut the browser down, even when a page fails to load, so no
    # Chrome/chromedriver process is left behind.
    driver.quit()
run-app.yml
name: Run App

on:
  push:
  schedule:
    # Scheduled run at 21:00 UTC every day, i.e. 05:00 in Asia/Shanghai (UTC+8).
    - cron: '0 21 * * *'

jobs:
  run-app:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.8
        uses: actions/setup-python@v2
        with:
          python-version: 3.8
      - name: Add conda to system path
        run: |
          # $CONDA is an environment variable pointing to the root of the miniconda directory
          echo $CONDA/bin >> $GITHUB_PATH
      - name: Install dependencies
        run: |
          # -y keeps apt from waiting for confirmation in this non-interactive job.
          sudo apt-get install -y unzip
          sudo pip install selenium
          # Install Chrome
          # Pinned Chrome version
          # NOTE: the Chrome version must match the chromedriver version
          # NOTE(review): with the download below disabled, the .deb is
          # presumably committed to the repository - confirm.
          #sudo wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
          sudo dpkg -i google-chrome*.deb
          sudo apt-get install -f -y
          # Print the installed Chrome version. This is diagnostic only, so a
          # failure here must not fail the job.
          /usr/bin/google-chrome --version || true
          # Install chromedriver
          # NOTE(review): with the download below disabled, the zip is
          # presumably committed to the repository - confirm.
          #sudo wget http://chromedriver.storage.googleapis.com/88.0.4324.96/chromedriver_linux64.zip
          sudo unzip chromedriver_linux64.zip
          # Make chromedriver executable for all users
          sudo chmod a+x chromedriver
          # The next two lines install Chinese fonts
          sudo apt-get install -y --no-install-recommends fonts-wqy-microhei
          sudo apt-get install -y --no-install-recommends ttf-wqy-zenhei
      - name: Run App
        # sudo resets the environment by default, which is why Python could
        # not see URLS; forward the variables from `env` explicitly. main.py
        # reads URLS from the environment, so the secret no longer has to be
        # spliced into the shell command line.
        run: |
          sudo --preserve-env=URLS,TZ python main.py
        env:
          URLS: ${{ secrets.URLS }}
          TZ: Asia/Shanghai
      - name: Deploy 🚀
        uses: JamesIves/github-pages-deploy-action@3.7.1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          BRANCH: gh-pages
          FOLDER: screenshots
          clean: false
Dockerfile(TODO: 未测试)
FROM python:3.8 AS base

# main.py loads ./chromedriver and writes to ./screenshots relative to the
# working directory, so keep everything in one place.
WORKDIR /app

# apt-get install needs a populated package index, and -y because the build
# is non-interactive.
RUN apt-get update && apt-get install -y unzip
RUN pip install selenium

# Install Chrome
# TODO: pin the Chrome version
# NOTE: the Chrome version must match the chromedriver version
RUN wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# Install the local .deb through apt-get so its dependencies are resolved in
# the same step. A plain `dpkg -i` exits non-zero on missing dependencies,
# which would abort the build before a later `apt-get install -f` could run.
RUN apt-get install -y ./google-chrome-stable_current_amd64.deb
# Print the installed Chrome version
RUN /usr/bin/google-chrome --version

# Install chromedriver
RUN wget http://chromedriver.storage.googleapis.com/88.0.4324.96/chromedriver_linux64.zip
RUN unzip chromedriver_linux64.zip
# Make chromedriver executable for all users
RUN chmod a+x chromedriver

# The next two lines install Chinese fonts
# NOTE(review): confirm both package names exist in the base image's distribution.
RUN apt-get install -y --no-install-recommends fonts-wqy-microhei
RUN apt-get install -y --no-install-recommends ttf-wqy-zenhei

# The entrypoint runs main.py, so the script has to be part of the image.
COPY main.py ./
# Output directory that main.py saves its captures into.
RUN mkdir -p screenshots

ENTRYPOINT ["python", "main.py"]
本文作者: yiyun
本文链接: https://moeci.com/posts/分类-爬虫/web-screenshot/
版权声明: 本博客所有文章除特别声明外,均采用 BY-NC-SA 许可协议。转载请注明出处!