
这次看到小影的官网做得很好看。目前稍有名气的平台基本都在用 Vue,毕竟 Vue 的性能比普通 HTML+CSS 页面好太多了,这是由 Vue 的渲染机制决定的。请注意:我们之前用 Python 开发的落地页只是修改一下临时使用,并且是赠送给客户的;使用 Python 时一定要注意合法合规!

当目标网站是由 Vue.js 编译后生成的单页应用程序(SPA)时,爬取和保存网页内容的步骤会有所不同。Vue.js 和其他前端框架(如 React 和 Angular)生成的 SPA 通常依赖于动态加载的资源和客户端渲染。要完整保存这些网页的内容,通常需要执行以下步骤:
根据已知信息,我们需要用到 Node.js,因此这里不再使用 Python,改为新建 download.js。

在 download.js 中写入下面的 JS 爬虫代码:
const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');
const url = require('url');
// URL of the page to mirror.
const targetUrl = 'https://www.xiaoying.tv/';
// Output layout: <saveDirectory>/{css,js,assets}.
const saveDirectory = 'xiaoying_tv';
const cssDirectory = path.join(saveDirectory, 'css');
const jsDirectory = path.join(saveDirectory, 'js');
const assetsDirectory = path.join(saveDirectory, 'assets');
// `recursive: true` makes mkdirSync succeed when the directory already exists
// (and creates missing parents), so no separate existsSync() check is needed.
for (const directory of [saveDirectory, cssDirectory, jsDirectory, assetsDirectory]) {
  fs.mkdirSync(directory, { recursive: true });
}
/**
 * Derives the local file name for a remote resource.
 *
 * @param {string} resourceUrl - Absolute URL as reported by the browser.
 * @returns {string|null} Last segment of the URL path, or null when the URL
 *   cannot be parsed, is not http(s) (e.g. data:, blob:, javascript:, mailto:),
 *   or its path has no segment at all (e.g. the site root "/").
 */
function fileNameFromUrl(resourceUrl) {
  let parsed;
  try {
    parsed = new url.URL(resourceUrl);
  } catch (err) {
    // An unparseable URL simply has nothing to download; this is an expected outcome.
    return null;
  }
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    return null;
  }
  // URL paths always use "/", so use the POSIX flavour regardless of the host OS.
  const fileName = path.posix.basename(parsed.pathname);
  return fileName === '' ? null : fileName;
}

/**
 * Chooses the output directory for a file based on its extension.
 *
 * @param {string} fileName - File name including extension.
 * @returns {string|null} Target directory, or null when the extension is not
 *   one of the supported image/media/CSS/JS types.
 */
function directoryForFileName(fileName) {
  const extension = path.extname(fileName).toLowerCase();
  if (['.png', '.jpg', '.jpeg', '.gif', '.svg', '.mp4', '.mp3'].includes(extension)) {
    return assetsDirectory;
  }
  if (extension === '.css') {
    return cssDirectory;
  }
  if (extension === '.js') {
    return jsDirectory;
  }
  return null;
}

/**
 * Downloads one resource by navigating `page` to it and writes the raw bytes to disk.
 * A failed download is reported and skipped so that one broken asset does not
 * abort the whole run.
 *
 * @param {object} page - Puppeteer page used for the navigation.
 * @param {string} resourceUrl - Absolute http(s) URL of the resource.
 * @param {string} filePath - Destination path on disk.
 * @returns {Promise<void>}
 */
async function saveResource(page, resourceUrl, filePath) {
  try {
    const response = await page.goto(resourceUrl);
    // goto() may resolve to null, and error pages must not be stored as assets.
    if (response === null || !response.ok()) {
      console.warn(`下载失败,已跳过: ${resourceUrl}`);
      return;
    }
    fs.writeFileSync(filePath, await response.buffer());
  } catch (err) {
    console.warn(`下载失败,已跳过: ${resourceUrl} (${err.message})`);
  }
}

// Entry point: render the target page, save its HTML, then mirror its static resources.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(targetUrl, { waitUntil: 'networkidle2' });

    // Save the rendered HTML.
    const html = await page.content();
    fs.writeFileSync(path.join(saveDirectory, 'index.html'), html, 'utf8');

    // Collect every resource URL BEFORE downloading anything: each download
    // navigates `page` away from the target document, so querying the DOM
    // afterwards would inspect the downloaded resource instead of the site.
    const cssLinks = await page.$$eval('link[rel="stylesheet"]', (links) => links.map((link) => link.href));
    const jsLinks = await page.$$eval('script[src]', (scripts) => scripts.map((script) => script.src));
    const imgLinks = await page.$$eval('img[src]', (imgs) => imgs.map((img) => img.src));
    const staticLinks = await page.$$eval('a[href], video[src], audio[src]', (elements) => elements.map((el) => el.href || el.src).filter((src) => src));

    // URL -> destination path. A Map removes duplicate URLs and keeps the
    // original CSS, JS, images, other-links download order.
    const downloads = new Map();
    const enqueue = (links, pickDirectory) => {
      for (const link of links) {
        const fileName = fileNameFromUrl(link);
        if (fileName === null || downloads.has(link)) {
          continue;
        }
        const directory = pickDirectory(fileName);
        // Links whose extension is unsupported (ordinary page links, mostly) are not fetched.
        if (directory !== null) {
          downloads.set(link, path.join(directory, fileName));
        }
      }
    };
    enqueue(cssLinks, () => cssDirectory);
    enqueue(jsLinks, () => jsDirectory);
    enqueue(imgLinks, () => assetsDirectory);
    enqueue(staticLinks, directoryForFileName);

    for (const [link, filePath] of downloads) {
      await saveResource(page, link, filePath);
    }
  } finally {
    // Always shut the browser down, even when a step above throws.
    await browser.close();
  }
  console.log('网页内容下载完成!');
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
完成后,我分了一下目录 ,此前python也是有用的,于是我这样

由于已经安装了 Node 依赖,因此我默认把 node 目录作为根目录,继续我们的爬取。
整体流程:先用 npm 安装 Puppeteer 库;爬取到的样式表保存到 /css 目录,脚本保存到 /js 目录,图片等静态资源保存到 /assets 目录。以下是使用 Puppeteer 爬取 Vue.js 编译后网站的示例代码:
首先,安装 Puppeteer 库。在终端中运行以下命令:
npm install puppeteer
使用 Puppeteer 模拟浏览器操作并下载页面内容。以下是详细的代码示例:
const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');
const url = require('url');
// URL of the page to mirror.
const targetUrl = 'https://www.xiaoying.tv/';
// Output layout: <saveDirectory>/{css,js,assets}.
const saveDirectory = 'xiaoying_tv';
const cssDirectory = path.join(saveDirectory, 'css');
const jsDirectory = path.join(saveDirectory, 'js');
const assetsDirectory = path.join(saveDirectory, 'assets');
// `recursive: true` makes mkdirSync succeed when the directory already exists
// (and creates missing parents), so no separate existsSync() check is needed.
for (const directory of [saveDirectory, cssDirectory, jsDirectory, assetsDirectory]) {
  fs.mkdirSync(directory, { recursive: true });
}
/**
 * Derives the local file name for a remote resource.
 *
 * @param {string} resourceUrl - Absolute URL as reported by the browser.
 * @returns {string|null} Last segment of the URL path, or null when the URL
 *   cannot be parsed, is not http(s) (e.g. data:, blob:, javascript:, mailto:),
 *   or its path has no segment at all (e.g. the site root "/").
 */
function fileNameFromUrl(resourceUrl) {
  let parsed;
  try {
    parsed = new url.URL(resourceUrl);
  } catch (err) {
    // An unparseable URL simply has nothing to download; this is an expected outcome.
    return null;
  }
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    return null;
  }
  // URL paths always use "/", so use the POSIX flavour regardless of the host OS.
  const fileName = path.posix.basename(parsed.pathname);
  return fileName === '' ? null : fileName;
}

/**
 * Chooses the output directory for a file based on its extension.
 *
 * @param {string} fileName - File name including extension.
 * @returns {string|null} Target directory, or null when the extension is not
 *   one of the supported image/media/CSS/JS types.
 */
function directoryForFileName(fileName) {
  const extension = path.extname(fileName).toLowerCase();
  if (['.png', '.jpg', '.jpeg', '.gif', '.svg', '.mp4', '.mp3'].includes(extension)) {
    return assetsDirectory;
  }
  if (extension === '.css') {
    return cssDirectory;
  }
  if (extension === '.js') {
    return jsDirectory;
  }
  return null;
}

/**
 * Downloads one resource by navigating `page` to it and writes the raw bytes to disk.
 * A failed download is reported and skipped so that one broken asset does not
 * abort the whole run.
 *
 * @param {object} page - Puppeteer page used for the navigation.
 * @param {string} resourceUrl - Absolute http(s) URL of the resource.
 * @param {string} filePath - Destination path on disk.
 * @returns {Promise<void>}
 */
async function saveResource(page, resourceUrl, filePath) {
  try {
    const response = await page.goto(resourceUrl);
    // goto() may resolve to null, and error pages must not be stored as assets.
    if (response === null || !response.ok()) {
      console.warn(`下载失败,已跳过: ${resourceUrl}`);
      return;
    }
    fs.writeFileSync(filePath, await response.buffer());
  } catch (err) {
    console.warn(`下载失败,已跳过: ${resourceUrl} (${err.message})`);
  }
}

// Entry point: render the target page, save its HTML, then mirror its static resources.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(targetUrl, { waitUntil: 'networkidle2' });

    // Save the rendered HTML.
    const html = await page.content();
    fs.writeFileSync(path.join(saveDirectory, 'index.html'), html, 'utf8');

    // Collect every resource URL BEFORE downloading anything: each download
    // navigates `page` away from the target document, so querying the DOM
    // afterwards would inspect the downloaded resource instead of the site.
    const cssLinks = await page.$$eval('link[rel="stylesheet"]', (links) => links.map((link) => link.href));
    const jsLinks = await page.$$eval('script[src]', (scripts) => scripts.map((script) => script.src));
    const imgLinks = await page.$$eval('img[src]', (imgs) => imgs.map((img) => img.src));
    const staticLinks = await page.$$eval('a[href], video[src], audio[src]', (elements) => elements.map((el) => el.href || el.src).filter((src) => src));

    // URL -> destination path. A Map removes duplicate URLs and keeps the
    // original CSS, JS, images, other-links download order.
    const downloads = new Map();
    const enqueue = (links, pickDirectory) => {
      for (const link of links) {
        const fileName = fileNameFromUrl(link);
        if (fileName === null || downloads.has(link)) {
          continue;
        }
        const directory = pickDirectory(fileName);
        // Links whose extension is unsupported (ordinary page links, mostly) are not fetched.
        if (directory !== null) {
          downloads.set(link, path.join(directory, fileName));
        }
      }
    };
    enqueue(cssLinks, () => cssDirectory);
    enqueue(jsLinks, () => jsDirectory);
    enqueue(imgLinks, () => assetsDirectory);
    enqueue(staticLinks, directoryForFileName);

    for (const [link, filePath] of downloads) {
      await saveResource(page, link, filePath);
    }
  } finally {
    // Always shut the browser down, even when a step above throws.
    await browser.close();
  }
  console.log('网页内容下载完成!');
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
执行
node download.js

完成爬取

查看代码,完美。

查看预览效果,也很完美

下一步我们来做安卓下载和苹果下载,以及修改网页整体内容
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。