# 代码已脱敏,自行替换 (The code has been sanitized; replace the placeholder values yourself before running.)
# @Time : 2023/10/8 14:43
# @Author : Lan
# @File : niukespider.py
# @Software: PyCharm
import time
import requests
def get_category(catalog='10klpm', timeout=10):
    """Fetch the catalog index of a column and return the decoded JSON.

    Args:
        catalog: Catalog identifier interpolated into the index URL.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: No response arrived within ``timeout`` seconds.
        ValueError: The response body is not valid JSON.
    """
    url = f'https://www.lanol.cn.com/content/zhuanlan/index/catalog/{catalog}'
    response = requests.get(url, timeout=timeout)
    # Fail on HTTP errors here instead of on a confusing JSON/key error later.
    response.raise_for_status()
    return response.json()
# Page skeleton for every saved file; the ``{{content}}`` placeholder is
# substituted with the fetched article body.
# NOTE(review): this looks like an HTML template whose tags were stripped
# when the code was pasted -- confirm against the original script.
c = (
    "\n"
    "Document\n"
    "{{content}}\n"
)
def get_content(catalog, entity, timeout=10):
    """Fetch one entry of a catalog and return the decoded JSON.

    Args:
        catalog: Catalog identifier interpolated into the detail URL.
        entity: Identifier of the entry inside that catalog.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: No response arrived within ``timeout`` seconds.
        ValueError: The response body is not valid JSON.
    """
    # The ``_`` query parameter carries the current time in milliseconds,
    # so every request URL is unique.
    url = f'https://www.lanol.cn.com/content/zhuanlan/index/detail/{catalog}/{entity}?_={int(time.time() * 1000)}'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail on HTTP errors here instead of on a confusing JSON/key error later.
    response.raise_for_status()
    return response.json()
if __name__ == '__main__':
    # Script entry point: download every entry of one catalog and save each
    # as ./docs/<title>.html, wrapped in the page template ``c``.
    from pathlib import Path  # local import: only the script path needs it

    catalog = 'Gj5x2m'

    # Create the output directory up front; open(..., 'w') does not create
    # missing parent directories and would raise FileNotFoundError.
    out_dir = Path('./docs')
    out_dir.mkdir(parents=True, exist_ok=True)

    for i in get_category(catalog)['data']['catalog']:
        content = get_content(catalog, i['uuid'])['data']
        # '/' in a title would be read as a path separator, so replace it.
        file_name = i['title'].replace('/', '-') + '.html'
        with open(out_dir / file_name, 'w', encoding='utf-8') as f:
            f.write(c.replace('{{content}}', content['content']))
扫码关注腾讯云开发者
领取腾讯云代金券
Copyright © 2013 - 2025 Tencent Cloud. All Rights Reserved. 腾讯云 版权所有
深圳市腾讯计算机系统有限公司 ICP备案/许可证号:粤B2-20090059 深公网安备号 44030502008569
腾讯云计算(北京)有限责任公司 京ICP证150476号 | 京ICP备11018762号 | 京公网安备号11010802020287
Copyright © 2013 - 2025 Tencent Cloud.
All Rights Reserved. 腾讯云 版权所有