效果展示:
源码:
# -*- coding:utf-8 -*-
# 2018-03-28
# Scrape Taobao product listings and save them to an Excel workbook.
from urllib import request
import re
import time
import requests
from openpyxl import Workbook
# Rows destined for the spreadsheet. The first row is the header
# (product name, price, monthly sales); scraped rows are appended after it.
value = [
    ['商品名称', '价格', '月销量'],
]
def gethttp(page_url=None, timeout=10):
    """Download one search-result page and append its products to ``value``.

    Every match of the product pattern in the response text is appended to
    the module-level ``value`` list as ``[title, price, month_sales]``.

    Args:
        page_url: Address to download. When omitted, the module-level
            ``url`` global is used, which is how the script's main loop
            hands over the current page.
        timeout: Seconds to wait on the server before ``requests`` gives up.

    Raises:
        requests.exceptions.RequestException: If the download fails or
            the timeout expires.
    """
    target = url if page_url is None else page_url
    # requests never times out unless told to, so a stalled connection
    # would otherwise block the whole crawl indefinitely.
    res = requests.get(target, timeout=timeout)
    page_text = res.text
    # Products are matched as JSON-like fragments in the page text; the three
    # groups capture the "title", "price" and "month_sales" values. re.S lets
    # "." cross newlines so a fragment may span several lines.
    pattern = re.compile(
        r'{"cat":".*?","title":"(.*?)","pic_url".*?jpg","price":"(.*?)".*?"month_sales":"(.*?)"',
        re.S,
    )
    for title, price, sales in pattern.findall(page_text):
        value.append([title, price, sales])
def cunchu():
    """Save the rows collected in ``value`` to the workbook ``淘宝.xlsx``.

    A fresh workbook is created and its active sheet is titled ``商品信息``.
    Row ``n`` of ``value`` (counting from 1) is written to sheet row ``n``,
    with its first three entries placed in columns 1 to 3.
    """
    workbook = Workbook()
    worksheet = workbook.active
    worksheet.title = "商品信息"
    # openpyxl rows and columns are 1-based, while the record is 0-based.
    for row_number, record in enumerate(value, start=1):
        for column_number in (1, 2, 3):
            cell = worksheet.cell(row=row_number, column=column_number)
            cell.value = record[column_number - 1]
    workbook.save("淘宝.xlsx")
if __name__ == "__main__":
    # Script entry point: fetch result pages 1-99 for the search keyword,
    # collecting products via gethttp(), then write everything out once.
    url1 = "https://s.taobao.com/search?q="
    name = "手机"
    for i in range(1, 100):
        # The "{j}" placeholder carries the result offset. Without it
        # str.format() silently drops the keyword argument and every
        # iteration requests the very same URL.
        url2 = "&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.2017.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306&p4ppushleft=5%2C48&s={j}".format(j=i * 48)
        # gethttp() reads this module-level name, so it must stay a global.
        url = str(url1 + name + url2)
        gethttp()
        print("正在爬取第%s页" % i)
    cunchu()
    print("爬取完毕!")
领取专属 10元无门槛券
私享最新 技术干货