每周分享
概要
时间:2019-01-13
课程:python自动化班02天
上课地点:静安寺
上课内容:Python-web自动化selenium脚本实战
腾讯课堂链接:快点戳我!~
思维导图
主要代码片段
百度验证码
# Read the captcha image from the page, send it to the Baidu OCR service and
# type the recognised text into the captcha input.
import base64

import requests
from selenium import webdriver

driver = webdriver.Chrome()
driver.get('https://persons.shgjj.com/')

# Screenshot only the captcha element and store it on disk.
image = driver.find_element_by_id('img1')
image.screenshot('./01.png')

# "image" parameter: the picture, base64-encoded.
# The context manager closes the file once the bytes are read.
with open(r'./01.png', 'rb') as f:
    img = base64.b64encode(f.read())

# Step 1: exchange the client id / secret for an access token.
# NOTE(review): the credentials are hard-coded in this tutorial snippet; load
# them from configuration or the environment before reusing the script.
url = "https://aip.baidubce.com/oauth/2.0/token"
querystring = {
    "grant_type": "client_credentials",
    "client_id": "1RQYVnqvNBIPoxFtzr68mWzz",
    "client_secret": "NcQfFTMwmNyayuQiLsugfyiP05nPmnKT",
}
payload = ""
headers = {
    'cache-control': "no-cache",
    'Postman-Token': "53654ba8-1fa5-419f-9cf5-d585dd302b5d",
}
response = requests.request("GET", url, data=payload, headers=headers, params=querystring)
# print(response.json())
data = response.json()
print(data['access_token'])
access_token = data['access_token']

# Step 2: post the encoded image to the OCR endpoint.
url = "https://aip.baidubce.com/rest/2.0/ocr/v1/general"
querystring = {"access_token": access_token}
payload = {"image": img}
headers = {
    'Content-Type': "application/x-www-form-urlencoded",
    'cache-control': "no-cache",
}
response = requests.request("POST", url, data=payload, headers=headers, params=querystring)
print(response.json())
checkData = response.json()

# Use the text of the first recognised entry as the captcha value.
num = checkData['words_result'][0]['words']
driver.find_element_by_id('imagecode1').send_keys(num)
⚠️注意:如果元素截图的位置不对,请将系统的显示缩放比例设置为 100%。
快捷键操作
# Log in, open the "create topic" page and send keyboard shortcuts to the
# editor through an ActionChains sequence.
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

driver = webdriver.Chrome()
driver.get('http://39.107.96.138:3000/signin')
driver.find_element_by_id('name').send_keys('user1')
driver.find_element_by_id('pass').send_keys('123456')
driver.find_element_by_css_selector('.span-primary').click()

driver.get('http://39.107.96.138:3000/topic/create')
input_content = driver.find_element_by_css_selector('.CodeMirror-lines')
input_content.click()

# Actions are only queued here; nothing is sent until perform() is called.
action = ActionChains(driver)
action.move_to_element(input_content).send_keys('abc')
# Command + A
action.key_down(Keys.COMMAND)
action.send_keys('a')
action.key_up(Keys.COMMAND)
# Command + B
action.key_down(Keys.COMMAND)
action.send_keys('b')
action.key_up(Keys.COMMAND)
# perform()
action.perform()
# driver.find_element_by_class_name('eicon-image').click()
# driver.find_element_by_class_name('webuploader-element-invisible').send_keys('/Users/zyzhao/Desktop/2019-01-13-selenium02.png')
拖拽
# Drag the slider handle on the login page horizontally.
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

driver = webdriver.Chrome()
driver.get('https://login.zhipin.com/?ka=header-login')
span = driver.find_element_by_css_selector('form>div:nth-child(4) div.nc_scale > span.nc_iconfont.btn_slide')

action = ActionChains(driver)
action.move_to_element(span)
action.click_and_hold()
# move_by_offset takes both an x and a y offset; the drag is horizontal only.
action.move_by_offset(495, 0)
action.release()
action.perform()
iframe 切换
# Enter two nested iframes, fill in the login form inside the inner one, then
# return to the top-level document.
import time

from selenium import webdriver

driver = webdriver.Chrome()
driver.get('file:///Users/zyzhao/Desktop/20190113/index.html')
iframe1 = driver.find_element_by_tag_name('iframe')
time.sleep(2)

# switch_to.frame replaces the deprecated switch_to_frame; it accepts either
# a frame element or a frame name/id.
driver.switch_to.frame(iframe1)
driver.switch_to.frame('iframeLoginIfm')
driver.find_element_by_id('pwdTab').click()
driver.find_element_by_id('pwdUserNameIpt').send_keys('12345')
driver.find_element_by_id('pwdIpt').send_keys('sddsds')

# Back to the top-level document before reading the page source.
driver.switch_to.default_content()
print(driver.page_source)
alert 处理
# Log in, open a topic from the user's page, click the delete icon and handle
# the confirmation alert.
from selenium import webdriver
from selenium.webdriver.common.alert import Alert
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get('http://39.107.96.138:3000/signin')
driver.find_element_by_id('name').send_keys('user1')
driver.find_element_by_id('pass').send_keys('123456')
driver.find_element_by_css_selector('.span-primary').click()

driver.get('http://39.107.96.138:3000/user/user1')
# The next two lines locate the same class name in two different ways.
# NOTE(review): both lines click; presumably only one is meant to run at a
# time - confirm against the original lesson.
driver.find_element(by=By.CLASS_NAME, value='topic_title').click()
driver.find_element_by_class_name('topic_title').click()
driver.find_element_by_css_selector('i.fa.fa-lg.fa-trash').click()

print(Alert(driver).text)
# Cancel
Alert(driver).dismiss()
# Confirm
# NOTE(review): the alert has just been dismissed above, so this call
# presumably finds no open alert - keep only the branch you need.
Alert(driver).accept()
等待时间
# Open the Outlook sign-in page and wait explicitly for the e-mail field
# before typing into it.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.implicitly_wait(10)
driver.get('https://outlook.live.com/owa/#')
driver.find_element_by_css_selector('div > a.linkButtonSigninHeader:nth-child(4)').click()

try:
    # inputEmail = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.NAME, 'loginfmt')))
    inputEmail = WebDriverWait(driver, 10).until(lambda driver: driver.find_element_by_name('loginfmt'))
except TimeoutException:
    # until() raises TimeoutException when the condition never becomes truthy.
    print('time out...')
else:
    inputEmail.send_keys('1321312312')
# driver.find_element_by_name('loginfmt').send_keys('1234455')
javascript
# Set the check-in date field on the page by running JavaScript in the browser.
from selenium import webdriver

driver = webdriver.Chrome()
driver.get('https://www.ctrip.com/')

start_date = '2019-01-20'
# Build the statement that assigns the date to the #HD_CheckIn element.
js_script = 'document.querySelector("#HD_CheckIn").value = "{}"'.format(start_date)
print(js_script)
driver.execute_script(js_script)
手机模拟
# Start Chrome with the "mobileEmulation" experimental option and open a page.
from selenium import webdriver
# from selenium.webdriver.chrome.options import Options

# mobile_emulation = {"deviceName":"iPhone X"}
mobile_emulation = {"deviceName": "iPad"}

chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("mobileEmulation", mobile_emulation)
driver = webdriver.Chrome(options=chrome_options)
driver.get('https://www.baidu.com')
微博抓取页面
# Search weibo for a keyword, walk through five result pages and write every
# post (user, content, time, source, counters) into an .xls workbook.
import xlwt
# from datetime import datetime
# style0 = xlwt.easyxf('font: name Times New Roman, color-index red, bold on',
# num_format_str='#,##0.00')
# style1 = xlwt.easyxf(num_format_str='D-MMM-YY')
# ws.write(0, 0, 1234.56, style0)
# ws.write(1, 0, datetime.now(), style1)
# ws.write(2, 0, 1)
# ws.write(555, 1, 1)
# ws.write(2, 2, xlwt.Formula("A3+B3"))
# wb.save('example.xls')
from selenium import webdriver

driver = webdriver.Chrome()
driver.maximize_window()
driver.implicitly_wait(3)
driver.get('https://s.weibo.com/')

keys = 'web自动化'
wb = xlwt.Workbook()
ws = wb.add_sheet(keys)

# Header row (row 0).
ws.write(0, 0, '用户')
ws.write(0, 1, '内容')
ws.write(0, 2, "时间")
ws.write(0, 3, '来源')
ws.write(0, 4, '收藏')
ws.write(0, 5, '转发')
ws.write(0, 6, '评论')
ws.write(0, 7, '喜欢')

driver.find_element_by_css_selector('[type="text"]').send_keys(keys)
# driver.find_element_by_css_selector('[type="text"]').submit()
driver.find_element_by_class_name('s-btn-b').click()
driver.find_element_by_css_selector('[node-type="advsearch"]').click()
driver.find_element_by_css_selector('[for="radio03"]').click()
driver.find_element_by_css_selector('[node-type="OK"]').click()

# Data rows start below the header.
rowIndex = 1
for x in range(5):
    users = driver.find_elements_by_css_selector('[action-type="feed_list_item"]')
    for user in users:
        username = user.find_element_by_xpath('.//*[@class="name"]').text
        content = user.find_element_by_xpath('.//*[@class="txt"]').text
        publish_date = user.find_element_by_xpath('.//*[@class="from"]/a[1]').text
        publish_source = user.find_element_by_xpath('.//*[@class="from"]/a[2]').text
        # Each counter link reads "<label><count>"; keep what follows the
        # label and fall back to 0 when nothing follows it.
        shoucang = user.find_element_by_xpath('.//*/div[@class="card-act"]/ul/li[1]/a').text
        shoucang = shoucang.split("收藏")[1] or 0
        zhuanfa = user.find_element_by_xpath('.//*/div[@class="card-act"]/ul/li[2]/a').text
        zhuanfa = zhuanfa.split("转发")[1] or 0
        pinglun = user.find_element_by_xpath('.//*/div[@class="card-act"]/ul/li[3]/a').text
        pinglun = pinglun.split("评论")[1] or 0
        xihuan = user.find_element_by_xpath('.//*/div[@class="card-act"]/ul/li[4]/a').text or 0
        # xihuan = xihuan.split("收藏")[1]
        print(username, content, publish_date, publish_source, shoucang, zhuanfa, pinglun, xihuan)

        ws.write(rowIndex, 0, username)
        ws.write(rowIndex, 1, content)
        ws.write(rowIndex, 2, publish_date)
        ws.write(rowIndex, 3, publish_source)
        ws.write(rowIndex, 4, shoucang)
        ws.write(rowIndex, 5, zhuanfa)
        ws.write(rowIndex, 6, pinglun)
        ws.write(rowIndex, 7, xihuan)
        rowIndex += 1

    # Go to the next page: open the page selector and click entry x + 2.
    xpath = '//*/ul[@class="s-scroll"]/li[{}]'.format(x + 2)
    print(xpath)
    driver.find_element_by_class_name('pagenum').click()
    driver.find_element_by_xpath(xpath).click()

wb.save('web.xls')
driver.quit()
这么多代码的感觉怎么样啊?大家加油哦,反正小编是晕的不要不要的。
领取专属 10元无门槛券
私享最新 技术干货