chromedp 是用 Go 编写的、基于 Chrome DevTools Protocol(CDP)驱动浏览器的库,项目地址:https://github.com/chromedp/chromedp 。随着前端 SPA 应用的普及,传统的爬虫很难抓取到我们想要的内容;CDP 提供了一套完整的浏览器接口,使得我们可以在和浏览器一样的环境中模拟请求,抓取动态生成的网页。那么 CDP 协议本质上是什么呢?它本质上是一种基于 WebSocket 的协议。
下面我们通过两个例子看下如何使用chromedp:
// Command click is a chromedp example demonstrating how to use a selector to
// click on an element.
package main
import (
"context"
"log"
"time"
"github.com/chromedp/chromedp"
)
// main drives a Chrome instance through the pkg.go.dev/time page: it waits
// for the page footer, clicks the "After" example, and logs the example's
// source code read from its textarea.
func main() {
	// start a browser-backed context; pass chromedp.WithDebugf(log.Printf)
	// as an extra option here to trace the protocol traffic
	browserCtx, stopBrowser := chromedp.NewContext(context.Background())
	defer stopBrowser()

	// bound the whole run to 15 seconds
	runCtx, stopTimer := context.WithTimeout(browserCtx, 15*time.Second)
	defer stopTimer()

	// source text of the example, filled in by the last action
	var snippet string

	if err := chromedp.Run(runCtx,
		chromedp.Navigate(`https://pkg.go.dev/time`),
		// the footer becoming visible is used as the "page is loaded" signal
		chromedp.WaitVisible(`body > footer`),
		// expand the "After" example by clicking it
		chromedp.Click(`#example-After`, chromedp.NodeVisible),
		// read the example source out of its textarea
		chromedp.Value(`#example-After textarea`, &snippet),
	); err != nil {
		log.Fatal(err)
	}

	log.Printf("Go's time.After example:\n%s", snippet)
}
返回结果如下:
2023/07/02 23:29:57 Go's time.After example:
package main
import (
"fmt"
"time"
)
var c chan int
func handle(int) {}
func main() {
select {
case m := <-c:
handle(m)
case <-time.After(10 * time.Second):
fmt.Println("timed out")
}
}
这样就完全模拟了一次浏览器的点击请求,并返回了我们想要的内容。当然,chromedp 也可以用来设置 cookie:
// Command cookie is a chromedp example demonstrating how to set a HTTP cookie
// on requests.
package main
import (
"context"
"encoding/json"
"flag"
"fmt"
"log"
"net/http"
"time"
"github.com/chromedp/cdproto/cdp"
"github.com/chromedp/cdproto/network"
"github.com/chromedp/cdproto/storage"
"github.com/chromedp/chromedp"
)
// main starts the local cookie-echo server on the port given by -port
// (default 8544), then drives Chrome to set two cookies, load the server's
// page, and log the cookies the page reports back.
func main() {
	port := flag.Int("port", 8544, "port")
	flag.Parse()

	// start cookie server; cookieServer only returns when the listener
	// fails (e.g. the port is already taken), so report that instead of
	// silently continuing against a server that is not ours.
	// NOTE(review): nothing here waits for the listener to be ready before
	// Chrome navigates to it.
	go func() {
		if err := cookieServer(fmt.Sprintf(":%d", *port)); err != nil {
			log.Fatal(err)
		}
	}()

	// create context
	ctx, cancel := chromedp.NewContext(context.Background())
	defer cancel()

	// run task list
	var res string
	err := chromedp.Run(ctx, setcookies(
		fmt.Sprintf("http://localhost:%d", *port), &res,
		"cookie1", "value1",
		"cookie2", "value2",
	))
	if err != nil {
		log.Fatal(err)
	}

	log.Printf("chrome received cookies: %s", res)
}
// cookieServer runs a simple HTTP server on addr that logs any cookies sent
// with a request and writes them back as indented JSON embedded in indexHTML.
// It blocks in http.ListenAndServe and returns the error that call returns.
func cookieServer(addr string) error {
	mux := http.NewServeMux()
	mux.HandleFunc("/", func(res http.ResponseWriter, req *http.Request) {
		// parse the request's cookies once and reuse the result for both
		// the log lines and the JSON response
		cookies := req.Cookies()
		for i, cookie := range cookies {
			log.Printf("from %s, server received cookie %d: %v", req.RemoteAddr, i, cookie)
		}

		buf, err := json.MarshalIndent(cookies, "", " ")
		if err != nil {
			http.Error(res, err.Error(), http.StatusInternalServerError)
			return
		}

		// indexHTML carries a single %s placeholder for the JSON text
		fmt.Fprintf(res, indexHTML, string(buf))
	})
	return http.ListenAndServe(addr, mux)
}
// setcookies builds the task list that installs the given cookies in Chrome,
// navigates to host, stores the text of the page's #result element in res,
// and finally logs the cookies Chrome holds.
//
// cookies is a flat list of name, value, name, value, ...; it panics when
// the list has an odd number of entries.
func setcookies(host string, res *string, cookies ...string) chromedp.Tasks {
	if len(cookies)%2 != 0 {
		panic("length of cookies must be divisible by 2")
	}

	// installCookies registers every name/value pair with the browser
	installCookies := chromedp.ActionFunc(func(ctx context.Context) error {
		// every cookie expires 180 days after the action runs
		expiry := cdp.TimeSinceEpoch(time.Now().Add(180 * 24 * time.Hour))

		for i := 0; i < len(cookies); i += 2 {
			name, value := cookies[i], cookies[i+1]
			params := network.SetCookie(name, value).
				WithExpires(&expiry).
				WithDomain("localhost").
				WithHTTPOnly(true)
			if err := params.Do(ctx); err != nil {
				return err
			}
		}
		return nil
	})

	// dumpCookies logs each cookie the browser reports
	dumpCookies := chromedp.ActionFunc(func(ctx context.Context) error {
		stored, err := storage.GetCookies().Do(ctx)
		if err != nil {
			return err
		}
		for idx, c := range stored {
			log.Printf("chrome cookie %d: %+v", idx, c)
		}
		return nil
	})

	return chromedp.Tasks{
		installCookies,
		// load the page served by host
		chromedp.Navigate(host),
		// capture what the server echoed back
		chromedp.Text(`#result`, res, chromedp.ByID, chromedp.NodeVisible),
		dumpCookies,
	}
}
// indexHTML is the page template served by the cookie server; its single %s
// placeholder is filled with the JSON-encoded cookies inside the #result div.
const indexHTML = `<!doctype html>
<html>
<body>
<div id="result">%s</div>
</body>
</html>`
本文分享自 golang算法架构leetcode技术php 微信公众号,前往查看
如有侵权,请联系 cloudcommunity@tencent.com 删除。
本文参与 腾讯云自媒体同步曝光计划 ,欢迎热爱写作的你一起参与!