package main
import (
"bufio"
"fmt"
"github.com/antchfx/htmlquery"
"io/ioutil"
"net/http"
"os"
"strconv"
"time"
)
// getResponse issues a GET request for url, sending browser-like User-Agent
// and Referer headers, and returns the raw response.
//
// It panics if the request cannot be constructed (for example when url is
// malformed). If the request cannot be completed, the error is reported on
// standard error and nil is returned, so callers must check the result for
// nil before using it. On success the caller owns the response and is
// responsible for closing its Body.
func getResponse(url string) *http.Response {
	// A bounded timeout keeps one stalled connection from hanging the caller.
	client := &http.Client{Timeout: 30 * time.Second}

	// Build the request.
	request, err := http.NewRequest("GET", url, nil)
	// Check the error before touching request: it is nil when construction fails.
	if err != nil {
		panic(err)
	}

	// Add the header fields.
	request.Header.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36")
	request.Header.Add("Referer", "https://www.cnblogs.com/brady-wang/default.html?page=167")

	// Send the request and hand back the result.
	resp, err := client.Do(request)
	if err != nil {
		fmt.Fprintf(os.Stderr, "request %s failed: %v\n", url, err)
		return nil
	}
	return resp
}
// main walks the paginated post index page by page. For every post link
// found on an index page it fetches the post and prints its title, pausing
// briefly between posts.
func main() {
	const (
		listURL  = "https://www.cnblogs.com/brady-wang/default.html?page="
		lastPage = 167
		pause    = 3 * time.Millisecond
	)

	for page := 1; page <= lastPage; page++ {
		pageURL := listURL + strconv.Itoa(page)
		fmt.Printf("crawl page %s\n", pageURL)

		// Ranging over an empty result is a no-op, so no length guard is needed.
		for _, link := range getUrls(pageURL) {
			fmt.Printf("%s\n", getDetail(link))
			time.Sleep(pause)
		}
	}
}
// getDetail fetches the post page at url and returns the inner text of the
// first node matching //*[@id='cb_post_title_url']/span.
//
// It returns "" when the page cannot be fetched, cannot be parsed, or has no
// matching node.
func getDetail(url string) string {
	response := getResponse(url)
	// getResponse yields nil when the request fails; dereferencing it would panic.
	if response == nil {
		return ""
	}
	defer response.Body.Close()

	doc, err := htmlquery.Parse(response.Body)
	if err != nil {
		return ""
	}

	titleNodes := htmlquery.Find(doc, "//*[@id='cb_post_title_url']/span")
	if len(titleNodes) == 0 {
		return ""
	}
	return htmlquery.InnerText(titleNodes[0])
}
// getUrls fetches the index page at url and returns the href attribute of
// every node matching //*[@id='mainContent']//div[@class='postTitle']/a, in
// document order.
//
// It returns an empty, non-nil slice when the page cannot be fetched, cannot
// be parsed, or contains no matching links.
func getUrls(url string) []string {
	response := getResponse(url)
	// getResponse yields nil when the request fails; dereferencing it would panic.
	if response == nil {
		return []string{}
	}
	defer response.Body.Close()

	doc, err := htmlquery.Parse(response.Body)
	if err != nil {
		// Without a parsed document there is nothing to query.
		return []string{}
	}

	list := htmlquery.Find(doc, "//*[@id='mainContent']//div[@class='postTitle']/a")
	urls := make([]string, 0, len(list))
	for _, item := range list {
		href := htmlquery.SelectAttr(item, "href")
		urls = append(urls, href)
	}
	return urls
}
// downloadXiaoshuo fetches url, reads the whole response body into memory and
// passes it to writeToFile as a string.
//
// Nothing is written when the page cannot be fetched or the body cannot be
// read in full; a read error is printed to standard output.
func downloadXiaoshuo(url string) {
	response := getResponse(url)
	// getResponse yields nil when the request fails; dereferencing it would panic.
	if response == nil {
		return
	}
	defer response.Body.Close()

	body, err := ioutil.ReadAll(response.Body)
	if err != nil {
		fmt.Println(err)
		// Stop here so a truncated body is never persisted.
		return
	}
	writeToFile(string(body))
}
// writeToFile writes str five times in a row to ./a.txt, creating the file if
// it does not exist and replacing any previous content.
//
// Failures to open, write, flush or close the file are printed to standard
// output; the function returns nothing.
//
// NOTE(review): writing the content five times looks like leftover sample
// code; it is kept as-is because the file's resulting content depends on it.
func writeToFile(str string) {
	filePath := "./a.txt"
	// O_TRUNC discards old content so bytes from a longer earlier file cannot
	// survive past the end of the new data.
	file, err := os.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)
	if err != nil {
		fmt.Printf("open file err=%v\n", err)
		return
	}
	// Release the file handle on every exit path and surface close failures.
	defer func() {
		if err := file.Close(); err != nil {
			fmt.Printf("close file err=%v\n", err)
		}
	}()

	// Write through a buffered writer.
	writer := bufio.NewWriter(file)
	for i := 0; i < 5; i++ {
		if _, err := writer.WriteString(str); err != nil {
			fmt.Printf("write file err=%v\n", err)
			return
		}
	}
	// Buffered data reaches the file only on Flush; without it short content
	// is lost entirely and long content loses its tail.
	if err := writer.Flush(); err != nil {
		fmt.Printf("flush file err=%v\n", err)
	}
}
// (Web page footer captured along with the listing, not part of the program: "Please leave a comment".)