在线时间:8:00-16:00
迪恩网络APP
随时随地掌握行业动态
扫描二维码
关注迪恩网络微信公众号
// This is a small demo written while learning how to build web crawlers in Go.
//
// It fetches a fixed list of Douban movie pages concurrently and prints a few
// fields extracted from each page with regular expressions.
package main

import (
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"regexp"
	"strconv"
	"time"
)

// httpClient is shared by all fetches. The explicit timeout keeps a stalled
// connection from blocking a worker (and therefore main) forever.
var httpClient = &http.Client{Timeout: 15 * time.Second}

// Movie holds the values extracted from one movie page.
type Movie struct {
	name   string // text of the v:itemreviewed span
	mark   string // text of the v:average rating element
	person string // text of the v:directedBy link
	time   string // content attribute of the v:runtime span
	url    string // href captured from a target="_blank" rel="nofollow" link
}

// main starts one goroutine per movie ID and waits for all of them to finish.
func main() {
	ids := []int{1291841, 26761416, 1309220, 1300741, 1293172}

	// One channel per worker. Each channel must actually be created with make:
	// a nil channel blocks forever on both send and receive, which deadlocks
	// the program. The slice is sized to the number of IDs so that main never
	// waits on a channel nobody will write to.
	chs := make([]chan int, len(ids))
	for i, id := range ids {
		chs[i] = make(chan int, 1) // buffered so a finished worker never blocks
		go child(id, chs[i])
	}
	for _, ch := range chs {
		<-ch
	}
}

// child downloads the page for the movie with the given id, extracts the
// Movie fields from it and prints the result. It sends exactly one value on
// ch when it is done, whether or not the fetch succeeded, so the caller can
// always wait on ch. Failures are logged instead of panicking so that one bad
// page does not abort the other workers.
func child(id int, ch chan int) {
	defer func() {
		if ch != nil {
			ch <- 1
		}
	}()

	url := "https://movie.douban.com/subject/" + strconv.Itoa(id) + "/"
	resp, err := httpClient.Get(url)
	if err != nil {
		log.Printf("fetching %s: %v", url, err)
		return
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		log.Printf("fetching %s: unexpected status %s", url, resp.Status)
		return
	}

	sHtml, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Printf("reading %s: %v", url, err)
		return
	}

	movie := &Movie{
		name:   GetValue(`<span\s*property="v:itemreviewed">(.*)</span>`, &sHtml),
		mark:   GetValue(`<strong\s*class="ll\s*rating_num"\s*property="v:average">(.*)</strong>`, &sHtml),
		person: GetValue(`<a href="/celebrity/[0-9]+/" rel="v:directedBy">(.*)</a>`, &sHtml),
		time:   GetValue(`<span property="v:runtime" content="(.*)">.*</span>`, &sHtml),
		url:    GetValue(`<a href="(.*)" target="_blank" rel="nofollow">.*</a>`, &sHtml),
	}
	fmt.Println(movie)
}

// GetValue returns the text captured by the first capture group of the first
// match of rule in *sHtml. It returns "" when sHtml is nil, when rule does not
// match, or when rule has no capture group.
//
// rule is compiled with regexp.MustCompile, so an invalid pattern panics.
func GetValue(rule string, sHtml *[]byte) string {
	if sHtml == nil {
		return ""
	}
	reg := regexp.MustCompile(rule)
	// Match on the bytes directly to avoid copying the whole page into a
	// string for every lookup.
	m := reg.FindSubmatch(*sHtml)
	if len(m) < 2 {
		return ""
	}
	return string(m[1])
}
|
请发表评论