package main
import (
"fmt"
"github.com/antchfx/htmlquery"
"io/ioutil"
"net/http"
"os"
"regexp"
"strings"
"sync"
"time"
)
// wg is the package-wide WaitGroup shared by main, which blocks on wg.Wait
// before exiting, and saveVideo, which reports completion through wg.Done.
var wg sync.WaitGroup
// main fetches the lesson index page and, for every link found inside the
// div with class "dxs-l-b", prints the link's title, its href and the video
// URL that getVideo extracts from the linked page.
func main() {
	const url = "https://haomooc.com/xiaoxue-read-2991.html"

	resp, err := http.Get(url)
	if err != nil {
		// resp is nil on error; touching resp.Body would panic.
		fmt.Printf("http.Get(%s),err(%v)\n", url, err)
		return
	}
	defer resp.Body.Close()

	doc, err := htmlquery.Parse(resp.Body)
	if err != nil {
		fmt.Printf("htmlquery.Parse(%s),err(%v)\n", url, err)
		return
	}

	for _, li := range htmlquery.Find(doc, "//div[@class='dxs-l-b']//a") {
		// strings.Replace returns a new string; the result has to be kept
		// for the space stripping to have any effect.
		href := strings.Replace(htmlquery.SelectAttr(li, "href"), " ", "", -1)
		title := strings.Replace(htmlquery.SelectAttr(li, "title"), " ", "", -1)
		fmt.Printf("%s\n", title)
		fmt.Printf("%s\n", href)
		if href == "" {
			// Nothing to fetch for an anchor without a target.
			continue
		}

		video := getVideo(href)
		// Saving is currently disabled. Do not call wg.Add here while it is:
		// with no matching wg.Done, the wg.Wait below would block forever.
		//saveVideo(title,video)
		fmt.Printf("%s\n", video)
	}

	// Returns immediately while nothing has been added to wg.
	wg.Wait()
}
// videoURLPattern matches direct links to .mp4 files on video.haomooc.com.
// The dots are escaped so they match only literal dots, and the path is
// matched lazily so a link does not swallow the rest of its line (for example
// a following type="video/mp4" attribute).
var videoURLPattern = regexp.MustCompile(`https://video\.haomooc\.com/.*?\.mp4`)

// requestDelay is how long getVideo pauses before each request. It is a
// variable rather than a constant so that it can be shortened, e.g. in tests.
var requestDelay = time.Second

// getVideo downloads the page at url and returns the last video link found in
// it, with spaces removed. It returns "" when the page cannot be fetched or
// read, or when it contains no video link.
func getVideo(url string) string {
	time.Sleep(requestDelay)

	resp, err := http.Get(url)
	if err != nil {
		// resp is nil on error; report and bail out instead of dereferencing it.
		fmt.Printf("getVideo(%s),err(%v)\n", url, err)
		return ""
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Printf("getVideo(%s),err(%v)\n", url, err)
		return ""
	}
	return extractVideoURL(string(body))
}

// extractVideoURL returns the last match of videoURLPattern in page with all
// spaces removed, or "" when there is no match.
func extractVideoURL(page string) string {
	matches := videoURLPattern.FindAllString(page, -1)
	if len(matches) == 0 {
		return ""
	}
	return strings.Replace(matches[len(matches)-1], " ", "", -1)
}
// PathExists reports whether path exists according to os.Stat. A path that
// does not exist yields (false, nil); any other stat failure yields false
// together with that error.
func PathExists(path string) (bool, error) {
	switch _, statErr := os.Stat(path); {
	case statErr == nil:
		return true, nil
	case os.IsNotExist(statErr):
		return false, nil
	default:
		return false, statErr
	}
}
func saveVideo(title string ,url string) {
fmt.Printf(title,url)
path := "/www/shell/video/"+title+".mp4"
b, err := PathExists(path)
if err != nil {
fmt.Printf("PathExists(%s),err(%v)\n", path, err)
}
if b {
fmt.Printf("path %s 存在\n", path)
} else{
fmt.Println("save video "+title )
fmt.Printf("%s",url)
// Get the data
resp, err := http.Get(url)
if err != nil {
panic(err)
}
defer resp.Body.Close()
data, err := ioutil.ReadAll(resp.Body)
if err != nil {
panic(err)
}
ioutil.WriteFile(path, data, 0644)
defer wg.Done()
}
}
|
请发表评论