Golang网页下载示例
发布时间:2020-12-16 09:38:51 所属栏目:大数据 来源:网络整理
导读:今天PHP站长网 52php.cn把收集自互联网的代码分享给大家,仅供参考。 package main /* * 中文编码问题 */ import ( "errors" "flag" "fmt" query "github.com/PuerkitoBio/goquery" "golang.org/x/text/encoding/simplifiedchinese" ……
以下代码由PHP站长网 52php.cn收集自互联网 现在PHP站长网小编把它分享给大家,仅供参考 package main /* * 中文编码问题 */ import ( "errors" "flag" "fmt" query "github.com/PuerkitoBio/goquery" "golang.org/x/text/encoding/simplifiedchinese" "io/ioutil" "net/http" "os" "path/filepath" "runtime" "strings" "sync" ) var ( np = runtime.NumCPU() _ = runtime.GOMAXPROCS(np) ) var wg sync.WaitGroup type Folder struct { Url string Dir string } type File struct { Url string Dir string Name string } func checkErr(err error) { if err != nil { fmt.Printf("%vn",err.Error()) os.Exit(1) } } func decodeToGBK(text string) (string,error) { dst := make([]byte,len(text)*2) tr := simplifiedchinese.GB18030.NewDecoder() nDst,_,err := tr.Transform(dst,[]byte(text),true) if err != nil { return text,err } return string(dst[:nDst]),nil } func printEach(index int,item *query.Selection) { fmt.Println("Selection: ",item.Text()) } func isDir(path string) bool { return strings.HasSuffix(path,"/") } func makeFolder(item *query.Selection,url,dir string) (f *Folder,err error) { tx := item.Text() href,ok := item.Attr("href") name,err := decodeToGBK(tx) if err != nil { return } if !ok { err = errors.New("makeFolder : " + tx + " href属性不存在") return } f = &Folder{Url: url + href,Dir: filepath.Join(dir,name)} return } func makeFile(item *query.Selection,dir string) (f *File,ok := item.Attr("href") if !ok { err = errors.New("makeFile : " + tx + " href属性不存在") return } name,err := decodeToGBK(tx) if err != nil { return } f = &File{Url: url + href,Dir: dir,Name: name} return } func crawl(url,localDir string) { doc,err := query.NewDocument(url) // checkErr(err) if err != nil { fmt.Printf("%vn",err.Error()) return } items := doc.Find("a") dir := localDir if !strings.HasSuffix(url,"/") { url += "/" } crawlEach := func(i int,item *query.Selection) { tx := item.Text() if isDir(tx) { folder,err := makeFolder(item,dir) if err != nil { fmt.Printf("%vn",err.Error()) return } wg.Add(1) go crawlFolder(folder) } else { file,err := makeFile(item,err.Error()) return } 
download(file) } } items.Each(crawlEach) } func download(file *File) { dir := file.Dir url := file.Url name := file.Name if err := os.MkdirAll(dir,os.ModePerm); os.IsExist(err) { fmt.Printf("%x is existn",dir) } else { os.Chmod(dir,os.ModePerm) } resp,err := http.Get(url) if err != nil { fmt.Printf("%vn",err.Error()) return } defer resp.Body.Close() body,err := ioutil.ReadAll(resp.Body) if err != nil { fmt.Printf("%vn",err.Error()) return } fp := string([]rune(filepath.Join(dir,name))) err = ioutil.WriteFile(fp,body,0777) if err != nil { fmt.Printf("%v fp:[%v]n",err.Error(),fp) return } fmt.Printf("Download: %+vn",file) } func crawlFolder(folder *Folder) { url := folder.Url dir := folder.Dir crawl(url,dir) wg.Done() } func main() { host := flag.String("host","http://localhost:8000","HTTP服务地址Host") location := flag.String("locate","E:/Crawler下载文件","本地文件系统绝对路径") flag.Parse() crawl(*host,*location) wg.Wait() } 以上内容由PHP站长网【52php.cn】收集整理供大家参考研究 如果以上内容对您有帮助,欢迎收藏、点赞、推荐、分享。 (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |