本文演示如何用 Perl 的 Web::Scraper 模块抓取新浪新闻页面中的标题和正文文本:
#!/usr/bin/perl
#
# Fetch one Sina news article and print the text of its headline
# element(s) followed by the text of its body paragraphs, one item per
# line, encoded as UTF-8 on STDOUT.
#
# The script takes no arguments; the article URL is fixed below.

use strict;
use warnings;    # lexical replacement for the global -w switch
use utf8;

use URI;
use Web::Scraper;

# The scraped text is Chinese; give STDOUT an encoding layer so printing
# it does not raise "Wide character" warnings.
binmode( STDOUT, ':encoding(utf8)' );

my $url = "http://news.sina.com.cn/c/2010-06-04/203520413927.shtml";

# Each rule maps a CSS selector to a result key. The trailing "[]" on
# the key collects every match into an array reference, and "TEXT"
# extracts the text content of each matched element.
my $proce = scraper {
    process '.blkContainerSblk >h1',    "news[]"  => "TEXT";
    process '.blkContainerSblkCon >p', "texts[]" => "TEXT";
};

# Handing scrape() a URI object (not a string) asks Web::Scraper to
# download the page itself before applying the rules.
# NOTE(review): a failed download is expected to die here and end the
# script -- confirm against the installed Web::Scraper version.
my $res = $proce->scrape( URI->new($url) );

# Headline text first ...
for my $result ( @{ $res->{news} } ) {
    print "$result\n";
}

# ... then the article paragraphs, in document order.
for my $result ( @{ $res->{texts} } ) {
    print "$result\n";
}
|
请发表评论