Learning Web Crawling with Perl

发布时间：2020-12-13 20:15:33 所属栏目：PHP教程 来源：网络整理

导读：###### Overview of Web.Crawling related modules. # Note that the code below is for overview purposes only and cannot be executed as-is. ###### #!/usr/bin/perl ###### HTTP::Thin ##### use 5.12.1; use HTTP::Request::Common; use HTTP::Thin; say HTTP::Thin->new()->request…

#####
# Overview of Web.Crawling related modules.
# Note: the code below is an API overview only; it is not meant to be
# executed as-is (the placeholder variables are never given real values).
#####

#!/usr/bin/perl

#####
# HTTP::Thin
#####
use 5.12.1;    # enables `say`; `use VERSION` >= 5.12 also turns on strict
use HTTP::Request::Common;
use HTTP::Thin;

say HTTP::Thin->new()->request(GET 'http://example.com')->as_string;

#####
# HTTP::Tiny
#####
use HTTP::Tiny;

my $response = HTTP::Tiny->new->get('http://example.com/');
die "Failed!\n" unless $response->{success};

print "$response->{status} $response->{reason}\n";

# A header value is either a plain string or an array ref of strings.
while (my ($k, $v) = each %{ $response->{headers} }) {
    for (ref($v) eq 'ARRAY' ? @$v : $v) {
        print "$k: $_\n";
    }
}

print $response->{content} if length $response->{content};

# Placeholders used by the signature overview below. They are declared only
# so the listing compiles under the strictures enabled by `use 5.12.1`.
my ($http, $url, $method, $form_data, $data, $params, $file_path);
my (%attributes, %options);

# new
$http = HTTP::Tiny->new( %attributes );
# valid attributes include:
#   - agent
#   - cookie_jar
#   - default_headers
#   - local_address
#   - keep_alive
#   - max_redirect
#   - max_size
#   - https_proxy
#   - proxy
#   - no_proxy
#   - timeout
#   - verify_SSL
#   - SSL_options

# get | head | put | post | delete
# The optional trailing argument is a hash reference, not a flat hash.
$response = $http->get($url);
$response = $http->get($url, \%options);
$response = $http->head($url);

# post_form
$response = $http->post_form($url, $form_data);
$response = $http->post_form($url, $form_data, \%options);

# request
$response = $http->request($method, $url);
$response = $http->request($method, $url, \%options);

# Credentials embedded in the URL; '%40' is a percent-encoded '@'.
# NOTE(review): the host part of these two URLs was garbled in the published
# text (it contained a space); example.com is a placeholder - confirm.
$http->request('GET', 'http://user:pwd@example.com/');
# or
$http->request('GET', 'http://mars%40example.com:pwd@example.com/');

# www_form_urlencode
$params   = $http->www_form_urlencode( $data );
$response = $http->get("http://example.com/query?$params");

# SSL support
$http = HTTP::Tiny->new(
    SSL_options => { SSL_ca_file => $file_path },
);

# proxy support

#####
# WWW::Mechanize
#
# Stateful programmatic web browsing, used for automating interaction with
# websites.
#####
use WWW::Mechanize;

# Placeholders for the WWW::Mechanize overview (see the note above).
my ($expected, $number, $name, $value, $field_values, $button);
my @criteria;

my $mech = WWW::Mechanize->new();

$mech->get( $url );

$mech->follow_link( n => 3 );
$mech->follow_link( text_regex => qr/download this/i );
$mech->follow_link( url => 'http://host.com/index.html' );

$mech->submit_form(
    form_number => 3,
    fields      => {
        username => 'banana',
        password => 'lost-and-alone',
    },
);

$mech->submit_form(
    form_name => 'search',
    fields    => { query => 'pot of gold' },
    button    => 'search now',
);

# testing web applications
use Test::More;

like( $mech->content(), qr/$expected/, "Got expected content" );

# page traverse
$mech->back();

# finer control over page
$mech->find_link( n => $number );
$mech->form_number( $number );
$mech->form_name( $name );
$mech->field( $name, $value );
$mech->set_fields( $field_values );
$mech->set_visible( @criteria );
$mech->click( $button );

# subclass of LWP::UserAgent, eg:
$mech->add_header( $name => $value );

# page-fetching methods
# status methods
# content-handling methods
# link methods
# image methods
# form methods
# field methods
# miscellaneous methods
# overridden LWP::UserAgent methods
# inherited unchanged LWP::UserAgent methods

# yeah now, it's easy to implement a spider project for future integration use.

Mars

（编辑：李大同）

【声明】本站内容均来自网络，其相关言论仅代表作者个人观点，不代表本站立场。若无意侵犯到您的权利，请及时联系站长删除相关内容！