加入收藏 | 设为首页 | 会员中心 | 我要投稿 李大同 (https://www.lidatong.com.cn/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 站长学院 > PHP教程 > 正文

PHP判断来访是搜索引擎蜘蛛还是普通用户的代码小结

发布时间:2020-12-13 02:50:55 所属栏目:PHP教程 来源:网络整理
导读:《PHP实例:PHP判断来访是搜索引擎蜘蛛还是普通用户的代码小结》要点: 本文介绍了PHP实例:PHP判断来访是搜索引擎蜘蛛还是普通用户的代码小结,希望对您有用。如果有疑问,可以联系我们。 1、推荐的一种方法:PHP判断来访者是搜索引擎蜘蛛爬虫还是人为访问的代码,摘自

《PHP实例:PHP判断来访是搜索引擎蜘蛛还是普通用户的代码小结》要点:
本文介绍了PHP实例:PHP判断来访是搜索引擎蜘蛛还是普通用户的代码小结,希望对您有用。如果有疑问,可以联系我们。

1、推荐的一种方法:PHP判断来访者是搜索引擎蜘蛛爬虫还是人为访问的代码,摘自 Discuz X3.2:

<?php
/**
 * Classify a client as a search-engine robot or a regular browser from its User-Agent.
 *
 * A User-Agent that contains a browser keyword and no http(s) URL is reported as a
 * browser. Otherwise the client is reported as a robot when any spider keyword occurs.
 *
 * @param string $useragent User-Agent to classify; when empty, the current request's
 *                          HTTP_USER_AGENT header is used instead.
 * @return bool true when a spider keyword matches, false for browsers and for
 *              missing or unrecognised User-Agents.
 */
function checkrobot($useragent=''){
 static $kw_spiders = array('bot','crawl','spider','slurp','sohu-search','lycos','robozilla');
 static $kw_browsers = array('msie','netscape','opera','konqueror','mozilla');
 if(empty($useragent)) {
  // The User-Agent header is optional; fall back to '' instead of reading an undefined index.
  $useragent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
 }
 $useragent = strtolower($useragent);
 // Look for both URL schemes, so a Mozilla-compatible agent that embeds an https:// URL
 // is still passed on to the spider keyword check below.
 $has_url = strpos($useragent,'http://') !== false || strpos($useragent,'https://') !== false;
 if(!$has_url && dstrpos($useragent,$kw_browsers)) return false;
 if(dstrpos($useragent,$kw_spiders)) return true;
 return false;
}
/**
 * Report whether a string contains at least one of several substrings.
 *
 * @param string       $string      Haystack to search; an empty() value yields false.
 * @param array|string $arr         One needle or a list of needles (cast to array).
 * @param bool         $returnvalue When true, return the first matching needle
 *                                  instead of boolean true.
 * @return bool|string true (or the matching needle) on the first hit, false if none match.
 */
function dstrpos($string,$arr,$returnvalue = false) {
 if(empty($string)) {
  return false;
 }
 $needles = (array)$arr;
 foreach($needles as $needle) {
  if(strpos($string,$needle) === false) {
   // This needle does not occur; try the next one.
   continue;
  }
  return $returnvalue ? $needle : true;
 }
 return false;
}
// Demo: print which kind of visitor checkrobot() reports for the current request.
echo checkrobot() ? '机器人爬虫' : '人';
?>

实际应用中可以这样判断:只有来访者不是搜索引擎时才执行操作。

<?php
if(!checkrobot()){
// This branch runs only when checkrobot() returns false for the current request,
// i.e. the visitor was not classified as a search-engine robot.
//do something
}
?>

2、第二种办法:

使用PHP实现蜘蛛访问日志统计

// Spider visit log: label the requesting crawler from its User-Agent and, when one is
// recognised, append a tab-separated record (time, client IP, label, URL) to bot.txt.
// The User-Agent header is optional, so fall back to '' when it is absent.
$useragent = addslashes(strtolower(isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : ''));
 // The first matching branch wins, so specific signatures must precede generic ones
 // ('msnbot' before 'msn', every named bot before the catch-all 'bot').
 // All needles are lowercase because the User-Agent was lowercased above.
 if (strpos($useragent,'googlebot')!== false){$bot = 'Google';}
 elseif (strpos($useragent,'mediapartners-google') !== false){$bot = 'Google Adsense';}
 elseif (strpos($useragent,'baiduspider') !== false){$bot = 'Baidu';}
 elseif (strpos($useragent,'sogou spider') !== false){$bot = 'Sogou';}
 elseif (strpos($useragent,'sogou web') !== false){$bot = 'Sogou web';}
 elseif (strpos($useragent,'sosospider') !== false){$bot = 'SOSO';}
 elseif (strpos($useragent,'360spider') !== false){$bot = '360Spider';}
 elseif (strpos($useragent,'yahoo') !== false){$bot = 'Yahoo';}
 elseif (strpos($useragent,'msnbot') !== false){$bot = 'msnbot';}
 elseif (strpos($useragent,'msn') !== false){$bot = 'MSN';}
 elseif (strpos($useragent,'sohu') !== false){$bot = 'Sohu';}
 elseif (strpos($useragent,'yodaobot') !== false){$bot = 'Yodao';}
 elseif (strpos($useragent,'twiceler') !== false){$bot = 'Twiceler';}
 elseif (strpos($useragent,'ia_archiver') !== false){$bot = 'Alexa_';}
 elseif (strpos($useragent,'iaarchiver') !== false){$bot = 'Alexa';}
 elseif (strpos($useragent,'slurp') !== false){$bot = '雅虎';}
 elseif (strpos($useragent,'bot') !== false){$bot = '其它蜘蛛';}
 if(isset($bot)){
   // Logging is best-effort: if bot.txt cannot be opened, skip the record instead of
   // passing false to fwrite()/fclose().
   $fp = @fopen('bot.txt','a');
   if($fp !== false){
     fwrite($fp,date('Y-m-d H:i:s')."\t".$_SERVER["REMOTE_ADDR"]."\t".$bot."\t".'http://'.$_SERVER['SERVER_NAME'].$_SERVER["REQUEST_URI"]."\r\n");
     fclose($fp);
   }
 }

3、第三种办法:

我们可以通过HTTP_USER_AGENT来判断来访者是否是蜘蛛。搜索引擎的蜘蛛都有自己独特的标识,下面列举了一部分。

/**
 * Tell whether the current request's User-Agent contains a known crawler signature.
 *
 * Matching is a case-insensitive substring test against a short built-in list.
 *
 * @return bool true when a signature matches, false otherwise (including when the
 *              request carries no User-Agent header).
 */
function is_crawler() {
  // The User-Agent header is optional; treat a missing or empty one as "not a crawler".
  $userAgent = strtolower(isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '');
  if ($userAgent === '') {
    return false;
  }
  // Signatures are stored lowercase so they can be compared with the lowercased
  // User-Agent without converting each one inside the loop.
  $spiders = array(
    'googlebot',    // Google crawler
    'baiduspider',  // Baidu crawler
    'yahoo! slurp', // Yahoo crawler
    'yodaobot',     // Youdao crawler
    'msnbot'        // Bing crawler
    // add more crawler keywords here (lowercase)
  );
  foreach ($spiders as $spider) {
    if (strpos($userAgent,$spider) !== false) {
      return true;
    }
  }
  return false;
}

下面的PHP代码附带了更多的蜘蛛标识:

/**
 * Tell whether the current request's User-Agent contains one of an extended list of
 * crawler signatures (case-insensitive substring match).
 *
 * The function produces no output; it only inspects the header.
 *
 * @return bool true when a signature matches, false otherwise (including when the
 *              User-Agent header is missing or empty).
 */
function isCrawler() {
    // The User-Agent header is optional; a missing or empty one is "not a crawler".
    $agent = strtolower(isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '');
    if ($agent === '') {
        return false;
    }
    $spiderSite = array(
        "TencentTraveler", "Baiduspider+", "BaiduGame", "Googlebot", "msnbot",
        "Sosospider+", "Sogou web spider", "ia_archiver", "Yahoo! Slurp", "YoudaoBot",
        "Yahoo Slurp", "MSNBot", "Java (Often spam bot)", "BaiDuSpider", "Voila",
        "Yandex bot", "BSpider", "twiceler", "Sogou Spider", "Speedy Spider",
        "Google AdSense", "Heritrix", "Python-urllib", "Alexa (IA Archiver)", "Ask",
        "Exabot", "Custo", "OutfoxBot/YodaoBot", "yacy", "SurveyBot",
        "legs", "lwp-trivial", "Nutch", "StackRambler", "The web archive (IA Archiver)",
        "Perl tool", "MJ12bot", "Netcraft", "MSIECrawler", "WGet tools",
        "larbin", "Fish search",
    );
    foreach ($spiderSite as $val) {
        // Compare in lowercase because $agent was lowercased above.
        if (strpos($agent, strtolower($val)) !== false) {
            return true;
        }
    }
    // No signature matched: return an explicit boolean instead of falling off the end.
    return false;
}
// Demo: greet the visitor according to what isCrawler() reports for this request.
echo isCrawler() ? "你好蜘蛛精!" : "你不是蜘蛛精啊!";

4、第四种办法:

<?php
// Gate for non-spider visitors: set $flag when the User-Agent contains a known spider
// signature (case-sensitive substring match), otherwise send a redirect and stop.
$flag = false;
// The User-Agent header is optional, so fall back to '' when it is absent.
$tmp = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
$spiderSignatures = array(
  'Googlebot',
  'Baiduspider',
  'Yahoo! Slurp',
  'msnbot',
  'Sosospider',
  'YodaoBot',
  'OutfoxBot',
  'Sogou web spider',
  'Sogou Orion spider',
  'fast-webcrawler',
  'Gaisbot',
  'ia_archiver',
  'altavista',
  'lycos_spider',
  'Inktomi slurp',
);
foreach ($spiderSignatures as $signature) {
  // Compare against false explicitly: a match at offset 0 is still a match.
  if (strpos($tmp,$signature) !== false) {
    $flag = true;
    break;
  }
}
if($flag == false){
  // Redirect visitors that were not recognised as a spider.
  // $_SERVER['REQUEST_URI'] is the path part of the URL that follows the domain name;
  // replace it with the desired target, i.e. header("Location: <target>").
  // NOTE(review): as written this points the visitor back at the URL just requested,
  // which looks like it would redirect repeatedly - confirm and set a real target.
  header("Location: " . $_SERVER['REQUEST_URI']);
  exit();
}
?>

编程之家培训学院每天发布《PHP实例:PHP判断来访是搜索引擎蜘蛛还是普通用户的代码小结》等实战技能,PHP、MYSQL、LINUX、APP、JS,CSS全面培养人才。

(编辑:李大同)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!

    推荐文章
      热点阅读