加入收藏 | 设为首页 | 会员中心 | 我要投稿 李大同 (https://www.lidatong.com.cn/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 站长学院 > PHP教程 > 正文

基于Snoopy的PHP近似完美获取网站编码的代码

发布时间:2020-12-13 06:00:38 所属栏目:PHP教程 来源:网络整理
导读:先要到网上下载Snoopy.class.php。调用方法:先 require 'lib/Snoopy.class.php' 和包含 WebCrawl 类的 'lib/WebCrawl.class.php',再创建 WebCrawl 对象并调用 getCharset(),完整代码见正文。

先要到网上下载Snoopy.class.php
调用方法:
<div class="codetitle"><a style="CURSOR: pointer" data="29757" class="copybut" id="copybut29757" onclick="doCopy('code29757')">代码如下:</a></div><div class="codebody" id="code29757">
<?php
// Load the Snoopy HTTP client first, then the file that holds the WebCrawl class shown below.
require 'lib/Snoopy.class.php';
require 'lib/WebCrawl.class.php';

// Build a crawler for the target site and print the charset it reports.
$crawler = new WebCrawl('http://www.baidu.com');
$detectedCharset = $crawler->getCharset();
echo $detectedCharset;
?>

<div class="codetitle"><a style="CURSOR: pointer" data="21471" class="copybut" id="copybut21471" onclick="doCopy('code21471')">代码如下:</a></div><div class="codebody" id="code21471">
<?php
/**
 * Fetches a web page through Snoopy and reports its character set, title,
 * keywords, description and IP address.
 *
 * The page is fetched at most once per instance. The body is lower-cased and,
 * when the detected charset allows it, converted to UTF-8 before any parsing.
 *
 * NOTE(review): Snoopy is not defined in this file. This class only relies on
 * its fetch() method and its status / results / headers properties, exactly
 * as the code below uses them.
 */
class WebCrawl
{
    /** URL given to the constructor. */
    private $url;

    /** Snoopy instance once a fetch has been attempted, null before that. */
    private $request;

    /**
     * Charset detected on the raw (not yet converted) page body, or false when
     * nothing could be detected. Cached so that later calls do not re-detect
     * on content that has already been converted to UTF-8.
     */
    private $charset = false;

    /** Lower-case charset names accepted from a <meta ... charset=...> declaration. */
    public $charset_arr = array(
        'gb2312', 'utf-8', 'big5', 'gbk', 'ascii', 'cp936',
        'ibm037', 'ibm437', 'ibm500', 'asmo-708', 'dos-720', 'ibm737',
        'ibm775', 'ibm850', 'ibm852', 'ibm855', 'ibm857', 'ibm00858',
        'ibm861', 'ibm860', 'dos-862', 'ibm863', 'ibm864', 'ibm865',
        'cp866', 'ibm869', 'ibm870', 'windows-874', 'cp875', 'shift_jis',
        'ks_c_5601-1987', 'ibm1026', 'ibm01047', 'ibm01040', 'ibm01041', 'ibm01042',
        'ibm01043', 'ibm01044', 'ibm01045', 'ibm01046', 'ibm01048', 'ibm01049',
        'utf-16', 'unicodefffe',
        'windows-1250', 'windows-1251', 'windows-1252', 'windows-1253', 'windows-1254',
        'windows-1255', 'windows-1256', 'windows-1257', 'windows-1258',
        'johab', 'macintosh',
        'x-mac-japanese', 'x-mac-chinesetrad', 'x-mac-korean', 'x-mac-arabic',
        'x-mac-hebrew', 'x-mac-greek', 'x-mac-cyrillic', 'x-mac-chinesesimp',
        'x-mac-romanian', 'x-mac-ukrainian', 'x-mac-thai', 'x-mac-ce',
        'x-mac-icelandic', 'x-mac-turkish', 'x-mac-croatian',
        'x-chinese-cns', 'x-cp20001', 'x-chinese-eten', 'x-cp20003', 'x-cp20004', 'x-cp20005',
        'x-ia5', 'x-ia5-german', 'x-ia5-swedish', 'x-ia5-norwegian',
        'us-ascii', 'x-cp20261', 'x-cp20269',
        'ibm273', 'ibm277', 'ibm278', 'ibm280', 'ibm284', 'ibm285',
        'ibm290', 'ibm420', 'ibm423', 'ibm424',
        'x-ebcdic-koreanextended', 'ibm-thai', 'koi8-r',
        'ibm871', 'ibm880', 'ibm905', 'ibm00924',
        'x-cp20936', 'x-cp20949', 'cp1025', 'koi8-u',
        'iso-8859-1', 'iso-8859-2', 'iso-8859-3', 'iso-8859-4', 'iso-8859-5',
        'iso-8859-6', 'iso-8859-7', 'iso-8859-8', 'iso-8859-9',
        'iso-8859-13', 'iso-8859-15',
        'x-europa', 'iso-8859-8-i', 'iso-2022-jp', 'csiso2022jp', 'iso-2022-kr',
        'x-cp50227', 'euc-jp', 'euc-cn', 'euc-kr', 'hz-gb-2312', 'gb18030',
        'x-iscii-de', 'x-iscii-be', 'x-iscii-ta', 'x-iscii-te', 'x-iscii-as',
        'x-iscii-or', 'x-iscii-ka', 'x-iscii-ma', 'x-iscii-gu', 'x-iscii-pa',
        'utf-7', 'utf-32', 'utf-32be'
    );

    /**
     * @param string $url Address of the page to inspect.
     */
    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Fetch the page (first call only) and prepare its body for parsing.
     *
     * On a successful first fetch the body is lower-cased, the charset is
     * detected on those raw bytes and cached, and the body is converted to
     * UTF-8 when possible. Later calls only report whether that fetch ended
     * with status 200.
     *
     * @param string $url Address to fetch.
     * @return bool True when the response status is 200.
     */
    private function open($url)
    {
        if ($this->request !== null) {
            // Loose comparison on purpose: the status may be held as a numeric string.
            return $this->request->status == 200;
        }

        $this->request = new Snoopy();
        $this->request->fetch($url);
        if ($this->request->status != 200) {
            return false;
        }

        $this->request->results = strtolower($this->request->results);
        // Detect before converting; afterwards the bytes no longer reflect the site's charset.
        $this->charset = $this->detectCharset();
        $this->request->results = $this->toUtf8($this->request->results, $this->charset);

        return true;
    }

    /**
     * Collect title, keywords, description and IPv4 address of the site.
     *
     * @return array Keys 'title', 'keywords', 'desc', 'ip'; each value is an
     *               empty string when it could not be determined.
     */
    public function getWebinfo()
    {
        $info = array(
            'title'    => '',
            'keywords' => '',
            'desc'     => '',
            'ip'       => ''
        );
        if (!$this->open($this->url)) {
            return $info;
        }

        $html = $this->request->results;

        if (preg_match('/<title>([^>]*)<\/title>/si', $html, $titleMatch)) {
            $info['title'] = strip_tags($titleMatch[1]);
        }

        $metaTags = $this->extractMetaTags($html);
        if (isset($metaTags['keywords'])) {
            $info['keywords'] = $metaTags['keywords'];
        }
        if (isset($metaTags['description'])) {
            $info['desc'] = $metaTags['description'];
        }

        $host = $this->hostFromUrl($this->url);
        if ($host !== '') {
            // gethostbyname() hands back its argument unchanged on failure, so validate the result.
            $ip = gethostbyname($host);
            if (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) !== false) {
                $info['ip'] = $ip;
            }
        }

        return $info;
    }

    /**
     * Byte-pattern guess between UTF-8 and GB2312.
     *
     * Scans for the first non-ASCII byte sequence: a byte with the top three
     * bits set followed by two high-bit bytes is reported as UTF-8; a byte
     * with the top two bits set followed by one high-bit byte as GB2312.
     *
     * @param string $string Raw bytes to inspect.
     * @param mixed  $o      Unused; kept so existing callers keep working.
     * @return string 'utf-8', 'gb2312', or '' when no multi-byte sequence is found.
     */
    public function t($string, $o)
    {
        $encoding = '';
        $length = strlen($string);

        for ($i = 0; $i < $length; $i++) {
            if (ord($string[$i]) < 128) {
                continue;
            }
            if ((ord($string[$i]) & 224) == 224) {
                // First byte passed; check the next two bytes.
                if (($this->byteAt($string, ++$i, $length) & 128) == 128) {
                    if (($this->byteAt($string, ++$i, $length) & 128) == 128) {
                        $encoding = 'UTF-8';
                        break;
                    }
                }
            }
            // $i may have advanced above; the two-byte test runs on the current position.
            if (($this->byteAt($string, $i, $length) & 192) == 192) {
                if (($this->byteAt($string, ++$i, $length) & 128) == 128) {
                    $encoding = 'GB2312';
                    break;
                }
            }
        }

        return strtolower($encoding);
    }

    /**
     * Replace five-digit decimal character references (&#NNNNN;) by the
     * characters they denote.
     *
     * @param string $str  Text containing the references (UTF-8 is produced for them).
     * @param string $code Target encoding; anything other than 'utf-8' is applied with iconv().
     * @return string|false Decoded text; false only if iconv() fails.
     */
    public function uni_decode($str, $code = 'utf-8')
    {
        $decoded = preg_replace_callback(
            '/&#(\d{5});/',
            function ($match) {
                return html_entity_decode('&#' . $match[1] . ';', ENT_QUOTES, 'UTF-8');
            },
            $str
        );
        if ($decoded !== null) {
            // null means the regex engine failed; keep the input in that case.
            $str = $decoded;
        }
        if ($code != 'utf-8') {
            $str = iconv('utf-8', $code, $str);
        }
        return $str;
    }

    /**
     * Report the site's character set.
     *
     * @return string|false Lower-case charset name, or false when the page
     *                      could not be fetched or nothing was detected.
     */
    public function getCharset()
    {
        if (!$this->open($this->url)) {
            return false;
        }
        return $this->charset;
    }

    /**
     * Detect the charset of the freshly fetched, still unconverted body.
     *
     * Order: <meta> declaration (must be listed in $charset_arr; a gb2312
     * declaration must also agree with t()), then the response headers,
     * then mb_detect_encoding().
     *
     * @return string|false
     */
    private function detectCharset()
    {
        $html = $this->request->results;

        // First try the declaration inside the HTML.
        if (preg_match('/<meta.+?charset=[^\w]?([-\w]+)/i', $html, $meta)) {
            $declared = strtolower($meta[1]);
            if (in_array($declared, $this->charset_arr, true)) {
                if ($declared !== 'gb2312' || $this->t($html, $declared) === $declared) {
                    return $declared;
                }
            }
        }

        // Then the response headers.
        if (!empty($this->request->headers)) {
            $headerText = strtolower(implode('|||', $this->request->headers));
            if (preg_match('/charset=[^\w]?([-\w]+)/is', $headerText, $header)) {
                return $header[1];
            }
        }

        $candidates = array(
            'UTF-8', 'GB2312', 'GBK', 'BIG5', 'ASCII', 'EUC-JP', 'Shift_JIS',
            'CP936', 'ISO-8859-1', 'JIS', 'eucjp-win', 'sjis-win'
        );
        $detected = mb_detect_encoding($html, $candidates);

        return $detected ? strtolower($detected) : false;
    }

    /**
     * Convert the page body to UTF-8 according to the detected charset.
     * The body is returned untouched when the charset is unknown, already
     * UTF-8, or not an encoding mbstring can read.
     *
     * @param string       $html
     * @param string|false $charset
     * @return string
     */
    private function toUtf8($html, $charset)
    {
        if ($charset === false || $charset === 'utf-8') {
            return $html;
        }
        if ($charset === 'windows-1252') {
            // Such pages are only run through the numeric-reference decoder.
            return $this->uni_decode($html);
        }
        if (!$this->isSupportedEncoding($charset)) {
            return $html;
        }
        return mb_convert_encoding($html, 'UTF-8', $charset);
    }

    /**
     * Tell whether mbstring knows $charset, by name or alias (case-insensitive).
     *
     * @param string $charset
     * @return bool
     */
    private function isSupportedEncoding($charset)
    {
        foreach (mb_list_encodings() as $name) {
            if (strcasecmp($name, $charset) === 0) {
                return true;
            }
            foreach (mb_encoding_aliases($name) as $alias) {
                if (strcasecmp($alias, $charset) === 0) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Build a name => content map of the page's <meta> tags.
     *
     * Tags written as name="..." content="..." are tried first; when none of
     * them is "keywords" or "description", tags written as
     * content="..." name="..." are used instead. Later tags overwrite
     * earlier ones with the same name.
     *
     * @param string $html
     * @return array
     */
    private function extractMetaTags($html)
    {
        $nameFirst = '/<[\s]*meta[\s]*name="?' . '([^>"]*)"?[\s]*' . 'content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si';
        $contentFirst = '/<[\s]*meta[\s]*content="?' . '([^>"]*)"?[\s]*' . 'name="?([^>"]*)"?[\s]*[\/]?[\s]*>/si';

        $names = array();
        $values = array();

        preg_match_all($nameFirst, $html, $match);
        if (isset($match[1], $match[2])) {
            $names = $match[1];
            $values = $match[2];
        }

        if (!in_array('keywords', $names, true) && !in_array('description', $names, true)) {
            $names = array();
            $values = array();
            preg_match_all($contentFirst, $html, $match);
            if (isset($match[1], $match[2])) {
                $names = $match[2];
                $values = $match[1];
            }
        }

        $tags = array();
        foreach ($names as $index => $name) {
            $tags[$name] = $values[$index];
        }
        return $tags;
    }

    /**
     * Extract the host part of a URL, with or without a scheme.
     *
     * @param string $url
     * @return string Host name, or '' when none can be found.
     */
    private function hostFromUrl($url)
    {
        $host = parse_url($url, PHP_URL_HOST);
        if (is_string($host) && $host !== '') {
            return $host;
        }
        // Scheme-less input such as "www.example.com/path": cut at the first delimiter.
        $stripped = preg_replace('/^[a-z][a-z0-9+.\-]*:\/\//i', '', (string) $url);
        $parts = preg_split('/[\/:?#]/', $stripped, 2);
        return $parts[0];
    }

    /**
     * Byte value at $index, or 0 when $index is past the end of the string.
     *
     * @param string $string
     * @param int    $index
     * @param int    $length strlen($string), passed in to avoid recomputing it.
     * @return int
     */
    private function byteAt($string, $index, $length)
    {
        return $index < $length ? ord($string[$index]) : 0;
    }
}
?>
<BR></div></p> <p style="text-align:right;">(编辑:李大同)</p> <p style="text-align:right;">【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容!</p> </td> </tr> </table> </div> <div class="dede_pagess"><ul class="pagelist"></ul></div> <div class="ad-690"><script src='https://ess.0577qiche.com/d/js/acmsd/ad76.js' language='javascript'></script></div> <div class="g-box10"> <div class="t-2">相关内容</div> <ul class="b-box12"></ul> <ul class="b-box13"><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/85368.html" target="_blank">PHP的命令行命令使用指南</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/91440.html" target="_blank">php – 如何从AFNETWORKING POST获取参数</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/96955.html" target="_blank">php – ALPN,服务器不同意协议意味着什么?</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/87488.html" target="_blank">php 无限级 SelectTree 类</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/96292.html" target="_blank">php – 如何从leechers保护我的站点地图索引文件和sitemap.</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/144610.html" target="_blank">php – Mysql查询最近一行的连接表</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/143243.html" target="_blank">php – 检查file_get_contents何时完成</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/86162.html" target="_blank">PHP验证码生成原理和实现</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/99558.html" target="_blank">PHP – 匹配文本中所有存在的短语 – preg_replace</a></li><li><a
href="https://www.lidatong.com.cn/html/jc/php/2020/1213/83894.html" target="_blank">PHP编程:smarty模板引擎中内建函数if、elseif和else的使用</a></li><div class="cl"></div></ul> </div> </div> </div> <div class="right-1 fr"> <div class="g-box8"> <div class="t-2">推荐文章</div> <ul></ul> </div> <div class="g-box11"> <div class="t-2">站长推荐</div> <ul class="b-box7"><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/84767.html" target="_blank">php使用标签替换的方式生成静态页面</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/118847.html" target="_blank">php – Authorize.net支付集成</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/88017.html" target="_blank">dedecms系统的广告设置代码 基础版本</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/90887.html" target="_blank">php使用异或实现的加密解密实例</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1212/78543.html" target="_blank">Win10 下安装配置IIS + MySQL + nginx + php7.1.</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/129902.html" target="_blank">php中网页添加到桌面快捷方式方法</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/137938.html" target="_blank">从原理到场景 系统讲解 PHP 缓存技术</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/87163.html" target="_blank">jq的get传参数在utf-8中乱码问题的解决php版</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/94473.html" target="_blank">PHP 5.5.x中不推荐使用的MySQL扩展</a></li><li><a href="https://www.lidatong.com.cn/html/jc/php/2020/1213/111176.html" target="_blank">如果像ucfirst()这样的PHP函数会忽略html?</a></li></ul> </div> <script type="text/javascript">jQuery(".g-box8").slide({ titCell:"li",triggerTime:0 }); </script> <div class="ad-250"><script src='https://ess.0577qiche.com/d/js/acmsd/ad103.js' language='javascript'></script></div> <div class="g-box3 u-3"> <div class="t-1">热点阅读</div> <ul class="b-box2"></ul> </div> <div class="ad-250"><script src='https://ess.0577qiche.com/d/js/acmsd/ad77.js' 
language='javascript'></script></div> </div> <div class="cl"></div> </div> <div class="ad-960"><script src='https://ess.0577qiche.com/d/js/acmsd/ad78.js' language='javascript'></script></div> <div class="footer"> <p>【免责声明】本站内容转载自互联网,其发布内容言论不代表本站观点,如果其链接、内容侵犯了您的权益,烦请提交相关链接至邮箱bqsm@foxmail.com,我们将及时予以处理。</p> <p>建议您使用1920×1080分辨率、谷歌浏览器Google Chrome、Microsoft Edge以获得本站的最佳浏览效果</p> <p>Copyright © 2008-2022 https://www.lidatong.com.cn/ All Rights Reserved. 李大同</p> <p><script type="text/javascript" src="//js.users.51.la/21280179.js"></script></p> </div> </body> </html>