有没有可能使用PHP获取Google网站管理员工具(GWT)数据?
我正在尝试从Google网站管理员工具(GWT)获取一些数据,我已经搜索了一些API文档和实现,但它们只返回了GWT的部分数据.
我的需求: 需要从GWT获取以下数据, (1). TOP_PAGES (2). TOP_QUERIES (3). CRAWL_ERRORS (4). CONTENT_ERRORS (5). CONTENT_KEYWORDS (6). INTERNAL_LINKS (7).外部链接 (8). SOCIAL_ACTIVITY 获取这些数据后,我需要为每个数据生成Excel文件.
达成: 我从上面得到的数据很少,并生成到Excel文件中.如, (1). TOP_PAGES (2). TOP_QUERIES (3). INTERNAL_LINKS (4).外部链接 (5). CONTENT_KEYWORDS
没达到: 我还没有得到像这样的主要部件/数据, (1). CRAWL_ERRORS (2). CONTENT_ERRORS (3). SOCIAL_ACTIVITY
代码示例供您参考: 我在PHP中使用了两个用于此GWT API的文件,
文件#1:(gwdata.php)
<?php
/**
 * PHP class for downloading CSV files from Google Webmaster Tools.
 *
 * This class does NOT require the Zend gdata package be installed
 * in order to run.
 *
 * Copyright 2012 eyecatchUp UG. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @author: Stephan Schmitz <eyecatchup@gmail.com>
 * @link:   https://code.google.com/p/php-webmaster-tools-downloads/
 */
class GWTdata
{
    const HOST = "https://www.google.com";
    const SERVICEURI = "/webmasters/tools/";

    public $_language, $_tables, $_daterange, $_downloaded, $_skipped;

    // Declared explicitly: the constructor assigns these, and relying on
    // dynamic property creation is deprecated in recent PHP versions.
    public $_errTablesSort, $_errTablesType;

    private $_auth, $_logged_in;

    public function __construct()
    {
        $this->_auth = false;
        $this->_logged_in = false;
        $this->_language = "en";
        $this->_daterange = array("", "");
        $this->_tables = array(
            "TOP_PAGES", "TOP_QUERIES", "CRAWL_ERRORS", "CONTENT_ERRORS",
            "CONTENT_KEYWORDS", "INTERNAL_LINKS", "EXTERNAL_LINKS", "SOCIAL_ACTIVITY"
        );
        $this->_errTablesSort = array(
            0 => "http", 1 => "not-found", 2 => "restricted-by-robotsTxt",
            3 => "unreachable", 4 => "timeout", 5 => "not-followed",
            "kAppErrorSoft-404s" => "soft404", "sitemap" => "in-sitemaps"
        );
        $this->_errTablesType = array(
            0 => "web-crawl-errors", 1 => "mobile-wml-xhtml-errors",
            2 => "mobile-chtml-errors", 3 => "mobile-operator-errors",
            4 => "news-crawl-errors"
        );
        $this->_downloaded = array();
        $this->_skipped = array();
    }

    /**
     * Sets content language.
     *
     * @param $str String Valid ISO 639-1 language code, supported by Google.
     */
    public function SetLanguage($str)
    {
        $this->_language = $str;
    }

    /**
     * Sets features that should be downloaded.
     *
     * The whole input is validated before $_tables is replaced, so a rejected
     * call leaves the previous selection untouched.
     *
     * @param $arr Array Non-empty array whose values are any of:
     *                   "TOP_PAGES", "TOP_QUERIES", "CRAWL_ERRORS",
     *                   "CONTENT_ERRORS", "CONTENT_KEYWORDS", "INTERNAL_LINKS",
     *                   "EXTERNAL_LINKS", "SOCIAL_ACTIVITY".
     * @throws Exception When $arr is not an array, is empty, has more entries
     *                   than there are valid tables, or holds an unknown name.
     */
    public function SetTables($arr)
    {
        $valid = array(
            "TOP_PAGES", "TOP_QUERIES", "CRAWL_ERRORS", "CONTENT_ERRORS",
            "CONTENT_KEYWORDS", "INTERNAL_LINKS", "EXTERNAL_LINKS", "SOCIAL_ACTIVITY"
        );

        if (!is_array($arr) || empty($arr) || sizeof($arr) > sizeof($valid)) {
            throw new Exception("Invalid argument given.");
        }

        $tables = array();
        foreach ($arr as $table) {
            if (!in_array($table, $valid, true)) {
                throw new Exception("Invalid argument given.");
            }
            $tables[] = $table;
        }

        $this->_tables = $tables;
    }

    /**
     * Sets daterange for download data.
     *
     * The dates are stored with their dashes removed (e.g. "20120101"), which
     * is the form later substituted into the db/de query parameters.
     *
     * @param $arr Array Array containing two ISO 8601 formatted date strings
     *                   at indexes 0 and 1.
     * @return Boolean Always true; invalid input raises an exception instead.
     * @throws Exception When $arr is not a two-element array of valid dates.
     */
    public function SetDaterange($arr)
    {
        if (!is_array($arr) || sizeof($arr) != 2 || !isset($arr[0], $arr[1])) {
            throw new Exception("Invalid argument given.");
        }
        if ($this->IsISO8601($arr[0]) !== true || $this->IsISO8601($arr[1]) !== true) {
            throw new Exception("Invalid argument given.");
        }

        $this->_daterange = array(
            str_replace("-", "", $arr[0]),
            str_replace("-", "", $arr[1])
        );
        return true;
    }

    /**
     * Returns array of downloaded filenames.
     *
     * @return Array Array of filenames that have been written to disk.
     */
    public function GetDownloadedFiles()
    {
        return $this->_downloaded;
    }

    /**
     * Returns array of skipped filenames.
     *
     * @return Array Array of target filenames whose download or write failed.
     */
    public function GetSkippedFiles()
    {
        return $this->_skipped;
    }

    /**
     * Checks if client has logged into their Google account yet.
     *
     * @return Boolean Returns true if logged in, or false if not.
     */
    private function IsLoggedIn()
    {
        return $this->_logged_in;
    }

    /**
     * Attempts to log into the specified Google account.
     *
     * On success the "Auth=" token from the response body is stored and used
     * for the Authorization header of every later request.
     *
     * @param $email String User's Google email address.
     * @param $pwd   String Password for Google account.
     * @return Boolean Returns true when Authentication was successful,
     *                 else false.
     */
    public function LogIn($email, $pwd)
    {
        $url = self::HOST . "/accounts/ClientLogin";
        $postRequest = array(
            'accountType' => 'HOSTED_OR_GOOGLE',
            'Email'       => $email,
            'Passwd'      => $pwd,
            'service'     => "sitemaps",
            'source'      => "Google-WMTdownloadscript-0.1-php"
        );

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
        // NOTE(review): certificate verification is disabled while account
        // credentials are being posted; enable it wherever a CA bundle exists.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $postRequest);
        $output = curl_exec($ch);
        $info = curl_getinfo($ch);
        curl_close($ch);

        if ($info['http_code'] != 200 || !is_string($output)) {
            return false;
        }
        if (!preg_match('/Auth=(.*)/', $output, $match) || !isset($match[1])) {
            return false;
        }

        $this->_auth = $match[1];
        $this->_logged_in = true;
        return true;
    }

    /**
     * Attempts authenticated GET Request.
     *
     * @param $url String Path (relative to HOST) for the GET request.
     * @return Mixed Curl result as String, or false (Boolean) when not
     *               logged in or when the response status is not 200.
     */
    public function GetData($url)
    {
        if ($this->IsLoggedIn() !== true) {
            return false;
        }

        $url = self::HOST . $url;
        $head = array(
            "Authorization: GoogleLogin auth=" . $this->_auth,
            "GData-Version: 2"
        );

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        // An empty string asks curl to accept every encoding it supports.
        curl_setopt($ch, CURLOPT_ENCODING, "");
        curl_setopt($ch, CURLOPT_HTTPHEADER, $head);
        $result = curl_exec($ch);
        $info = curl_getinfo($ch);
        curl_close($ch);

        return ($info['http_code'] != 200) ? false : $result;
    }

    /**
     * Gets all available sites from Google Webmaster Tools account.
     *
     * @return Mixed Array with the <title> text of every <entry> in the sites
     *               feed, or false (Boolean) if request failed.
     */
    public function GetSites()
    {
        if ($this->IsLoggedIn() !== true) {
            return false;
        }

        $feed = $this->GetData(self::SERVICEURI . "feeds/sites/");
        if ($feed === false) {
            return false;
        }

        $sites = array();
        $doc = new DOMDocument();
        $doc->loadXML($feed);
        foreach ($doc->getElementsByTagName('entry') as $node) {
            $title = $node->getElementsByTagName('title')->item(0);
            // Skip entries without a title rather than dereferencing null.
            if ($title !== null) {
                array_push($sites, $title->nodeValue);
            }
        }
        return $sites;
    }

    /**
     * Gets the download links for an available site
     * from the Google Webmaster Tools account.
     *
     * @param $url String Site URL registered in GWT.
     * @return Mixed Decoded JSON of the downloads-list response (the caller in
     *               this class reads it as an array keyed by table name), or
     *               false (Boolean) when not logged in or the request fails.
     */
    public function GetDownloadUrls($url)
    {
        if ($this->IsLoggedIn() !== true) {
            return false;
        }

        $_url = sprintf(
            self::SERVICEURI . "downloads-list?hl=%s&siteUrl=%s",
            $this->_language,
            urlencode($url)
        );
        $downloadList = $this->GetData($_url);
        if ($downloadList === false) {
            return false;
        }
        return json_decode($downloadList, true);
    }

    /**
     * Downloads one CSV file per configured table for the given site.
     *
     * Tables with a dedicated token-protected endpoint are delegated to
     * DownloadCSV_CrawlErrors() / DownloadCSV_XTRA(); every other table uses
     * the URL reported by GetDownloadUrls().
     *
     * @param $site     String Site URL available in GWT Account.
     * @param $savepath String Optional path to save CSV to (no trailing slash!).
     * @return Mixed false (Boolean) when not logged in, otherwise nothing;
     *               inspect GetDownloadedFiles() / GetSkippedFiles() for results.
     */
    public function DownloadCSV($site, $savepath = ".")
    {
        if ($this->IsLoggedIn() !== true) {
            return false;
        }

        // table => array(token page, token delimiter regex, download endpoint)
        $extra = array(
            "CONTENT_ERRORS"   => array("html-suggestions", '\)', "content-problems-dl"),
            "CONTENT_KEYWORDS" => array("keywords", '\)', "content-words-dl"),
            "INTERNAL_LINKS"   => array("internal-links", '\)', "internal-links-dl"),
            "EXTERNAL_LINKS"   => array("external-links-domain", '\)', "external-links-domain-dl"),
            "SOCIAL_ACTIVITY"  => array("social-activity", "x26", "social-activity-dl")
        );

        $downloadUrls = $this->GetDownloadUrls($site);
        $filename = parse_url($site, PHP_URL_HOST) . "-" . date("Ymd-His");

        foreach ($this->_tables as $table) {
            if ($table == "CRAWL_ERRORS") {
                $this->DownloadCSV_CrawlErrors($site, $savepath);
                continue;
            }
            if (isset($extra[$table])) {
                $cfg = $extra[$table];
                $this->DownloadCSV_XTRA($site, $savepath, $cfg[0], $cfg[1], $table, $cfg[2]);
                continue;
            }

            $finalName = "$savepath/$table-$filename.csv";
            if (!is_array($downloadUrls) || !isset($downloadUrls[$table])) {
                // No download link was reported for this table: record it as
                // skipped instead of requesting a URL built from nothing.
                array_push($this->_skipped, $finalName);
                continue;
            }
            $finalUrl = $downloadUrls[$table] . "&prop=ALL&db=%s&de=%s&more=true";
            $finalUrl = sprintf($finalUrl, $this->_daterange[0], $this->_daterange[1]);
            $this->SaveData($finalUrl, $finalName);
        }
    }

    /**
     * Downloads "unofficial" downloads based on the given URL.
     *
     * Fetches the page at $tokenUri to extract a security token, then requests
     * the CSV from $dlUri with that token and the configured date range.
     *
     * $savepath carries no default value: every parameter after it is
     * required, so a default could never be used.
     *
     * @param $site           String Site URL available in GWT Account.
     * @param $savepath       String Path to save CSV to (no trailing slash!).
     * @param $tokenUri       String Service page that embeds the security token.
     * @param $tokenDelimiter String Regex fragment that ends the token match.
     * @param $filenamePrefix String Prefix for the saved CSV filename.
     * @param $dlUri          String Service endpoint that returns the CSV.
     * @return Mixed false (Boolean) when not logged in, otherwise nothing.
     */
    public function DownloadCSV_XTRA($site, $savepath, $tokenUri, $tokenDelimiter, $filenamePrefix, $dlUri)
    {
        if ($this->IsLoggedIn() !== true) {
            return false;
        }

        $uri = self::SERVICEURI . $tokenUri . "?hl=%s&siteUrl=%s";
        $_uri = sprintf($uri, $this->_language, $site);
        $token = $this->GetToken($_uri, $tokenDelimiter);

        $filename = parse_url($site, PHP_URL_HOST) . "-" . date("Ymd-His");
        $finalName = "$savepath/$filenamePrefix-$filename.csv";

        $url = self::SERVICEURI . $dlUri
            . "?hl=%s&siteUrl=%s&security_token=%s&prop=ALL&db=%s&de=%s&more=true";
        $_url = sprintf(
            $url,
            $this->_language,
            $site,
            $token,
            $this->_daterange[0],
            $this->_daterange[1]
        );
        $this->SaveData($_url, $finalName);
    }

    /**
     * Downloads the Crawl Errors file based on the given URL.
     *
     * @param $site      String  Site URL available in GWT Account.
     * @param $savepath  String  Optional: Path to save CSV to (no trailing slash!).
     * @param $separated Boolean Optional: If true, the method saves separated CSV files
     *                           for each error type. Default: Merge errors in one file.
     * @return Mixed false (Boolean) when not logged in, otherwise nothing.
     */
    public function DownloadCSV_CrawlErrors($site, $savepath = ".", $separated = false)
    {
        if ($this->IsLoggedIn() !== true) {
            return false;
        }

        $filename = parse_url($site, PHP_URL_HOST) . "-" . date("Ymd-His");

        if (!$separated) {
            $uri = self::SERVICEURI . "crawl-errors?hl=en&siteUrl=$site&tid=we";
            $token = $this->GetToken($uri, "x26");
            $finalName = "$savepath/CRAWL_ERRORS-$filename.csv";
            $url = self::SERVICEURI . "crawl-errors-dl?hl=%s&siteUrl=%s&security_token=%s&type=0";
            $_url = sprintf($url, $this->_language, $site, $token);
            $this->SaveData($_url, $finalName);
            return;
        }

        foreach ($this->_errTablesSort as $sortid => $sortname) {
            foreach ($this->_errTablesType as $typeid => $typename) {
                // Only the two mobile types have their own tid value; every
                // other type uses the web-crawl one.
                if ($typeid == 1) {
                    $type_param = "mx";
                } elseif ($typeid == 2) {
                    $type_param = "mc";
                } else {
                    $type_param = "we";
                }

                $uri = self::SERVICEURI . "crawl-errors?hl=en&siteUrl=$site&tid=$type_param";
                $token = $this->GetToken($uri, "x26");
                $finalName = "$savepath/CRAWL_ERRORS-$typename-$sortname-$filename.csv";
                $url = self::SERVICEURI
                    . "crawl-errors-dl?hl=%s&siteUrl=%s&security_token=%s&type=%s&sort=%s";
                $_url = sprintf($url, $this->_language, $site, $token, $typeid, $sortid);
                $this->SaveData($_url, $finalName);
            }
        }
    }

    /**
     * Saves data to a CSV file based on the given URL.
     *
     * The target name is appended to $_downloaded on success and to $_skipped
     * when the request fails, returns at most one byte, or cannot be written.
     *
     * @param $finalUrl  String CSV Download URI.
     * @param $finalName String Path of the file to write.
     * @return Boolean true when the file was written, else false.
     */
    private function SaveData($finalUrl, $finalName)
    {
        $data = $this->GetData($finalUrl);

        // NOTE(review): utf8_decode() is deprecated as of PHP 8.2; it is kept
        // so the written bytes stay the same. Swap it for an mbstring/iconv
        // conversion once one of those extensions can be relied on.
        if (is_string($data) && strlen($data) > 1
            && file_put_contents($finalName, utf8_decode($data)) !== false
        ) {
            array_push($this->_downloaded, realpath($finalName));
            return true;
        }

        array_push($this->_skipped, $finalName);
        return false;
    }

    /**
     * Regular Expression to find the Security Token for a download file.
     *
     * @param $uri       String A Webmaster Tools Desktop Service URI.
     * @param $delimiter String Trailing delimiter for the regex.
     * @return String Returns a security token, or an empty string when the
     *                page could not be fetched or holds no token.
     */
    private function GetToken($uri, $delimiter)
    {
        $matches = array();
        $tmp = $this->GetData($uri);
        if (!is_string($tmp)) {
            return "";
        }

        // An earlier revision matched "x26security_token" and trimmed 4
        // leading characters; this one matches the "46" form and trims 3.
        // Presumably both are escaped forms of "&" in the page's script
        // source - confirm against a live response.
        preg_match_all("#46security_token(.*?)$delimiter#si", $tmp, $matches);
        if (!isset($matches[1][0])) {
            return "";
        }

        return (string) substr($matches[1][0], 3, -1);
    }

    /**
     * Validates ISO 8601 date format.
     *
     * The year, month and day are read straight from the string. Going through
     * strtotime() first would roll impossible dates such as 2012-02-31 over to
     * a real one, so checking the resulting timestamp could never fail.
     *
     * @param $str String Valid ISO 8601 date string (eg. 2012-01-01 or 20120101).
     * @return Boolean Returns true if string has valid format, else false.
     */
    private function IsISO8601($str)
    {
        if (!is_string($str)) {
            return false;
        }
        // \2 forces both separators to match: all dashes or none.
        if (!preg_match('/^(\d{4})(-?)(\d{2})\2(\d{2})$/', $str, $parts)) {
            return false;
        }
        return checkdate((int) $parts[3], (int) $parts[4], (int) $parts[1]);
    }
}
?>
文件#2:(index.php)
<?php
include 'gwtdata.php';
include 'credentials.php';

try {
    $website = "http://www.yourdomain.com/"; /* Add Your Website Url */
    $gdata = new GWTdata();
    if ($gdata->LogIn($email, $password) === true) {
        $gdata->DownloadCSV($website, "Here Add Your Folder Path To Save CSV File With GWT Data");
        echo "Datas Are Successfully Downloaded";
    }
} catch (Exception $e) {
    die($e->getMessage());
}
?>
任何人都可以帮助我,实现所有这些数据,并使其成为使用PHP生成的excel文件.
解决方法
我是您引用的代码(GWTdata PHP类)的作者,首先要明确表示此代码既不是由Google发布也不是使用官方API,而是一个自定义脚本处理来自Web界面的数据.
几周前,Google网站管理员工具的网页界面(这个类正是通过该界面来处理数据请求的)又发生了一些变化.因此,它破坏了PHP类GWTdata的一些功能 – 例如下载爬网错误.
不幸的是,对于大多数数据,我/我们无法做任何事情(因为数据不再可访问).
无论如何,您可以使用这个后续项目来获取爬网错误: GwtCrawlErrors(以CSV格式从Google网站站长工具下载网站抓取错误). (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容! |