<?php
// Text file holding the sites to check; it is read line by line further down.
$webfile = "sitexml.txt";
$opensite = fopen($webfile, 'r');
if ($opensite === false) {
    // fopen() returns false on failure. Reading from an invalid handle would
    // make the feof() loop spin forever (or throw on PHP 8+), so stop early.
    exit('Cannot open ' . $webfile);
}
/**
 * Request only the HTTP response headers of $url and return them as text.
 *
 * The request is sent with a fixed browser-like User-Agent and
 * Accept-Language header. The response body is not requested
 * (CURLOPT_NOBODY); the headers are included in the returned string
 * (CURLOPT_HEADER) instead of being printed (CURLOPT_RETURNTRANSFER).
 *
 * @param string $url URL to probe.
 * @return string|false Raw response headers, or false when curl_exec() fails.
 */
function curl($url) {
    // Browser information used for the probe request.
    $browsers = array (
        "user_agent" => "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 (.NET CLR 3.5.30729)",
        "language" => "en-us,en;q=0.5"
    );
    $ch = curl_init();
    // Set the URL.
    curl_setopt($ch, CURLOPT_URL, $url);
    // Set the browser-specific headers.
    // CURLOPT_HTTPHEADER: An array of HTTP header fields to set.
    curl_setopt($ch, CURLOPT_HTTPHEADER, array (
        "User-Agent: {$browsers['user_agent']}",
        "Accept-Language: {$browsers['language']}"
    ));
    // The page content is not needed.
    curl_setopt($ch, CURLOPT_NOBODY, 1);
    // Only the HTTP headers have to be returned.
    curl_setopt($ch, CURLOPT_HEADER, 1);
    // Return the result instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Run the request.
    $output = curl_exec($ch);
    // Close the handle before returning so it is always released;
    // a statement placed after the return would never execute.
    curl_close($ch);
    return $output;
}
// Read the site list line by line; every non-empty line is treated as a site
// address and "<site>/sitemap.xml" is probed with curl().

// Status codes that are reported as "no sitemap".
$missing_codes = array(404, 400, 403, 500, 501, 502, 503, 504, 505);
// Status line of any HTTP version (HTTP/1.0, HTTP/1.1, HTTP/2, ...);
// capture group 1 is the numeric status code.
$status_pattern = '/^HTTP\/\d+(?:\.\d+)?\s+(\d+)/m';

while (!feof($opensite)) {
    $onesite = fgets($opensite, 4096);
    if ($onesite === false) {
        // Nothing more to read (EOF or read error).
        break;
    }
    // Strip the line ending ("\n" as well as "\r\n") and surrounding blanks.
    $onesite = trim($onesite);
    if ($onesite === '') {
        continue;
    }

    $url = $onesite . "/sitemap.xml";
    // Escaped copy for HTML output; the raw value is used for the request.
    $safe_url = htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    echo "[URL]: $safe_url<br>";

    // Probe once and reuse the response instead of issuing the same request
    // for every check.
    $response = curl($url);
    if ($response === false || $response === '') {
        echo '<FONT color=#ff0000>' . "网站不能打开" . '</font>' . "<br>";
        echo "<br><br>";
        continue;
    }
    // The headers come from a remote server, so escape them before printing.
    echo htmlspecialchars($response, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    // 0 means "no status line found"; it matches none of the branches below.
    $status = 0;
    if (preg_match($status_pattern, $response, $http_status)) {
        $status = (int) $http_status[1];
    }

    if ($status === 200) {
        echo $safe_url . "存在sitemap" . "<br>";
    } elseif ($status === 301) {
        // Retry against the "www." host. The prefix goes after the scheme
        // when the line has one, so "http://host" becomes "http://www.host"
        // while a bare "host" still becomes "www.host".
        $url = preg_replace('/^([a-z][a-z0-9+.\-]*:\/\/)?/i', '$1www.', $url, 1);
        $safe_url = htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        $retry = curl($url);
        $retry_status = 0;
        if (is_string($retry) && preg_match($status_pattern, $retry, $http_status)) {
            $retry_status = (int) $http_status[1];
        }
        if ($retry_status === 200) {
            echo $safe_url . "存在sitemap" . "<br>";
        } else {
            echo '<FONT color=#ff0000>' . $safe_url . "没有sitemap" . '</font>';
        }
    } elseif (in_array($status, $missing_codes, true)) {
        // A bitwise OR of the codes would collapse into one integer and never
        // match; compare against the list instead.
        echo '<FONT color=#ff0000>' . $safe_url . "没有sitemap" . '</font>';
    }
    echo "<br><br>";
}
// The list has been fully processed; release the file handle.
fclose($opensite);
?>