这段代码主要实现访问 Google、取回结果页面,然后分析其中的数据。所以速度有些慢,时间主要花在网络连接和数据分析上。有谁可以优化一下?
代码:
<?php
// Proxy search page: forwards the query to Google, fetches the result page,
// then re-emits the total hit count, the elapsed search time, the individual
// result entries and a rewritten pagination bar, followed by the search form.
//
// Accepted GET parameters (each is forwarded to Google when non-empty), e.g.:
//   q=%E6%88%91%E6%98%AF%E4%B8%80%E4%B8%AA%E5%85%B5
//   hl=zh-CN
//   lr=lang_zh-CN
//   start=10
//   sa=N
//
// NOTE(review): all parsing below is plain string matching against literal
// fragments of Google's result markup. The whitespace inside those marker
// strings is reproduced exactly as found in this file; it may have been
// altered when the code was pasted -- confirm against a live response.

if (!function_exists('gs_param')) {
    /**
     * Read one GET parameter as a string.
     *
     * The script used to rely on register_globals; parameters are now read
     * explicitly from $_GET.
     *
     * @param string $name Parameter name.
     * @return string The parameter value, or '' when it is absent or not a
     *                string (e.g. an array such as q[]=x).
     */
    function gs_param($name)
    {
        return (isset($_GET[$name]) && is_string($_GET[$name])) ? $_GET[$name] : '';
    }
}

if (!function_exists('gs_piece')) {
    /**
     * Split $text on $marker and return the piece at position $index.
     *
     * @param string $text   Text to split.
     * @param string $marker Literal delimiter (must be non-empty).
     * @param int    $index  Zero-based piece number.
     * @return string The requested piece, or '' when the marker does not
     *                occur often enough for that piece to exist.
     */
    function gs_piece($text, $marker, $index)
    {
        $parts = explode($marker, $text);

        return isset($parts[$index]) ? $parts[$index] : '';
    }
}

$q = gs_param('q');

if ($q !== '') {
    // Build the outbound query string. Every value is URL-encoded so that
    // spaces, '&' and multibyte text survive the trip to Google.
    // Example of an encoded query: rawurlencode("我是一个兵")
    $url = 'q=' . rawurlencode($q);
    foreach (array('hl', 'lr', 'start', 'sa') as $name) {
        $value = gs_param($name);
        if ($value !== '') {
            $url .= '&' . $name . '=' . rawurlencode($value);
        }
    }

    // Fetch the page in one piece. (Reading it with file() and re-joining
    // with "\n" doubled every line break, since file() keeps line endings.)
    $google = file_get_contents("http://www.google.com/search?$url");

    if ($google === false) {
        // The request failed; report it instead of parsing a non-string.
        echo "无法从 Google 取回搜索结果,请稍后重试。<BR>\n";
    } else {
        // Each step below narrows $google to the text that follows the
        // marker just consumed, so later markers are searched for only in
        // the remainder of the page.

        // Total number of records.
        $google = gs_piece($google, "<td bgcolor=#3366cc align=right nowrap><font size=-1 color=#ffffff>共有 <b>", 1);
        $record_count = gs_piece($google, "</b>", 0);
        echo "总记录条数".$record_count."条<BR>\n";

        // Time the search took.
        $google = gs_piece($google, "</b> 项。 搜索用时 <b>", 1);
        $record_time = gs_piece($google, "</b>", 0);
        echo "搜索花费时间共".$record_time."秒<BR>\n";

        // All search results live inside a single <div>.
        $google = gs_piece($google, "<div>", 1);
        $result = gs_piece($google, "</div>", 0);

        // Records are separated by <p class=g>. A record that has related
        // sub-records carries them in a <blockquote class=g>, which is cut
        // off, as is everything from the cached-page link (<a class=fl) on.
        foreach (explode("<p class=g>", $result) as $entry) {
            $entry = gs_piece($entry, "<blockquote class=g>", 0);
            $entry = gs_piece($entry, "<a class=fl", 0);
            echo $entry."<P>\n";
        }

        // Pagination bar: starts at <div class=n> and ends at </div>.
        $pages = gs_piece($google, "<div class=n>", 1);
        $pages = gs_piece($pages, "</div>", 0);

        // Rewrites are applied strictly in this order (str_replace processes
        // array arguments sequentially). The navigation images are dropped,
        // Google's own /search links are pointed back at this script, and a
        // few attributes/spans are stripped. The query is URL-encoded before
        // being placed into the links so user input cannot inject markup.
        $rewrites = array(
            '<img src=/intl/zh-CN/nav_page.gif width=16 height=26 alt="" border=0><br>'      => '',
            '<img src=/intl/zh-CN/nav_first.gif width=18 height=26 alt=""><br>'              => '',
            '<img src=/intl/zh-CN/nav_current.gif width=16 height=26 alt=""><br>'            => '',
            '<img src=/intl/zh-CN/nav_next.gif width=100 height=26 alt="" border=0><br>'     => '',
            '<img src=/intl/zh-CN/nav_previous.gif width=68 height=26 alt="" border=0><br>'  => '',
            '/search?q'      => 'http://www.netyd.com/search.php?q=' . rawurlencode($q) . '&old',
            'width=1%'       => '',
            '&ie=UTF-8'      => '',
            '<span class=i>' => '',
            '<span class=b>' => '',
            '<b>'            => '<b><font color=white>f</font>',
            '</span>'        => '',
            '<td>'           => '<td><font color=white>f</font>',
        );
        $pages = str_replace(array_keys($rewrites), array_values($rewrites), $pages);

        echo $pages;
    }
}
?>
<form name=gs method=GET action=http://www.netyd.com/search.php>
<input type=hidden name=hl value="zh-CN">
<input type=text name=q size=31 maxlength=2048 value="">
<input type=hidden name=lr value=lang_zh-CN >
<input type=submit name="btnG" value="搜索">
</form>
|