这里只做单页采集，其它功能自己添加！
/**
 * Extract the text located between two marker strings.
 *
 * The subject is split on every occurrence of $start; the segment that
 * follows the first occurrence is then cut at the first occurrence of $end.
 * If $end does not occur in that segment, the whole segment is returned.
 *
 * @param string $str   Text to search (false/null is treated as empty text).
 * @param string $start Marker that precedes the wanted content.
 * @param string $end   Marker that follows the wanted content.
 * @return string|null  The extracted content, '' when $start does not occur
 *                      in $str, or null when either marker is empty.
 */
function get_sub_content($str, $start, $end){
    if ( $start == '' || $end == '' ){
        return;
    }
    // Cast so that a failed upstream fetch (false/null) behaves like empty text.
    $segments = explode($start, (string) $str);
    // Without this guard a missing start marker reads an undefined offset and
    // feeds null into the second explode().
    if (!isset($segments[1])) {
        return '';
    }
    $segments = explode($end, $segments[1]);
    return $segments[0];
}
/**
 * Prepare a scraped table cell for inclusion in the generated HTML.
 *
 * The value originates from a third-party page, so it is HTML-escaped before
 * being echoed. Double encoding is disabled so that entities already present
 * in the captured text are not escaped a second time.
 *
 * @param string $value Raw cell content captured from the remote page.
 * @return string Trimmed, HTML-escaped cell content.
 */
function proxy_cell_html($value){
    return htmlspecialchars(trim($value), ENT_QUOTES, 'UTF-8', false);
}

// Download the proxy listing page; file_get_contents() returns false on failure.
$str = file_get_contents('http://www.cz88.net/proxy/index.aspx');
if ($str !== false) {
    // Convert the payload from GBK to UTF-8; iconv() returns false on failure.
    $str = iconv('gbk','utf-8',$str);
}

$contentStart = '<div id="Content">';
if ($str === false || strpos($str, $contentStart) === false) {
    // Nothing usable was fetched: continue with empty input so that only the
    // table header is printed instead of operating on false / missing markers.
    $str = '';
} else {
    // Keep only the part of the page between the content container and the
    // centered paragraph that follows it.
    $str = get_sub_content($str, $contentStart, '<p style="text-align:center;">');
}

// Each table row yields five captures; the fourth is captured but not used below.
$match = array();
preg_match_all('/<tr><td>(.*?)<\\/td><td>(.*?)<\\/td><td>(.*?)<\\/td><td>(.*?)<\\/td><td><div class="addr_style">(.*?)<\\/div><\\/td><\\/tr>/', (string) $str, $match);

// Header row of the generated list.
$strProxyList = '<dl class="proxy_header clear">';
$strProxyList .= '<dt>IP</dt>';
$strProxyList .= '<dd class="proxy_dd_port">端口</dd>';
$strProxyList .= '<dd class="proxy_dd_type">类型</dd>';
$strProxyList .= '<dd class="proxy_dd_whois">WHOIS</dd>';
$strProxyList .= '<dd class="proxy_dd_addr">地址</dd>';
$strProxyList .= '</dl>';

// Count once instead of on every iteration; tolerate a missing capture group.
$rowCount = isset($match[1]) ? count($match[1]) : 0;
for ($i = 0; $i < $rowCount; $i++) {
    // The 'CZ88.NET' marker is removed from the address column.
    $address = str_replace('CZ88.NET', '', proxy_cell_html($match[5][$i]));
    $strProxyList .= '<dl class="clear"><dt>'.proxy_cell_html($match[1][$i]).'</dt>'
        .'<dd class="proxy_dd_port">'.proxy_cell_html($match[2][$i]).'</dd>'
        .'<dd class="proxy_dd_type">'.proxy_cell_html($match[3][$i]).'</dd>'
        .'<dd class="proxy_dd_whois">WHOIS</dd>'
        .'<dd class="proxy_dd_addr">'.$address.'</dd></dl>';
}
echo $strProxyList;