PHP使用curl获取远程网页内容,并转换编码
/**
 * Fetch a remote page with cURL and return its body converted to UTF-8.
 *
 * Extra request headers, the referer and the user agent are read from the
 * globals $_HTTPHEADER (array), $_REFERER and $_USERAGENT. Each one is applied
 * only when it has been set, so the function also works when they are absent.
 *
 * @param string $url     URL to request (up to 3 redirects are followed).
 * @param string $charset Character set the remote page is encoded in. Any
 *                        value other than UTF-8 (compared case-insensitively)
 *                        is converted from that charset to UTF-8.
 *
 * @return string|false The response body, or false when the transfer fails,
 *                      the final HTTP status is not 200, the body is empty,
 *                      or the charset conversion fails.
 */
function get_page_code_with_charset($url, $charset = 'UTF-8')
{
    global $_HTTPHEADER, $_REFERER, $_USERAGENT;

    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    try {
        // Caller-supplied request metadata. Skipped when unset: passing null
        // to CURLOPT_HTTPHEADER raises a TypeError on PHP 8.
        if (is_array($_HTTPHEADER)) {
            curl_setopt($ch, CURLOPT_HTTPHEADER, $_HTTPHEADER);
        }
        if (isset($_USERAGENT)) {
            curl_setopt($ch, CURLOPT_USERAGENT, $_USERAGENT);
        }
        if (isset($_REFERER)) {
            // Spoofs the referring page.
            curl_setopt($ch, CURLOPT_REFERER, $_REFERER);
        }

        // An empty string advertises every encoding this libcurl build can
        // actually decode (the old hard-coded list offered "sdch", which
        // libcurl cannot decode).
        curl_setopt($ch, CURLOPT_ENCODING, '');
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_MAXREDIRS, 3);

        // NOTE(review): TLS peer and host verification are disabled, so HTTPS
        // responses are not authenticated. Kept for backward compatibility;
        // enable verification if the targets have valid certificates.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);

        // Always open a new connection and do not keep it for reuse.
        curl_setopt($ch, CURLOPT_FRESH_CONNECT, true);
        curl_setopt($ch, CURLOPT_FORBID_REUSE, true);

        // Return the response from curl_exec() instead of echoing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // Seconds allowed for establishing the connection.
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 1);
        // Maximum number of seconds the whole transfer may take.
        curl_setopt($ch, CURLOPT_TIMEOUT, 15);
        curl_setopt($ch, CURLOPT_URL, $url);

        // Only the body is requested (CURLOPT_HEADER stays off), so no
        // header/body splitting is needed afterwards.
        $body = curl_exec($ch);

        // curl_exec() yields false on any transport error (DNS failure,
        // refused connection, timeout, malformed URL, ...).
        if (!is_string($body)) {
            return false;
        }

        if (curl_getinfo($ch, CURLINFO_HTTP_CODE) !== 200) {
            return false;
        }

        // Test the received bytes rather than the Content-Length header: the
        // header is absent for chunked responses and describes the encoded
        // size when the response is compressed.
        if ($body === '') {
            return false;
        }

        if (strcasecmp((string) $charset, 'UTF-8') === 0) {
            return $body;
        }

        try {
            // Yields false (with a warning) for an unknown charset on PHP 7.
            return mb_convert_encoding($body, 'UTF-8', $charset);
        } catch (ValueError $e) {
            // PHP 8 throws for an unknown charset; keep the false-on-failure
            // contract.
            return false;
        }
    } catch (Exception $e) {
        return false;
    } finally {
        // Since PHP 8.0 the handle is an object released automatically and
        // curl_close() has no effect; it is only needed for PHP 7 resources.
        if (PHP_VERSION_ID < 80000) {
            curl_close($ch);
        }
    }
}