国产av日韩一区二区三区精品,成人性爱视频在线观看,国产,欧美,日韩,一区,www.成色av久久成人,2222eeee成人天堂

Home Backend Development PHP Tutorial php結(jié)合curl實(shí)現(xiàn)多線程抓取_PHP

php結(jié)合curl實(shí)現(xiàn)多線程抓取_PHP

May 30, 2016 am 08:46 AM
php

php結(jié)合curl實(shí)現(xiàn)多線程抓取

<&#63;php
/*
curl 多線程抓取
*/
 /** 
   * curl 多線程 
   * 
   * @param array $array 并行網(wǎng)址 
   * @param int $timeout 超時(shí)時(shí)間
   * @return array 
   */ 
 function Curl_http($array,$timeout){
 $res = array();
 $mh = curl_multi_init();//創(chuàng)建多個(gè)curl語(yǔ)柄
 $startime = getmicrotime();
 foreach($array as $k=>$url){
  $conn[$k]=curl_init($url);

    curl_setopt($conn[$k], CURLOPT_TIMEOUT, $timeout);//設(shè)置超時(shí)時(shí)間
    curl_setopt($conn[$k], CURLOPT_USERAGENT, 'Mozilla/5.0 (compatible; MSIE 5.01; Windows NT 5.0)');
    curl_setopt($conn[$k], CURLOPT_MAXREDIRS, 7);//HTTp定向級(jí)別
    curl_setopt($conn[$k], CURLOPT_HEADER, 0);//這里不要header,加塊效率
    curl_setopt($conn[$k], CURLOPT_FOLLOWLOCATION, 1); // 302 redirect
    curl_setopt($conn[$k],CURLOPT_RETURNTRANSFER,1);
    curl_multi_add_handle ($mh,$conn[$k]);
 }
 //防止死循環(huán)耗死cpu 這段是根據(jù)網(wǎng)上的寫法
 do {
  $mrc = curl_multi_exec($mh,$active);//當(dāng)無(wú)數(shù)據(jù),active=true
 } while ($mrc == CURLM_CALL_MULTI_PERFORM);//當(dāng)正在接受數(shù)據(jù)時(shí)
 while ($active and $mrc == CURLM_OK) {//當(dāng)無(wú)數(shù)據(jù)時(shí)或請(qǐng)求暫停時(shí),active=true
  if (curl_multi_select($mh) != -1) {
  do {
   $mrc = curl_multi_exec($mh, $active);
  } while ($mrc == CURLM_CALL_MULTI_PERFORM);
  }
 }

 foreach ($array as $k => $url) {
   curl_error($conn[$k]);
    $res[$k]=curl_multi_getcontent($conn[$k]);//獲得返回信息
    $header[$k]=curl_getinfo($conn[$k]);//返回頭信息
    curl_close($conn[$k]);//關(guān)閉語(yǔ)柄
    curl_multi_remove_handle($mh , $conn[$k]);  //釋放資源 
 }

 curl_multi_close($mh);
 $endtime = getmicrotime();
 $diff_time = $endtime - $startime;

 return array('diff_time'=>$diff_time,
   'return'=>$res,
   'header'=>$header 
   );

 }
 //計(jì)算當(dāng)前時(shí)間
 function getmicrotime() {
   list($usec, $sec) = explode(" ",microtime());
   return ((float)$usec + (float)$sec);
 }

 //測(cè)試一下,curl 三個(gè)網(wǎng)址
 $array = array(
  "http://www.weibo.com/",
  "http://www.renren.com/",
  "http://www.qq.com/"
  );
 $data = Curl_http($array,'10');//調(diào)用
 var_dump($data);//輸出
//如果POST的數(shù)據(jù)大于1024字節(jié),curl并不會(huì)直接就發(fā)起POST請(qǐng)求
//發(fā)送請(qǐng)求時(shí),header中包含一個(gè)空的Expect。curl_setopt($ch, CURLOPT_HTTPHEADER, array("Expect:"));
&#63;>

我們?cè)賮?lái)看幾個(gè)例子

(1)下面這段代碼是實(shí)現(xiàn)抓取多個(gè)URL,然后將抓取的URL的頁(yè)面代碼寫入指定的文件

$urls = array(
'http://www.bitsCN.com/',
'http://www.google.com/',
'http://www.example.com/'
); // 設(shè)置要抓取的頁(yè)面URL
$save_to='/test.txt'; // 把抓取的代碼寫入該文件
$st = fopen($save_to,"a");
$mh = curl_multi_init();
foreach ($urls as $i => $url) {
$conn[$i] = curl_init($url);
curl_setopt($conn[$i], CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)");
curl_setopt($conn[$i], CURLOPT_HEADER ,0);
curl_setopt($conn[$i], CURLOPT_CONNECTTIMEOUT,60);
curl_setopt($conn[$i], CURLOPT_FILE,$st); // 將爬取的代碼寫入文件
curl_multi_add_handle ($mh,$conn[$i]);
} // 初始化
do {
curl_multi_exec($mh,$active);
} while ($active); // 執(zhí)行
foreach ($urls as $i => $url) {
curl_multi_remove_handle($mh,$conn[$i]);
curl_close($conn[$i]);
} // 結(jié)束清理
curl_multi_close($mh);
fclose($st);

(2)下面這段代碼和上面差不多意思,只不過(guò)這個(gè)地方是將獲得的代碼先放入變量,然后再將獲取到的內(nèi)容寫入指定的文件

$urls = array(
'http://www.bitsCN.com/',
'http://www.google.com/',
'http://www.example.com/'
);
$save_to='/test.txt'; // 把抓取的代碼寫入該文件
$st = fopen($save_to,"a");
$mh = curl_multi_init();
foreach ($urls as $i => $url) {
$conn[$i] = curl_init($url);
curl_setopt($conn[$i], CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)");
curl_setopt($conn[$i], CURLOPT_HEADER ,0);
curl_setopt($conn[$i], CURLOPT_CONNECTTIMEOUT,60);
curl_setopt($conn[$i],CURLOPT_RETURNTRANSFER,true); // 不將爬取代碼寫到瀏覽器,而是轉(zhuǎn)化為字符串
curl_multi_add_handle ($mh,$conn[$i]);
}
do {
curl_multi_exec($mh,$active);
} while ($active);
foreach ($urls as $i => $url) {
$data = curl_multi_getcontent($conn[$i]); // 獲得爬取的代碼字符串
fwrite($st,$data); // 將字符串寫入文件
} // 獲得數(shù)據(jù)變量,并寫入文件
foreach ($urls as $i => $url) {
curl_multi_remove_handle($mh,$conn[$i]);
curl_close($conn[$i]);
}
curl_multi_close($mh);
fclose($st);

(3)下面這段代碼實(shí)現(xiàn)的是利用 PHP 的 Curl Functions 實(shí)現(xiàn)并發(fā)多線程下載文件

$urls=array(
 'http://www.bitsCN.com/5w.zip',
 'http://www.bitsCN.com/5w.zip',
 'http://www.bitsCN.com/5w.zip'
);
$save_to='./home/';
$mh=curl_multi_init();
foreach($urls as $i=>$url){
 $g=$save_to.basename($url);
 if(!is_file($g)){
   $conn[$i]=curl_init($url);
   $fp[$i]=fopen($g,"w");
   curl_setopt($conn[$i],CURLOPT_USERAGENT,"Mozilla/4.0(compatible; MSIE 7.0; Windows NT 6.0)");
   curl_setopt($conn[$i],CURLOPT_FILE,$fp[$i]);
   curl_setopt($conn[$i],CURLOPT_HEADER ,0);
   curl_setopt($conn[$i],CURLOPT_CONNECTTIMEOUT,60);
   curl_multi_add_handle($mh,$conn[$i]);
 }
}
do{
 $n=curl_multi_exec($mh,$active);
}while($active);
foreach($urls as $i=>$url){
 curl_multi_remove_handle($mh,$conn[$i]);
 curl_close($conn[$i]);
 fclose($fp[$i]);
}
curl_multi_close($mh);$urls=array(
 'http://www.bitsCN.com/5w.zip',
 'http://www.bitsCN.com/5w.zip',
 'http://www.bitsCN.com/5w.zip'
);
$save_to='./home/';
$mh=curl_multi_init();
foreach($urls as $i=>$url){
 $g=$save_to.basename($url);
 if(!is_file($g)){
   $conn[$i]=curl_init($url);
   $fp[$i]=fopen($g,"w");
   curl_setopt($conn[$i],CURLOPT_USERAGENT,"Mozilla/4.0(compatible; MSIE 7.0; Windows NT 6.0)");
   curl_setopt($conn[$i],CURLOPT_FILE,$fp[$i]);
   curl_setopt($conn[$i],CURLOPT_HEADER ,0);
   curl_setopt($conn[$i],CURLOPT_CONNECTTIMEOUT,60);
   curl_multi_add_handle($mh,$conn[$i]);
 }
}
do{
 $n=curl_multi_exec($mh,$active);
}while($active);
foreach($urls as $i=>$url){
 curl_multi_remove_handle($mh,$conn[$i]);
 curl_close($conn[$i]);
 fclose($fp[$i]);
}
curl_multi_close($mh);

以上所述就是本文的全部?jī)?nèi)容了,希望大家能夠喜歡。

Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn

Hot AI Tools

Undress AI Tool

Undress AI Tool

Undress images for free

Undresser.AI Undress

Undresser.AI Undress

AI-powered app for creating realistic nude photos

AI Clothes Remover

AI Clothes Remover

Online AI tool for removing clothes from photos.

Clothoff.io

Clothoff.io

AI clothes remover

Video Face Swap

Video Face Swap

Swap faces in any video effortlessly with our completely free AI face swap tool!

Hot Tools

Notepad++7.3.1

Notepad++7.3.1

Easy-to-use and free code editor

SublimeText3 Chinese version

SublimeText3 Chinese version

Chinese version, very easy to use

Zend Studio 13.0.1

Zend Studio 13.0.1

Powerful PHP integrated development environment

Dreamweaver CS6

Dreamweaver CS6

Visual web development tools

SublimeText3 Mac version

SublimeText3 Mac version

God-level code editing software (SublimeText3)

How to access a character in a string by index in PHP How to access a character in a string by index in PHP Jul 12, 2025 am 03:15 AM

In PHP, you can use square brackets or curly braces to obtain string specific index characters, but square brackets are recommended; the index starts from 0, and the access outside the range returns a null value and cannot be assigned a value; mb_substr is required to handle multi-byte characters. For example: $str="hello";echo$str[0]; output h; and Chinese characters such as mb_substr($str,1,1) need to obtain the correct result; in actual applications, the length of the string should be checked before looping, dynamic strings need to be verified for validity, and multilingual projects recommend using multi-byte security functions uniformly.

How Do Generators Work in PHP? How Do Generators Work in PHP? Jul 11, 2025 am 03:12 AM

AgeneratorinPHPisamemory-efficientwaytoiterateoverlargedatasetsbyyieldingvaluesoneatatimeinsteadofreturningthemallatonce.1.Generatorsusetheyieldkeywordtoproducevaluesondemand,reducingmemoryusage.2.Theyareusefulforhandlingbigloops,readinglargefiles,or

How to prevent session hijacking in PHP? How to prevent session hijacking in PHP? Jul 11, 2025 am 03:15 AM

To prevent session hijacking in PHP, the following measures need to be taken: 1. Use HTTPS to encrypt the transmission and set session.cookie_secure=1 in php.ini; 2. Set the security cookie attributes, including httponly, secure and samesite; 3. Call session_regenerate_id(true) when the user logs in or permissions change to change to change the SessionID; 4. Limit the Session life cycle, reasonably configure gc_maxlifetime and record the user's activity time; 5. Prohibit exposing the SessionID to the URL, and set session.use_only

PHP get the first N characters of a string PHP get the first N characters of a string Jul 11, 2025 am 03:17 AM

You can use substr() or mb_substr() to get the first N characters in PHP. The specific steps are as follows: 1. Use substr($string,0,N) to intercept the first N characters, which is suitable for ASCII characters and is simple and efficient; 2. When processing multi-byte characters (such as Chinese), mb_substr($string,0,N,'UTF-8'), and ensure that mbstring extension is enabled; 3. If the string contains HTML or whitespace characters, you should first use strip_tags() to remove the tags and trim() to clean the spaces, and then intercept them to ensure the results are clean.

PHP get the last N characters of a string PHP get the last N characters of a string Jul 11, 2025 am 03:17 AM

There are two main ways to get the last N characters of a string in PHP: 1. Use the substr() function to intercept through the negative starting position, which is suitable for single-byte characters; 2. Use the mb_substr() function to support multilingual and UTF-8 encoding to avoid truncating non-English characters; 3. Optionally determine whether the string length is sufficient to handle boundary situations; 4. It is not recommended to use strrev() substr() combination method because it is not safe and inefficient for multi-byte characters.

How to URL encode a string in PHP with urlencode How to URL encode a string in PHP with urlencode Jul 11, 2025 am 03:22 AM

The urlencode() function is used to encode strings into URL-safe formats, where non-alphanumeric characters (except -, _, and .) are replaced with a percent sign followed by a two-digit hexadecimal number. For example, spaces are converted to signs, exclamation marks are converted to!, and Chinese characters are converted to their UTF-8 encoding form. When using, only the parameter values ??should be encoded, not the entire URL, to avoid damaging the URL structure. For other parts of the URL, such as path segments, the rawurlencode() function should be used, which converts the space to . When processing array parameters, you can use http_build_query() to automatically encode, or manually call urlencode() on each value to ensure safe transfer of data. just

How to set and get session variables in PHP? How to set and get session variables in PHP? Jul 12, 2025 am 03:10 AM

To set and get session variables in PHP, you must first always call session_start() at the top of the script to start the session. 1. When setting session variables, use $_SESSION hyperglobal array to assign values ??to specific keys, such as $_SESSION['username']='john_doe'; it can store strings, numbers, arrays and even objects, but avoid storing too much data to avoid affecting performance. 2. When obtaining session variables, you need to call session_start() first, and then access the $_SESSION array through the key, such as echo$_SESSION['username']; it is recommended to use isset() to check whether the variable exists to avoid errors

How to prevent SQL injection in PHP How to prevent SQL injection in PHP Jul 12, 2025 am 03:02 AM

Key methods to prevent SQL injection in PHP include: 1. Use preprocessing statements (such as PDO or MySQLi) to separate SQL code and data; 2. Turn off simulated preprocessing mode to ensure true preprocessing; 3. Filter and verify user input, such as using is_numeric() and filter_var(); 4. Avoid directly splicing SQL strings and use parameter binding instead; 5. Turn off error display in the production environment and record error logs. These measures comprehensively prevent the risk of SQL injection from mechanisms and details.

See all articles