php爬虫疫情数据 php网络爬虫

怎样用python爬取疫情数据
import requests
from bs4 import BeautifulSoup
import re
import json
# 1. Send the request and fetch the epidemic home page (data source: DXY / Dingxiangyuan).
# NOTE(review): the URL was an empty string in the original text (it appears to
# have been stripped when the article was copied), which makes requests.get()
# fail immediately. The DXY epidemic page is assumed here -- confirm before use.
response = requests.get('https://ncov.dxy.cn/ncovh5/view/pneumonia')
home_page = response.content.decode()
# 2. Extract the latest day's data from the home page: it lives in the
#    element whose id is 'getAreaStat'.
soup = BeautifulSoup(home_page, 'lxml')
script = soup.find(id='getAreaStat')
text = script.string
# 3. Pull the JSON array literal (everything from the first '[' to the last ']')
#    out of the script text.
json_str = re.findall(r'\[.+\]', text)[0]
# 4. Convert the JSON string into Python objects.
last_day_corona_virus = json.loads(json_str)
# 5. Save the latest day's data as JSON (the 'data' directory must already exist).
#    ensure_ascii=False writes non-ASCII text as-is, so the file is opened
#    explicitly as UTF-8 instead of relying on the platform default encoding.
with open('data/last_day_coronavirus.json', 'w', encoding='utf-8') as fp:
    json.dump(last_day_corona_virus, fp, ensure_ascii=False)
如何用爬虫爬取网页上的数据
用爬虫框架Scrapy,分三步:
定义item类
开发spider类
开发pipeline
如果你想要更透彻的信息,你可以参考《疯狂python讲义》。
如何用php编写网络爬虫
php不太适合用来写网络爬虫,因为几乎没有现成的框架或者成熟的下载机制,也不太适合做并发处理。
下载页面的话,除了curl,就是file_get_contents,或者用curl_multi来做并发请求。curl可以代理端口、虚假ip、带cookie、带header请求目标页面;下载完成之后,解析页面可以用QueryList来解析html,写法类似jQuery。
提供给你我之前写的类:curl.php希望可以帮到你.
QueryList.php和phpQuery.php由于文件太大了,没办法贴上来
<?php
/**
 * Small cURL wrapper for downloading pages.
 */
class Http {
    /**
     * Perform an HTTP request with cURL and return the response plus diagnostics.
     *
     * A POST is sent when $postData is non-empty; otherwise cURL's default
     * request method is used. Redirects are followed.
     *
     * @param string       $url        Target URL.
     * @param string|array $postData   Value passed to CURLOPT_POSTFIELDS; empty means no POST body.
     * @param int          $timeOut    Value passed to CURLOPT_TIMEOUT (seconds).
     * @param array        $httpHeader Header lines passed to CURLOPT_HTTPHEADER; skipped when empty.
     *
     * @return array Associative array with the keys:
     *               'response'   => return value of curl_exec(),
     *               'httpStatus' => CURLINFO_HTTP_CODE,
     *               'fullInfo'   => full curl_getinfo() array,
     *               'errorMsg'   => curl_error() text, '' when no error,
     *               'errorNo'    => curl_errno() code, 0 when no error.
     */
    public function curlRequest($url, $postData = '', $timeOut = 10, $httpHeader = array()) {
        $handle = curl_init();
        curl_setopt($handle, CURLOPT_URL, $url);
        if ($httpHeader) {
            curl_setopt($handle, CURLOPT_HTTPHEADER, $httpHeader);
        }
        // Return the body from curl_exec() instead of printing it, without response headers.
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($handle, CURLOPT_HEADER, 0);
        curl_setopt($handle, CURLOPT_TIMEOUT, $timeOut);
        curl_setopt($handle, CURLOPT_FOLLOWLOCATION, 1);
        // SECURITY NOTE(review): certificate and host verification are disabled,
        // so HTTPS responses are not authenticated. Kept as in the original;
        // enable both before using this against anything sensitive.
        curl_setopt($handle, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($handle, CURLOPT_SSL_VERIFYHOST, false);
        // Present a desktop browser User-Agent and accept compressed responses.
        curl_setopt($handle, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36');
        curl_setopt($handle, CURLOPT_ENCODING, 'gzip,deflate,sdch');
        if (!empty($postData)) {
            curl_setopt($handle, CURLOPT_POST, 1);
            curl_setopt($handle, CURLOPT_POSTFIELDS, $postData);
        }

        // Initialise the result explicitly rather than relying on implicit array creation.
        $result = array();
        $result['response'] = curl_exec($handle);
        $result['httpStatus'] = curl_getinfo($handle, CURLINFO_HTTP_CODE);
        $result['fullInfo'] = curl_getinfo($handle);
        $result['errorMsg'] = '';
        $result['errorNo'] = 0;
        // A non-zero errno means the transfer itself failed (as opposed to an HTTP error status).
        if (curl_errno($handle)) {
            $result['errorMsg'] = curl_error($handle);
            $result['errorNo'] = curl_errno($handle);
        }
        curl_close($handle);
        return $result;
    }
}
?>
【php爬虫疫情数据 php网络爬虫】php爬虫疫情数据的介绍就聊到这里吧,感谢你花时间阅读本站内容,更多关于php网络爬虫、php爬虫疫情数据的信息别忘了在本站进行查找喔。

    推荐阅读