- 版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/kun1280437633/article/details/80503625
import re
import requests
'''
分析:
1. 爬取流程
地址:https://search.bilibili.com/all?keyword=女神篇&from_source=banner_search&page=39
方式:get
参数:
keyword: 女神篇
from_source: banner_search
page: 39
'''
class BiliSpider:
    """Collect ``{title: url}`` pairs from bilibili search result pages.

    Each result page is fetched with ``requests``; result items are cut out
    of the HTML with one regex and the link/title pair is pulled out of each
    item with a second regex.  Results accumulate in ``self.info``.

    NOTE(review): the two regex literals were truncated in this copy of the
    file (only ``re.compile(r'`` survived), so the defaults below are a
    reconstruction, not the author's originals.  Confirm them against the
    live page markup, or pass the real patterns to the constructor.
    """

    # Reconstructed placeholders, see the NOTE(review) in the class docstring.
    DEFAULT_DETAIL_DIV_PATTERN = r'<li class="video[^"]*"[^>]*>(.*?)</li>'
    DEFAULT_DETAIL_LINK_PATTERN = r'<a[^>]*?href="(.*?)"[^>]*?title="(.*?)"'

    # Seconds to wait for the server; without a timeout requests can block
    # forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self, detail_div_pattern=None, detail_link_pattern=None):
        """Create a spider.

        Args:
            detail_div_pattern: regex string with one capture group that
                yields the HTML of a single result item.  Defaults to
                ``DEFAULT_DETAIL_DIV_PATTERN``.
            detail_link_pattern: regex string with two capture groups,
                ``(url, title)`` in that order, applied to one result item.
                Defaults to ``DEFAULT_DETAIL_LINK_PATTERN``.
        """
        self.base_url = "https://search.bilibili.com/all"
        self.info = {}
        # re.S lets ``.`` cross newlines, since a result item spans lines.
        self.detail_div_pattern = re.compile(
            detail_div_pattern or self.DEFAULT_DETAIL_DIV_PATTERN, re.S)
        self.detail_link_pattern = re.compile(
            detail_link_pattern or self.DEFAULT_DETAIL_LINK_PATTERN, re.S)

    def _collect(self, total_html):
        """Extract every (title, url) pair from one page into ``self.info``.

        Items without a recognisable link are skipped instead of raising
        ``IndexError``.  The first URL seen for a title is kept.
        """
        for detail_div_html in self.detail_div_pattern.findall(total_html):
            links = self.detail_link_pattern.findall(detail_div_html)
            if not links:
                continue
            # Group order is (url, title).
            link_url, name = links[0][0], links[0][1]
            self.info.setdefault(name, link_url)

    def run(self, max_page=39):
        """Fetch result pages 1..max_page and print the collected mapping.

        Args:
            max_page: last page number to request (inclusive).  The default
                of 39 is the page count recorded in the file's header notes.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
            'Cookie': 'LIVE_BUVID=AUTO3815276021025886; finger=edc6ecda; buvid3=B02A6519-CFCF-4F15-B094-A0C4FF19C91F31057infoc',
            'Upgrade-Insecure-Requests': '1',
        }
        # Start at 1: range(39) asked for pages 0..38 and never reached the
        # last page.
        for page in range(1, max_page + 1):
            params = {
                'keyword': '女神篇',
                'from_source': 'banner_search',
                'page': page,
            }
            response = requests.get(
                self.base_url,
                params=params,
                headers=headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            self._collect(response.content.decode('utf-8'))
        print(self.info)
# Script entry point: run the crawl only when executed directly, not on import.
if __name__ == '__main__':
    spider = BiliSpider()
    spider.run()
【爬虫之哔哩哔哩女神篇】上面的代码已经失效,闲来无事,下面给出更新后的版本(更新时间:2020年3月24日)
# -*- coding: utf-8 -*-
import re
import requests
'''
分析:
1. 爬取流程
地址:https://search.bilibili.com/all?keyword=女神篇&from_source=banner_search&page=39
方式:get
参数:
keyword: 女神篇
from_source: banner_search
page: 39
'''class BiliSpider:
def __init__(self):
self.base_url = "https://search.bilibili.com/all"
self.info = {}def run(self):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0;
WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
'Cookie': 'LIVE_BUVID=AUTO3815276021025886;
finger=edc6ecda;
buvid3=B02A6519-CFCF-4F15-B094-A0C4FF19C91F31057infoc',
'Upgrade-Insecure-Requests': '1',
}
detail_div_pattern = re.compile(r'(.*?)', re.S)detail_link_pattern = re.compile(r'
效果展示
文章图片
推荐阅读
- 推荐系统论文进阶|CTR预估 论文精读(十一)--Deep Interest Evolution Network(DIEN)
- Python专栏|数据分析的常规流程
- Python|Win10下 Python开发环境搭建(PyCharm + Anaconda) && 环境变量配置 && 常用工具安装配置
- Python绘制小红花
- Pytorch学习|sklearn-SVM 模型保存、交叉验证与网格搜索
- OpenCV|OpenCV-Python实战(18)——深度学习简介与入门示例
- python|8. 文件系统——文件的删除、移动、复制过程以及链接文件
- 爬虫|若想拿下爬虫大单,怎能不会逆向爬虫,价值过万的逆向爬虫教程限时分享
- 分布式|《Python3网络爬虫开发实战(第二版)》内容介绍
- java|微软认真聆听了开源 .NET 开发社区的炮轰( 通过CLI 支持 Hot Reload 功能)