爬虫 | Python 爬虫下载梨视频

import os
import re
from urllib.request import urlretrieve

import requests

# Site root; the relative links scraped from the listing page are appended to it.
_BASE_URL = 'https://www.pearvideo.com/'
# Listing page whose linked videos are downloaded.
_LISTING_URL = 'https://www.pearvideo.com/category_8'
# Directory (relative to the current working directory) that receives the files.
_DOWNLOAD_DIR = 'vedio'
# Seconds to wait for the server; without a timeout requests.get can block forever.
_REQUEST_TIMEOUT = 10

# NOTE(review): the published snippet had lost the HTML inside the next two
# patterns (they survived only as r'' and r'(.*?)', which match at every
# position of the page and therefore never yield a usable link or title).
# They are reconstructed from the markup the site is known to have used --
# confirm both against the live page source before relying on them.
_VIDEO_LINK_RE = re.compile(r'<a href="(.*?)" class="vervideo-lilink actplay">')
_VIDEO_TITLE_RE = re.compile(r'<h1 class="video-tt">(.*?)</h1>')
# Media URL as it appears in the play page source (taken verbatim from the
# original script).
_VIDEO_SRC_RE = re.compile(r'srcUrl="(.*?)",vdoUrl=srcUrl')

# Characters that are path separators or rejected in file names on common
# platforms; a title containing one would otherwise break the target path.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _fetch_html(url):
    """Return the body of *url* decoded as text."""
    return requests.get(url, timeout=_REQUEST_TIMEOUT).text


def _safe_filename(name):
    """Return *name* with characters unsafe in file names replaced by '_'."""
    return _UNSAFE_FILENAME_CHARS.sub('_', name)


def download_vedio():
    """Download the videos linked from the Pear Video listing page.

    Fetches the listing page, extracts the relative link of every video,
    then for each play page extracts the media URL and the title and saves
    the media as ``vedio/<title>.mp4`` under the current working directory.

    Play pages on which the media URL or the title cannot be found are
    reported and skipped instead of aborting the whole run.

    Raises:
        requests.RequestException: if a page cannot be fetched.
        OSError: if the download directory or a target file cannot be
            written, or the media download itself fails (urllib's URLError
            is an OSError subclass).
    """
    listing_html = _fetch_html(_LISTING_URL)

    # Each match is a link relative to the site root.
    video_ids = _VIDEO_LINK_RE.findall(listing_html)
    if not video_ids:
        print('No video links found on %s' % _LISTING_URL)
        return

    play_urls = [_BASE_URL + video_id for video_id in video_ids]

    for play_url in play_urls:
        page_html = _fetch_html(play_url)
        sources = _VIDEO_SRC_RE.findall(page_html)
        titles = _VIDEO_TITLE_RE.findall(page_html)

        # The page layout may differ from what the patterns expect; skip the
        # page rather than fail with IndexError on the [0] lookups below.
        if not sources or not titles:
            print('Skipping %s: video URL or title not found' % play_url)
            continue

        title = titles[0]
        print('正在下载视频:%s' % title)

        # exist_ok avoids the list-then-create check and is safe to repeat.
        os.makedirs(_DOWNLOAD_DIR, exist_ok=True)
        filepath = os.path.join(_DOWNLOAD_DIR, '%s.mp4' % _safe_filename(title))
        urlretrieve(sources[0], filepath)


# Run the download only when executed as a script, so importing this module
# does not trigger network traffic.
if __name__ == '__main__':
    download_vedio()

    推荐阅读