python.requests实战58写字楼

1.先看效果

python.requests实战58写字楼
文章图片
image.png

2.思路
应对反爬虫:为请求设置浏览器的 User-Agent 请求头(伪装成正常浏览器访问)。
【python.requests实战58写字楼】

3.上源代码

import re

import requests
from bs4 import BeautifulSoup


class Guiyang(object):
    """Fetch one listing page from gy.58.com/zhaozu and print each listing.

    Attributes:
        page: Range of page numbers. It is kept as an attribute but is not
            used to build the request.
            NOTE(review): presumably intended for pagination -- confirm how
            the site encodes page numbers before wiring it in.
        url: Base URL of the listing page (without query string).
        params: Query-string parameters sent with the request.
        headers: HTTP headers; the browser-like User-Agent is what the
            article relies on to get past the site's anti-crawler check.
        link_url: Listing links collected by the most recent ``get_data``
            call (empty until ``get_data`` has run).
    """

    def __init__(self):
        self.page = range(1, 10)
        # The query string is sent through ``params`` instead of being
        # formatted into the URL: formatting a ``range`` object into the URL
        # produced the literal text "range(1, 10)" as the ClickID value.
        self.url = 'http://gy.58.com/zhaozu/'
        self.params = {
            'PGTID': '0d00000d-0000-0ee8-d8e7-f5dce12e009e',
            'ClickID': '2',
        }
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
            'Host': 'gy.58.com',
            'Upgrade-Insecure-Requests': '1',
        }
        # No network access in the constructor; call ``get_data`` explicitly.
        self.link_url = []

    def get_data(self):
        """Download the listing page, print every listing, and return links.

        Each printed line has the form ``price,location,name``. List items
        that lack a link, a title, a location or a unit price are skipped.

        Returns:
            list: The ``href`` of each printed listing, in page order. The
            same list is stored on ``self.link_url``.

        Raises:
            requests.RequestException: If the request fails, times out, or
                the server answers with an HTTP error status.
        """
        response = requests.get(
            self.url,
            headers=self.headers,
            params=self.params,  # GET parameters belong in the query string
            timeout=10,
        )
        response.raise_for_status()

        # Parse once and reuse the tree for every lookup.
        soup = BeautifulSoup(response.text, 'lxml')
        links = []
        self.link_url = links

        listing = soup.find('ul', class_='house-list-wrap')
        if listing is None:
            # Layout changed or an anti-crawler page was served.
            return links

        for item in listing.find_all('li'):
            anchor = item.find('a')
            title = item.find('span', attrs={'class': 'title_des'})
            base_info = item.find('p', class_='baseinfo')
            unit_price = item.find('p', class_='unit')
            if any(tag is None for tag in (anchor, title, base_info, unit_price)):
                continue

            href = anchor.get('href')  # link to the listing's detail page
            if href is None:
                continue

            name = title.get_text()
            location = base_info.get_text().replace('\n', '')
            pricetoday = (
                unit_price.get_text()
                .replace(' ', '')
                .replace('\n', '')
                .replace('\r', '')
            )
            print('{},{},{}'.format(pricetoday, location, name))
            links.append(href)

        return links


c = Guiyang()
c.get_data()

    推荐阅读