- 首页 > it技术 > >
用 Python 爬取一部小说的前 51 章
#!/usr/bin/python
# coding=utf-8
import requests
from bs4 import BeautifulSoup
def handleUrl(n):
    """Build the URL of one chapter page.

    Chapter pages are addressed by a numeric id; the id of chapter ``n`` is
    ``baseNum + n``.

    Args:
        n: Integer chapter offset added to the base page id.

    Returns:
        The chapter URL as a string, ending in ``.html``.
    """
    # NOTE: the original author marked this address as wrong (a placeholder);
    # replace it with the real site before running.
    baseUrl = "http://www.baidu.com/books/74/74873/"
    baseNum = 13099890
    url = baseUrl + str(n + baseNum) + '.html'
    print("url--%s" % url)
    return url
def handleHtml(n):
    """Download the HTML of one chapter page.

    Args:
        n: Integer chapter offset, forwarded to ``handleUrl``.

    Returns:
        The decoded page text when the server answers with status 200,
        otherwise ``None`` (including when the request itself fails).
    """
    print(n)
    url = handleUrl(n)
    try:
        # Without a timeout a single unresponsive server stalls the whole run.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        # Connection errors and timeouts take the same failure path as a
        # non-200 answer, so callers only have to check for None.
        print("获取网页失败")
        return None
    # Decode using the charset detected from the body rather than the one
    # declared in the response headers.
    response.encoding = response.apparent_encoding
    print(response)
    if response.status_code != 200:
        print("获取网页失败")
        return None
    return response.text
def parserText(n):
    """Fetch one chapter page and extract its body text.

    Args:
        n: Integer chapter offset, forwarded to ``handleHtml``.

    Returns:
        The text of the element whose id is ``contents``, or ``None`` when
        the page could not be downloaded, was empty, or has no such element.
    """
    result = handleHtml(n)
    # handleHtml returns None on failure; len(None) would raise TypeError,
    # so test truthiness instead (covers both None and the empty string).
    if not result:
        return None
    soup = BeautifulSoup(result, 'html.parser')
    contents = soup.find(id="contents")
    # find() returns None when the page lacks the expected container.
    if contents is None:
        return None
    return contents.get_text()
if __name__ == "__main__":
    # Script entry point: append the text of chapters 0..50 to one file.
    fileName = "/Users/hbddz/Desktop/小说.text"
    # Open the output once instead of once per chapter, and pin the encoding
    # so the Chinese text is written as UTF-8 regardless of the OS locale.
    with open(fileName, 'a', encoding='utf-8') as f:
        for x in range(0, 51):
            text = parserText(x)
            if text is None:
                # Chapter could not be fetched or parsed; writing None would
                # raise TypeError, so skip it and continue with the next one.
                continue
            f.write(text)
推荐阅读