想想还有什么小东西需要写一写

【想想还有什么小东西需要写一写】有什么奇怪的需求可以在下面留言啊,如果我正好也对这个奇怪的需求感兴趣,那就,嘿嘿嘿……
# encoding=utf-8
# Multi-threaded image downloader for one board of a forum.
#
# Design (translated from the original notes):
#   * Login signs in once and builds a cookie-aware opener, so worker
#     threads never have to log in or create cookies themselves.
#   * browserTest is the worker thread; it re-uses the opener returned by
#     Login and swaps the opener's headers before each request.
#
# NOTE(review): this listing was recovered from a web page whose extraction
# damaged the code (HTML tags inside regex literals were stripped and a
# stretch of browserTest was lost).  Every reconstructed part is marked
# with NOTE(review) below and must be checked against the real source.
import gzip
import http.cookiejar
import os
import re
import threading
import urllib
import urllib.parse
import urllib.request
import zlib
from collections import deque
from time import sleep


def decode_body(raw, encoding='utf-8', errors='strict'):
    """Return the response body *raw* (bytes) as text.

    The body is gunzipped first when it starts with the gzip magic number,
    so both compressed and uncompressed responses are accepted.
    """
    if raw[:2] == b'\x1f\x8b':
        raw = gzip.decompress(raw)
    return raw.decode(encoding, errors)


def safe_dirname(title):
    """Return *title* with characters that are illegal in a path component
    replaced by ``_`` so it can be used as a directory name."""
    cleaned = re.sub(r'[\\/:*?"<>|]', '_', title).strip(' .')
    return cleaned or '_'


class Login:
    """Log in to the forum and hand out an opener that carries the cookies."""

    # NOTE(security): the account below was hard-coded in the original
    # script.  It is kept as the default so existing callers behave the
    # same, but real credentials should be passed in, not committed.
    def __init__(self, username='wyude', password='3692580000000000'):
        self.__username = username
        self.__password = password
        # Login endpoint.
        self.__url_login = 'http://hkbbcc.com/member.php?mod=logging&action=login&loginsubmit=yes&infloat=yes&lssubmit=yes&inajax=1'
        self.__header_post = {
            'Accept': 'text/html,application/xhtml+xml,application/xml; q=0.9,image/webp,*/*; q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh; q=0.8,en; q=0.6',
            'Cache-Control': 'max-age=0',
            'Content-Length': '96',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Host': 'hkbbcc.com',
            'Origin': 'http://hkbbcc.com',
            'Proxy-Connection': 'keep-alive',
            'Referer': 'http://hkbbcc.com/forum.php',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36',
        }

    def __createPostData(self):
        """Return the url-encoded login form as bytes."""
        post_data = {
            'fastloginfield': 'username',
            'username': self.__username,
            'password': self.__password,
            'quickforward': 'yes',
            'handlekey': 'ls',
        }
        return urllib.parse.urlencode(post_data).encode()

    def __createOpener(self):
        """Build an opener with a cookie jar and the login headers."""
        cookie_jar = http.cookiejar.CookieJar()
        cookie_handler = urllib.request.HTTPCookieProcessor(cookie_jar)
        self.__opener = urllib.request.build_opener(cookie_handler)
        self.__opener.addheaders = list(self.__header_post.items())

    def go(self):
        """Post the login form, print the server reply and return the opener.

        Raises whatever ``opener.open`` raises when the request fails.
        """
        self.__createOpener()
        with self.__opener.open(self.__url_login, self.__createPostData()) as reply:
            raw = reply.read()
        # The request advertises gzip, so the reply may be compressed; the
        # text is only printed for diagnosis, hence errors='replace'.
        print(decode_body(raw, errors='replace'))
        # TODO: inspect the reply/headers and verify the login actually
        # succeeded before continuing.
        return self.__opener


class browserTest(threading.Thread):
    """Worker thread that downloads the images of every post on the listing
    pages whose numbers it takes from a shared queue.

    Parameters:
        opener:    opener returned by ``Login.go`` (shared by all workers).
        q:         deque of listing-page numbers still to download.
        someone:   True to download only posts by the authors in *name*;
                   False to download every post in order.
        name:      collection of author names used when *someone* is True.
        save_root: directory under which ``<page>/<thread title>/`` folders
                   are created.
    """

    # Guards the shared opener: its headers are replaced before each request.
    myLock = threading.RLock()
    myQueue = deque()

    def __init__(self, opener, q, someone=False, name=None, save_root='e:/loadbs/'):
        threading.Thread.__init__(self)
        self.__queue = q
        self.__opener = opener
        self.__save_root = save_root
        self.__url_host = 'http://hkbbcc.com/forum.php'
        # Headers used after login for the listing pages.
        self.__header_after = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36'
        }
        # NOTE(review): the original __header_bs / __header_pic definitions
        # were lost in extraction.  These are minimal stand-ins; the post
        # page asks for gzip because the original gunzipped that response.
        self.__header_bs = {**self.__header_after, 'Accept-Encoding': 'gzip'}
        self.__header_pic = dict(self.__header_after)
        # First listing page of the image board, ordered by post time; the
        # trailing page number is replaced per request.
        self.__urlStart = 'http://hkbbcc.com/forum.php?mod=forumdisplay&fid=18&orderby=dateline&orderby=dateline&filter=author&page=1'
        self.__name = name
        self.__someone = someone
        # NOTE(review): the HTML tags inside the next two patterns were
        # stripped by the page extraction.  __pageLike must capture
        # (relative post url, thread title) and __authLike the author name;
        # restore the real patterns before use.
        self.__pageLike = re.compile('')
        self.__authLike = re.compile('.+?(.+?)<')
        # NOTE(review): the original image pattern is unrecoverable; this
        # placeholder captures absolute img URLs - confirm against the
        # forum's markup.
        self.__picLike = re.compile(r'<img[^>]+?(?:file|src)="(http[^"]+?)"')

    def createOpener(self, header):
        """Replace the shared opener's headers with *header* (a dict).

        NOTE(review): reconstructed from its call sites and the design notes
        ("change the opener's header for each download request"); the
        original body was lost.  Call it while holding ``myLock``.
        """
        self.__opener.addheaders = list(header.items())

    def _fetch(self, url, header):
        """Request *url* with *header* and return the raw body bytes.

        The lock is held only while the headers are swapped and the request
        is sent, and it is always released, even when ``open`` raises.
        """
        with self.__class__.myLock:
            self.createOpener(header)
            reply = self.__opener.open(url)
        with reply:
            return reply.read()

    def run(self):
        """Take page numbers from the queue until it is empty.

        NOTE(review): the head of this method was lost in extraction; the
        queue-draining loop is reconstructed from the constructor comment.
        """
        while True:
            try:
                pageNum = self.__queue.popleft()
            except IndexError:
                return
            self._download_listing(pageNum)

    def _download_listing(self, pageNum):
        """Download every selected post linked from listing page *pageNum*."""
        pageUrl = self.__urlStart[:-1] + str(pageNum)
        try:
            listing = decode_body(self._fetch(pageUrl, self.__header_after))
            # Turn the HTML-escaped '&amp;' in links back into '&'.
            listing = listing.replace('amp;', '')
            posts = self.__pageLike.findall(listing)    # (url, title) pairs
            authors = self.__authLike.findall(listing)  # author of each post
            wanted = self.__name or ()
            for j, (mypage, dirname) in enumerate(posts, start=1):
                # Posts and authors are paired by position; a post without
                # a matching author entry has author None.
                author = authors[j - 1] if j <= len(authors) else None
                if self.__someone and author not in wanted:
                    continue
                self._download_post(pageNum, j, mypage, dirname)
        except Exception as err:  # keep the worker alive for the next page
            print("打开展示页失败" + str(pageNum), err)

    def _download_post(self, pageNum, j, mypage, dirname):
        """Save all images of one post into its own folder."""
        mypage = 'http://hkbbcc.com/' + mypage
        print('+++++' + str(pageNum) + " 页第" + str(j) + "帖当前页网址--->" + mypage)
        print('+++++' + "当前帖子主题--->" + dirname)
        folder = os.path.join(self.__save_root, str(pageNum), safe_dirname(dirname))
        try:
            os.makedirs(folder, exist_ok=True)
        except OSError as err:
            print("创建文件夹失败!" + dirname, err)
            return
        try:
            raw_page = self._fetch(mypage, self.__header_bs)
        except Exception as err:
            print('打开链接失败' + str(j), err)
            return
        try:
            print("正在解压当前网页...............")
            cur_page = decode_body(raw_page)
            print("当前网页解压完毕...............")
        except (OSError, EOFError, ValueError, zlib.error) as err:
            print("网页解压失败", err)
            return
        for picUrl in self.__picLike.findall(cur_page):
            picName = picUrl.split('/')[-1]
            print("正在保存---》" + picName)
            try:
                # Download first so a failed request leaves no empty file.
                data = self._fetch(picUrl, self.__header_pic)
                with open(os.path.join(folder, picName), 'wb') as out:
                    out.write(data)
            except Exception as err:
                print('保存图片失败', err)


def main():
    """Log in once, then download listing pages 1-99 with three workers."""
    # Log in and get the opener that carries the cookies; the workers swap
    # its headers and use it for every request.
    appLog = Login()
    app_opener = appLog.go()
    # Listing pages to download (100 is not included).
    myQueue = deque(range(1, 100))
    name = ['魏晴', '魔幻王']
    threads = []
    for _ in range(3):
        app = browserTest(app_opener, myQueue, True, name)
        app.daemon = True
        app.start()
        threads.append(app)
    for t in threads:
        t.join()
    print('Done !')


if __name__ == '__main__':
    main()

    推荐阅读