Python爬漫画(GUI)

技术无罪,仅供学习

最近闲来无事想看漫画,在网上找了一堆漫画网站,但懒得一个个去翻,就写了这个脚本(其中GUI版本花了我4天时间……)。当然,我自己平时也会在bilibili和腾讯动漫上看正版漫画。
代码十分辣鸡,各大佬轻喷。
爬取的网站是百年漫画(https://m.bnmanhua.com)。
github。
无GUI版本
# getComic.py
"""Command-line comic downloader for m.bnmanhua.com.

Prompts for a comic name, resolves it through the site's search page,
collects the image URLs of every chapter and saves them to
``./<comic title>/<chapter>/<n>.jpg|.png``.
"""
import requests
import re
import time
import os
import threading
import sys
from bs4 import BeautifulSoup

header = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36 Edg/79.0.309.63'}


class bnComic:
    """Search for one comic, index its chapters and download every image."""

    def __init__(self):
        # Maps chapter title -> list of image URLs; filled by picDict().
        self.comicDict = {}
        self.url = 'https://m.bnmanhua.com'
        self.kw = input('请输入要下载的漫画:')
        self.comicUrl = self.searchComic(self.kw)
        self.pageList = self.returnPage(self.comicUrl)

    def searchComic(self, kw):
        """Return the absolute URL of the first search hit for *kw*.

        Also stores the hit's title in ``self.filename``; that title is
        used as the download root directory.

        Raises:
            Exception: when the search page contains no result.
        """
        r = requests.post(self.url + '/index.php?m=vod-search', {'wd': kw})
        try:
            soup = BeautifulSoup(r.text, 'html.parser')
        finally:
            r.close()
        # NOTE(review): the regular expression originally used here was lost
        # (the published pattern is an empty string, which has no groups).
        # This selector mirrors how the GUI variant of this script reads the
        # same site's search results (<li class="vbox"><a href title>) --
        # confirm against the live page.
        hit = soup.find(name='li', attrs={'class': 'vbox'})
        # Validate BEFORE touching the result (the original dereferenced the
        # match object ahead of its own None check).
        if hit is None or hit.a is None or not hit.a.get('href'):
            raise Exception('没有' + kw + '漫画,请检查漫画名称')
        self.filename = hit.a.get('title') or kw
        return self.url + hit.a['href']

    def returnPage(self, u):
        """Return the absolute URLs of all chapter pages listed at *u*.

        Raises:
            Exception: when the page body contains a ``<font>`` element,
                which this script treats as the copyright-restriction notice.
        """
        r = requests.get(u)
        try:
            soup = BeautifulSoup(r.text, 'html.parser')
        finally:
            r.close()
        if soup.body.font:
            raise Exception(self.kw + '漫画受版权限制,禁止下载')
        return [self.url + x.attrs['href'] for x in soup.ul.find_all('a')]

    def picDict(self, u):
        """Fetch chapter page *u* and record its image URLs in ``comicDict``."""
        r = requests.get(u, headers=header)
        try:
            soup = BeautifulSoup(r.text, 'html.parser')
        finally:
            r.close()
        b = re.findall(r'\[(.*)\]', soup.body.script.string.split('; ')[1])
        # The bracketed payload is a comma-separated list of quoted,
        # backslash-escaped paths. Strip the escapes and quotes directly
        # instead of eval()-ing text that comes from a remote server.
        raw = b[0].replace('\\', '').replace('"', '')
        chapter = soup.title.text.split('-')[1]
        # Substring test on the whole payload: the original tested list
        # membership, which never matched, so absolute URLs were wrongly
        # prefixed with the image host.
        if 'https:' in raw:
            self.comicDict[chapter] = raw.split(',')
        else:
            self.comicDict[chapter] = ['https://img.yaoyaoliao.com/' + x for x in raw.split(',')]

    def saveComic(self, part):
        """Download every image of one chapter.

        Args:
            part: ``(chapter_title, [image_url, ...])`` as produced by
                ``comicDict.items()``.
        """
        chapter, pics = part
        # Same root directory that run() creates (the original created
        # self.filename but wrote below self.kw).
        folder = os.path.join('.', self.filename, chapter)
        os.makedirs(folder, exist_ok=True)
        for c, pic in enumerate(pics, start=1):
            # URLs ending in '0' are saved as PNG, everything else as JPEG.
            ext = '.png' if pic.endswith('0') else '.jpg'
            r = requests.get(pic)
            try:
                with open(os.path.join(folder, str(c) + ext), 'wb') as f:
                    f.write(r.content)
            finally:
                r.close()

    def run(self):
        """Index all chapters, then download them, one thread per chapter."""
        try:
            indexers = [threading.Thread(target=self.picDict, args=(page,), daemon=True)
                        for page in self.pageList]
            for t in indexers:
                t.start()
            # Wait for the index to be complete; otherwise comicDict is read
            # while it is still empty or being filled.
            for t in indexers:
                t.join()
            print('正在下载')
            os.makedirs(self.filename, exist_ok=True)
            savers = [threading.Thread(target=self.saveComic, args=(part,), daemon=True)
                      for part in list(self.comicDict.items())]
            for t in savers:
                t.start()
            # Daemon threads die with the main thread, so they must be joined
            # before reporting completion and exiting.
            for t in savers:
                t.join()
            print('下载完成')
        except Exception as e:
            print(e)


manhua = bnComic()
manhua.run()

有GUI版本
"""Tkinter front end for downloading comics from m.bnmanhua.com."""
import requests
import re
import time
import os
import threading
import sys
import tkinter
from tkinter import ttk, messagebox, filedialog
from bs4 import BeautifulSoup

url = 'https://m.bnmanhua.com'


class bnComic(tkinter.Tk):
    """Main window: search comics, list chapters, download all chapters.

    Network work runs on background threads started through runthread().
    NOTE(review): those threads update widgets directly, as the original
    code did; whether that is safe depends on the Tcl/Tk build -- confirm.
    """

    def __init__(self):
        super().__init__()
        # chapter title -> list of image URLs (filled by picDict).
        self.comicDict = {}
        # "<chapter url>-<chapter title>" strings shown in the list box.
        self.pageList = []
        # comic title -> comic page URL (filled by searchComic).
        self.urlData = {}
        # Download state used by saveComic()/runthread().
        self.path = ''
        self.ff = 0
        self.total = 0
        self.title('漫画下载')
        self.geometry('960x525')
        self.resizable(0, 0)
        self.createUI()

    def createUI(self):
        """Build the search bar, the chapter list and the status bar."""
        self.topFrame = tkinter.Frame(self)
        self.topFrame.pack(fill='x', side='top')
        self.middleFrame = tkinter.Frame(self)
        self.middleFrame.pack(fill='x')
        self.scrollbar = tkinter.Scrollbar(self.middleFrame)
        self.bottomFrame = tkinter.Frame(self)
        self.bottomFrame.pack(fill='x', side='bottom')

        self.pageBox = tkinter.Listbox(
            self.middleFrame, selectmode=tkinter.EXTENDED, height=26, width=65)
        self.pageBox.pack(side=tkinter.LEFT)
        self.scrollbar.pack(side='left', fill='y')
        self.pageBox['yscrollcommand'] = self.scrollbar.set
        self.scrollbar['command'] = self.pageBox.yview

        self.kwEntry = tkinter.Entry(self.topFrame)
        self.kwEntry.grid(row=0, column=1)
        # grid() returns None, so create each widget first and place it
        # afterwards; the attributes then really reference the widgets.
        self.l1 = tkinter.Label(self.topFrame, text='关键字:')
        self.l1.grid(row=0, column=0)
        self.l2 = tkinter.Label(self.topFrame, text='搜索结果:')
        self.l2.grid(row=0, column=3)
        self.searchButton = tkinter.Button(
            self.topFrame, text='搜索',
            command=lambda: self.runthread(self.searchComic, self.kwEntry.get()))
        self.searchButton.grid(row=0, column=2)
        self.downButton = tkinter.Button(
            self.topFrame, text='下载所有',
            command=lambda: self.runthread(self.saveComic, self.comicDict))
        self.downButton.grid(row=0, column=5)
        self.comicList = ttk.Combobox(self.topFrame, height=8, width=40)
        self.comicList.grid(row=0, column=4)
        # Virtual event generated by ttk.Combobox when the user picks an
        # entry from the drop-down list.
        self.comicList.bind('<<ComboboxSelected>>', self._onComicSelected)

        self.progress = ttk.Progressbar(
            self.bottomFrame, length=100, maximum=100.0)
        self.progress.pack(side=tkinter.RIGHT)
        self.l3 = tkinter.Label(self.bottomFrame)
        self.l3.pack(side=tkinter.RIGHT)

    def _onComicSelected(self, event):
        """Load the chapter list of the comic chosen in the combobox."""
        target = self.urlData.get(self.comicList.get())
        if target is not None:  # ignore text that is not a known title
            self.runthread(self.returnPage, target)

    def searchComic(self, kw):
        """Search the site for *kw* and fill the result combobox.

        Walks every result page and stores title -> URL in ``urlData``.
        Shows an error dialog and returns early when nothing is found.
        """
        self.l3['text'] = '正在搜索关键字'
        self.progress['value'] = 0.0
        self.comicList.set('')
        self.comicList['values'] = ''
        self.urlData = {}
        r = requests.post(url + '/index.php?m=vod-search-pg-1-wd-' + kw + '.html')
        soup = BeautifulSoup(r.text, 'html.parser')
        r.close()
        if not soup.find_all(name='li', attrs={'class': 'vbox'}):
            self.progress['value'] = 0.0
            self.l3['text'] = ''
            # showerror() returns a string, so it cannot be raised; report
            # the problem and stop instead.
            messagebox.showerror('ERROR', '无此漫画,请检查漫画名称')
            return
        # The <em class="num"> element holds "current/total" page numbers.
        v = int(soup.body.find_all(name='em', attrs={'class': 'num'})[0]
                .string.split('/')[1]) + 1
        for p in range(1, v):
            r = requests.post(
                url + '/index.php?m=vod-search-pg-' + str(p) + '-wd-' + kw + '.html')
            soup = BeautifulSoup(r.text, 'html.parser')
            r.close()
            pageNumList = soup.find_all(name='li', attrs={'class': 'vbox'})
            for x in pageNumList:
                self.progress['value'] += 100 / (len(pageNumList) * v)
                self.urlData[x.a['title']] = url + x.a['href']
            # Show the titles ordered by URL, descending.
            self.comicList['values'] = [
                title for title, _ in sorted(
                    self.urlData.items(), key=lambda d: d[1], reverse=True)]
            time.sleep(0.1)
        self.l3['text'] = ''
        self.progress['value'] = 0.0

    def returnPage(self, u):
        """List the chapters of the comic at *u* and start indexing them.

        Every chapter is shown in the list box and a daemon thread is
        started to collect its image URLs via picDict(). Shows an error
        dialog and returns early when the page body contains a ``<font>``
        element, which this script treats as the copyright notice.
        """
        self.comicDict = {}
        self.progress['value'] = 0.0
        self.l3['text'] = '正在返回章节列表'
        self.pageBox.delete(0, tkinter.END)
        r = requests.get(u)
        soup = BeautifulSoup(r.text, 'html.parser')
        r.close()
        if soup.body.font:
            self.progress['value'] = 0.0
            self.l3['text'] = ''
            messagebox.showerror('ERROR', '漫画受版权限制,禁止下载')
            return
        self.pageList = [url + x.attrs['href'] + '-' + x.string
                         for x in soup.ul.find_all('a')]
        for l in self.pageList:
            self.pageBox.insert(tkinter.END, l)
            t = threading.Thread(target=self.picDict, args=(l,), daemon=True)
            self.progress['value'] += 100 / len(self.pageList)
            t.start()
        self.progress['value'] = 0.0
        self.l3['text'] = ''

    def picDict(self, u):
        """Fetch one chapter page and record its image URLs in ``comicDict``.

        Args:
            u: a ``pageList`` entry; the text before the first '-' is the
                chapter URL.
        """
        r = requests.get(u.split('-')[0])
        soup = BeautifulSoup(r.text, 'html.parser')
        r.close()
        b = re.findall(r'\[(.*)\]', soup.body.script.string.split('; ')[1])
        # The bracketed payload is a comma-separated list of quoted,
        # backslash-escaped paths. Strip the escapes and quotes directly
        # instead of eval()-ing text that comes from a remote server.
        raw = b[0].replace('\\', '').replace('"', '')
        chapter = soup.title.text.split('-')[1]
        if 'https:' in raw:
            # Absolute URLs are requested through the site's own
            # "pic-mkzhan" path with the scheme removed.
            pics = ['https://m.bnmanhua.com/pic-mkzhan/' + x
                    for x in raw.replace('https://', '').split(',')]
        else:
            pics = ['https://img.yaoyaoliao.com/' + x for x in raw.split(',')]
        self.comicDict[chapter] = pics

    def saveComic(self, part):
        """Download every image of one chapter into ``self.path``.

        Args:
            part: ``(chapter_title, [image_url, ...])`` as produced by
                ``comicDict.items()``.
        """
        chapter, pics = part
        folder = os.path.join(self.path, chapter)
        # exist_ok: downloading into the same folder twice must not crash.
        os.makedirs(folder, exist_ok=True)
        for c, pic in enumerate(pics, start=1):
            # Each image contributes an equal share of its chapter's slice
            # of the 0..100 progress range (self.ff = number of chapters).
            self.total += 100 / (len(pics) * self.ff)
            r = requests.get(pic)
            try:
                # The original had two identical branches here; every image
                # is stored with a .jpg extension.
                with open(os.path.join(folder, str(c) + '.jpg'), 'wb') as f:
                    f.write(r.content)
            finally:
                r.close()
            self.progress['value'] = self.total
            print(self.progress['value'], '\t', self.total)

    def _downloadAll(self, func, items):
        """Call *func* on each item in order; one failure does not stop the rest."""
        for item in items:
            try:
                func(item)
            except Exception as e:
                print(repr(e))

    def runthread(self, func, args):
        """Run *func* in the background so the window stays responsive.

        Args:
            func: callable taking a single argument.
            args: a dict starts a download: the user picks a target
                directory, then ``func`` is called with each ``(key, value)``
                item, one after another. A list calls ``func`` once per
                element, each on its own thread. Anything else is passed to
                ``func`` as its single argument.
        """
        try:
            if isinstance(args, dict):
                self.path = filedialog.askdirectory()
                if not self.path:  # dialog cancelled
                    self.l3['text'] = ''
                    messagebox.showerror('ERROR', '下载文件路径不能为空')
                    return
                self.l3['text'] = '正在下载'
                self.ff = len(args)
                self.total = 0
                # Chapters are still processed sequentially, but on a single
                # worker thread: joining each thread from the Tk callback
                # froze the window for the whole download.
                t = threading.Thread(
                    target=self._downloadAll,
                    args=(func, list(args.items())), daemon=True)
                t.start()
            elif isinstance(args, list):
                self.ff = len(args)
                for arg in args:
                    t = threading.Thread(target=func, args=(arg,))
                    t.start()
            else:
                t = threading.Thread(target=func, args=(args,))
                t.start()
        except Exception as e:
            print(repr(e))


manhua = bnComic()
manhua.mainloop()

【Python爬漫画(GUI)】最后挺一下同行,微信公众号:
Python爬漫画(GUI)
文章图片

    推荐阅读