毕业设计之 - 题目(基于大数据的电影数据分析可视化系统)

非完整代码,毕业设计找丹成学长,q746876041

import csv
import pymysql
import requests
import re
from lxml import html
import time
# Request headers: identify the client as a desktop Chrome browser.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
}
# Read the movie URLs: the first column of every non-empty CSV row.
# newline='' is what the csv module asks for so that it can handle line
# endings itself.
# NOTE(review): the file is decoded with the platform default encoding, as in
# the original; pass an explicit encoding once the file's encoding is known.
urls = []
with open('./豆瓣电影TOP250链接.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    # Blank lines come back as [] and would make row[0] raise IndexError.
    urls = [row[0] for row in reader if row]

# Crawl every movie detail page listed in ``urls``, print each extracted field
# and store one row per movie in the ``tb_film`` table.

# Patterns for the fields scraped from the raw HTML instead of via XPath;
# compiled once rather than on every page.
# NOTE(review): the published listing had lost the HTML tags inside these three
# patterns (the genre pattern was reduced to '(.*?)', which matches the empty
# string at every position). The tags below are restored on the assumption that
# the page marks the fields up as <span class="pl">label</span> followed by the
# value / <span property="v:..."> -- confirm against a live detail page.
_RUNTIME_RE = re.compile('<span class="pl">片长:</span>.*?>(.*?)</span>', re.S)
_GENRE_RE = re.compile('<span property="v:genre">(.*?)</span>', re.S)
_AREA_RE = re.compile('<span class="pl">制片国家/地区:</span> (.*?)<br/>', re.S)

_INSERT_SQL = (
    'insert into tb_film(url, filmname, score, showtime, genres, areas, mins, '
    'directors, scriptwriters, actors, comments) '
    'values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
)


def _xpath(selector, path):
    """Evaluate XPath *path* on *selector*; no parse tree means no matches."""
    if selector is None:
        return []
    return selector.xpath(path)


def _first(items):
    """Return the first element of *items*, or None when it is empty."""
    return items[0] if items else None


def _strip(text, *unwanted):
    """Remove every substring in *unwanted* from *text*.

    Returns None when *text* is None or nothing is left after stripping.
    """
    if text is None:
        return None
    for piece in unwanted:
        text = text.replace(piece, '')
    return text or None


def _join_first(names, limit=3):
    """Join at most *limit* leading names with '/'; None when there are none."""
    return '/'.join(names[:limit]) or None


def _save_film(values):
    """Insert one movie row; print the error and roll back on failure.

    A new connection is opened for every row, as in the original script.
    NOTE(review): the connection settings are hard-coded here.
    """
    conn = None
    try:
        # Open the database connection.
        conn = pymysql.connect(host='127.0.0.1', port=3306, user='root',
                               passwd='123456', db='douban', charset='utf8')
        # The cursor is closed automatically when the with-block ends.
        with conn.cursor() as cursor:
            cursor.execute(_INSERT_SQL, values)
        # Commit the insert.
        conn.commit()
    except pymysql.MySQLError as e:
        print(e)
        # Only roll back if the connection was actually established.
        if conn is not None:
            conn.rollback()
    finally:
        if conn is not None:
            conn.close()


for url in urls:
    # Request the page.
    try:
        r = requests.get(url=url, headers=headers, timeout=5)
    except requests.RequestException as e:
        # One unreachable page should not abort the whole crawl.
        print(e)
        continue
    finally:
        # Pause after every request attempt, successful or not.
        time.sleep(2)

    selector = html.etree.HTML(r.text)

    # Movie title.
    filmname = _strip(_first(_xpath(selector, '//*[@id="content"]/h1/span[1]/text()')))
    print("filmname :{}".format(filmname))

    # Movie score, with tabs and newlines removed.
    score = _strip(
        _first(_xpath(selector, '//*[@id="interest_sectl"]/div[1]/div[2]/strong/text()')),
        "\t", "\n")
    print("score :{}".format(score))

    # Release date from the page heading, with the parentheses removed.
    showtime = _strip(
        _first(_xpath(selector, '//*[@id="content"]/h1/span[2]/text()')),
        "(", ")")
    print("time :{}".format(showtime))

    # Running time, without spaces and without the "分钟" (minutes) suffix.
    mins = _strip(_first(_RUNTIME_RE.findall(r.text)), ' ', '分钟')
    print("mins :{}".format(mins))

    # Genres, joined with '/'.
    genres_list = '/'.join(_GENRE_RE.findall(r.text)) or None
    print("genres_list :{}".format(genres_list))

    # Production countries/regions, joined with '/' and with spaces removed.
    area_list = '/'.join(_AREA_RE.findall(r.text)).replace(' ', '') or None
    print("area_list :{}".format(area_list))

    # Directors: at most the first three.
    directors_list = _join_first(
        _xpath(selector, '//div[@id="info"]/span[1]/span[2]/a/text()'))
    print("directors_list :{}".format(directors_list))

    # Scriptwriters: at most the first three.
    scriptwriters_list = _join_first(
        _xpath(selector, '//*[@id="info"]/span[2]/span[2]/a/text()'))
    print('scriptwriters_list :{}'.format(scriptwriters_list))

    # Leading actors: at most the first three.
    actors = _first(_xpath(selector, '//*[@id="info"]/span[3]/span[2]'))
    if actors is None:
        actors_list = None
    else:
        # The names sit in nested tags, so string(.) collects all text at once.
        a_list = actors.xpath('string(.)').replace(' ', '').split('/')
        actors_list = _join_first(a_list)
    print('actors_list :{}'.format(actors_list))

    # Text of the rating-count link.
    comment = _strip(_first(_xpath(
        selector, '//*[@id="interest_sectl"]/div[1]/div[2]/div/div[2]/a/span/text()')))
    print("comment :{}".format(comment))

    _save_film((url, filmname, score, showtime, genres_list, area_list, mins,
                directors_list, scriptwriters_list, actors_list, comment))

非完整代码,毕业设计找丹成学长,q746876041

    推荐阅读