python3爬取教务系统的个人学期课程表(无头谷歌浏览模拟登录)

前言 今天带来的是与上次爬取教务系统获取成绩单的姐妹版——爬取教务个人的学期课程表。
工具 使用pycharm编辑器,安装selenium库,beautifulsoup库,csv库,当然需要下载对应的chromedriver 版本对应下载链接点击下载
接入需要的接口

from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.support.select import Select from bs4 import BeautifulSoup import time import csv

设置参数
#模拟登陆终端文件路径 driver_path = r'E:\py\chromedriver\chromedriver.exe'#生成csv文件路径 csv_file_path = r'E://py//个人学期课程表.csv'#登录教务系统的账号与密码 UserId = '账号' PassWord = '密码' #实现后台登陆浏览器 chrome_options = Options() chrome_options.add_argument('--disable-gpu') #关闭GPU对网页的加速,防止网页过多卡住 chrome_options.add_argument('--headless')#进行无头后台运行浏览器 #更改代理ip防反爬虫 #chrome_options.add_argument(f"--proxy-server=121.52.208.200:808")driver = webdriver.Chrome(executable_path = driver_path, chrome_options=chrome_options)

模拟登录 模拟登录,然后用driver的find_element_by_× 寻找对应输入框的位置输入账号密码,点击登录,页面转换一系列操作,driver.implicitly_wait(1)是防止页面加载过慢,导致直接爬取下一步出错,具体可自行百度
driver.get('http://jwgln.zsc.edu.cn/jsxsd/') driver.implicitly_wait(1)#输入登录账号 try: driver.find_element_by_id("userAccount").send_keys(UserId) print('输入账号成功!') except: print('输入账号失败!') # 输入登录密码 try: driver.find_element_by_id("userPassword").send_keys(PassWord) print('输入密码成功!') except: print('输入密码失败!') # 点击登录 try: driver.find_element_by_xpath('//*[@id="btnSubmit"]').click()# 用click模拟浏览器点击 print('正在登录...') except: print('登录失败!') driver.implicitly_wait(1) if '用户名或密码错误' in driver.page_source: print('登录失败,用户名或密码错误,请查证账号密码是否准确。') exit(0) else: print('登录成功!') # 点击学业情况 try: driver.find_element_by_xpath('//*[@class="block4"]').click() print('点击培养方案成功!') except: print('点击培养方案失败!') driver.implicitly_wait(1)#点击课程成绩查询 try: driver.find_element_by_xpath('//*[@href="https://www.it610.com/jsxsd/xskb/xskb_list.do"]').click() time.sleep(1) print('我的学期课表点击成功!') except: print('我的学期课表点击失败!')

接着是难点 点击学期课表 默认是没有课表的日期 会出现alert弹窗 需要模拟点击
python3爬取教务系统的个人学期课程表(无头谷歌浏览模拟登录)
文章图片

#页面弹窗点击确认 try: alt = driver.switch_to_alert() alt.accept() except: pass

接着是下拉框选择日期
python3爬取教务系统的个人学期课程表(无头谷歌浏览模拟登录)
文章图片

检查页面元素
python3爬取教务系统的个人学期课程表(无头谷歌浏览模拟登录)
文章图片

使用Select选择到自己所需的日期,点击,代码如下
#选择准确学期 #driver.find_element_by_xpath(".//*[@id='xnxq01id']/option[4]").click() # 若此时点击后,下拉选项未收回,可点击整个下拉框,收回下拉选项 #driver.find_element_by_xpath(".//*[@id='nr']").click() try: Select(driver.find_element_by_id("xnxq01id")).select_by_value("2019-2020-2").click() except: pass

数据整理 【python3爬取教务系统的个人学期课程表(无头谷歌浏览模拟登录)】使用的是beautifulsoup来筛选所需要的数据
soup = BeautifulSoup(driver.page_source,'lxml') page = soup.find_all('div',attrs={'class': "kbcontent"}) teachers1,teachers2 = [],[] weeks1,weeks2= [],[] classrooms1,classrooms2= [],[] for i in page: teachers1.append(i.find('font',attrs={'title':'老师'})) weeks1.append(i.find('font',attrs={'title':'周次(节次)'})) classrooms1.append(i.find('font', attrs={'title': '教室'})) my_detail = list(page) for i in teachers1: if i == None: teachers2.append('\n') else: teachers2.append(i.string) for i in weeks1: if i == None: weeks2.append('\n') else: weeks2.append('\n'+i.string) for i in classrooms1: if i == None: classrooms2.append('\n') else: classrooms2.append('\n'+i.string) all_data = https://www.it610.com/article/[] pitch_number = ['(上午)\n第1,2节\n(08:00-08:45)\n(08:55-09:40)','第3,4节\n(10:00-10:45)\n(10:55-11:40)', '(下午)\n第5,6节\n(14:30-15:15)\n(15:25-16:10)','第7,8节\n(16:20-16:05)\n(17:15-18:00)', '(晚上)\n第9,10节\n(19:30-20:15)\n(20:25-21:10)','第11,12节','第13,14节']temp = [] temp.append(pitch_number[0]) num = 0 pnum = 0 for i in range(len(my_detail)): if my_detail[i].text == '\xa0': temp.append('\n\n\n') else: temp.append(my_detail[i].text.split(teachers2[i])[0]+'\n'+teachers2[i]+weeks2[i]+classrooms2[i]) num = num + 1 if num == 7: all_data.append(temp) temp = [] pnum = pnum + 1 temp.append(pitch_number[pnum]) num = 0 page2 = soup.find('td',attrs={'colspan':"7"}) BZ = ['备注:'+page2.text,'\n','\n','\n','\n','\n','\n','\n'] all_data.append(BZ)

生成文件
f = open(csv_file_path, 'w', newline='') csv_write = csv.writer(f) csv_write.writerow(['课程时间','星期一','星期二','星期三','星期四','星期五','星期六','星期日']) #设置列表的头部,就是各个列的列名 #再按列的长度每次输入一个列表进去生成一行 for i in range(len(all_data)): csv_write.writerow(all_data[i]) f.close() print('生成csv文件成功')

全部代码
from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.support.select import Select from bs4 import BeautifulSoup import time import csv ''' @author Himit_ZH Date:2020.01.20 ''' #模拟登陆终端文件路径 driver_path = r'E:\py\chromedriver\chromedriver.exe'#生成csv文件路径 csv_file_path = r'E://py//个人学期课程表.csv'#登录教务系统的账号与密码 UserId = '账号' PassWord = '密码' #实现后台登陆浏览器 chrome_options = Options() chrome_options.add_argument('--disable-gpu') #关闭GPU对网页的加速,防止网页过多卡住 chrome_options.add_argument('--headless')#进行无头后台运行浏览器 #更改代理ip防反爬虫 #chrome_options.add_argument(f"--proxy-server=121.52.208.200:808")driver = webdriver.Chrome(executable_path = driver_path, chrome_options=chrome_options)driver.get('http://jwgln.zsc.edu.cn/jsxsd/') driver.implicitly_wait(1)#输入登录账号 try: driver.find_element_by_id("userAccount").send_keys(UserId) print('输入账号成功!') except: print('输入账号失败!') # 输入登录密码 try: driver.find_element_by_id("userPassword").send_keys(PassWord) print('输入密码成功!') except: print('输入密码失败!') # 点击登录 try: driver.find_element_by_xpath('//*[@id="btnSubmit"]').click()# 用click模拟浏览器点击 print('正在登录...') except: print('登录失败!') driver.implicitly_wait(1) if '用户名或密码错误' in driver.page_source: print('登录失败,用户名或密码错误,请查证账号密码是否准确。') exit(0) else: print('登录成功!') # 点击学业情况 try: driver.find_element_by_xpath('//*[@class="block4"]').click() print('点击培养方案成功!') except: print('点击培养方案失败!') driver.implicitly_wait(1)#点击课程成绩查询 try: driver.find_element_by_xpath('//*[@href="https://www.it610.com/jsxsd/xskb/xskb_list.do"]').click() time.sleep(1) print('我的学期课表点击成功!') except: print('我的学期课表点击失败!')#页面弹窗点击确认 try: alt = driver.switch_to_alert() alt.accept() except: pass #选择准确学期 #driver.find_element_by_xpath(".//*[@id='xnxq01id']/option[4]").click() # 若此时点击后,下拉选项未收回,可点击整个下拉框,收回下拉选项 #driver.find_element_by_xpath(".//*[@id='nr']").click() try: Select(driver.find_element_by_id("xnxq01id")).select_by_value("2019-2020-2").click() except: pass print('开始进行数据整理') #对获取的数据进行整理 soup = BeautifulSoup(driver.page_source,'lxml') page = soup.find_all('div',attrs={'class': "kbcontent"}) teachers1,teachers2 = [],[] weeks1,weeks2= [],[] classrooms1,classrooms2= [],[] for i in page: teachers1.append(i.find('font',attrs={'title':'老师'})) weeks1.append(i.find('font',attrs={'title':'周次(节次)'})) classrooms1.append(i.find('font', attrs={'title': '教室'})) my_detail = list(page) for i in teachers1: if i == None: teachers2.append('\n') else: teachers2.append(i.string) for i in weeks1: if i == None: weeks2.append('\n') else: weeks2.append('\n'+i.string) for i in classrooms1: if i == None: classrooms2.append('\n') else: classrooms2.append('\n'+i.string) all_data = https://www.it610.com/article/[] pitch_number = ['(上午)\n第1,2节\n(08:00-08:45)\n(08:55-09:40)','第3,4节\n(10:00-10:45)\n(10:55-11:40)', '(下午)\n第5,6节\n(14:30-15:15)\n(15:25-16:10)','第7,8节\n(16:20-16:05)\n(17:15-18:00)', '(晚上)\n第9,10节\n(19:30-20:15)\n(20:25-21:10)','第11,12节','第13,14节']temp = [] temp.append(pitch_number[0]) num = 0 pnum = 0 for i in range(len(my_detail)): if my_detail[i].text == '\xa0': temp.append('\n\n\n') else: temp.append(my_detail[i].text.split(teachers2[i])[0]+'\n'+teachers2[i]+weeks2[i]+classrooms2[i]) num = num + 1 if num == 7: all_data.append(temp) temp = [] pnum = pnum + 1 temp.append(pitch_number[pnum]) num = 0 page2 = soup.find('td',attrs={'colspan':"7"}) BZ = ['备注:'+page2.text,'\n','\n','\n','\n','\n','\n','\n'] all_data.append(BZ) f = open(csv_file_path, 'w', newline='') csv_write = csv.writer(f) csv_write.writerow(['课程时间','星期一','星期二','星期三','星期四','星期五','星期六','星期日']) for i in range(len(all_data)): csv_write.writerow(all_data[i]) f.close() print('生成csv文件成功') driver.close() driver.quit()

    推荐阅读