selenium模拟登陆知乎

**
selenium模拟登陆知乎及验证码识别 **
import scrapy
import time
from urllib import parse
from mouse import move,click
class ZhihuSpider(scrapy.Spider):
name = ‘zhihu’
allowed_domains = [‘www.zhihu.com’]
start_urls = [‘https://www.zhihu.com/’]

def start_requests(self): from selenium importwebdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.keys import Keys chrome_option = Options() chrome_option.add_argument("--disable-extensions") chrome_option.add_experimental_option("debuggerAddress","127.0.0.1:9222")browser = webdriver.Chrome(executable_path='D:/Evns/article-spider/Scripts/chromedriver.exe',chrome_options=chrome_option) try: browser.maximize_window() except: passbrowser.get("https://www.zhihu.com/signin")browser.find_element_by_css_selector(".SignFlow-accountInput.Input-wrapper input").send_keys(Keys.CONTROL +'a') browser.find_element_by_css_selector(".SignFlow-accountInput.Input-wrapper input").send_keys('xxx') browser.find_element_by_css_selector(".SignFlow-password input").send_keys(Keys.CONTROL + 'a') browser.find_element_by_css_selector(".SignFlow-password input").send_keys('xxx') browser.find_element_by_css_selector(".Button.SignFlow-submitButton").click() time.sleep(10) login_success = False if login_success: Cookies = browser.get_cookies() #print(Cookies) cookie_dict = {} import pickle for cookie in Cookies: # 写入文件 # 此处大家修改一下自己文件的所在路径 f = open('D:/py/ArticleSpider/cookies/' + cookie['name'] + '.zhihu', 'wb') pickle.dump(cookie, f) f.close() cookie_dict[cookie['name']] = cookie['value'] browser.close() return [scrapy.Request(url=self.start_urls[0], dont_filter=True, cookies=cookie_dict)] while not login_success: try: notify_ele = browser.find_element_by_class_name("Popover PushNotifications AppHeader-notifications") login_success = TrueCookies = browser.get_cookies() #print(Cookies) cookie_dict = {} import pickle for cookie in Cookies: # 写入文件 # 此处大家修改一下自己文件的所在路径 f = open('d:/ArticleSpider/cookies/' + cookie['name'] + '.zhihu', 'wb') pickle.dump(cookie, f) f.close() cookie_dict[cookie['name']] = cookie['value'] browser.close() return [scrapy.Request(url=self.start_urls[0], dont_filter=True, cookies=cookie_dict)] except: passtry: english_captcha_element = browser.find_element_by_class_name("Captcha-englishImg") except: english_captcha_element =None try: chinese_captcha_element = browser.find_element_by_class_name("Captcha-chineseImg") except: chinese_captcha_element = Noneif chinese_captcha_element: ele_postion = chinese_captcha_element.location x_relative = ele_postion["x"] y_relative = ele_postion["y"] browser_navigation_panel_height = 70 base64_text =chinese_captcha_element.get_attribute("src") import base64 code = base64_text.replace("data:image/jpg; base64,","").replace("%0A","") fh = open("yzm_cn.jpeg","wb") fh.write(base64.b64decode(code)) fh.close()from zheye import zheye z= zheye() positions = z.Recognize('yzm_cn.jpeg') last_position = [] if len(positions) == 2: if positions[0][1] > positions[1][1]: last_position.append([positions[1][1], positions[1][0]]) last_position.append([positions[0][1], positions[0][0]]) else: last_position.append([positions[0][1], positions[0][0]]) last_position.append([positions[1][1], positions[1][0]]) first_position = [int(last_position[0][0] / 2), int(last_position[0][1] / 2)] second_position = [int(last_position[1][0] / 2), int(last_position[1][1] / 2)] move(x_relative + first_position[0], y_relative + browser_navigation_panel_height + first_position[1]+30) click()move(x_relative + second_position[0], y_relative + browser_navigation_panel_height + second_position[1]+30) click()else: last_position.append([positions[0][1], positions[0][0]]) first_position = [int(last_position[0][0] / 2), int(last_position[0][1] / 2)] move(x_relative + first_position[0], y_relative + browser_navigation_panel_height + first_position[1]+30) click()browser.find_element_by_css_selector(".SignFlow-accountInput.Input-wrapper input").send_keys( Keys.CONTROL + 'a') browser.find_element_by_css_selector(".SignFlow-accountInput.Input-wrapper input").send_keys( 'xxx') browser.find_element_by_css_selector(".SignFlow-password input").send_keys(Keys.CONTROL + 'a') browser.find_element_by_css_selector(".SignFlow-password input").send_keys('xxx') browser.find_element_by_css_selector(".Button.SignFlow-submitButton").click() move(672,564) click()if english_captcha_element:base64_text = english_captcha_element.get_attribute("src") import base64 code = base64_text.replace('data:image/jpg; base64,', '').replace("%0A", "") # print code fh = open("yzm_en.jpeg", "wb") fh.write(base64.b64decode(code)) fh.close()from tools.yundama_requests import YDMHttp yundama = YDMHttp("xxx", "xxx", 7333, "1bb3a77475497442e99443717e204aa7") code = yundama.decode("yzm_en.jpeg", 5000, 60) while True: if code == "": code = yundama.decode("yzm_en.jpeg", 5000, 60) else: break time.sleep(2) browser.find_element_by_xpath( '//*[@id="root"]/div/main/div/div/div/div[2]/div[1]/form/div[3]/div/div/div[1]/input').send_keys( Keys.CONTROL + "a") browser.find_element_by_xpath( '//*[@id="root"]/div/main/div/div/div/div[2]/div[1]/form/div[3]/div/div/div[1]/input').send_keys( code)browser.find_element_by_css_selector(".SignFlow-accountInput.Input-wrapper input").send_keys( Keys.CONTROL + "a") browser.find_element_by_css_selector(".SignFlow-accountInput.Input-wrapper input").send_keys( "xxx") browser.find_element_by_css_selector(".SignFlow-password input").send_keys(Keys.CONTROL + "a") browser.find_element_by_css_selector(".SignFlow-password input").send_keys("xxx") move(668, 543) click() time.sleep(60)

普通验证码 :使用云打码平台识别,大家可以在上面插入自己的账号密码,根据云打码平台提供的用例来完成对验证码的识别。
倒立文字验证码,使用GitHub上的一个大神Zheye源码来识别倒立文字,然后点击
上面的代码只是scrapy框架中的spider.py文件,因为模拟登陆只是用了这一部分,我就把此部分代码贴了出来
【selenium模拟登陆知乎】zheye源地址:点这里

    推荐阅读