滑动验证码的破解

2020/02/22 posted in  服务端
Tags:  #test #tool

最近需要获取某些信息,但每次页面都要滑动一次验证码实在太烦了,so研究一下破解方案。

使用的技术

  • python3
  • opencv
  • pymysql
  • selenium

效果

1582371835327952

核心思路

1) 使用selenium加载网站,定位到验证码的iframe
2) 下载背景图和缺口(滑块)图,使用opencv对灰度图像做模板匹配,分析出缺口位置。

result = cv2.matchTemplate(target, template, cv2.TM_CCOEFF_NORMED)
x, y = np.unravel_index(result.argmax(), result.shape)

3) 控制验证码的滑块到指定位置。
4) 使用代理ip:同一个ip访问次数太多时,验证码就算滑到了正确位置,也会被判定为验证失败。

具体代码

#!/usr/bin/env python
# encoding: utf-8

import random
import time
import os
import cv2
import ssl
import pymysql
import urllib.request
import numpy as np
from PIL import Image
from selenium.webdriver import ActionChains
from selenium.webdriver import Chrome
from selenium.webdriver import ChromeOptions
from selenium.webdriver.support.ui import WebDriverWait
class Login(object):
    """Drive a headless Chrome through a slider captcha and read one page field.

    Built on python + selenium + cv2: the captcha images are downloaded,
    OpenCV template matching locates the gap, and Selenium drags the slider
    handle by the computed distance.
    """

    def __init__(self, proxyIP=''):
        """Create the headless Chrome driver.

        :param proxyIP: value passed to Chrome's ``--proxy-server`` switch;
            an empty string means no proxy is configured.
        """
        # In a real deployment the account and password could be set up here.
        self.option = ChromeOptions()
        # Without a proxy the captcha check may misbehave.
        if proxyIP:
            self.option.add_argument("--proxy-server=" + proxyIP)
        self.option.add_argument('--headless')
        self.option.add_experimental_option('excludeSwitches', ['enable-automation'])
        self.driver = Chrome(options=self.option)

    def seturl(self, url):
        """Store the URL that :meth:`main` will load."""
        self.url = url

    @staticmethod
    def get_postion(otemp, oblk):
        """Locate the slider piece inside the captcha background.

        :param otemp: path of the background image (the image searched in)
        :param oblk: path of the slider-piece image (the template)
        :return: the index of the best match as given by
            ``np.unravel_index`` over the match result, i.e. ``(row, col)``;
            element ``[1]`` is the horizontal offset in image pixels.

        Side effect: writes ``temp.jpg`` and ``targ.jpg`` into the current
        working directory.
        """
        target = cv2.imread(otemp, 0)
        template = cv2.imread(oblk, 0)
        temp = 'temp.jpg'
        targ = 'targ.jpg'
        # Persist the grayscale versions so they can be re-read below.
        cv2.imwrite(temp, template)
        cv2.imwrite(targ, target)

        # Invert the background before matching.
        target = cv2.imread(targ)
        target = cv2.cvtColor(target, cv2.COLOR_BGR2GRAY)
        target = abs(255 - target)

        # Round-trip through disk so both images are loaded the same way
        # (cv2.imread with default flags) before matching.
        cv2.imwrite(targ, target)
        target = cv2.imread(targ)
        template = cv2.imread(temp)
        result = cv2.matchTemplate(target, template, cv2.TM_CCOEFF_NORMED)
        # The match result is indexed (row, col), so the second value is the
        # horizontal position the caller needs.
        row, col = np.unravel_index(result.argmax(), result.shape)
        return row, col

    @staticmethod
    def get_track(distance):
        """Return the drag track: a single step of ``int(distance)`` pixels."""
        return [int(distance)]

    @staticmethod
    def urllib_download(imgurl, imgsavepath):
        """Download ``imgurl`` and save it to ``imgsavepath``."""
        from urllib.request import urlretrieve
        urlretrieve(imgurl, imgsavepath)

    def quit2(self):
        """Close the current window and switch to the last remaining handle.

        NOTE(review): if no window is left after closing, indexing
        ``window_handles[-1]`` presumably fails - confirm against callers.
        """
        self.driver.execute_script("window.close();")
        self.driver.switch_to.window(self.driver.window_handles[-1])

    def quit(self):
        """Shut the browser down."""
        self.driver.quit()

    def login_main(self, driver):
        """Solve the slider captcha shown in the page's first iframe.

        :param driver: the Selenium driver with the target page loaded
        """
        driver.switch_to.default_content()
        # The captcha lives inside an iframe.
        # String locator strategies ("tag name", "xpath") are used with
        # find_element() because the find_element_by_* helpers were removed
        # in Selenium 4.3; this form works on both old and new releases.
        driver.switch_to.frame(driver.find_element("tag name", "iframe"))

        # Background image as rendered on the page.
        bk_element = driver.find_element("xpath", '//img[@id="slideBkg"]')
        web_image_width = bk_element.size['width']
        bk_block_x = bk_element.location['x']
        # Slider piece.
        slide_element = driver.find_element("xpath", '//img[@id="slideBlock"]')
        slide_block_x = slide_element.location['x']
        bk_block = bk_element.get_attribute('src')  # background image url
        slide_block = slide_element.get_attribute('src')  # slider piece image url
        slid_ing = driver.find_element("xpath", '//div[@id="slide_bar_head"]')  # drag handle

        # Download both images for analysis.
        os.makedirs('./image/', exist_ok=True)
        self.urllib_download(bk_block, './image/bkBlock.png')
        self.urllib_download(slide_block, './image/slideBlock.png')
        time.sleep(0.5)
        with Image.open('./image/bkBlock.png') as img_bkblock:
            bkblock_real_width = img_bkblock.size[0]
        # Ratio between the downloaded image width and its on-page width.
        width_scale = float(bkblock_real_width) / float(web_image_width)
        # OpenCV analysis: where the piece fits in the background.
        position = self.get_postion('./image/bkBlock.png', './image/slideBlock.png')
        # Convert image pixels to page pixels, then subtract the slider
        # piece's starting offset relative to the background.
        real_position = position[1] / width_scale
        real_position = real_position - (slide_block_x - bk_block_x)
        # Drag track.
        track_list = self.get_track(real_position)
        # Press and hold the left mouse button on the handle.
        ActionChains(driver).click_and_hold(on_element=slid_ing).perform()
        time.sleep(0.2)
        # Drag the handle.
        for track in track_list:
            ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()
        # Release the mouse button.
        ActionChains(driver).release(on_element=slid_ing).perform()

    def main(self):
        """Load ``self.url``, solve the captcha and return the profile text.

        :return: the text of the element with class
            ``profile-card-user-info-description``, or ``None`` when it
            cannot be read (e.g. the captcha was not accepted).
        """
        driver = self.driver
        driver.maximize_window()
        print(self.url)
        driver.get(self.url)
        time.sleep(4)
        self.login_main(driver)
        driver.switch_to.default_content()  # back to the main page
        # Give the page time to show the details.
        time.sleep(4)
        # Best effort: if the element is not there, the caller can only retry
        # with a fresh captcha, so report "nothing" instead of raising.
        try:
            test = driver.find_element("class name", 'profile-card-user-info-description')
            return test.text
        except Exception:
            return None
        
# Fetch a proxy address
def getproxyIP():
    """Request a proxy address from the provider URL and return it as text.

    :return: the response body decoded with the ``unicode_escape`` codec.
    """
    # Placeholder: substitute the endpoint of a proxy provider (free ones exist).
    proxyurl = "xx"
    # Use the response as a context manager so the connection is always closed.
    with urllib.request.urlopen(proxyurl) as response:
        raw = response.read()
    return raw.decode("unicode_escape")
    
if __name__ == '__main__':
    # NOTE(review): this turns off TLS certificate verification for urllib
    # HTTPS requests made by this process - confirm that is acceptable.
    ssl._create_default_https_context = ssl._create_unverified_context
    # Source of the ids used to build the urls to crawl.
    conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='root', db='phpshow', charset='utf8mb4')
    cursor = conn.cursor()
    try:
        while True:
            cursor.execute("select UserId from spider where crawl='1' and intro is Null limit 10")
            res = cursor.fetchall()
            # fetchall() yields an empty sequence, never False, when nothing
            # is left; test truthiness so the loop actually terminates.
            if not res:
                break
            # NOTE(review): rows whose text could not be fetched stay
            # unchanged and will be selected again on the next pass.
            proxyIP = getproxyIP()
            print(proxyIP)
            # TODO: creating the browser can fail too and is not handled yet.
            login = Login(proxyIP)
            try:
                for val in res:
                    print(val[0])
                    try:
                        time.sleep(1)
                        # url pattern, adjust by hand
                        url = str("https://xx.com/search/author?keyword=")+str(val[0])+str("&page=1")

                        login.seturl(url)
                        text = login.main()
                        print(text)
                        if text:
                            print("update")
                            # "xx" is the table-name placeholder.
                            # The scraped text is untrusted, so pass it as a
                            # query parameter instead of splicing it into the
                            # SQL string.
                            update_sql = "update xx set intro=%s where UserId = %s"
                            params = (str(text), str(val[0]))
                            print(update_sql, params)
                            cursor2 = conn.cursor()
                            try:
                                cursor2.execute(update_sql, params)
                                conn.commit()
                            finally:
                                cursor2.close()
                    except Exception as e:
                        login.quit2()
                        # Show what went wrong instead of hiding it.
                        print("error", repr(e))
            finally:
                # Always shut the browser down, even if the batch aborts.
                login.quit()
            print("---------------------------")
    finally:
        cursor.close()
        conn.close()
    ```