谷歌验证码解析

  • Post author:
  • Post category:其他


主要的核心思想是:通过快捷键启动 Chrome 浏览器,然后借助 Chrome 插件 Surfingkeys 实现纯键盘操作页面元素(绕过谷歌的爬虫检测);接着将谷歌验证码切换到语音验证,下载语音文件,并通过谷歌的语音识别服务把语音转换成文字,最后输入文字完成谷歌验证码的破解。解析页面拿到谷歌验证通过后的 tokenRecaptch,后续的请求大概率可以带上这个 token 直接用 Postman 之类的工具发请求获取数据了。

#!/usr/bin/python3
# -*- coding: utf-8 -*-


# Modifier key combined with letter keys for browser shortcuts in this script
# (e.g. command+'r', command+'w', command+'f'); check_str_bychrome branches on
# whether it equals 'ctrl'.
command ='ctrl'
# Page typed into the browser's address bar; the script expects a reCAPTCHA on it.
RECAPTCHA_PAGE_URL = "https://baohiemxahoi.gov.vn/tracuu/Pages/tra-cuu-ho-gia-dinh.aspx"
# Interval in seconds passed to pyautogui.typewrite between successive keys.
secs_between_keys=0.5
# Project configuration reader; `readConfig` is not defined in this excerpt.
config = readConfig.ReadConfig()
# Proxy looked up under the 'luminati' key.
# NOTE(review): not referenced by any live code visible in this file - confirm it is still needed.
proxy =config.get_proxy('luminati')

class VnSSChrome(ISpiderChrome):
    """Keyboard-driven Chrome spider for the baohiemxahoi.gov.vn household lookup.

    The spider opens the lookup page in a real Chrome window, works through the
    reCAPTCHA (switching to the audio challenge when needed), reads the
    ``tokenRecaptch`` value from the copied page source and sends it in a
    direct HTTP POST to fetch the result table.
    """

    def __init__(self, task_id):
        ISpiderChrome.__init__(self, task_id)
        self.op_type = "vnss"

    @staticmethod
    def _parse_social_rows(ss_html):
        """Return one dict per result row found in the parsed lookup response.

        Returns an empty list when the page contains the site's
        "no result found" message.

        Raises:
            Exception: if the result container ``contentChiTietHGD`` is missing.
        """
        if ss_html.find_all(text='Không có kết quả cần tìm'):
            return []
        ss_list = ss_html.find(id='contentChiTietHGD')
        if ss_list is None:
            raise Exception('查询结果解析失败:未找到contentChiTietHGD')
        rows = []
        for ss_tr in ss_list.find_all('tr'):
            ss_tds = ss_tr.find_all('td')
            # Rows without the 8 expected cells (e.g. header rows built from
            # <th>) carry no record and would otherwise raise IndexError.
            if len(ss_tds) < 8:
                continue
            rows.append({
                'social_no': ss_tds[1].text,
                'username': ss_tds[2].text,
                'gender': ss_tds[3].text,
                'birthday': date_to_china(ss_tds[4].text, '/', '-'),
                # NOTE: the misspelled key 'family_cdoe' is kept on purpose;
                # consumers of this payload may depend on it.
                'family_cdoe': ss_tds[5].text,
                'address': ss_tds[6].text,
                'status': ss_tds[7].text,
            })
        return rows

    @ChromeRetry(max_retry_count=2)
    def Chromestart(self,data):
        """Run one lookup through the browser and return the parsed records.

        Args:
            data: mapping with the keys 'city', 'username' and 'idcard'.

        Returns:
            A list of dicts, one per result row (empty when the site reports
            no match).

        Raises:
            Exception: when the page cannot be opened, the captcha is not
                solved, the token is missing or the response cannot be
                parsed. Every failure is logged (with a screenshot when
                possible) and re-raised; the browser tabs are closed in all
                cases.
        """
        # Randomise the pause pyautogui inserts after each call (0.5s - 1.0s).
        pyautogui.PAUSE = random.randint(5, 10) * 0.1
        super().ChromeStart(data)
        task_id = self.task_id
        logger.info('task_id:%s,vnss开始工作', task_id)
        ss_html=''
        try:
            city = data['city']
            username = data['username']
            idcard = data['idcard']
            social_security_infos = []
            spider_status = 'SUCCESS'
            # NOTE(review): presumably a desktop shortcut bound to launching
            # Chrome - confirm against the host setup.
            pyautogui.hotkey('shift', 'ctrl', 'c')
            sleep(1)
            pyautogui.typewrite(RECAPTCHA_PAGE_URL)
            pyautogui.press('enter')
            sleep(15)

            # Wait for the captcha label to appear; reload after each failed wait.
            # The search text is an accent-free prefix of
            # "Tôi không phải là người máy".
            page_opened = False
            for _ in range(2):
                logger.info('task_id:%s,检测网站是否打开成功', task_id)
                if waitFor('Toi khong pha'):
                    logger.info('task_id:%s,网站打开成功', task_id)
                    page_opened = True
                    break
                pyautogui.hotkey(command,'r')
            if not page_opened:
                raise Exception('task_id:'+task_id+'网站打开失败')

            # Focus the reCAPTCHA checkbox and activate it.
            pyautogui.hotkey('shift', 'tab')
            pyautogui.press('enter')
            sleep(2)
            # The image-challenge prompt ("Chọn tất cả hình ảnh có ...") showing
            # up means the checkbox click alone did not pass.
            check1 = check_str_bychrome('Chọn tat ca')
            captcha_failed = False
            retry_count = 2
            if check1:
                # Switch to the audio challenge.
                logger.info('task_id:%s,开始语音识别',task_id)
                pyautogui.hotkey('shift', 'tab')
                pyautogui.hotkey('shift', 'tab')
                pyautogui.hotkey('shift', 'tab')
                pyautogui.press('enter')
                sleep(2)
                # If the audio prompt is absent the code treats it as Google
                # having flagged the traffic as automated.
                check_auto = check_str_bychrome('Nhấn PHÁT và nhập các từ bạn nghe thấy')
                if not check_auto:
                    raise Exception('谷歌人机判断生效,需要更换代理')
                pyautogui.press('tab')
                google_repcha2(task_id)
                # This message on the page means the answer was rejected.
                captcha_failed = bool(
                    check_str_bychrome('Yêu cầu trả lời nhiều hình ảnh xác thực chính xác'))
                if captcha_failed:
                    logger.info('task_id:%s,语音识别失败', task_id)
                else:
                    logger.info('task_id:%s,语音识别成功', task_id)
                while captcha_failed and retry_count<=5:
                    # Request a fresh audio challenge and try again.
                    logger.info('task_id:%s,开始第%s次语音识别', task_id,retry_count)
                    pyautogui.press('tab')
                    pyautogui.press('tab')
                    pyautogui.press('tab')
                    pyautogui.press('tab')
                    pyautogui.press('enter')
                    google_repcha2(task_id)
                    check3 = check_str_bychrome('Yêu cầu trả lời nhiều hình ảnh xác thực chính xác')
                    if not check3:
                        logger.info('task_id:%s,第%s次语音识别成功', task_id, retry_count)
                        captcha_failed = False
                    retry_count+=1
            else:
                logger.info('task_id:%s,谷歌认证直接通过!', task_id)
            # Decide on the failure flag, not on the counter: the counter is
            # also past 5 when the fifth attempt succeeded.
            if captcha_failed:
                raise Exception('谷歌验证码尝试次数超过5次')

            # Copy the page source to the clipboard (keyboard shortcut 'y','s').
            pyautogui.hotkey('shift', 'tab')
            pyautogui.typewrite(['y', 's'], interval=secs_between_keys)
            recaptcha_data=pyperclip.paste()
            recaptcha_html = BeautifulSoup(recaptcha_data, "lxml")
            token_input = recaptcha_html.find(id='tokenRecaptch')
            recaptcha_token = token_input.get('value') if token_input is not None else None
            if not recaptcha_token:
                logger.error('task_id:%s,谷歌token查询失败:%s', task_id, recaptcha_html)
                raise Exception('谷歌token查询失败查询失败')
            proxies = get_luminatiproxy(country='vn_luminati')
            logger.info('tokenRecaptch:%s',recaptcha_token)
            headers = {
                "Accept": "*/*","Origin":"https://baohiemxahoi.gov.vn", "Host":"baohiemxahoi.gov.vn","Referer":"https://baohiemxahoi.gov.vn/tracuu/Pages/tra-cuu-ho-gia-dinh.aspx",
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:69.0) Gecko/20100101 Firefox/69.0"}
            form_data = {'matinh': city, 'tennhankhau': username, 'cmnd': idcard,'mahuyen':'','maxa':'','mathon':'','macd':'','ngaysinh':'','namsinh':'',
                         'tokenRecaptch': recaptcha_token,
                         'typetext': 'CoDau'}
            # SECURITY NOTE(review): verify=False disables TLS certificate
            # validation for this request - confirm this is intentional.
            ss_data = requests.post(
                'https://baohiemxahoi.gov.vn/UserControls/BHXH/BaoHiemYTe/HienThiHoGiaDinh/pListKoOTP.aspx', data=form_data,headers=headers,
                proxies=proxies,timeout=60,verify=False)
            ss_html = BeautifulSoup(ss_data.text, "lxml")
            social_security_infos = self._parse_social_rows(ss_html)
            self.ChromeFinish(social_security_infos,spider_status)
            logger.info('vnss爬取成功:%s', social_security_infos)
        except Exception:
            error_path=logPath+'/'+task_id+".png"
            # Diagnostics are best-effort: a failing screenshot or log call
            # must not replace the error that brought us here.
            try:
                pyautogui.screenshot().save(error_path)
            except Exception:
                logger.error('task_id:%s,截屏保存失败:%s', task_id, error_path)
            logger.info('task_id:%s,html_msg:%s',task_id,ss_html)
            try:
                logger.error('spider-vnss-chrome,task_id:%s,爬取异常,截屏文件:%s,异常信息%s,html_msg:%s',task_id,error_path,traceback.format_exc(),ss_html)
            except Exception:
                logger.error('spider-vnss-chrome,task_id:%s,爬取异常日志记录失败', task_id)
            # Bare raise keeps the original traceback intact.
            raise
        finally:
            # Close the browser tabs opened by this run.
            pyautogui.hotkey(command,'w')
            pyautogui.hotkey(command, 'w')
        return social_security_infos


def trans_mp3_to_wav(filepath, wav_path="now.wav"):
    """Convert the mp3 file at *filepath* into a wav file.

    Args:
        filepath: path of the source mp3 file.
        wav_path: destination path; defaults to "now.wav", the name this
            function has always written to.
    """
    song = AudioSegment.from_mp3(filepath)
    # export() hands back the output file handle; close it explicitly so the
    # file can be removed or reopened right away.
    song.export(wav_path, format="wav").close()

def google_repcha2(task_id):
    """Answer the audio challenge that is currently shown in the browser.

    Copies the address of the audio tab, downloads the mp3, converts it to
    wav, passes it to ``run_quickstart`` for transcription and types the
    returned text into the challenge before submitting it.

    Args:
        task_id: string used to build the temporary "<task_id>.mp3" and
            "<task_id>.wav" file names.

    Raises:
        Exception: if the copied address does not contain 'mp3'.
        requests.HTTPError: if the audio download returns an error status.
    """
    # NOTE(review): presumably moves focus to the audio download link and
    # opens it in a new tab - confirm against the live widget.
    pyautogui.press('tab')
    pyautogui.press('tab')
    pyautogui.press('enter')
    sleep(5)
    # Select the address bar and copy the audio URL.
    pyautogui.hotkey(command, 'l')
    pyautogui.hotkey(command, 'c')
    audo_url = pyperclip.paste()
    # Close the audio page.
    pyautogui.hotkey(command, 'w')
    logger.info(audo_url)
    if 'mp3' not in audo_url:
        # Exception() does not interpolate extra arguments; format explicitly.
        raise Exception('task_id:%s,打开语音链接失败' % task_id)

    mp3_filename = task_id+".mp3"
    wav_filename = task_id+".wav"
    try:
        response = requests.get(audo_url,timeout=30)
        response.raise_for_status()
        with open(mp3_filename, "wb") as f:
            f.write(response.content)
        song = AudioSegment.from_mp3(mp3_filename)
        # Close the handle returned by export() before the file is read back.
        song.export(wav_filename, format="wav").close()
        code = run_quickstart(wav_filename)
    finally:
        # Remove the temporary files even when download, conversion or
        # recognition fails.
        for temp_path in (mp3_filename, wav_filename):
            if os.path.exists(temp_path):
                os.remove(temp_path)

    # Focus the answer input ('g','i'), type the transcription, then tab to
    # the confirm button and press it.
    pyautogui.typewrite(['g', 'i'], interval=secs_between_keys)
    pyautogui.typewrite(code)
    pyautogui.press('tab')
    pyautogui.press('tab')
    pyautogui.press('tab')
    pyautogui.press('tab')
    pyautogui.press('tab')
    pyautogui.press('enter')

def check_picture(file_path):
    """Return True when pyautogui locates the image *file_path* on screen, else False."""
    match = pyautogui.locateOnScreen(file_path, grayscale=False)
    return bool(match)

def click_picture(file_path):
    """Click the centre of the on-screen match for the image *file_path*."""
    centre = pyautogui.locateCenterOnScreen(file_path)
    # Unpacking fails with TypeError if no match was returned.
    pos_x, pos_y = centre
    pyautogui.click(pos_x, pos_y)

def check_str_bypyautogui(str):
    """Search the page for *str* via keyboard shortcuts and report whether it was found.

    The text is pasted into an in-page search opened with '/', the hit is
    selected and copied, and the clipboard is checked for *str*.
    NOTE(review): the '/', 'v' and shift+'$' keys presumably rely on the
    Surfingkeys extension - confirm.
    """
    pyperclip.copy(str)
    pyautogui.typewrite('/')
    pyautogui.hotkey(command, 'v')
    pyautogui.press('enter')
    pyautogui.typewrite('v')
    pyautogui.hotkey('shift', '$')
    # Overwrite the clipboard with a sentinel so the search text itself
    # cannot be read back when nothing gets copied.
    pyperclip.copy('this is a example')
    pyautogui.hotkey(command, 'c')
    pyautogui.press(['esc', 'esc'])
    copied_text = pyperclip.paste()
    return str in copied_text

def check_str_bychrome(str):
    """Use the browser's find shortcut to test whether *str* is on the page.

    The text is pasted into the find box, the current selection is copied
    back, and both strings are compared after upper-casing and unidecode
    transliteration (so accents and case are ignored).
    """
    pyperclip.copy(str)
    pyautogui.hotkey(command, 'f')
    pyautogui.hotkey(command, 'v')
    pyautogui.press('enter')
    if command != 'ctrl':
        pyautogui.press('esc')
    else:
        # NOTE(review): three tabs + enter presumably reach the find bar's
        # close button when the modifier is ctrl - confirm.
        for _ in range(3):
            pyautogui.press('tab')
        pyautogui.press('enter')
    # Sentinel value: if nothing is selected, the copy below leaves this in
    # the clipboard and the comparison fails.
    pyperclip.copy('this is a example')
    pyautogui.hotkey(command, 'c')

    copied_text = unidecode(pyperclip.paste().upper())
    return copied_text == unidecode(str.upper())

def waitFor(string, max_retries=3, interval=5):
    """Poll the page until *string* is found or the retries are used up.

    The page is checked once immediately and then up to *max_retries* more
    times, sleeping *interval* seconds before each retry.

    Args:
        string: text passed to ``check_str_bychrome``.
        max_retries: number of re-checks after the first one (default 3).
        interval: seconds to sleep before each re-check (default 5).

    Returns:
        True as soon as a check succeeds, False when every check failed.
    """
    if check_str_bychrome(string):
        return True
    for _ in range(max_retries):
        sleep(interval)
        # Return on success straight away so a hit on the final retry is
        # reported as found rather than as a timeout.
        if check_str_bychrome(string):
            return True
    return False


if __name__ == "__main__":
    # Manual run: execute a single lookup with sample input and print the result.
    sample_query = {'city':'79TTT','username':'Ngô Mai Thanh Trà1x','idcard':'024744790'}
    spider = VnSSChrome('vnsstest')
    print(spider.Chromestart(sample_query))



版权声明:本文为zhangpz19871210原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。