# -*- coding: utf-8 -*-
"""Solve the Shumei slider CAPTCHA shown by xiaohongshu.com using Selenium.

The flow is: open the CAPTCHA page, download the background and the puzzle
piece images, locate the gap with OpenCV template matching, then drag the
slider along a simulated accelerate/decelerate track.
"""
import random
import time
from urllib.request import urlretrieve

import cv2
import PIL.Image as image
from PIL import ImageChops
from scrapy.http import HtmlResponse
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


class Crack(object):
    """Drive a Chrome session through the slider CAPTCHA at ``url``."""

    # Text looked for in the page source to detect a rejected attempt.
    # NOTE(review): confirm the punctuation matches the page text exactly.
    FAILURE_TEXT = '验证失败,请重新再试'

    def __init__(self, url):
        """Start Chrome with the configured arguments.

        :param url: address of the CAPTCHA page to open.
        """
        self.options = Options()
        # NOTE(review): apart from the first two, these look like HTTP request
        # headers rather than Chrome command-line switches; they are passed on
        # unchanged -- confirm they have the intended effect.
        chrome_arguments = (
            '--disable-gpu',
            "--no-sandbox",
            'user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36"',
            'upgrade-insecure-requests="1"',
            'sec-fetch-user="?1"',
            'sec-fetch-site="none"',
            'sec-fetch-mode="navigate"',
            'pragma="no-cache"',
            'cache-control="no-cache"',
            'accept-language="en-US,en;q=0.9"',
            'accept-encoding="gzip, deflate, br"',
            ':scheme="https"',
            ':method="GET"',
            ':authority="www.xiaohongshu.com"',
            'accept="text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3"',
        )
        for argument in chrome_arguments:
            self.options.add_argument(argument)

        self.url = url
        # NOTE(review): the driver path contains no separators; it is kept
        # exactly as found -- verify it against the local installation.
        self.browser = webdriver.Chrome('D:CaiPanChromechromedriver.exe', options=self.options)
        self.wait = WebDriverWait(self.browser, 100)
        # Offset subtracted from the gap position by crack().
        self.BORDER = 6

    def open(self):
        """Load the CAPTCHA page and enable a 30 second implicit wait.

        The session cookies that used to be hard-coded here were never sent
        (every ``add_cookie`` call was commented out) and were credentials,
        so they have been removed.
        """
        self.browser.get(self.url)
        self.browser.implicitly_wait(30)

    def get_size(self):
        """Print the current browser window size (a dict from Selenium)."""
        screen_size = self.browser.get_window_size()
        print(f"当前屏幕尺寸为{screen_size}")

    def _download_captcha_image(self, class_name, filename, done_message):
        """Save the image shown by the element with ``class_name``.

        Retries until the element exists and exposes a ``src`` attribute.

        :param class_name: CSS class of the ``<img>`` element.
        :param filename: local path the image is written to.
        :param done_message: text printed once the download has finished.
        :return: x coordinate at 80% of the element's width.
        """
        while True:
            try:
                element = self.browser.find_element(By.CLASS_NAME, class_name)
            except (NoSuchElementException, TimeoutException):
                # Not rendered yet: look again instead of recursing.
                time.sleep(0.5)
                continue

            # Give the widget time to finish loading the picture.
            time.sleep(2)
            location = element.location
            size = element.size
            print(location)
            print(size)

            start_x = location['x'] + int(size['width']) * 0.2
            end_x = location['x'] + int(size['width']) * 0.8
            middle_y = location['y'] + int(size['height']) * 0.5
            print(start_x, middle_y, end_x, middle_y)

            image_url = element.get_attribute("src")
            if image_url:
                print(image_url)
                urlretrieve(url=image_url, filename=filename)
                print(done_message)
                return end_x

    def get_images(self):
        """Download the puzzle piece to ``fg.jpg`` and the background to ``bg.jpg``.

        :return: difference between the 80%-width x coordinates of the
            background element and the puzzle-piece element.
        """
        piece_end_x = self._download_captcha_image(
            "shumei_captcha_loaded_img_fg", 'fg.jpg', '缺口图片下载完成')
        background_end_x = self._download_captcha_image(
            "shumei_captcha_loaded_img_bg", 'bg.jpg', '背景图片下载完成')
        distance = background_end_x - piece_end_x
        print(distance)
        return distance

    def get_gap(self, img1, img2):
        """Return the x of the first column where the two images differ.

        :param img1: image without the gap.
        :param img2: image with the gap.
        :return: x of the first differing pixel, scanning columns from 15;
            15 when no difference is found.
        """
        left = 15
        for x in range(left, img1.size[0]):
            for y in range(img1.size[1]):
                if not self.is_pixel_equal(img1, img2, x, y):
                    return x
        return left

    def is_pixel_equal(self, img1, img2, x, y):
        """Tell whether the pixels at ``(x, y)`` are close in both images.

        :param img1: first image.
        :param img2: second image.
        :param x: pixel column.
        :param y: pixel row.
        :return: True when the first three channels each differ by less
            than the threshold.
        """
        pixel_a = img1.load()[x, y]
        pixel_b = img2.load()[x, y]
        threshold = 60
        # abs() must wrap the difference only; wrapping the comparison made
        # every pixel that is darker in img1 compare as equal.
        return all(
            abs(pixel_a[channel] - pixel_b[channel]) < threshold
            for channel in range(3)
        )

    def crack(self):
        """Dry run: open the page, locate the gap and print the drag track.

        get_images() returns a single number, so the former two-list
        unpacking always raised TypeError; the gap is now located with the
        same template matching that process() uses.
        NOTE(review): unlike process(), BORDER is still subtracted here, as
        the original crack() did -- confirm which offset is intended.
        """
        self.open()
        self.get_images()
        self.fixed_size('bg.jpg', '1bg.jpg', 400, 200)
        self.fixed_size('fg.jpg', '1fg.png', 60, 200)
        gap = self.FindPic('1bg.jpg', '1fg.png')
        print('缺口位置', gap)
        track = self.get_track(gap - self.BORDER)
        print('滑动滑块')
        print(track)

    def get_merge_image(self, filename, location_list):
        """Rebuild an image from 10 pixel wide vertical strips.

        :param filename: image file to read; it is overwritten with the result.
        :param location_list: dicts with ``x`` and ``y`` keys; ``y == -100``
            selects a strip for the upper row, ``y == 0`` for the lower row.
        :return: the merged 400x200 RGB image.
        """
        source = image.open(filename)
        upper_strips = []
        lower_strips = []
        for location in location_list:
            left = abs(location['x'])
            if location['y'] == -100:
                upper_strips.append(source.crop((left, 100, left + 10, 200)))
            if location['y'] == 0:
                lower_strips.append(source.crop((left, 0, left + 10, 100)))

        merged = image.new('RGB', (400, 200))
        # Paste each row left to right, upper row at y=0 and lower at y=100.
        for strips, top in ((upper_strips, 0), (lower_strips, 100)):
            x_offset = 0
            for strip in strips:
                merged.paste(strip, (x_offset, top))
                x_offset += strip.size[0]

        merged.save(filename)
        print(merged)
        return merged

    def fixed_size(self, infile, outfile, width, height):
        """Resize ``infile`` to ``width`` x ``height`` and save it as ``outfile``."""
        original = image.open(infile)
        # LANCZOS is the filter formerly exposed as ANTIALIAS, a name that
        # Pillow 10 removed.
        resized = original.resize((width, height), image.LANCZOS)
        resized.save(outfile)

    def FindPic(self, target, template):
        """Find where ``template`` matches best inside ``target``.

        :param target: path of the background image.
        :param template: path of the image to look for.
        :return: x coordinate of the best match location.
        """
        target_colour = cv2.imread(target)
        target_gray = cv2.cvtColor(target_colour, cv2.COLOR_BGR2GRAY)
        template_gray = cv2.imread(template, 0)
        scores = cv2.matchTemplate(target_gray, template_gray, cv2.TM_CCOEFF_NORMED)
        value = cv2.minMaxLoc(scores)
        print('*****')
        print(value)
        # minMaxLoc returns (min_val, max_val, min_loc, max_loc).
        return value[-1][0]

    def get_slider(self):
        """Wait until the slider button exists and return it.

        :return: the slider element.
        """
        while True:
            try:
                return self.browser.find_element(
                    By.XPATH, "//div[@class='shumei_captcha_slide_btn']")
            except (NoSuchElementException, TimeoutException):
                time.sleep(0.5)

    def get_track(self, distance):
        """Build the list of per-step x offsets used to drag the slider.

        The motion accelerates (a = 4) over the first 4/5 of the distance
        and decelerates (a = -3.5) afterwards, sampled every 0.2 time units.
        Rounding is carried from step to step and the final position is
        clamped, so the offsets add up to ``round(distance)``.

        :param distance: total offset to cover.
        :return: list of integer offsets; empty when ``distance <= 0``.
        """
        print("=" * 10, distance)
        track = []
        position = 0        # exact simulated position
        emitted = 0         # whole pixels already placed in the track
        mid = distance * 4 / 5
        print(mid)
        interval = 0.2
        velocity = 0
        while position < distance:
            acceleration = 4 if position < mid else -3.5
            start_velocity = velocity
            velocity = start_velocity + acceleration * interval
            # x = v0 * t + 1/2 * a * t^2
            move = start_velocity * interval + 1 / 2 * acceleration * interval * interval
            position = min(position + move, distance)
            step = round(position) - emitted
            emitted += step
            track.append(step)
        return track

    def move_to_gap(self, slider, track):
        """Drag ``slider`` through every offset in ``track`` and release it.

        :param slider: the slider element.
        :param track: x offsets to move by, one action per offset.
        """
        ActionChains(self.browser).click_and_hold(slider).perform()
        for offset in track:
            ActionChains(self.browser).move_by_offset(xoffset=offset, yoffset=0).perform()
        time.sleep(0.8)
        ActionChains(self.browser).release().perform()

    def result_html(self):
        """Return the page after an attempt, retrying when it was rejected.

        :return: HtmlResponse of the current page, or of the page reached by
            the retry when the failure text is present.
        """
        response = HtmlResponse(url=self.browser.current_url,
                                body=self.browser.page_source,
                                encoding='utf-8')
        if self.FAILURE_TEXT in response.text:
            # Retry on this instance; the module-level ``c`` only exists
            # when the file is run as a script.
            return self._solve()
        print(response.text)
        return response

    def close(self):
        """Shut down the browser and its driver process."""
        self.browser.quit()

    def _solve(self):
        """Run one solving attempt and return the resulting page."""
        self.get_images()
        self.fixed_size('bg.jpg', '1bg.jpg', 400, 200)
        self.fixed_size('fg.jpg', '1fg.png', 60, 200)
        gap_x = self.FindPic('1bg.jpg', '1fg.png')
        slider = self.get_slider()
        track = self.get_track(gap_x)
        self.move_to_gap(slider, track)
        time.sleep(2)
        return self.result_html()

    def process(self):
        """Solve the CAPTCHA, retrying on failure, then close the browser."""
        try:
            self._solve()
        finally:
            # Close exactly once, even when retries happened or an error
            # was raised.
            self.close()


if __name__ == '__main__':
    url = 'https://www.xiaohongshu.com/web-login/captcha?redirectPath=http%3A%2F%2Fwww.xiaohongshu.com%2Fuser%2Fprofile%2F590d4d5950c4b4281396ea20'
    c = Crack(url)
    c.open()
    c.get_size()
    # process() closes the browser, so it can only run once per instance.
    c.process()
# 缺口验证码的验证!! (Verification of the slider-gap CAPTCHA)