Selenium处理极验滑动验证码

Camille ·
更新时间:2024-09-21
· 565 次阅读

  要爬取一个网站时遇到了极验的滑动验证码,这周一直在想怎么破解它。在网上搜了很多资料,在知乎上看到有人问过这个问题(https://www.zhihu.com/question/28833985),于是我按照其中的思路大概实现了一下。

  1.使用htmlunit(这种方式我没成功,模拟鼠标拖拽后轨迹没生成,可以跳过)   我用的是java,我首先先想到了用直接用htmlunit,我做了点初始化 private void initWebClient() { if (webClient != null) { return; } webClient = new WebClient(BrowserVersion.FIREFOX_24); webClient.getOptions().setProxyConfig(new ProxyConfig("127.0.0.1",8888)); webClient.getOptions().setActiveXNative(true); webClient.getOptions().setUseInsecureSSL(true); // 配置证书 webClient.getOptions().setJavaScriptEnabled(true); webClient.getOptions().setCssEnabled(true); webClient.setCssErrorHandler(new SilentCssErrorHandler()); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); CookieManager cookieManager = new CookieManager(); List<org.apache.http.cookie.Cookie> httpCookies = client.getCookies();//其方式获取的cookie for (org.apache.http.cookie.Cookie cookie : httpCookies) { cookieManager.addCookie(new com.gargoylesoftware.htmlunit.util.Cookie(cookie)); } webClient.setCookieManager(cookieManager); }   初始化代理,cookie..然后能正常调用了   HtmlPage page = webClient.getPage("http://www.qixin.com/login");//企信宝   gePageInfor(page);   下面是我获取图片,还原图片并且模拟拖拽,(这里我觉得是有些问题的,可能是拖拽我模拟的不对导致触发的js并没有生成正确的轨迹,还请大家帮忙看看哪里错了) private void gePageInfor(HtmlPage page) { String[] img_slice={"div", "class", "gt_cut_fullbg_slice"}; String[] img_bg_slice={"div", "class", "gt_cut_bg_slice"}; HtmlDivision div = (HtmlDivision) page.getElementById("captcha"); int deCAPTCHA = 0; try { byte[] img_slice_binary = client.get(getImgUrl(img_slice, div, true)).getBinary();//获取图片byte byte[] img_bg_slice_binary = client.get(getImgUrl(img_bg_slice, div, false)).getBinary(); //获取还原后的图片 BufferedImage geetestImg = ImgTest.getGeetestImg(img_slice_binary, ImgTest.imgArray); BufferedImage geetestImg2 = ImgTest.getGeetestImg(img_bg_slice_binary, ImgTest.imgArray); //获得图片移动位置(目前还有问题,需改用第三方图片识别) deCAPTCHA =ImgTest.deCAPTCHA(geetestImg,geetestImg2); System.out.println(deCAPTCHA); } catch (IOException | FetchException e) { e.printStackTrace(); } HtmlDivision 
div_slider_knob = get_div_slider_knob(page,"gt_slider_knob gt_show");//获取要移动div HtmlPage mouseOver = (HtmlPage) div_slider_knob.mouseOver(); HtmlPage mouseDownPage = (HtmlPage)div_slider_knob.mouseDown(); div_slider_knob = get_div_slider_knob(mouseDownPage,"gt_slider_knob gt_show moving"); mouseMoveX(deCAPTCHA, div_slider_knob, mouseDownPage); HtmlPage newPage =(HtmlPage)div_slider_knob.mouseOver(); //        newPage =(HtmlPage)div_slider_knob.mouseDown(); System.out.println(newPage.asXml()); div = (HtmlDivision)newPage.getElementById("captcha"); HtmlElement htmlElement = div.getElementsByAttribute("div", "class", "gt_slice gt_show moving").get(0); System.out.println(htmlElement); newPage =(HtmlPage)div_slider_knob.mouseUp();//触发js,轨迹没有生成 System.out.println("---------------"); System.out.println(newPage.asXml()); if (newPage.getElementById("captcha")!=null) {//错误重试 //gePageInfor(newPage); } } private void mouseMoveX(int deCAPTCHA, HtmlDivision div_slider_knob, HtmlPage mouseDown) { MouseEvent mouseEvent = new MouseEvent(div_slider_knob, MouseEvent.TYPE_MOUSE_MOVE, false, false, false, MouseEvent.BUTTON_LEFT); mouseEvent.setClientX( mouseEvent.getClientX()+((deCAPTCHA!=0)?deCAPTCHA:99));    //移动x坐标 ScriptResult scriptResult = mouseDown.getDocumentElement().fireEvent(mouseEvent); } private HtmlDivision get_div_slider_knob(HtmlPage page,String classString) { return (HtmlDivision)(((HtmlDivision) page.getElementById("captcha")).getElementsByAttribute("div", "class", classString).get(0)); } private String getImgUrl(String[] img_slice, HtmlDivision div, boolean isNeedCheckPostion) { String url =""; int[] postion = new int[2]; boolean empty = div.getElementsByAttribute(img_slice[0],img_slice[1],img_slice[2]).isEmpty(); if (div.hasChildNodes() && !empty) { List<HtmlElement> elementsByAttribute = div.getElementsByAttribute(img_slice[0],img_slice[1],img_slice[2]); for(int i = 0;i<elementsByAttribute.size();i++){ HtmlDivision div_img = 
(HtmlDivision)elementsByAttribute.get(i); String style = div_img.getAttribute("style"); String[] imge_url_position = style.split(";"); if(StringUtils.isBlank(url)){//确认url url = StringUtils.replacePattern(imge_url_position[0], ".*\(", "").replace(")", ""); } if (isNeedCheckPostion) {//确认图片切割postion,两张图切割方式一样  background-position: -157px -58px //                    String[] positionS = StringUtils.split(StringUtils.remove(imge_url_position[1], "px").replace("-", "").replaceAll(".*:", ""), null); String[] positionS = StringUtils.split(StringUtils.removePattern(imge_url_position[1], "[^\d+ \s]"),null); postion[0] = Integer.parseInt(positionS[0]); postion[1] = Integer.parseInt(positionS[1]); int[] is = ImgTest.imgArray[i]; if (is[0]!=postion[0]||is[1]!=postion[1]) { logger.debug("更新分割postion"); ImgTest.imgArray[i] = postion; } System.out.println(ImgTest.imgArray); isNeedCheckPostion= false; } } } return url; }



滑动验证码 滑动验证 验证码 selenium

需要 登录 后方可回复, 如果你还没有账号请 注册新账号
相关文章