From: Jack Bi <814...@qq...> - 2014-03-15 09:42:58
|
Hi Ronald: Thank you for your reply. If you are interested in my code.
/******************************************************/
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.chainsaw.Main;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebWindowEvent;
import com.gargoylesoftware.htmlunit.WebWindowListener;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlInput;
import com.gargoylesoftware.htmlunit.html.HtmlOption;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlSelect;

/**
 * Drives a web-proxy page with HtmlUnit: loads {@link #urlString}, fills
 * {@link #descUrl} into the text input of every form, submits the form (once
 * per option when the form has a select box) and waits for the resulting page.
 *
 * <p>All state is static. The window listener installed by
 * {@link #initWebClient()} raises {@link #warningTag} / {@link #finalTag},
 * and {@link #doRequest(HtmlInput)} / {@link #dealWarning(HtmlPage)} poll
 * those flags once per second.
 */
public class A {
    private static final Logger logger = Logger.getLogger(A.class);

    /** Seconds {@link #doRequest(HtmlInput)} waits for one of the flags before giving up. */
    private static final int JS_TIMEOUT_SECONDS = 100;
    /** Seconds {@link #dealWarning(HtmlPage)} waits for {@link #finalTag} before giving up. */
    private static final int WARNING_TIMEOUT_SECONDS = 50;

    // The two flags and `page` are written by the WebWindowListener and read by
    // the polling loops below. The listener may be invoked from an HtmlUnit
    // background JavaScript thread, so they must be volatile for the polling
    // thread to be guaranteed to observe the update.
    static volatile boolean warningTag = false;
    static volatile boolean finalTag = false;
    static boolean htmlTag = false;
    static final String descUrl = "https://twitter.com/";
    static String descTag = "welcome twitter";
    static volatile HtmlPage page = null;
    static HtmlPage page2 = null;
    static WebClient webClient = null;
    static String htmlString = null;
    static String urlString = "";

    /**
     * Creates the shared {@link #webClient} (Firefox 24 profile, JavaScript on,
     * CSS off, failing status codes and script errors not thrown, insecure SSL
     * accepted) and registers the listener that raises {@link #warningTag} and
     * {@link #finalTag} whenever a window's content changes.
     */
    public static void initWebClient() {
        webClient = new WebClient(BrowserVersion.FIREFOX_24);
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
        webClient.getOptions().setThrowExceptionOnScriptError(false);
        webClient.getOptions().setCssEnabled(false);
        webClient.getOptions().setRedirectEnabled(true);
        webClient.getOptions().setJavaScriptEnabled(true);
        // webClient.getOptions().setTimeout(20*1000);
        webClient.getOptions().setUseInsecureSSL(true);
        webClient.addWebWindowListener(new WebWindowListener() {
            @Override
            public void webWindowOpened(WebWindowEvent event) {
            }

            @Override
            public void webWindowContentChanged(WebWindowEvent event) {
                // The enclosed page is not necessarily HTML (it can be plain
                // text, binary content, ...); ignore anything else instead of
                // failing with a ClassCastException inside the listener.
                Object enclosed = webClient.getCurrentWindow().getEnclosedPage();
                if (!(enclosed instanceof HtmlPage)) {
                    return;
                }
                HtmlPage current = (HtmlPage) enclosed;
                page = current;
                if (current.getTitleText().equals("Security Warning")) {
                    warningTag = true;
                }
                if (current.asText().contains(descTag)) {
                    finalTag = true;
                }
            }

            @Override
            public void webWindowClosed(WebWindowEvent event) {
            }
        });
    }

    /**
     * Loads {@code url} into {@link #page} and processes its forms via
     * {@link #dealRootPage(HtmlPage)}. Load and processing failures are logged
     * (with their cause) and otherwise swallowed; only interruption propagates.
     *
     * @param url address of the root page
     * @throws IOException declared for callers; I/O failures are logged here
     * @throws InterruptedException if the thread is interrupted while waiting
     */
    public static void webClientGetPage(String url) throws IOException, InterruptedException {
        try {
            page = webClient.getPage(url);
        } catch (FailingHttpStatusCodeException e) {
            logger.info(url + ":Error:Http Status", e);
            return;
        } catch (MalformedURLException e) {
            logger.info(url + ":Error:URL Error", e);
            return;
        } catch (Exception e) {
            logger.info(url + ":Error:Connect Error", e);
            return;
        }
        try {
            dealRootPage(page);
        } catch (InterruptedException e) {
            throw e; // never swallow interruption
        } catch (Exception e) {
            logger.info(urlString + ":dealRootPage Failed", e);
        }
    }

    /**
     * Runs {@link #parseForm(Element, HtmlForm)} on every form of the page.
     * Forms are paired by position between the jsoup parse of the page's XML
     * and HtmlUnit's own form list.
     *
     * @param htmlPage the root page
     * @throws IOException declared for callers
     * @throws InterruptedException if the thread is interrupted while waiting
     */
    public static void dealRootPage(HtmlPage htmlPage) throws IOException, InterruptedException {
        Document document = Jsoup.parse(htmlPage.asXml());
        Elements forms = document.getElementsByTag("form");
        List<HtmlForm> forms2 = htmlPage.getForms();
        if (forms.isEmpty()) {
            logger.info(urlString + ":Error:Cannot find Form");
            return;
        }
        // The two parsers may disagree on the number of forms; only the common
        // prefix can be paired, so never index past the shorter list.
        if (forms.size() != forms2.size()) {
            logger.info(urlString + ":Error:Form count mismatch (jsoup=" + forms.size()
                    + ", htmlunit=" + forms2.size() + ")");
        }
        int formCount = Math.min(forms.size(), forms2.size());
        for (int i = 0; i < formCount; i++) {
            Element form = forms.get(i);
            HtmlForm form2 = forms2.get(i);
            System.out.println("Execute:form" + (i + 1));
            try {
                parseForm(form, form2);
            } catch (InterruptedException e) {
                throw e; // never swallow interruption
            } catch (Exception e) {
                // Same 1-based numbering as the progress line above.
                logger.info(urlString + ":parForm:form" + (i + 1) + " Failed", e);
            }
        }
    }

    /**
     * Fills {@link #descUrl} into the form's text input and submits the form,
     * once if the form has no select box, otherwise once per option of its
     * first select box.
     *
     * @param formElement jsoup view of the form, used to discover field names
     * @param form2 HtmlUnit view of the same form, used to interact with it
     * @return {@code 0} when the form was processed, {@code -2} when it has no
     *         named text input, {@code -7} when the single submit failed
     * @throws IOException declared for callers
     * @throws InterruptedException if the thread is interrupted while waiting
     */
    public static int parseForm(Element formElement, HtmlForm form2) throws IOException, InterruptedException {
        String inputName = formElement.select("input[type=text]").attr("name");
        String buttonValue = formElement.select("input[type=submit]").attr("value");
        Elements selects = formElement.getElementsByTag("select");
        if (inputName.length() < 1) {
            logger.info(urlString + ":Error:Cannot find Input");
            return -2;
        }
        HtmlInput inputText = form2.getInputByName(inputName);
        inputText.setAttribute("value", descUrl);
        HtmlInput buttonInput = form2.getInputByValue(buttonValue);

        if (selects.isEmpty()) {
            try {
                doRequest(buttonInput);
            } catch (InterruptedException e) {
                throw e; // never swallow interruption
            } catch (Exception e) {
                logger.info(urlString + ":doRequest(button)" + "Failed", e);
                return -7;
            }
            return 0;
        }

        Element firstSelect = selects.get(0);
        Elements options = firstSelect.getElementsByTag("option");
        HtmlSelect select = form2.getSelectByName(firstSelect.attr("name"));
        for (int i = 0; i < options.size(); i++) {
            String optionValue = options.get(i).attr("value");
            HtmlOption option = select.getOptionByValue(optionValue);
            select.setSelectedAttribute(option, true);
            System.out.println("Select Server:" + (i + 1));
            try {
                doRequest(buttonInput);
            } catch (InterruptedException e) {
                throw e; // never swallow interruption
            } catch (Exception e) {
                logger.info(urlString + ":doRequest " + "Server:" + (i + 1) + "Failed", e);
            }
        }
        return 0;
    }

    /**
     * Clicks the submit button and waits up to {@link #JS_TIMEOUT_SECONDS}
     * seconds for the listener to raise {@link #warningTag} or
     * {@link #finalTag}, then evaluates the current page with
     * {@link #dealPage(HtmlPage)}.
     *
     * @param buttonInput the submit button to click
     * @return the result of {@link #dealPage(HtmlPage)}, or {@code -4} on timeout
     * @throws InterruptedException if interrupted while sleeping
     * @throws IOException if the click fails
     */
    public static int doRequest(HtmlInput buttonInput) throws InterruptedException, IOException {
        // Clear anything left over from an earlier page load. This must happen
        // before click(): the listener can fire while click() is still running.
        warningTag = false;
        finalTag = false;

        page = (HtmlPage) (buttonInput.click());
        for (int waited = 0; !(warningTag || finalTag); waited++) {
            if (waited == JS_TIMEOUT_SECONDS) {
                logger.info(urlString + "Error:JS Timeout");
                System.out.println(page.asXml());
                return -4;
            }
            Thread.sleep(1000);
        }
        int result = dealPage(page);
        warningTag = false;
        finalTag = false;
        Thread.sleep(5000);
        return result;
    }

    /**
     * Classifies a result page by its text.
     *
     * @param page the page to inspect
     * @return {@code 1} if the text contains "Twitter" (logged as success and
     *         dumped to stdout), the result of {@link #dealWarning(HtmlPage)}
     *         if it contains "Warning", otherwise {@code 0}
     * @throws IOException if handling the warning page fails
     * @throws InterruptedException if interrupted while waiting
     */
    public static int dealPage(HtmlPage page) throws IOException, InterruptedException {
        String text = page.asText();
        if (text.contains("Twitter")) {
            logger.info(urlString + ":" + "Succ");
            System.out.println(page.asXml());
            return 1;
        }
        if (text.contains("Warning")) {
            return dealWarning(page);
        }
        return 0;
    }

    /**
     * Clicks "Continue anyway..." in the first form of a warning page and
     * waits up to {@link #WARNING_TIMEOUT_SECONDS} seconds for
     * {@link #finalTag}.
     *
     * @param page the warning page
     * @return {@code 2} once {@link #finalTag} is raised, {@code -5} on timeout
     * @throws IOException if the click fails
     * @throws InterruptedException if interrupted while sleeping
     */
    public static int dealWarning(HtmlPage page) throws IOException, InterruptedException {
        HtmlForm form2 = page.getForms().get(0);
        HtmlElement button2 = form2.getInputByValue("Continue anyway...");
        button2.click();
        for (int waited = 0; waited < WARNING_TIMEOUT_SECONDS; waited++) {
            if (finalTag) {
                return 2; // Succ
            }
            Thread.sleep(1000);
        }
        if (finalTag) {
            return 2; // Succ
        }
        logger.info(urlString + ":" + "Error:Warning timeout");
        return -5;
    }

    /**
     * Entry point: silences HtmlUnit's own logging, initialises the client and
     * processes the hard-coded start page.
     *
     * @param args unused
     * @throws IOException declared for callers
     * @throws InterruptedException declared for callers
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        java.util.logging.Logger.getLogger("com.gargoylesoftware").setLevel(Level.OFF);
        System.setProperty("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.NoOpLog");
        urlString = "http://www.q8daili.com/";
        initWebClient();
        // NOTE(review): Log is not declared or imported in this message;
        // presumably a project helper that configures log4j - confirm.
        Log.loadLogProperties();
        logger.info("**************************************************");
        logger.info("begin");
        try {
            webClientGetPage(urlString);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // keep the interrupt status visible
            logger.info(urlString + ":Get root HTMLPage failed", e);
        } catch (Exception e) {
            logger.info(urlString + ":Get root HTMLPage failed", e);
        } finally {
            // Release the client's windows and background JavaScript threads.
            webClient.closeAllWindows();
        }
    }
}
Thank you! yours Jack! -- View this message in context: http://htmlunit.10904.n7.nabble.com/htmlUnit-always-failed-when-execute-JS-in-html-tp33378p33383.html Sent from the HtmlUnit - General mailing list archive at Nabble.com. |