Run the attached configuration in the Web-Harvest IDE.
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://web-harvest.sourceforge.net/schema/2.1/core"
xmlns:var="http://web-harvest.sourceforge.net/schema/2.1/var"
xmlns:p="http://web-harvest.sourceforge.net/schema/2.1/param">
<set var="searchEngine">
google
</set>
<set var="${searchEngine}Content">
<http url="http://www.${searchEngine}.com"/>
</set>
<file action="write" path="data/${searchEngine}_content.html">
<set var="${searchEngine}Content"/>
</file>
INFO - ConfigProcessor starts processing...
INFO - SetVarPlugin starts processing...
INFO - ConstantProcessor starts processing...
INFO - ConstantProcessor processor executed in 2ms.
INFO - SetVarPlugin processor executed in 8ms.
INFO - SetVarPlugin starts processing...
INFO - HttpProcessor starts processing...
INFO - Executing method get...
ERROR - Unsupported cookie spec [Default cookie policy of the HTTP client]
java.lang.IllegalStateException: Unsupported cookie spec [Default cookie policy of the HTTP client]
at org.apache.commons.httpclient.cookie.CookiePolicy.getCookieSpec(CookiePolicy.java:220)
at org.apache.commons.httpclient.HttpMethodBase.getCookieSpec(HttpMethodBase.java:1240)
at org.apache.commons.httpclient.HttpMethodBase.addCookieRequestHeader(HttpMethodBase.java:1277)
at org.apache.commons.httpclient.HttpMethodBase.addRequestHeaders(HttpMethodBase.java:1422)
at org.apache.commons.httpclient.HttpMethodBase.writeRequestHeaders(HttpMethodBase.java:2177)
at org.apache.commons.httpclient.HttpMethodBase.writeRequest(HttpMethodBase.java:2060)
at org.apache.commons.httpclient.HttpMethodBase.execute(HttpMethodBase.java:1096)
at org.apache.commons.httpclient.HttpMethodDirector.executeWithRetry(HttpMethodDirector.java:398)
at org.apache.commons.httpclient.HttpMethodDirector.executeMethod(HttpMethodDirector.java:171)
at org.apache.commons.httpclient.HttpClient.executeMethod(HttpClient.java:397)
at org.apache.commons.httpclient.HttpClient.executeMethod(HttpClient.java:323)
at org.webharvest.runtime.web.HttpClientManager.executeFollowingRedirects(HttpClientManager.java:255)
at org.webharvest.runtime.web.HttpClientManager.doExecute(HttpClientManager.java:218)
at org.webharvest.runtime.web.HttpClientManager.execute(HttpClientManager.java:181)
at org.webharvest.runtime.processors.HttpProcessor.execute(HttpProcessor.java:144)
at org.webharvest.runtime.processors.AbstractProcessor.run(AbstractProcessor.java:120)
at org.webharvest.runtime.processors.RunningStatusController.run(RunningStatusController.java:37)
at org.webharvest.runtime.processors.BodyProcessor$1.call(BodyProcessor.java:29)
at org.webharvest.runtime.processors.BodyProcessor$1.call(BodyProcessor.java:1)
at org.webharvest.runtime.ScraperContext.executeWithinNewContext(ScraperContext.java:160)
at org.webharvest.runtime.processors.BodyProcessor.execute(BodyProcessor.java:21)
at org.webharvest.runtime.processors.WebHarvestPlugin.executeBody(WebHarvestPlugin.java:130)
at org.webharvest.runtime.processors.plugins.variable.AbstractVariableModifierPlugin.executePlugin(AbstractVariableModifierPlugin.java:96)
at org.webharvest.runtime.processors.plugins.variable.SetVarPlugin.executePlugin(SetVarPlugin.java:1)
at org.webharvest.runtime.processors.WebHarvestPlugin.execute(WebHarvestPlugin.java:60)
at org.webharvest.runtime.processors.AbstractProcessor.run(AbstractProcessor.java:120)
at org.webharvest.runtime.processors.RunningStatusController.run(RunningStatusController.java:37)
at org.webharvest.runtime.processors.BodyProcessor$1.call(BodyProcessor.java:29)
at org.webharvest.runtime.processors.BodyProcessor$1.call(BodyProcessor.java:1)
at org.webharvest.runtime.ScraperContext.executeWithinNewContext(ScraperContext.java:160)
at org.webharvest.runtime.processors.BodyProcessor.execute(BodyProcessor.java:21)
at org.webharvest.runtime.processors.AbstractProcessor.getBodyTextContent(AbstractProcessor.java:168)
at org.webharvest.runtime.processors.AbstractProcessor.getBodyTextContent(AbstractProcessor.java:175)
at org.webharvest.runtime.processors.AbstractProcessor.getBodyTextContent(AbstractProcessor.java:179)
at org.webharvest.runtime.processors.ConfigProcessor.execute(ConfigProcessor.java:45)
at org.webharvest.runtime.processors.AbstractProcessor.run(AbstractProcessor.java:120)
at org.webharvest.runtime.processors.RunningStatusController.run(RunningStatusController.java:37)
at org.webharvest.runtime.Scraper.execute(Scraper.java:88)
at org.webharvest.runtime.ScrapingHarvester.execute(ScrapingHarvester.java:205)
at org.webharvest.runtime.ScrapingHarvester$$EnhancerByGuice$$9ec530ab.CGLIB$execute$0(<generated>)
at org.webharvest.runtime.ScrapingHarvester$$EnhancerByGuice$$9ec530ab$$FastClassByGuice$$e88d435e.invoke(<generated>)
at com.google.inject.internal.cglib.proxy.$MethodProxy.invokeSuper(MethodProxy.java:228)
at com.google.inject.internal.InterceptorStackCallback$InterceptedMethodInvocation.proceed(InterceptorStackCallback.java:72)
at org.webharvest.ioc.ScrapingInterceptor$1.execute(ScrapingInterceptor.java:84)
at org.webharvest.ioc.ScrapingInterceptor.invoke(ScrapingInterceptor.java:105)
at org.webharvest.ioc.ScrapingInterceptor.invoke(ScrapingInterceptor.java:79)
at com.google.inject.internal.InterceptorStackCallback$InterceptedMethodInvocation.proceed(InterceptorStackCallback.java:72)
at com.google.inject.internal.InterceptorStackCallback.intercept(InterceptorStackCallback.java:52)
at org.webharvest.runtime.ScrapingHarvester$$EnhancerByGuice$$9ec530ab.execute(<generated>)
at org.webharvest.gui.ScraperExecutionThread.run(ScraperExecutionThread.java:61)
Anonymous
Diff:
Cannot reproduce it. This bug was probably fixed during work on another task.