After 25–30 minutes of work, Web-Harvest randomly stopped. Why?
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Web-Harvest configuration that walks fragrantica.ru in three nested loops:
  countries (index k0), designers of a country (index k1), fragrances of a
  designer (index k2). For every fragrance it downloads the related images
  into ImagePath and appends one pipe-delimited row to OutFile.

  NOTE(review): the result of the outermost loop is stored in the "All"
  variable, so whatever the loop bodies return is kept for the whole run.
  On long runs this may contribute to memory growth; confirm whether "All"
  is needed by any consumer before changing it.
-->
<config>

  <!-- Site root; every page URL below is built as ${host} + relative URI. -->
  <var-def name="host">http://www.fragrantica.ru</var-def>
  <var-def name="Path">d:\xml_harvest</var-def>
  <var-def name="ImagePath">d:\xml_harvest\img</var-def>
  <!-- Output file name is derived once from the start date and time. -->
  <var-def name="OutFile"><template>d:\xml_harvest\${sys.date()+sys.time()}</template></var-def>

  <!-- Header row; column order matches the row built in the AllComm script. -->
  <var-def name="AllDesc">k0|k1|k2|country_name|country_URI|disainer_URI|disainer_Name|disainer_sImgURL|disainer_sImgName|disainer_lgImgName|disainer_Industia_url|disainer_Industia_name|disainer_WEB_PAGE|disainer_LIC_url|disainer_LIC_name|disainer_DESC|aromat_URI|aromat_Name|aromat_pol|aromat_year|aromat_sImgName|aromat_lgImgName|aromat_name_1|aromat_decr|aromat_group_name|aromat_group_url|note_verh_name|note_verh_id|note_srd_name|note_srd_id|note_niz_name|note_niz_id</var-def>

  <file action="append" type="text" path="${OutFile}">
    <var name="AllDesc"/>
  </file>

  <!--
    download-file: saves the image at ImgURL as ImageDir/ImgName.ext.
    Parameters:
      ImgName  : base file name without extension
      ImageDir : target directory
      ImgURL   : address of the image
    The extension is whatever follows the last dot of ImgURL; any "/" left in
    the name is replaced with "(047)". Nothing is downloaded when the target
    file already exists or when ImgURL is empty. Returns nothing.
  -->
  <function name="download-file">
    <return>
      <empty>
        <var-def name="ImgName">
          <script return="name"><![CDATA[
            name = ImgName.toString() + "." + ImgURL.toString().replaceAll("(.*\\.)", "").trim();
            name = name.replaceAll("/", "(047)");
          ]]></script>
        </var-def>
        <!-- true only when there is a URL to fetch and no file on disk yet -->
        <var-def name="needDownload">
          <script return="need"><![CDATA[
            target = new File(ImageDir.toString() + "/" + ImgName.toString());
            need = ImgURL.toString().trim().length() != 0 && !target.exists();
          ]]></script>
        </var-def>
        <case>
          <if condition="${needDownload}">
            <file action="write" type="binary" path="${ImageDir}/${ImgName}">
              <http url="${ImgURL}"/>
            </file>
          </if>
        </case>
      </empty>
    </return>
  </function>

  <!-- Start page; its first select element lists the countries. -->
  <empty>
    <var-def name="First_html">
      <html-to-xml outputtype="pretty">
        <http method="get" url="${host}/designers-1/#A" charset="utf-8"/>
      </html-to-xml>
    </var-def>
  </empty>

  <var-def name="All">
    <!-- Country loop -->
    <loop item="item_country" index="k0" filter="unique">
      <list>
        <xpath expression="(//select)[1]/option">
          <var name="First_html"/>
        </xpath>
      </list>
      <body>
        <case>
          <!-- NOTE(review): hard-coded threshold, presumably a resume point
               from an earlier interrupted run; confirm before a fresh crawl. -->
          <if condition='${Integer.parseInt(k0.toString())>17}'>
            <empty>
              <var-def name="country_URI">
                <xpath expression="/option/@value">
                  <var name="item_country"/>
                </xpath>
              </var-def>
              <var-def name="country_Name">
                <xpath expression="/option/text()">
                  <var name="item_country"/>
                </xpath>
              </var-def>
            </empty>
            <case>
              <!-- Options without a value have no country page to visit. -->
              <if condition='${(country_URI.toString().trim().length()!= 0)}'>
                <empty>
                  <var-def name="Country_html">
                    <html-to-xml outputtype="pretty">
                      <http method="get" url="${host}${country_URI}" charset="utf-8"/>
                    </html-to-xml>
                  </var-def>
                </empty>

                <!-- Designer loop -->
                <loop item="item_disainer" index="k1" filter="unique">
                  <list>
                    <xpath expression="//div[@class='nduList']/p/a">
                      <var name="Country_html"/>
                    </xpath>
                  </list>
                  <body>
                    <case>
                      <!-- NOTE(review): hard-coded threshold applied to every
                           country, presumably a resume point; confirm. -->
                      <if condition='${Integer.parseInt(k1.toString())>149}'>
                        <empty>
                          <var-def name="disainer_URI">
                            <xpath expression="/a/@href">
                              <var name="item_disainer"/>
                            </xpath>
                          </var-def>
                          <!-- e.g. Gabriela Sabatini -->
                          <var-def name="disainer_Name">
                            <xpath expression="/a/text()">
                              <var name="item_disainer"/>
                            </xpath>
                          </var-def>
                          <var-def name="disainer_sImgURL">
                            <xpath expression="/a/img/@src">
                              <var name="item_disainer"/>
                            </xpath>
                          </var-def>
                          <var-def name="disainer_sImgName">
                            <script return="r"><![CDATA[
                              r = "sml_dis_" + k1.toString() + "_" + disainer_Name.toString().replaceAll("(?sim)[^a-zA-Z0-9]", "");
                            ]]></script>
                          </var-def>
                          <empty>
                            <!-- Small designer logo -->
                            <call name="download-file">
                              <call-param name="ImgName"><var name="disainer_sImgName"/></call-param>
                              <call-param name="ImageDir"><var name="ImagePath"/></call-param>
                              <call-param name="ImgURL"><var name="disainer_sImgURL"/></call-param>
                            </call>
                            <var-def name="disainer_html">
                              <html-to-xml outputtype="pretty">
                                <http method="get" url="${host}${disainer_URI}" charset="utf-8"/>
                              </html-to-xml>
                            </var-def>
                          </empty>
                          <!-- Heading of the designer page -->
                          <var-def name="disainer_Name_a">
                            <xpath expression="//div[@id='col1']/h1/text()">
                              <var name="disainer_html"/>
                            </xpath>
                          </var-def>
                          <empty>
                            <var-def name="disainer_lgImgName">
                              <script return="r"><![CDATA[
                                r = "lg_dis_" + k1.toString() + "_" + disainer_Name.toString().replaceAll("(?sim)[^a-zA-Z0-9]", "");
                              ]]></script>
                            </var-def>
                            <!-- Large designer photo -->
                            <call name="download-file">
                              <call-param name="ImgName"><var name="disainer_lgImgName"/></call-param>
                              <call-param name="ImageDir"><var name="ImagePath"/></call-param>
                              <call-param name="ImgURL">
                                <xpath expression="//div[@id='col1']/div/img/@src">
                                  <var name="disainer_html"/>
                                </xpath>
                              </call-param>
                            </call>
                            <!-- Main column of the designer page; source text for the regexps below -->
                            <var-def name="disainer_area">
                              <xpath expression="//div[@id='col1']">
                                <var name="disainer_html"/>
                              </xpath>
                            </var-def>
                          </empty>

                          <!-- Industry: link URL and link text, captured as "url"|+|"name" and then split -->
                          <var-def name="disainer_Industia_url"></var-def>
                          <var-def name="disainer_Industia_name"></var-def>
                          <var-def name="disainer_Industia">
                            <regexp flag-multiline="True" max="1">
                              <regexp-pattern><![CDATA[Индустрия:.{1,20}<a.{1,3}href="(.*?)".{0,3}>(.*?)</a>]]></regexp-pattern>
                              <regexp-source>
                                <var name="disainer_area"/>
                              </regexp-source>
                              <regexp-result><template>"${_1}"|+|"${_2}"</template></regexp-result>
                            </regexp>
                          </var-def>
                          <var-def name="disainer_Industia_url">
                            <script return="r1"><![CDATA[
                              r1 = disainer_Industia.toString().replaceAll("(?sim)\\|\\+\\|.*$", "");
                            ]]></script>
                          </var-def>
                          <var-def name="disainer_Industia_name">
                            <script return="r2"><![CDATA[
                              r2 = disainer_Industia.toString().replaceAll("(?sim)^.*?\\|\\+\\|", "");
                              r2 = r2.replaceAll("(?sim)<.*?>", "").trim();
                            ]]></script>
                          </var-def>

                          <!-- Designer web page URL -->
                          <var-def name="disainer_WEB_PAGE">
                            <regexp flag-multiline="True" max="1">
                              <regexp-pattern><![CDATA[Веб-страница дизайнера:.{1,20}<a.{1,3}href="(.*?)".*?>]]></regexp-pattern>
                              <regexp-source>
                                <var name="disainer_area"/>
                              </regexp-source>
                              <regexp-result><template>${_1}</template></regexp-result>
                            </regexp>
                          </var-def>

                          <!-- Licence owner: link URL and link text, same "url"|+|"name" scheme -->
                          <var-def name="disainer_LIC_url"></var-def>
                          <var-def name="disainer_LIC_name"></var-def>
                          <var-def name="disainer_LIC">
                            <regexp flag-multiline="True" max="1">
                              <!-- \s+ accepts a space or a line break between the two words -->
                              <regexp-pattern><![CDATA[Владелец\s+лицензии:.{1,20}<a.{1,3}href="(.*?)".{0,3}>(.*?)</a>]]></regexp-pattern>
                              <regexp-source>
                                <var name="disainer_area"/>
                              </regexp-source>
                              <regexp-result><template>"${_1}"|+|"${_2}"</template></regexp-result>
                            </regexp>
                          </var-def>
                          <!-- Split the licence capture itself (not the industry one) -->
                          <var-def name="disainer_LIC_url">
                            <script return="r1"><![CDATA[
                              r1 = disainer_LIC.toString().replaceAll("(?sim)\\|\\+\\|.*$", "");
                            ]]></script>
                          </var-def>
                          <var-def name="disainer_LIC_name">
                            <script return="r2"><![CDATA[
                              r2 = disainer_LIC.toString().replaceAll("(?sim)^.*?\\|\\+\\|", "");
                              r2 = r2.replaceAll("(?sim)<.*?>", "").trim();
                            ]]></script>
                          </var-def>

                          <!-- Designer description -->
                          <var-def name="disainer_DESC">
                            <regexp flag-multiline="True" max="1">
                              <regexp-pattern><![CDATA[<iframe.*?</div>(.*?)<div.{1,10}style="clear:]]></regexp-pattern>
                              <regexp-source>
                                <var name="disainer_area"/>
                              </regexp-source>
                              <regexp-result><template>"${_1}"</template></regexp-result>
                            </regexp>
                          </var-def>
                        </empty>

                        <!-- Fragrance loop -->
                        <loop item="item_aromat" index="k2" filter="unique">
                          <list>
                            <xpath expression="//div[@id='col1']/div[@class]/div">
                              <var name="disainer_html"/>
                            </xpath>
                          </list>
                          <body>
                            <empty>
                              <var-def name="aromat_URI">
                                <xpath expression="/div/p/a/@href">
                                  <var name="item_aromat"/>
                                </xpath>
                              </var-def>
                              <var-def name="aromat_Name">
                                <xpath expression="/div/p/a/text()">
                                  <var name="item_aromat"/>
                                </xpath>
                              </var-def>
                              <!-- Text of the first span of the list entry -->
                              <var-def name="aromat_pol">
                                <xpath expression="/div/p/span/text()">
                                  <var name="item_aromat"/>
                                </xpath>
                              </var-def>
                              <!-- Text of the nested strong element -->
                              <var-def name="aromat_year">
                                <xpath expression="/div/p/span/span/strong/text()">
                                  <var name="item_aromat"/>
                                </xpath>
                              </var-def>
                              <var-def name="aromat_sImgName">
                                <script return="r10"><![CDATA[
                                  r10 = "sml_arm_" + k1.toString() + "_" + k2.toString() + "_"
                                      + disainer_Name.toString().replaceAll("(?sim)[^a-zA-Z0-9]", "") + "_"
                                      + aromat_Name.toString().replaceAll("(?sim)[^a-zA-Z0-9]", "");
                                ]]></script>
                              </var-def>
                              <var-def name="aromat_lgImgName">
                                <script return="r10"><![CDATA[
                                  r10 = "lg_arm_" + k1.toString() + "_" + k2.toString() + "_"
                                      + disainer_Name.toString().replaceAll("(?sim)[^a-zA-Z0-9]", "") + "_"
                                      + aromat_Name.toString().replaceAll("(?sim)[^a-zA-Z0-9]", "");
                                ]]></script>
                              </var-def>
                              <empty>
                                <var-def name="aromat_img_URL">
                                  <xpath expression="/div/p/a/img/@src">
                                    <var name="item_aromat"/>
                                  </xpath>
                                </var-def>
                                <!-- Thumbnail goes under the small-image name so it cannot
                                     block the large-image download below. -->
                                <call name="download-file">
                                  <call-param name="ImgName"><var name="aromat_sImgName"/></call-param>
                                  <call-param name="ImageDir"><var name="ImagePath"/></call-param>
                                  <call-param name="ImgURL"><var name="aromat_img_URL"/></call-param>
                                </call>
                                <var-def name="aromat_html">
                                  <html-to-xml outputtype="pretty">
                                    <http method="get" url="${host}${aromat_URI}" charset="utf-8"/>
                                  </html-to-xml>
                                </var-def>
                              </empty>
                              <var-def name="aromat_lgImgURL">
                                <xpath expression="//div[@id='mainpicbox']/img/@src">
                                  <var name="aromat_html"/>
                                </xpath>
                              </var-def>
                              <!-- Large fragrance picture -->
                              <call name="download-file">
                                <call-param name="ImgName"><var name="aromat_lgImgName"/></call-param>
                                <call-param name="ImageDir"><var name="ImagePath"/></call-param>
                                <call-param name="ImgURL"><var name="aromat_lgImgURL"/></call-param>
                              </call>
                              <var-def name="aromat_name_1">
                                <xpath expression="(//div[@id='col1']/div)[2]/h1/text()">
                                  <var name="aromat_html"/>
                                </xpath>
                              </var-def>
                              <var-def name="aromat_decr">
                                <xpath expression="(//div[@id='col1']/div)[2]/p[2]">
                                  <var name="aromat_html"/>
                                </xpath>
                              </var-def>
                              <var-def name="aromat_group_name">
                                <xpath expression="(//div[@id='col1']/div)[2]/p[1]/span[2]/span[1]/text()">
                                  <var name="aromat_html"/>
                                </xpath>
                              </var-def>
                              <var-def name="aromat_group_url">
                                <xpath expression="(//div[@id='col1']/div)[2]/p[1]/span[2]/span[1]/@title">
                                  <var name="aromat_html"/>
                                </xpath>
                              </var-def>

                              <!-- Top notes (first paragraph): download each note image and
                                   accumulate "[name]," and "[id]," lists. -->
                              <var-def name="note_verh_name"></var-def>
                              <var-def name="note_verh_id"></var-def>
                              <loop item="item_note" index="n1" filter="unique">
                                <list>
                                  <xpath expression="((//div[@id='col1']/div)[2]/div[@style]/div[@style])[1]/p[1]/span">
                                    <var name="aromat_html"/>
                                  </xpath>
                                </list>
                                <body>
                                  <empty>
                                    <var-def name="id_note">
                                      <xpath expression="/span/@id">
                                        <var name="item_note"/>
                                      </xpath>
                                    </var-def>
                                    <var-def name="name_note">
                                      <xpath expression="/span/img/@title">
                                        <var name="item_note"/>
                                      </xpath>
                                    </var-def>
                                    <var-def name="url_note">
                                      <xpath expression="/span/img/@src">
                                        <var name="item_note"/>
                                      </xpath>
                                    </var-def>
                                    <call name="download-file">
                                      <call-param name="ImgName">
                                        <script return="r"><![CDATA[
                                          r = "note_" + id_note.toString().replaceAll("(?sim)[^a-zA-Z0-9]", "");
                                        ]]></script>
                                      </call-param>
                                      <call-param name="ImageDir"><var name="ImagePath"/></call-param>
                                      <call-param name="ImgURL"><var name="url_note"/></call-param>
                                    </call>
                                  </empty>
                                  <var-def name="note_verh_name">
                                    <script return="verh_name"><![CDATA[
                                      verh_name = note_verh_name.toString() + "[" + name_note.toString() + "],";
                                    ]]></script>
                                  </var-def>
                                  <var-def name="note_verh_id">
                                    <script return="verh_id"><![CDATA[
                                      verh_id = note_verh_id.toString() + "[" + id_note.toString() + "],";
                                    ]]></script>
                                  </var-def>
                                </body>
                              </loop>

                              <!-- Middle notes (second paragraph) -->
                              <var-def name="note_srd_name"></var-def>
                              <var-def name="note_srd_id"></var-def>
                              <loop item="item_note" index="n1" filter="unique">
                                <list>
                                  <xpath expression="((//div[@id='col1']/div)[2]/div[@style]/div[@style])[1]/p[2]/span">
                                    <var name="aromat_html"/>
                                  </xpath>
                                </list>
                                <body>
                                  <empty>
                                    <var-def name="id_note">
                                      <xpath expression="/span/@id">
                                        <var name="item_note"/>
                                      </xpath>
                                    </var-def>
                                    <var-def name="name_note">
                                      <xpath expression="/span/img/@title">
                                        <var name="item_note"/>
                                      </xpath>
                                    </var-def>
                                    <var-def name="url_note">
                                      <xpath expression="/span/img/@src">
                                        <var name="item_note"/>
                                      </xpath>
                                    </var-def>
                                    <call name="download-file">
                                      <call-param name="ImgName">
                                        <script return="r"><![CDATA[
                                          r = "note_" + id_note.toString().replaceAll("(?sim)[^a-zA-Z0-9]", "");
                                        ]]></script>
                                      </call-param>
                                      <call-param name="ImageDir"><var name="ImagePath"/></call-param>
                                      <call-param name="ImgURL"><var name="url_note"/></call-param>
                                    </call>
                                  </empty>
                                  <var-def name="note_srd_name">
                                    <script return="srd_name"><![CDATA[
                                      srd_name = note_srd_name.toString() + "[" + name_note.toString() + "],";
                                    ]]></script>
                                  </var-def>
                                  <var-def name="note_srd_id">
                                    <script return="srd_id"><![CDATA[
                                      srd_id = note_srd_id.toString() + "[" + id_note.toString() + "],";
                                    ]]></script>
                                  </var-def>
                                </body>
                              </loop>

                              <!-- Base notes (third paragraph) -->
                              <var-def name="note_niz_name"></var-def>
                              <var-def name="note_niz_id"></var-def>
                              <loop item="item_note" index="n1" filter="unique">
                                <list>
                                  <xpath expression="((//div[@id='col1']/div)[2]/div[@style]/div[@style])[1]/p[3]/span">
                                    <var name="aromat_html"/>
                                  </xpath>
                                </list>
                                <body>
                                  <empty>
                                    <var-def name="id_note">
                                      <xpath expression="/span/@id">
                                        <var name="item_note"/>
                                      </xpath>
                                    </var-def>
                                    <var-def name="name_note">
                                      <xpath expression="/span/img/@title">
                                        <var name="item_note"/>
                                      </xpath>
                                    </var-def>
                                    <var-def name="url_note">
                                      <xpath expression="/span/img/@src">
                                        <var name="item_note"/>
                                      </xpath>
                                    </var-def>
                                    <call name="download-file">
                                      <call-param name="ImgName">
                                        <script return="r"><![CDATA[
                                          r = "note_" + id_note.toString().replaceAll("(?sim)[^a-zA-Z0-9]", "");
                                        ]]></script>
                                      </call-param>
                                      <call-param name="ImageDir"><var name="ImagePath"/></call-param>
                                      <call-param name="ImgURL"><var name="url_note"/></call-param>
                                    </call>
                                  </empty>
                                  <var-def name="note_niz_name">
                                    <script return="niz_name"><![CDATA[
                                      niz_name = note_niz_name.toString() + "[" + name_note.toString() + "],";
                                    ]]></script>
                                  </var-def>
                                  <var-def name="note_niz_id">
                                    <script return="niz_id"><![CDATA[
                                      niz_id = note_niz_id.toString() + "[" + id_note.toString() + "],";
                                    ]]></script>
                                  </var-def>
                                </body>
                              </loop>

                              <!-- Build one output row: join all fields with "|", strip CR, LF
                                   and TAB, replace ";" with "," and prefix a line break. -->
                              <var-def name="AllComm">
                                <script return="allcomm"><![CDATA[
                                  row = k0.toString() + "|" + k1.toString() + "|" + k2.toString() + "|"
                                      + country_Name.toString() + "|" + country_URI.toString() + "|" + disainer_URI.toString() + "|"
                                      + disainer_Name.toString() + "|" + disainer_sImgURL.toString() + "|" + disainer_sImgName.toString() + ".jpg|"
                                      + disainer_lgImgName.toString() + ".jpg|" + disainer_Industia_url.toString() + "|" + disainer_Industia_name.toString() + "|"
                                      + disainer_WEB_PAGE.toString() + "|" + disainer_LIC_url.toString() + "|" + disainer_LIC_name.toString() + "|"
                                      + disainer_DESC.toString() + "|" + aromat_URI.toString() + "|" + aromat_Name.toString() + "|" + aromat_pol.toString() + "|"
                                      + aromat_year.toString() + "|" + aromat_sImgName.toString() + ".jpg|" + aromat_lgImgName.toString() + ".jpg|"
                                      + aromat_name_1.toString() + "|" + aromat_decr.toString() + "|" + aromat_group_name.toString() + "|"
                                      + aromat_group_url.toString() + "|" + note_verh_name.toString() + "|" + note_verh_id.toString() + "|"
                                      + note_srd_name.toString() + "|" + note_srd_id.toString() + "|" + note_niz_name.toString() + "|" + note_niz_id.toString();
                                  row = row.replaceAll("(?sim)[\r\n\t]", "");
                                  allcomm = "\r\n" + row.replace(';', ',');
                                ]]></script>
                              </var-def>
                            </empty>
                            <file action="append" type="text" path="${OutFile}">
                              <var name="AllComm"/>
                            </file>
                          </body>
                        </loop>
                      </if>
                      <else><!-- k1 at or below the threshold: designer skipped --></else>
                    </case>
                  </body>
                </loop>
              </if>
              <else><!-- country option without a URI: nothing to do --></else>
            </case>
            <!-- Disabled early-stop check, kept for reference:
                 <case> <if condition="${k0}>4"> <var name="STOP" /> </if> <else> </else> </case> -->
          </if>
          <else><!-- k0 at or below the threshold: country skipped --></else>
        </case>
      </body>
    </loop>
  </var-def>

</config>
Probably ran out of memory. It's a known bug.
Log in to post a comment.
After 25–30 minutes of work, Web-Harvest randomly stopped. Why?
Probably ran out of memory. It's a known bug.