Menu

#26 Different "template" behavior between Windows and Linux

Backlog
accepted
None
5
2015-02-26
2010-04-17
christian
No

See the following Code:

<?xml version="1.0" encoding="UTF-8"?>
<!--
Web-Harvest configuration that fetches the scoach.ch warrant factsheet for
${isin}, merges the extracted tables and is intended to write one
"isin;label;value" line per table row to ${isin}.txt.

A second request re-posts the page with the postback target extracted from
the first response. Observed in the page markup:
onclick="__doPostBack('ctl00$WebPartManager$wp1296363554$wp609332096$TabContainerHeader','0')
which corresponds to the POST parameters
post __EVENTTARGET=ctl00$WebPartManager$wp1296363554$wp609332096$TabContainerHeader
 __EVENTARGUMENT=0
-->
<config charset="UTF-8">
<!-- "isin" is not defined in this file (the definition below is commented
     out), so it has to be supplied from outside the configuration. -->
<!-- <var-def name="isin" overwrite="false">CH0102891550</var-def> -->

<!-- table: second div with class 'TabContainer' of the factsheet page,
     fetched with a plain request and converted from HTML to XML. -->
<var-def name="table">
  <xpath expression="(//div[@class='TabContainer'])[2]">
      <html-to-xml>
            <http url="http://scoach.ch/ENG/Factsheet/Warrants/${isin}#"/>
      </html-to-xml>
  </xpath>
</var-def>

<!-- columns: first table found inside ${table}. -->
<var-def name="columns">
    <xpath expression="(//table)[1]"><template>${table}</template></xpath>
</var-def>

<!-- target: first quoted argument of the __doPostBack(...) call found in
     ${columns}; used below as the __EVENTTARGET of the POST request. -->
<var-def name="target">
            <regexp>
                <regexp-pattern>__doPostBack\('([^']*)'.*</regexp-pattern>
                <regexp-source><template>${columns}</template></regexp-source>
                <regexp-result>
                    <template>${_1}</template>
            </regexp-result>
            </regexp>
</var-def>

<!-- data: second table found inside ${table}. -->
<var-def name="data">
    <xpath expression="(//table)[2]"><template>${table}</template></xpath>
</var-def>

<!-- table2: same XPath as "table", but the page is requested with POST,
     sending __EVENTTARGET=${target} and __EVENTARGUMENT=1.
     NOTE(review): the CDATA around ${target} contains a leading and a
     trailing space, which become part of the parameter value unless the
     engine trims it; confirm this is intended. -->
<var-def name="table2">
  <xpath expression="(//div[@class='TabContainer'])[2]">
      <html-to-xml>
            <http method="post" url="http://scoach.ch/ENG/Factsheet/Warrants/${isin}#">
                                     <http-param name="__EVENTTARGET"><template><![CDATA[ ${target} ]]></template></http-param>
                                     <http-param name="__EVENTARGUMENT">1</http-param>
                            </http>
      </html-to-xml>
  </xpath>
</var-def>

<!-- data2: second table found inside ${table2}. -->
<var-def name="data2">
    <xpath expression="(//table)[2]"><template>${table2}</template></xpath>
</var-def>

<!-- ratio: wraps the first label element of ${table2} in a table row whose
     first cell is the literal text "Ratio". The line breaks and indentation
     inside the template are part of the resulting value. -->
<var-def name="ratio">
    <template>
            <![CDATA[<tr><td>Ratio</td><td>]]>
            <xpath expression="(//label)[1]"><template>${table2}</template></xpath>
            <![CDATA[</td></tr>]]>
    </template>
</var-def>

<!-- mergedata: ${data}, ${data2} and ${ratio} concatenated inside a single
     table/tbody wrapper so they can be queried as one document. -->
<var-def name="mergedata">
    <template><![CDATA[<table><tbody> ${data} ${data2} ${ratio} </tbody></table>]]></template>
</var-def>

<!-- rows: every tr element of ${mergedata}. -->
<var-def name="rows">
    <xpath expression="//tr"><template>${mergedata}</template></xpath>
</var-def>

<!-- Writes ${isin}.txt: for each row, builds "${isin}; td[1]; td[2]"
     followed by ${sys.lf}, then removes every markup tag together with the
     newline characters that directly follow it (regexp with replace="true"
     and an empty result). Newlines that do not directly follow a tag,
     including those written literally inside regexp-source, are not matched
     by the pattern. -->
<file action="write" type="text" path="${isin}.txt">
    <loop item="row" index="i" filter="unique">
    <list><var name="rows"/></list>
    <body>
            <template>
                    <regexp replace="true">
                                <regexp-pattern><![CDATA[<[^<>]*>\n*]]></regexp-pattern>
                    <regexp-source>
                            ${isin};
                            <xpath expression="//tr/td[1]"><template><![CDATA[${row}]]></template></xpath>;
                            <xpath expression="//tr/td[2]"><template><![CDATA[${row}]]></template></xpath>
                            ${sys.lf}
                    </regexp-source>
                    <regexp-result>
                            <template></template>
                    </regexp-result>
                            </regexp>
            </template>
    </body>
    </loop>
</file>

<!-- The remaining file processors write intermediate values (rows,
     mergedata, data2, target) to separate files. -->
<file action="write" type="text" path="rows.txt">
    <template><![CDATA[${rows}]]></template>
</file>

<file action="write" type="text" path="table.html">
    <template><![CDATA[ ${mergedata} ]]></template>
</file>

<file action="write" type="text" path="foo4.txt">
    <template><![CDATA[ ${data2} ]]></template>
</file>

<file action="write" type="text" path="foo3.txt">
    <template><![CDATA[ ${target} ]]></template>
</file>
</config>

On Windows, this produces a file like the one I expected:

CH0102891550;Name;Put-Warrant
CH0102891550;ISIN;CH0102891550
CH0102891550;Valor;10289155
CH0102891550;Symbol;BLIBA
CH0102891550;Type;Warrants
CH0102891550;Name;Put
CH0102891550;Strike price;97.00 EUR
CH0102891550;Exercise type;European
CH0102891550;First listing date;20/08/2009
CH0102891550;Date of maturity;13/12/2010
CH0102891550;Last trading day;13/12/2010
CH0102891550;Currency safeguarded;No
CH0102891550;Issuer;BNP Paribas
CH0102891550;Name;3M EURIBOR
CH0102891550;Valor;-
CH0102891550;ISIN;EU0009652783
CH0102891550;Currency;-
CH0102891550;Ratio;3M EURIBOR   (Ratio: 1.00)

But on Linux there are some more newlines:

CH0102891550;
Name
;
Put-Warrant

CH0102891550;
ISIN
;
CH0102891550

CH0102891550;
Valor
;
10289155

CH0102891550;
Symbol
;
BLIBA

CH0102891550;
Type
;
Warrants

CH0102891550;
Name
;
Put

CH0102891550;
Strike price
;
97.00 EUR

CH0102891550;
Exercise type
;
European

CH0102891550;
First listing date
;
20/08/2009

CH0102891550;
Date of maturity
;
13/12/2010

CH0102891550;
Last trading day
;
13/12/2010

CH0102891550;
Currency safeguarded
;
No

CH0102891550;
Issuer
;
BNP Paribas

CH0102891550;
Name
;
3M EURIBOR

CH0102891550;
Valor
;
-

CH0102891550;
ISIN
;
EU0009652783

CH0102891550;
Currency
;
-

CH0102891550;
Ratio
;

      3M EURIBOR   (Ratio: 1.00)

Discussion

  • Piotr Dyraga

    Piotr Dyraga - 2012-11-15
    • milestone: --> Backlog
     
  • Piotr Dyraga

    Piotr Dyraga - 2012-11-16
    • assigned_to: Piotr Dyraga
     
  • Piotr Dyraga

    Piotr Dyraga - 2012-11-16

    I tried to reproduce this issue with the configuration attached to the bug, but it seems that scoach has reorganized their site somehow and the configuration is no longer valid. Anyway, I have written a configuration doing a similar thing:

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Reproduction configuration: extracts rows from a scoach.ch listing
         page and builds the same "isin;name;quote" text twice, differing
         only in how the current row is passed to each xpath processor. -->
    <config xmlns="http://web-harvest.sourceforge.net/schema/2.1/core">
    
      <!-- table: every tr with class 'hi' below a div with class 'full' on
           the fetched page, after HTML-to-XML conversion. -->
      <set var="table">
         <xpath expression="//div[@class='full']//tr[@class='hi']">
        <html-to-xml>
          <http url="http://www.scoach.ch/de/basiswert/nasdaq-100-US6311011026"/>
        </html-to-xml>
        </xpath>
      </set>
    
      <!-- output: for each row, the ISIN, name and quote cell texts separated
           by ';' and followed by ${sys.lf}. The row is handed to each xpath
           through a template whose CDATA pads ${row} with one space on each
           side. -->
      <set var="output">
        <loop item="row">
          <list>
            <get var="table"/>
          </list>
          <body>
            <template>          
              <xpath expression="//td[@class='isinColumn']/a/text()">          
                <template><![CDATA[ ${row} ]]></template>
              </xpath>;
              <xpath expression="//td[@class='nameColumn']/a/text()">          
                <template><![CDATA[ ${row} ]]></template>
              </xpath>;
              <xpath expression="//td[@class='quoteColumn']/text()">          
                <template><![CDATA[ ${row} ]]></template>
              </xpath>
              ${sys.lf}
            </template>
          </body>
        </loop>
      </set>
    
      <!-- output2: same extraction as "output", but the row is handed to
           each xpath directly with a get processor instead of a template. -->
      <set var="output2">
        <loop item="row">
          <list>
            <get var="table"/>
          </list>
          <body>
            <template>          
              <xpath expression="//td[@class='isinColumn']/a/text()">          
                <get var="row" />
              </xpath>;
              <xpath expression="//td[@class='nameColumn']/a/text()">          
                <get var="row" />
              </xpath>;
              <xpath expression="//td[@class='quoteColumn']/text()">          
                <get var="row" />
              </xpath>
              ${sys.lf}
            </template>
          </body>
        </loop>
      </set>
    
    </config>
    

    I tried to execute it on Windows and Linux with Web-Harvest 2.0beta1 and 2.1.0rc1 (upcoming). In all cases, newlines were added at the end of each 'template'.

     

    Last edit: Piotr Dyraga 2012-11-16
  • Piotr Dyraga

    Piotr Dyraga - 2012-11-16
    • status: open --> accepted
     

Log in to post a comment.