Menu

Trouble with multiple td in table

Help
Queezy
2013-02-13
2013-07-31
  • Queezy

    Queezy - 2013-02-13

    I have successfully filtered down my html file to an xml
    that looks like so (only showing one record):


    Pennsylvania

    123456789

    Here's my code:

    <!--
      Defines "Final": runs an XQuery over the external variable $doc and
      wraps its atomized value (data($doc)) in a single <name> element.
      NOTE(review): the xq-param body is empty as posted; the source variable
      reference was presumably lost from the post and should be confirmed.
    -->
    <var-def name="Final">
    <xquery>
    <xq-param name="doc">

    </xq-param>
    <xq-expression><![CDATA[
    declare variable $doc as node() external;
    let $name := data($doc)
    return
    <name>{$name}</name>
    ]]></xq-expression>
    </xquery>
    </var-def>

    This correctly pulls what I need :

    <name>
    Pennsylvania
    123456789
    </name>

    BUT, when I have any more than one <td>, I get an XQuery error.
    It does not like multiple columns.
    Does anyone see any reason why?

    Thanks.

     

    Last edit: Queezy 2013-02-13
  • Queezy

    Queezy - 2013-02-14

    I figured out my issue. I will post my resolution soon.

     
  • Queezy

    Queezy - 2013-07-31

    A few months late, but here is the solution I found.
    Given a table (class "numberList" in this fake example),
    I first removed all of the <tr> and </tr> tags with regex replaces,
    then used XQuery to pull the data I needed. See below.

    <!--
      Web-Harvest configuration: takes the body of the "numberList" table from
      one result page, strips the <tr> wrappers with two regexp passes, then
      uses XQuery to emit one <numbers> record per <td>.
      Data flow: number -> tr_begin_replace -> tr_end_replace -> Final -> file.
    -->
    <config>
        <!-- Page index used in the request URL and the output file name;
             overwrite="false" is set on this definition. -->
        <var-def name="page_num" overwrite="false">0</var-def>
    
        <!-- Fetch the page, convert the HTML to XML, and keep only the <tbody>
             of the table whose class is "numberList". -->
        <var-def name="number">
            <xpath expression="//table[@class='numberList']/tbody">
                <html-to-xml>
                    <http url="http://www.number.com/number-list.php?page=${page_num}"/>
                </html-to-xml>
            </xpath>
        </var-def>
    
        <!-- Pass 1: remove every opening <tr> (plus trailing whitespace/newline).
             The source is the scraped "number" variable; the original referenced
             "tr_slash_replace", which is not defined anywhere in this config. -->
        <var-def name="tr_begin_replace">
            <regexp replace="true">
                <regexp-pattern>
                <![CDATA[<tr>\s*\n]]>
                </regexp-pattern>
                <regexp-source>
                    <var name="number"/>
                </regexp-source>
                <regexp-result>
                    <template></template>
                </regexp-result>
            </regexp>
        </var-def>
    
        <!-- Pass 2: remove every closing </tr> from the output of pass 1, so the
             <td> elements end up as direct children of <tbody>. -->
        <var-def name="tr_end_replace">
            <regexp replace="true">
                <regexp-pattern>
                <![CDATA[</tr>\s*\n*]]>
                </regexp-pattern>
                <regexp-source>
                    <var name="tr_begin_replace"/>
                </regexp-source>
                <regexp-result>
                    <template></template>
                </regexp-result>
            </regexp>
        </var-def>
    
        <!-- Build the result: for each <td> directly under <tbody>, emit a
             <numbers> element holding the whitespace-normalized text of the
             <a> (name) and <div> (number) descendants. $doc is declared as
             node()* so a sequence of nodes is accepted. -->
        <var-def name="Final">
            <xquery>
                 <xq-param name="doc">
                    <var name="tr_end_replace"/>
                 </xq-param>        
                <xq-expression><![CDATA[
                    declare variable $doc as node()* external;
                    for $td in $doc//tbody/td
                        let $name := $td//a
                        let $number := $td//div
                            return 
                                <numbers>
                                    <name>{normalize-space(data($name))}</name>
                                    <number>{normalize-space(data($number))}</number>           
                                </numbers>  
                ]]></xq-expression> 
            </xquery>
        </var-def>
    
        <!-- Write the generated records to a per-page output file. -->
        <file action="write" path="NumbersFinal_${page_num}.xml">
            <var name="Final"/>
        </file>
    </config>
    
     

Log in to post a comment.