jbrooks78 - 2007-03-29

I want to scrape data from a site that requires an HTTP parameter (an `http-param`) named "ADD". I have thousands of addresses in a file (either a CSV file or an XML file, whichever makes more sense). For each address I need to submit the address and then data-mine the returned data. Any idea how I can write a config file that submits each of those addresses in turn?

Any ideas would be appreciated.

Here's my file to download a single address:

<?xml version="1.0" encoding="UTF-8"?>
<!--
    Scraper configuration for a single property lookup.

    Flow, as written below:
      1. Define the report URL pieces and one hard-coded street address.
      2. POST the address as the "ADD" form parameter (plus T1=Search) and
         convert the HTML response to XML.
      3. Run an XQuery over that XML to pull four fields out of fixed table
         positions and write them to KingCounty/data/parcel.xml.

    NOTE(review): the table indexes used in the XQuery (table[2], table[11])
    are positional and depend on the exact layout of the returned page;
    verify them against a live response.
-->
<config charset="ISO-8859-1">

    <!-- Target site and report page; combined into the request URL below. -->
    <var-def name="baseUrl">http://www5.metrokc.gov/reports/</var-def>
    <var-def name="property">property_report.asp</var-def>
    <!-- "id" is not referenced anywhere else in this config. -->
    <var-def name="id">1</var-def>
    <!-- The single address submitted by this run. -->
    <var-def name="address">12325 38TH AVE NE</var-def>

    <!-- "content" holds the search result page after HTML-to-XML conversion. -->
    <var-def name="content">
        <html-to-xml>
            <!-- Gets the data from the county's site -->
            <!-- sys.fullUrl presumably joins baseUrl and property into one absolute URL; confirm against the Web-Harvest docs. -->
            <http method="post" url="${sys.fullUrl(baseUrl, property)}">
                <http-param name="ADD"><var name="address"/></http-param>
                <http-param name="T1">Search</http-param>
            </http>
        </html-to-xml>
    </var-def>

    <!-- Writes the extracted <parcel> record to disk as UTF-8. -->
    <file action="write" path="KingCounty/data/parcel.xml" charset="UTF-8">
        <xquery>
            <!-- Exposes the converted page to the query as the external variable $doc. -->
            <xq-param name="doc">
                <var name="content"/>
            </xq-param>

            <!-- replace the "item" below with the xpath to the table containing the values  -->
            <!-- <xq-param name="item" type="node()"><var name="/html/body/div/table[11]/tbody"/></xq-param> -->
            <xq-expression><![CDATA[
                declare variable $doc as node() external;

                (: Each value is read from a fixed row/cell of the result page. :)
                let $address := data($doc/html/body/div/table[11]/tbody/tr[1]/td[2])
                let $parcelNo := data($doc/html/body/div/table[2]/tbody/tr[1]/td[4]/b/font)
                let $yr := data($doc/html/body/div/table[11]/tbody/tr[4]/td[2])
                let $sf := data($doc/html/body/div/table[11]/tbody/tr[6]/td[2])
                    return
                        <parcel>
                            <parcelNo>{normalize-space($parcelNo)}</parcelNo>
                            <address>{normalize-space($address)}</address>
                            <yr>{normalize-space($yr)}</yr>
                            <sf>{normalize-space($sf)}</sf>
                        </parcel>
            ]]></xq-expression>
        </xquery>
    </file>

</config>