From: <jen...@us...> - 2008-10-22 11:39:25
|
Revision: 1400 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1400&view=rev Author: jenslehmann Date: 2008-10-22 11:39:17 +0000 (Wed, 22 Oct 2008) Log Message: ----------- - moved DBpedia Navigator page rank calculator to scripts - integrated a simple ini reader to read DB configuration values Added Paths: ----------- trunk/lib/ini4j-0.3.2.jar trunk/resources/logos/DL-Learner2.svg trunk/src/dbpedia-navigator/settings.ini.dist trunk/src/dl-learner/org/dllearner/scripts/CalculatePageRank.java Removed Paths: ------------- trunk/src/dbpedia-navigator/CalculatePageRank.java trunk/src/dl-learner/org/dllearner/test/CalculatePageRank.java Property Changed: ---------------- trunk/src/dbpedia-navigator/ Added: trunk/lib/ini4j-0.3.2.jar =================================================================== (Binary files differ) Property changes on: trunk/lib/ini4j-0.3.2.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Added: trunk/resources/logos/DL-Learner2.svg =================================================================== --- trunk/resources/logos/DL-Learner2.svg (rev 0) +++ trunk/resources/logos/DL-Learner2.svg 2008-10-22 11:39:17 UTC (rev 1400) @@ -0,0 +1,23 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Created with Inkscape (http://www.inkscape.org/) --> +<svg xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" width="210mm" height="297mm" id="svg2" sodipodi:version="0.32" inkscape:version="0.45.1" sodipodi:docbase="c:\Data\work\DL-Lerner" sodipodi:docname="DL-Learner.svg" inkscape:output_extension="org.inkscape.output.svg.inkscape"> + <sodipodi:namedview id="base" pagecolor="#ffffff" bordercolor="#666666" borderopacity="1.0" inkscape:pageopacity="0.0" inkscape:pageshadow="2" inkscape:zoom="1.4329667" inkscape:cx="430.84137" inkscape:cy="985.67425" inkscape:document-units="px" inkscape:current-layer="layer1" inkscape:window-width="1400" inkscape:window-height="964" inkscape:window-x="-4" inkscape:window-y="-4" showguides="true" inkscape:guide-bbox="true"/> + <defs id="defs4"/> + <metadata id="metadata7"> + <rdf:RDF> + <cc:Work rdf:about=""> + <dc:format>image/svg+xml</dc:format> + <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> + </cc:Work> + </rdf:RDF> + </metadata> + <g inkscape:label="Ebene 1" id="layer1" inkscape:groupmode="layer"> + <path style="fill: rgb(11, 71, 157); fill-opacity: 1;" d="M 172.79279,291.17739 C 155.51538,279.07582 149.17819,259.06513 147.67832,211.87443 C 146.91282,187.78924 150.80965,163.19822 143.23382,167.38534 C 136.23639,171.25277 75.153792,185.39046 70.978132,188.85594 C 61.136292,197.02394 44.781322,197.64857 28.594962,190.47464 C 16.313771,185.0315 11.922172,180.63825 8.0887989,169.96072 C 1.6224726,151.94936 8.2476549,130.08692 22.654819,121.89431 C 31.304862,116.97548 32.585932,114.07442 32.605632,99.360184 C 32.625492,84.520004 31.429722,81.851934 22.746321,77.361574 C 16.854053,74.314564 10.915664,66.862284 8.0375759,58.903064 C -1.8946914,31.435874 17.655616,3.669334 46.927412,3.669334 C 61.644322,3.669334 81.102252,14.503334 84.261252,24.456484 C 85.873312,29.535624 91.068792,32.151784 103.13775,33.961634 C 127.78529,37.657754 160.02473,46.894428 180.87021,62.946968 C 210.73114,85.942048 215.11542,135.19295 168.79316,160.30854 L 171.27428,140.18654 L 172.57326,203.1074 C 173.6854,248.10594 174.92379,257.4653 181.18305,268.17745 C 190.43421,284.00997 202.14008,286.27349 210.33237,273.77046 C 213.67216,268.6733 211.36,258.64117 214.90341,262.01102 C 218.68734,265.60961 207.5248,260.05346 200.64295,256.93706 C 174.89973,245.2794 173.72628,204.27373 198.70006,189.04425 C 220.65915,175.65318 254.6993,182.85534 260.20405,205.97932 C 265.03562,226.27545 275.96104,222.99092 275.96104,213.14683 C 275.96104,210.33806 278.2422,205.38873 281.33184,202.29908 C 286.25311,197.37783 304.83109,196.68155 431.2179,196.68155 C 557.60472,196.68155 576.18272,197.37783 581.10396,202.29908 C 584.19363,205.38873 586.35142,210.33806 586.35142,213.14683 C 586.35142,223.09383 597.51529,218.69176 603.00277,205.704 C 616.13114,174.63178 665.28303,174.92125 678.36309,206.22628 C 688.16844,229.69377 673.46416,258.51422 649.38044,263.03235 C 631.7908,266.33219 605.49899,256.46477 602.85337,240.92346 C 600.68994,228.21468 586.72151,224.15111 586.72151,233.96156 C 586.72151,236.77033 584.19363,241.5963 581.10396,244.68594 C 576.18272,249.60719 557.60472,250.30348 431.2179,250.30348 C 304.83109,250.30348 286.25311,249.60719 281.33184,244.68594 C 278.2422,241.5963 275.71431,236.77033 275.71431,233.96156 C 275.71431,231.15279 272.49699,228.8547 268.56472,228.8547 C 264.04427,228.8547 263.26559,231.2379 261.41513,235.33548 C 258.25561,242.33179 240.18123,252.51704 233.39986,261.97969 C 231.05045,265.25804 231.12476,267.09427 229.79305,270.79119 C 221.024,295.13464 201.17737,296.77581 190.44036,296.77581 C 185.13027,296.77581 177.18887,294.25652 172.79279,291.17739 z M 256.30608,216.43503 C 252.48547,196.06945 229.95289,182.73968 212.26169,190.37936 L 202.431,194.6246 L 215.6144,195.85562 C 225.99298,196.82475 229.03518,198.7548 229.91358,204.92739 C 230.62027,209.89336 233.97624,213.18667 239.06676,213.90968 C 248.0913,215.19144 252.63388,227.10648 249.09552,240.21478 C 247.26677,246.98967 247.92415,246.63514 252.67059,238.28672 C 256.49546,231.55925 257.74002,224.07862 256.30608,216.43503 z M 674.55712,216.43503 C 670.73652,196.06945 648.20395,182.73968 630.51274,190.37936 L 620.68206,194.6246 L 633.86543,195.85562 C 644.24404,196.82475 647.28622,198.7548 648.16462,204.92739 C 648.87132,209.89336 652.2273,213.18667 657.31781,213.90968 C 666.34235,215.19144 670.88491,227.10648 667.34657,240.21478 C 665.51782,246.98967 666.17519,246.63514 670.92166,238.28672 C 674.74651,231.55925 675.99108,224.07862 674.55712,216.43503 z M 81.141102,148.51392 C 77.320502,128.14835 54.787912,114.81857 37.096722,122.45825 L 27.266042,126.70349 L 40.449432,127.93452 C 50.828012,128.90364 53.870212,130.83369 54.748612,137.00629 C 55.455302,141.97225 58.811272,145.26556 63.901792,145.98858 C 72.926322,147.27034 77.468912,159.18537 73.930562,172.29368 C 72.101802,179.06856 72.759182,178.71403 77.505622,170.36562 C 81.330492,163.63814 82.575052,156.15752 81.141102,148.51392 z M 116.17614,160.7986 C 141.97051,151.14769 147.39833,144.70674 134.63678,138.89219 C 125.66504,134.80439 116.50452,117.88804 116.50452,106.57102 C 116.50452,103.76695 117.92812,93.442684 121.69207,85.652204 C 129.46763,69.558634 128.64401,68.476964 103.33419,61.542574 C 90.594212,58.052054 89.424242,58.506384 75.629532,72.301094 C 63.357522,84.573094 61.226592,88.936934 61.226592,101.79623 C 61.226592,114.50748 62.665802,117.54418 70.349332,121.04504 C 80.164802,125.51726 89.824952,144.35897 89.824952,159.03138 C 89.824952,169.96433 91.396792,170.06975 116.17614,160.7986 z M 191.95975,98.466804 C 188.13915,78.101214 156.53447,63.375738 138.84329,71.015418 C 127.06112,91.441957 120.37412,135.20415 152.64798,135.50575 C 158.06219,139.97 184.27568,137.04112 189.02213,128.69271 C 192.84699,121.96524 193.3937,106.11038 191.95975,98.466804 z M 81.141102,37.695274 C 77.320502,17.329704 54.787912,3.999934 37.096722,11.639604 L 27.266042,15.884844 L 40.449432,17.115874 C 50.828012,18.084994 53.870212,20.015054 54.748612,26.187634 C 55.455302,31.153604 58.811272,34.446914 63.901792,35.169934 C 72.926322,36.451694 77.468912,48.366724 73.930562,61.475034 C 72.101802,68.249904 72.759182,67.895384 77.505622,59.546974 C 81.330492,52.819494 82.575052,45.338874 81.141102,37.695274 z " id="path3320" sodipodi:nodetypes="cssssssssssssscccssssssssssssssssssssssssscccccsssscccccsssscccccssssccssssssssccccscccccssssc"/> + <text xml:space="preserve" style="font-size: 55.2859px; font-style: normal; font-weight: bold; fill: rgb(0, 0, 0); fill-opacity: 1; stroke: none; stroke-width: 1px; stroke-linecap: butt; stroke-linejoin: miter; stroke-opacity: 1; font-family: Arial;" x="200.904" y="335.529" id="text3326" transform="scale(1.42565, 0.701436)"><tspan sodipodi:role="line" id="tspan3328" x="200.904" y="335.529" style="fill: rgb(255, 255, 255); font-family: Arial;">Learner</tspan></text> + <path style="fill: rgb(11, 71, 157); fill-opacity: 1;" d="M 618.73497,260.66384 C 587.45211,246.07999 588.77609,202.80526 618.8551,185.53936 C 628.83409,179.81123 645.538,176.88635 661.93157,186.1828 C 687.86039,200.88648 690.98611,234.12649 670.09714,254.1394 C 656.31104,267.34731 638.04764,269.66728 618.73497,260.66384 z M 675.78808,216.71692 C 671.75368,195.21171 647.96021,181.13602 629.27904,189.20321 L 618.89825,193.686 L 632.81937,194.98592 C 643.77872,196.00927 646.99116,198.04732 647.91871,204.56532 C 648.66495,209.80918 652.2087,213.28678 657.58408,214.05025 C 667.11362,215.40374 671.91039,227.98552 668.17404,241.82735 C 666.24296,248.98133 666.93712,248.60697 671.94916,239.79139 C 675.98807,232.68746 677.30227,224.78823 675.78808,216.71692 z " id="path3531" sodipodi:nodetypes="cssscccccssssc"/> + <path style="fill: rgb(11, 71, 157); fill-opacity: 1;" d="M 199.99989,259.9044 C 168.71703,245.32055 170.04101,202.04582 200.12002,184.77992 C 210.09901,179.05179 226.80292,176.12691 243.19649,185.42336 C 269.12531,200.12704 272.25103,233.36705 251.36206,253.37996 C 237.57596,266.58787 219.31256,268.90784 199.99989,259.9044 z M 257.053,215.95748 C 253.0186,194.45227 229.22513,180.37658 210.54396,188.44377 L 200.16317,192.92656 L 214.08429,194.22648 C 225.04364,195.24983 228.25608,197.28788 229.18363,203.80588 C 229.92987,209.04974 233.47362,212.52734 238.849,213.29081 C 248.37854,214.6443 253.17531,227.22608 249.43896,241.06791 C 247.50788,248.22189 248.20204,247.84753 253.21408,239.03195 C 257.25299,231.92802 258.56719,224.02879 257.053,215.95748 z " id="path3570" sodipodi:nodetypes="cssscccccssssc"/> + <path style="fill: rgb(11, 71, 157); fill-opacity: 1;" d="M 25.809737,196.24851 C -5.4731222,181.66466 -5.6295116,135.92265 26.423324,119.64366 C 36.682208,114.43338 53.106224,110.99065 69.499794,120.2871 C 95.428614,134.99078 98.554334,168.23079 77.665364,188.2437 C 63.879264,201.45161 45.122407,205.25195 25.809737,196.24851 z M 83.356304,150.82122 C 79.321904,129.31601 55.528434,115.24032 36.847264,123.30751 L 26.466474,127.7903 L 40.387594,129.09022 C 51.346944,130.11357 54.559384,132.15162 55.486934,138.66962 C 56.233174,143.91348 59.776924,147.39108 65.152304,148.15455 C 74.681844,149.50804 79.478614,162.08982 75.742264,175.93165 C 73.811184,183.08563 74.505344,182.71127 79.517384,173.89569 C 83.556294,166.79176 84.870494,158.89253 83.356304,150.82122 z " id="path3572" sodipodi:nodetypes="cssscccccssssc"/> + <path style="fill: rgb(11, 71, 157); fill-opacity: 1;" d="M 26.303191,83.246967 C -4.9796692,68.663117 -3.6556892,25.388387 26.423321,8.1224867 C 36.402311,2.3943567 53.106221,-0.53052327 69.499791,8.7659267 C 95.428611,23.469607 98.554331,56.709617 77.665361,76.722527 C 63.879261,89.930437 45.615861,92.250407 26.303191,83.246967 z M 83.356301,39.300047 C 79.321901,17.794837 55.528431,3.7191467 36.847261,11.786337 L 26.466471,16.269127 L 40.387591,17.569047 C 51.346941,18.592397 54.559381,20.630447 55.486931,27.148447 C 56.233171,32.392307 59.776921,35.869907 65.152301,36.633377 C 74.681841,37.986867 79.478611,50.568647 75.742261,64.410477 C 73.811181,71.564457 74.505341,71.190097 79.517381,62.374517 C 83.556291,55.270587 84.870491,47.371357 83.356301,39.300047 z " id="path3574" sodipodi:nodetypes="cssscccccssssc"/> + <path style="fill: rgb(11, 71, 157); fill-opacity: 1;" d="M 134.7439,150.90934 C 103.46104,135.36982 104.78502,89.259302 134.86403,70.861971 C 144.84302,64.758478 163.4906,62.697761 178.92741,71.547575 C 204.36449,86.130474 208.74046,123.50995 187.09298,143.95736 C 173.30688,156.9792 154.05657,160.50278 134.7439,150.90934 z M 191.79701,104.08259 C 187.76261,81.168148 163.96914,66.170081 145.28797,74.765912 L 134.90718,79.542459 L 148.8283,80.927562 C 159.78765,82.017972 163.00009,84.189575 163.92764,91.134698 C 164.67388,96.722187 168.21763,100.42767 173.59301,101.24117 C 183.12255,102.68336 187.91932,116.08962 184.18297,130.8385 C 182.25189,138.46128 182.94605,138.06239 187.95809,128.66913 C 191.997,121.09968 193.3112,112.68281 191.79701,104.08259 z " id="path3576" sodipodi:nodetypes="cssscccccssssc"/> + </g> +</svg> \ No newline at end of file Property changes on: trunk/src/dbpedia-navigator ___________________________________________________________________ Modified: svn:ignore - temp .htaccess main.wsdl def0.xsd def1.xsd test.html test.php + temp .htaccess main.wsdl def0.xsd def1.xsd test.html test.php settings.ini Deleted: trunk/src/dbpedia-navigator/CalculatePageRank.java =================================================================== --- trunk/src/dbpedia-navigator/CalculatePageRank.java 2008-10-22 11:20:08 UTC (rev 1399) +++ trunk/src/dbpedia-navigator/CalculatePageRank.java 2008-10-22 11:39:17 UTC (rev 1400) @@ -1,231 +0,0 @@ -package org.dllearner.test; - -import java.io.BufferedReader; -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.io.IOException; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.ResultSet; -import java.sql.Statement; - -public class CalculatePageRank { - - private final String wikilinks="../pagelinks_en.nt"; - private final String labels="../articles_label_en.nt"; - private final String categories="../yago_en.nt"; - - private void calculateLinks() - { - try{ - Statement stmt; - ResultSet rs; - int number; - - Class.forName("com.mysql.jdbc.Driver"); - - String url = - "jdbc:mysql://localhost:3306/navigator_db"; - - Connection con = DriverManager.getConnection( - url,"navigator", "dbpedia"); - - stmt = con.createStatement(); - BufferedReader in = new BufferedReader(new FileReader(wikilinks)); - - String line; - String[] split; - String name; - int i=0; - while ((line=in.readLine())!=null) - { - split=line.split(" "); - name=split[2].substring(1, split[2].length()-1); - rs=stmt.executeQuery("SELECT number FROM rank WHERE name='"+name+"'"); - if (rs.next()){ - number=rs.getInt(1); - number++; - stmt.executeUpdate("UPDATE rank SET number="+number+" WHERE name='"+name+"'"); - } - else{ - try{ - stmt.executeUpdate("INSERT INTO rank (name,number) VALUES ('"+name+"',1)"); - }catch(Exception e) - {} - } - if (i%100000==0) System.out.println(i); - i++; - } - - in.close(); - con.close(); - } catch (FileNotFoundException e) - { - System.out.println("File not found"); - } catch (IOException e) - { - System.out.println("IOException"); - } catch (Exception e) - { - e.printStackTrace(); - } - } - - private void addLabels() - { - try{ - Statement stmt; - ResultSet rs; - - Class.forName("com.mysql.jdbc.Driver"); - - String url = - "jdbc:mysql://localhost:3306/navigator_db"; - - Connection con = DriverManager.getConnection( - url,"navigator", "dbpedia"); - - stmt = con.createStatement(); - BufferedReader in = new BufferedReader(new FileReader(labels)); - - String line; - String[] split; - String name; - String label; - int i=0; - while ((line=in.readLine())!=null) - { - split=line.split(">"); - name=split[0].substring(1); - label=split[2].substring(split[2].indexOf("\"")+1, split[2].lastIndexOf("\"")); - rs=stmt.executeQuery("SELECT number FROM rank WHERE name='"+name+"'"); - if (rs.next()){ - stmt.executeUpdate("UPDATE rank SET label=\""+label+"\" WHERE name='"+name+"'"); - } - else{ - try{ - stmt.executeUpdate("INSERT INTO rank (name,label) VALUES ('"+name+"',\""+label+"\")"); - }catch(Exception e) - {} - } - if (i%100000==0) System.out.println(i); - i++; - } - - in.close(); - con.close(); - } catch (FileNotFoundException e) - { - System.out.println("File not found"); - } catch (IOException e) - { - System.out.println("IOException"); - } catch (Exception e) - { - e.printStackTrace(); - } - } - - private void calculateCategories() - { - try{ - Statement stmt; - - Class.forName("com.mysql.jdbc.Driver"); - - String url = - "jdbc:mysql://localhost:3306/navigator_db"; - - Connection con = DriverManager.getConnection( - url,"navigator", "dbpedia"); - - stmt = con.createStatement(); - - stmt.executeUpdate("ALTER TABLE rank DROP COLUMN category"); - - BufferedReader in = new BufferedReader(new FileReader(categories)); - - String line; - String[] split; - String name; - String label; - String pred; - int i=0; - while ((line=in.readLine())!=null) - { - split=line.split(">"); - name=split[0].substring(1); - pred=split[1].substring(2); - if (pred.equals("http://www.w3.org/2000/01/rdf-schema#label")) - label=split[2].substring(split[2].indexOf("\"")+1, split[2].lastIndexOf("\"")); - else - label=split[2].substring(2); - if (pred.equals("http://www.w3.org/2000/01/rdf-schema#label")){ - try{ - stmt.executeUpdate("INSERT INTO categories (category,label) VALUES (\""+name+"\",\""+label+"\")"); - }catch(Exception e) - {} - } - else{ - if (name.startsWith("http://dbpedia.org/resource")){ - try{ - stmt.executeUpdate("INSERT INTO articlecategories (name,category) VALUES ('"+name+"','"+label+"')"); - }catch(Exception e) - {} - }else{ - try{ - stmt.executeUpdate("INSERT INTO classhierarchy (father,child) VALUES ('"+label+"','"+name+"')"); - }catch(Exception e) - {} - } - } - if (i%100000==0) System.out.println(i); - i++; - } - - in.close(); - con.close(); - } catch (FileNotFoundException e) - { - System.out.println("File not found"); - } catch (IOException e) - { - System.out.println("IOException"); - } catch (Exception e) - { - e.printStackTrace(); - } - } - - private void copyNumbers() - { - try{ - Statement stmt; - - Class.forName("com.mysql.jdbc.Driver"); - - String url = - "jdbc:mysql://localhost:3306/navigator_db"; - - Connection con = DriverManager.getConnection( - url,"navigator", "dbpedia"); - - stmt = con.createStatement(); - - stmt.executeUpdate("UPDATE articlecategories SET number=(SELECT number FROM rank WHERE articlecategories.name=rank.name)"); - - con.close(); - } catch (Exception e) - { - e.printStackTrace(); - } - } - - public static void main(String[] args){ - CalculatePageRank cal=new CalculatePageRank(); - cal.calculateLinks(); - cal.addLabels(); - cal.calculateCategories(); - cal.copyNumbers(); - } -} \ No newline at end of file Added: trunk/src/dbpedia-navigator/settings.ini.dist =================================================================== --- trunk/src/dbpedia-navigator/settings.ini.dist (rev 0) +++ trunk/src/dbpedia-navigator/settings.ini.dist 2008-10-22 11:39:17 UTC (rev 1400) @@ -0,0 +1,20 @@ +[general] +wsdluri = http://localhost:8181/services?wsdl + +; URI pointing to DBpedia SPARQL endpoint +dbpediauri = http://dbpedia.openlinksw.com:8890/sparql + +; which predefined endpoint settings to use in DL-Learner +endpoint = DBPEDIA + +[database] +; the type of database server +type = mysql +; the server, where the mysql database is located +server = localhost +; the user, that has rights to access the navigator database +user = navigator +; the password of that user +pass = dbpedia +; the name of the used database +name = navigator_db Copied: trunk/src/dl-learner/org/dllearner/scripts/CalculatePageRank.java (from rev 1396, trunk/src/dl-learner/org/dllearner/test/CalculatePageRank.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/CalculatePageRank.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/CalculatePageRank.java 2008-10-22 11:39:17 UTC (rev 1400) @@ -0,0 +1,280 @@ +/** + * Copyright (C) 2007-2008, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.scripts; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.Statement; +import java.util.prefs.BackingStoreException; +import java.util.prefs.Preferences; + +import org.ini4j.IniFile; + +/** + * Fills that database needed for running DBpedia Navigator. + * First move the mentioned DBpedia files to the specified + * directory, then execute this script. Database settings are + * taken from the settings.ini file of DBpedia Navigator. + * + * @author Sebastian Knappe + * @author Jens Lehmann + * + */ +public class CalculatePageRank { + + private final String datasetDir = "src/dbpedia-navigator/data/"; + private final String wikilinks = datasetDir + "pagelinks_en.nt"; + private final String labels = datasetDir + "articles_label_en.nt"; + private final String categories = datasetDir + "yago_en.nt"; + + private static String dbServer; + private static String dbName; + private static String dbUser; + private static String dbPass; + + private void calculateLinks() + { + try{ + Statement stmt; + ResultSet rs; + int number; + + Class.forName("com.mysql.jdbc.Driver"); + + String url = + "jdbc:mysql://"+dbServer+":3306/"+dbName; + + Connection con = DriverManager.getConnection( + url, dbUser, dbPass); + + stmt = con.createStatement(); + BufferedReader in = new BufferedReader(new FileReader(wikilinks)); + + String line; + String[] split; + String name; + int i=0; + while ((line=in.readLine())!=null) + { + split=line.split(" "); + name=split[2].substring(1, split[2].length()-1); + rs=stmt.executeQuery("SELECT number FROM rank WHERE name='"+name+"'"); + if (rs.next()){ + number=rs.getInt(1); + number++; + stmt.executeUpdate("UPDATE rank SET number="+number+" WHERE name='"+name+"'"); + } + else{ + try{ + stmt.executeUpdate("INSERT INTO rank (name,number) VALUES ('"+name+"',1)"); + }catch(Exception e) + {} + } + if (i%100000==0) System.out.println(i); + i++; + } + + in.close(); + con.close(); + } catch (FileNotFoundException e) + { + System.out.println("File not found"); + } catch (IOException e) + { + System.out.println("IOException"); + } catch (Exception e) + { + e.printStackTrace(); + } + } + + private void addLabels() + { + try{ + Statement stmt; + ResultSet rs; + + Class.forName("com.mysql.jdbc.Driver"); + + String url = + "jdbc:mysql://localhost:3306/navigator_db"; + + Connection con = DriverManager.getConnection( + url,"navigator", "dbpedia"); + + stmt = con.createStatement(); + BufferedReader in = new BufferedReader(new FileReader(labels)); + + String line; + String[] split; + String name; + String label; + int i=0; + while ((line=in.readLine())!=null) + { + split=line.split(">"); + name=split[0].substring(1); + label=split[2].substring(split[2].indexOf("\"")+1, split[2].lastIndexOf("\"")); + rs=stmt.executeQuery("SELECT number FROM rank WHERE name='"+name+"'"); + if (rs.next()){ + stmt.executeUpdate("UPDATE rank SET label=\""+label+"\" WHERE name='"+name+"'"); + } + else{ + try{ + stmt.executeUpdate("INSERT INTO rank (name,label) VALUES ('"+name+"',\""+label+"\")"); + }catch(Exception e) + {} + } + if (i%100000==0) System.out.println(i); + i++; + } + + in.close(); + con.close(); + } catch (FileNotFoundException e) + { + System.out.println("File not found"); + } catch (IOException e) + { + System.out.println("IOException"); + } catch (Exception e) + { + e.printStackTrace(); + } + } + + private void calculateCategories() + { + try{ + Statement stmt; + + Class.forName("com.mysql.jdbc.Driver"); + + String url = + "jdbc:mysql://localhost:3306/navigator_db"; + + Connection con = DriverManager.getConnection( + url,"navigator", "dbpedia"); + + stmt = con.createStatement(); + + stmt.executeUpdate("ALTER TABLE rank DROP COLUMN category"); + + BufferedReader in = new BufferedReader(new FileReader(categories)); + + String line; + String[] split; + String name; + String label; + String pred; + int i=0; + while ((line=in.readLine())!=null) + { + split=line.split(">"); + name=split[0].substring(1); + pred=split[1].substring(2); + if (pred.equals("http://www.w3.org/2000/01/rdf-schema#label")) + label=split[2].substring(split[2].indexOf("\"")+1, split[2].lastIndexOf("\"")); + else + label=split[2].substring(2); + if (pred.equals("http://www.w3.org/2000/01/rdf-schema#label")){ + try{ + stmt.executeUpdate("INSERT INTO categories (category,label) VALUES (\""+name+"\",\""+label+"\")"); + }catch(Exception e) + {} + } + else{ + if (name.startsWith("http://dbpedia.org/resource")){ + try{ + stmt.executeUpdate("INSERT INTO articlecategories (name,category) VALUES ('"+name+"','"+label+"')"); + }catch(Exception e) + {} + }else{ + try{ + stmt.executeUpdate("INSERT INTO classhierarchy (father,child) VALUES ('"+label+"','"+name+"')"); + }catch(Exception e) + {} + } + } + if (i%100000==0) System.out.println(i); + i++; + } + + in.close(); + con.close(); + } catch (FileNotFoundException e) + { + System.out.println("File not found"); + } catch (IOException e) + { + System.out.println("IOException"); + } catch (Exception e) + { + e.printStackTrace(); + } + } + + private void copyNumbers() + { + try{ + Statement stmt; + + Class.forName("com.mysql.jdbc.Driver"); + + String url = + "jdbc:mysql://localhost:3306/navigator_db"; + + Connection con = DriverManager.getConnection( + url,"navigator", "dbpedia"); + + stmt = con.createStatement(); + + stmt.executeUpdate("UPDATE articlecategories SET number=(SELECT number FROM rank WHERE articlecategories.name=rank.name)"); + + con.close(); + } catch (Exception e) + { + e.printStackTrace(); + } + } + + public static void main(String[] args) throws BackingStoreException{ + + // reading values from ini file + String iniFile = "src/dbpedia-navigator/settings.ini"; + Preferences prefs = new IniFile(new File(iniFile)); + dbServer = prefs.node("database").get("name", null); + dbName = prefs.node("database").get("name", null); + dbUser = prefs.node("database").get("user", null); + dbPass = prefs.node("database").get("pass", null); + + CalculatePageRank cal=new CalculatePageRank(); + cal.calculateLinks(); + cal.addLabels(); + cal.calculateCategories(); + cal.copyNumbers(); + } +} \ No newline at end of file Deleted: trunk/src/dl-learner/org/dllearner/test/CalculatePageRank.java =================================================================== --- trunk/src/dl-learner/org/dllearner/test/CalculatePageRank.java 2008-10-22 11:20:08 UTC (rev 1399) +++ trunk/src/dl-learner/org/dllearner/test/CalculatePageRank.java 2008-10-22 11:39:17 UTC (rev 1400) @@ -1,231 +0,0 @@ -package org.dllearner.test; - -import java.io.BufferedReader; -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.io.IOException; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.ResultSet; -import java.sql.Statement; - -public class CalculatePageRank { - - private final String wikilinks="../pagelinks_en.nt"; - private final String labels="../articles_label_en.nt"; - private final String categories="../yago_en.nt"; - - private void calculateLinks() - { - try{ - Statement stmt; - ResultSet rs; - int number; - - Class.forName("com.mysql.jdbc.Driver"); - - String url = - "jdbc:mysql://localhost:3306/navigator_db"; - - Connection con = DriverManager.getConnection( - url,"navigator", "dbpedia"); - - stmt = con.createStatement(); - BufferedReader in = new BufferedReader(new FileReader(wikilinks)); - - String line; - String[] split; - String name; - int i=0; - while ((line=in.readLine())!=null) - { - split=line.split(" "); - name=split[2].substring(1, split[2].length()-1); - rs=stmt.executeQuery("SELECT number FROM rank WHERE name='"+name+"'"); - if (rs.next()){ - number=rs.getInt(1); - number++; - stmt.executeUpdate("UPDATE rank SET number="+number+" WHERE name='"+name+"'"); - } - else{ - try{ - stmt.executeUpdate("INSERT INTO rank (name,number) VALUES ('"+name+"',1)"); - }catch(Exception e) - {} - } - if (i%100000==0) System.out.println(i); - i++; - } - - in.close(); - con.close(); - } catch (FileNotFoundException e) - { - System.out.println("File not found"); - } catch (IOException e) - { - System.out.println("IOException"); - } catch (Exception e) - { - e.printStackTrace(); - } - } - - private void addLabels() - { - try{ - Statement stmt; - ResultSet rs; - - Class.forName("com.mysql.jdbc.Driver"); - - String url = - "jdbc:mysql://localhost:3306/navigator_db"; - - Connection con = DriverManager.getConnection( - url,"navigator", "dbpedia"); - - stmt = con.createStatement(); - BufferedReader in = new BufferedReader(new FileReader(labels)); - - String line; - String[] split; - String name; - String label; - int i=0; - while ((line=in.readLine())!=null) - { - split=line.split(">"); - name=split[0].substring(1); - label=split[2].substring(split[2].indexOf("\"")+1, split[2].lastIndexOf("\"")); - rs=stmt.executeQuery("SELECT number FROM rank WHERE name='"+name+"'"); - if (rs.next()){ - stmt.executeUpdate("UPDATE rank SET label=\""+label+"\" WHERE name='"+name+"'"); - } - else{ - try{ - stmt.executeUpdate("INSERT INTO rank (name,label) VALUES ('"+name+"',\""+label+"\")"); - }catch(Exception e) - {} - } - if (i%100000==0) System.out.println(i); - i++; - } - - in.close(); - con.close(); - } catch (FileNotFoundException e) - { - System.out.println("File not found"); - } catch (IOException e) - { - System.out.println("IOException"); - } catch (Exception e) - { - e.printStackTrace(); - } - } - - private void calculateCategories() - { - try{ - Statement stmt; - - Class.forName("com.mysql.jdbc.Driver"); - - String url = - "jdbc:mysql://localhost:3306/navigator_db"; - - Connection con = DriverManager.getConnection( - url,"navigator", "dbpedia"); - - stmt = con.createStatement(); - - stmt.executeUpdate("ALTER TABLE rank DROP COLUMN category"); - - BufferedReader in = new BufferedReader(new FileReader(categories)); - - String line; - String[] split; - String name; - String label; - String pred; - int i=0; - while ((line=in.readLine())!=null) - { - split=line.split(">"); - name=split[0].substring(1); - pred=split[1].substring(2); - if (pred.equals("http://www.w3.org/2000/01/rdf-schema#label")) - label=split[2].substring(split[2].indexOf("\"")+1, split[2].lastIndexOf("\"")); - else - label=split[2].substring(2); - if (pred.equals("http://www.w3.org/2000/01/rdf-schema#label")){ - try{ - stmt.executeUpdate("INSERT INTO categories (category,label) VALUES (\""+name+"\",\""+label+"\")"); - }catch(Exception e) - {} - } - else{ - if (name.startsWith("http://dbpedia.org/resource")){ - try{ - stmt.executeUpdate("INSERT INTO articlecategories (name,category) VALUES ('"+name+"','"+label+"')"); - }catch(Exception e) - {} - }else{ - try{ - stmt.executeUpdate("INSERT INTO classhierarchy (father,child) VALUES ('"+label+"','"+name+"')"); - }catch(Exception e) - {} - } - } - if (i%100000==0) System.out.println(i); - i++; - } - - in.close(); - con.close(); - } catch (FileNotFoundException e) - { - System.out.println("File not found"); - } catch (IOException e) - { - System.out.println("IOException"); - } catch (Exception e) - { - e.printStackTrace(); - } - } - - private void copyNumbers() - { - try{ - Statement stmt; - - Class.forName("com.mysql.jdbc.Driver"); - - String url = - "jdbc:mysql://localhost:3306/navigator_db"; - - Connection con = DriverManager.getConnection( - url,"navigator", "dbpedia"); - - stmt = con.createStatement(); - - stmt.executeUpdate("UPDATE articlecategories SET number=(SELECT number FROM rank WHERE articlecategories.name=rank.name)"); - - con.close(); - } catch (Exception e) - { - e.printStackTrace(); - } - } - - public static void main(String[] args){ - CalculatePageRank cal=new CalculatePageRank(); - cal.calculateLinks(); - cal.addLabels(); - cal.calculateCategories(); - cal.copyNumbers(); - } -} \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |