From: <sk...@us...> - 2008-08-13 12:55:58
|
Revision: 1063 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1063&view=rev Author: sknappe Date: 2008-08-13 12:55:56 +0000 (Wed, 13 Aug 2008) Log Message: ----------- Class to calculate pageranks and put them into mysql database, also add labels and later also add categories to use only the database for all searches Added Paths: ----------- trunk/src/dl-learner/org/dllearner/test/CalculatePageRank.java Added: trunk/src/dl-learner/org/dllearner/test/CalculatePageRank.java =================================================================== --- trunk/src/dl-learner/org/dllearner/test/CalculatePageRank.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/test/CalculatePageRank.java 2008-08-13 12:55:56 UTC (rev 1063) @@ -0,0 +1,127 @@ +package org.dllearner.test; + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.Statement; + +public class CalculatePageRank { + + private final String wikilinks="../pagelinks_en.nt"; + private final String labels="../articles_label_en.nt"; + + private void calculateLinks() + { + try{ + Statement stmt; + ResultSet rs; + int number; + + Class.forName("com.mysql.jdbc.Driver"); + + String url = + "jdbc:mysql://localhost:3306/navigator_db"; + + Connection con = DriverManager.getConnection( + url,"navigator", "dbpedia"); + + stmt = con.createStatement(); + BufferedReader in = new BufferedReader(new FileReader(wikilinks)); + + String line; + String[] split; + String name; + int i=0; + while ((line=in.readLine())!=null) + { + split=line.split(" "); + name=split[2].substring(1, split[2].length()-1); + rs=stmt.executeQuery("SELECT number FROM rank WHERE name='"+name+"'"); + if (rs.next()){ + number=rs.getInt(1); + number++; + stmt.executeUpdate("UPDATE rank SET number="+number+" WHERE name='"+name+"'"); + } + else{ + stmt.executeUpdate("INSERT INTO rank (name,number) VALUES ('"+name+"',1)"); + } + if (i%100000==0) System.out.println(i); + i++; + } + + in.close(); + con.close(); + } catch (FileNotFoundException e) + { + System.out.println("File not found"); + } catch (IOException e) + { + System.out.println("IOException"); + } catch (Exception e) + { + e.printStackTrace(); + } + } + + private void addLabels() + { + try{ + Statement stmt; + ResultSet rs; + + Class.forName("com.mysql.jdbc.Driver"); + + String url = + "jdbc:mysql://localhost:3306/navigator_db"; + + Connection con = DriverManager.getConnection( + url,"navigator", "dbpedia"); + + stmt = con.createStatement(); + BufferedReader in = new BufferedReader(new FileReader(labels)); + + String line; + String[] split; + String name; + String label; + int i=0; + while ((line=in.readLine())!=null) + { + split=line.split(">"); + name=split[0].substring(1); + label=split[2].substring(split[2].indexOf("\"")+1, split[2].lastIndexOf("\"")); + rs=stmt.executeQuery("SELECT number FROM rank WHERE name='"+name+"'"); + if (rs.next()){ + stmt.executeUpdate("UPDATE rank SET label=\""+label+"\" WHERE name='"+name+"'"); + } + else{ + stmt.executeUpdate("INSERT INTO rank (name,label) VALUES ('"+name+"',\""+label+"\")"); + } + if (i%100000==0) System.out.println(i); + i++; + } + + in.close(); + con.close(); + } catch (FileNotFoundException e) + { + System.out.println("File not found"); + } catch (IOException e) + { + System.out.println("IOException"); + } catch (Exception e) + { + e.printStackTrace(); + } + } + + public static void main(String[] args){ + CalculatePageRank cal=new CalculatePageRank(); + //cal.calculateLinks(); + cal.addLabels(); + } +} \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |