From: <seb...@us...> - 2011-11-24 18:11:08
|
Revision: 3437 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3437&view=rev Author: sebastianwtr Date: 2011-11-24 18:11:01 +0000 (Thu, 24 Nov 2011) Log Message: ----------- [tbsl.exploration] added sqlite function etc Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/exploration_main.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/mySQLDictionary.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java 2011-11-24 12:19:01 UTC (rev 3436) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java 2011-11-24 18:11:01 UTC (rev 3437) @@ -1,5 +1,6 @@ package org.dllearner.algorithm.tbsl.exploration.Sparql; import java.io.BufferedReader; + import java.io.BufferedWriter; import java.io.FileInputStream; import java.io.FileNotFoundException; @@ -13,6 +14,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Set; import net.didion.jwnl.JWNLException; @@ -28,8 +30,15 @@ import org.dllearner.algorithm.tbsl.templator.BasicTemplator; import org.dllearner.algorithm.tbsl.templator.Templator; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; + public class SparqlObject { //global Variable dict @@ -41,19 +50,25 @@ static WordNet wordnet; BasicTemplator btemplator; Templator templator; - private static HashMap<String, String> hm = new HashMap<String, String>(); +/* private static HashMap<String, String> hm = new HashMap<String, String>(); + private static HashMap<String, String> hm_new = new HashMap<String, String>();*/ + private static mySQLDictionary myindex; //Konstruktor - public SparqlObject() throws MalformedURLException{ + public SparqlObject() throws MalformedURLException, ClassNotFoundException, SQLException{ wordnet = new WordNet(); //hm=hm_new; - hm=ParseXmlHtml.parse_xml("/home/swalter/workspace/qaldEntity2",hm); - hm=ParseXmlHtml.parse_xml("/home/swalter/workspace/qaldEntity1",hm); + /*hm=ParseXmlHtml.parse_xml("/home/swalter/workspace/qaldEntity2",hm); + hm=ParseXmlHtml.parse_xml("/home/swalter/workspace/qaldEntity1",hm);*/ System.out.println("Loading SPARQL Templator"); btemplator = new BasicTemplator(); templator = new Templator(); System.out.println("Loading SPARQL Templator Done\n"); + System.out.println("Start Indexing Wikipedia URI's"); + myindex = new mySQLDictionary(); + + System.out.println("Done:Indexing Wikipedia URI's"); setExplorationdepthwordnet(1); //eigentlich immer mit 0 initialisieren setIterationdepth(1); @@ -103,7 +118,7 @@ * "Main" Method of this Class. * */ - public void create_Sparql_query(String question) throws JWNLException, IOException{ + public void create_Sparql_query(String question) throws JWNLException, IOException, SQLException{ //create_Sparql_query_new(string); ArrayList<ArrayList<String>> lstquery = new ArrayList<ArrayList<String>>(); @@ -215,7 +230,7 @@ //asking server String answer; answer=sendServerQuestionRequest(query); - + System.out.println(query); //if Emty answer, get properties an look up the right property with levensthein if(answer.contains("EmtyAnswer")){ //TODO: get all information from the query @@ -249,7 +264,8 @@ Boolean goOnAfterProperty = true; try { //using uri now, not the string - properties=property.getPropertys(hm.get(rescource.toLowerCase())); + //properties=property.getPropertys(hm.get(rescource.toLowerCase())); + properties=property.getPropertys(getUriFromIndex(rescource.toLowerCase(),0)); if (properties==null){ final_answer.add("Error in getting Properties\n"); goOnAfterProperty=false; @@ -276,7 +292,8 @@ //System.out.println(tmp); //alte property uri mit neuer ersetzen: String query_tmp=query; - query_tmp=query_tmp.replace(hm.get(property_to_compare_with.toLowerCase()),properties.get(i-1)); + //query_tmp=query_tmp.replace(hm.get(property_to_compare_with.toLowerCase()),properties.get(i-1)); + query_tmp=query_tmp.replace(getUriFromIndex(property_to_compare_with.toLowerCase(),1),properties.get(i-1)); //System.out.println("hm.get(property_to_compare_with.toLowerCase(): " + hm.get(property_to_compare_with.toLowerCase())); new_queries.add(query_tmp); } @@ -328,7 +345,9 @@ //create new query result_SemanticsMatchProperties.add(properties.get(h)); String query_tmp=query; - query_tmp=query_tmp.replace(hm.get(property_to_compare_with.toLowerCase()),properties.get(h-1)); + + //query_tmp=query_tmp.replace(hm.get(property_to_compare_with.toLowerCase()),properties.get(h-1)); + query_tmp=query_tmp.replace(getUriFromIndex(property_to_compare_with.toLowerCase(),1),properties.get(h-1)); //System.out.println("hm.get(property_to_compare_with.toLowerCase(): " + hm.get(property_to_compare_with.toLowerCase())); new_queries.add(query_tmp); } @@ -402,13 +421,16 @@ //create_Sparql_query_old(string); // } + + /** * Method gets a String and takes the information from the templator to creat a Sparql query. * @param question question in natural language * @return ArrayList of Sparql queries. + * @throws SQLException */ - private ArrayList<ArrayList<String>> getQuery(String question) { + private ArrayList<ArrayList<String>> getQuery(String question) throws SQLException { ArrayList<ArrayList<String>> lstquery = new ArrayList<ArrayList<String>>(); Set<BasicQueryTemplate> querytemps = btemplator.buildBasicQueries(question); for (BasicQueryTemplate temp : querytemps) { @@ -423,6 +445,7 @@ String query; String selTerms =""; for(SPARQL_Term terms :temp.getSelTerms()) selTerms=selTerms+(terms.toString())+" "; + System.out.println(selTerms); String conditions = ""; for(Path condition: temp.getConditions()) conditions=conditions+(condition.toString())+"."; @@ -474,7 +497,9 @@ //System.out.println("replace: " + replace); //hier dann den hm wert von array[1] eintragen - String hm_result=hm.get(array[1].toLowerCase()); + + //String hm_result=hm.get(array[1].toLowerCase()); + String hm_result=getUriFromIndex(array[1].toLowerCase(),0); try { if(hm_result.contains("Category:")) hm_result=hm_result.replace("Category:",""); @@ -515,78 +540,40 @@ - - - private void doIteration(String string1, String string2) throws JWNLException{ - long startTime = System.currentTimeMillis(); - - String string2_uri; - string2_uri=hm.get(string2); - string2_uri=string2_uri.replace("Category:", ""); - string2_uri=string2_uri.replace("category:", ""); - System.out.println("Get Propertys of "+string2); - - //contains uri AND string, every second is the string - ArrayList<String> properties = new ArrayList<String>(); - GetRessourcePropertys property = new GetRessourcePropertys(); - try { - //using uri now, not the string - properties=property.getPropertys(hm.get(string2)); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); + /** + * + * @param string + * @param fall 1 Property 0 no Property + * @return + * @throws SQLException + */ + private String getUriFromIndex(String string, int fall) throws SQLException{ + String result=null; + if(fall==0){ + //result=hm.get(string.toLowerCase()); + //if(result==null)result=myindex.getURI(string); + result=myindex.getResourceURI(string.toLowerCase()); + if(result==null)result=myindex.getPropertyURI(string.toLowerCase()); } - - System.out.println("Start Iterating Wordnet with "+string1+" and deept of "+explorationdepthwordnet); - ArrayList<String> semantics=new ArrayList<String>(); - ArrayList<String> tmp_semantics=new ArrayList<String>(); - ArrayList<String> result_SemanticsMatchProperties=new ArrayList<String>(); - semantics.add(string1); - tmp_semantics=semantics; - for(int i=0;i<=explorationdepthwordnet;i++){ - - try { - tmp_semantics=getSemantics(tmp_semantics); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - + if(fall==1){ + /*result=hm.get(string.toLowerCase()); + if(result==null)result=myindex.getURI(string); + if(result==null)result="http://dbpedia.org/property/"+string.toLowerCase();*/ + //should be alway property an not resource + //result=result.replace("resource", "property"); + result=myindex.getPropertyURI(string.toLowerCase()); + if(result==null){ + result=myindex.getResourceURI(string.toLowerCase()); + result=result.replace("resource", "property"); } - //each word only one time - for(String k : tmp_semantics){ - if(!semantics.contains(k)) semantics.add(k); - } - - } - long endTime = System.currentTimeMillis(); - System.out.println("Getting Properties and Semantics took "+(endTime-startTime) +" ms\n"); - - //TODO: Try, if it works, if you use only one loop: (b.lowerCase).contains(properties.get(h)) - for(int h=1;h<properties.size()-2;h=h+2){ - for(String b : semantics){ - //System.out.println(properties.get(h)); - //System.out.println(b); - if(properties.get(h).contains(b.toLowerCase())){ - if(!result_SemanticsMatchProperties.contains(properties.get(h))) - result_SemanticsMatchProperties.add(properties.get(h)); - } - } + } - for(String b : result_SemanticsMatchProperties){ - string1=b.toLowerCase(); - String anfrage; - String string1_uri; - string1_uri=hm.get(string1); - if(string1_uri!=null){ - anfrage="PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>select ?x where { <"+string2_uri+"> <"+string1_uri+"> ?x.}"; - System.out.println("Answer with the property \" " + b + "\" :\n"+sendServerQuestionRequest(anfrage)); - } - } - long endTime2 = System.currentTimeMillis(); - System.out.println("Getting Properties, Semantics and Answer from server took "+(endTime2-startTime) +" ms"); + + if(result==null) return "http://dbpedia.org/property/"+string.toLowerCase(); + else return result; } - + private static ArrayList<String> getSemantics (ArrayList<String> semantics) throws IOException, JWNLException { @@ -612,6 +599,7 @@ private String sendServerQuestionRequest(String query){ //SPARQL-Endpoint of Semantic Computing Group String tmp="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query="+createServerRequest(query)+"&format=text%2Fhtml&debug=on&timeout="; + System.out.println(tmp); URL url; InputStream is; InputStreamReader isr; @@ -711,8 +699,14 @@ return query; } + + + + } + + /** * Cluster function */ Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/mySQLDictionary.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/mySQLDictionary.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/mySQLDictionary.java 2011-11-24 18:11:01 UTC (rev 3437) @@ -0,0 +1,309 @@ +package org.dllearner.algorithm.tbsl.exploration.Sparql; + +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStreamReader; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; + +public class mySQLDictionary { + private Connection conn; + + public mySQLDictionary() throws ClassNotFoundException, SQLException { + // TODO Auto-generated constructor stub + Class.forName( "org.sqlite.JDBC" ); + conn = DriverManager.getConnection("jdbc:sqlite::memory:"); + createIndexPropertys(); + createIndexResource(); + + //optional!! + //createIndexWikipedia(); + + } + +private String createSimpleHashKey(String string){ + string = string.replace("!",""); + string = string.replace(":",""); + string = string.replace("/",""); + string = string.replace("\\",""); + string = string.replace("?",""); + string = string.replace(":",""); + + string = string.replace("a","1"); + string = string.replace("b","2"); + string = string.replace("c","3"); + string = string.replace("d","4"); + string = string.replace("e","5"); + string = string.replace("f","6"); + string = string.replace("g","7"); + string = string.replace("h","8"); + string = string.replace("i","9"); + string = string.replace("j","10"); + string = string.replace("k","11"); + string = string.replace("l","12"); + string = string.replace("m","13"); + string = string.replace("n","14"); + string = string.replace("o","15"); + string = string.replace("p","16"); + string = string.replace("q","17"); + string = string.replace("r","18"); + string = string.replace("s","19"); + string = string.replace("t","20"); + string = string.replace("u","21"); + string = string.replace("v","22"); + string = string.replace("w","23"); + string = string.replace("x","24"); + string = string.replace("y","25"); + string = string.replace("z","26"); + string = string.replace("ä","0"); + string = string.replace("ö","0"); + string = string.replace("ü","0"); + string = string.replace("?","0"); + string = string.replace(" ","0"); + return string; + + +} + + public String getResourceURI(String string) throws SQLException{ + Statement stat = conn.createStatement(); + ResultSet rs; + try { + rs = stat.executeQuery("select uri from resource where name='"+string.toLowerCase()+"';"); + return rs.getString("uri"); + } catch (Exception e) { + // TODO Auto-generated catch block + //e.printStackTrace(); + return null; + } + + } + + public String getPropertyURI(String string) throws SQLException{ + Statement stat = conn.createStatement(); + ResultSet rs; + try { + rs = stat.executeQuery("select uri from property where name='"+string.toLowerCase()+"';"); + return rs.getString("uri"); + } catch (Exception e) { + // TODO Auto-generated catch block + //e.printStackTrace(); + return null; + } + + + } + + public String getWikipediaURI(String string) throws SQLException{ + Statement stat = conn.createStatement(); + ResultSet rs; + try { + rs = stat.executeQuery("select uri from wikiindex where name='"+string.toLowerCase()+"';"); + return rs.getString("uri"); + } catch (Exception e) { + // TODO Auto-generated catch block + //e.printStackTrace(); + return null; + } + + + } + + + private void createIndexWikipedia() throws ClassNotFoundException, SQLException{ + /*System.out.println("Start SQL test"); + Class.forName( "org.sqlite.JDBC" ); + conn = DriverManager.getConnection("jdbc:sqlite::memory:");*/ + Statement stat = conn.createStatement(); + stat.executeUpdate("drop table if exists wikiindex;"); + stat.executeUpdate("create table wikiindex (name, uri);"); + PreparedStatement prep = conn.prepareStatement("insert into wikiindex values (?, ?);"); + BufferedReader in=null; + // conn.setAutoCommit(false); + int zaehler=0; + try { + in = new BufferedReader( + new InputStreamReader( + new FileInputStream( "/home/swalter/workspace/URIsFromWikipedia" ) ) ); + String s; + while( null != (s = in.readLine()) ) { + String[] tmp_array =s.split("::"); + if(tmp_array.length>=2){ + prep.setString(1, tmp_array[0]); + prep.setString(2, tmp_array[1]); + prep.addBatch(); + zaehler=zaehler+1; + //if(zaehler%100000==0) System.out.println(zaehler); + if(zaehler%1000000==0){ + conn.setAutoCommit(false); + prep.executeBatch(); + conn.setAutoCommit(false); + System.out.println("done"); + } + + } + } + } catch( FileNotFoundException ex ) { + } catch( Exception ex ) { + System.out.println( ex ); + } finally { + if( in != null ) + try { + in.close(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + conn.setAutoCommit(false); + prep.executeBatch(); + conn.setAutoCommit(true); + System.out.println("Done"); + //Statement stat = conn.createStatement(); + /* long start = System.currentTimeMillis(); + // zu messender Code + + ResultSet rs = stat.executeQuery("select * from people where name='kornyval';"); + while (rs.next()) + { + System.out.println("name = " + rs.getString("name")); + System.out.println("job = " + rs.getString("occupation")); + } + System.out.println("Duration in ms: " + (System.currentTimeMillis() - start)); + + start = System.currentTimeMillis(); + // zu messender Code + + rs = stat.executeQuery("select * from people where name='barack obama';"); + while (rs.next()) + { + System.out.println("name = " + rs.getString("name")); + System.out.println("job = " + rs.getString("occupation")); + } + System.out.println("Duration in ms: " + (System.currentTimeMillis() - start)); + + rs = stat.executeQuery("select * from people where name='kornyval';"); + while (rs.next()) + { + System.out.println("name = " + rs.getString("name")); + System.out.println("job = " + rs.getString("occupation")); + } + System.out.println("Duration in ms: " + (System.currentTimeMillis() - start)); + + + rs.close();*/ + // conn.close(); + } +private void createIndexPropertys() throws ClassNotFoundException, SQLException{ + /*System.out.println("Start SQL test"); + Class.forName( "org.sqlite.JDBC" ); + conn = DriverManager.getConnection("jdbc:sqlite::memory:");*/ + System.out.println("start indexing Properties"); + Statement stat = conn.createStatement(); + stat.executeUpdate("drop table if exists property;"); + stat.executeUpdate("create table property (name, uri);"); + PreparedStatement prep = conn.prepareStatement("insert into property values (?, ?);"); + BufferedReader in=null; + // conn.setAutoCommit(false); + int zaehler=0; + try { + in = new BufferedReader( + new InputStreamReader( + new FileInputStream( "/home/swalter/workspace/property" ) ) ); + String s; + while( null != (s = in.readLine()) ) { + String[] tmp_array =s.split(":::"); + if(tmp_array.length>=2){ + prep.setString(1, tmp_array[1]); + prep.setString(2, tmp_array[0]); + prep.addBatch(); + zaehler=zaehler+1; + //if(zaehler%10000==0) System.out.println(zaehler); + if(zaehler%1000000==0){ + conn.setAutoCommit(false); + prep.executeBatch(); + conn.setAutoCommit(false); + System.out.println("done"); + } + + } + } + } catch( FileNotFoundException ex ) { + } catch( Exception ex ) { + System.out.println( ex ); + } finally { + if( in != null ) + try { + in.close(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + conn.setAutoCommit(false); + prep.executeBatch(); + conn.setAutoCommit(true); + System.out.println("Done"); + + } +private void createIndexResource() throws ClassNotFoundException, SQLException{ + /*System.out.println("Start SQL test");*/ + System.out.println("start indexing Resources"); + Statement stat = conn.createStatement(); + stat.executeUpdate("drop table if exists resource;"); + stat.executeUpdate("create table resource (name, uri);"); + PreparedStatement prep = conn.prepareStatement("insert into resource values (?, ?);"); + BufferedReader in=null; + // conn.setAutoCommit(false); + int zaehler=0; + try { + in = new BufferedReader( + new InputStreamReader( + new FileInputStream( "/home/swalter/workspace/resource" ) ) ); + String s; + while( null != (s = in.readLine()) ) { + String[] tmp_array =s.split(":::"); + if(tmp_array.length>=2){ + prep.setString(1, tmp_array[1]); + prep.setString(2, tmp_array[0]); + prep.addBatch(); + zaehler=zaehler+1; + // if(zaehler%10000==0) System.out.println(zaehler); + if(zaehler%1000000==0){ + conn.setAutoCommit(false); + prep.executeBatch(); + conn.setAutoCommit(false); + System.out.println("done"); + } + + } + } + } catch( FileNotFoundException ex ) { + } catch( Exception ex ) { + System.out.println( ex ); + } finally { + if( in != null ) + try { + in.close(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + conn.setAutoCommit(false); + prep.executeBatch(); + conn.setAutoCommit(true); + System.out.println("Done"); + + } + +} \ No newline at end of file Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/exploration_main.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/exploration_main.java 2011-11-24 12:19:01 UTC (rev 3436) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/exploration_main.java 2011-11-24 18:11:01 UTC (rev 3437) @@ -13,7 +13,11 @@ import net.didion.jwnl.JWNLException; import org.dllearner.algorithm.tbsl.exploration.Sparql.SparqlObject; -import org.dllearner.algorithm.tbsl.exploration.sax.ParseXmlHtml; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; /* * @@ -21,6 +25,18 @@ * in Eclipse Run -> RunConfigurations -> Arguments -> VM Arguments -> -Xmx1024m */ +/* + * + * + * for pom.xml file + * <dependencies> + <dependency> + <groupId>org.xerial</groupId> + <artifactId>sqlite-jdbc</artifactId> + <version>3.6.16</version> + </dependency> + </dependencies> + */ // Sax example from http://www.bennyn.de/programmierung/java/java-xml-sax-parser.html /* @@ -39,14 +55,15 @@ * @throws IOException * @throws JWNLException * @throws InterruptedException + * @throws ClassNotFoundException + * @throws SQLException */ - public static void main(String[] args) throws IOException, JWNLException, InterruptedException { + public static void main(String[] args) throws IOException, JWNLException, InterruptedException, ClassNotFoundException, SQLException { /** * Do the starting initializing stuff */ long startInitTime = System.currentTimeMillis(); - System.out.println("Start Indexing"); //For testing! @@ -168,5 +185,7 @@ return name; } + + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |