From: <ku...@us...> - 2009-05-05 11:52:41
|
Revision: 1735 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1735&view=rev Author: kurzum Date: 2009-05-05 11:52:27 +0000 (Tue, 05 May 2009) Log Message: ----------- code for testing existing matching Added Paths: ----------- trunk/src/dl-learner/org/dllearner/scripts/matching/LearnCriteria.java trunk/src/dl-learner/org/dllearner/scripts/matching/Mcollect.java trunk/src/dl-learner/org/dllearner/scripts/matching/Pcollect.java trunk/src/dl-learner/org/dllearner/scripts/matching/SameCollect.java Added: trunk/src/dl-learner/org/dllearner/scripts/matching/LearnCriteria.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/LearnCriteria.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/LearnCriteria.java 2009-05-05 11:52:27 UTC (rev 1735) @@ -0,0 +1,120 @@ +package org.dllearner.scripts.matching; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.net.URL; +import java.net.URLConnection; +import java.util.ArrayList; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.dllearner.kb.sparql.Cache; +import org.dllearner.kb.sparql.SPARQLTasks; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.utilities.CSVFileToArray; +import org.dllearner.utilities.datastructures.StringTuple; + +import com.wcohen.ss.Jaro; +import com.wcohen.ss.api.StringDistance; + +public class LearnCriteria { + ArrayList<SameCollect>sameAs =new ArrayList<SameCollect>() ; + Mcollect m ; + /** + * @param args + */ + public static void main(String[] args) { + LearnCriteria lc = new LearnCriteria(); + @SuppressWarnings("unused") + StringDistance distance = new Jaro(); + try{ + CSVFileToArray csv = new CSVFileToArray("osmdata/owlsameas_en.csv"); + ArrayList<String> al =null ; + + while ((al = csv.next()) != null){ + //System.out.println(al); + if(al.size()!=2)continue; + //if(distance.score(al.get(0), al.get(1))>=0.7){ + //System.out.println(distance.score(al.get(0), al.get(1))); + //System.out.println(al); + //} + //String dbpedia = al.get(1).replace("%25", "%"); + String dbpedia = al.get(1); + lc.sameAs.add(new SameCollect(al.get(0), dbpedia)); + } + }catch (Exception e) { + e.printStackTrace(); + } + + Mcollect m = new Mcollect(); + SPARQLTasks dbpedia = new SPARQLTasks(new Cache("matching"), SparqlEndpoint.getEndpointLOCALDBpedia()); + int countzerold = 0; + int countzerodb = 0; + for (int x = 0; x<lc.sameAs.size();x++) { + SameCollect s = lc.sameAs.get(x); + String query = "SELECT * WHERE {<"+s.db+"> ?p ?o}"; + s.dbdata = dbpedia.queryAsRDFNodeTuple(query, "?p", "?o"); + s.lddata = lc.getLinkedData(s.ld); +// System.exit(0); +// System.out.println(s.lddata); +// for (StringTuple string : s.lddata ) { +// System.out.println(string); +// } + m.add(s); + if(s.dbdata.size() == 0){ + System.out.println(s.db); + countzerodb+=1; + } + //if( s.lddata.size() == 0)countzerold+=1; +// if(x>110) break; + System.out.println(x); + } + + System.out.println(countzerodb); + System.out.println(countzerold); + // System.exit(0); + + System.out.println(m); + //System.out.println(lc.sameAs); + //System.out.println(lc.sameAs.size()); + + + } + + public SortedSet<StringTuple> getLinkedData(String url){ + SortedSet<StringTuple> result = new TreeSet<StringTuple>(); + try{ + URL linkedGeoDataURL = new URL(url); + + URLConnection conn = linkedGeoDataURL.openConnection(); + BufferedReader rd = new BufferedReader(new InputStreamReader(conn.getInputStream())); + String line=""; + @SuppressWarnings("unused") + boolean oneLine = false; + while ((line = rd.readLine()) != null) + + { oneLine = true; +// System.out.println(line);continue; + line = line.replace("<"+url+"#id>", ""); + line = line.replace("<"+url+">", ""); + + + String p = line.substring(line.indexOf("<")+1,line.indexOf(">") ); + line = line.substring(line.indexOf(">")+1); + line = line.substring(0,line.lastIndexOf(".")); + line = line.trim(); + line = line.substring(1); + String o = line.substring(0,line.length()-1); +// System.out.println(new StringTuple(p,o)); + result.add(new StringTuple(p,o)); + } + + rd.close(); + + }catch (Exception e) { + e.printStackTrace(); + } + return result; + } + +} Added: trunk/src/dl-learner/org/dllearner/scripts/matching/Mcollect.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Mcollect.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Mcollect.java 2009-05-05 11:52:27 UTC (rev 1735) @@ -0,0 +1,63 @@ +package org.dllearner.scripts.matching; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.dllearner.utilities.datastructures.RDFNodeTuple; +import org.dllearner.utilities.datastructures.StringTuple; + +import com.wcohen.ss.Jaro; +import com.wcohen.ss.api.StringDistance; + +public class Mcollect { + String name; + List<Pcollect> props = new ArrayList<Pcollect>(); + + + public void add(SameCollect s){ + StringDistance distance = new Jaro(); + + for (RDFNodeTuple db : s.dbdata) { + for (StringTuple ld : s.lddata) { +// System.out.println(ld.b); +// System.out.println(db.b.toString()); +// System.out.println(istance.score(ld.b,db.b.toString())); + if ( distance.score(ld.b,db.b.toString())>=0.90){ + boolean found = false; + for (Pcollect p : props){ + + if(p.ldp.equals(ld.a) && p.dbp.equals( db.a.toString())){ + p.count +=1; + found = true; + } + } + if(found==false){ + props.add(new Pcollect(ld.a, db.a.toString())); + } + + + }; + + } + } + } + + @SuppressWarnings("unchecked") + @Override + public String toString(){ + //SortedSet<Pcollect> s = new TreeSet<Pcollect>(); +// for(Pcollect one : s){ +// s.add(one); +// +// } + String ret = ""; + Collections.sort(props ); + for(int a=0; a<props.size();a++){ + + ret+= props.get(a).toString()+"\n"; + + } + return ret; + } +} Added: trunk/src/dl-learner/org/dllearner/scripts/matching/Pcollect.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Pcollect.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Pcollect.java 2009-05-05 11:52:27 UTC (rev 1735) @@ -0,0 +1,34 @@ +package org.dllearner.scripts.matching; + +public class Pcollect implements Comparable<Pcollect>{ + + String ldp; + String dbp; + int count=1; + + + + + public Pcollect(String ldp, String dbp) { + super(); + this.ldp = ldp.trim(); + this.dbp = dbp.trim(); + } + + + @Override + public String toString(){ + String ret = "count : "+count+" : "+ldp+ " = "+dbp; + + return ret; + } + + public int compareTo(Pcollect in){ + + Pcollect other = (Pcollect) in; + if(this.count==other.count)return 0; + if( this.count>other.count){ + return -1; + }else {return 1;} + } +} Added: trunk/src/dl-learner/org/dllearner/scripts/matching/SameCollect.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/SameCollect.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/SameCollect.java 2009-05-05 11:52:27 UTC (rev 1735) @@ -0,0 +1,23 @@ +package org.dllearner.scripts.matching; + +import java.util.SortedSet; + +import org.dllearner.utilities.datastructures.RDFNodeTuple; +import org.dllearner.utilities.datastructures.StringTuple; + +public class SameCollect { + + String ld; + String db; + + SortedSet<RDFNodeTuple> dbdata; + SortedSet<StringTuple> lddata; + + public SameCollect(String ld, String db) { + super(); + this.ld = ld; + this.db = db; + } + + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |