From: <lor...@us...> - 2013-11-25 09:43:00
|
Revision: 4173 http://sourceforge.net/p/dl-learner/code/4173 Author: lorenz_b Date: 2013-11-25 09:42:56 +0000 (Mon, 25 Nov 2013) Log Message: ----------- ISLE. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/SemanticBibleExperiment.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java 2013-11-22 12:44:10 UTC (rev 4172) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java 2013-11-25 09:42:56 UTC (rev 4173) @@ -60,7 +60,7 @@ curNormalizedLength += p.getNormalizedLength(); curOriginalLength += p.getOriginalLength(); if (curNormalizedLength >= length) { - return new Annotation(originalDocument, originalStart, curOriginalLength); +// return new Annotation(originalDocument, originalStart, curOriginalLength); } // include space @@ -82,16 +82,16 @@ } public static void main(String[] args) { - NormalizedTextMapper n = new NormalizedTextMapper(new TextDocument("This is a testing text using letters")); - System.out.println(n.getOriginalText()); - System.out.println(n.getNormalizedText()); - for (OccurenceMappingPair p : n.normalizedIndexToOriginalIndex) { - System.out.println(p); - } - System.out.println(n.getOriginalAnnotationForPosition(7,6)); - System.out.println(n.getOriginalAnnotationForPosition(23,6)); - System.out.println(n.getOriginalAnnotationForPosition(7,1)); - System.out.println(n.getOriginalAnnotationForPosition(14,15)); +// NormalizedTextMapper n = new NormalizedTextMapper(new TextDocument("This is a testing text using letters")); +// System.out.println(n.getOriginalText()); +// System.out.println(n.getNormalizedText()); +// for (OccurenceMappingPair p : n.normalizedIndexToOriginalIndex) { +// System.out.println(p); +// } +// System.out.println(n.getOriginalAnnotationForPosition(7,6)); +// System.out.println(n.getOriginalAnnotationForPosition(23,6)); +// System.out.println(n.getOriginalAnnotationForPosition(7,1)); +// System.out.println(n.getOriginalAnnotationForPosition(14,15)); } /** Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-11-22 12:44:10 UTC (rev 4172) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-11-25 09:42:56 UTC (rev 4173) @@ -39,6 +39,8 @@ public void postProcess(HashMap<Annotation,Set<Entity>> candidatesMap, int window, StopWordFilter stopWordFilter) { Set<Annotation> annotations = candidatesMap.keySet(); List<Annotation> sortedAnnotations = new ArrayList<Annotation>(annotations); + /** + // Sort annotations by offset in ascending order Collections.sort(sortedAnnotations, new Comparator<Annotation>(){ @@ -102,20 +104,21 @@ } - + */ } private Annotation mergeAnnotations(Annotation annotation_i, Annotation annotation_j) { - int offset; - int length; - if (annotation_i.getOffset() < annotation_j.getOffset()) { - offset = annotation_i.getOffset(); - length = annotation_j.getOffset() - offset + annotation_j.getLength(); - } else { - offset = annotation_j.getOffset(); - length = annotation_i.getOffset() - offset + annotation_i.getLength(); - } - return new Annotation(annotation_i.getReferencedDocument(), offset, length); + return null; +// int offset; +// int length; +// if (annotation_i.getOffset() < annotation_j.getOffset()) { +// offset = annotation_i.getOffset(); +// length = annotation_j.getOffset() - offset + annotation_j.getLength(); +// } else { +// offset = annotation_j.getOffset(); +// length = annotation_i.getOffset() - offset + annotation_i.getLength(); +// } +// return new Annotation(annotation_i.getReferencedDocument(), offset, length); } @Override Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-11-22 12:44:10 UTC (rev 4172) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-11-25 09:42:56 UTC (rev 4173) @@ -7,10 +7,12 @@ import org.apache.log4j.Logger; import org.dllearner.algorithms.isle.EntityCandidateGenerator; +import org.dllearner.algorithms.isle.TextDocumentGenerator; import org.dllearner.algorithms.isle.index.AnnotatedDocument; import org.dllearner.algorithms.isle.index.LinguisticAnnotator; import org.dllearner.algorithms.isle.index.SemanticAnnotator; import org.dllearner.algorithms.isle.index.TextDocument; +import org.dllearner.algorithms.isle.index.Token; import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex; import org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation; import org.dllearner.core.owl.Entity; @@ -108,7 +110,7 @@ } } if (label != null) { - documents.add(new TextDocument(label)); + documents.add(TextDocumentGenerator.getInstance().generateDocument(label)); } } buildIndex(documents); Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java 2013-11-22 12:44:10 UTC (rev 4172) +++ trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java 2013-11-25 09:42:56 UTC (rev 4173) @@ -108,6 +108,7 @@ private Map<NamedClass, Integer> classPopularityMap; private Map<ObjectProperty, Integer> objectPropertyPopularityMap; private Map<DatatypeProperty, Integer> dataPropertyPopularityMap; + private Map<Individual, Integer> individualPopularityMap; private boolean prepared = false; @@ -156,6 +157,8 @@ classPopularityMap = new HashMap<NamedClass, Integer>(); objectPropertyPopularityMap = new HashMap<ObjectProperty, Integer>(); + dataPropertyPopularityMap = new HashMap<DatatypeProperty, Integer>(); + individualPopularityMap = new HashMap<Individual, Integer>(); if(ks.isRemote()){ SparqlEndpoint endpoint = ks.getEndpoint(); @@ -176,6 +179,8 @@ classPopularityMap = new HashMap<NamedClass, Integer>(); objectPropertyPopularityMap = new HashMap<ObjectProperty, Integer>(); + dataPropertyPopularityMap = new HashMap<DatatypeProperty, Integer>(); + individualPopularityMap = new HashMap<Individual, Integer>(); } public void precomputePopularity(){ @@ -330,7 +335,19 @@ dataPropertyPopularityMap.put(dp, cnt); return cnt; } + } + + public int getPopularity(Individual ind){ + if(individualPopularityMap != null && individualPopularityMap.containsKey(ind)){ + return individualPopularityMap.get(ind); + } else { + String queryTemplate = "SELECT (COUNT(*) AS ?cnt) WHERE {<%s> ?p ?o}"; + ResultSet rs = executeSelectQuery(String.format(queryTemplate, ind.getName())); + int cnt = rs.next().getLiteral("cnt").getInt(); + individualPopularityMap.put(ind, cnt); + return cnt; + } } public final ClassHierarchy prepareSubsumptionHierarchy() { Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java 2013-11-22 12:44:10 UTC (rev 4172) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java 2013-11-25 09:42:56 UTC (rev 4173) @@ -85,7 +85,7 @@ if(!file.isDirectory() && !file.isHidden()){ try { String text = Files.toString(file, Charsets.UTF_8); - documents.add(new TextDocument(text)); + documents.add(TextDocumentGenerator.getInstance().generateDocument(text)); } catch (IOException e) { e.printStackTrace(); } @@ -103,7 +103,7 @@ if(!file.isDirectory() && !file.isHidden()){ try { String text = Files.toString(file, Charsets.UTF_8); - documents.add(new TextDocument(text)); + documents.add(TextDocumentGenerator.getInstance().generateDocument(text)); } catch (IOException e) { e.printStackTrace(); } Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/SemanticBibleExperiment.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/SemanticBibleExperiment.java 2013-11-22 12:44:10 UTC (rev 4172) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/SemanticBibleExperiment.java 2013-11-25 09:42:56 UTC (rev 4173) @@ -87,7 +87,7 @@ String text = Files.toString(file, Charsets.UTF_8); // String posTagged = getPOSTaggedText(text); // Files.write(posTagged, new File(taggedFolder, file.getName() + ".tagged"), Charsets.UTF_8); - documents.add(new TextDocument(text)); + documents.add(TextDocumentGenerator.getInstance().generateDocument(text)); } catch (IOException e) { e.printStackTrace(); } @@ -98,9 +98,9 @@ } catch (IOException e) { e.printStackTrace(); } - - documents = Sets.newHashSet(new TextDocument("and in that day seven women shall take hold of one man saying we will eat our own bread and wear our own apparel only let us be called by thy name to take away our reproach in that day shall the branch of the lord be beautiful and glorious and the fruit of the earth excellent and comely for them that are escaped of israel and it shall come to pass left in zion and remaineth in jerusalem shall be called holy every one that is written among the living in jerusalem when the lord shall have washed away the filth of the daughters of zion and shall have purged the blood of jerusalem from the midst thereof by the spirit of judgment and by the spirit of burning and the lord will create upon every dwelling place of mount zion and upon her assemblies a cloud and smoke by day and the shining of a flaming fire by night for upon all the glory a defence and there shall be a tabernacle for a shadow in the daytime from the heat and for a place of refuge and for a covert from storm and from rain")); - + documents.clear(); + TextDocument doc = TextDocumentGenerator.getInstance().generateDocument("and in that day seven women shall take hold of one man saying we will eat our own bread and wear our own apparel only let us be called by thy name to take away our reproach in that day shall the branch of the lord be beautiful and glorious and the fruit of the earth excellent and comely for them that are escaped of israel and it shall come to pass left in zion and remaineth in jerusalem shall be called holy every one that is written among the living in jerusalem when the lord shall have washed away the filth of the daughters of zion and shall have purged the blood of jerusalem from the midst thereof by the spirit of judgment and by the spirit of burning and the lord will create upon every dwelling place of mount zion and upon her assemblies a cloud and smoke by day and the shining of a flaming fire by night for upon all the glory a defence and there shall be a tabernacle for a shadow in the daytime from the heat and for a place of refuge and for a covert from storm and from rain"); + documents.add(doc); return documents; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |