From: <ki...@us...> - 2012-09-21 14:12:42
|
Revision: 3848 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3848&view=rev Author: kirdie Date: 2012-09-21 14:12:34 +0000 (Fri, 21 Sep 2012) Log Message: ----------- more work on the hmm. Modified Paths: -------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/HierarchicalIndex.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/Index.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/IndexResultItem.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -5,6 +5,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; @@ -34,6 +35,7 @@ import org.dllearner.algorithm.tbsl.util.Knowledgebase; import org.dllearner.algorithm.tbsl.util.PopularityMap; import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType; +import org.dllearner.algorithm.tbsl.util.Similarity; import org.dllearner.common.index.Index; import org.dllearner.common.index.IndexResultItem; import org.dllearner.common.index.IndexResultSet; @@ -55,6 +57,8 @@ import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.reasoning.SPARQLReasoner; import org.ini4j.Options; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; import com.hp.hpl.jena.ontology.OntModelSpec; import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QueryFactory; @@ -74,8 +78,11 @@ } private Mode mode = Mode.BEST_QUERY; + private static SimpleIRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); + private static final boolean CREATE_SYNONYMS = false; + private static final Double BOA_THRESHOLD = 0.9; private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); @@ -371,7 +378,7 @@ logger.debug("Generating SPARQL query templates..."); templateMon.start(); if(multiThreaded){ - templates = templateGenerator.buildTemplatesMultiThreaded(question); + templates = templateGenerator.buildTemplatesMultiThreaded(question,CREATE_SYNONYMS); } else { templates = templateGenerator.buildTemplates(question); } @@ -512,8 +519,9 @@ SortedSet<WeightedQuery> queries = new TreeSet<WeightedQuery>(); Query query = template.getQuery(); double score = 0; - + Map<List<String>,List<ResourceInfo>> segmentToURIs = new HashMap<List<String>,List<ResourceInfo>>(); + Map<String,IndexResultItem> uriUniqueToResultItem = new HashMap<String,IndexResultItem>(); for(Slot slot: template.getSlots()) { List<String> segment = new LinkedList<String>(); @@ -525,9 +533,19 @@ // if this gets used at another place, create a function IndexResultItemToResourceInfo() ResourceInfo info = new ResourceInfo(); info.setUri(item.getUri()); - info.setLabel(item.getLabel()); + String label = item.getLabel(); + // in dbpedia, the last part of the uri is transformed from the english label, reverse the transformation (should almost always work for dbpedia article resources) + info.setLabel(label!=null?label:sfp.getShortForm(IRI.create(item.getUri()))); + // in saedeehs algorithm, the emission probabilty is formed by the string similarity + // but we use the lucene index score + double max = 0; + for(String word: slot.getWords()) {max = Math.max(max, Similarity.getSimilarity(word, info.getLabel()));} + if(max<0||max>1) throw new AssertionError("max is not in [0,1], max="+max); + info.setStringSimilarityScore(max); + + resourceInfos.add(info); } - segmentToURIs.put(segment,resources); + segmentToURIs.put(segment,resourceInfos); } HiddenMarkovModel hmm = new HiddenMarkovModel(); hmm.initialization(); @@ -851,11 +869,11 @@ private Set<IndexResultItem> getIndexResultItems(Slot slot) { -// List<String> uris = new LinkedList<String>(); + // List<String> uris = new LinkedList<String>(); Set<IndexResultItem> indexResultItems = new HashSet<IndexResultItem>(); - + Index index = getIndexBySlotType(slot); - + for(String word : slot.getWords()) { IndexResultSet rs = new IndexResultSet(); @@ -876,18 +894,23 @@ //use the non manual indexes only if mapping based resultset is not empty and option is set if(!useManualMappingsIfExistOnly || rs.isEmpty()){ if(slot.getSlotType() == SlotType.RESOURCE){ - rs.add(index.getResourcesWithScores(word, 20)); + rs.add(index.getResourcesWithScores(word, 20,0)); } else { if(slot.getSlotType() == SlotType.CLASS){ word = PlingStemmer.stem(word); } - rs.add(index.getResourcesWithScores(word, 20)); + IndexResultSet tmp = index.getResourcesWithScores(word, 20,0,Collections.singleton("boa-score")); + for(IndexResultItem item : tmp.getItems()) + {System.out.println(item); + Double boaScore = (Double) item.getFields().get("boa-score"); + if(boaScore==null||boaScore>BOA_THRESHOLD) rs.addItem(item); + } } } -// for(IndexResultItem item: rs.getItems()) -// { -// uris.add(item.getUri()); -// } + // for(IndexResultItem item: rs.getItems()) + // { + // uris.add(item.getUri()); + // } indexResultItems.addAll(rs.getItems()); } return indexResultItems; Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -31,111 +31,111 @@ import org.dllearner.algorithm.tbsl.sparql.Template; public class Templator { - + private static final Logger logger = Logger.getLogger(Templator.class); - - String[] GRAMMAR_FILES = {"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}; - + + String[] GRAMMAR_FILES = {"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}; + private String[] noun = {"NN","NNS","NNP","NNPS","NPREP","JJNN","JJNPREP"}; private String[] adjective = {"JJ","JJR","JJS","JJH"}; private String[] verb = {"VB","VBD","VBG","VBN","VBP","VBZ","PASSIVE","PASSPART","VPASS","VPASSIN","GERUNDIN","VPREP","WHEN","WHERE"}; - + PartOfSpeechTagger tagger; LTAGLexicon g; LTAG_Lexicon_Constructor LTAG_Constructor = new LTAG_Lexicon_Constructor(); Parser parser; Preprocessor pp; - + WordNet wordnet; LingPipeLemmatizer lem = new LingPipeLemmatizer(); - - DUDE2UDRS_Converter d2u = new DUDE2UDRS_Converter(); - DRS2SPARQL_Converter d2s = new DRS2SPARQL_Converter(); - + + DUDE2UDRS_Converter d2u = new DUDE2UDRS_Converter(); + DRS2SPARQL_Converter d2s = new DRS2SPARQL_Converter(); + boolean ONE_SCOPE_ONLY = true; boolean UNTAGGED_INPUT = true; boolean USE_NER = false; boolean USE_WORDNET = true; boolean VERBOSE = true; - + private String taggedInput; - + private Set<Template> templates; private Set<DRS> drses; - + public Templator() { this(new StanfordPartOfSpeechTagger(), new WordNet()); } - + public Templator(final PartOfSpeechTagger tagger) { this(tagger, new WordNet()); } - + public Templator(final PartOfSpeechTagger tagger, WordNet wordnet) { - this.tagger = tagger; - this.wordnet = wordnet; - - List<InputStream> grammarFiles = new ArrayList<InputStream>(); - for(int i = 0; i < GRAMMAR_FILES.length; i++){ - grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); - } - - g = LTAG_Constructor.construct(grammarFiles); - - parser = new Parser(); - parser.SHOW_GRAMMAR = true; - parser.USE_DPS_AS_INITTREES = true; - parser.CONSTRUCT_SEMANTICS = true; - parser.MODE = "LEIPZIG"; - - pp = new Preprocessor(USE_NER); + this.tagger = tagger; + this.wordnet = wordnet; + + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } + + g = LTAG_Constructor.construct(grammarFiles); + + parser = new Parser(); + parser.SHOW_GRAMMAR = true; + parser.USE_DPS_AS_INITTREES = true; + parser.CONSTRUCT_SEMANTICS = true; + parser.MODE = "LEIPZIG"; + + pp = new Preprocessor(USE_NER); } - + public Templator(final PartOfSpeechTagger tagger, WordNet wordnet, String[] GRAMMAR_FILES) { - this.tagger = tagger; - this.wordnet = wordnet; - this.GRAMMAR_FILES = GRAMMAR_FILES; + this.tagger = tagger; + this.wordnet = wordnet; + this.GRAMMAR_FILES = GRAMMAR_FILES; - List<InputStream> grammarFiles = new ArrayList<InputStream>(); - for(int i = 0; i < GRAMMAR_FILES.length; i++) { - grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); - } + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++) { + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } - g = LTAG_Constructor.construct(grammarFiles); + g = LTAG_Constructor.construct(grammarFiles); - parser = new Parser(); - parser.SHOW_GRAMMAR = true; - parser.USE_DPS_AS_INITTREES = true; - parser.CONSTRUCT_SEMANTICS = true; - parser.MODE = "LEIPZIG"; + parser = new Parser(); + parser.SHOW_GRAMMAR = true; + parser.USE_DPS_AS_INITTREES = true; + parser.CONSTRUCT_SEMANTICS = true; + parser.MODE = "LEIPZIG"; - pp = new Preprocessor(USE_NER); -} - + pp = new Preprocessor(USE_NER); + } + public Templator(boolean b) { - this.tagger = new StanfordPartOfSpeechTagger(); - this.USE_WORDNET = false; - VERBOSE = b; - - List<InputStream> grammarFiles = new ArrayList<InputStream>(); - for(int i = 0; i < GRAMMAR_FILES.length; i++){ - grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); - } - - g = LTAG_Constructor.construct(grammarFiles); - - parser = new Parser(); - parser.SHOW_GRAMMAR = false; - parser.VERBOSE = b; - parser.USE_DPS_AS_INITTREES = true; - parser.CONSTRUCT_SEMANTICS = true; - parser.MODE = "LEIPZIG"; - - pp = new Preprocessor(USE_NER); - pp.setVERBOSE(b); + this.tagger = new StanfordPartOfSpeechTagger(); + this.USE_WORDNET = false; + VERBOSE = b; + + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } + + g = LTAG_Constructor.construct(grammarFiles); + + parser = new Parser(); + parser.SHOW_GRAMMAR = false; + parser.VERBOSE = b; + parser.USE_DPS_AS_INITTREES = true; + parser.CONSTRUCT_SEMANTICS = true; + parser.MODE = "LEIPZIG"; + + pp = new Preprocessor(USE_NER); + pp.setVERBOSE(b); } - + public void setUNTAGGED_INPUT(boolean b) { UNTAGGED_INPUT = b; } @@ -146,20 +146,21 @@ VERBOSE = b; } public void setGrammarFiles(String[] gf) { - GRAMMAR_FILES = gf; - List<InputStream> grammarFiles = new ArrayList<InputStream>(); - for(int i = 0; i < GRAMMAR_FILES.length; i++){ - grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); - } - g = LTAG_Constructor.construct(grammarFiles); + GRAMMAR_FILES = gf; + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } + g = LTAG_Constructor.construct(grammarFiles); } - public Set<Template> buildTemplates(String s) { - - d2s.setInputString(s); - + public Set<Template> buildTemplates(String s) {return buildTemplates(s,true);} + public Set<Template> buildTemplates(String s, boolean createSynonyms) { + + d2s.setInputString(s); + boolean clearAgain = true; - + String tagged; if (UNTAGGED_INPUT) { s = pp.normalize(s); @@ -176,135 +177,140 @@ newtagged = pp.condenseNominals(pp.findNEs(tagged,s)); } else newtagged = pp.condenseNominals(tagged); - + newtagged = pp.condense(newtagged); logger.debug("Preprocessed: " + newtagged); - - parser.parse(newtagged,g); - - if (parser.getDerivationTrees().isEmpty()) { - parser.clear(g,parser.getTemps()); - clearAgain = false; - if (VERBOSE) logger.error("[Templator.java] '" + s + "' could not be parsed."); - } - else { - try { - parser.buildDerivedTrees(g); - } catch (ParseException e) { - if (VERBOSE) logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); - } - } - // build pairs <String,POStag> from tagged - Hashtable<String,String> postable = new Hashtable<String,String>(); - for (String st : newtagged.split(" ")) { + parser.parse(newtagged,g); + + if (parser.getDerivationTrees().isEmpty()) { + parser.clear(g,parser.getTemps()); + clearAgain = false; + if (VERBOSE) logger.error("[Templator.java] '" + s + "' could not be parsed."); + } + else { + try { + parser.buildDerivedTrees(g); + } catch (ParseException e) { + if (VERBOSE) logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); + } + } + + // build pairs <String,POStag> from tagged + Hashtable<String,String> postable = new Hashtable<String,String>(); + for (String st : newtagged.split(" ")) { postable.put(st.substring(0,st.indexOf("/")).toLowerCase(),st.substring(st.indexOf("/")+1));; } - // - - Set<DRS> drses = new HashSet<DRS>(); - Set<Template> templates = new HashSet<Template>(); - - for (Dude dude : parser.getDudes()) { - UDRS udrs = d2u.convert(dude); - if (udrs != null) { - - for (DRS drs : udrs.initResolve()) { - - List<Slot> slots = new ArrayList<Slot>(); - slots.addAll(dude.getSlots()); - d2s.setSlots(slots); - d2s.redundantEqualRenaming(drs); - - if (!containsModuloRenaming(drses,drs)) { -// // DEBUG - if (VERBOSE) { - logger.debug(">>> DUDE:\n" + dude.toString()); - logger.debug("\n>>> DRS:\n"+ drs.toString()); - for (Slot sl : slots) { - logger.debug(sl.toString()); - } - } -// // - drses.add(drs); - - try { - Template temp = d2s.convert(drs,slots); - if (temp == null) { continue; } - temp = temp.checkandrefine(); - if (temp == null) { continue; } - - if (USE_WORDNET) { // find WordNet synonyms - List<String> newwords; - String word; - String pos; - for (Slot slot : temp.getSlots()) { - if (!slot.getWords().isEmpty()) { - - word = slot.getWords().get(0); - pos = postable.get(word.toLowerCase().replace(" ","_")); - - POS wordnetpos = null; - if (pos != null) { - if (equalsOneOf(pos,noun)) { - wordnetpos = POS.NOUN; - } - else if (equalsOneOf(pos,adjective)) { - wordnetpos = POS.ADJECTIVE; - } - else if (equalsOneOf(pos,verb)) { - wordnetpos = POS.VERB; - } - } - - List<String> strings = new ArrayList<String>(); - if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { - strings = wordnet.getAttributes(word); - } - - newwords = new ArrayList<String>(); - newwords.addAll(slot.getWords()); - newwords.addAll(strings); - - if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); - for (String att : getLemmatizedWords(strings)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); - } - } - if (newwords.isEmpty()) { - newwords.add(slot.getWords().get(0)); - } - List<String> newwordslist = new ArrayList<String>(); - newwordslist.addAll(newwords); - slot.setWords(newwordslist); - } - } - } - // - - templates.add(temp); - } catch (java.lang.ClassCastException e) { - continue; - } - if (ONE_SCOPE_ONLY) { break; } - } - } - } - } - - if (clearAgain) { - parser.clear(g,parser.getTemps()); - } -// System.gc(); - - return templates; - } - - public Set<Template> buildTemplatesMultiThreaded(String s) { - + // + + Set<DRS> drses = new HashSet<DRS>(); + Set<Template> templates = new HashSet<Template>(); + + for (Dude dude : parser.getDudes()) { + UDRS udrs = d2u.convert(dude); + if (udrs != null) { + + for (DRS drs : udrs.initResolve()) { + + List<Slot> slots = new ArrayList<Slot>(); + slots.addAll(dude.getSlots()); + d2s.setSlots(slots); + d2s.redundantEqualRenaming(drs); + + if (!containsModuloRenaming(drses,drs)) { + // // DEBUG + if (VERBOSE) { + logger.debug(">>> DUDE:\n" + dude.toString()); + logger.debug("\n>>> DRS:\n"+ drs.toString()); + for (Slot sl : slots) { + logger.debug(sl.toString()); + } + } + // // + drses.add(drs); + + try { + Template temp = d2s.convert(drs,slots); + if (temp == null) { continue; } + temp = temp.checkandrefine(); + if (temp == null) { continue; } + + if (USE_WORDNET) { // find WordNet synonyms + List<String> newwords; + String word; + String pos; + for (Slot slot : temp.getSlots()) { + if (!slot.getWords().isEmpty()) { + + word = slot.getWords().get(0); + pos = postable.get(word.toLowerCase().replace(" ","_")); + + POS wordnetpos = null; + if (pos != null) { + if (equalsOneOf(pos,noun)) { + wordnetpos = POS.NOUN; + } + else if (equalsOneOf(pos,adjective)) { + wordnetpos = POS.ADJECTIVE; + } + else if (equalsOneOf(pos,verb)) { + wordnetpos = POS.VERB; + } + } + + List<String> strings = new ArrayList<String>(); + if(createSynonyms) + { + if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) {strings = wordnet.getAttributes(word);} + } + + newwords = new ArrayList<String>(); + newwords.addAll(slot.getWords()); + newwords.addAll(strings); + + if(createSynonyms) + { + if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); + for (String att : getLemmatizedWords(strings)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); + } + } + } + if (newwords.isEmpty()) { + newwords.add(slot.getWords().get(0)); + } + List<String> newwordslist = new ArrayList<String>(); + newwordslist.addAll(newwords); + slot.setWords(newwordslist); + } + } + } + // + + templates.add(temp); + } catch (java.lang.ClassCastException e) { + continue; + } + if (ONE_SCOPE_ONLY) { break; } + } + } + } + } + + if (clearAgain) { + parser.clear(g,parser.getTemps()); + } + // System.gc(); + + return templates; + } + + public Set<Template> buildTemplatesMultiThreaded(String s) {return buildTemplates(s,true);} + public Set<Template> buildTemplatesMultiThreaded(String s,boolean createSynonyms) { + boolean clearAgain = true; - + String tagged; if (UNTAGGED_INPUT) { s = pp.normalize(s); @@ -321,148 +327,153 @@ newtagged = pp.condenseNominals(pp.findNEs(tagged,s)); } else newtagged = pp.condenseNominals(tagged); - + newtagged = pp.condense(newtagged); logger.debug("Preprocessed: " + newtagged); - - parser.parseMultiThreaded(newtagged,g); - - if (parser.getDerivationTrees().isEmpty()) { - parser.clear(g,parser.getTemps()); - clearAgain = false; - logger.error("[Templator.java] '" + s + "' could not be parsed."); - } - else { - try { - parser.buildDerivedTreesMultiThreaded(g); - } catch (ParseException e) { - logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); - } - } - // build pairs <String,POStag> from tagged - Hashtable<String,String> postable = new Hashtable<String,String>(); - for (String st : newtagged.split(" ")) { + parser.parseMultiThreaded(newtagged,g); + + if (parser.getDerivationTrees().isEmpty()) { + parser.clear(g,parser.getTemps()); + clearAgain = false; + logger.error("[Templator.java] '" + s + "' could not be parsed."); + } + else { + try { + parser.buildDerivedTreesMultiThreaded(g); + } catch (ParseException e) { + logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); + } + } + + // build pairs <String,POStag> from tagged + Hashtable<String,String> postable = new Hashtable<String,String>(); + for (String st : newtagged.split(" ")) { postable.put(st.substring(0,st.indexOf("/")).toLowerCase(),st.substring(st.indexOf("/")+1));; } - // - - drses = new HashSet<DRS>(); - templates = new HashSet<Template>(); - -// ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); -// for (Dude dude : p.getDudes()) { -// threadPool.execute(new DudeProcessor(dude, postable)); -// } -// threadPool.shutdown(); -// while(!threadPool.isTerminated()){} - - for (Dude dude : parser.getDudes()) { - - UDRS udrs = d2u.convert(dude); - if (udrs != null) { - - for (DRS drs : udrs.initResolve()) { - - List<Slot> slots = new ArrayList<Slot>(); - slots.addAll(dude.getSlots()); - d2s.setSlots(slots); - d2s.redundantEqualRenaming(drs); - - if (!containsModuloRenaming(drses,drs)) { -// // DEBUG - logger.debug(dude); - logger.debug(drs); - for (Slot sl : slots) { - logger.debug(sl.toString()); - } -// // - drses.add(drs); - - try { - Template temp = d2s.convert(drs,slots); - if (temp == null) { continue; } - temp = temp.checkandrefine(); - if (temp == null) { continue; } - - - if (USE_WORDNET) { // find WordNet synonyms - List<String> newwords; - String word; - String pos; - for (Slot slot : temp.getSlots()) { - if (!slot.getWords().isEmpty()) { - - word = slot.getWords().get(0); - pos = postable.get(word.toLowerCase().replace(" ","_")); - - POS wordnetpos = null; - if (pos != null) { - if (equalsOneOf(pos,noun)) { - wordnetpos = POS.NOUN; - } - else if (equalsOneOf(pos,adjective)) { - wordnetpos = POS.ADJECTIVE; - } - else if (equalsOneOf(pos,verb)) { - wordnetpos = POS.VERB; - } - } - - List<String> strings = new ArrayList<String>(); - if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { - strings = wordnet.getAttributes(word); - } - - newwords = new ArrayList<String>(); - newwords.addAll(slot.getWords()); - newwords.addAll(strings); - - if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); - for (String att : getLemmatizedWords(strings)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); - } - } - if (newwords.isEmpty()) { - newwords.add(slot.getWords().get(0)); - } - List<String> newwordslist = new ArrayList<String>(); - newwordslist.addAll(newwords); - slot.setWords(newwordslist); - } - } - } - // - - templates.add(temp); - } catch (java.lang.ClassCastException e) { - continue; - } - if (ONE_SCOPE_ONLY) { break; } - } - } - + // + + drses = new HashSet<DRS>(); + templates = new HashSet<Template>(); + + // ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + // for (Dude dude : p.getDudes()) { + // threadPool.execute(new DudeProcessor(dude, postable)); + // } + // threadPool.shutdown(); + // while(!threadPool.isTerminated()){} + + for (Dude dude : parser.getDudes()) { + + UDRS udrs = d2u.convert(dude); + if (udrs != null) { + + for (DRS drs : udrs.initResolve()) { + + List<Slot> slots = new ArrayList<Slot>(); + slots.addAll(dude.getSlots()); + d2s.setSlots(slots); + d2s.redundantEqualRenaming(drs); + + if (!containsModuloRenaming(drses,drs)) { + // // DEBUG + logger.debug(dude); + logger.debug(drs); + for (Slot sl : slots) { + logger.debug(sl.toString()); + } + // // + drses.add(drs); + + try { + Template temp = d2s.convert(drs,slots); + if (temp == null) { continue; } + temp = temp.checkandrefine(); + if (temp == null) { continue; } + + + if (USE_WORDNET) { // find WordNet synonyms + List<String> newwords; + String word; + String pos; + for (Slot slot : temp.getSlots()) { + if (!slot.getWords().isEmpty()) { + + word = slot.getWords().get(0); + pos = postable.get(word.toLowerCase().replace(" ","_")); + + POS wordnetpos = null; + if (pos != null) { + if (equalsOneOf(pos,noun)) { + wordnetpos = POS.NOUN; + } + else if (equalsOneOf(pos,adjective)) { + wordnetpos = POS.ADJECTIVE; + } + else if (equalsOneOf(pos,verb)) { + wordnetpos = POS.VERB; + } + } + + List<String> strings = new ArrayList<String>(); + if(createSynonyms) + { + if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { + strings = wordnet.getAttributes(word); + } + } + newwords = new ArrayList<String>(); + newwords.addAll(slot.getWords()); + newwords.addAll(strings); + + if(createSynonyms) + { + if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); + for (String att : getLemmatizedWords(strings)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); + } + } + } + if (newwords.isEmpty()) { + newwords.add(slot.getWords().get(0)); + } + List<String> newwordslist = new ArrayList<String>(); + newwordslist.addAll(newwords); + slot.setWords(newwordslist); + } + } + } + // + + templates.add(temp); + } catch (java.lang.ClassCastException e) { + continue; + } + if (ONE_SCOPE_ONLY) { break; } + } + } + + } + } + + + if (clearAgain) { + parser.clear(g,parser.getTemps()); + } + // System.gc(); + + return templates; } - } - - - if (clearAgain) { - parser.clear(g,parser.getTemps()); - } -// System.gc(); - - return templates; - } - + public String getTaggedInput() { return taggedInput; } - + public List<String> getUnknownWords(){ return parser.getUnknownWords(); } - + private List<String> getLemmatizedWords(List<String> words){ List<String> stemmed = new ArrayList<String>(); for(String word : words){ @@ -472,15 +483,15 @@ } else { stemmed.add(getLemmatizedWord(word)); } - + } return stemmed; } - + private String getLemmatizedWord(String word){ return lem.stem(word); } - + private boolean containsModuloRenaming(Set<DRS> drses, DRS drs) { for (DRS d : drses) { @@ -490,7 +501,7 @@ } return false; } - + private boolean equalsOneOf(String string,String[] strings) { for (String s : strings) { if (string.equals(s)) { @@ -499,30 +510,30 @@ } return false; } - + private String extractSentence(String taggedSentence){ - int pos = taggedSentence.indexOf("/"); - while(pos != -1){ - String first = taggedSentence.substring(0, pos); - int endPos = taggedSentence.substring(pos).indexOf(" "); - if(endPos == -1){ - endPos = taggedSentence.substring(pos).length(); - } - String rest = taggedSentence.substring(pos + endPos); - - taggedSentence = first + rest; - pos = taggedSentence.indexOf("/"); - - } - return taggedSentence; - - } - + int pos = taggedSentence.indexOf("/"); + while(pos != -1){ + String first = taggedSentence.substring(0, pos); + int endPos = taggedSentence.substring(pos).indexOf(" "); + if(endPos == -1){ + endPos = taggedSentence.substring(pos).length(); + } + String rest = taggedSentence.substring(pos + endPos); + + taggedSentence = first + rest; + pos = taggedSentence.indexOf("/"); + + } + return taggedSentence; + + } + class DudeProcessor implements Runnable{ - + private Dude dude; private Hashtable<String,String> postable; - + public DudeProcessor(Dude dude, Hashtable<String,String> postable) { this.dude = dude; this.postable = postable; @@ -530,94 +541,94 @@ @Override public void run() { - UDRS udrs = d2u.convert(dude); - if (udrs != null) { - - for (DRS drs : udrs.initResolve()) { - - List<Slot> slots = new ArrayList<Slot>(); - slots.addAll(dude.getSlots()); - d2s.setSlots(slots); - d2s.redundantEqualRenaming(drs); - - if (!containsModuloRenaming(drses,drs)) { -// // DEBUG - if (VERBOSE) { - logger.debug(dude); - logger.debug(drs); - for (Slot sl : slots) { - logger.debug(sl.toString()); - } - } -// // - drses.add(drs); - - try { - Template temp = d2s.convert(drs,slots); - temp = temp.checkandrefine(); - if (temp == null) { - continue; - } - - if (USE_WORDNET) { // find WordNet synonyms - List<String> newwords; - String word; - String pos; - for (Slot slot : temp.getSlots()) { - if (!slot.getWords().isEmpty()) { - - word = slot.getWords().get(0); - pos = postable.get(word.toLowerCase().replace(" ","_")); - - POS wordnetpos = null; - if (pos != null) { - if (equalsOneOf(pos,noun)) { - wordnetpos = POS.NOUN; - } - else if (equalsOneOf(pos,adjective)) { - wordnetpos = POS.ADJECTIVE; - } - else if (equalsOneOf(pos,verb)) { - wordnetpos = POS.VERB; - } - } - - List<String> strings = new ArrayList<String>(); - if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { - strings = wordnet.getAttributes(word); - } - - newwords = new ArrayList<String>(); - newwords.addAll(slot.getWords()); - newwords.addAll(strings); - - if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); - for (String att : getLemmatizedWords(strings)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); - } - } - if (newwords.isEmpty()) { - newwords.add(slot.getWords().get(0)); - } - List<String> newwordslist = new ArrayList<String>(); - newwordslist.addAll(newwords); - slot.setWords(newwordslist); - } - } - } - // - - templates.add(temp); - } catch (java.lang.ClassCastException e) { - continue; - } - if (ONE_SCOPE_ONLY) { break; } - } - } - } + UDRS udrs = d2u.convert(dude); + if (udrs != null) { + + for (DRS drs : udrs.initResolve()) { + + List<Slot> slots = new ArrayList<Slot>(); + slots.addAll(dude.getSlots()); + d2s.setSlots(slots); + d2s.redundantEqualRenaming(drs); + + if (!containsModuloRenaming(drses,drs)) { + // // DEBUG + if (VERBOSE) { + logger.debug(dude); + logger.debug(drs); + for (Slot sl : slots) { + logger.debug(sl.toString()); + } + } + // // + drses.add(drs); + + try { + Template temp = d2s.convert(drs,slots); + temp = temp.checkandrefine(); + if (temp == null) { + continue; + } + + if (USE_WORDNET) { // find WordNet synonyms + List<String> newwords; + String word; + String pos; + for (Slot slot : temp.getSlots()) { + if (!slot.getWords().isEmpty()) { + + word = slot.getWords().get(0); + pos = postable.get(word.toLowerCase().replace(" ","_")); + + POS wordnetpos = null; + if (pos != null) { + if (equalsOneOf(pos,noun)) { + wordnetpos = POS.NOUN; + } + else if (equalsOneOf(pos,adjective)) { + wordnetpos = POS.ADJECTIVE; + } + else if (equalsOneOf(pos,verb)) { + wordnetpos = POS.VERB; + } + } + + List<String> strings = new ArrayList<String>(); + if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { + strings = wordnet.getAttributes(word); + } + + newwords = new ArrayList<String>(); + newwords.addAll(slot.getWords()); + newwords.addAll(strings); + + if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); + for (String att : getLemmatizedWords(strings)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); + } + } + if (newwords.isEmpty()) { + newwords.add(slot.getWords().get(0)); + } + List<String> newwordslist = new ArrayList<String>(); + newwordslist.addAll(newwords); + slot.setWords(newwordslist); + } + } + } + // + + templates.add(temp); + } catch (java.lang.ClassCastException e) { + continue; + } + if (ONE_SCOPE_ONLY) { break; } + } + } + } } - + } } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/HierarchicalIndex.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/HierarchicalIndex.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/HierarchicalIndex.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -1,6 +1,8 @@ package org.dllearner.common.index; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.List; public class HierarchicalIndex implements Index{ @@ -48,9 +50,15 @@ @Override public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset) { - IndexResultSet rs = primaryIndex.getResourcesWithScores(queryString, limit, offset); + return getResourcesWithScores(queryString, limit, DEFAULT_OFFSET,Collections.<String>emptyList()); + } + + @Override public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset, + Collection<String> additionalFields) + { + IndexResultSet rs = primaryIndex.getResourcesWithScores(queryString, limit, offset, additionalFields); if(rs.getItems().size() < limit){ - rs.add(secondaryIndex.getResourcesWithScores(queryString, limit-rs.getItems().size(), offset)); + rs.add(secondaryIndex.getResourcesWithScores(queryString, limit-rs.getItems().size(), offset,additionalFields)); } return rs; } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/Index.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/Index.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/Index.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -1,5 +1,6 @@ package org.dllearner.common.index; +import java.util.Collection; import java.util.List; import java.util.Map; @@ -10,4 +11,5 @@ IndexResultSet getResourcesWithScores(String queryString); IndexResultSet getResourcesWithScores(String queryString, int limit); IndexResultSet getResourcesWithScores(String queryString, int limit, int offset); + IndexResultSet getResourcesWithScores(String queryString, int limit, int offset, Collection<String> additionalFields); } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/IndexResultItem.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/IndexResultItem.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/IndexResultItem.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -1,26 +1,34 @@ package org.dllearner.common.index; +import java.util.Collections; +import java.util.Map; + public class IndexResultItem { + private final String uri; + private final String label; + private final float score; + private final Map<String,? extends Object> fields; - private String uri; - private String label; - private float score; + public IndexResultItem(String uri, String label, float score) + {this(uri,label,score,Collections.<String,Object>emptyMap());} - public IndexResultItem(String uri, String label, float score) { + public IndexResultItem(String uri, String label, float score,Map<String,? extends Object> fields) + { this.uri = uri; this.label = label; this.score = score; + if(fields==null) throw new AssertionError("fields null"); + this.fields = fields; } - public String getUri() { - return uri; - } + public String getUri() {return uri;} + public String getLabel() {return label; } + public float getScore() {return score;} + public Map<String,? extends Object> getFields() {return fields;} - public String getLabel() { - return label; + @Override public String toString() + { + // TODO Auto-generated method stub + return "label:" + label + "--uri:" + uri + "--fields:" + fields; } - - public float getScore() { - return score; - } } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -2,8 +2,11 @@ import java.net.MalformedURLException; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; import java.util.List; - +import java.util.Map; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery.ORDER; import org.apache.solr.client.solrj.SolrServerException; @@ -15,19 +18,19 @@ import org.apache.solr.common.params.ModifiableSolrParams; public class SOLRIndex implements Index{ - -private CommonsHttpSolrServer server; - + + private CommonsHttpSolrServer server; + private static final int DEFAULT_LIMIT = 10; private static final int DEFAULT_OFFSET = 0; - + private String primarySearchField; private String secondarySearchField; - + private String sortField; - + private boolean restrictiveSearch = true; - + public SOLRIndex(String solrServerURL){ try { server = new CommonsHttpSolrServer(solrServerURL); @@ -36,20 +39,20 @@ e.printStackTrace(); } } - + public void setSearchFields(String primarySearchField, String secondarySearchField){ this.primarySearchField = primarySearchField; this.secondarySearchField = secondarySearchField; } - + public void setPrimarySearchField(String primarySearchField) { this.primarySearchField = primarySearchField; } - + public void setSecondarySearchField(String secondarySearchField) { this.secondarySearchField = secondarySearchField; } - + @Override public List<String> getResources(String queryString) { return getResources(queryString, DEFAULT_LIMIT); @@ -91,9 +94,12 @@ } @Override - public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset) { - IndexResultSet rs = new IndexResultSet(); - + public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset) + {return getResourcesWithScores(queryString,limit,offset,Collections.<String>emptyList());} + + public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset, Collection<String> additionalFields) + { + IndexResultSet rs = new IndexResultSet(); QueryResponse response; try { String solrString = queryString; @@ -112,36 +118,43 @@ } solrString += ")"; } - + } else { solrString += queryString; } } SolrQuery query = new SolrQuery(solrString); - query.setRows(limit); - query.setStart(offset); - if(sortField != null){ - query.addSortField(sortField, ORDER.desc); - } - query.addField("score"); + query.setRows(limit); + query.setStart(offset); + if(sortField != null){ + query.addSortField(sortField, ORDER.desc); + } + query.addField("score"); response = server.query(query); SolrDocumentList docList = response.getResults(); - - for(SolrDocument d : docList){ + + for(SolrDocument d : docList) + { + Map<String,Object> fields = new HashMap<String,Object>(); + for(String field: additionalFields) + { + Object o = d.get(field); + if(o!=null) {fields.put(field,o);} + } float score = 0; if(d.get("score") instanceof ArrayList){ score = ((Float)((ArrayList)d.get("score")).get(1)); } else { score = (Float) d.get("score"); } - rs.addItem(new IndexResultItem((String) d.get("uri"), (String) d.get("label"), score)); + rs.addItem(new IndexResultItem((String) d.get("uri"), (String) d.get("label"), score,fields)); } } catch (SolrServerException e) { e.printStackTrace(); } return rs; } - + public void setSortField(String sortField){ this.sortField = sortField; } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -1,6 +1,7 @@ package org.dllearner.common.index; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import org.dllearner.kb.sparql.ExtractionDBCache; @@ -150,5 +151,11 @@ public Model getModel() { return model; } + + @Override public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset, + Collection<String> additionalFields) + { + throw new UnsupportedOperationException("TODO: implement this later"); + } } Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -806,7 +806,7 @@ Index propertiesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_properties"); SOLRIndex boa_propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/boa_fact_detail"); boa_propertiesIndex.setSortField("boa-score"); - propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex); +// propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex); MappingBasedIndex mappingIndex= new MappingBasedIndex( SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_class_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_resource_mappings.txt").getPath(), This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |