From: <ku...@us...> - 2008-12-01 07:40:13
|
Revision: 1539 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1539&view=rev Author: kurzum Date: 2008-12-01 07:40:08 +0000 (Mon, 01 Dec 2008) Log Message: ----------- corpus still halfway done Added Paths: ----------- trunk/src/dl-learner/org/dllearner/examples/Corpus.java trunk/src/dl-learner/org/dllearner/examples/corpus/ trunk/src/dl-learner/org/dllearner/examples/corpus/Sentence.java Added: trunk/src/dl-learner/org/dllearner/examples/Corpus.java =================================================================== --- trunk/src/dl-learner/org/dllearner/examples/Corpus.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/examples/Corpus.java 2008-12-01 07:40:08 UTC (rev 1539) @@ -0,0 +1,127 @@ +package org.dllearner.examples; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.List; +import java.util.StringTokenizer; + +import org.dllearner.examples.corpus.Sentence; +import org.semanticweb.owl.apibinding.OWLManager; +import org.semanticweb.owl.model.AddAxiom; +import org.semanticweb.owl.model.OWLAxiom; +import org.semanticweb.owl.model.OWLDataFactory; +import org.semanticweb.owl.model.OWLOntology; +import org.semanticweb.owl.model.OWLOntologyChangeException; +import org.semanticweb.owl.model.OWLOntologyCreationException; +import org.semanticweb.owl.model.OWLOntologyManager; +import org.semanticweb.owl.util.SimpleURIMapper; + +public class Corpus { + + static BufferedReader br = null; + static File file; + public static String namespace = "http://www.test.de/test"; + static OWLOntologyManager manager = OWLManager.createOWLOntologyManager(); + public static OWLDataFactory factory; + static OWLOntology currentOntology; + + /** + * @param args + */ + public static void main(String[] args) { + file= new File("ling/test.export"); + init(); + try{ + Sentence sentence = nextSentence(); + sentence.processSentence(); + + /*for (String line : sentence) { + System.out.println(line); + }*/ + }catch (Exception e) { + e.printStackTrace(); + } + + saveOntology(); + } + + + + public static Sentence nextSentence()throws IOException { + List<String> retList = new ArrayList<String>(); + int retID = 0; + String line = ""; + boolean proceed = true; + while (proceed ) { + line = br.readLine(); + if (line == null){ + break; + }else if(line.startsWith("#EOS")){ + + proceed = false; + }else if(line.startsWith("%%")||line.startsWith("#BOS")){ + if(line.startsWith("#BOS")){ + StringTokenizer s = new StringTokenizer(line); + s.nextToken(); + String id = s.nextToken(); + retID = Integer.parseInt(id); + } + proceed = true;; + }else{ + retList.add(line); + } + } + + return new Sentence(retID, retList); + + } + + + + public static void init(){ + try{ + br = new BufferedReader(new FileReader(file)); + URI ontologyURI = URI.create(namespace); + //URI physicalURI = new File("cache/"+System.currentTimeMillis()+".owl").toURI(); + URI physicalURI = new File("cache/tiger.owl").toURI(); + SimpleURIMapper mapper = new SimpleURIMapper(ontologyURI, physicalURI); + manager.addURIMapper(mapper); + try{ + currentOntology = manager.createOntology(ontologyURI); + }catch(OWLOntologyCreationException e){ + //logger.error("FATAL failed to create Ontology " + ontologyURI); + e.printStackTrace(); + } + factory = manager.getOWLDataFactory(); + + }catch (Exception e) { + e.printStackTrace(); + System.exit(0); + } + } + + public static void addAxiom(OWLAxiom axiom){ + AddAxiom addAxiom = new AddAxiom(currentOntology, axiom); + try{ + manager.applyChange(addAxiom); + }catch (OWLOntologyChangeException e) { + //TODO + e.printStackTrace(); + } + } + + public static void saveOntology(){ + try{ + manager.saveOntology(currentOntology); + //manager.s + }catch (Exception e) { + e.printStackTrace(); + + } + } + +} Added: trunk/src/dl-learner/org/dllearner/examples/corpus/Sentence.java =================================================================== --- trunk/src/dl-learner/org/dllearner/examples/corpus/Sentence.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/examples/corpus/Sentence.java 2008-12-01 07:40:08 UTC (rev 1539) @@ -0,0 +1,124 @@ +package org.dllearner.examples.corpus; + +import java.net.URI; +import java.util.ArrayList; +import java.util.List; +import java.util.StringTokenizer; + +import org.dllearner.examples.Corpus; +import org.dllearner.utilities.URLencodeUTF8; +import org.semanticweb.owl.model.OWLClass; +import org.semanticweb.owl.model.OWLClassAssertionAxiom; +import org.semanticweb.owl.model.OWLDescription; +import org.semanticweb.owl.model.OWLIndividual; +import org.semanticweb.owl.model.OWLObject; +import org.semanticweb.owl.model.OWLObjectProperty; + +public class Sentence { + int id ; + OWLIndividual sentenceURI; + List<String> sentence; + List<String> wordsInOrder; + List<String> urisInOrder; + + OWLClass element; + OWLClass structElement; + OWLClass wordElement; + OWLClass sentenceClass; + + OWLClass tagClass; + OWLClass morphClass; + OWLClass edgeClass; + + OWLObjectProperty hasElement; + + public Sentence(int id, List<String> sentence) { + super(); + this.id = id; + this.sentence = sentence; + this.sentenceURI = Corpus.factory.getOWLIndividual(URI.create(Corpus.namespace+"#"+"satz"+id)); + + this.urisInOrder = new ArrayList<String>(); + this.wordsInOrder = new ArrayList<String>(); + + element = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#Element")); + structElement = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#StructureElement")); + wordElement = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#WordElement")); + sentenceClass = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#Sentence")); + tagClass = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#Tag")); + morphClass = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#Morph")); + edgeClass = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#Edge")); + + hasElement = Corpus.factory.getOWLObjectProperty(URI.create(Corpus.namespace+"#hasElement")); + + Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(this.sentenceURI,sentenceClass )); + } + + public void processSentence(){ + + int pos=0; + for (String line : sentence) { + + processLine(line,pos); + pos++; + } + } + + + public void processLine(String line, int pos){ + String elementURL = Corpus.namespace+"#"; + OWLIndividual lineElement; + StringTokenizer st = new StringTokenizer(line); + + //%String %% word lemma tag morph edge parent secedge comment + String word = st.nextToken(); + String lemma = st.nextToken(); + String tag = st.nextToken(); + String morph = st.nextToken(); + String edge = st.nextToken(); + String parent = st.nextToken(); + //word + if(word.startsWith("#")){ + elementURL+="s_"+id+"_"+word.substring(1); + lineElement = Corpus.factory.getOWLIndividual(URI.create(elementURL)); + Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(lineElement, structElement)); + + }else{ + elementURL+="s_"+id+"_"+pos+"_"+URLencodeUTF8.encode(word); + wordsInOrder.add(word); + urisInOrder.add(elementURL); + lineElement = Corpus.factory.getOWLIndividual(URI.create(elementURL)); + Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(lineElement, wordElement)); + Corpus.addAxiom(Corpus.factory.getOWLEntityAnnotationAxiom(lineElement, Corpus.factory.getCommentAnnotation(line))); + Corpus.addAxiom(Corpus.factory.getOWLEntityAnnotationAxiom(lineElement, Corpus.factory.getOWLLabelAnnotation(word))); + } + + Corpus.addAxiom(Corpus.factory.getOWLObjectPropertyAssertionAxiom(sentenceURI, hasElement, lineElement)); + + //tag + tag = (tag.equals("$("))?"SentenceBoundary":tag; + //morph + morph= "m_"+URLencodeUTF8.encode(morph); + makeClasses(lineElement, tag,morph,edge); + + } + + void makeClasses(OWLIndividual lineElement, String tag, String morph, String edge){ + if(!tag.equals("--")){ + OWLDescription d = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#"+tag)); + Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(lineElement,d )); + Corpus.addAxiom(Corpus.factory.getOWLSubClassAxiom(d, tagClass)); + } + if(!morph.equals("m_--")){ + + OWLDescription d = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#"+morph)); + Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(lineElement,d )); + Corpus.addAxiom(Corpus.factory.getOWLSubClassAxiom(d, morphClass)); + } + if(!edge.equals("--")){ + OWLDescription d = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#"+edge)); + Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(lineElement,d )); + Corpus.addAxiom(Corpus.factory.getOWLSubClassAxiom(d, edgeClass)); + } + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |