From: <km...@us...> - 2011-11-10 08:15:47
|
Revision: 3391 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3391&view=rev Author: kmpf Date: 2011-11-10 08:15:41 +0000 (Thu, 10 Nov 2011) Log Message: ----------- Bug fixed SPARQL query that retrieves the data used in the intern PDBRdfModel. Modified Paths: -------------- trunk/scripts/pom.xml trunk/scripts/src/main/java/org/dllearner/examples/pdb/HelixRDFCreator.java trunk/scripts/src/main/java/org/dllearner/examples/pdb/PDBIdRdfModel.java trunk/scripts/src/main/java/org/dllearner/examples/pdb/PDBWekaLearner.java Modified: trunk/scripts/pom.xml =================================================================== --- trunk/scripts/pom.xml 2011-11-09 10:02:03 UTC (rev 3390) +++ trunk/scripts/pom.xml 2011-11-10 08:15:41 UTC (rev 3391) @@ -64,5 +64,11 @@ <artifactId>commons-compress</artifactId> <version>1.2</version> </dependency> + <dependency> + <groupId>weka</groupId> + <artifactId>weka</artifactId> + <version>3.6.5</version> + </dependency> + </dependencies> </project> Modified: trunk/scripts/src/main/java/org/dllearner/examples/pdb/HelixRDFCreator.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/examples/pdb/HelixRDFCreator.java 2011-11-09 10:02:03 UTC (rev 3390) +++ trunk/scripts/src/main/java/org/dllearner/examples/pdb/HelixRDFCreator.java 2011-11-10 08:15:41 UTC (rev 3391) @@ -109,8 +109,10 @@ /* * data for test purpose */ - PDBProtein testProtein = new PDBProtein("1XFF"); + PDBProtein testProtein = new PDBProtein("1XFF","A"); // PDBProtein testProtein = new PDBProtein("1LMB", "3"); +// PDBProtein testProtein = new PDBProtein("8ABP"); + /* * create a training data set @@ -185,7 +187,8 @@ trainmodel.removeStatementsWithPoperty(endsAt); Resource residue = ResourceFactory.createResource("http://bio2rdf.org/pdb:Residue"); trainmodel.removeStatementsWithObject(residue); - + Property isPartOf = ResourceFactory.createProperty("http://purl.org/dc/terms/", "isPartOf"); + trainmodel.removeStatementsWithPoperty(isPartOf); /* * we add the information which amino acid is the fourth predecessor of which other amino acid */ @@ -228,20 +231,23 @@ * proteins that originate from that particular species. If it already exists * we will append to it. */ - File speciesProteins = new File(_dataDir + protein.getSpecies() + ".pos"); - try { - String line = protein.getPdbID() + "." + protein.getChainID() + "." + protein.getSpecies() + "\n"; - FileWriter out = new FileWriter(speciesProteins, true); - _logger.debug("Write " + line + "to file " + speciesProteins.getPath()); - out.write(line); - out.close(); - } catch (FileNotFoundException e) { - _logger.error("Could not find file " + speciesProteins.getPath() ); - e.printStackTrace(); - } catch (IOException e) { - _logger.error("Something went wrong while trying to write to " + speciesProteins.getPath() ); - e.printStackTrace(); + if (protein.getSpecies() != ""){ + File speciesProteins = new File(_dataDir + protein.getSpecies() + ".pos"); + + try { + String line = protein.getPdbID() + "." + protein.getChainID() + "." + protein.getSpecies() + "\n"; + FileWriter out = new FileWriter(speciesProteins, true); + _logger.debug("Write " + line + " to file " + speciesProteins.getPath()); + out.write(line); + out.close(); + } catch (FileNotFoundException e) { + _logger.error("Could not find file " + speciesProteins.getPath() + speciesProteins.getName()); + e.printStackTrace(); + } catch (IOException e) { + _logger.error("Something went wrong while trying to write to " + speciesProteins.getPath() + speciesProteins.getName()); + e.printStackTrace(); + } } } } Modified: trunk/scripts/src/main/java/org/dllearner/examples/pdb/PDBIdRdfModel.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/examples/pdb/PDBIdRdfModel.java 2011-11-09 10:02:03 UTC (rev 3390) +++ trunk/scripts/src/main/java/org/dllearner/examples/pdb/PDBIdRdfModel.java 2011-11-10 08:15:41 UTC (rev 3391) @@ -100,12 +100,11 @@ "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " + "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> " + "PREFIX fn: <http://www.w3.org/2005/xpath-functions#> " + - "CONSTRUCT { pdb:" + this.getProtein().getPdbID() + "/extraction/source/gene/organism rdfs:label ?species. }" + + "CONSTRUCT { <http://bio2rdf.org/pdb:" + this.getProtein().getPdbID() + "/extraction/source/gene/organism> rdfs:label ?species. }" + "WHERE { ?x1 dcterms:isPartOf ?x2 ." + - " ?x1 rdf:type> ?x3 ." + + " ?x1 rdf:type ?x3 ." + " ?x1 pdb:isImmediatelyBefore ?x4 ." + - " ?x5 rdfs:label ?species " + - " FILTER (str(?x5) = fn:concat(str(?x2), '/extraction/source/gene/organism')) . }"; + " OPTIONAL { ?x5 rdfs:label ?species FILTER (str(?x5) = fn:concat(str(?x2), '/extraction/source/gene/organism')) . } . }"; // System.out.println(queryString); @@ -151,7 +150,7 @@ NodeIterator niter = model.listObjectsOfProperty(nextRes, hasValue); sequence = niter.next().toString(); - System.out.println(sequence); + System.out.println("Sequence: " + sequence); } } return sequence; @@ -206,23 +205,25 @@ " ?x3 dcterms:isPartOf ?x4 ." + " ?x4 rdf:type <http://bio2rdf.org/pdb:Polypeptide(L)> ." + " ?x5 dcterms:isPartOf ?x4 ." + - " ?x5 rdf:type ?x6 ."; + " ?x5 rdf:type ?x6 ." + + " ?x5 pdb:hasChainPosition ?x8 . " + + " OPTIONAL { ?x5 pdb:isImmediatelyBefore ?x7 . } . "; + if (chainID.length() == 1 && pdbID.length() == 4) { queryString += - " ?x5 pdb:hasChainPosition ?x8 ." + - " ?x8 dcterms:isPartOf pdb:" + + " ?x8 dcterms:isPartOf <http://bio2rdf.org/pdb:" + pdbID.toUpperCase() + "/chain_" + chainID.toUpperCase() + "> ."; } queryString += - " ?organism rdfs:label ?organismName " + - "FILTER (str(?organism) = fn:concat(str(?x4), '/extraction/source/gene/organism')) . " + + " ?x4 pdb:hasPolymerSequence ?seq . " + " ?seq rdf:type pdb:PolymerSequence . " + - " ?seq pdb:hasValue ?sequence ." + + " ?seq pdb:hasValue ?sequence . " + // with the Optional clause i get the information by which amino acid // a amino acid is followed - " OPTIONAL { ?x5 pdb:isImmediatelyBefore ?x7 . } .}"; + " OPTIONAL { ?organism rdfs:label ?organismName " + + "FILTER (str(?organism) = fn:concat(str(?x4), '/extraction/source/gene/organism')) . } . }"; System.out.println(queryString); Query query = QueryFactory.create(queryString); @@ -324,6 +325,7 @@ while (riter.hasNext()) { // Initialization of variables needed Resource firstAA = riter.nextResource(); + System.out.println("First AA: " + firstAA.getLocalName()); Resource currentAA = firstAA; Resource nextAA = firstAA; boolean inHelix = false; @@ -358,7 +360,16 @@ } while (currentAA.hasProperty(iib)) ; } _positives = pos; + System.out.println("+++ Positive set +++"); + for (int i = 0; i < pos.size(); i++){ + System.out.println("Das " + i + "te Element: " + pos.get(i).getLocalName()); + } + _negatives = neg; + System.out.println("+++ Negatvie set +++"); + for (int i = 0; i < neg.size(); i++){ + System.out.println("Das " + i + "te Element: " + neg.get(i).getLocalName()); + } } public void createFastaFile(String dir){ @@ -366,23 +377,24 @@ String fastaFilePath = dir + this.getProtein().getFastaFileName(); PrintStream out = new PrintStream (new File(fastaFilePath)); out.println(">" + this.getProtein().getPdbID() + "." + this.getProtein().getChainID() + "." + this.getProtein().getSpecies()); - int seqLength = this.getProtein().getSequence().length(); + String sequence = this.getProtein().getSequence(); + int seqLength = sequence.length(); if (seqLength > 80) { // write sequence in 80 character blocks into file int beginIndex = 0; int endIndex = 80; - for (int i = 1; endIndex <= seqLength; i++ ){ - out.println(this.getProtein().getSequence().substring(beginIndex, endIndex)); - if (seqLength - endIndex <= 80){ - out.println(this.getProtein().getSequence().substring(endIndex, seqLength)); + while ( endIndex <= seqLength ){ + out.println(sequence.substring(beginIndex, endIndex)); + if (seqLength - endIndex < 80){ + out.println(sequence.substring(endIndex, seqLength)); } beginIndex = endIndex; - endIndex += (i * 80); + endIndex += 80; } } else { - out.println(this.getProtein().getSequence()); + out.println(sequence); } out.close(); } catch (IOException e) { Modified: trunk/scripts/src/main/java/org/dllearner/examples/pdb/PDBWekaLearner.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/examples/pdb/PDBWekaLearner.java 2011-11-09 10:02:03 UTC (rev 3390) +++ trunk/scripts/src/main/java/org/dllearner/examples/pdb/PDBWekaLearner.java 2011-11-10 08:15:41 UTC (rev 3391) @@ -1,11 +1,6 @@ package org.dllearner.examples.pdb; import java.io.File; -import java.io.IOException; - -import org.apache.log4j.Logger; -/* -import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.Random; @@ -20,14 +15,14 @@ import weka.classifiers.bayes.NaiveBayes; import weka.core.Instances; import weka.core.converters.ConverterUtils.DataSource; -*/ + public class PDBWekaLearner { private static Logger logger = Logger.getRootLogger(); public PDBWekaLearner (File arffFile) throws IOException{ - /* + // create logger (configure this to your needs) SimpleLayout layout = new SimpleLayout(); FileAppender fileAppender = new FileAppender(layout, "log/sample_log.txt", false); @@ -75,6 +70,6 @@ } catch (Exception e){ e.printStackTrace(); } - */ + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |