From: <km...@us...> - 2011-11-28 12:11:57
|
Revision: 3444 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3444&view=rev Author: kmpf Date: 2011-11-28 12:11:47 +0000 (Mon, 28 Nov 2011) Log Message: ----------- Logger configuration changed. Modified Paths: -------------- trunk/scripts/src/main/java/org/dllearner/examples/pdb/AminoAcids.java trunk/scripts/src/main/java/org/dllearner/examples/pdb/HelixRDFCreator.java trunk/scripts/src/main/java/org/dllearner/examples/pdb/PDBIdRdfModel.java trunk/scripts/src/main/java/org/dllearner/examples/pdb/ProteinDataSet.java Modified: trunk/scripts/src/main/java/org/dllearner/examples/pdb/AminoAcids.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/examples/pdb/AminoAcids.java 2011-11-28 11:22:01 UTC (rev 3443) +++ trunk/scripts/src/main/java/org/dllearner/examples/pdb/AminoAcids.java 2011-11-28 12:11:47 UTC (rev 3444) @@ -84,6 +84,7 @@ resprint.put(TRP, new PrintStream(allConfFiles.get(TRP))); resprint.put(TYR, new PrintStream(allConfFiles.get(TYR))); resprint.put(SEL, new PrintStream(allConfFiles.get(SEL))); + resprint.put(HYT, new PrintStream(allConfFiles.get(HYT))); } catch (FileNotFoundException e) { e.printStackTrace(); } @@ -114,6 +115,7 @@ resourceString.put(TRP, new StringBuffer(init)); resourceString.put(TYR, new StringBuffer(init)); resourceString.put(SEL, new StringBuffer(init)); + resourceString.put(HYT, new StringBuffer(init)); return resourceString; } @@ -172,6 +174,14 @@ return resnum; } + /* + * +++ Amino acid properties +++ + * + * the following amino acid properties were gathered from + * http://www.russelllab.org/aas/ + * + */ + public static HashMap<String, String> getAminoAcidNumericArffAttributeMap(){ // Hydrophobicity hydrophilic = 0; Hydrophobic = 1; aromatic = 2; aliphatic = 3 // Polarity unpolar = 0; polar = 1; positive = 2; negative = 3; @@ -232,7 +242,6 @@ resdata.put(new String("X"), new String("?,?,?")); // unknown residue (e.g. modified amino acids) resdata.put(new String("Y"), new String("Aromatic,Polar,Large")); resdata.put(new String("U"), new String("?,?,?")); - return resdata; } } Modified: trunk/scripts/src/main/java/org/dllearner/examples/pdb/HelixRDFCreator.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/examples/pdb/HelixRDFCreator.java 2011-11-28 11:22:01 UTC (rev 3443) +++ trunk/scripts/src/main/java/org/dllearner/examples/pdb/HelixRDFCreator.java 2011-11-28 12:11:47 UTC (rev 3444) @@ -53,7 +53,7 @@ // console, you have to set the threshold and log level to trace // (but we recommend just setting the log level to trace and observe // the log file) - consoleAppender.setThreshold(Level.DEBUG); + consoleAppender.setThreshold(Level.INFO); // logger 2 is writes to a file; it records all debug messages // (you can choose HTML or TXT) @@ -66,11 +66,12 @@ fileName = _dataDir + "log/log.html"; } else { // simple variant: layout2 = new SimpleLayout(); - layout2 = new PatternLayout("%r [%t] %-5p %c :\n%m%n\n"); + layout2 = new PatternLayout("%d [%t] %-5p %c : %m%n"); fileName = _dataDir + "log/log.txt"; } try { fileAppenderNormal = new FileAppender(layout2, fileName, false); + fileAppenderNormal.setThreshold(Level.INFO); } catch (IOException e) { e.printStackTrace(); } @@ -79,7 +80,7 @@ _rootLogger.removeAllAppenders(); _rootLogger.addAppender(consoleAppender); _rootLogger.addAppender(fileAppenderNormal); - _rootLogger.setLevel(Level.DEBUG); + _rootLogger.setLevel(Level.INFO); Boolean fasta = true; @@ -102,12 +103,12 @@ Boolean dlLearn = false; Boolean wekaLearn = false; - int dataSet = 5; + int dataSet = 1; /* * data for test purpose */ - PDBProtein testProtein = new PDBProtein("1XFF","A"); + PDBProtein testProtein = new PDBProtein("1EDM","B"); // PDBProtein testProtein = new PDBProtein("1LMB", "3"); // PDBProtein testProtein = new PDBProtein("8ABP"); @@ -146,6 +147,7 @@ if (rdfConf || arff) { PDBProtein protein = proteinSet.getProteinset().get(i); + _logger.info("Start with extracting data from: " + protein.getPdbID()); String pdbDir = _dataDir + protein.getPdbID() + "/"; File directory = new File(pdbDir); if(! directory.exists()) directory.mkdir(); @@ -153,7 +155,7 @@ //String arffFilePath = pdbDir + protein.getArffFileName(); _logger.info("PDB ID: " + protein.getPdbID()); - _logger.info("chain ID: " + protein.getChainID()); + _logger.info("Chain ID: " + protein.getChainID()); trainmodel = new PDBIdRdfModel(protein); @@ -189,6 +191,8 @@ trainmodel.removeStatementsWithObject(residue); Property isPartOf = ResourceFactory.createProperty("http://purl.org/dc/terms/", "isPartOf"); trainmodel.removeStatementsWithPoperty(isPartOf); + Property hasValue = ResourceFactory.createProperty("http://bio2rdf.org/pdb:", "hasValue"); + trainmodel.removeStatementsWithPoperty(hasValue); /* * we add the information which amino acid is the fourth predecessor of which other amino acid */ @@ -533,7 +537,7 @@ dataLine.append( "?" ); } - _logger.info(dataLine); + _logger.debug(dataLine); out.println(dataLine); } @@ -623,7 +627,7 @@ dataLine.append( "?" ); } - _logger.info(dataLine); + _logger.debug(dataLine); out.println(dataLine); } Modified: trunk/scripts/src/main/java/org/dllearner/examples/pdb/PDBIdRdfModel.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/examples/pdb/PDBIdRdfModel.java 2011-11-28 11:22:01 UTC (rev 3443) +++ trunk/scripts/src/main/java/org/dllearner/examples/pdb/PDBIdRdfModel.java 2011-11-28 12:11:47 UTC (rev 3444) @@ -34,7 +34,7 @@ public class PDBIdRdfModel { - private static Logger _logger = Logger.getRootLogger(); + private static Logger _logger = Logger.getLogger(HelixRDFCreator.class); private PdbRdfModel _pdbIdModel = new PdbRdfModel(); private PdbRdfModel _removedFromModel = new PdbRdfModel(); @@ -48,9 +48,9 @@ this._protein = protein; this._pdbIdModel = this.getPdbRdfModel(); this.getProtein().setSequence(extractSequence(_pdbIdModel)); - System.out.println("Sequence: " + this.getProtein().getSequence()); + _logger.info("Sequence: " + this.getProtein().getSequence()); this.getProtein().setSpecies(extractSpecies(_pdbIdModel)); - System.out.println("Species: " + this.getProtein().getSpecies()); + _logger.info("Species: " + this.getProtein().getSpecies()); createPositivesAndNegatives(); this._positionResource = createPositionResidueMap(); } @@ -122,7 +122,7 @@ " ?x1 pdb:isImmediatelyBefore ?x4 ." + " OPTIONAL { ?x5 rdfs:label ?species FILTER (str(?x5) = fn:concat(str(?x2), '/extraction/source/gene/organism')) . } . }"; - // System.out.println(queryString); + _logger.debug(queryString); PdbRdfModel construct = new PdbRdfModel(); Query query = QueryFactory.create(queryString); @@ -138,10 +138,7 @@ { RDFNode nextRes = niter.next(); species = nextRes.toString(); -/* QuerySolution soln = results.nextSolution() ; - Literal l = soln.getLiteral("species") ; // Get a result variable - must be a literal - species = l.getString();*/ - System.out.println(species); + _logger.debug(species); } } finally @@ -166,7 +163,7 @@ NodeIterator niter = model.listObjectsOfProperty(nextRes, hasValue); sequence = niter.next().toString(); - System.out.println("Sequence: " + sequence); + _logger.debug("Sequence: " + sequence); } } ; return sequence; @@ -219,9 +216,10 @@ " ?organism rdfs:label ?organismName ." + " ?seq rdf:type pdb:PolymerSequence ." + " ?seq pdb:hasValue ?sequence . } " + - "WHERE { ?x1 rdf:type pdb:Helix ." + + "WHERE { " + + " OPTIONAL { ?x1 rdf:type pdb:Helix ." + " ?x1 pdb:beginsAt ?x2 ." + - " ?x1 pdb:endsAt ?x3 ." + + " ?x1 pdb:endsAt ?x3 . } . " + " ?x3 dcterms:isPartOf ?x4 ." + " ?x4 rdf:type <http://bio2rdf.org/pdb:Polypeptide(L)> ." + " ?x5 dcterms:isPartOf ?x4 ." + @@ -245,7 +243,7 @@ " OPTIONAL { ?organism rdfs:label ?organismName " + "FILTER (str(?organism) = fn:concat(str(?x4), '/extraction/source/gene/organism')) . } . }"; - System.out.println(queryString); + _logger.debug(queryString); Query query = QueryFactory.create(queryString); QueryExecution qe = QueryExecutionFactory.create(query, model); construct.add(qe.execConstruct()); @@ -318,7 +316,6 @@ position = positionLabels.get(0); } else { position = new Integer(0); - _logger.error(""); } return position.intValue(); } @@ -347,7 +344,7 @@ "PREFIX x:<" + prop.getNameSpace() + "> " + "CONSTRUCT { ?x1 x:" + prop.getLocalName()+ " ?x2 . } " + "WHERE { ?x1 x:" + prop.getLocalName() + " ?x2 . }"; - //System.out.println(queryString); + _logger.debug(queryString); Query query = QueryFactory.create(queryString); QueryExecution qe = QueryExecutionFactory.create(query, _pdbIdModel); StmtIterator stmtiter = qe.execConstruct().listStatements(); @@ -365,7 +362,7 @@ "PREFIX x:<" + res.getNameSpace() + "> " + "CONSTRUCT { ?x1 ?x2 x:" + res.getLocalName() + " . } " + "WHERE { ?x1 ?x2 x:" + res.getLocalName() + " . }"; - // System.out.println(queryString); + _logger.debug(queryString); Query query = QueryFactory.create(queryString); QueryExecution qe = QueryExecutionFactory.create(query, _pdbIdModel); StmtIterator stmtiter = qe.execConstruct().listStatements(); @@ -392,7 +389,7 @@ while (riter.hasNext()) { // Initialization of variables needed Resource firstAA = riter.nextResource(); - System.out.println("First AA: " + firstAA.getLocalName()); + _logger.debug("First AA: " + firstAA.getLocalName()); Resource currentAA = firstAA; Resource nextAA = firstAA; boolean inHelix = false; @@ -427,15 +424,15 @@ } while (currentAA.hasProperty(iib)) ; } _positives = pos; - System.out.println("+++ Positive set +++"); + _logger.debug("+++ Positive set +++"); for (int i = 0; i < pos.size(); i++){ - System.out.println("Das " + i + "te Element: " + pos.get(i).getLocalName()); + _logger.debug("Das " + i + "te Element: " + pos.get(i).getLocalName()); } _negatives = neg; - System.out.println("+++ Negatvie set +++"); + _logger.debug("+++ Negatvie set +++"); for (int i = 0; i < neg.size(); i++){ - System.out.println("Das " + i + "te Element: " + neg.get(i).getLocalName()); + _logger.debug("Das " + i + "te Element: " + neg.get(i).getLocalName()); } } @@ -473,84 +470,4 @@ this.getProtein().setFastaFileName(fastaFileName); this.createFastaFile(dir); } - - - /* - * OLD STUFF - * - // every element in riter stands for a AA-chain start - // every first amino acid indicates a new AA-chain - while (riter.hasNext()) - { - // Initialization of variables needed - int i = 0; - Resource aaOne = riter.nextResource(); - Resource currentaa = aaOne; - Resource nextaa = aaOne; - boolean inHelix = false; - _logger.debug(currentaa.getURI()); - // look if there is a next AA - do { - ++i; - _logger.debug(i); - //looks weird, but is needed to enter loop even for the last AA which does not have a iib-Property - currentaa = nextaa; - NodeIterator resType = model.listObjectsOfProperty(currentaa,type); - - // die Guten ins Töpfchen ... - // if we get an non-empty iterator for pdb:beginsAt the next AAs are within a AA-helix - if(model.listResourcesWithProperty(ba, currentaa).hasNext() && !inHelix ) - { - inHelix = true; - } - // die Schlechten ins Kröpfchen - // if we get an non-empty iterator for pdb:endsAt and are already within a AA-helix - // the AAs AFTER the current ones aren't within a helix - if (model.listResourcesWithProperty(ea, currentaa).hasNext() && inHelix) - { - inHelix = false; - } - // get next AA if there is one - if (model.listObjectsOfProperty(currentaa, iib).hasNext()) - { - nextaa = model.getProperty(currentaa, iib).getResource(); - } - - // add current amino acid to positives or negatives set - while(resType.hasNext()) - { - Resource aaType = resType.next().asResource(); - _logger.info(aaType.getURI()); - if (resdata.get(aaType) != null) - { - if (inHelix) - { - data += i + "," + 1 + "," + resdata.get(aaType); - } - else - { - data += i + "," + 0 + "," + resdata.get(aaType); - } - } - } - - } while (currentaa.hasProperty(iib)) ; - } - - try - { - PrintStream out = new PrintStream (new File(arffFilePath)); - out.println(relation); - out.print(attribute); - out.print(data); - out.close(); - } - catch (FileNotFoundException e ) - { - System.err.println("Datei " + arffFilePath + " konnte nicht angelegt werden!"); - e.printStackTrace(); - } - - - */ } Modified: trunk/scripts/src/main/java/org/dllearner/examples/pdb/ProteinDataSet.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/examples/pdb/ProteinDataSet.java 2011-11-28 11:22:01 UTC (rev 3443) +++ trunk/scripts/src/main/java/org/dllearner/examples/pdb/ProteinDataSet.java 2011-11-28 12:11:47 UTC (rev 3444) @@ -11,8 +11,12 @@ import java.util.HashMap; import java.util.Random; +import org.apache.log4j.Logger; + public class ProteinDataSet { + private static Logger _logger = Logger.getLogger(HelixRDFCreator.class); + private static String _dataDir = "../test/pdb/"; @@ -71,11 +75,11 @@ pdbproteins.close(); // get number of lines int linenr = lines.size(); - System.out.println("File "+ pdbIDlist.getCanonicalPath() + " has " + linenr + " lines."); + _logger.info("File "+ pdbIDlist.getCanonicalPath() + " has " + linenr + " lines."); this._proteinSet = new ArrayList<PDBProtein>(linenr); for (int i = 0; i < linenr; i++) { - System.out.println("LINES element " + i + " contains " + lines.get(i)); + _logger.info("LINES element " + i + " contains " + lines.get(i)); this._proteinSet.add( new PDBProtein( this.getPdbID(i, lines), @@ -85,7 +89,7 @@ } catch (IOException e) { - System.err.println("File " + pdbIDlist.getAbsolutePath() + " could not be read in!"); + _logger.error("File " + pdbIDlist.getAbsolutePath() + " could not be read in!"); // TODO Auto-generated catch block e.printStackTrace(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |