|
From: Johan T. <jt...@us...> - 2006-10-11 15:00:37
|
Update of /cvsroot/vienti/vienti/source/se/biobanksregistersyd/vienti/datasources In directory sc8-pr-cvs9.sourceforge.net:/tmp/cvs-serv27804/source/se/biobanksregistersyd/vienti/datasources Modified Files: DsSympathy_RBBR.java Log Message: Call a stored procedure in the database instead of executing an SQL-query. Rewrote the whole tuple parse logic from scratch since the format of the resultset changed anyway. Seems to be more maintainable, but introduces a few perculiar constructions. Finally derive the values for preparationresult, storageform and storagetemp from the kind of sample stored. Coalesce biobankvalues if subsequent values are the same. Add the method 'writeData' which duplicates the sample entry into two if both specimen slides and paraffin blocks exists, each sample with a suffix of 'G' (glas) or 'K' (kloss) respectively. Index: DsSympathy_RBBR.java =================================================================== RCS file: /cvsroot/vienti/vienti/source/se/biobanksregistersyd/vienti/datasources/DsSympathy_RBBR.java,v retrieving revision 1.18 retrieving revision 1.19 diff -u -d -r1.18 -r1.19 --- DsSympathy_RBBR.java 20 Jun 2006 14:24:29 -0000 1.18 +++ DsSympathy_RBBR.java 11 Oct 2006 15:00:30 -0000 1.19 @@ -24,6 +24,18 @@ ** liable for any use that may be made of the information contained herein. ** ** $Log$ + ** Revision 1.19 2006/10/11 15:00:30 jtorin + ** Call a stored procedure in the database instead of executing an SQL-query. + ** Rewrote the whole tuple parse logic from scratch since the format of + ** the resultset changed anyway. Seems to be more maintainable, but + ** introduces a few perculiar constructions. + ** Finally derive the values for preparationresult, storageform and + ** storagetemp from the kind of sample stored. + ** Coalesce biobankvalues if subsequent values are the same. + ** Add the method 'writeData' which duplicates the sample entry into two + ** if both specimen slides and paraffin blocks exists, each sample with + ** a suffix of 'G' (glas) or 'K' (kloss) respectively. + ** ** Revision 1.18 2006/06/20 14:24:29 amijdema ** Removed the inline class CurrentClassGetter. Now they use CurrentContext.java ** in its place. @@ -103,9 +115,9 @@ package se.biobanksregistersyd.vienti.datasources; +import java.sql.CallableStatement; import java.sql.ResultSet; import java.sql.SQLException; -import java.sql.Statement; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; @@ -137,7 +149,8 @@ /** Current classname. * "se.biobanksregistersyd.vienti.datasource.DsSympathy_RBBR" */ - private static final String CLASSNAME = CurrentContext.getCurrentClassName(); + private static final String CLASSNAME = + CurrentContext.getCurrentClassName(); /** log4j. */ private static Logger logger = Logger.getLogger(CLASSNAME); @@ -145,7 +158,8 @@ /** * The name of the class that this Datasource is supposed to work with. */ - private final String companionDatafile = "se.biobanksregistersyd.vienti.datafiles.DfXML_RBBR"; + private final String companionDatafile = + "se.biobanksregistersyd.vienti.datafiles.DfXML_RBBR"; /** @@ -168,6 +182,9 @@ */ private Boolean complete = null; + /** The number of samples written to the <code>Datafile</code>. */ + private int numberOfExportedSamples = 0; + /** * Creates a <code>DsSympathy_RBBR</code> object and loads neccesary * properties to set it up correctly. Constructors in subclasses should call @@ -268,8 +285,9 @@ if (startdate == null) { throw new GatheringException( GatheringExceptionEnum.DATA_NOT_AVAILABLE, - "No value for 'startdate' available, neither through command line " - + "parameters nor persistent data (hint: see parameter '-s')."); + "No value for 'startdate' available, neither through " + + "command line parameters nor persistent data (hint: " + + "see parameter '-s')."); } // Get the end timestamp, either from command line @@ -315,8 +333,9 @@ } /** - * Generate random data and use callback methods on the associated - * <code>Datafile</code> to output it. + * Query the under-lying database for the laboratory system SymPathy + * and use callback methods on the associated <code>Datafile</code> + * to output it. * * @throws GatheringException * if an error was detected while opening any required @@ -332,53 +351,39 @@ // Recast to be able to use sub-class methods. DfXML_RBBR df = (DfXML_RBBR) getDatafile(); - // TODO Debug /JT - if (1 == 0) - return; +// // Some example of data. Works in Lund against the 'synergi' DB. +// // Small nice example of multiple tuples. +// startdate = "2006-10-04 11:07:00"; +// enddate = "2006-10-04 11:08:00"; +// // Example for a referral that contains 04x extries. +// startdate = "2004-04-22 10:14:00"; +// enddate = "2004-04-22 10:14:01"; +// // Just som tuples. +// startdate = "2004-04-23 10:14:00"; +// enddate = "2004-04-23 10:15:00"; +// // Consent decision changes 1 -> 6. +// startdate = "2006-06-26 10:50:00"; +// enddate = "2006-06-26 10:50:01"; +// // Example of multiple glass&blocks. +// startdate = "2005-08-30 09:50:00"; +// enddate = "2005-08-30 10:55:01"; + startdate = "2006-10-10 00:50:00"; + enddate = "2006-10-11 00:50:00"; - Statement stmt; + CallableStatement cs; try { - stmt = getDbConnection().createStatement(); + String query = "{call la_biobanksreg(" + + "'" + startdate + "'," + "'" + enddate + "')}"; + logger.debug("SQL-query: " + query); + cs = getDbConnection().prepareCall(query); } catch (SQLException e) { - throw new GatheringException( - GatheringExceptionEnum.COULD_NOT_OPEN_RESOURCE, e, - "Could not create a SQL statement."); + throw new GatheringException(GatheringExceptionEnum.IO_ERROR, e, + "Couldn't prepare a SQL-statement to gather data."); } - - /* - * Some comments about the database query: Both r.ankomstdat - * (arrivaldate) and b.datum (what seems to be the consent decision - * registration date) is compared in the selection (unfortunately this - * makes the query much slower) since they tend to have quite a lot of - * "clever" variations. In some found cases the former value is actually - * behind the latter, dispite that it should be impossible to register a - * consent decision on a sample that haven't arrived yet. The sorting - * order is important since the later algoritm depends on parts of it. - */ - final String ls = System.getProperty("line.separator"); - String query = "SELECT p.remisstyp, p.remissar, p.remissnr, p.provnr," - + ls - + " r.patientid, r.ankomstdat, r.provdat, r.remklinikkod," - + ls + " r.svarlakare, b.svarnr, b.kod, b.datum, b.kasserat, " - + ls + " b.kasserat_sign " + ls + "FROM pr_prov p " + ls - + "JOIN re_remiss r ON" + ls - + " r.remisstyp = p.remisstyp AND" + ls - + " r.remissar = p.remissar AND" + ls - + " r.remissnr = p.remissnr" + ls - + "LEFT OUTER JOIN re_nat_biobank b ON" + ls - + " b.remisstyp = p.remisstyp AND" + ls - + " b.remissar = p.remissar AND" + ls - + " b.remissnr = p.remissnr" + ls + "WHERE (r.ankomstdat > '" - + startdate + "' AND" + ls + " r.ankomstdat <= '" - + enddate + "')" + ls + " OR (b.datum > '" + startdate - + "' AND" + ls + " b.datum <= '" + enddate + "')" + ls - + "ORDER BY p.remisstyp, p.remissar, p.remissnr," + ls - + " p.provnr, b.datum" + ls; - logger.debug("SQL-query: " + ls + query); - + ResultSet rs; try { - rs = stmt.executeQuery(query); + rs = cs.executeQuery(); } catch (SQLException e) { throw new GatheringException(GatheringExceptionEnum.IO_ERROR, e, "Couldn't execute a SQL-statement to gather data."); @@ -386,7 +391,6 @@ String comment = "Information gathered by '" + CLASSNAME + "', revision " + revision + "."; - df.open(); df.writeHeader(comment, startdate, enddate); df.writeStartSampleCollection(sampleCollectionId, complete); @@ -394,96 +398,152 @@ Hashtable dataset = new Hashtable(); List biobankvalues = new ArrayList(); - int numberOfExportedSamples = 0; try { boolean tupleOk = rs.next(); - while (tupleOk) { - dataset.clear(); + String remisstyp; + String remissar; + String remissnr; + int svarnr; + String tmpSampleId = null; + String remisstypCurr = ""; + String remissarCurr = ""; + String remissnrCurr = ""; + int svarnrCurr = 0; + String kodPrev = ""; // Previously added biobankvalue. + // The number of paraffin blocks and specimen slides. + int klossant = 0; + int glasant = 0; - String remisstyp = rs.getString("remisstyp").trim(); - String remissar = rs.getString("remissar").trim(); - String remissnr = rs.getString("remissnr").trim(); - String provnr = rs.getString("provnr").trim(); + while (tupleOk) { + remisstyp = rs.getString("remisstyp").trim(); + remissar = rs.getString("remissar").trim(); + remissnr = rs.getString("remissnr").trim(); + svarnr = rs.getInt("svarnr"); + // logger.debug("remisstyp: " + remisstyp); + // logger.debug("remissar : " + remissar); + // logger.debug("remissnr : " + remissnr); + // logger.debug("svarnr : " + svarnr); + // Is this the same referral as the last iteration? + if (!remisstyp.equals(remisstypCurr) + || !remissar.equals(remissarCurr) + || !remissnr.equals(remissnrCurr)) { - dataset.put("rbbr:personId", rs.getString("patientid").trim()); - dataset.put("base:mainSampleId", remisstyp + remissar + "-" - + remissnr + "-" + provnr); - if (rs.getString("provdat") != null) { - dataset.put("base:samplingDate", rs.getString("provdat") - .substring(0, "YYYY-MM-DD".length())); - } - dataset.put("base:arrivalDate", rs.getString("ankomstdat") - .substring(0, "YYYY-MM-DD".length())); - dataset.put("rbbr:sampleOrderingUnit", rs.getString( - "remklinikkod").substring(0, "XXXXXX".length())); - dataset.put("rbbr:sampleOrderingUnitType", - "rbbr:sampleOrderingUnitMG"); - // The database field 'svarlakare' is unfortunately known to - // contain characters in the lower part of ASCII, and these - // are illegal in XML. - // TODO A better solution would be to try converting these chars - // back to their intended representation (é, ü and so on). - String tmp = cleanString(rs.getString("svarlakare").trim()); - if (tmp.length() != 0) { - dataset.put("rbbr:sampleOrderer", tmp); - } - if (rs.getString("remisstyp").trim().equals("O")) { - // O = Obduktion = Autopsy - dataset.put("base:vitalStatusAtSampling", "DEAD"); - } else { - dataset.put("base:vitalStatusAtSampling", "ALIVE"); - } + // Write the last entry if there is data. + if (!dataset.isEmpty() && tmpSampleId != null) { + // logger.debug("Write in-loop."); + writeData(df, tmpSampleId, dataset, biobankvalues, + klossant, glasant); + dataset.clear(); + biobankvalues.clear(); + } - // TODO Static values! - dataset.put("rbbr:preparationResult", "PLASMA"); - dataset.put("rbbr:storageForm", "FREEZED"); - dataset.put("rbbr:storageTemperature", "FREEZER_-20DEGC"); + // logger.debug("New referral."); + remisstypCurr = remisstyp; + remissarCurr = remissar; + remissnrCurr = remissnr; + svarnrCurr = 0; + kodPrev = ""; + tmpSampleId = remisstyp + remissar + "-" + remissnr; - if (rs.getString("svarnr") == null) { - // Sample has no biobankvalue/consent decision. - // Advance to the next tuple. - tupleOk = rs.next(); - } else { - biobankvalues.clear(); + /* Read basic data. + */ + String patientid = rs.getString("patientid").trim(); + // Sampling date + String provdat = null; + if (rs.getString("provdat") != null) { + provdat = rs.getString("provdat") + .substring(0, "YYYY-MM-DD".length()); + } + String ankomstdat = rs.getString("ankomstdat") + .substring(0, "YYYY-MM-DD".length()); + String remklinikkod = rs.getString( + "remklinikkod").substring(0, "XXXXXX".length()); + // The database field 'svarlakare' is unfortunately known to + // contain characters in the lower part of ASCII, and these + // are illegal in XML. + // TODO A better solution would be to try converting these chars back to their intended representation (é, ü and so on). + String svarlakare = + cleanString(rs.getString("svarlakare").trim()); + // The number of paraffin blocks and specimen slides. + klossant = rs.getInt("klossant"); + glasant = rs.getInt("glasant"); - addBiobankValue(biobankvalues, rs.getString("kod").trim(), + /* Put data into the dataset. + */ + dataset.put("rbbr:personId", patientid); + if (provdat != null) { + dataset.put("base:samplingDate", provdat); + } + dataset.put("base:arrivalDate", ankomstdat); + dataset.put("rbbr:sampleOrderingUnit", remklinikkod); + dataset.put("rbbr:sampleOrderingUnitType", + "rbbr:sampleOrderingUnitMG"); + if (svarlakare.length() != 0) { + dataset.put("rbbr:sampleOrderer", svarlakare); + } + if (remisstyp.toUpperCase().equals("O")) { + // O = Obduktion = Autopsy + dataset.put("base:vitalStatusAtSampling", "DEAD"); + } else { + dataset.put("base:vitalStatusAtSampling", "ALIVE"); + } + } + // Is this the same 'svar' as the last iteration? + // And is it non-zero (non-null)? + // The 'kod.startsWith("4")' bit is an ugly hack to + // coalesce the individual biobank values (4, 4a, 4b, and + // so on) into one. See addBiobankValue() for more info + // on how this is handled. Here be dragons... + String kod = rs.getString("kod"); + if (kod != null) { + kod = kod.trim(); + } + if ((svarnr != 0 && svarnr != svarnrCurr) + || (svarnr != 0 && kod.startsWith("4"))) { + // Since the biobank value is duplicated in each + // answer (svar) to the referral there is no real + // meaning of adding it if it's the same value. + // A special case would be if the sample provider + // actually has given the same consent decision + // twice, which would not be reflected in the + // transmission of data. However, the true spirit + // of the sample provider is still respected. + if (!kod.equals(kodPrev)) { + // logger.debug("Add biobank value: " + kod); + addBiobankValue(biobankvalues, kod, rs.getString("datum").trim().substring(0, - "YYYY-MM-DD HH:MM:SS".length()).replaceAll( - " ", "T")); - - // Advance to the next tuple. - tupleOk = rs.next(); - - while (tupleOk - && remisstyp.compareTo(rs.getString("remisstyp") - .trim()) == 0 - && remissar.compareTo(rs.getString("remissar") - .trim()) == 0 - && remissnr.compareTo(rs.getString("remissnr") - .trim()) == 0 - && provnr.compareTo(rs.getString("provnr").trim()) == 0) { - // Still on same referral. Just add on the - // biobanksvalue. - - addBiobankValue(biobankvalues, rs.getString("kod") - .trim(), rs.getString("datum").trim() - .substring(0, "YYYY-MM-DD HH:MM:SS".length()) - .replaceAll(" ", "T")); - - tupleOk = rs.next(); + "YYYY-MM-DD HH:MM:SS".length()). + replaceAll(" ", "T")); + kodPrev = kod; } - - dataset.put("rbbr:biobankValues", biobankvalues); + svarnrCurr = svarnr; } - // System.out.println(dataset.toString()); - - df.writeEntry(dataset); + // SNOMED data is duplicated for each 'svar', thus + // only read it for the first svar. + // TODO This critera will fail in the case of multiple + // 04x biobankvalues. The safest way is that for each tuple + // of SNOMED data, check if it already exists before + // adding it. +// if (svarnr == 1) { +// // Read SNOMED data. +// } - numberOfExportedSamples += 1; + // Advance to the next tuple. + tupleOk = rs.next(); + } + + // Write the last entry if there is data. + if (!dataset.isEmpty()) { + // logger.debug("Write after."); + writeData(df, tmpSampleId, dataset, biobankvalues, + klossant, glasant); + dataset.clear(); + biobankvalues.clear(); } } catch (SQLException e) { throw new GatheringException(GatheringExceptionEnum.IO_ERROR, e, - "An exception occured while retreiving data from a resultset."); + "An exception occured while retreiving data from " + + "a resultset."); } df.writeEndSampleCollection(); @@ -499,6 +559,57 @@ } /** + * Write data to the datafile. Utility method since the operation is + * done in two places. + * + * @param df The <code>Datafile</code> the data is to be + * written to. + * @param tmpSampleId A partial sample id for the sample to write. + * The final id is constructed in-metod. + * @param dataset A previously initialised hashset of values, + * describing one sample to write to the file. + * @param biobankvalues The list of biobankvalues for the sample. + * @param klossant The number of paraffin blocks that belongs + * to this referral. + * @param glasant The number of specimen slides that belongs + * to this referral. + * @throws GenerationException if an error was detected while + * generating the file. + */ + void writeData(final DfXML_RBBR df, final String tmpSampleId, + final Hashtable dataset, final List biobankvalues, + final int klossant, final int glasant) + throws GenerationException { + if (!dataset.isEmpty()) { + if (biobankvalues.size() > 0) { + dataset.put("rbbr:biobankValues", biobankvalues); + } + if (klossant > 0) { + dataset.put("base:mainSampleId", tmpSampleId + "K"); + dataset.put("rbbr:multiplicity", Integer.toString(klossant)); + dataset.put("rbbr:multiplicity-type", "MULTIPLE"); + dataset.put("rbbr:preparationResult", "PARAFFIN_BLOCK"); + dataset.put("rbbr:storageForm", "PARAFFIN_BLOCK"); + dataset.put("rbbr:storageTemperature", "ROOM_TEMPERATURE"); + df.writeEntry(dataset); + numberOfExportedSamples += 1; + // System.out.println(dataset.toString()); + } + if (glasant > 0) { + dataset.put("base:mainSampleId", tmpSampleId + "G"); + dataset.put("rbbr:multiplicity", Integer.toString(glasant)); + dataset.put("rbbr:multiplicity-type", "MULTIPLE"); + dataset.put("rbbr:preparationResult", "SPECIMEN_SLIDE"); + dataset.put("rbbr:storageForm", "SPECIMEN_SLIDE"); + dataset.put("rbbr:storageTemperature", "ROOM_TEMPERATURE"); + df.writeEntry(dataset); + numberOfExportedSamples += 1; + // System.out.println(dataset.toString()); + } + } + } + + /** * Add a biobankvalue retrieved from a SymPathy database to a list of * existing values. * @@ -536,7 +647,7 @@ || newCode.equals("4b") || newCode.equals("4c") || newCode.equals("4d") || newCode.equals("4e") || newCode.equals("4f")) { - // This newCode makes the assumption that every biobank value + // This code makes the assumption that every biobank value // 4/"04" is part of the same consent decision if and only // if they have the same date. At the moment this looks like // the norm in SymPathy. @@ -580,21 +691,22 @@ // In Java 1.5 it is possible to // String.replace(new CharSequence("..."), new CharSequence("...")); return str.replace((char) 0, '*').replace((char) 1, '*').replace( - (char) 2, '*').replace((char) 3, '*').replace((char) 4, '*') - .replace((char) 5, '*').replace((char) 6, '*').replace( - (char) 7, '*').replace((char) 8, '*').replace((char) 9, - '*').replace((char) 9, '*').replace((char) 10, '*') - .replace((char) 11, '*').replace((char) 12, '*').replace( - (char) 13, '*').replace((char) 14, '*').replace( - (char) 15, '*').replace((char) 16, '*').replace( - (char) 17, '*').replace((char) 18, '*').replace( - (char) 19, '*').replace((char) 20, '*').replace( - (char) 21, '*').replace((char) 22, '*').replace( - (char) 23, '*').replace((char) 24, '*').replace( - (char) 25, '*').replace((char) 26, '*').replace( - (char) 27, '*').replace((char) 28, '*').replace( - (char) 29, '*').replace((char) 30, '*').replace( - (char) 31, '*'); + (char) 2, '*').replace((char) 3, '*'). + replace((char) 4, '*').replace((char) 5, '*') + .replace((char) 6, '*').replace((char) 7, '*') + .replace((char) 8, '*').replace((char) 9,'*') + .replace((char) 9, '*').replace((char) 10, '*') + .replace((char) 11, '*').replace((char) 12, '*') + .replace((char) 13, '*').replace((char) 14, '*') + .replace((char) 15, '*').replace((char) 16, '*') + .replace((char) 17, '*').replace((char) 18, '*') + .replace((char) 19, '*').replace((char) 20, '*') + .replace((char) 21, '*').replace((char) 22, '*') + .replace((char) 23, '*').replace((char) 24, '*') + .replace((char) 25, '*').replace((char) 26, '*') + .replace((char) 27, '*').replace((char) 28, '*') + .replace((char) 29, '*').replace((char) 30, '*') + .replace((char) 31, '*'); } /** |