From: Steve S. <ssu...@us...> - 2003-07-28 17:13:49
|
Update of /cvsroot/dlese/dlese-tools-project/src/org/dlese/dpc/services/idmapper In directory sc8-pr-cvs1:/tmp/cvs-serv14781/idmapper Modified Files: Idmap.java ResourceDesc.java Log Message: restructure dup handling: now create warnings Index: Idmap.java =================================================================== RCS file: /cvsroot/dlese/dlese-tools-project/src/org/dlese/dpc/services/idmapper/Idmap.java,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -r1.17 -r1.18 *** Idmap.java 25 Jul 2003 01:00:49 -0000 1.17 --- Idmap.java 28 Jul 2003 17:13:47 -0000 1.18 *************** *** 1416,1426 **** throws IdmapException, MmdException { ! int ii, irsd; Object[][] dbmat; - String oldStatus, newStatus; int totalwarnings = 0; // Get all db recs and insure all are in the list of XML files. ! // If not, change DB status to "holding". HashSet idset = new HashSet(); --- 1416,1425 ---- throws IdmapException, MmdException { ! int ii, irsd, jrsd; Object[][] dbmat; int totalwarnings = 0; // Get all db recs and insure all are in the list of XML files. ! // If not, simply delete the relevant DB records. HashSet idset = new HashSet(); *************** *** 1430,1436 **** dbmat = dbconn.getDbTable( ! "SELECT id, fileName, primaryUrl, status FROM idmapMmd" + " WHERE collKey = " + dbconn.dbstring( collKey), ! new String[] { "string", "string", "string", "string"}, // types true); // allow nulls in primaryUrl for (ii = 0; ii < dbmat.length; ii++) { --- 1429,1435 ---- dbmat = dbconn.getDbTable( ! "SELECT id, fileName, primaryUrl FROM idmapMmd" + " WHERE collKey = " + dbconn.dbstring( collKey), ! new String[] { "string", "string", "string"}, // types true); // allow nulls in primaryUrl for (ii = 0; ii < dbmat.length; ii++) { *************** *** 1438,1453 **** String dbfilename = (String) dbmat[ii][1]; String primaryUrl = (String) dbmat[ii][2]; - oldStatus = (String) dbmat[ii][3]; - - newStatus = MmdRecord.STATUS_HOLDING; - if (( ! idset.contains( dbid)) && (! oldStatus.equals( newStatus))) - { - totalwarnings++; - Warning wng = new Warning( DpcErrors.IDMAP_NO_XML_FILE, dbid, - "(no xml file)", - "Changing the DB status from \"" + oldStatus - + "\" to \"" + newStatus + "\".", - primaryUrl, null); dbconn.updateDb( "INSERT INTO idmapMessages" + " (collKey, id, recCheckDate, msgType, fileName, msg," --- 1437,1442 ---- String dbfilename = (String) dbmat[ii][1]; String primaryUrl = (String) dbmat[ii][2]; + if ( ! idset.contains( dbid)) { dbconn.updateDb( "INSERT INTO idmapMessages" + " (collKey, id, recCheckDate, msgType, fileName, msg," *************** *** 1455,1485 **** + " VALUES ( " + dbconn.dbstringcom( collKey) ! + dbconn.dbstringcom( dbid) + dbconn.dbstringcom( new Timestamp( curdate)) // recCheckDate ! + dbconn.dbstringcom( DpcErrors.getMessage( wng.msgType)) + dbconn.dbstringcom( "(no xml file)") ! + dbconn.dbstringcom( wng.msg) ! + dbconn.dbstringcom( wng.info1) ! + dbconn.dbstring( wng.info2) + ")"); ! dbconn.updateDb("UPDATE idmapMmd SET" ! + " recCheckDate = " ! + dbconn.dbstring( new Timestamp( curdate)) ! + ", status = " + dbconn.dbstring( newStatus) ! + ", metaChecksum = " + dbconn.dbstring( 0) ! + ", hasError = " + dbconn.dbstring( 1) ! + " WHERE collKey = " + dbconn.dbstring(collKey) ! + " AND id = " + dbconn.dbstring(dbid)); ! ! dbconn.updateDb( "INSERT INTO idmapHistory" ! + " SELECT * FROM idmapMmd" ! + " WHERE collKey = " + dbconn.dbstring( collKey) ! + " AND id = " + dbconn.dbstring( dbid)); ! } } // for ii // For each ResourceDesc, check and store its results --- 1444,1517 ---- + " VALUES ( " + dbconn.dbstringcom( collKey) ! + dbconn.dbstringcom( "(id deleted)") // id + dbconn.dbstringcom( new Timestamp( curdate)) // recCheckDate ! + dbconn.dbstringcom( DpcErrors.getMessage( ! DpcErrors.IDMAP_NO_XML_FILE)) + dbconn.dbstringcom( "(no xml file)") ! + dbconn.dbstringcom( "No XML file found for id." ! + " DB info deleted.") ! + dbconn.dbstringcom( dbid) ! + dbconn.dbstring( "") + ")"); + totalwarnings++; ! dbconn.updateDb("DELETE FROM idmapMmd WHERE" ! + " collKey = " ! + dbconn.dbstring( collKey) ! + " AND id = " ! + dbconn.dbstring( dbid)); ! dbconn.updateDb("DELETE FROM idmapMessages WHERE" ! + " collKey = " ! + dbconn.dbstring( collKey) ! + " AND id = " ! + dbconn.dbstring( dbid)); } } // for ii + // For each ResourceDesc, check for duplicates + + for (irsd = 0; irsd < rsds.length; irsd++) { + ResourceDesc basersd = rsds[irsd]; + long basechecksum = basersd.getPrimaryChecksum(); + if (basechecksum != 0 && basersd.getDuplicateRsd() == null) { + + // n^2 search is not optimal. But since n is small + // and time isn't critical, it'll work here. + long numdups = 0; + for (jrsd = irsd + 1; jrsd < rsds.length; jrsd++) { + ResourceDesc testrsd = rsds[jrsd]; + long testchecksum = testrsd.getPrimaryChecksum(); + if (testchecksum == basechecksum) numdups++; + } + if (numdups > 0) { + String idstg = ""; + for (jrsd = irsd + 1; jrsd < rsds.length; jrsd++) { + ResourceDesc testrsd = rsds[jrsd]; + long testchecksum = testrsd.getPrimaryChecksum(); + if (testchecksum == basechecksum) { + idstg += " " + testrsd.getId() + + "(file: " + testrsd.getFileName() + ")"; + testrsd.addWarning( new Warning( + DpcErrors.IDMAP_DUP_DESCRIPTION, + testrsd.getId(), testrsd.getFileName(), + "This rec has " + numdups + " duplicates.", + "See " + basersd.getId() + " for details", + null)); + totalwarnings++; + testrsd.setDuplicateRsd( basersd); + } + } + basersd.addWarning( new Warning( + DpcErrors.IDMAP_DUP_DESCRIPTION, + basersd.getId(), basersd.getFileName(), + "This rec has " + numdups + " duplicates. They are: ", + idstg, + null)); + totalwarnings++; + } + } + } + // For each ResourceDesc, check and store its results *************** *** 1516,1519 **** --- 1548,1556 ---- } // for irsd + + + + + if (bugs >= 10) { prtln("\n==========================\n" *************** *** 1579,1583 **** int ii; Object[][] dbmat; - String oldStatus, newStatus; long lastDateUp, lastDateDown; double vitscale = 100; --- 1616,1619 ---- *************** *** 1611,1615 **** // Create the idmapMmd entry if it doesn't exist. // If it does exist: - // - if status == submitted or holding, change to accessioned // - if checksum changed, issue warning --- 1647,1650 ---- *************** *** 1640,1653 **** if (rsd.hasSevereError()) lastDateDown = curdate; - Timestamp firstaccessiondate; - if (func == FUNC_CHECKTEST) { - newStatus = MmdRecord.STATUS_SUBMITTED; - firstaccessiondate = null; - } - else { - newStatus = MmdRecord.STATUS_ACCESSIONED; - firstaccessiondate = new Timestamp( curdate); - } - dbconn.updateDb( "INSERT INTO idmapMmd" + " (collKey, id, fileName, primaryUrl, status," --- 1675,1678 ---- *************** *** 1661,1666 **** + dbconn.dbstringcom( rsd.getFileName()) + dbconn.dbstringcom( rsd.getPrimaryUrl()) ! + dbconn.dbstringcom( newStatus) ! + dbconn.dbstringcom( firstaccessiondate) + dbconn.dbstringcom( new Timestamp( curdate)) // lastMetaModDate + dbconn.dbstringcom( new Timestamp( curdate)) // recCheckDate --- 1686,1691 ---- + dbconn.dbstringcom( rsd.getFileName()) + dbconn.dbstringcom( rsd.getPrimaryUrl()) ! + dbconn.dbstringcom( MmdRecord.STATUS_ACCESSIONED) ! + dbconn.dbstringcom( new Timestamp( curdate)) // firstAccession + dbconn.dbstringcom( new Timestamp( curdate)) // lastMetaModDate + dbconn.dbstringcom( new Timestamp( curdate)) // recCheckDate *************** *** 1680,1684 **** String oldFilename = (String) dbmat[0][0]; String oldPrimaryUrl = (String) dbmat[0][1]; ! oldStatus = (String) dbmat[0][2]; Long firstaccessiondate = (Long) dbmat[0][3]; long oldMetaChecksum = ((Long) dbmat[0][4]).longValue(); --- 1705,1709 ---- String oldFilename = (String) dbmat[0][0]; String oldPrimaryUrl = (String) dbmat[0][1]; ! String oldStatus = (String) dbmat[0][2]; Long firstaccessiondate = (Long) dbmat[0][3]; long oldMetaChecksum = ((Long) dbmat[0][4]).longValue(); *************** *** 1687,1716 **** lastDateDown = ((Long) dbmat[0][7]).longValue(); - // If FUNC_CHECKALL and status is "submitted" or "holding", - // changed to "accessioned" and issue warning. - - newStatus = oldStatus; - if (func == FUNC_CHECKALL - && (oldStatus.equals(MmdRecord.STATUS_SUBMITTED) - || oldStatus.equals(MmdRecord.STATUS_HOLDING))) - { - newStatus = MmdRecord.STATUS_ACCESSIONED; - rsd.addWarning( new Warning( - DpcErrors.IDMAP_CHANGE_STATUS, rsd.getId(), - rsd.getFileName(), - "Old, new status:", - oldStatus, newStatus)); - dbconn.updateDb("UPDATE idmapMmd SET status = " - + dbconn.dbstring( newStatus) - + " WHERE collKey = " + dbconn.dbstring( collKey) - + " AND id = " + dbconn.dbstring( rsd.getId())); - - if (firstaccessiondate == null) { - dbconn.updateDb("UPDATE idmapMmd SET firstAccessionDate = " - + dbconn.dbstring( new Timestamp( curdate)) - + " WHERE collKey = " + dbconn.dbstring( collKey) - + " AND id = " + dbconn.dbstring( rsd.getId())); - } - } // If fileName has changed, update it --- 1712,1715 ---- *************** *** 1987,1991 **** int ii; Object[][] dbmat; - int totaldups = 0; // If this is a "send email day" and we haven't sent email --- 1986,1989 ---- *************** *** 2133,2194 **** - // adn only: check for duplicates - if (metastyle.equals( MmdRecord.MS_ADN)) { - dbmat = dbconn.getDbTable( - "SELECT id, primaryUrl, primaryChecksum" - + " FROM idmapHistory" - + " WHERE collKey = " + dbconn.dbstring( collKey) - + " AND recCheckDate = " - + dbconn.dbstring( new Timestamp( curdate)) - + " ORDER BY primaryChecksum", - new String[] { "string", "string", "long"}, // types - true); // allow nulls in fields - - boolean headerdone = false; - int igroup = 0; - for (ii = 0; ii < dbmat.length; ii++) { - String id = (String) dbmat[ii][0]; - String primaryUrl = (String) dbmat[ii][1]; - long checksum = ((Long) dbmat[ii][2]).longValue(); - long prevchecksum, nextchecksum; - - if (ii == 0) prevchecksum = 0; - else prevchecksum = ((Long) dbmat[ii-1][2]).longValue(); - - if (ii >= dbmat.length - 1) nextchecksum = 0; - else nextchecksum = ((Long) dbmat[ii+1][2]).longValue(); - - if (bugs >= 10) { - prtln("testEmailTime: dup chk: ii: " + ii); - prtln(" id: \"" + id + "\""); - prtln(" url: \"" + primaryUrl + "\""); - prtln(" checksum: " + checksum); - prtln(" prevchecksum: " + prevchecksum); - prtln(" nextchecksum: " + nextchecksum); - } ! if (checksum != 0) { ! if (checksum == prevchecksum) { ! totaldups++; ! msgbuf.append(" " + id + " " + primaryUrl + "\n"); ! } ! else if (checksum == nextchecksum) { ! totaldups++; ! if (! headerdone) { ! headerdone = true; ! msgbuf.append("\n==========\n\n"); ! msgbuf.append("Duplicates ...\n\n"); ! } ! msgbuf.append("\nGroup " + (igroup + 1) ! + ": The following ids appear identical:\n"); ! msgbuf.append(" " + id + " " + primaryUrl + "\n"); ! igroup++; ! } ! } ! } ! } // end if metastyle "adn" String subj = emailSubject + ", " + collKey ! + ", " + totalwarnings + " warnings, " + totaldups + " dups"; // Insert heading before main output --- 2131,2139 ---- ! String subj = emailSubject + ", " + collKey ! + ", " + totalwarnings + " warnings"; // Insert heading before main output Index: ResourceDesc.java =================================================================== RCS file: /cvsroot/dlese/dlese-tools-project/src/org/dlese/dpc/services/idmapper/ResourceDesc.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -r1.5 -r1.6 *** ResourceDesc.java 2 Jul 2003 00:53:30 -0000 1.5 --- ResourceDesc.java 28 Jul 2003 17:13:47 -0000 1.6 *************** *** 28,31 **** --- 28,32 ---- private String id; private long metaChecksum; + private ResourceDesc duplicateRsd = null; int numpages; *************** *** 125,128 **** --- 126,140 ---- this.metaChecksum = metaChecksum; } + + + ResourceDesc getDuplicateRsd() { + return duplicateRsd; + } + + + void setDuplicateRsd( ResourceDesc duplicateRsd) { + this.duplicateRsd = duplicateRsd; + } + |