|
From: Jonathan L. <le...@us...> - 2007-03-20 13:53:31
|
Update of /cvsroot/pyxida/Pyxida/src/edu/harvard/syrah/pyxida/knn In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv26088/src/edu/harvard/syrah/pyxida/knn Modified Files: Zone.java KnnSim.java HyperCubeZoneManager.java Log Message: Trimmed memory use with KNN Index: Zone.java =================================================================== RCS file: /cvsroot/pyxida/Pyxida/src/edu/harvard/syrah/pyxida/knn/Zone.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** Zone.java 14 Mar 2007 18:12:26 -0000 1.3 --- Zone.java 20 Mar 2007 13:17:26 -0000 1.4 *************** *** 34,40 **** // max guys per interval ! public static final int maxNodesPerInterval = 10; // remember this many slots of nodes public static final int intervalCount = 10; --- 34,43 ---- // max guys per interval ! // TODO Set to 10 ! public static final int maxNodesPerInterval = 1000; // remember this many slots of nodes + // Note: if you make this large, you may want to change the memory allocation + // below so to be a little more judicious public static final int intervalCount = 10; *************** *** 42,46 **** // maxNodesPerInterval x intervalCount pointers to NodeDescs ! final Vector<List<NodeDesc>> intervalSet; int currentInterval = 0; --- 45,50 ---- // maxNodesPerInterval x intervalCount pointers to NodeDescs ! final Vector<List<NodeDesc>> intervalSet; ! //final NodeDesc[][] intervalSet; int currentInterval = 0; *************** *** 51,57 **** public Zone (long _id) { id = _id; ! intervalSet = new Vector<List<NodeDesc>> (); for (int i = 0; i < intervalCount; i++) { ! intervalSet.add(new ArrayList<NodeDesc>()); } } --- 55,62 ---- public Zone (long _id) { id = _id; ! ! intervalSet = new Vector<List<NodeDesc>> (intervalCount); for (int i = 0; i < intervalCount; i++) { ! intervalSet.add(new ArrayList<NodeDesc>(0)); } } *************** *** 60,64 **** if (node == null) return; long intervalFloor = stamp / rotateSetInterval; ! if (intervalFloor > currentIntervalFloor || intervalSet.get(currentInterval).size() >= maxNodesPerInterval) { --- 65,69 ---- if (node == null) return; long intervalFloor = stamp / rotateSetInterval; ! if (intervalFloor > currentIntervalFloor || intervalSet.get(currentInterval).size() >= maxNodesPerInterval) { *************** *** 66,75 **** currentInterval++; if (currentInterval == intervalCount) currentInterval = 0; ! nodeCount -= intervalSet.get(currentInterval).size(); ! intervalSet.get(currentInterval).clear(); currentIntervalFloor = intervalFloor; } intervalSet.get(currentInterval).add(node); nodeCount++; } --- 71,84 ---- currentInterval++; if (currentInterval == intervalCount) currentInterval = 0; ! ! ArrayList<NodeDesc> current = (ArrayList)intervalSet.get(currentInterval); ! nodeCount -= current.size(); ! current.clear(); ! current.trimToSize(); currentIntervalFloor = intervalFloor; } intervalSet.get(currentInterval).add(node); + nodeCount++; } *************** *** 80,84 **** int intervalsPassed = 0; ! while (distance2nearbyNodes.size() < maxNearbyNodes && intervalsPassed < intervalCount) { for (NodeDesc nearbyNode : intervalSet.get(interval)) { if (!probeSrc.equals(nearbyNode) && probeSrc.coord != null && nearbyNode.coord != null) { --- 89,93 ---- int intervalsPassed = 0; ! while ( (maxNearbyNodes==0 || distance2nearbyNodes.size() < maxNearbyNodes) && intervalsPassed < intervalCount) { for (NodeDesc nearbyNode : intervalSet.get(interval)) { if (!probeSrc.equals(nearbyNode) && probeSrc.coord != null && nearbyNode.coord != null) { Index: HyperCubeZoneManager.java =================================================================== RCS file: /cvsroot/pyxida/Pyxida/src/edu/harvard/syrah/pyxida/knn/HyperCubeZoneManager.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** HyperCubeZoneManager.java 14 Mar 2007 18:12:26 -0000 1.4 --- HyperCubeZoneManager.java 20 Mar 2007 13:17:26 -0000 1.5 *************** *** 30,39 **** final Map<Long,List<Long>> zone2adjacentZones; ! public HyperCubeZoneManager (int _dimensions, double _dimensionSideLength, int _zonesPerDimension, boolean _useAdjacentZones) { dimensions = _dimensions; dimensionSideLength = _dimensionSideLength; halfDimensionSideLength = dimensionSideLength / 2.; ! zonesPerDimension = _zonesPerDimension; ! zoneEdgeLength = _dimensionSideLength / _zonesPerDimension; useAdjacentZones = _useAdjacentZones; zoneMap = new HashMap<Long,Zone> (); --- 30,40 ---- final Map<Long,List<Long>> zone2adjacentZones; ! public HyperCubeZoneManager (int _dimensions, double _dimensionSideLength, double _zoneEdgeLength, boolean _useAdjacentZones) { dimensions = _dimensions; dimensionSideLength = _dimensionSideLength; halfDimensionSideLength = dimensionSideLength / 2.; ! zoneEdgeLength = _zoneEdgeLength; ! zonesPerDimension = (int)Math.floor(_dimensionSideLength / _zoneEdgeLength); ! useAdjacentZones = _useAdjacentZones; zoneMap = new HashMap<Long,Zone> (); *************** *** 78,82 **** //slog.info ("added "+distance2nearbyNodes.size()+" from our zone "+zoneId); ! if (useAdjacentZones && distance2nearbyNodes.size() < maxReturnSize) { List<Long> adjacentZones = zone2adjacentZones.get(zoneId); // compute our adjacent zones in case ours is empty --- 79,83 ---- //slog.info ("added "+distance2nearbyNodes.size()+" from our zone "+zoneId); ! if (useAdjacentZones && (maxReturnSize == 0 || distance2nearbyNodes.size() < maxReturnSize)) { List<Long> adjacentZones = zone2adjacentZones.get(zoneId); // compute our adjacent zones in case ours is empty *************** *** 92,96 **** //slog.info ("total "+distance2nearbyNodes.size()+" from zone "+adjacentZoneId); ! if (distance2nearbyNodes.size() >= maxReturnSize) { return; } --- 93,97 ---- //slog.info ("total "+distance2nearbyNodes.size()+" from zone "+adjacentZoneId); ! if (maxReturnSize == 0 || distance2nearbyNodes.size() >= maxReturnSize) { return; } Index: KnnSim.java =================================================================== RCS file: /cvsroot/pyxida/Pyxida/src/edu/harvard/syrah/pyxida/knn/KnnSim.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** KnnSim.java 20 Mar 2007 12:27:53 -0000 1.5 --- KnnSim.java 20 Mar 2007 13:17:26 -0000 1.6 *************** *** 35,43 **** // Number of guys we would be returning to client. // We will see how many of the true NN of the same size we find. ! int returnSetSize = 50; String coordFilename = null; ! boolean useAdjacentZones = true; double dimensionSideLength = 10000; ! int zonesPerDimension = 2; final static protected int NFDigits = 3; --- 35,44 ---- // Number of guys we would be returning to client. // We will see how many of the true NN of the same size we find. ! int returnSetSize = 0; String coordFilename = null; ! boolean useAdjacentZones = false; double dimensionSideLength = 10000; ! double zoneEdgeLength = 25.; ! double zoneTargetRTT = 100.; final static protected int NFDigits = 3; *************** *** 65,68 **** --- 66,71 ---- " -h toggle height use ("+NCClient.USE_HEIGHT+")\n"+ " -s dimension side length ("+dimensionSideLength+")\n"+ + " -e zone edge length ("+zoneEdgeLength+")\n"+ + " -t zone target RTT ("+zoneTargetRTT+")\n"+ " -a toggle adjacent zone use ("+useAdjacentZones+")\n"+ " -r return set size ("+returnSetSize+")\n"+ *************** *** 73,77 **** public KnnSim (String[] args) { int c; ! Getopt g = new Getopt ("KnnSim", args, "d:hz:s:ar:c:"); while ((c = g.getopt()) != -1) { --- 76,80 ---- public KnnSim (String[] args) { int c; ! Getopt g = new Getopt ("KnnSim", args, "d:he:s:ar:c:"); while ((c = g.getopt()) != -1) { *************** *** 83,88 **** NCClient.USE_HEIGHT = !NCClient.USE_HEIGHT; break; ! case 'z': ! zonesPerDimension = Integer.parseInt(g.getOptarg()); break; case 's': --- 86,91 ---- NCClient.USE_HEIGHT = !NCClient.USE_HEIGHT; break; ! case 'e': ! zoneEdgeLength = Double.parseDouble(g.getOptarg()); break; case 's': *************** *** 102,121 **** } } if (coordFilename == null) printUsage ("coordinate file required"); if (dimensions <= 0) printUsage ("invalid number of dimensions"); if (zonesPerDimension <= 1) printUsage ("zones per dimension invalid"); if (dimensionSideLength < 1.) printUsage ("dimension side length invalid"); ! if (returnSetSize < 1) printUsage ("return set size invalid"); dimsPlusHeight = NCClient.USE_HEIGHT?(dimensions+1):dimensions; ! slog.info("PARAMS dims="+dimensions+ " height="+NCClient.USE_HEIGHT+ " dimensionSideLength="+dimensionSideLength+ " zonesPerDimension="+zonesPerDimension+ " useAdjacentZones="+useAdjacentZones+ " returnSetSize="+returnSetSize+ ! " coordFile="+coordFilename); slog.info ("reading coordinate file "+coordFilename); --- 105,131 ---- } } + + if (dimensionSideLength % zoneEdgeLength != 0.) + printUsage ("zone edge length must divide dimension side length evenly."); + int zonesPerDimension = (int)(Math.floor(dimensionSideLength / zoneEdgeLength)); + if (coordFilename == null) printUsage ("coordinate file required"); if (dimensions <= 0) printUsage ("invalid number of dimensions"); if (zonesPerDimension <= 1) printUsage ("zones per dimension invalid"); if (dimensionSideLength < 1.) printUsage ("dimension side length invalid"); ! if (returnSetSize < 0) printUsage ("return set size invalid"); dimsPlusHeight = NCClient.USE_HEIGHT?(dimensions+1):dimensions; ! String params = "PARAMS dims="+dimensions+ " height="+NCClient.USE_HEIGHT+ " dimensionSideLength="+dimensionSideLength+ " zonesPerDimension="+zonesPerDimension+ + " zoneEdgeLength="+zoneEdgeLength+ " useAdjacentZones="+useAdjacentZones+ " returnSetSize="+returnSetSize+ ! " coordFile="+coordFilename; ! System.out.println ("# "+params); slog.info ("reading coordinate file "+coordFilename); *************** *** 125,129 **** ! zm = new HyperCubeZoneManager (dimensions, dimensionSideLength, zonesPerDimension, useAdjacentZones); slog.info ("adding nodes to zones"); --- 135,139 ---- ! zm = new HyperCubeZoneManager (dimensions, dimensionSideLength, zoneEdgeLength, useAdjacentZones); slog.info ("adding nodes to zones"); *************** *** 137,146 **** for (NodeDesc me : nodes) { SortedMap<Double,NodeDesc> distance2node = new TreeMap<Double,NodeDesc>(); for (NodeDesc node : nodes) { if (me != node) { double distance = me.coord.distanceTo(node.coord); ! distance2node.put(distance, node); } } node2distance2node.put(me, distance2node); } --- 147,173 ---- for (NodeDesc me : nodes) { SortedMap<Double,NodeDesc> distance2node = new TreeMap<Double,NodeDesc>(); + double nearestDistance = Double.MAX_VALUE; + NodeDesc nearestNode = null; for (NodeDesc node : nodes) { if (me != node) { double distance = me.coord.distanceTo(node.coord); ! if (distance < nearestDistance) { ! nearestDistance = distance; ! nearestNode = node; ! } } } + + distance2node.put(nearestDistance, nearestNode); + + /* + SortedMap<Double,NodeDesc> distance2node = new TreeMap<Double,NodeDesc>(); + int count = 0; + final int MAX_NEARBY_TNN = 1; + for (Map.Entry<Double,NodeDesc> entry : allDistance2node.entrySet()) { + distance2node.put(entry.getKey(), entry.getValue()); + if (count > MAX_NEARBY_TNN) break; + } + */ node2distance2node.put(me, distance2node); } *************** *** 153,157 **** zm.add (stamp, node); } ! //slog.info (zm.toString()); } --- 180,185 ---- zm.add (stamp, node); } ! //slog.info (zm.toString()); ! //System.out.println (zm.toString()); } *************** *** 162,173 **** SortedMap<Double,NodeDesc> distance2nearbyNodes = new TreeMap<Double,NodeDesc>(); ! zm.query(node,distance2nearbyNodes,returnSetSize); // foundSet is who we would be returning to the client //Set<NodeDesc> foundSet = getSetHead (distance2node, returnSetSize); ! Set<NodeDesc> tnnSet = getSetHead (node2distance2node.get(node), returnSetSize); ! ! int unionSize = 0; ! /* slog.info (node.id+" true nearest neighbors:"); --- 190,204 ---- SortedMap<Double,NodeDesc> distance2nearbyNodes = new TreeMap<Double,NodeDesc>(); ! zm.query(node,distance2nearbyNodes,0); // foundSet is who we would be returning to the client //Set<NodeDesc> foundSet = getSetHead (distance2node, returnSetSize); ! SortedMap<Double,NodeDesc> trueDistance2node = node2distance2node.get(node); ! double trueNearestDistance = trueDistance2node.firstKey(); ! ! double rttSum = 0.; ! int countLtTargetRtt = 0; ! int countGtTargetRtt = 0; ! /* slog.info (node.id+" true nearest neighbors:"); *************** *** 177,193 **** */ ! for (NodeDesc foundNode : distance2nearbyNodes.values()) { //slog.info (node.id+" union "+foundNode.id); ! if (tnnSet.contains(foundNode)) { ! unionSize++; } } ! double pctOverlap = (double)unionSize / tnnSet.size(); //slog.info (node.id+" unionSize "+unionSize + " pct="+nf.format(pctOverlap)); ! slog.info (node.id+" pct="+nf.format(pctOverlap)); ! // TODO Could also eval mapping with abs(rank difference) // exponentially weighted by tnn rank --- 208,236 ---- */ ! for (Map.Entry<Double, NodeDesc> entry : distance2nearbyNodes.entrySet()) { //slog.info (node.id+" union "+foundNode.id); ! double rtt = entry.getKey(); ! rttSum += rtt; ! if (rtt < zoneTargetRTT) { ! countLtTargetRtt++; ! } else { ! countGtTargetRtt++; } } ! double avgRtt = (distance2nearbyNodes.size() > 0) ? ! rttSum / (double)distance2nearbyNodes.size() : 0.; //slog.info (node.id+" unionSize "+unionSize + " pct="+nf.format(pctOverlap)); ! String res = node.id+" avgRtt= "+nf.format(avgRtt)+ ! " lt= "+countLtTargetRtt+ " gt= "+countGtTargetRtt+ ! " pctLt= "+ nf.format ! ((distance2nearbyNodes.size() > 0) ? (double)countLtTargetRtt/(double)distance2nearbyNodes.size() : 0.)+ ! " tN= "+nf.format(trueNearestDistance)+ ! " c= "+node.coord; ! ! System.out.println (res); ! // Could also eval mapping with abs(rank difference) // exponentially weighted by tnn rank |