|
From: Jonathan L. <le...@us...> - 2007-03-14 16:38:01
|
Update of /cvsroot/pyxida/Pyxida/src/edu/harvard/syrah/pyxida/knn In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23918/src/edu/harvard/syrah/pyxida/knn Modified Files: Zone.java KnnSim.java ZoneManager.java HyperCubeZoneManager.java Log Message: Finds nodes in adjacent zones. Sorts result by distance to target. Index: HyperCubeZoneManager.java =================================================================== RCS file: /cvsroot/pyxida/Pyxida/src/edu/harvard/syrah/pyxida/knn/HyperCubeZoneManager.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** HyperCubeZoneManager.java 14 Mar 2007 02:42:57 -0000 1.2 --- HyperCubeZoneManager.java 14 Mar 2007 16:38:00 -0000 1.3 *************** *** 6,14 **** package edu.harvard.syrah.pyxida.knn; import java.util.HashMap; import java.util.Map; import java.util.SortedMap; - import java.util.HashSet; - import java.util.Set; public class HyperCubeZoneManager implements ZoneManager { --- 6,14 ---- package edu.harvard.syrah.pyxida.knn; + import java.util.ArrayList; import java.util.HashMap; + import java.util.List; import java.util.Map; import java.util.SortedMap; public class HyperCubeZoneManager implements ZoneManager { *************** *** 21,40 **** final double zoneEdgeLength; final double dimensionSideLength; ! final int zonesPerDimension; final Map<Long,Zone> zoneMap; ! public HyperCubeZoneManager (int _dimensions, double _dimensionSideLength, int _zonesPerDimension) { dimensions = _dimensions; dimensionSideLength = _dimensionSideLength; zonesPerDimension = _zonesPerDimension; zoneEdgeLength = _dimensionSideLength / _zonesPerDimension; zoneMap = new HashMap<Long,Zone> (); } public void add(long stamp, NodeDesc node) { // determine his zone ! long zoneId = findZone(node); Zone z = zoneMap.get(zoneId); --- 21,50 ---- final double zoneEdgeLength; final double dimensionSideLength; ! final double halfDimensionSideLength; ! final int zonesPerDimension; ! final boolean useAdjacentZones; final Map<Long,Zone> zoneMap; + + // note that a particular adjacent zone may not have been materialized, + // so we just store its id + final Map<Long,List<Long>> zone2adjacentZones; ! public HyperCubeZoneManager (int _dimensions, double _dimensionSideLength, int _zonesPerDimension, boolean _useAdjacentZones) { dimensions = _dimensions; dimensionSideLength = _dimensionSideLength; + halfDimensionSideLength = dimensionSideLength / 2.; zonesPerDimension = _zonesPerDimension; zoneEdgeLength = _dimensionSideLength / _zonesPerDimension; + useAdjacentZones = _useAdjacentZones; zoneMap = new HashMap<Long,Zone> (); + zone2adjacentZones = new HashMap<Long,List<Long>>(); } public void add(long stamp, NodeDesc node) { + double vec[] = node.coord.asVectorFromZero(false).getComponents(); // determine his zone ! long zoneId = findZone(vec); Zone z = zoneMap.get(zoneId); *************** *** 42,77 **** z = new Zone (zoneId); zoneMap.put (zoneId,z); } z.add(stamp, node); - // TODO also create Zone->nearby Zone mapping and add nearby zones during query? slog.info ("added "+node.id+" to zone "+zoneId); } ! public void query(NodeDesc node, Set<NodeDesc> nearbyNodes, int maxReturnSize) { ! // determine his zone ! long zoneId = findZone(node); Zone z = zoneMap.get(zoneId); ! if (z != null) { ! z.probe(nearbyNodes,maxReturnSize); ! } } ! long findZone(NodeDesc node) { long zoneId = 0; - double vec[] = node.coord.asVectorFromZero(false).getComponents(); for (int i = 0; i < dimensions; i++) { ! final double halfDimensionSideLength = dimensionSideLength / 2.; ! // normalize coordinate s.t. all coords are >0 int zoneIndex = (int)(Math.floor((vec[i]+halfDimensionSideLength) / zoneEdgeLength)); zoneId += Math.pow(zonesPerDimension, i) * zoneIndex; ! //slog.info (i+" c"+vec[i]+" -> "+zoneIndex+ " id "+zoneId); } ! return zoneId; ! } } --- 52,156 ---- z = new Zone (zoneId); zoneMap.put (zoneId,z); + + if (useAdjacentZones) { + if (!zone2adjacentZones.containsKey(zoneId)) { + zone2adjacentZones.put(zoneId,findAdjacentZones (vec)); + } + } } z.add(stamp, node); slog.info ("added "+node.id+" to zone "+zoneId); } ! public void query(NodeDesc probeSrc, SortedMap<Double, NodeDesc> distance2nearbyNodes, int maxReturnSize) { ! double vec[] = probeSrc.coord.asVectorFromZero(false).getComponents(); ! // determine his zone ! long zoneId = findZone(vec); Zone z = zoneMap.get(zoneId); ! if (z != null) { ! z.probe(probeSrc, distance2nearbyNodes,maxReturnSize); ! } ! ! slog.info ("added "+distance2nearbyNodes.size()+" from our zone "+zoneId); ! ! if (useAdjacentZones && distance2nearbyNodes.size() < maxReturnSize) { ! List<Long> adjacentZones = zone2adjacentZones.get(zoneId); ! // compute our adjacent zones in case ours is empty ! if (adjacentZones == null) { ! adjacentZones = findAdjacentZones (vec); ! zone2adjacentZones.put(zoneId,adjacentZones); ! } ! for (Long adjacentZoneId : adjacentZones) { ! Zone adjacentZone = zoneMap.get(adjacentZoneId); ! if (adjacentZone != null) { ! adjacentZone.probe(probeSrc, distance2nearbyNodes,maxReturnSize); ! } ! slog.info ("total "+distance2nearbyNodes.size()+" from zone "+adjacentZoneId); ! ! if (distance2nearbyNodes.size() >= maxReturnSize) { ! return; ! } ! } ! } ! } ! long findZone(double vec[]) { long zoneId = 0; for (int i = 0; i < dimensions; i++) { ! // normalize coordinate s.t. all coords are >0 int zoneIndex = (int)(Math.floor((vec[i]+halfDimensionSideLength) / zoneEdgeLength)); zoneId += Math.pow(zonesPerDimension, i) * zoneIndex; ! //slog.info (i+" c"+vec[i]+" -> "+zoneIndex+ " id "+zoneId); } ! return zoneId; ! } ! ! List<Long> findAdjacentZones (double vec[]) { ! List<Long> adjacentZones = new ArrayList<Long>(); ! ! int zoneIndex[] = new int[dimensions]; ! for (int i = 0; i < dimensions; i++) { ! zoneIndex[i] = (int)(Math.floor((vec[i]+halfDimensionSideLength) / zoneEdgeLength)); ! } ! ! for (int i = 0; i < dimensions; i++) { ! ! if (zoneIndex[i] > 0) { ! zoneIndex[i]--; ! adjacentZones.add(findZone(zoneIndex)); ! zoneIndex[i]++; ! } ! if (zoneIndex[i] < zonesPerDimension - 1) { ! zoneIndex[i]++; ! adjacentZones.add(findZone(zoneIndex)); ! zoneIndex[i]--; ! } ! } ! ! return adjacentZones; ! } + long findZone(int zoneIndex[]) { + long zoneId = 0; + + for (int i = 0; i < dimensions; i++) { + zoneId += Math.pow(zonesPerDimension, i) * zoneIndex[i]; + } + return zoneId; + } + + public String toString () { + StringBuffer sb = new StringBuffer (); + for (Map.Entry<Long, Zone> zoneEntry : zoneMap.entrySet()) { + sb.append("\n"+zoneEntry.getValue()); + } + return new String (sb); + } + } Index: Zone.java =================================================================== RCS file: /cvsroot/pyxida/Pyxida/src/edu/harvard/syrah/pyxida/knn/Zone.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** Zone.java 13 Mar 2007 21:48:08 -0000 1.1 --- Zone.java 14 Mar 2007 16:38:00 -0000 1.2 *************** *** 7,25 **** import java.util.ArrayList; - import java.util.HashSet; import java.util.List; ! import java.util.Set; import java.util.Vector; public class Zone { final long id; ! // rotate at once per minute at most public static final long rotateSetInterval = 60 * 1000; // remember this many slots of nodes public static final int intervalCount = 10; final Vector<List<NodeDesc>> intervalSet; --- 7,45 ---- import java.util.ArrayList; import java.util.List; ! import java.util.Map; ! import java.util.SortedMap; ! import java.util.TreeMap; import java.util.Vector; + /* + * Implements circular list of collections of nodes. + * We move to the next "interval" when either + * (a) we are storing too many nodes in the current one + * (b) enough time has passed. + * + * This way each zone will only take up a limited amount of storage, + * and will only return fresh results. + */ public class Zone { + protected static edu.harvard.syrah.prp.Log slog = + new edu.harvard.syrah.prp.Log(Zone.class); + final long id; ! // rotate at once per minute public static final long rotateSetInterval = 60 * 1000; + // max guys per interval + public static final int maxNodesPerInterval = 10; + // remember this many slots of nodes public static final int intervalCount = 10; + + // total storage per zone is: + // maxNodesPerInterval x intervalCount pointers to NodeDescs + final Vector<List<NodeDesc>> intervalSet; *************** *** 41,45 **** long intervalFloor = stamp / rotateSetInterval; ! if (intervalFloor > currentIntervalFloor) { currentInterval++; if (currentInterval == intervalCount) currentInterval = 0; --- 61,67 ---- long intervalFloor = stamp / rotateSetInterval; ! if (intervalFloor > currentIntervalFloor || ! intervalSet.get(currentInterval).size() >= maxNodesPerInterval) { ! // rotate to next interval currentInterval++; if (currentInterval == intervalCount) currentInterval = 0; *************** *** 53,63 **** } ! public void probe (Set<NodeDesc> nearbyNodes, int maxNearbyNodes) { ! if (nearbyNodes == null) return; int interval = currentInterval; int intervalsPassed = 0; ! while (nearbyNodes.size() < maxNearbyNodes && intervalsPassed < intervalCount) { ! nearbyNodes.addAll(intervalSet.get(interval)); interval--; if (interval == -1) interval = intervalCount-1; --- 75,95 ---- } ! public void probe (NodeDesc probeSrc, SortedMap<Double,NodeDesc> distance2nearbyNodes, int maxNearbyNodes) { ! if (distance2nearbyNodes == null) return; int interval = currentInterval; int intervalsPassed = 0; ! while (distance2nearbyNodes.size() < maxNearbyNodes && intervalsPassed < intervalCount) { ! for (NodeDesc nearbyNode : intervalSet.get(interval)) { ! if (!probeSrc.equals(nearbyNode) && probeSrc.coord != null && nearbyNode.coord != null) { ! // TODO might want to use planar distance ! // so that we are perhaps more likely to land in same ISP ! double distance = probeSrc.coord.distanceTo(nearbyNode.coord); ! if (distance > 0) { ! distance2nearbyNodes.put(distance, nearbyNode); ! slog.info ("probe adding "+nearbyNode.id+ " dist="+distance); ! } ! } ! } interval--; if (interval == -1) interval = intervalCount-1; *************** *** 107,116 **** System.out.println(z); ! Set<NodeDesc> nearbyNodes = new HashSet<NodeDesc>(); ! z.probe(nearbyNodes, 2); System.out.println ("nearbyNodes: "); ! for (NodeDesc node : nearbyNodes) { ! System.out.println(node.id+" "); } --- 139,148 ---- System.out.println(z); ! SortedMap<Double,NodeDesc> distance2nearbyNodes = new TreeMap<Double,NodeDesc>(); ! z.probe(n0,distance2nearbyNodes, 2); System.out.println ("nearbyNodes: "); ! for (Map.Entry<Double,NodeDesc> distance2node : distance2nearbyNodes.entrySet()) { ! System.out.println(distance2node.getKey()+" "+distance2node.getValue()); } *************** *** 120,128 **** System.out.println(z); ! nearbyNodes.clear(); ! z.probe(nearbyNodes, 2); System.out.println ("nearbyNodes: "); ! for (NodeDesc node : nearbyNodes) { ! System.out.println(node.id+" "); } --- 152,160 ---- System.out.println(z); ! distance2nearbyNodes.clear(); ! z.probe(n1,distance2nearbyNodes, 2); System.out.println ("nearbyNodes: "); ! for (Map.Entry<Double,NodeDesc> distance2node : distance2nearbyNodes.entrySet()) { ! System.out.println(distance2node.getKey()+" "+distance2node.getValue()); } Index: ZoneManager.java =================================================================== RCS file: /cvsroot/pyxida/Pyxida/src/edu/harvard/syrah/pyxida/knn/ZoneManager.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** ZoneManager.java 14 Mar 2007 02:42:57 -0000 1.2 --- ZoneManager.java 14 Mar 2007 16:38:00 -0000 1.3 *************** *** 8,12 **** import java.util.SortedMap; import java.util.HashSet; - import java.util.Set; public interface ZoneManager { --- 8,11 ---- *************** *** 14,18 **** void add(long stamp, NodeDesc node); ! void query(NodeDesc node, Set<NodeDesc> nearbyNodes, int maxReturnSize); } --- 13,19 ---- void add(long stamp, NodeDesc node); ! void query(NodeDesc probeSrc, SortedMap<Double, NodeDesc> distance2nearbyNodes, int maxReturnSize); + String toString (); + } Index: KnnSim.java =================================================================== RCS file: /cvsroot/pyxida/Pyxida/src/edu/harvard/syrah/pyxida/knn/KnnSim.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** KnnSim.java 14 Mar 2007 02:42:57 -0000 1.2 --- KnnSim.java 14 Mar 2007 16:38:00 -0000 1.3 *************** *** 21,24 **** --- 21,31 ---- import edu.harvard.syrah.pyxida.nc.lib.Coordinate; + import edu.harvard.syrah.pyxida.nc.lib.NCClient; + + /* + * Driver for testing Zone assignment algorithms. + * + * Make sure that input coordinates are not spaced at regular intervals. + */ public class KnnSim { *************** *** 30,36 **** new HashMap<NodeDesc,SortedMap<Double,NodeDesc>>(); ! final int dimensions = 4; ! final boolean height = true; ! int dimsPlusHeight = height?dimensions:(dimensions+1); ZoneManager zm = null; --- 37,43 ---- new HashMap<NodeDesc,SortedMap<Double,NodeDesc>>(); ! final int dimensions = 2; ! final int dimsPlusHeight; ! ZoneManager zm = null; *************** *** 51,54 **** --- 58,66 ---- public KnnSim (String[] args) { + NCClient.USE_HEIGHT = false; + dimsPlusHeight = NCClient.USE_HEIGHT?(dimensions+1):dimensions; + //dimsPlusHeight = dimensions+1; + slog.info("dims="+dimensions+" height="+NCClient.USE_HEIGHT); + String coordFilename = args[0]; // TODO make into variables *************** *** 60,67 **** assignTrueNN (); ! final double dimensionSideLength = 20000; //final int zonesPerDimension = 1000; final int zonesPerDimension = 2; ! zm = new HyperCubeZoneManager (dimensions, dimensionSideLength, zonesPerDimension); slog.info ("adding nodes to zones"); --- 72,80 ---- assignTrueNN (); ! final double dimensionSideLength = 2; //final int zonesPerDimension = 1000; final int zonesPerDimension = 2; ! final boolean useAdjacentZones = true; ! zm = new HyperCubeZoneManager (dimensions, dimensionSideLength, zonesPerDimension, useAdjacentZones); slog.info ("adding nodes to zones"); *************** *** 91,101 **** zm.add (stamp, node); } } public void evaluateZoneAssignment () { for (NodeDesc node : nodes) { ! //SortedMap<Double,NodeDesc> distance2node = new TreeMap<Double,NodeDesc>(); ! Set<NodeDesc> foundSet = new HashSet<NodeDesc> (); ! zm.query(node,foundSet,returnSetSize); // foundSet is who we would be returning to the client --- 104,117 ---- zm.add (stamp, node); } + slog.info (zm.toString()); } public void evaluateZoneAssignment () { for (NodeDesc node : nodes) { ! slog.info ("Finding nodes nearby "+node.id); ! ! SortedMap<Double,NodeDesc> distance2nearbyNodes = new TreeMap<Double,NodeDesc>(); ! ! zm.query(node,distance2nearbyNodes,returnSetSize); // foundSet is who we would be returning to the client *************** *** 104,108 **** int unionSize = 0; ! for (NodeDesc foundNode : foundSet) { if (tnnSet.contains(foundNode)) { unionSize++; --- 120,125 ---- int unionSize = 0; ! for (NodeDesc foundNode : distance2nearbyNodes.values()) { ! slog.info (node.id+" union "+foundNode.id); if (tnnSet.contains(foundNode)) { unionSize++; *************** *** 111,115 **** // TODO want pct ! slog.info (node.id+" union "+unionSize); // TODO Could also eval mapping with abs(rank difference) --- 128,132 ---- // TODO want pct ! slog.info (node.id+" unionSize "+unionSize); // TODO Could also eval mapping with abs(rank difference) *************** *** 118,122 **** // Or something that actually has to do with distance // as opposed to just rank ! } } --- 135,139 ---- // Or something that actually has to do with distance // as opposed to just rank ! //System.exit(0); } } |