|
From: Jonathan L. <le...@us...> - 2007-03-13 21:48:09
|
Update of /cvsroot/pyxida/Pyxida/src/edu/harvard/syrah/pyxida/knn In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12847/src/edu/harvard/syrah/pyxida/knn Added Files: Zone.java KnnSim.java ZoneManager.java HyperCubeZoneManager.java NodeDesc.java Log Message: First crack at continuous k-nearest neighbor --- NEW FILE: HyperCubeZoneManager.java --- /* * @author Last modified by $Author: ledlie $ * @version $Revision: 1.1 $ on $Date: 2007/03/13 21:48:08 $ * @since Mar 13, 2007 */ package edu.harvard.syrah.pyxida.knn; import java.util.HashMap; import java.util.Map; import java.util.SortedMap; public class HyperCubeZoneManager implements ZoneManager { final int dimensions; final double zoneEdgeLength; final Map<Long,Zone> zoneMap; public HyperCubeZoneManager (int _dimensions, double _dimensionSideLength, int _zonesPerDimension) { dimensions = _dimensions; zoneEdgeLength = _dimensionSideLength / _zonesPerDimension; zoneMap = new HashMap<Long,Zone> (); } public void add(long stamp, NodeDesc node) { // TODO UNTESTED // determine his zone long zoneId = 0; double vec[] = node.coord.asVectorFromZero(false).getComponents(); for (int i = 0; i < dimensions; i++) { int zoneIndex = (int)(Math.floor(vec[i] / zoneEdgeLength)); zoneId += Math.pow(zoneIndex, i+1); } Zone z = zoneMap.get(zoneId); if (z == null) { z = new Zone (zoneId); zoneMap.put (zoneId,z); } z.add(stamp, node); // TODO also create Zone->nearby Zone mapping and add nearby zones during query? } public void query(NodeDesc node, SortedMap<Double, NodeDesc> distance2node) { } } --- NEW FILE: Zone.java --- /* * @author Last modified by $Author: ledlie $ * @version $Revision: 1.1 $ on $Date: 2007/03/13 21:48:08 $ * @since Mar 13, 2007 */ package edu.harvard.syrah.pyxida.knn; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.Vector; public class Zone { final long id; // rotate at once per minute at most public static final long rotateSetInterval = 60 * 1000; // remember this many slots of nodes public static final int intervalCount = 10; final Vector<List<NodeDesc>> intervalSet; int currentInterval = 0; long currentIntervalFloor = 0; int nodeCount = 0; public Zone (long _id) { id = _id; intervalSet = new Vector<List<NodeDesc>> (); for (int i = 0; i < intervalCount; i++) { intervalSet.add(new ArrayList<NodeDesc>()); } } public void add (long stamp, NodeDesc node) { if (node == null) return; long intervalFloor = stamp / rotateSetInterval; if (intervalFloor > currentIntervalFloor) { currentInterval++; if (currentInterval == intervalCount) currentInterval = 0; nodeCount -= intervalSet.get(currentInterval).size(); intervalSet.get(currentInterval).clear(); currentIntervalFloor = intervalFloor; } intervalSet.get(currentInterval).add(node); nodeCount++; } public void probe (Set<NodeDesc> nearbyNodes, int maxNearbyNodes) { if (nearbyNodes == null) return; int interval = currentInterval; int intervalsPassed = 0; while (nearbyNodes.size() < maxNearbyNodes && intervalsPassed < intervalCount) { nearbyNodes.addAll(intervalSet.get(interval)); interval--; if (interval == -1) interval = intervalCount-1; intervalsPassed++; } } public long getId() { return id; } public int getNodeCount () { return nodeCount; } public String toString () { StringBuffer sb = new StringBuffer ("id="+id+" size="+nodeCount); for (int i = 0; i < intervalCount; i++) { sb.append(" "); if (i == currentInterval) sb.append("*"); sb.append (i+"->[ "); for (NodeDesc node : intervalSet.get(i)) { sb.append(node.id+" "); } sb.append("]"); } return new String(sb); } public static void main (String argv[]) { NodeDesc n0 = new NodeDesc (0,null); NodeDesc n1 = new NodeDesc (1,null); NodeDesc n2 = new NodeDesc (2,null); NodeDesc n3 = new NodeDesc (3,null); Zone z = new Zone (0); z.add (0,n0); System.out.println(z); z.add (1,n0); z.add (1,n1); z.add (1,n2); z.add (2,n1); z.add (Zone.rotateSetInterval*2,n0); System.out.println(z); Set<NodeDesc> nearbyNodes = new HashSet<NodeDesc>(); z.probe(nearbyNodes, 2); System.out.println ("nearbyNodes: "); for (NodeDesc node : nearbyNodes) { System.out.println(node.id+" "); } for (int i = 0; i < Zone.intervalCount*2; i++) { z.add(i*Zone.rotateSetInterval, n3); } System.out.println(z); nearbyNodes.clear(); z.probe(nearbyNodes, 2); System.out.println ("nearbyNodes: "); for (NodeDesc node : nearbyNodes) { System.out.println(node.id+" "); } } } --- NEW FILE: ZoneManager.java --- /* * @author Last modified by $Author: ledlie $ * @version $Revision: 1.1 $ on $Date: 2007/03/13 21:48:08 $ * @since Mar 13, 2007 */ package edu.harvard.syrah.pyxida.knn; import java.util.SortedMap; public interface ZoneManager { void add(long stamp, NodeDesc node); void query(NodeDesc node, SortedMap<Double, NodeDesc> distance2node); } --- NEW FILE: KnnSim.java --- /* * @author Last modified by $Author: ledlie $ * @version $Revision: 1.1 $ on $Date: 2007/03/13 21:48:08 $ * @since Mar 13, 2007 */ package edu.harvard.syrah.pyxida.knn; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.SortedMap; import java.util.StringTokenizer; import java.util.TreeMap; import edu.harvard.syrah.pyxida.nc.lib.Coordinate; public class KnnSim { protected static edu.harvard.syrah.prp.Log slog = new edu.harvard.syrah.prp.Log(KnnSim.class); /** * @param args */ Set<NodeDesc> nodes = new HashSet<NodeDesc>(); Map<NodeDesc,SortedMap<Double,NodeDesc>> node2distance2node = new HashMap<NodeDesc,SortedMap<Double,NodeDesc>>(); final int dimensions = 4; final boolean height = true; int dimsPlusHeight = height?dimensions:(dimensions+1); ZoneManager zm; public static void main(String[] args) { KnnSim sim = new KnnSim (args); } public KnnSim (String[] args) { String coordFilename = args[0]; slog.info ("reading coordinate file "+coordFilename); readCoordFile (coordFilename); slog.info ("assigning true NN"); assignTrueNN (); final double dimensionSideLength = 20000; final int zonesPerDimension = 1000; zm = new HyperCubeZoneManager (dimensions, dimensionSideLength, zonesPerDimension); slog.info ("adding nodes to zones"); addNodesToZones (); slog.info ("eval zone assignment"); evaluateZoneAssignment (); } void assignTrueNN () { for (NodeDesc me : nodes) { SortedMap<Double,NodeDesc> distance2node = new TreeMap<Double,NodeDesc>(); for (NodeDesc node : nodes) { if (me != node) { double distance = me.coord.distanceTo(node.coord); distance2node.put(distance, node); } } node2distance2node.put(me, distance2node); } } public void addNodesToZones () { // for now, assume time is not a factor final long stamp = 0; for (NodeDesc node : nodes) { zm.add (stamp, node); } } public void evaluateZoneAssignment () { for (NodeDesc node : nodes) { SortedMap<Double,NodeDesc> distance2node = new TreeMap<Double,NodeDesc>(); zm.query(node,distance2node); /* * TODO Do something smart here to evaluate the mapping. * SortedMap<Double,NodeDesc> trueDistance2node = node2distance2node.get(node); StringBuffer sb = new StringBuffer ("truth "); for (Map.Entry<Double, NodeDesc>) entry : trueDistance2node) { } */ } } void readCoordFile (String coordFilename) { BufferedReader coordReader = null; try { coordReader = new BufferedReader (new FileReader (new File (coordFilename))); }catch (FileNotFoundException ex) { System.err.println ("Cannot find coord file "+coordFilename); System.exit (-1); } int idCounter = 0; try { String coordLine = coordReader.readLine(); while (coordLine != null) { StringTokenizer coordTokenizer = new StringTokenizer (coordLine); float v[] = new float[dimsPlusHeight]; for (int i = 0; i < dimsPlusHeight; i++) { v[i] = Float.parseFloat((String)(coordTokenizer.nextElement())); } NodeDesc node = new NodeDesc(idCounter++,new Coordinate(v)); nodes.add(node); coordLine = coordReader.readLine(); } } catch (IOException ex) { System.err.println("Error reading coord file: "+ex); System.exit (-1); } } } --- NEW FILE: NodeDesc.java --- /* * @author Last modified by $Author: ledlie $ * @version $Revision: 1.1 $ on $Date: 2007/03/13 21:48:08 $ * @since Mar 13, 2007 */ package edu.harvard.syrah.pyxida.knn; import edu.harvard.syrah.pyxida.nc.lib.Coordinate; public class NodeDesc { public final int id; public final Coordinate coord; public NodeDesc (int _id, Coordinate _coord) { id = _id; coord = _coord; } // used when adding to Set b/c one guy can be in the lists several times public boolean equals (NodeDesc _other) { if (id == _other.id) return true; return false; } } |