|
From: Theuns C. <the...@gm...> - 2009-06-10 06:42:30
|
This is achieved by introducing a reinitialise() method in the
CentroidsInitialisationStrategy interface.
Also made some small fixes regarding the DataSetManager and
StaticDataSetBuilder.
---
.../kmeans/CentroidsInitialisationStrategy.java | 16 +++-
...ataSetBasedCentroidsInitialisationStrategy.java | 23 +++++-
.../cilib/clustering/kmeans/KMeans.java | 3 +-
...ansPlusPlusCentroidsInitialisationStrategy.java | 44 +++++++----
.../RandomCentroidsInitialisationStrategy.java | 17 +++-
.../cilib/problem/ClusteringProblem.java | 6 +-
.../sourceforge/cilib/problem/dataset/DataSet.java | 2 +-
.../cilib/problem/dataset/DataSetManager.java | 9 +-
.../cilib/problem/dataset/LocalDataSet.java | 8 +-
.../problem/dataset/StaticDataSetBuilder.java | 88 ++++++++++++++------
xml/clustering-gbest-pso.xml | 18 ++--
xml/kmeans.xml | 42 +---------
12 files changed, 163 insertions(+), 113 deletions(-)
diff --git a/src/main/java/net/sourceforge/cilib/clustering/kmeans/CentroidsInitialisationStrategy.java b/src/main/java/net/sourceforge/cilib/clustering/kmeans/CentroidsInitialisationStrategy.java
index 22a203e..6465e56 100644
--- a/src/main/java/net/sourceforge/cilib/clustering/kmeans/CentroidsInitialisationStrategy.java
+++ b/src/main/java/net/sourceforge/cilib/clustering/kmeans/CentroidsInitialisationStrategy.java
@@ -30,12 +30,14 @@ import net.sourceforge.cilib.type.types.container.Vector;
import net.sourceforge.cilib.util.Cloneable;
/**
- * This strategy allows for different ways of initializing the centroids of a clustering.
+ * This strategy allows for different ways of initializing the centroids of a clustering. It also allows for a specific
+ * centroid to be {@link #reinitialise(java.util.ArrayList, int) reinitialised}.
* The following approaches have already been implemented:
* <ul>
* <li>Randomly ({@link RandomCentroidsInitialisationStrategy}); or</li>
* <li>Based on random patterns chosen from the dataset ({@link DataSetBasedCentroidsInitialisationStrategy})</li>
- * <li>Based on the potential that each centroid contributes ({@link KMeansPlusPlusCentroidsInitialisationStrategy})</li>
+ * <li>Based on the contribution that each centroid makes towards the overall potential
+ * ({@link KMeansPlusPlusCentroidsInitialisationStrategy})</li>
* </ul>
*
* @author Theuns Cloete
@@ -45,7 +47,7 @@ public interface CentroidsInitialisationStrategy extends Serializable, Cloneable
public CentroidsInitialisationStrategy getClone();
/**
- * Initialize the centroid vectors for a clustering. Each centroid is individually initialised and then added to an
+ * Initialise the centroid vectors for a clustering. Each centroid is individually initialised and then added to an
* {@link ArrayList} that represents all the centroids. This structure is then returned. The problem and/or dataset
* that are currently being clustered can be used to get information about the clustering, such as the dimension of
* the search space and centroids.
@@ -55,4 +57,12 @@ public interface CentroidsInitialisationStrategy extends Serializable, Cloneable
* @return an {@link ArrayList} of {@link Vector}s that represent all the centroids
*/
public ArrayList<Vector> initialise(ClusteringProblem problem, StaticDataSetBuilder dataset);
+
+ /**
+ * Reinitialise the specified centroid (residing in the given list of centroids) and return it.
+ * @param centroids The list of centroid vectors containing the centroid that needs to be reinitialised.
+ * @param which The index of the centroid that should be reinitialised.
+ * @return the reinitialised centroid for convenience
+ */
+ public Vector reinitialise(ArrayList<Vector> centroids, int which);
}
diff --git a/src/main/java/net/sourceforge/cilib/clustering/kmeans/DataSetBasedCentroidsInitialisationStrategy.java b/src/main/java/net/sourceforge/cilib/clustering/kmeans/DataSetBasedCentroidsInitialisationStrategy.java
index e15ace7..1df40d6 100644
--- a/src/main/java/net/sourceforge/cilib/clustering/kmeans/DataSetBasedCentroidsInitialisationStrategy.java
+++ b/src/main/java/net/sourceforge/cilib/clustering/kmeans/DataSetBasedCentroidsInitialisationStrategy.java
@@ -39,6 +39,12 @@ import net.sourceforge.cilib.type.types.container.Vector;
public class DataSetBasedCentroidsInitialisationStrategy implements CentroidsInitialisationStrategy {
private static final long serialVersionUID = -3016201656688883387L;
+ private ArrayList<Pattern> patterns;
+
+ public DataSetBasedCentroidsInitialisationStrategy() {
+ this.patterns = null;
+ }
+
/**
* {@inheritDoc}
*/
@@ -59,14 +65,27 @@ public class DataSetBasedCentroidsInitialisationStrategy implements CentroidsIni
public ArrayList<Vector> initialise(ClusteringProblem problem, StaticDataSetBuilder dataset) {
int numberOfCentroids = problem.getNumberOfClusters();
ArrayList<Vector> centroids = new ArrayList<Vector>(numberOfCentroids);
- ArrayList<Pattern> patterns = dataset.getPatterns();
+ this.patterns = dataset.getPatterns();
Random random = new MersenneTwister();
for (int i = 0; i < numberOfCentroids; ++i) {
- Vector centroid = patterns.get(Math.round(random.nextInt(patterns.size()))).data.getClone();
+ Vector centroid = patterns.get(random.nextInt(patterns.size())).data.getClone();
centroids.add(centroid);
}
return centroids;
}
+
+ /**
+ * Just pick a random pattern from the data set.
+ * {@inheritDoc}
+ */
+ @Override
+ public Vector reinitialise(ArrayList<Vector> centroids, int which) {
+ Random random = new MersenneTwister();
+ Vector reinitialised = patterns.get(random.nextInt(patterns.size())).data.getClone();
+
+ centroids.set(which, reinitialised);
+ return reinitialised;
+ }
}
diff --git a/src/main/java/net/sourceforge/cilib/clustering/kmeans/KMeans.java b/src/main/java/net/sourceforge/cilib/clustering/kmeans/KMeans.java
index 1f1ab2f..c1f3f98 100644
--- a/src/main/java/net/sourceforge/cilib/clustering/kmeans/KMeans.java
+++ b/src/main/java/net/sourceforge/cilib/clustering/kmeans/KMeans.java
@@ -130,8 +130,7 @@ public class KMeans extends SingularAlgorithm {
// TODO: I don't know if this if-else is part of the original KMeans algorithm
if (cluster.isEmpty()) {
// reinitialise the centroid if no patterns "belong" to it
- ArrayList<Vector> tmp = this.centroidsInitialisationStrategy.initialise(helper.getClusteringProblem(), helper.getDataSetBuilder());
- centroid = tmp.get(tmp.size() - 1); // use the last centroid that was generated; might return an unbounded Vector
+ centroid = this.centroidsInitialisationStrategy.reinitialise(centroids, i); // might return unbounded Vector
}
else {
// the centroid becomes the mean of cluster i
diff --git a/src/main/java/net/sourceforge/cilib/clustering/kmeans/KMeansPlusPlusCentroidsInitialisationStrategy.java b/src/main/java/net/sourceforge/cilib/clustering/kmeans/KMeansPlusPlusCentroidsInitialisationStrategy.java
index c8b1e43..5ad35f8 100644
--- a/src/main/java/net/sourceforge/cilib/clustering/kmeans/KMeansPlusPlusCentroidsInitialisationStrategy.java
+++ b/src/main/java/net/sourceforge/cilib/clustering/kmeans/KMeansPlusPlusCentroidsInitialisationStrategy.java
@@ -46,12 +46,10 @@ import net.sourceforge.cilib.util.DistanceMeasure;
public class KMeansPlusPlusCentroidsInitialisationStrategy implements CentroidsInitialisationStrategy {
private ArrayList<Pattern> patterns;
private DistanceMeasure distanceMeasure;
- private ArrayList<Vector> chosenCentroids;
public KMeansPlusPlusCentroidsInitialisationStrategy() {
patterns = null;
distanceMeasure = null;
- chosenCentroids = null;
}
/**
@@ -71,29 +69,45 @@ public class KMeansPlusPlusCentroidsInitialisationStrategy implements CentroidsI
Random randomPattern = new MersenneTwister();
Random randomProbability = new MersenneTwister();
int numberOfClusters = problem.getNumberOfClusters();
- int centroidsChosen = 0;
this.patterns = dataset.getPatterns();
this.distanceMeasure = problem.getDistanceMeasure();
- this.chosenCentroids = new ArrayList<Vector>();
+ ArrayList<Vector> chosenCentroids = new ArrayList<Vector>();
- while (centroidsChosen < numberOfClusters) {
- Vector candidateCentroid = patterns.get(Math.round(randomPattern.nextInt(patterns.size()))).data.getClone();
+ for (int i = 0; i < numberOfClusters; ++i) {
+ Vector candidateCentroid = patterns.get(randomPattern.nextInt(patterns.size())).data.getClone();
- if (centroidsChosen > 0) {
- double probability = calculateProbability(candidateCentroid);
-
- if (randomProbability.nextDouble() >= probability) {
- continue;
+ if (i > 0) {
+ while (randomProbability.nextDouble() >= this.calculateProbability(chosenCentroids, candidateCentroid)) {
+ candidateCentroid = patterns.get(randomPattern.nextInt(patterns.size())).data.getClone();
}
}
- this.chosenCentroids.add(candidateCentroid);
- ++centroidsChosen;
+ chosenCentroids.add(candidateCentroid);
+ }
+ return chosenCentroids;
+ }
+
+ /**
+ * Remove the unwanted centroid and replace it with a newly chosen centroid, still based on its contribution to the
+ * overall potential.
+ * {@inheritDoc}
+ */
+ @Override
+ public Vector reinitialise(ArrayList<Vector> centroids, int which) {
+ Vector candidateCentroid = null;
+ Random randomPattern = new MersenneTwister();
+ Random randomProbability = new MersenneTwister();
+
+ do {
+ candidateCentroid = patterns.get(randomPattern.nextInt(patterns.size())).data.getClone();
}
- return this.chosenCentroids;
+ while (randomProbability.nextDouble() >= this.calculateProbability(centroids, candidateCentroid));
+
+ centroids.set(which, candidateCentroid);
+ return candidateCentroid;
}
- private double calculateProbability(Vector candidateCentroid) {
+ private double calculateProbability(ArrayList<Vector> chosenCentroids, Vector candidateCentroid) {
double probability = 0.0;
double numerator = Double.MAX_VALUE;
diff --git a/src/main/java/net/sourceforge/cilib/clustering/kmeans/RandomCentroidsInitialisationStrategy.java b/src/main/java/net/sourceforge/cilib/clustering/kmeans/RandomCentroidsInitialisationStrategy.java
index d2abd86..38a2a2a 100644
--- a/src/main/java/net/sourceforge/cilib/clustering/kmeans/RandomCentroidsInitialisationStrategy.java
+++ b/src/main/java/net/sourceforge/cilib/clustering/kmeans/RandomCentroidsInitialisationStrategy.java
@@ -48,9 +48,7 @@ public class RandomCentroidsInitialisationStrategy implements CentroidsInitialis
* space. The built-representation of the behavioural domain of the given {@link ClusteringProblem} is used to build a
* {@link Vector} that will house the centroids.
*
- * @param problem the {@link ClusteringProblem} currently being optimized
- * @param dataset the {@link StaticDataSetBuilder} currently being clustered
- * @return an {@link ArrayList} of {@link Vector}s that represents all the centroids
+ * {@inheritDoc}
*/
@Override
public ArrayList<Vector> initialise(ClusteringProblem problem, StaticDataSetBuilder dataset) {
@@ -65,4 +63,17 @@ public class RandomCentroidsInitialisationStrategy implements CentroidsInitialis
}
return centroids;
}
+
+ /**
+ * Just randomize the centroid vector.
+ * {@inheritDoc}
+ */
+ @Override
+ public Vector reinitialise(ArrayList<Vector> centroids, int which) {
+ Vector reinitialised = centroids.get(which);
+
+ reinitialised.randomize();
+
+ return reinitialised;
+ }
}
diff --git a/src/main/java/net/sourceforge/cilib/problem/ClusteringProblem.java b/src/main/java/net/sourceforge/cilib/problem/ClusteringProblem.java
index ee89d00..dc51c96 100644
--- a/src/main/java/net/sourceforge/cilib/problem/ClusteringProblem.java
+++ b/src/main/java/net/sourceforge/cilib/problem/ClusteringProblem.java
@@ -184,7 +184,7 @@ public class ClusteringProblem extends OptimisationProblemAdapter {
/**
* Duplicate the domain string of this clustering problem's dataset based on the number
* of clusters that have been specified. For example, if the {@link #setDomain(String)}
- * method has has been called with <code>"R(-1.0,1.0),R(-1.0,1.0)"</code> as parameter
+ * method has been called with <code>"R(-1.0,1.0),R(-1.0,1.0)"</code> as parameter
* and the {@link #setNumberOfClusters(int)} method has been called with <code>3</code>
* as parameter, then the clustering fitness function's domain will automatically be
* regenerated to be
@@ -200,7 +200,7 @@ public class ClusteringProblem extends OptimisationProblemAdapter {
return;
}
- String duplicated = new String(domainRegistry.getDomainString());
+ String duplicated = domainRegistry.getDomainString();
for (int i = 1; i < numberOfClusters; i++) {
duplicated += "," + domainRegistry.getDomainString();
@@ -229,7 +229,7 @@ public class ClusteringProblem extends OptimisationProblemAdapter {
* clustered
*/
public void setDomain(String representation) {
- DomainParser parser = new DomainParser();//DomainParser.getInstance();
+ DomainParser parser = new DomainParser();
parser.parse(representation);
domainRegistry.setDomainString(representation);
diff --git a/src/main/java/net/sourceforge/cilib/problem/dataset/DataSet.java b/src/main/java/net/sourceforge/cilib/problem/dataset/DataSet.java
index d3f156f..9e171e9 100644
--- a/src/main/java/net/sourceforge/cilib/problem/dataset/DataSet.java
+++ b/src/main/java/net/sourceforge/cilib/problem/dataset/DataSet.java
@@ -39,7 +39,7 @@ public abstract class DataSet implements Cloneable, Serializable {
protected String identifier = null;
public DataSet() {
- identifier = "<not set>";
+ identifier = "<unknown data set>";
}
public DataSet(DataSet rhs) {
diff --git a/src/main/java/net/sourceforge/cilib/problem/dataset/DataSetManager.java b/src/main/java/net/sourceforge/cilib/problem/dataset/DataSetManager.java
index 6145dcc..2025ab8 100644
--- a/src/main/java/net/sourceforge/cilib/problem/dataset/DataSetManager.java
+++ b/src/main/java/net/sourceforge/cilib/problem/dataset/DataSetManager.java
@@ -78,12 +78,11 @@ public final class DataSetManager implements Serializable {
public synchronized ArrayList<Pattern> getDataFromSet(DataSet dataset) {
String identifier = dataset.getIdentifier();
- logger.debug("Requesting " + identifier);
+ logger.debug("Requesting data set: " + identifier);
if (!datasets.containsKey(identifier)) {
- logger.debug("Parsing " + identifier);
datasets.put(identifier, dataset.parseDataSet());
}
- logger.debug("Returning " + identifier);
+ logger.debug("Returning data set: " + identifier);
return datasets.get(identifier);
}
@@ -100,12 +99,12 @@ public final class DataSetManager implements Serializable {
public synchronized StaticDataSetBuilder getDataSetBuilder(StaticDataSetBuilder datasetBuilder) {
String identifier = datasetBuilder.getIdentifier();
- logger.debug("Requesting " + identifier);
+ logger.debug("Requesting built data set: " + identifier);
if (!builders.containsKey(identifier)) {
datasetBuilder.initialise();
builders.put(identifier, datasetBuilder);
}
- logger.debug("Returning " + identifier);
+ logger.debug("Returning built data set: " + identifier);
return builders.get(identifier);
}
}
diff --git a/src/main/java/net/sourceforge/cilib/problem/dataset/LocalDataSet.java b/src/main/java/net/sourceforge/cilib/problem/dataset/LocalDataSet.java
index 89705c1..ba5675f 100644
--- a/src/main/java/net/sourceforge/cilib/problem/dataset/LocalDataSet.java
+++ b/src/main/java/net/sourceforge/cilib/problem/dataset/LocalDataSet.java
@@ -140,7 +140,7 @@ public class LocalDataSet extends DataSet {
ArrayList<Pattern> patterns = new ArrayList<Pattern>();
BufferedReader br = new BufferedReader(new InputStreamReader(getInputStream()));
- logger.info("Parsing " + identifier);
+ logger.info("Parsing data set: " + identifier);
try {
// every line in a dataset represents a pattern
String line = br.readLine();
@@ -167,12 +167,12 @@ public class LocalDataSet extends DataSet {
*/
private Pattern parseLine(String line) {
// split the line using the 'delimiter' regular expression
- String[] elements = line.split(delimiter);
+ String [] elements = line.split(delimiter);
// the elements of the split are stored inside a vector that will form the pattern
Vector pattern = new Vector(endIndex - beginIndex + 1);
- for (int i = beginIndex; i <= endIndex; i++) {
- pattern.add(new Real(Double.valueOf(elements[i])));
+ for (int i = beginIndex; i <= endIndex; ++i) {
+ pattern.add(new Real(Double.parseDouble(elements[i])));
}
String clazz = "";
diff --git a/src/main/java/net/sourceforge/cilib/problem/dataset/StaticDataSetBuilder.java b/src/main/java/net/sourceforge/cilib/problem/dataset/StaticDataSetBuilder.java
index a673f58..faa68c1 100644
--- a/src/main/java/net/sourceforge/cilib/problem/dataset/StaticDataSetBuilder.java
+++ b/src/main/java/net/sourceforge/cilib/problem/dataset/StaticDataSetBuilder.java
@@ -33,10 +33,9 @@ import org.slf4j.LoggerFactory;
/**
* This class "collects" and holds all the patterns of the {@link DataSet}s specified
- * through the {@link #addDataSet(DataSet)} method. The name is no longer relevant, because
- * this class no longer keeps track of cluster assignments. That is now the job of the
- * {@link ClusteringUtils} class. Therefore, this class' name will probably change to
- * something like ClusterableDataSetBuilder.
+ * through the {@link #addDataSet(DataSet)} method. It was originally the <code>AssociatedPairDataSetBuilder</code>
+ * class, but has since changed quite a lot. Most of the functionality is now handled by the {@link ClusteringUtils}
+ * helper class.
*
* @author Gary Pampara
* @author Theuns Cloete
@@ -45,26 +44,32 @@ public class StaticDataSetBuilder extends DataSetBuilder {
private static final long serialVersionUID = -7035524554252462144L;
private static Logger logger = LoggerFactory.getLogger(StaticDataSetBuilder.class);
- protected ArrayList<Pattern> patterns = null;
- protected Vector cachedMean = null;
- protected double cachedVariance = 0.0;
- protected double distanceCache[] = null;
- protected String identifier = null;
+ protected ArrayList<Pattern> patterns;
+ protected Vector cachedMean;
+ protected double cachedVariance;
+ protected double distanceCache[];
+ protected String identifier;
+ protected boolean cached;
/**
* Initialise the patterns data structure and set the identifier to be blank.
*/
public StaticDataSetBuilder() {
- patterns = new ArrayList<Pattern>();
- identifier = "";
+ this.patterns = new ArrayList<Pattern>();
+ this.identifier = "<unknown built data set>";
+ this.cached = true;
}
public StaticDataSetBuilder(StaticDataSetBuilder rhs) {
super(rhs);
- patterns = new ArrayList<Pattern>();
+ this.patterns = new ArrayList<Pattern>();
+
for (Pattern pattern : rhs.patterns) {
- patterns.add(pattern.getClone());
+ this.patterns.add(pattern.getClone());
}
+
+ this.identifier = rhs.identifier;
+ this.cached = rhs.cached;
}
@Override
@@ -106,22 +111,17 @@ public class StaticDataSetBuilder extends DataSetBuilder {
throw new IllegalArgumentException("Cannot combine datasets of different dimensions");
}
patterns.addAll(data);
-
- if (identifier.equals("")) {
- identifier += dataset.getIdentifier();
- }
- else {
- identifier += "#|#" + dataset.getIdentifier();
- }
logger.debug(data.size() + " patterns added");
}
- cacheMeanAndVariance();
- cacheDistances();
+ this.cacheMeanAndVariance();
+ if (this.cached) {
+ this.cacheDistances();
+ }
}
/**
- * Calculate and cache the mean ({@link Vector}) and variance (scalar) of the dataset.
+ * Calculate and cache the mean ({@link Vector}) and variance (scalar) of the dataset.
*/
private void cacheMeanAndVariance() {
logger.info("Caching dataset mean and variance");
@@ -150,7 +150,7 @@ public class StaticDataSetBuilder extends DataSetBuilder {
}
/**
- * Calculate and cache the distances from all patterns to all other patterns. The cache
+ * Calculate and cache the distances from all patterns to all other patterns. The cached
* structure looks like this (x represents a distance):
* 0 1 2 3 4 5
* 0 0 x x x x x
@@ -241,11 +241,47 @@ public class StaticDataSetBuilder extends DataSetBuilder {
}
/**
- * Get the identifier that uniquely identifies this constructed/combined/built dataset.
+ * Get the identifier that uniquely identifies this constructed/combined/built data set. The returned string is
+ * created on the fly and lists all the {@link DataSet data sets} that comprise this built data set.
*
* @return the {@link #identifier}
*/
public String getIdentifier() {
- return identifier;
+ String identifiedAs = this.identifier + " = {";
+
+ for (int i = 0; i < this.dataSets.size(); ++i) {
+ identifiedAs += this.dataSets.get(i).getIdentifier();
+
+ if (i < this.dataSets.size() - 1) {
+ identifiedAs += ",";
+ }
+ }
+ return identifiedAs + "}";
+ }
+
+ /**
+ * Set the identifier that will initially uniquely identify this constructed/combined/built dataset that should
+ * still be constructed.
+ *
+ * @param id the identifying string that should be used.
+ */
+ public void setIdentifier(String id) {
+ this.identifier = id;
+ }
+
+ /**
+ * Check whether the distances between every pattern (in this built-up data set) were cached.
+ * @return true/false
+ */
+ public boolean getCached() {
+ return this.cached;
+ }
+
+ /**
+ * Sets whether the distances between every pattern (in this built-up data set) should be cached.
+ * @param c true/false
+ */
+ public void setCached(boolean c) {
+ this.cached = c;
}
}
diff --git a/xml/clustering-gbest-pso.xml b/xml/clustering-gbest-pso.xml
index 948fa31..c640591 100644
--- a/xml/clustering-gbest-pso.xml
+++ b/xml/clustering-gbest-pso.xml
@@ -80,7 +80,7 @@
<!-- wdbc MINIMUMS and MAXIMUMS: R(6.9, 28.2), R(9.7, 39.3), R(43.7, 188.6), R(143.4, 2501.1), R(0.0, 0.2), R(0.0, 0.4), R(0.0, 0.5), R(0.0, 0.3), R(0.1, 0.4), R(0.0, 0.2), R(0.1, 2.9), R(0.3, 4.9), R(0.7, 22.0), R(6.7, 542.3), R(0.0, 0.1), R(0.0, 0.2), R(0.0, 0.5), R(0.0, 0.1), R(0.0, 0.1), R(0.0, 0.1), R(7.9, 36.1), R(12.0, 49.6), R(50.4, 251.3), R(185.1, 4254.1), R(0.0, 0.3), R(0.0, 1.1), R(0.0, 1.3), R(0.0, 0.3), R(0.1, 0.7), R(0.0, 0.3) -->
<dataSetBuilder id="wdbc" class="problem.dataset.StaticDataSetBuilder">
<addDataSet class="problem.dataset.LocalDataSet">
- <identifier>../MSc/datasets/uci/breast-cancer-wisconsin/wdbc.data</identifier>
+ <identifier>../../datasets/uci/breast-cancer-wisconsin/wdbc.data</identifier>
<!-- <patternExpression>(\d+,(M|B))?,</patternExpression> -->
<delimiter>,</delimiter>
<beginIndex>2</beginIndex>
@@ -92,7 +92,7 @@
<!-- glass MINIMUMS and MAXIMUMS: R(1.5, 1.6), R(10.7, 17.4), R(0.0, 4.5), R(0.2, 3.6), R(69.8, 75.5), R(0.0, 6.3), R(5.4, 16.2), R(0.0, 3.2), R(0.0, 0.6) -->
<dataSetBuilder id="glass" class="problem.dataset.StaticDataSetBuilder">
<addDataSet class="problem.dataset.LocalDataSet">
- <identifier>../MSc/datasets/uci/glass/glass.data</identifier>
+ <identifier>../../datasets/uci/glass/glass.data</identifier>
<!-- <patternExpression>(^\d+,|\d$|,)</patternExpression> -->
<delimiter>,</delimiter>
<beginIndex>1</beginIndex>
@@ -104,7 +104,7 @@
<!-- ionosphere MINIMUMS and MAXIMUMS: Z(0, 1), Z(0, 0), R(-1.0, 1.0)^32 -->
<dataSetBuilder id="ionosphere" class="problem.dataset.StaticDataSetBuilder">
<addDataSet class="problem.dataset.LocalDataSet">
- <identifier>../MSc/datasets/uci/ionosphere/ionosphere.data</identifier>
+ <identifier>../../datasets/uci/ionosphere/ionosphere.data</identifier>
<!-- <patternExpression>,(g|b)?</patternExpression> -->
<delimiter>,</delimiter>
<beginIndex>0</beginIndex>
@@ -116,7 +116,7 @@
<!-- iris MINIMUMS and MAXIMUMS: R(4.2, 8.0), R(1.9, 4.5), R(0.9, 6.9), R(0.0, 2.6) -->
<dataSetBuilder id="iris" class="problem.dataset.StaticDataSetBuilder">
<addDataSet class="problem.dataset.LocalDataSet">
- <identifier>../MSc/datasets/uci/iris/iris.data</identifier>
+ <identifier>../../datasets/uci/iris/iris.data</identifier>
<!-- <patternExpression>,\D*</patternExpression>-->
<delimiter>,</delimiter>
<beginIndex>0</beginIndex>
@@ -128,7 +128,7 @@
<!-- bupa MINIMUMS and MAXIMUMS: Z(65, 103), Z(23, 138), Z(4, 155), Z(5, 82), Z(5, 297), R(0.0, 20.0) -->
<dataSetBuilder id="bupa" class="problem.dataset.StaticDataSetBuilder">
<addDataSet class="problem.dataset.LocalDataSet">
- <identifier>../MSc/datasets/uci/liver-disorders/bupa.data</identifier>
+ <identifier>../../datasets/uci/liver-disorders/bupa.data</identifier>
<!-- <patternExpression>,\d$|,</patternExpression> -->
<delimiter>,</delimiter>
<beginIndex>0</beginIndex>
@@ -140,7 +140,7 @@
<!-- pima indians diabetes MINIMUMS and MAXIMUMS: Z(0, 17), Z(0, 199), Z(0, 122), Z(0, 99), Z(0, 846), R(0, 67.1), R(0.08, 2.42), Z(21, 81) -->
<dataSetBuilder id="diabetes" class="problem.dataset.StaticDataSetBuilder">
<addDataSet class="problem.dataset.LocalDataSet">
- <identifier>../MSc/datasets/uci/pima-indians-diabetes/pima-indians-diabetes.data</identifier>
+ <identifier>../../datasets/uci/pima-indians-diabetes/pima-indians-diabetes.data</identifier>
<!-- <patternExpression>,\d$|,</patternExpression> -->
<delimiter>,</delimiter>
<beginIndex>0</beginIndex>
@@ -152,7 +152,7 @@
<!-- new thyroid MINIMUMS and MAXIMUMS: Z(65, 144), R(0.5, 25.3), R(0.2, 10.0), R(0.1, 56.4), R(-0.7, 56.3) -->
<dataSetBuilder id="thyroid" class="problem.dataset.StaticDataSetBuilder">
<addDataSet class="problem.dataset.LocalDataSet">
- <identifier>../MSc/datasets/uci/thyroid-disease/new-thyroid.data</identifier>
+ <identifier>../../datasets/uci/thyroid-disease/new-thyroid.data</identifier>
<!-- <patternExpression>^\d,|,</patternExpression> -->
<delimiter>,</delimiter>
<beginIndex>1</beginIndex>
@@ -164,7 +164,7 @@
<!-- wine MINIMUMS and MAXIMUMS: R(11.03, 14.83), R(0.74, 5.8), R(1.36, 3.23), R(10.6, 30.0), Z(70, 162), R(0.98, 3.88), R(0.34, 5.08), R(0.13, 0.66), R(0.41, 3.58), R(1.28, 13.0), R(0.48, 1.71), R(1.27, 4.0), Z(278, 1680) -->
<dataSetBuilder id="wine" class="problem.dataset.StaticDataSetBuilder">
<addDataSet class="problem.dataset.LocalDataSet">
- <identifier>../MSc/datasets/uci/wine/wine.data</identifier>
+ <identifier>../../datasets/uci/wine/wine.data</identifier>
<!-- <patternExpression>^\d,|,</patternExpression> -->
<delimiter>,</delimiter>
<beginIndex>1</beginIndex>
@@ -176,7 +176,7 @@
<!-- artificial MINIMUMS and MAXIMUMS: R(-1.0, 1.0), R(-1.0, 1.0) -->
<dataSetBuilder id="artificial" class="problem.dataset.StaticDataSetBuilder">
<addDataSet class="problem.dataset.LocalDataSet">
- <identifier>../MSc/datasets/generated/artificial2/artificial.txt</identifier>
+ <identifier>../../datasets/generated/artificial2/artificial.txt</identifier>
<!-- <patternExpression>,Class\d|,</patternExpression> -->
<delimiter>,</delimiter>
<beginIndex>0</beginIndex>
diff --git a/xml/kmeans.xml b/xml/kmeans.xml
index 31ae178..19c6f8b 100644
--- a/xml/kmeans.xml
+++ b/xml/kmeans.xml
@@ -32,50 +32,12 @@
</algorithms>
<problems>
-<!--
- <problem id="artificial.seperate" class="problem.ClusteringProblem">
- <domain>R(-1.0,1.0),R(-1.0,1.0)</domain>
- <innerProblem class="problem.FunctionMinimisationProblem">
- <function class="functions.clustering.QuantisationErrorFunction" />
- </innerProblem>
- <dataSetBuilder class="problem.dataset.StaticDataSetBuilder">
- <addDataSet class="problem.dataset.LocalDataSet">
- <identifier>../MSc/datasets/generated/artificial2/class0</identifier>
- <delimiter>\s</delimiter>
- <beginIndex>0</beginIndex>
- <endIndex>1</endIndex>
- <classIndex>-1</classIndex>
- </addDataSet>
- <addDataSet class="problem.dataset.LocalDataSet">
- <identifier>../MSc/datasets/generated/artificial2/class1</identifier>
- <delimiter>\s</delimiter>
- <beginIndex>0</beginIndex>
- <endIndex>1</endIndex>
- <classIndex>-1</classIndex>
- </addDataSet>
- <addDataSet class="problem.dataset.LocalDataSet">
- <identifier>../MSc/datasets/generated/artificial2/class2</identifier>
- <delimiter>\s</delimiter>
- <beginIndex>0</beginIndex>
- <endIndex>1</endIndex>
- <classIndex>-1</classIndex>
- </addDataSet>
- <addDataSet class="problem.dataset.LocalDataSet">
- <identifier>../MSc/datasets/generated/artificial2/class3</identifier>
- <delimiter>\s</delimiter>
- <beginIndex>0</beginIndex>
- <endIndex>1</endIndex>
- <classIndex>-1</classIndex>
- </addDataSet>
- </dataSetBuilder>
- </problem>
--->
<problem id="artificial.combined" class="problem.ClusteringProblem" domain="R(-1.0,1.0),R(-1.0,1.0)">
<innerProblem class="problem.FunctionMinimisationProblem">
<function class="functions.clustering.QuantisationErrorFunction" />
</innerProblem>
- <dataSetBuilder class="problem.dataset.StaticDataSetBuilder">
- <addDataSet class="problem.dataset.LocalDataSet" identifier="../../datasets/generated/artificial2/artificial.txt" delimiter="," beginIndex="0" endIndex="1" classIndex="2" />
+ <dataSetBuilder class="problem.dataset.StaticDataSetBuilder" identifier="combined">
+ <addDataSet class="problem.dataset.LocalDataSet" identifier="../../datasets/generated/artificial2/artificial.txt" delimiter="\s+" beginIndex="0" endIndex="1" classIndex="2" />
</dataSetBuilder>
</problem>
</problems>
--
1.6.0.6
|