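Environment: PyML 0.7.2, numpy 1.4.0, Python 2.6.4. Cross-validating a FeatureSelect classifier that uses RFE on a SparseDataSet fails with a TypeError raised from SparseDataSet_eliminateFeatures. To reproduce: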
import PyML
data = PyML.SparseDataSet('PyML-0.7.2/data/heartSparse.data')#, labelsColumn=0)
rfe = PyML.featsel.RFE()
rfe.numFeatures = 5
feature_selector = PyML.classifiers.composite.FeatureSelect(PyML.svm.SVM(), rfe)
print feature_selector.cv(data)
Output:
scanned 100 patterns
scanned 200 patterns
read 100 patterns
read 200 patterns
{'11': 10, '10': 9, '13': 12, '12': 11, '1': 0, '3': 2, '2': 1, '5': 4, '4': 3, '7': 6, '6': 5, '9': 8, '8': 7}
read 270 patterns
training ***********************************************************
Cpos, Cneg: 5.55555555556 4.44444444444
...........*
optimization finished, #iter = 2544
obj = -377.998538, rho = -0.966764
nSV = 84, nBSV = 71
in LinearSparseSVModel
done LinearSparseSVModel
constructed model
numFeaturesToEliminate: 1
[6] 1
Traceback (most recent call last):
File "rfetest.py", line 12, in <module>
r = feature_selector.cv(data)
File "/home/radon01/ljosa/.local/lib/python2.6/site-packages/PyML/evaluators/assess.py", line 175, in cv
return cvFromFolds(classifier, data, trainingPatterns, testingPatterns, **args)
File "/home/radon01/ljosa/.local/lib/python2.6/site-packages/PyML/evaluators/assess.py", line 124, in cvFromFolds
trainingPatterns[fold], testingPatterns[fold], **args))
File "/home/radon01/ljosa/.local/lib/python2.6/site-packages/PyML/evaluators/assess.py", line 731, in trainTest
classifier.train(trainingData, **args)
File "/home/radon01/ljosa/.local/lib/python2.6/site-packages/PyML/classifiers/composite.py", line 180, in train
self.featureSelector.select(data, **args)
File "/home/radon01/ljosa/.local/lib/python2.6/site-packages/PyML/feature_selection/featsel.py", line 28, in select
features = self.selectFeatures(data, *options, **args)
File "/home/radon01/ljosa/.local/lib/python2.6/site-packages/PyML/feature_selection/featsel.py", line 302, in selectFeatures
self.run(data, *options, **args)
File "/home/radon01/ljosa/.local/lib/python2.6/site-packages/PyML/feature_selection/featsel.py", line 286, in run
for f in rfeIter : pass
File "/home/radon01/ljosa/.local/lib/python2.6/site-packages/PyML/feature_selection/featsel.py", line 270, in next
data.eliminateFeatures(featuresToEliminate)
File "/home/radon01/ljosa/.local/lib/python2.6/site-packages/PyML/containers/vectorDatasets.py", line 107, in eliminateFeatures
self.container.eliminateFeatures(self, featureList)
File "/home/radon01/ljosa/.local/lib/python2.6/site-packages/PyML/containers/ext/csparsedataset.py", line 385, in eliminateFeatures
def eliminateFeatures(*args): return _csparsedataset.SparseDataSet_eliminateFeatures(*args)
TypeError: in method 'SparseDataSet_eliminateFeatures', argument 2 of type 'std::vector<int,std::allocator<int > > const &'
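Somewhat hackish patch (converts the entries of featuresToEliminate to plain Python ints before they are passed to data.eliminateFeatures):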
--- /home/radon01/ljosa/src/PyML-0.7.2/PyML/feature_selection/featsel.py 2008-12-30 12:47:33.000000000 -0500
+++ /home/radon01/ljosa/.local/lib/python2.6/site-packages/PyML/feature_selection/featsel.py 2010-03-16 16:11:51.224914000 -0400
@@ -266,6 +266,7 @@
for feature in featuresToEliminate :
self.weights[data.featureID[feature]] = self.w[feature] + maxWeight
+ featuresToEliminate = map(int, featuresToEliminate)
data.eliminateFeatures(featuresToEliminate)
print '** numFeatures: ', data.numFeatures