java - WEKA HierarchicalClusterer class always returns 2 clusters -
Here is the code:
import weka.clusterers.clusterevaluation; import weka.clusterers.hierarchicalclusterer; import weka.clusterers.em; import weka.core.converters.csvloader; import weka.core.converters.converterutils.datasource; import weka.core.neighboursearch.performancestats; import java.io.file; import java.io.ioexception; import java.text.parseexception; import java.util.arraylist; import java.util.enumeration; import weka.core.*; public class wekasample1 { public static void main(string[] args) { instances data = null; csvloader csvloader = new csvloader(); try { csvloader.setsource(new file("d:\\weka\\numbers.csv")); data = csvloader.getdataset(); hierarchicalclusterer h = new hierarchicalclusterer(); distancefunction d = new distancefunction() { @override public void setoptions(string[] arg0) throws exception { } @override public enumeration listoptions() { return null; } @override public string[] getoptions() { return null; } @override public void update(instance arg0) { } @override public void setinvertselection(boolean arg0) { } @override public void setinstances(instances arg0) { } @override public void setattributeindices(string arg0) { } @override public void postprocessdistances(double[] arg0) { } @override public boolean getinvertselection() { return false; } @override public instances getinstances() { return null; } @override public string getattributeindices() { return null; } @override public double distance(instance arg0, instance arg1, double arg2, performancestats arg3) { return 0; } @override public double distance(instance arg0, instance arg1, double arg2) { return 0; } @override public double distance(instance arg0, instance arg1, performancestats arg2) throws exception { return 0; } @override public double distance(instance arg0, instance arg1) { double s1 = arg0.value(0); double s2 = arg1.value(0); return double.positive_infinity; } }; h.setdistancefunction(d); selectedtag s = new selectedtag(1, hierarchicalclusterer.tags_link_type); h.setlinktype(s); 
h.buildclusterer(data); // double[] arr; // for(int i=0; i<data.size(); i++) { // // arr = h.distributionforinstance(data.get(i)); // for(int j=0; j< arr.length; j++) // system.out.print(arr[j]+","); // system.out.println(); // // } system.out.println(h.numberofclusters()); } catch (exception e) { e.printstacktrace(); } } } now, output number of clusters generated 2 if modify distancefucntion method also. how know instance if of cluster? when uncomment code above written distribution instances, arrayoutofbound exception.
But in general, can anyone explain how hierarchical clustering is done by Weka here?
Here is the data set, of length 10 and dimension 2:
1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10
Try a real data set, not an evenly spaced array of points.
Because they all have the same distance to the next point! With single link, this should yield a single cluster; maybe there are rounding issues.
Plus, the distance function you use returns 0/infinity, too!
Try using the Weka UI first.
Comments
Post a Comment