Skip to content

Commit a1afa2f

Browse files
committed
Final changes for journal paper experiments.
1 parent 59740f4 commit a1afa2f

File tree

1 file changed

+46
-21
lines changed

1 file changed

+46
-21
lines changed

src/EMTree.cpp

Lines changed: 46 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -825,12 +825,14 @@ void clueweb() {
825825
}
826826

827827
// k-tree
828-
if (false) {
828+
if (true) {
829+
// build tree
829830
int m = 1000, maxiters = 10;
830831
cout << "-----" << endl;
831832
cout << "Building K-tree of order m=" << m
832833
<< ", k-means maxiters=" << maxiters << endl;
833834
boost::timer::auto_cpu_timer all;
835+
boost::timer::auto_cpu_timer building;
834836
KTree<vecType, clustererType, distanceType, protoType> kt(m, maxiters);
835837
kt.setDelayedUpdates(true);
836838
kt.setUpdateDelay(1000);
@@ -845,16 +847,24 @@ void clueweb() {
845847
}
846848
}
847849
cout << endl;
848-
cout << "rearranging K-tree" << endl;
850+
building.stop();
851+
cout << "Building K-tree took " << building.elapsed().wall / 1e9 << " seconds" << endl;
852+
853+
// rearrange leaves
854+
boost::timer::auto_cpu_timer rearranging;
855+
cout << "Rearranging K-tree" << endl;
849856
kt.rearrange();
857+
cout << "Rearranging K-tree took " << rearranging.elapsed().wall / 1e9 << " seconds" << endl;
858+
859+
// print stats
850860
all.stop();
851861
kt.printStats();
852862
double seconds = all.elapsed().wall / 1e9;
853-
cout << "Building K-tree took " << seconds << " seconds" << endl;
863+
cout << "K-tree took " << seconds << " seconds" << endl;
854864
}
855865

856866
// EM-tree
857-
if (true) {
867+
if (false) {
858868
int maxiters = 4;
859869
int clusters = 110000;
860870
int m = (int)sqrt(clusters);
@@ -902,52 +912,67 @@ void clueweb() {
902912

903913
// TSVQ EM-tree hybrid
904914
if (false) {
905-
int clusters = 110000;
906-
int m = (int)sqrt(clusters);
907-
int depth = 3;
908-
int maxiters = 4;
909-
int sampleSize = 2000000;
915+
// record time for all operations
916+
boost::timer::auto_cpu_timer all;
910917

911918
// sample data
919+
int sampleSize = 2000000;
912920
vector < SVector<bool>*> sample = vectors;
913921
random_shuffle(sample.begin(), sample.end());
914922
sample.resize(sampleSize);
915-
916-
// record time for all operations
917-
boost::timer::auto_cpu_timer all;
918-
923+
919924
// build TSVQ on sample
925+
int clusters = 110000;
926+
int m = (int)sqrt(clusters);
927+
int depth = 3;
928+
int tsvqMaxiters = 5;
920929
boost::timer::auto_cpu_timer tsvqTimer;
921-
TSVQ<vecType, clustererType, distanceType, protoType> tsvq(m, depth, maxiters);
922-
tsvqTimer.start();
930+
TSVQ<vecType, clustererType, distanceType, protoType> tsvq(m, depth, tsvqMaxiters);
923931
tsvq.cluster(sample);
924932
tsvqTimer.stop();
925933
tsvq.printStats();
926934
cout << endl << "Building TSVQ on sample took " << tsvqTimer.elapsed().wall / 1e9 << " seconds" << endl;
927935
cout << "--------" << endl;
928936

929937
// 2 iterations of EM-tree on all data, using TSVQ sample as seed
938+
int emtreeMaxiters = 2;
930939
EMTree<vecType, clustererType, distanceType, protoType> emtree(tsvq.getMWayTree());
931940
boost::timer::auto_cpu_timer emtreeTimer;
932941
{
933942
boost::timer::auto_cpu_timer iter;
934-
emtree.EMStep(vectors);
943+
944+
// place all data into TSVQ initialized tree
945+
emtree.replace(vectors);
946+
cout << "placed all points into TSVQ tree" << endl;
947+
emtree.printStats();
948+
cout << endl << "--------" << endl;
949+
950+
// prune
951+
int pruned = 1;
952+
while (pruned > 0) {
953+
pruned = emtree.prune();
954+
}
955+
956+
// update means
957+
emtree.rebuildInternal();
958+
959+
// print stats
935960
iter.stop();
936961
cout << "iteration 1 took " << iter.elapsed().wall / 1e9 << " seconds" << endl;
937-
cout << "RMSE = " << emtree.getRMSE();
938-
cout << "--------" << endl;
962+
emtree.printStats();
963+
cout << endl << "--------" << endl;
939964
}
940-
for (int i = 1; i < maxiters; ++i) {
965+
for (int i = 1; i < emtreeMaxiters; ++i) {
941966
boost::timer::auto_cpu_timer iter;
942967
emtree.EMStep();
943968
iter.stop();
944969
cout << "iteration " << i + 1 << " took " << iter.elapsed().wall / 1e9 << " seconds" << endl;
945-
cout << "RMSE = " << emtree.getRMSE();
970+
emtree.printStats();
946971
cout << "--------" << endl;
947972
}
948973
emtreeTimer.stop();
949974
emtree.printStats();
950-
cout << endl << "2 iterations of EM-tree took " << emtreeTimer.elapsed().wall / 1e9 << " seconds" << endl;
975+
cout << endl << emtreeMaxiters << " iterations of EM-tree took " << emtreeTimer.elapsed().wall / 1e9 << " seconds" << endl;
951976

952977
// report all time
953978
all.stop();

0 commit comments

Comments
 (0)