Some bugfixes. This seems functional now.

Jonathan Chang · Jonathan Chang · commit e9ce38903657 · 2011-09-03T18:07:19.000-04:00
diff --git a/pkg/R/Flim.R b/pkg/R/Flim.R
@@ -22,7 +22,13 @@ Flim <- function(singleton.counts,
                       pairwise.counts[,3],
                       document.count)
   for (ii in 1:num.iterations) {
-    flim.obj$optimizeAll()
+    cat("Iteration ")
+    print(ii)
+    print(system.time({
+      flim.obj$estimateExpectations()
+      cat("Total change: ")
+      print(flim.obj$optimizeAll())
+    }))
   }
   return(flim.obj)
 }
diff --git a/pkg/src/flim.cpp b/pkg/src/flim.cpp
@@ -2,6 +2,7 @@
 #include <gsl/gsl_blas.h>
 #include <gsl/gsl_sf_lambert.h>
 #include <vector>
+#include <math.h>
 
 class Flim {
   // Actual parameters.
@@ -40,7 +41,7 @@ class Flim {
                        empirical_pair_(N, N),
                        empirical_singleton_(N) {
     gsl_matrix_float_set_zero(lambda_);
-    gsl_vector_float_set_zero(ones_);
+    gsl_vector_float_set_all(ones_, 1.0);
   }
 
   ~Flim() {
@@ -108,7 +109,6 @@ class Flim {
     initializeKappa(num_documents);
   }
 
-
   float sigmoid(float x) {
     return 1.0 / (1.0 + exp(-x));
   }
@@ -130,19 +130,66 @@ class Flim {
     return exp(p11) / (exp(p11) + exp(p10) + exp(p01) + 1);
   }
 
-  void optimizeAll() {
+  double optimizeAll() {
+    double total_delta = 0.0;
     for (int x = 0; x < lambda_.nrow(); ++x) {
       for (int y = 0; y < x; ++y) {
-        optimizeLambda(y, x);
+        total_delta += optimizeLambda(y, x);
       }
     }
+    return total_delta;
   }
 
-  void optimizeLambda(unsigned int x, unsigned int y) {
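+  /**
+   * Proof of lemma 3.2.1:
+   *
+   * x = a - b e^x  iff  x = a - W(b e^a),
+   * where W is the Lambert W function, i.e. W(z) e^{W(z)} = z.
+   *
+   *    x = a - W(b e^a)
+   * => W(b e^a) = a - x
+   * => (a - x) exp(a - x) = b e^a
+   * => (a - x) exp(-x) = b        (divide both sides by e^a)
+   * => x = a - b e^x
+   */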
+
+  double optimizeLambda(unsigned int x, unsigned int y) {
+#ifdef NEW_WAY
     double A = getComputedExpectation(x, y) * exp(-lambda_(x, y));
     double B = 2 * beta2_;
     double C = empirical_pair_(x, y) - beta1_;
+#else
+    double A = getComputedExpectation(x, y);
+    double B = 2 * beta2_;
+    double C = empirical_pair_(x, y) - beta1_ - 2 * beta2_ * lambda_(x, y);
+#endif
+
+// #define DEBUGGING_NOISE
+#ifdef  DEBUGGING_NOISE
+    if (empirical_pair_(x, y) > 0 && rand() < RAND_MAX / 1000) {
+      double overcount_x = lambda_(x,y) * singleton_expectation_[y];
+      double overcount_y = lambda_(x,y) * singleton_expectation_[x];
     
+      double p10 = q_lambda_[x] + kappa_[x] - overcount_x;
+      double p01 = q_lambda_[y] + kappa_[y] - overcount_y;
+
+      double p11 = estimates_(x,y) - overcount_x - overcount_y;
+
+      std::cout << getComputedExpectation(x, y) << " "
+                << empirical_pair_(x, y) << " " 
+                << kappa_[x] << " " 
+                << kappa_[y] << " "
+                << lambda_(x,y) << " " 
+                << x << " " << y << " "
+                << q_lambda_[x] << " "
+                << q_lambda_[y] << " "
+                << estimates_(x,y) << " "
+                << overcount_x << " " << overcount_y << " "
+                << p10 << " " << p01 << " " << p11 << " "
+                << std::endl;
+    }
+#endif
+
     double delta1 = 0;
     double delta2 = 0;
     if (beta2_ == 0) {
@@ -171,8 +218,14 @@ class Flim {
       new_lambda = 0;
     }
 
+    double delta = fabs(new_lambda - lambda_(x,y));
+    if (delta - delta != 0) {
+      delta = 0.0;
+    }
+
     lambda_(x,y) = new_lambda;
     lambda_(y,x) = new_lambda;
+    return delta;
   }
 
   RcppGSL::matrix<float> getLambda() {
diff --git a/tests/cora.test.R b/tests/cora.test.R
@@ -5,7 +5,10 @@ require(Matrix)
 data(cora.documents)
 data(cora.vocab)
 
-counts <- count.pairs(cora.documents)
+## documents <- head(cora.documents)
+documents <- cora.documents
+
+counts <- count.pairs(documents)
 singleton.counts <- diag(counts)
 counts <- as(counts, 'dgTMatrix')
 pairwise.counts <- subset(data.frame(
@@ -17,7 +20,8 @@ save(pairwise.counts, singleton.counts, file="counts.Rdata")
 
 flim.instance <- Flim(singleton.counts,
                       pairwise.counts,
-                      length(cora.documents))
+                      length(documents),
+                      0.0, 0.0, 15)
 
 lambda <- flim.instance$getLambda()
 save(lambda, file="lambda.Rdata")