Merge branch 'master' of https://github.com/alesis/shogun

shogun-toolbox · Apr 17, 2011 · 47f74d3 · 47f74d3
2 parents 3cea610 + 68a9604
commit 47f74d3
Show file tree

Hide file tree

Showing 6 changed files with 108 additions and 30 deletions.
diff --git a/src/libshogun/distributions/Gaussian.h b/src/libshogun/distributions/Gaussian.h
@@ -24,10 +24,11 @@
 namespace shogun
 {
 class CDotFeatures;
-/** @brief gaussian distribution interface.
+/** @brief Gaussian distribution interface.
  *
- * A takes as input a mean vector and covariance matrix.
+ * Takes as input a mean vector and covariance matrix.
  * Also possible to train from data.
+ * Likelihood is computed using the Gaussian PDF \f$(2\pi)^{-\frac{k}{2}}|\Sigma|^{-\frac{1}{2}}e^{-\frac{1}{2}(x-\mu)'\Sigma^{-1}(x-\mu)}\f$
  */
 class CGaussian : public CDistribution
 {
@@ -36,13 +37,17 @@ class CGaussian : public CDistribution
 		CGaussian();
 		/** constructor
 		 *
-		 * @param mean and covariance
+		 * @param mean mean of the Gaussian
+		 * @param mean_length
+		 * @param cov covariance of the Gaussian
+		 * @param cov_rows
+		 * @param cov_cols
 		 */
 		CGaussian(float64_t* mean, int32_t mean_length,
 					float64_t* cov, int32_t cov_rows, int32_t cov_cols);
 		virtual ~CGaussian();
 
-		/** Set the distribution mean and covariance */
+		/** Compute the inverse covariance and constant part */
 		void init();
 
 		/** learn distribution
@@ -97,15 +102,19 @@ class CGaussian : public CDistribution
 		virtual float64_t get_likelihood_example(int32_t num_example);
 
 		/** compute PDF
+		 *
+		 * computes \f$(2\pi)^{-\frac{k}{2}}|\Sigma|^{-\frac{1}{2}}e^{-\frac{1}{2}(x-\mu)'\Sigma^{-1}(x-\mu)}\f$
 		 *
 		 * @param point
+		 * @param point_len
 		 * @return computed PDF
 		 */
 		virtual float64_t compute_PDF(float64_t* point, int32_t point_len);
 
 		/** get mean
 		 *
-		 * @param copy of the mean
+		 * @param mean copy of the mean
+		 * @param mean_length
 		 */
 		virtual inline void get_mean(float64_t** mean, int32_t* mean_length)
 		{
@@ -116,7 +125,8 @@ class CGaussian : public CDistribution
 
 		/** set mean
 		 *
-		 * @param new mean
+		 * @param mean new mean
+		 * @param mean_length has to match current mean length
 		 */
 		virtual inline void set_mean(float64_t* mean, int32_t mean_length)
 		{
@@ -126,7 +136,9 @@ class CGaussian : public CDistribution
 
 		/** get cov
 		 *
-		 * @param copy of the cov
+		 * @param cov copy of the cov
+		 * @param cov_rows
+		 * @param cov_cols
 		 */
 		virtual inline void get_cov(float64_t** cov, int32_t* cov_rows, int32_t* cov_cols)
 		{
@@ -138,7 +150,9 @@ class CGaussian : public CDistribution
 
 		/** set cov
 		 *
-		 * @param new cov
+		 * @param cov new cov
+		 * @param cov_rows has to match current cov rows
+		 * @param cov_cols has to be equal to cov_rows
 		 */
 		virtual inline void set_cov(float64_t* cov, int32_t cov_rows, int32_t cov_cols)
 		{

diff --git a/src/libshogun/lib/lapack.cpp b/src/libshogun/lib/lapack.cpp
@@ -25,11 +25,13 @@ using namespace shogun;
 #define DGESVD dgesvd
 #define DPOSV dposv
 #define DPOTRF dpotrf
+#define DPOTRF dpotri
 #else
 #define DSYEV dsyev_
 #define DGESVD dgesvd_
 #define DPOSV dposv_
 #define DPOTRF dpotrf_
+#define DPOTRF dpotri_
 #endif
 
 #ifndef HAVE_ATLAS
@@ -43,9 +45,10 @@ int clapack_dpotrf(const CBLAS_ORDER Order, const CBLAS_UPLO Uplo,
 		if (Uplo==CblasUpper)
 			uplo='L';
 	}
-	else
-		if (Uplo==CblasLower)
-			uplo='L';
+	else if (Uplo==CblasLower)
+	{
+		uplo='L';
+	}
 #ifdef HAVE_ACML
 	DPOTRF(uplo, N, A, LDA, &info);
 #else
@@ -57,6 +60,31 @@ int clapack_dpotrf(const CBLAS_ORDER Order, const CBLAS_UPLO Uplo,
 }
 #undef DPOTRF
 
+int clapack_dpotri(const CBLAS_ORDER Order, const CBLAS_UPLO Uplo,
+		const int N, double *A, const int LDA)
+{
+	char uplo = 'U';
+	int info = 0;
+	if (Order==CblasRowMajor)
+	{//A is symmetric, we switch Uplo to get result for CblasRowMajor
+		if (Uplo==CblasUpper)
+			uplo='L';
+	}
+	else if (Uplo==CblasLower)
+	{
+		uplo='L';
+	}
+#ifdef HAVE_ACML
+	DPOTRI(uplo, N, A, LDA, &info);
+#else
+	int n=N;
+	int lda=LDA;
+	DPOTRI(&uplo, &n, A, &lda, &info);
+#endif
+	return info;
+}
+#undef DPOTRI
+
 /* DPOSV computes the solution to a real system of linear equations
  * A * X = B,
  * where A is an N-by-N symmetric positive definite matrix and X and B
@@ -73,9 +101,10 @@ int clapack_dposv(const CBLAS_ORDER Order, const CBLAS_UPLO Uplo,
 		if (Uplo==CblasUpper)
 			uplo='L';
 	}
-	else
-		if (Uplo==CblasLower)
-			uplo='L';
+	else if (Uplo==CblasLower)
+	{
+		uplo='L';
+	}
 #ifdef HAVE_ACML
 	DPOSV(uplo,N,NRHS,A,lda,B,ldb,&info);
 #else

diff --git a/src/libshogun/lib/lapack.h b/src/libshogun/lib/lapack.h
@@ -63,6 +63,7 @@ int dgesvd_(char* jobu, char* jobvt, int* m, int* n, double* a, int* lda,
 		int* lwork, int* info);
 int dposv_(const char *uplo, const int *n, const int *nrhs, double *a, const int *lda, double *b, const int *ldb, int *info);
 int dpotrf_(const char *uplo, int *n, double *a, int * lda, int *info);
+int dpotri_(const char *uplo, int *n, double *a, int * lda, int *info);
 #endif
 }
 

diff --git a/src/libshogun/preproc/PCACut.cpp b/src/libshogun/preproc/PCACut.cpp
@@ -28,10 +28,10 @@
 
 using namespace shogun;
 
-CPCACut::CPCACut(bool do_whitening_, float64_t thresh_)
+CPCACut::CPCACut(bool do_whitening_, ECutoffType cutoff_type_, float64_t thresh_)
 : CSimplePreProc<float64_t>(), T(NULL), num_dim(0), mean(NULL),
 	length_mean(NULL), eigenvalues(NULL), num_eigenvalues(0),initialized(false),
-	do_whitening(do_whitening_), thresh(thresh_)
+	do_whitening(do_whitening_), cutoff_type(cutoff_type_), thresh(thresh_)
 {
 	init();
 }
@@ -126,10 +126,35 @@ bool CPCACut::init(CFeatures* f)
         num_eigenvalues=num_features;
 
 		num_dim=0;
-		for (i=0; i<num_features; i++)
+		if (cutoff_type == FIXED_NUMBER)
+		{
+			ASSERT(thresh <= num_features);
+			num_dim = thresh;
+		}
+		else if (cutoff_type == VARIANCE_EXPLAINED)
 		{
-			if (eigenvalues[i]>thresh)
+			float64_t eig_sum = 0;
+			for (i=0; i<num_features; i++)
+				eig_sum += eigenvalues[i];
+
+			float64_t com_sum = 0;		
+			for (i=num_features-1; i>-1; i--)
+			{
 				num_dim++;
+				com_sum += eigenvalues[i];
+				if (com_sum/eig_sum>=thresh)
+					break;
+			}
+		}
+		else
+		{
+			for (i=num_features-1; i>-1; i--)
+			{
+				if (eigenvalues[i]>thresh)
+					num_dim++;
+				else
+					break;
+			}
 		}
 
 		SG_INFO("Done\nReducing from %i to %i features..", num_features, num_dim) ;
@@ -139,17 +164,14 @@ bool CPCACut::init(CFeatures* f)
 		num_old_dim=num_features;
 
 		int32_t offs=0 ;
-		for (i=0; i<num_features; i++)
+		for (i=num_features-1; i<num_features-num_dim-1; i++)
 		{
-			if (eigenvalues[i]>thresh)
-			{
-				for (int32_t jj=0; jj<num_features; jj++)
-					if (do_whitening)
-						T[offs+jj*num_dim]=cov[num_features*i+jj]/sqrt(eigenvalues[i]);
-					else
-						T[offs+jj*num_dim]=cov[num_features*i+jj];
-				offs++;
-			}
+			for (int32_t jj=0; jj<num_features; jj++)
+				if (do_whitening)
+					T[offs+jj*num_dim]=cov[num_features*i+jj]/sqrt(eigenvalues[i]);
+				else
+					T[offs+jj*num_dim]=cov[num_features*i+jj];
+			offs++;
 		}
 
 		delete[] cov;
@@ -278,6 +300,8 @@ void CPCACut::init()
 			"initalized", "True when initialized.");
 	m_parameters->add(&do_whitening,
 			"do_whitening", "Whether data shall be whitened.");
+	m_parameters->add((machine_int_t*) &cutoff_type, "cutoff_type",
+			"Cutoff type.");
 	m_parameters->add(&thresh,
 			"thresh", "Cutoff threshold.");
 }

diff --git a/src/libshogun/preproc/PCACut.h b/src/libshogun/preproc/PCACut.h
@@ -25,6 +25,13 @@
 
 namespace shogun
 {
+enum ECutoffType
+{
+	THRESHOLD,
+	VARIANCE_EXPLAINED,
+	FIXED_NUMBER
+};
+
 /** @brief Preprocessor PCACut performs principial component analysis on the input
  * vectors and keeps only the n eigenvectors with eigenvalues above a certain
  * threshold.
@@ -43,9 +50,10 @@ class CPCACut : public CSimplePreProc<float64_t>
 		/** constructor
 		 *
 		 * @param do_whitening do whitening
+		 * @param type of cutoff
 		 * @param thresh threshold
 		 */
-		CPCACut(bool do_whitening=false, float64_t thresh=1e-6);
+		CPCACut(bool do_whitening=false, ECutoffType cutoff_type=THRESHOLD, float64_t thresh=1e-6);
 		virtual ~CPCACut();
 
 		/// initialize preprocessor from features
@@ -120,6 +128,8 @@ class CPCACut : public CSimplePreProc<float64_t>
 
 		/** do whitening */
 		bool do_whitening;
+		/** Cutoff type */
+		ECutoffType cutoff_type;
 		/** thresh */
 		float64_t thresh;
 };

diff --git a/src/libshogunui/GUIPreProc.cpp b/src/libshogunui/GUIPreProc.cpp
@@ -62,7 +62,7 @@ CPreProc* CGUIPreProc::create_prunevarsubmean(bool divide_by_std)
 CPreProc* CGUIPreProc::create_pcacut(bool do_whitening, float64_t threshold)
 {
 #ifdef HAVE_LAPACK
-	CPreProc* preproc=new CPCACut(do_whitening, threshold);
+	CPreProc* preproc=new CPCACut(do_whitening, THRESHOLD, threshold);
 
 	if (preproc)
 		SG_INFO("PCACUT created (%p), do_whitening %i threshold %e", preproc, do_whitening, threshold);