Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/alesis/shogun
Browse files Browse the repository at this point in the history
  • Loading branch information
Soeren Sonnenburg committed Apr 17, 2011
2 parents 3cea610 + 68a9604 commit 47f74d3
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 30 deletions.
30 changes: 22 additions & 8 deletions src/libshogun/distributions/Gaussian.h
Expand Up @@ -24,10 +24,11 @@
namespace shogun
{
class CDotFeatures;
/** @brief gaussian distribution interface.
/** @brief Gaussian distribution interface.
*
* A takes as input a mean vector and covariance matrix.
* Takes as input a mean vector and covariance matrix.
* Also possible to train from data.
* Likelihood is computed using the Gaussian PDF \f$(2\pi)^{-\frac{k}{2}}|\Sigma|^{-\frac{1}{2}}e^{-\frac{1}{2}(x-\mu)'\Sigma^{-1}(x-\mu)}\f$
*/
class CGaussian : public CDistribution
{
Expand All @@ -36,13 +37,17 @@ class CGaussian : public CDistribution
CGaussian();
/** constructor
*
* @param mean and covariance
* @param mean mean of the Gaussian
* @param mean_length
* @param cov covariance of the Gaussian
* @param cov_rows
* @param cov_cols
*/
CGaussian(float64_t* mean, int32_t mean_length,
float64_t* cov, int32_t cov_rows, int32_t cov_cols);
virtual ~CGaussian();

/** Set the distribution mean and covariance */
/** Compute the inverse covariance and constant part */
void init();

/** learn distribution
Expand Down Expand Up @@ -97,15 +102,19 @@ class CGaussian : public CDistribution
virtual float64_t get_likelihood_example(int32_t num_example);

/** compute PDF
*
* computes \f$(2\pi)^{-\frac{k}{2}}|\Sigma|^{-\frac{1}{2}}e^{-\frac{1}{2}(x-\mu)'\Sigma^{-1}(x-\mu)}\f$
*
* @param point
* @param point_len
* @return computed PDF
*/
virtual float64_t compute_PDF(float64_t* point, int32_t point_len);

/** get mean
*
* @param copy of the mean
* @param mean copy of the mean
* @param mean_length
*/
virtual inline void get_mean(float64_t** mean, int32_t* mean_length)
{
Expand All @@ -116,7 +125,8 @@ class CGaussian : public CDistribution

/** set mean
*
* @param new mean
* @param mean new mean
* @param mean_length has to match current mean length
*/
virtual inline void set_mean(float64_t* mean, int32_t mean_length)
{
Expand All @@ -126,7 +136,9 @@ class CGaussian : public CDistribution

/** get cov
*
* @param copy of the cov
* @param cov copy of the cov
* @param cov_rows
* @param cov_cols
*/
virtual inline void get_cov(float64_t** cov, int32_t* cov_rows, int32_t* cov_cols)
{
Expand All @@ -138,7 +150,9 @@ class CGaussian : public CDistribution

/** set cov
*
* @param new cov
* @param cov new cov
* @param cov_rows has to match current cov rows
* @param cov_cols has to be equal to cov_rows
*/
virtual inline void set_cov(float64_t* cov, int32_t cov_rows, int32_t cov_cols)
{
Expand Down
41 changes: 35 additions & 6 deletions src/libshogun/lib/lapack.cpp
Expand Up @@ -25,11 +25,13 @@ using namespace shogun;
#define DGESVD dgesvd
#define DPOSV dposv
#define DPOTRF dpotrf
#define DPOTRF dpotri
#else
#define DSYEV dsyev_
#define DGESVD dgesvd_
#define DPOSV dposv_
#define DPOTRF dpotrf_
#define DPOTRF dpotri_
#endif

#ifndef HAVE_ATLAS
Expand All @@ -43,9 +45,10 @@ int clapack_dpotrf(const CBLAS_ORDER Order, const CBLAS_UPLO Uplo,
if (Uplo==CblasUpper)
uplo='L';
}
else
if (Uplo==CblasLower)
uplo='L';
else if (Uplo==CblasLower)
{
uplo='L';
}
#ifdef HAVE_ACML
DPOTRF(uplo, N, A, LDA, &info);
#else
Expand All @@ -57,6 +60,31 @@ int clapack_dpotrf(const CBLAS_ORDER Order, const CBLAS_UPLO Uplo,
}
#undef DPOTRF

int clapack_dpotri(const CBLAS_ORDER Order, const CBLAS_UPLO Uplo,
const int N, double *A, const int LDA)
{
char uplo = 'U';
int info = 0;
if (Order==CblasRowMajor)
{//A is symmetric, we switch Uplo to get result for CblasRowMajor
if (Uplo==CblasUpper)
uplo='L';
}
else if (Uplo==CblasLower)
{
uplo='L';
}
#ifdef HAVE_ACML
DPOTRI(uplo, N, A, LDA, &info);
#else
int n=N;
int lda=LDA;
DPOTRI(&uplo, &n, A, &lda, &info);
#endif
return info;
}
#undef DPOTRI

/* DPOSV computes the solution to a real system of linear equations
* A * X = B,
* where A is an N-by-N symmetric positive definite matrix and X and B
Expand All @@ -73,9 +101,10 @@ int clapack_dposv(const CBLAS_ORDER Order, const CBLAS_UPLO Uplo,
if (Uplo==CblasUpper)
uplo='L';
}
else
if (Uplo==CblasLower)
uplo='L';
else if (Uplo==CblasLower)
{
uplo='L';
}
#ifdef HAVE_ACML
DPOSV(uplo,N,NRHS,A,lda,B,ldb,&info);
#else
Expand Down
1 change: 1 addition & 0 deletions src/libshogun/lib/lapack.h
Expand Up @@ -63,6 +63,7 @@ int dgesvd_(char* jobu, char* jobvt, int* m, int* n, double* a, int* lda,
int* lwork, int* info);
int dposv_(const char *uplo, const int *n, const int *nrhs, double *a, const int *lda, double *b, const int *ldb, int *info);
int dpotrf_(const char *uplo, int *n, double *a, int * lda, int *info);
int dpotri_(const char *uplo, int *n, double *a, int * lda, int *info);
#endif
}

Expand Down
52 changes: 38 additions & 14 deletions src/libshogun/preproc/PCACut.cpp
Expand Up @@ -28,10 +28,10 @@

using namespace shogun;

CPCACut::CPCACut(bool do_whitening_, float64_t thresh_)
CPCACut::CPCACut(bool do_whitening_, ECutoffType cutoff_type_, float64_t thresh_)
: CSimplePreProc<float64_t>(), T(NULL), num_dim(0), mean(NULL),
length_mean(NULL), eigenvalues(NULL), num_eigenvalues(0),initialized(false),
do_whitening(do_whitening_), thresh(thresh_)
do_whitening(do_whitening_), cutoff_type(cutoff_type_), thresh(thresh_)
{
init();
}
Expand Down Expand Up @@ -126,10 +126,35 @@ bool CPCACut::init(CFeatures* f)
num_eigenvalues=num_features;

num_dim=0;
for (i=0; i<num_features; i++)
if (cutoff_type == FIXED_NUMBER)
{
ASSERT(thresh <= num_features);
num_dim = thresh;
}
else if (cutoff_type == VARIANCE_EXPLAINED)
{
if (eigenvalues[i]>thresh)
float64_t eig_sum = 0;
for (i=0; i<num_features; i++)
eig_sum += eigenvalues[i];

float64_t com_sum = 0;
for (i=num_features-1; i>-1; i--)
{
num_dim++;
com_sum += eigenvalues[i];
if (com_sum/eig_sum>=thresh)
break;
}
}
else
{
for (i=num_features-1; i>-1; i--)
{
if (eigenvalues[i]>thresh)
num_dim++;
else
break;
}
}

SG_INFO("Done\nReducing from %i to %i features..", num_features, num_dim) ;
Expand All @@ -139,17 +164,14 @@ bool CPCACut::init(CFeatures* f)
num_old_dim=num_features;

int32_t offs=0 ;
for (i=0; i<num_features; i++)
for (i=num_features-1; i<num_features-num_dim-1; i++)
{
if (eigenvalues[i]>thresh)
{
for (int32_t jj=0; jj<num_features; jj++)
if (do_whitening)
T[offs+jj*num_dim]=cov[num_features*i+jj]/sqrt(eigenvalues[i]);
else
T[offs+jj*num_dim]=cov[num_features*i+jj];
offs++;
}
for (int32_t jj=0; jj<num_features; jj++)
if (do_whitening)
T[offs+jj*num_dim]=cov[num_features*i+jj]/sqrt(eigenvalues[i]);
else
T[offs+jj*num_dim]=cov[num_features*i+jj];
offs++;
}

delete[] cov;
Expand Down Expand Up @@ -278,6 +300,8 @@ void CPCACut::init()
"initalized", "True when initialized.");
m_parameters->add(&do_whitening,
"do_whitening", "Whether data shall be whitened.");
m_parameters->add((machine_int_t*) &cutoff_type, "cutoff_type",
"Cutoff type.");
m_parameters->add(&thresh,
"thresh", "Cutoff threshold.");
}
Expand Down
12 changes: 11 additions & 1 deletion src/libshogun/preproc/PCACut.h
Expand Up @@ -25,6 +25,13 @@

namespace shogun
{
enum ECutoffType
{
THRESHOLD,
VARIANCE_EXPLAINED,
FIXED_NUMBER
};

/** @brief Preprocessor PCACut performs principial component analysis on the input
* vectors and keeps only the n eigenvectors with eigenvalues above a certain
* threshold.
Expand All @@ -43,9 +50,10 @@ class CPCACut : public CSimplePreProc<float64_t>
/** constructor
*
* @param do_whitening do whitening
* @param type of cutoff
* @param thresh threshold
*/
CPCACut(bool do_whitening=false, float64_t thresh=1e-6);
CPCACut(bool do_whitening=false, ECutoffType cutoff_type=THRESHOLD, float64_t thresh=1e-6);
virtual ~CPCACut();

/// initialize preprocessor from features
Expand Down Expand Up @@ -120,6 +128,8 @@ class CPCACut : public CSimplePreProc<float64_t>

/** do whitening */
bool do_whitening;
/** Cutoff type */
ECutoffType cutoff_type;
/** thresh */
float64_t thresh;
};
Expand Down
2 changes: 1 addition & 1 deletion src/libshogunui/GUIPreProc.cpp
Expand Up @@ -62,7 +62,7 @@ CPreProc* CGUIPreProc::create_prunevarsubmean(bool divide_by_std)
CPreProc* CGUIPreProc::create_pcacut(bool do_whitening, float64_t threshold)
{
#ifdef HAVE_LAPACK
CPreProc* preproc=new CPCACut(do_whitening, threshold);
CPreProc* preproc=new CPCACut(do_whitening, THRESHOLD, threshold);

if (preproc)
SG_INFO("PCACUT created (%p), do_whitening %i threshold %e", preproc, do_whitening, threshold);
Expand Down

0 comments on commit 47f74d3

Please sign in to comment.