Cosan  1.0
Data Analytics Library
principalcomponentanalysis.h
Go to the documentation of this file.
1 //
2 // Created by Xinyu Zhang on 4/4/21.
3 //
4 
5 #ifndef COSAN_PRINCIPALCOMPONENTANALYSIS_H
6 #define COSAN_PRINCIPALCOMPONENTANALYSIS_H
7 #include <numeric>
9 
10 namespace Cosan{
11  /**
12  * @details https://en.wikipedia.org/wiki/Principal_component_analysis
13  *
14  */
15  template<Numeric NumericType>
16  class PrincipalComponentAnalysis: public Preprocessor<NumericType> {
17  public:
20  ncom = std::min(ncom,RD.GetcolsX());
21  this->fit(RD.GetInput(),ncom);
22  }
24  ncom = std::min(ncom,RD.GetcolsX());
25  this->fit(RD.GetInput(),ncom);
26  }
27 
29  ncom = std::min(ncom,RD.cols());
30  this->fit(RD,ncom);
31  }
32 
33 
35  private:
37  void fit(const CosanMatrix<NumericType>& X,gsl::index ncom){
38  fmt::print("*********************************\n");
39  fmt::print("Begin PCA on Input Data X. Select the first {:} principal components\n",ncom);
40  CosanMatrix<NumericType> centered = X.rowwise() - X.colwise().mean();
41  CosanMatrix<NumericType> cov = centered.adjoint() * centered;
42 
43  Eigen::SelfAdjointEigenSolver<CosanMatrix<NumericType>> eigensolver(cov);
44  if (eigensolver.info() != Eigen::Success) {
45  throw std::invalid_argument("Cannot solve eigenvalue decomposition.");}
46 
47  std::vector<NumericType> vec(eigensolver.eigenvalues().data(), eigensolver.eigenvalues().data() + eigensolver.eigenvalues().size());
48  std::reverse(vec.begin(),vec.end());
49  std::vector<NumericType> runningSum(vec.size());
50  std::partial_sum(vec.begin(), vec.end(), runningSum.begin());
51  for (gsl::index i =0;i<runningSum.size();i++){
52  fmt::print("The first {:} principal components explains {:f}% of the total variance.\n",i,runningSum[i]/runningSum.back()*100);
53  if (runningSum[i]/runningSum.back()>0.98 and i>=8){
54  break;
55  }
56  }
57 // std::cout<<eigensolver.eigenvalues()<<std::endl;
58 // std::cout<<eigensolver.eigenvectors()<<std::endl;
59 // std::cout<<eigensolver.eigenvectors().rightCols(1)<<std::endl;
60  fmt::print("Finsh PCA on Input Data X. The first {:} principal components explains {:f}% of the total variance.\n",ncom,runningSum[ncom]/runningSum.back()*100);
61  fmt::print("Uer .GetPC() function to get the principal components.\n" );
62  fmt::print("*********************************\n");
63  PrincipalComponent = eigensolver.eigenvectors().rightCols(ncom);
64  }
65 
66 
67  };
68 }
69 
70 
71 
72 
73 #endif //COSAN_PRINCIPALCOMPONENTANALYSIS_H
Cosan::PrincipalComponentAnalysis::PrincipalComponentAnalysis
PrincipalComponentAnalysis(CosanData< NumericType > &RD, gsl::index ncom=3)
Definition: principalcomponentanalysis.h:23
Cosan
Definition: CosanBO.h:29
Cosan::CosanRawData::GetInput
CosanMatrix< NumericType > GetInput()
Get a copy of CosanMatrix<NumericType> X.
Definition: CosanData.h:141
Cosan::PrincipalComponentAnalysis::PrincipalComponent
CosanMatrix< NumericType > PrincipalComponent
Definition: principalcomponentanalysis.h:36
NumericType
double NumericType
Definition: onehotencodingTest.cpp:20
Cosan::CosanRawData::GetcolsX
gsl::index GetcolsX()
Get the number of columns for X.
Definition: CosanData.h:270
Cosan::vec
std::vector< NumericType > vec
Definition: templateTest.cpp:14
Cosan::CosanMatrix
Eigen::Matrix< NumericType, Eigen::Dynamic, Eigen::Dynamic > CosanMatrix
Definition: CosanBO.h:37
Cosan::PrincipalComponentAnalysis::FullComponent
CosanMatrix< NumericType > FullComponent
Definition: principalcomponentanalysis.h:36
Cosan::CosanData
Data container.
Definition: CosanData.h:546
Cosan::PrincipalComponentAnalysis::PrincipalComponentAnalysis
PrincipalComponentAnalysis(const CosanMatrix< NumericType > &RD, gsl::index ncom=3)
Definition: principalcomponentanalysis.h:28
Cosan::PrincipalComponentAnalysis::GetPC
CosanMatrix< NumericType > & GetPC()
Definition: principalcomponentanalysis.h:34
Cosan::PrincipalComponentAnalysis
Definition: principalcomponentanalysis.h:16
Cosan::CosanRawData
Raw Data container.
Definition: CosanData.h:36
Cosan::PrincipalComponentAnalysis::fit
void fit(const CosanMatrix< NumericType > &X, gsl::index ncom)
Definition: principalcomponentanalysis.h:37
Cosan::Preprocessor
Definition: preprocessor.h:14
Cosan::PrincipalComponentAnalysis::PrincipalComponentAnalysis
PrincipalComponentAnalysis()=delete
Cosan::PrincipalComponentAnalysis::PrincipalComponentAnalysis
PrincipalComponentAnalysis(CosanRawData< NumericType > &RD, gsl::index ncom=3)
Definition: principalcomponentanalysis.h:19
preprocessor.h