Cosan  1.0
Data Analytics Library
standardScaler.h
Go to the documentation of this file.
1 #ifndef COSAN_STANDARDSCALER_H
2 #define COSAN_STANDARDSCALER_H
3 
5 
6 namespace Cosan {
7  /**
8  * Standardize features by removing the mean and scaling to unit variance
9  */
10  template<Numeric NumericType>
11  class StandardScaler : public Preprocessor<NumericType> {
12  public:
13  StandardScaler() = default;
14 // ~StandardScaler() override = default;
15 // CosanMatrix standardize(const CosanMatrix& X): Preprocessor(){
16 // this->fit(X);
17 // return this->transform(X);
18 // };
20  this->fit(RD);
21  }
22  CosanRowVector<NumericType> GetMean() const {return this->mean;}
23  CosanRowVector<NumericType> GetStd() const {return this->std;}
25  fit(RD.GetInput());
26  RD.UpdateData(transform(RD.GetInput()));
27  }
28  void fit(const CosanMatrix<NumericType> & X) override {
29  fmt::print("*********************************\n");
30  fmt::print("Begin standardizing data \n");
31  this->mean = X.colwise().mean();
32  this->std = ((X.rowwise() - this->mean).array().pow(2).colwise().sum() / X.rows()).sqrt();
33  if ((this->std.array()==0).any()==true){
34  std::cout<<"Error!"<<std::endl;
35  for (gsl::index idx = 0;idx<this->std.size();idx++ ){
36  if (this->std[idx]==0){
37  std::cout<<"Column "<<idx<<" has identical values!"<<std::endl;
38  }
39  }
40  throw std::invalid_argument(
41  "Check your column! Some column has identical values!"
42  );
43  }
44 
45  fmt::print("End of standardizing data. One may transform or reverse-transform by .transform(),.InvTransform() function. \n");
46  fmt::print("*********************************\n");
47  };
49  return (X.rowwise() - this->mean).array().rowwise() / this->std.array();
50  };
51 
53  return (X.array().rowwise()*(this->std.array())).array().rowwise()+this->mean.array();
54  };
55  private:
58  };
59 }
60 #endif //COSAN_STANDARDSCALER_H
Cosan
Definition: CosanBO.h:29
Cosan::CosanRawData::GetInput
CosanMatrix< NumericType > GetInput()
Get a copy of CosanMatrix<NumericType> X.
Definition: CosanData.h:141
NumericType
double NumericType
Definition: onehotencodingTest.cpp:20
Cosan::StandardScaler::StandardScaler
StandardScaler(CosanRawData< NumericType > &RD)
Definition: standardScaler.h:19
Cosan::StandardScaler::GetStd
CosanRowVector< NumericType > GetStd() const
Definition: standardScaler.h:23
Cosan::CosanRawData::UpdateData
void UpdateData(const CosanMatrix< NumericType > &inputX)
Update X using CosanMatrix<NumericType> input X.
Definition: CosanData.h:108
Cosan::CosanMatrix
Eigen::Matrix< NumericType, Eigen::Dynamic, Eigen::Dynamic > CosanMatrix
Definition: CosanBO.h:37
Cosan::StandardScaler::fit
void fit(CosanRawData< NumericType > &RD)
Definition: standardScaler.h:24
Cosan::StandardScaler::mean
CosanRowVector< NumericType > mean
Definition: standardScaler.h:54
Cosan::StandardScaler::GetMean
CosanRowVector< NumericType > GetMean() const
Definition: standardScaler.h:22
Cosan::CosanRawData
Raw Data container.
Definition: CosanData.h:36
Cosan::StandardScaler::transform
CosanMatrix< NumericType > transform(const CosanMatrix< NumericType > &X) override
Definition: standardScaler.h:48
Cosan::Preprocessor
Definition: preprocessor.h:14
Cosan::CosanRowVector
Eigen::Matrix< NumericType, 1, Eigen::Dynamic > CosanRowVector
Definition: CosanBO.h:52
Cosan::StandardScaler
Definition: standardScaler.h:11
Cosan::StandardScaler::StandardScaler
StandardScaler()=default
Cosan::StandardScaler::std
CosanRowVector< NumericType > std
Definition: standardScaler.h:57
Cosan::StandardScaler::fit
void fit(const CosanMatrix< NumericType > &X) override
Definition: standardScaler.h:28
Cosan::StandardScaler::InvTransform
CosanMatrix< NumericType > InvTransform(const CosanMatrix< NumericType > &X)
Definition: standardScaler.h:52
preprocessor.h