Cosan  1.0
Data Analytics Library
polynomialfeatures.h
Go to the documentation of this file.
1 //
2 // Created by Xinyu Zhang on 4/4/21.
3 //
4 
5 #ifndef COSAN_POLYNOMIALFEATURES_H
6 #define COSAN_POLYNOMIALFEATURES_H
8 
9 namespace Cosan{
10  /**
11  * Generate polynomial and interaction features. For each requested (column index, power) pair, a new feature column is built by raising that input column element-wise to the given power; an interaction feature is the element-wise product of two input columns.
12  */
13  template<Numeric NumericType>
14  class PolynomialFeatures: public Preprocessor<NumericType> {
15  public:
17  PolynomialFeatures(CosanRawData<NumericType> &RD, std::vector<std::pair<gsl::index,NumericType>> um,bool add_back = false): Preprocessor<NumericType>(){
18  fmt::print("*********************************\n");
19  fmt::print("Begin to generating polynomial features!!\n");
20 
21  decltype(auto) X = RD.GetInput();
22  __polynomialFeatures.conservativeResize(X.rows(),um.size());
23 // std::cout<<um.size()<<std::endl;
24 // X.conservativeResize(X.rows(),X.cols()+um.size());
25  gsl::index i = 0;
26  for (const auto & each : um) {
27  if (each.second>=3){
28  fmt::print("Your choice of power {:f} may be too big. High potential of overflow!",each.second);
29  }
30  __polynomialFeatures.col(i) = X.col(each.first).array().pow(each.second);
31  fmt::print("Generating power {:f} of Column {:d}!\n",each.second,each.first);
32  i++;
33  }
34  fmt::print("Finish generating polynomial features! Use .GetPolynomialFeatures() to get access it.\n");
35  fmt::print("*********************************\n");
36  if (add_back==true){
38  fmt::print("Notice that CRD.X has been modified. The dimension of X is ({:},{:}). {:} columns of polynomial features have been added.\n",RD.GetrowsX(),RD.GetcolsX(),__polynomialFeatures.cols());
39  }
40  }
41  void SetInteractionFeatures(CosanRawData<NumericType> &RD,std::pair<gsl::index,gsl::index> pair,bool add_back = false){
42  fmt::print("*********************************\n");
43  fmt::print("Begin generating interaction features between {:d} and {:d}!!\n",pair.first,pair.second);
44  decltype(auto) X = RD.GetInput();
45  __interactionFeatures.conservativeResize(X.rows(),1);
46  __polynomialFeatures.col(0) = X.col(pair.first).array()*X.col(pair.second).array();
47  fmt::print("End of generating interaction features. Get access to it via .GetInteractionFeatures()\n",pair.first,pair.second);
48  fmt::print("*********************************\n");
49  if (add_back==true){
51  fmt::print("Notice that CRD.X has been modified. The dimension of X is ({:d},{:d}). {:d} columns of interaction features have been added.\n",RD.GetrowsX(),RD.GetcolsX(),__interactionFeatures.cols());
52  }
53 
54  }
55 
56 // PolynomialFeatures(CosanRawData<NumericType> &RD,
57 // std::vector<std::pair<std::pair<gsl::index,NumericType>,std::pair<gsl::index,NumericType>> > um2): Preprocessor<NumericType>(){
58 // fmt::print("*********************************\n");
59 // fmt::print("Begin to generating polynomial features!!\n");
60 //
61 // decltype(auto) X = RD.GetInput();
62 // __polynomialFeatures.conservativeResize(X.rows(),um2.size());
63 // gsl::index i = 0;
64 // for (const auto & each: um2) {
65 // if (each.first.second>=3 || each.second.second>=3){
66 // fmt::print("Your choice of power {:f}/{:f} may be too big. High potential of overflow!",each.first.second,each.second.second);
67 // }
68 // __polynomialFeatures.col(i) = X.col(each.first.first).array().pow(each.first.second)*X.col(each.second.first).array().pow(each.second.second);
69 // fmt::print("Generating interactive power: power {:f} of Column {:d} times power {:f} of Column {:d}! \n",each.first.second,each.first.first,each.second.second,each.second.first);
70 //
71 // i++;
72 // }
73 // fmt::print("Finish generating polynomial features! Use .GetPolynomialFeatures() to get access it.\n");
74 // fmt::print("*********************************\n");
75 //
76 // }
77 
78 //
79 // PolynomialFeatures(CosanRawData<NumericType> &RD, std::vector<std::pair<gsl::index,NumericType>> um1,
80 // std::vector<std::pair<std::pair<gsl::index,NumericType>,std::pair<gsl::index,NumericType>> > um2): Preprocessor<NumericType>(){
81 // fmt::print("*********************************\n");
82 // fmt::print("Begin to generating polynomial features!!\n");
83 //
84 // decltype(auto) X = RD.GetInput();
85 // __polynomialFeatures.conservativeResize(X.rows(),um1.size()+um2.size());
86 // gsl::index i = 0;
87 // for (const auto & each : um1) {
88 // if (each.second>=3){
89 // fmt::print("Your choice of power {:f} may be too big. High potential of overflow!",each.second);
90 // }
91 // __polynomialFeatures.col(i) = X.col(each.first).array().pow(each.second);
92 // fmt::print("Generating power {:f} of Column {:d}!\n",each.second,each.first);
93 // i++;
94 // }
95 // for (const auto & each: um2) {
96 // if (each.first.second>=3 || each.second.second>=3){
97 // fmt::print("Your choice of power {:f}/{:f} may be too big. High potential of overflow!",each.first.second,each.second.second);
98 // }
99 // __polynomialFeatures.col(i) = X.col(each.first.first).array().pow(each.first.second)*X.col(each.second.first).array().pow(each.second.second);
100 // fmt::print("Generating interactive power: power {:f} of Column {:d} times power {:f} of Column {:d}! \n",each.first.second,each.first.first,each.second.second,each.second.first);
101 //
102 // i++;
103 // }
104 // fmt::print("Finish generating polynomial features! Use .GetPolynomialFeatures() to get access it.\n");
105 // fmt::print("*********************************\n");
106 //
107 // }
108  decltype(auto) GetPolynomialFeatures(){return __polynomialFeatures;}
111  private:
114  };
115 }
116 
117 
118 #endif //COSAN_POLYNOMIALFEATURES_H
Cosan
Definition: CosanBO.h:29
Cosan::CosanRawData::GetInput
CosanMatrix< NumericType > GetInput()
Get a copy of CosanMatrix<NumericType> X.
Definition: CosanData.h:141
Cosan::PolynomialFeatures::GetInteractionFeatures
decltype(auto) GetInteractionFeatures()
Definition: polynomialfeatures.h:109
NumericType
double NumericType
Definition: onehotencodingTest.cpp:20
Cosan::CosanRawData::GetcolsX
gsl::index GetcolsX()
Get the number of columns for X.
Definition: CosanData.h:270
Cosan::PolynomialFeatures
Definition: polynomialfeatures.h:14
Cosan::CosanMatrix
Eigen::Matrix< NumericType, Eigen::Dynamic, Eigen::Dynamic > CosanMatrix
Definition: CosanBO.h:37
Cosan::PolynomialFeatures::__interactionFeatures
CosanMatrix< NumericType > __interactionFeatures
Definition: polynomialfeatures.h:113
Cosan::CosanRawData
Raw Data container.
Definition: CosanData.h:36
Cosan::PolynomialFeatures::__polynomialFeatures
CosanMatrix< NumericType > __polynomialFeatures
Definition: polynomialfeatures.h:112
Cosan::Preprocessor
Definition: preprocessor.h:14
Cosan::PolynomialFeatures::SetInteractionFeatures
void SetInteractionFeatures(CosanRawData< NumericType > &RD, std::pair< gsl::index, gsl::index > pair, bool add_back=false)
Definition: polynomialfeatures.h:41
Cosan::PolynomialFeatures::PolynomialFeatures
PolynomialFeatures(CosanRawData< NumericType > &RD, std::vector< std::pair< gsl::index, NumericType >> um, bool add_back=false)
Definition: polynomialfeatures.h:17
Cosan::PolynomialFeatures::PolynomialFeatures
PolynomialFeatures()=delete
Cosan::CosanRawData::GetrowsX
gsl::index GetrowsX()
Get the number of rows for X.
Definition: CosanData.h:254
Cosan::PolynomialFeatures::UpdatePolynomialFeatures
void UpdatePolynomialFeatures(CosanMatrix< NumericType > inputX)
Definition: polynomialfeatures.h:110
Cosan::PolynomialFeatures::GetPolynomialFeatures
decltype(auto) GetPolynomialFeatures()
Definition: polynomialfeatures.h:108
preprocessor.h
Cosan::CosanRawData::ConcatenateData
void ConcatenateData(const CosanMatrix< NumericType > &inputX)
Concatenate X using CosanMatrix<NumericType> input X. Add new columns.
Definition: CosanData.h:94