Cosan  1.0
Data Analytics Library
featureengineering.cpp
Go to the documentation of this file.
1 //
2 // Created by Xinyu Zhang on 3/26/21.
3 //
4 #include <iostream>
5 #include <cosan/data/CosanData.h>
6 #include <cosan/base/CosanBO.h>
16 
17 
18 typedef double NumericType;
19 int main() {
20  Cosan::CosanRawData<NumericType> CRD("./example_data/toy2/X_.csv","./example_data/toy2/Y_.csv");
21  Cosan::OverUnderFlow ouf(CRD);
22  Cosan::MissingValues MissV(CRD);
23  Cosan::Normalizer NM(CRD,2);
24 // Cosan::StandardScaler SS(CRD);
25 // Cosan::Encoder ED(CRD,true);
26 // NumericType lb=0,ub=1;
27 // Cosan::MinmaxScaler MMS(CRD,lb,ub);//transform X to domain [lb,ub]. If not provided, default value lb =0,ub=1
28 // std::vector<std::pair<gsl::index,NumericType>> um = {{0,1.2},{1,1.1}};
29 // Cosan::PolynomialFeatures PF(CRD,um,true);//um is a vector of pair where each entries indicates index and power to be generated. The third argument indicates whether the new features should be added to CRD.X. Default is false.
30 // std::pair<gsl::index,gsl::index> pair = {4,5};
31 // PF.SetInteractionFeatures(CRD,pair,true);//to generative interactive features from column 4 and 5.
32  gsl::index ncomponent = 5;
33  Cosan::PrincipalComponentAnalysis PCA(CRD,ncomponent);
34 // gsl::index nrows = CRD.GetrowsX();
35 // std::default_random_engine generator;
36 // std::uniform_real_distribution<NumericType> distribution(0,1);
37 // std::vector<NumericType> newInputX(nrows);
38 // for (gsl::index i = 0;i<nrows;i++){
39 // newInputX[i] = distribution(generator);
40 // }
41 // Cosan::CustomTransform CT(CRD,newInputX);
42 
43  return 0;
44 }
45 //
46 
47  // Cosan::CosanLinearRegression CLR(true);
48 // CLR.fit(CD.GetInput(),CD.GetTarget());
49 
50  // std::cout<<CD.GetrowsX()<<" "<<CD.GetcolsX()<<" "<<std::endl;
51  // for (auto each : CD.GetIdxpinfX()) {std::cout<<each[0]<<" "<<each[1]<<std::endl;}
52  // for (auto each : CD.GetIdxminfX()) {std::cout<<each[0]<<" "<<each[1]<<std::endl;}
53  // for (auto each : CD.GetIdxmissingX()) {std::cout<<each[0]<<" "<<each[1]<<std::endl;}
54  // for (auto each : CD.GetcolCatX()) {std::cout<<each<<std::endl;}
55  // for (auto each : CD.GetsvaluesX()) {std::cout<<each<<std::endl;}
56  // for (const auto & [ key, value ] : CD.GetRawToNumIdx()){
57  // std::cout<<key<<":"<<value<<std::endl;}
58  // for (const auto & [ key, value ] : CD.GetRawToCatIdx()){
59  // std::cout<<key<<":"<<value<<std::endl;}
60 
61 // Cosan::CosanRawData CD1("./example_data/toy2/X_2.csv");
62 
63 // // Cosan::CosanLinearRegression CLR(true);
64 // // CLR.fit(CD.GetInput(),CD.GetTarget());
65 // std::cout<<CD1.GetSummaryMessageX()<<std::endl;
66 // // std::cout<<CD.GetrowsX()<<" "<<CD.GetcolsX()<<" "<<std::endl;
67 // // for (auto each : CD.GetIdxpinfX()) {std::cout<<each[0]<<" "<<each[1]<<std::endl;}
68 // // for (auto each : CD.GetIdxminfX()) {std::cout<<each[0]<<" "<<each[1]<<std::endl;}
69 // // for (auto each : CD.GetIdxmissingX()) {std::cout<<each[0]<<" "<<each[1]<<std::endl;}
70 // // for (auto each : CD.GetcolCatX()) {std::cout<<each<<std::endl;}
71 // // for (auto each : CD.GetsvaluesX()) {std::cout<<each<<std::endl;}
72 // for (const auto & [ key, value ] : CD1.GetRawToNumIdx()){
73 // std::cout<<key<<":"<<value<<std::endl;}
74 // for (const auto & [ key, value ] : CD1.GetRawToCatIdx()){
75 // std::cout<<key<<":"<<value<<std::endl;}
76 
77  // std::cout<<CD.GetInput()<<std::endl;
78 
79 // double RegularizationTerm = 1;
80 // Cosan::CosanRidgeRegression CRR(RegularizationTerm,true);
81 // CRR.fit(CD.GetInput(),CD.GetTarget());
82 // std::cout<<CRR.GetBeta()<<std::endl;
83 // save_csv("./example_data/toy/beta_c1.csv",CRR.GetBeta());
84 
85 // m(0, 0) = 3;
86 // m(1, 0) = 2.5;
87 // m(0, 1) = -1;
88 // m(1,1) = m(1,0)+m(0,1);
89 // std::cout<<y.rows()<<y.cols()<<std::endl;
90  // Eigen::MatrixXd beta = (X.transpose()*X).ldlt().solve(X.transpose()*y);
91 
92 
93 //
94 // m.transposeInPlace();
95 // cout<<m<<endl;
96 // m.resize(1,4);
97 // cout<<n<<endl;
98 
encoder.h
CosanData.h
customtransform.h
NumericType
double NumericType
Definition: featureengineering.cpp:18
Cosan::OverUnderFlow
Definition: overunderflow.h:14
normalizer.h
Cosan::Normalizer
Definition: normalizer.h:12
standardScaler.h
minmaxscaler.h
principalcomponentanalysis.h
Cosan::PrincipalComponentAnalysis
Definition: principalcomponentanalysis.h:16
polynomialfeatures.h
Cosan::CosanRawData
Raw Data container.
Definition: CosanData.h:36
overunderflow.h
Cosan::MissingValues
Definition: missingvalues.h:13
CosanBO.h
CosanBO.
missingvalues.h
main
int main()
Definition: featureengineering.cpp:19