Cosan  1.0
Data Analytics Library
overunderflow.h
Go to the documentation of this file.
1 //
2 // Created by Xinyu Zhang on 4/4/21.
3 //
4 
5 #ifndef COSAN_OVERUNDERFLOW_H
6 #define COSAN_OVERUNDERFLOW_H
8 namespace Cosan{
9  /**
10  * Clean up row with underflow or overflow data enty.
11  *
12  */
13  template<Numeric NumericType>
14  class OverUnderFlow: public Preprocessor<NumericType> {
15  public:
16  OverUnderFlow() = delete;
18  this->fit(RD);
19  }
20 
21  private:
23  fmt::print("*********************************\n");
24  fmt::print("Begin cleaning up data of overflow/underflow!\n");
25 
26  std::set<gsl::index> InfIdx;
27  for (auto & each: RD.GetIdxpinfX()){
28  InfIdx.insert(each[0]);
29  }
30  for (auto & each: RD.GetIdxminfX()){
31  InfIdx.insert(each[0]);
32  }
33  for (auto & each: RD.GetIdxpinfY()){
34  InfIdx.insert(each[0]);
35  }
36  for (auto & each: RD.GetIdxminfY()){
37  InfIdx.insert(each[0]);
38  }
39  std::vector<gsl::index> SelectedIdx;
40  for (auto i = 0; i<RD.GetrowsX();i++){
41  if (InfIdx.find(i)==InfIdx.end()){
42  SelectedIdx.push_back(i);
43  }
44  }
45  fmt::print("There are {:d} rows that have overflow/underflow data! Delete the whole rows. \n",InfIdx.size());
46  if (RD.GetTarget().size()==0){
47  RD.UpdateData(RD.GetInput()(SelectedIdx,Eigen::all));
48  }
49  else{
50  RD.UpdateData(RD.GetInput()(SelectedIdx,Eigen::all),RD.GetTarget()(SelectedIdx,Eigen::all));
51  }
52 
53  gsl::index NumOfCat = RD.GetcolCatX().size();
54  fmt::print("End of cleaning up data of overflow/underflow!\n");
55  fmt::print("*********************************\n");
56  if (NumOfCat==0){
57  return ;
58  }
59  std::vector<std::string> svaluesX;
60  std::vector<std::string> svaluesY;
61  for (gsl::index i=0; i< RD.GetsvaluesX().size();i++){
62  if (InfIdx.find(i/NumOfCat)==InfIdx.end()){
63  svaluesX.push_back(RD.GetsvaluesX().at(i));
64  }
65  }
66  RD.UpdateCat(svaluesX);
67 
68  }
69 
70  };
71 }
72 
73 #endif //COSAN_OVERUNDERFLOW_H
Cosan
Definition: CosanBO.h:29
Cosan::CosanRawData::GetInput
CosanMatrix< NumericType > GetInput()
Get a copy of CosanMatrix<NumericType> X.
Definition: CosanData.h:141
Cosan::CosanRawData::GetTarget
CosanMatrix< NumericType > GetTarget()
Get a copy of CosanMatrix<NumericType> Y.
Definition: CosanData.h:147
NumericType
double NumericType
Definition: onehotencodingTest.cpp:20
Cosan::CosanRawData::UpdateData
void UpdateData(const CosanMatrix< NumericType > &inputX)
Update X using CosanMatrix<NumericType> input X.
Definition: CosanData.h:108
Cosan::OverUnderFlow
Definition: overunderflow.h:14
Cosan::CosanRawData::GetsvaluesX
std::vector< std::string > GetsvaluesX() const
Get the vector of categorical data from X, ordered row first.
Definition: CosanData.h:287
Cosan::CosanRawData::GetcolCatX
std::set< gsl::index > GetcolCatX() const
Get the column indices (in the original X of the CSV file) of the columns that are of categorical type.
Definition: CosanData.h:237
Cosan::CosanRawData::UpdateCat
void UpdateCat(const std::vector< std::string > &inputX)
Update categorical vector svaluesX using std::vector<std::string> & inputX.
Definition: CosanData.h:125
Cosan::OverUnderFlow::fit
void fit(CosanRawData< NumericType > &RD)
Definition: overunderflow.h:22
Cosan::OverUnderFlow::OverUnderFlow
OverUnderFlow()=delete
Cosan::CosanRawData::GetIdxminfY
std::vector< std::vector< gsl::index > > GetIdxminfY() const
Get the position of negative infinity in the original data Y.
Definition: CosanData.h:227
Cosan::CosanRawData
Raw Data container.
Definition: CosanData.h:36
Cosan::OverUnderFlow::OverUnderFlow
OverUnderFlow(CosanRawData< NumericType > &RD)
Definition: overunderflow.h:17
Cosan::Preprocessor
Definition: preprocessor.h:14
Cosan::CosanRawData::GetIdxminfX
std::vector< std::vector< gsl::index > > GetIdxminfX() const
Get the position of negative infinity in the original data X.
Definition: CosanData.h:212
Cosan::CosanRawData::GetrowsX
gsl::index GetrowsX()
Get the number of rows for X.
Definition: CosanData.h:254
Cosan::CosanRawData::GetIdxpinfX
std::vector< std::vector< gsl::index > > GetIdxpinfX() const
Get the position of positive infinity in the original data X.
Definition: CosanData.h:207
preprocessor.h
Cosan::CosanRawData::GetIdxpinfY
std::vector< std::vector< gsl::index > > GetIdxpinfY() const
Get the position of positive infinity in the original data Y.
Definition: CosanData.h:222