Cosan  1.0
Data Analytics Library
Cosan::MissingValues< NumericType > Class Template Reference

#include <missingvalues.h>

Inheritance diagram for Cosan::MissingValues< NumericType >:
Cosan::Preprocessor< NumericType > Cosan::CosanBO

Public Member Functions

 MissingValues ()=delete
 
 MissingValues (CosanRawData< NumericType > &RD)
 
- Public Member Functions inherited from Cosan::Preprocessor< NumericType >
 Preprocessor ()
 
virtual ~Preprocessor ()=default
 
virtual void fit (const CosanMatrix< NumericType > &X)
 
virtual CosanMatrix< NumericType > transform (const CosanMatrix< NumericType > &X)
 
- Public Member Functions inherited from Cosan::CosanBO
 CosanBO ()
 Default constructor. More...
 
virtual const std::string GetName () const
 Get the name of the objects. More...
 

Private Member Functions

void fit (CosanRawData< NumericType > &RD)
 

Detailed Description

template<Numeric NumericType>
class Cosan::MissingValues< NumericType >

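Preprocessor that fills missing (NaN) entries of a CosanRawData object with column means. The constructor immediately calls fit(RD), which replaces the NaN entries of every input column recorded as having missing values with the mean of that column's non-NaN entries, does the same for the first target column when the target has missing entries, and writes the result back through RD.UpdateData().

Definition at line 13 of file missingvalues.h.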

Constructor & Destructor Documentation

◆ MissingValues() [1/2]

template<Numeric NumericType>
Cosan::MissingValues< NumericType >::MissingValues ( )
delete

◆ MissingValues() [2/2]

template<Numeric NumericType>
Cosan::MissingValues< NumericType >::MissingValues ( CosanRawData< NumericType > &  RD)
inline

Definition at line 16 of file missingvalues.h.

16  :Preprocessor<NumericType>(){
17  this->fit(RD);
18  }

Member Function Documentation

◆ fit()

template<Numeric NumericType>
void Cosan::MissingValues< NumericType >::fit ( CosanRawData< NumericType > &  RD)
inline private

Definition at line 21 of file missingvalues.h.

21  {
22  fmt::print("*********************************\n");
23  fmt::print("Begin cleaning up missing data!\n");
24  std::set<gsl::index> MissingIdxX;
25  for (auto & each: RD.GetIdxmissingX()){
26  MissingIdxX.insert(RD.GetRawToNumIdx()[each[1]]);
27  }
28  bool HasMissingIdxY = RD.GetIdxmissingY().size()==0 ? false :true;
29  CosanMatrix<NumericType> X = RD.GetInput();
30 
31  for (auto &i : MissingIdxX ){
32  gsl::index _numofMissing = X.col(i).array().isNaN().template cast<NumericType>().sum();
33  fmt::print("For X Column {:d} has {:d} missing value(s).\n",i,_numofMissing);
34  NumericType mean = X.col(i).unaryExpr([](NumericType v) { return std::isnan(v)? 0 : v; }).sum()/(X.rows()-_numofMissing);
35  fmt::print("Fill the missing/nan entry with the column mean={:f}.\n",mean);
36  X.col(i) = X.col(i).array().isNaN().select(0,X.col(i)).array()+mean*(X.col(i).array().isNaN().template cast<NumericType>());
37 
38  }
39  if (HasMissingIdxY==true){
40  gsl::index i = 0;
41  CosanMatrix<NumericType> Y = RD.GetTarget();
42  gsl::index _numofMissing = Y.col(i).array().isNaN().template cast<NumericType>().sum();
43  fmt::print("For Y has {:d} missing value(s).\n",_numofMissing);
44 
45  NumericType mean = Y.col(i).unaryExpr([](NumericType v) { return std::isnan(v)? 0 : v; }).sum()/(Y.rows()-_numofMissing);
46  fmt::print("Fill the missing/nan entry with the column mean={:f}.\n",mean);
47  Y.col(i) = Y.col(i).array().isNaN().select(0,Y.col(i)).array()+mean*(Y.col(i).array().isNaN().template cast<NumericType>());
48  RD.UpdateData(X,Y);
49  }
50  else{
51  RD.UpdateData(X);
52  }
53  fmt::print("End of cleaning process\n");
54  fmt::print("*********************************\n");
55  }

The documentation for this class was generated from the following file: missingvalues.h
NumericType
double NumericType
Definition: onehotencodingTest.cpp:20
Cosan::MissingValues::fit
void fit(CosanRawData< NumericType > &RD)
Definition: missingvalues.h:21