Cosan  1.0
Data Analytics Library
Cosan::Encoder< NumericType > Class Template Reference

#include <encoder.h>

Inheritance diagram for Cosan::Encoder< NumericType >:
Cosan::Preprocessor< NumericType > Cosan::CosanBO Cosan::OnehotEncoder< NumericType > Cosan::OrdinalEncoder< NumericType >

Public Member Functions

 Encoder ()=delete
 
 Encoder (CosanRawData< NumericType > &RD, bool add_back=false)
 
void fit (CosanRawData< NumericType > &RD)
 
CosanMatrix< NumericType > & GetCatMatrix ()
 
std::vector< std::unordered_map< std::string, gsl::index > > getCategories () const
 
- Public Member Functions inherited from Cosan::Preprocessor< NumericType >
 Preprocessor ()
 
virtual ~Preprocessor ()=default
 
virtual void fit (const CosanMatrix< NumericType > &X)
 
virtual CosanMatrix< NumericType > transform (const CosanMatrix< NumericType > &X)
 
- Public Member Functions inherited from Cosan::CosanBO
 CosanBO ()
 Default constructor. More...
 
virtual const std::string GetName () const
 Get the name of the objects. More...
 

Private Attributes

std::vector< std::unordered_map< std::string, gsl::index > > categories
 
CosanMatrix< NumericType > CatMatrix
 

Detailed Description

template<Numeric NumericType>
class Cosan::Encoder< NumericType >

Definition at line 9 of file encoder.h.

Constructor & Destructor Documentation

◆ Encoder() [1/2]

template<Numeric NumericType>
Cosan::Encoder< NumericType >::Encoder ( )
delete

◆ Encoder() [2/2]

template<Numeric NumericType>
Cosan::Encoder< NumericType >::Encoder ( CosanRawData< NumericType > &  RD,
bool  add_back = false 
)
inline

Definition at line 25 of file encoder.h.

25  : Preprocessor<NumericType>() {
26  fit(RD);
27  if (add_back==true){
28  RD.ConcatenateData(CatMatrix);
29  fmt::print("Notice that CRD.X has been modified. The dimension of X is ({:},{:}). {:} columns of one-hot encodings have been added.\n",RD.GetrowsX(),RD.GetcolsX(),CatMatrix.cols());
30  }
31  }

Member Function Documentation

◆ fit()

template<Numeric NumericType>
void Cosan::Encoder< NumericType >::fit ( CosanRawData< NumericType > &  RD)
inline

Definition at line 32 of file encoder.h.

32  {
33  fmt::print("*********************************\n");
34  fmt::print("Begin encoding categorical data !\n");
35 
36  gsl::index colCat = RD.GetcolCatX().size();
37  std::vector <std::string> svaluesX = RD.GetsvaluesX();
38  gsl::index totalCol = 0;
39  std::vector<std::vector<std::string>> svalues;
40  svalues = std::vector < std::vector <
41  std::string >> (colCat, std::vector<std::string>(svaluesX.size() / colCat, ""));
42  CosanMatrix <NumericType> OneHotMatrixi;
43  for (gsl::index col = 0; col < colCat; col++) {
44  std::unordered_map <std::string, gsl::index> categoryToOrdinal{};
45  for (gsl::index row = 0; row < svaluesX.size() / colCat; row++) {
46  svalues[col][row] = svaluesX[row * colCat + col];
47  if (categoryToOrdinal.find(svalues[col][row]) == categoryToOrdinal.end()) {
48  categoryToOrdinal.insert({svalues[col][row], categoryToOrdinal.size()});
49  }
50  }
51  this->categories.push_back(categoryToOrdinal);
52  OneHotMatrixi = CosanMatrix<NumericType>::Zero(svaluesX.size() / colCat, categoryToOrdinal.size());
53  for (gsl::index row = 0; row < svaluesX.size() / colCat; row++) {
54  OneHotMatrixi(row, categoryToOrdinal[svalues[col][row]]) = 1;
55  }
56  if (col == 0) {
57  CatMatrix = OneHotMatrixi;
58  } else {
59  for (gsl::index i = 0; i < OneHotMatrixi.cols(); i++) {
60  CatMatrix.conservativeResize(CatMatrix.rows(), CatMatrix.cols() + 1);
61  CatMatrix.col(CatMatrix.cols() - 1) = OneHotMatrixi.col(i);
62  }
63  }
64 
65  }
66  fmt::print("Finish encoding categorical data! Get access to the newly-generated additional matrix via .GetCatMatrix()\n");
67  fmt::print("*********************************\n");
68  }

◆ getCategories()

template<Numeric NumericType>
std::vector<std::unordered_map<std::string, gsl::index> > Cosan::Encoder< NumericType >::getCategories ( ) const
inline

Definition at line 71 of file encoder.h.

71  {
72  return this->categories;}

◆ GetCatMatrix()

template<Numeric NumericType>
CosanMatrix<NumericType>& Cosan::Encoder< NumericType >::GetCatMatrix ( )
inline

Definition at line 70 of file encoder.h.

70 {return CatMatrix;}

Member Data Documentation

◆ categories

template<Numeric NumericType>
std::vector<std::unordered_map<std::string, gsl::index> > Cosan::Encoder< NumericType >::categories
private

Definition at line 75 of file encoder.h.

◆ CatMatrix

template<Numeric NumericType>
CosanMatrix<NumericType> Cosan::Encoder< NumericType >::CatMatrix
private

Definition at line 76 of file encoder.h.


The documentation for this class was generated from the following file:
encoder.h
Cosan::Encoder::CatMatrix
CosanMatrix< NumericType > CatMatrix
Definition: encoder.h:76
Cosan::Encoder::fit
void fit(CosanRawData< NumericType > &RD)
Definition: encoder.h:32
Cosan::Encoder::categories
std::vector< std::unordered_map< std::string, gsl::index > > categories
Definition: encoder.h:75