1 #ifndef COSAN_ENCODER_H
2 #define COSAN_ENCODER_H
8 template<Numeric NumericType>
// Log a notice carrying three values, in order: RD.GetrowsX(), RD.GetcolsX()
// and CatMatrix.cols(). The message presents them as the dimension of X and
// the number of one-hot columns that were added.
// NOTE(review): the message text says "CRD.X" while the values are read from
// RD -- confirm which name is intended.
29 fmt::print(
"Notice that CRD.X has been modified. The dimension of X is ({:},{:}). {:} columns of one-hot encodings have been added.\n",RD.
GetrowsX(),RD.
GetcolsX(),
CatMatrix.cols());
// Progress banner: a separator line followed by the "begin encoding" message.
33 fmt::print(
"*********************************\n");
34 fmt::print(
"Begin encoding categorical data !\n");
// Local copy of the string values returned by RD.GetsvaluesX(). It is later
// indexed as svaluesX[row * colCat + col], i.e. as a flat buffer holding
// colCat entries per row.
37 std::vector <std::string> svaluesX = RD.
GetsvaluesX();
// NOTE(review): totalCol presumably accumulates the total number of one-hot
// columns produced -- confirm against where it is updated.
38 gsl::index totalCol = 0;
// Per-column view of the string data: colCat inner vectors, each pre-sized to
// svaluesX.size() / colCat empty strings (one slot per row).
// NOTE(review): the division assumes colCat is non-zero and divides
// svaluesX.size() evenly -- confirm the caller guarantees this.
39 std::vector<std::vector<std::string>> svalues;
40 svalues = std::vector < std::vector <
41 std::string >> (colCat, std::vector<std::string>(svaluesX.size() / colCat,
""));
// Process each of the colCat columns independently.
43 for (gsl::index col = 0; col < colCat; col++) {
// Maps each distinct string seen in this column to an ordinal index.
44 std::unordered_map <std::string, gsl::index> categoryToOrdinal{};
// First pass over the rows of this column.
// NOTE(review): row is a gsl::index (signed per the GSL documentation) and is
// compared with a size()-derived unsigned value -- confirm this is intended.
45 for (gsl::index row = 0; row < svaluesX.size() / colCat; row++) {
// Copy the (row, col) entry out of the flat buffer into the per-column store.
46 svalues[col][row] = svaluesX[row * colCat + col];
// A string not seen before in this column receives the next ordinal, which is
// the current map size; ordinals therefore follow first-appearance order.
47 if (categoryToOrdinal.find(svalues[col][row]) == categoryToOrdinal.end()) {
48 categoryToOrdinal.insert({svalues[col][row], categoryToOrdinal.size()});
// Second pass over the rows of this column.
53 for (gsl::index row = 0; row < svaluesX.size() / colCat; row++) {
// Set the entry at (row, ordinal of this row's string) to 1. The lookup uses
// operator[], which would insert a missing key; the first pass above registers
// every string stored in svalues[col].
54 OneHotMatrixi(row, categoryToOrdinal[svalues[col][row]]) = 1;
// Iterate over every column of OneHotMatrixi.
// NOTE(review): presumably copies these columns into the combined output
// matrix -- confirm against the loop body.
59 for (gsl::index i = 0; i < OneHotMatrixi.cols(); i++) {
// Completion banner: the "finish encoding" message, which points the user to
// .GetCatMatrix(), followed by a separator line.
66 fmt::print(
"Finish encoding categorical data! Get access to the newly-generated additional matrix via .GetCatMatrix()\n");
67 fmt::print(
"*********************************\n");
// Const accessor that returns, by value, a vector of string -> gsl::index maps.
// NOTE(review): presumably one map per categorical column, holding the
// category-to-ordinal assignment made during encoding -- confirm against the
// rest of the body.
71 std::vector <std::unordered_map<std::string, gsl::index>>
getCategories()
const {
// Local result container.
75 std::vector<std::unordered_map<std::string, gsl::index>>
categories;
81 #endif //COSAN_ENCODER_H