Cosan  1.0
Data Analytics Library
test1.cpp
Go to the documentation of this file.
1 #include <iostream>
2 #include <vector>
3 #include <set>
4 #include <sstream>
5 #include <fstream>
6 #include <math.h>
7 #include <limits>
namespace {

/** Zero-based (row, column) position of a cell in the input table. */
struct CellPos {
    unsigned int row;
    unsigned int col;
};

/** Everything collected while scanning the comma-separated input. */
struct ScanResult {
    std::vector<double> values;        ///< Numeric cells in reading order (NaN stands in for an empty numeric cell).
    std::vector<std::string> svalues;  ///< Non-numeric (categorical) cells in reading order.
    std::vector<CellPos> posInf;       ///< Positions of cells that parsed to +infinity.
    std::vector<CellPos> negInf;       ///< Positions of cells that parsed to -infinity.
    std::vector<CellPos> missing;      ///< Positions of empty cells and of cells that parsed to NaN.
    std::set<unsigned int> colCat;     ///< Columns whose first-row cell was non-numeric.
    unsigned int rows = 0;             ///< Number of lines read.
    unsigned int cols = 0;             ///< Number of cells in the first line.
};

/**
 * Split one input line into its comma-separated cells.
 *
 * A trailing '\r' (Windows line ending) is removed first. Unlike a
 * std::getline(stream, cell, ',') loop, a trailing comma yields a final
 * empty cell, so "1,2," has three cells. A completely empty line has no cells.
 */
std::vector<std::string> splitCells(std::string line) {
    if (!line.empty() && line.back() == '\r') {
        line.pop_back();
    }
    std::vector<std::string> cells;
    if (line.empty()) {
        return cells;
    }
    std::size_t start = 0;
    for (;;) {
        const std::size_t comma = line.find(',', start);
        if (comma == std::string::npos) {
            cells.push_back(line.substr(start));
            break;
        }
        cells.push_back(line.substr(start, comma - start));
        start = comma + 1;
    }
    return cells;
}

/**
 * Classify a single cell and record it in @p out.
 *
 * - Empty cell: recorded as missing; stored as "" when the column is already
 *   categorical, otherwise as NaN.
 * - Cell that std::stod cannot parse: on the first row it marks the column as
 *   categorical; on later rows it is accepted only in a categorical column.
 * - Cell that parses only partially (e.g. "1E1abc"): rejected.
 * - Fully numeric cell: stored in `values`; +/-infinity and NaN positions are
 *   recorded. This also applies to numeric-looking cells inside a categorical
 *   column, which are still stored numerically.
 *
 * @throws std::invalid_argument on a malformed number, or on a non-numeric
 *         cell in a numeric column after the first row.
 */
void recordCell(const std::string& cell, unsigned int row, unsigned int col,
                bool isFirstRow, ScanResult& out) {
    // On the first row this is always false: each column is visited once and
    // only becomes categorical while its own cell is being handled.
    const bool categorical = out.colCat.find(col) != out.colCat.end();

    if (cell.empty()) {
        if (categorical) {
            out.svalues.push_back("");
        } else {
            out.values.push_back(std::numeric_limits<double>::quiet_NaN());
        }
        out.missing.push_back({row, col});
        return;
    }

    double parsed = 0.0;
    std::size_t consumed = 0;
    try {
        parsed = std::stod(cell, &consumed);
    } catch (const std::logic_error&) {
        // std::stod reports failure with std::invalid_argument or
        // std::out_of_range; both derive from std::logic_error.
        if (isFirstRow) {
            out.colCat.insert(col);
        } else if (!categorical) {
            throw std::invalid_argument(
                "Incorrect value type! Should be numeric but non-numeric input. The entry reads "+cell+
                " and the position is ("+ std::to_string(row)+","+ std::to_string(col)+")");
        }
        out.svalues.push_back(cell);
        return;
    }

    if (consumed != cell.size()) {
        throw std::invalid_argument(
            "Incorrect numeric format! Abort the program. The entry reads "+cell+
            " and the position is ("+ std::to_string(row)+","+ std::to_string(col)+")");
    }

    out.values.push_back(parsed);
    if (isinf(parsed)) {
        if (parsed > 0) {
            out.posInf.push_back({row, col});
        } else {
            out.negInf.push_back({row, col});
        }
    } else if (isnan(parsed)) {
        out.missing.push_back({row, col});
    }
}

/**
 * Read every line of @p in and classify its cells.
 *
 * The first line fixes the column count and decides which columns are
 * categorical; every later line must have the same number of cells.
 *
 * @throws std::invalid_argument on a malformed cell (see recordCell) or when
 *         a line has a different number of cells than the first line.
 */
ScanResult scanTable(std::istream& in) {
    ScanResult out;
    std::string line;
    while (std::getline(in, line)) {
        const std::vector<std::string> cells = splitCells(line);
        const bool isFirstRow = (out.rows == 0);
        unsigned int col = 0;
        for (const std::string& cell : cells) {
            recordCell(cell, out.rows, col, isFirstRow, out);
            ++col;
        }
        if (isFirstRow) {
            out.cols = col;
        } else if (col != out.cols) {
            throw std::invalid_argument("Not all rows has same number of entry! First row has "+std::to_string(out.cols)+" columns but row "+std::to_string(out.rows)+" has "+std::to_string(col)+" columns!" );
        }
        ++out.rows;
    }
    return out;
}

/** Print each position as "(row,col) " on the current line, then end the line. */
void printPositions(const std::vector<CellPos>& positions) {
    for (const CellPos& p : positions) {
        std::cout<<"("<<p.row<<","<<p.col<<")"<<" ";
    }
    std::cout<<'\n';
}

}  // namespace

/**
 * Scan the comma-separated file named by argv[1] and print a summary:
 * row and column counts, the positions of +infinity, -infinity and missing
 * values, and the indices of the categorical columns.
 *
 * @return 0 on success; 1 when no file name was given, the file cannot be
 *         opened, or the input is rejected (the reason is written to std::cerr).
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        std::cerr<<"Usage: "<<(argc > 0 ? argv[0] : "test1")<<" <csv-file>"<<'\n';
        return 1;
    }

    std::ifstream indata(argv[1]);
    if (!indata.is_open()) {
        std::cerr<<"Cannot open input file: "<<argv[1]<<'\n';
        return 1;
    }

    ScanResult table;
    try {
        table = scanTable(indata);
    } catch (const std::exception& error) {
        // Report the problem and exit with a failure code instead of letting
        // the exception escape main.
        std::cerr<<"Error: "<<error.what()<<'\n';
        return 1;
    }

    std::cout<<"Number of rows: "<<table.rows<<'\n';
    std::cout<<"Number of columns: "<<table.cols<<'\n';
    std::cout<<"Number of positive infinity values: "<<table.posInf.size()<<". They are at " ;
    printPositions(table.posInf);
    std::cout<<"Number of negative infinity values: "<<table.negInf.size()<<". They are at ";
    printPositions(table.negInf);
    std::cout<<"Number of missing values: "<<table.missing.size()<<". They are at ";
    printPositions(table.missing);
    std::cout<<"Columns of categorical values: Column ";
    for (const unsigned int idx : table.colCat) {
        std::cout<<idx<<" ";
    }
    std::cout<<'\n';
    return 0;
}
main
int main(int argc, char *argv[])
Definition: test1.cpp:8