Cosan  1.0
Data Analytics Library
randomkfold.h
Go to the documentation of this file.
1 //
2 // Created by Xinyu Zhang on 4/6/21.
3 //
4 
5 #ifndef COSAN_RANDOMKFOLD_H
6 #define COSAN_RANDOMKFOLD_H
7 
9 
10 namespace Cosan{
11  class RandomKFold: public Splitter {
12  public:
14  RandomKFold(gsl::index kfoldnumber) : Splitter(kfoldnumber) {}
15  RandomKFold(gsl::index nrows, gsl::index kfoldnumber): Splitter(nrows,kfoldnumber){}
16  void SetSplit(gsl::index nrows){
17  if (nrows<=KFoldNumber){
18  throw SmallRows;
19  }
20  std::vector<gsl::index> idx(nrows);
21  std::iota(idx.begin(), idx.end(), 0);
22  gsl::index foldSize = nrows/KFoldNumber;
23  for (gsl::index i = 0;i<KFoldNumber;i++){
24  std::vector<gsl::index> testidx,trainidx;
25  std::sample(idx.begin(), idx.end(), std::back_inserter(testidx),
26  foldSize, std::mt19937{std::random_device{}()});
27  std::sort(testidx.begin(),testidx.end());
28  std::set_difference(idx.begin(), idx.end(), testidx.begin(), testidx.end(),
29  std::inserter(trainidx, trainidx.begin()));
30  fmt::print("Current Index is {:}, trainidx size:{:}, testidx size:{:}\n",
31  i,trainidx.size(),testidx.size());
32  split_batch.push_back({trainidx,testidx});
33  }
34  }
35 
36  std::vector< std::tuple<std::vector<gsl::index>,std::vector<gsl::index> > > GetSplit() {return split_batch;}
37  private:
38  std::vector< std::tuple<std::vector<gsl::index>,std::vector<gsl::index> > > split_batch;
39  };
40 
41 
42 
43  class RandomKFoldParallel: public Splitter {
44  public:
46  RandomKFoldParallel(gsl::index kfoldnumber) : Splitter(kfoldnumber) {}
47  RandomKFoldParallel(gsl::index nrows, gsl::index kfoldnumber): Splitter(nrows,kfoldnumber){}
48  void SetSplit(gsl::index nrows){
49  if (nrows<=KFoldNumber){
50  throw SmallRows;
51  }
52  std::vector<gsl::index> idx(nrows);
53  std::iota(idx.begin(), idx.end(), 0);
54  gsl::index foldSize = nrows/KFoldNumber;
55 // std::mutex mylock;
56  split_batch.resize(KFoldNumber);
57  #pragma omp parallel for
58  for (gsl::index i = 0;i<KFoldNumber;i++){
59  std::vector<gsl::index> testidx,trainidx;
60  std::sample(idx.begin(), idx.end(), std::back_inserter(testidx),
61  foldSize, std::mt19937{std::random_device{}()});
62  std::sort(testidx.begin(),testidx.end());
63  std::set_difference(idx.begin(), idx.end(), testidx.begin(), testidx.end(),
64  std::inserter(trainidx, trainidx.begin()));
65  fmt::print("Current Index is {:}, the current thread num is {:}, total number of threads {:}. trainidx size:{:}, testidx size:{:}\n",
66  i, omp_get_thread_num(),omp_get_num_threads(),trainidx.size(),testidx.size());
67 // mylock.lock();
68  split_batch[i] = {trainidx,testidx};
69 // mylock.unlock();
70  }
71  }
72 
73  std::vector< std::tuple<std::vector<gsl::index>,std::vector<gsl::index> > > GetSplit() & {return split_batch;}
74  private:
75  std::vector< std::tuple<std::vector<gsl::index>,std::vector<gsl::index> > > split_batch;
76  };
77 
78 
79 }
80 
81 #endif //COSAN_RANDOMKFOLD_H
selection.h
Cosan
Definition: CosanBO.h:29
Cosan::SmallRows
Cosan::TooSmallSizeException SmallRows
Cosan::RandomKFold
Definition: randomkfold.h:11
Cosan::RandomKFoldParallel
Definition: randomkfold.h:43
Cosan::RandomKFoldParallel::GetSplit
std::vector< std::tuple< std::vector< gsl::index >, std::vector< gsl::index > > > GetSplit() &
Definition: randomkfold.h:73
Cosan::Splitter::KFoldNumber
gsl::index KFoldNumber
Definition: selection.h:49
Cosan::RandomKFold::split_batch
std::vector< std::tuple< std::vector< gsl::index >, std::vector< gsl::index > > > split_batch
Definition: randomkfold.h:38
Cosan::RandomKFoldParallel::RandomKFoldParallel
RandomKFoldParallel(gsl::index nrows, gsl::index kfoldnumber)
Definition: randomkfold.h:47
Cosan::RandomKFoldParallel::split_batch
std::vector< std::tuple< std::vector< gsl::index >, std::vector< gsl::index > > > split_batch
Definition: randomkfold.h:75
Cosan::RandomKFoldParallel::RandomKFoldParallel
RandomKFoldParallel(gsl::index kfoldnumber)
Definition: randomkfold.h:46
Cosan::Splitter
Definition: selection.h:31
Cosan::RandomKFold::RandomKFold
RandomKFold(gsl::index nrows, gsl::index kfoldnumber)
Definition: randomkfold.h:15
Cosan::RandomKFold::RandomKFold
RandomKFold()
Definition: randomkfold.h:13
Cosan::RandomKFold::SetSplit
void SetSplit(gsl::index nrows)
Definition: randomkfold.h:16
Cosan::RandomKFoldParallel::RandomKFoldParallel
RandomKFoldParallel()
Definition: randomkfold.h:45
Cosan::RandomKFold::GetSplit
std::vector< std::tuple< std::vector< gsl::index >, std::vector< gsl::index > > > GetSplit()
Definition: randomkfold.h:36
Cosan::RandomKFoldParallel::SetSplit
void SetSplit(gsl::index nrows)
Definition: randomkfold.h:48
Cosan::RandomKFold::RandomKFold
RandomKFold(gsl::index kfoldnumber)
Definition: randomkfold.h:14