Cosan  1.0
Data Analytics Library
timeseriessplit.h
Go to the documentation of this file.
1 //
2 // Created by Xinyu Zhang on 4/6/21.
3 //
4 
5 #ifndef COSAN_TIMESERIESSPLIT_H
6 #define COSAN_TIMESERIESSPLIT_H
7 
9 namespace Cosan{
10  class TimeSeriesSplit: public Splitter {
11  public:
13  TimeSeriesSplit(gsl::index kfoldnumber) : Splitter(kfoldnumber) {}
14  TimeSeriesSplit(gsl::index nrows, gsl::index kfoldnumber): Splitter(nrows,kfoldnumber){}
15  void SetSplit(gsl::index nrows){
16  if (nrows <= KFoldNumber) {
17  throw SmallRows;
18  }
19  fmt::print("*********************************\n");
20  fmt::print("Begin Time Series Splitting!!\n");
21 
23  std::vector <gsl::index> idx(nrows);
24  std::iota(idx.begin(), idx.end(), 0);
25  gsl::index foldSize = nrows/(KFoldNumber+1);
26  for (gsl::index i = 1; i<=KFoldNumber;i++){
27 // std::vector<gsl::index> trainidx = std::vector<gsl::index>(idx.begin(), idx.begin()+foldSize*i+nrows%(kfoldnumber+1));
28 // std::vector<gsl::index> testidx = std::vector<gsl::index>(idx.begin()+foldSize*i+nrows%(kfoldnumber+1), idx.begin()+foldSize*(1+i)+nrows%(kfoldnumber+1));
29 // split_batch.push_back({trainidx,testidx});
30  split_batch.push_back({ std::vector<gsl::index>(idx.begin(), idx.begin()+foldSize*i+nrows%(KFoldNumber+1)),
31  std::vector<gsl::index>(idx.begin()+foldSize*i+nrows%(KFoldNumber+1), idx.begin()+foldSize*(1+i)+nrows%(KFoldNumber+1))});
32  fmt::print("Current Index is {:}, trainidx size:{:}, testidx size:{:}\n",
33  i-1,foldSize*i+nrows%(KFoldNumber+1),foldSize);
34  }
35  fmt::print("End Time Series Splitting!!\n");
36  fmt::print("*********************************\n");
37  }
38 
39  std::vector< std::tuple<std::vector<gsl::index>,std::vector<gsl::index> > > GetSplit() {return split_batch;}
40  private:
41  std::vector< std::tuple<std::vector<gsl::index>,std::vector<gsl::index> > > split_batch;
42 
43  };
44 
46  public:
47 
49  TimeSeriesSplitParallel(gsl::index kfoldnumber) : Splitter(kfoldnumber) {}
50  TimeSeriesSplitParallel(gsl::index nrows, gsl::index kfoldnumber): Splitter(nrows,kfoldnumber){}
51 
52 
53  void SetSplit(gsl::index nrows){
54  if (nrows <= KFoldNumber) {
55  throw SmallRows;
56  }
57  std::vector <gsl::index> idx(nrows);
58  std::iota(idx.begin(), idx.end(), 0);
59  gsl::index foldSize = nrows/(KFoldNumber+1);
60 // std::mutex mylock;
61  fmt::print("*********************************\n");
62  fmt::print("Begin Time Series Splitting in Parallel!!\n");
63  split_batch.resize(KFoldNumber);
64  #pragma omp parallel for
65  for (gsl::index i = 1; i<=KFoldNumber;i++){
66 // std::vector<gsl::index> trainidx = std::vector<gsl::index>(idx.begin(), idx.begin()+foldSize*i+nrows%(kfoldnumber+1));
67 // std::vector<gsl::index> testidx = std::vector<gsl::index>(idx.begin()+foldSize*i+nrows%(kfoldnumber+1), idx.begin()+foldSize*(1+i)+nrows%(kfoldnumber+1));
68 // split_batch.push_back({trainidx,testidx});
69 // mylock.lock();
70  fmt::print("Current Index is {:}, the current thread num is {:}, total number of threads {:}. trainidx size:{:}, testidx size:{:}\n",
71  i-1, omp_get_thread_num(),omp_get_num_threads(),foldSize*i+nrows%(KFoldNumber+1),foldSize);
72  split_batch[i-1]={ std::vector<gsl::index>(idx.begin(), idx.begin()+foldSize*i+nrows%(KFoldNumber+1)),
73  std::vector<gsl::index>(idx.begin()+foldSize*i+nrows%(KFoldNumber+1), idx.begin()+foldSize*(1+i)+nrows%(KFoldNumber+1))};
74 // mylock.unlock();
75  }
76 
77  fmt::print("End Time Series Splitting in Parallel!!\n");
78  fmt::print("*********************************\n");
79  }
80  std::vector< std::tuple<std::vector<gsl::index>,std::vector<gsl::index> > > &GetSplit() {return split_batch;}
81  private:
82  std::vector< std::tuple<std::vector<gsl::index>,std::vector<gsl::index> > > split_batch;
83  };
84 }
85 
86 #endif //COSAN_TIMESERIESSPLIT_H
Cosan::TimeSeriesSplitParallel::TimeSeriesSplitParallel
TimeSeriesSplitParallel()
Definition: timeseriessplit.h:48
selection.h
Cosan
Definition: CosanBO.h:29
Cosan::SmallRows
Cosan::TooSmallSizeException SmallRows
Cosan::TimeSeriesSplitParallel::SetSplit
void SetSplit(gsl::index nrows)
Definition: timeseriessplit.h:53
Cosan::TimeSeriesSplit::TimeSeriesSplit
TimeSeriesSplit(gsl::index kfoldnumber)
Definition: timeseriessplit.h:13
Cosan::TimeSeriesSplit::GetSplit
std::vector< std::tuple< std::vector< gsl::index >, std::vector< gsl::index > > > GetSplit()
Definition: timeseriessplit.h:39
Cosan::Splitter::KFoldNumber
gsl::index KFoldNumber
Definition: selection.h:49
Cosan::TimeSeriesSplit::SetSplit
void SetSplit(gsl::index nrows)
Definition: timeseriessplit.h:15
Cosan::TimeSeriesSplitParallel::GetSplit
std::vector< std::tuple< std::vector< gsl::index >, std::vector< gsl::index > > > & GetSplit()
Definition: timeseriessplit.h:80
Cosan::TimeSeriesSplit::TimeSeriesSplit
TimeSeriesSplit(gsl::index nrows, gsl::index kfoldnumber)
Definition: timeseriessplit.h:14
Cosan::Splitter
Definition: selection.h:31
Cosan::TimeSeriesSplitParallel::TimeSeriesSplitParallel
TimeSeriesSplitParallel(gsl::index kfoldnumber)
Definition: timeseriessplit.h:49
Cosan::TimeSeriesSplitParallel::TimeSeriesSplitParallel
TimeSeriesSplitParallel(gsl::index nrows, gsl::index kfoldnumber)
Definition: timeseriessplit.h:50
Cosan::TimeSeriesSplit::split_batch
std::vector< std::tuple< std::vector< gsl::index >, std::vector< gsl::index > > > split_batch
Definition: timeseriessplit.h:41
Cosan::TimeSeriesSplitParallel
Definition: timeseriessplit.h:45
Cosan::TimeSeriesSplit::TimeSeriesSplit
TimeSeriesSplit()
Definition: timeseriessplit.h:12
Cosan::TimeSeriesSplitParallel::split_batch
std::vector< std::tuple< std::vector< gsl::index >, std::vector< gsl::index > > > split_batch
Definition: timeseriessplit.h:82
Cosan::TimeSeriesSplit
Definition: timeseriessplit.h:10