3#ifndef cMHN_UTILITY_GET_CROSS_VAL_SPLITS_H
4#define cMHN_UTILITY_GET_CROSS_VAL_SPLITS_H
// NOTE(review): this listing is an excerpt -- braces, the function signature and
// the declarations of `line`, `g` and `bits` are not visible here. The comments
// below describe only what the visible statements do.
//
// Template parameters: T is the mapped value type of each split's map;
// D is a pRC::Size fed into the constant sequence from which the Subscripts
// type is derived.
29 template<
class T, pRC::Size D>
// Open the input file named by `filename` for reading.
33 std::ifstream file(filename);
// Type computed from a helper that expands a constant sequence (parameters
// pRC::Size, D, 2) into pRC::Subscripts<seq...>; presumably this defines the
// `Subscripts` type used below -- the enclosing declaration is not visible.
36 decltype(expand(pRC::makeConstantSequence<pRC::Size, D, 2>(),
39 return pRC::Subscripts<seq...>();
// Error reported through pRC's logging facility; presumably guarded by a
// check that `file` failed to open -- the condition is not visible here.
44 pRC::Logging::error(
"Unable to open input file!");
// Raw text lines collected from the file.
47 std::vector<std::string> lines;
// 64-bit unsigned count of the lines stored in `lines`.
50 pRC::UnsignedInteger<64> totalSum = 0;
// One line is read before the loop and is not stored in `lines`
// (presumably a header row -- TODO confirm against the input format).
55 std::getline(file, line);
// Store every remaining line and count it.
58 while(std::getline(file, line))
60 lines.push_back(line);
62 totalSum += pRC::unit<decltype(totalSum)>();
// Shuffle the collected lines; `g` is the random generator passed to
// std::shuffle (its declaration is not visible; presumably seeded from `rd`).
66 std::random_device rd;
68 std::shuffle(lines.begin(), lines.end(), g);
// One map per split, keyed by Subscripts with values of type T.
70 std::vector<std::map<Subscripts, T>> pDs;
// Integer quotient and remainder of the line count divided by k.
71 pRC::Index length = totalSum() / k;
72 pRC::Index remainder = totalSum() % k;
// Start with k entries, each equal to the quotient.
74 std::vector<pRC::Index> lengths(k, length);
// Increment the entry at index `remainder - 1`; the surrounding loop or
// condition is not visible here.
// NOTE(review): confirm this is never reached with remainder == 0.
78 ++lengths[remainder - 1];
// Offset into `lines` at which the current split starts.
82 pRC::Index minInd = 0;
// Process the k splits in order.
85 for(pRC::Index ind = 0; ind < k; ++ind)
// Per-split map and per-split 64-bit counter.
87 std::map<Subscripts, T> pD;
88 pRC::UnsignedInteger<64> sum = 0;
// Walk the lengths[ind] consecutive shuffled lines belonging to this split.
91 for(pRC::Index innerInd = 0; innerInd < lengths[ind]; ++innerInd)
93 line = lines[minInd + innerInd];
// Replace every comma with a space, then wrap the line in a string stream
// for parsing.
96 std::replace(line.begin(), line.end(),
',',
' ');
98 std::istringstream iss(line);
// Compiled only when the pRC debug level is Low or higher; the message
// fragment below reports a mismatch in the number of events (the checked
// condition itself is not visible here).
108 if constexpr(pRC::cDebugLevel >= pRC::DebugLevel::Low)
113 "Number of events differs for input file and "
// Make sure an entry for `bits` exists (initialised to zero of T), then add
// one unit of T to it.
120 pD.try_emplace(bits, pRC::zero<T>());
121 pD[bits] += pRC::unit<T>();
// Count this line in the per-split counter.
123 sum += pRC::unit<decltype(sum)>();
// Advance the offset past the lines consumed by this split.
126 minInd += lengths[ind];
// Iterate over the entries of pD by reference; the loop body is not visible.
// NOTE(review): the binding named `k` shadows the `k` used for the split
// count above -- confirm this is intentional.
129 for(
auto &[k, v] : pD)
// Return the per-split maps together with the per-split line counts.
136 return std::make_tuple(pDs, lengths);
pRC::Size const D
Definition: CalculatePThetaTests.cpp:9
Definition: calculate_pTheta.hpp:15
static auto getCrossValSplits(std::string const &filename, pRC::Index const &k)
Splits the samples in a dataset into <i>k</i> sets of as nearly equal size as possible that can be used for ...
Definition: get_cross_val_splits.hpp:30