Last active
September 3, 2019 18:13
-
-
Save marty1885/5f063f58cc27f3dd9e9782a97d2b35ca to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// A basic parser: it might get some values wrong, but it is good enough for EDA.
//
// Extracts a four-character year from the start of `year`.
// Accepted shapes:
//   "[2000]...", "c2000...", "p2000..."  -> the four characters after the prefix
//   "2000..."                            -> the first four characters
// Whatever follows those four characters is ignored.
//
// Returns the year as converted by std::stoi.
//
// Throws std::runtime_error when `year` is empty, starts with an unsupported
// character, or is too short to hold four characters at the expected position.
// Exceptions raised by std::stoi propagate when the slice does not start with a number.
Int_t parse_year(std::string year)
{
    if(year.empty())
        throw std::runtime_error("year is empty");

    const char first = year[0];
    std::string::size_type offset = 0;
    if(first == '[' || first == 'c' || first == 'p') // Handle format of [2000], c2000 and p2000
        offset = 1;
    // isdigit() only promises a non-zero result for digits, so compare against 0 rather
    // than `true`; the cast keeps negative char values out of isdigit().
    else if(isdigit(static_cast<unsigned char>(first)) == 0)
        throw std::runtime_error("cannot parse format");

    // Without this check the slice below would reach past the end of short inputs.
    if(year.size() < offset + 4)
        throw std::runtime_error("cannot parse format");

    return std::stoi(year.substr(offset, 4));
}
void to_root() | |
{ | |
// Open the raw archive | |
using string = std::string; | |
auto rdf = ROOT::RDataFrame("library_raw", "library_raw.root"); | |
//Create a new archive that we'll be coping to | |
auto f = new TFile("library.root", "recreate"); | |
auto t = new TTree("library", "checkout_data"); | |
// All the fields | |
TDatime date; | |
string usage, checkout_type, creator, material, title, publisher; | |
std::vector<std::string> subjects; | |
Int_t num_checkout, publication_year; | |
// Assign column/branchs to the fields | |
t->Branch("usage", &usage); | |
t->Branch("checkout_type", &checkout_type); | |
t->Branch("material", &material); | |
t->Branch("checkout_month", &date); | |
t->Branch("num_checkout", &num_checkout); | |
t->Branch("title", &title); | |
t->Branch("creator", &creator); | |
t->Branch("subjects", &subjects); | |
t->Branch("publisher", &publisher); | |
t->Branch("publication_year", &publication_year); | |
ProgressDisplay disp(*rdf.Count()); | |
size_t i=0; | |
rdf.Foreach([&](string usage_, string checkout_type_, string material_, string checkout_year_, string checkout_month_, string checkouts_, string title_ | |
, string creator_, string subjects_, string publisher_, string publication_year_) { | |
// Copy data to field | |
usage = usage_; | |
checkout_type = checkout_type_; | |
material = material_; | |
num_checkout = std::stoi(checkouts_); | |
title = title_; | |
creator = creator_; | |
subjects = split(subjects_); | |
publisher = publisher_; | |
try {publication_year = parse_year(publication_year_);} | |
catch(...) {publication_year = 0x7fffffff;} | |
date.Set(std::stoi(checkout_year_), std::stoi(checkout_month_),0, 0, 0, 0); | |
t->Fill(); | |
i++; | |
if(i%100000 == 0) | |
disp.update(i); | |
}, {"UsageClass", "CheckoutType", "MaterialType", "CheckoutYear", "CheckoutMonth", "Checkouts", "Title", "Creator", "Subjects", "Publisher", "PublicationYear"}); | |
f->Write(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment