Last active
November 4, 2016 17:50
-
-
Save jpivarski/e8b9da99152bccf70ba187cdab149563 to your computer and use it in GitHub Desktop.
Performance test comparison of C++ ROOT and Java ROOT
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Step 1: fetch the Java ROOT reader (root4j), pin it to the 0.1-pre2 tag, and build it
git clone https://github.com/diana-hep/root4j.git
cd root4j
git checkout tags/0.1-pre2
mvn package
cd ..

# Step 2: download one data file, then make independent (though identical) copies of it
wget http://opendata.cern.ch/eos/opendata/cms/Run2010B/Mu/AOD/Apr21ReReco-v1/0000/00459D48-EB70-E011-AF09-90E6BA19A252.root -O Mu_Run2010B-Apr21ReReco-v1_AOD.root
for n in 2 3 4 5; do
    cp Mu_Run2010B-Apr21ReReco-v1_AOD.root copy${n}.root
done

# Step 3: compile the Java performance test, then run it with every built jar
# (plus the current directory) on the classpath; keep a copy of the output
javac -cp root4j/target/classes/ ReadOneBranch.java
java -cp $(ls root4j/target/{*.jar,lib/*.jar} | tr '\n' ':'). ReadOneBranch | tee java-results.log

# Step 4: compile and run the C++ performance test through ROOT; keep a copy of the output
root -l readOneBranch.cxx+ | tee cxx-results.log

# Step 5: view the two logs side by side
diff -y -W 100 java-results.log cxx-results.log

# Step 6: gape, because there is something wrong with that discrepancy
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <time.h>
#include <chrono>
#include <cmath>
#include <iostream>
#include "TFile.h"
#include "TTree.h"
#include "TLeaf.h"
#include "TBranch.h"
#include "TClass.h"
#include "TBranchElement.h"
#include "TCollection.h"
#include "TSystem.h"
void runfile(const char* fileName) { | |
std::cout << fileName << std::endl; | |
double total = 0.0; | |
TFile *tfile = new TFile(fileName); | |
TTree *ttree; | |
tfile->GetObject("Events", ttree); | |
std::cout << "number of TBaskets ???" << std::endl; | |
// get the branch with a full path | |
// it's a TBranchElement because recoTracks_generalTracks__RECO.obj is a structure | |
// flat ntuples don't have TBranchElements, but we only need TBranchElement::GetBranchCount for structures | |
TBranchElement *branch = (TBranchElement*)ttree->GetBranch("recoTracks_generalTracks__RECO.obj.chi2_"); | |
// essential!!! MakeClass mode lets us view the structure one leaf at a time. | |
ttree->SetMakeClass(1); | |
// has to be reassigned for each new TFile (we only know the maximum for *this file*) | |
int bufferSize = ((TLeaf*)(branch->GetListOfLeaves()->First()))->GetLeafCount()->GetMaximum(); | |
// allocate a buffer that's just big enough | |
Float_t *buffer = new Float_t[bufferSize]; | |
// have the branch fill this buffer | |
ttree->SetBranchAddress("recoTracks_generalTracks__RECO.obj.chi2_", buffer); | |
// allocating a place to put the size is also essential | |
Int_t size = 0; | |
ttree->SetBranchAddress("recoTracks_generalTracks__RECO.obj",&size); | |
// clock stuff | |
clock_t lastTime = clock(); | |
long items = 0L; | |
long itemsPerPrint = 100000L; | |
// the loop over events | |
Long64_t numEvents = ttree->GetEntries(); | |
for (Long64_t i = 0; i < numEvents; i++) { | |
// essential! GetEntry from the branch, not the ttree | |
branch->GetEntry(i); | |
// get the number of elements (tracks in this case) | |
int numTracks = branch->GetBranchCount()->GetNdata(); | |
// and loop over them | |
for (int j = 0; j < numTracks; ++j) { | |
// getting the data does not involve any function calls | |
total += buffer[j]; | |
// clock stuff | |
items++; | |
if (items % itemsPerPrint == 0) { | |
clock_t now = clock(); | |
std::cout << 1.0 * (now - lastTime) / itemsPerPrint / CLOCKS_PER_SEC * 1e9 << " ns/item" << std::endl; | |
lastTime = now; | |
} | |
} | |
} | |
// clock and checksum results | |
std::cout << "check total " << total << " == 1.55104e+07 (" << (abs(total - 1.55104e+07) < 1e-6*1.55104e+07 ? "true" : "false") << ")" << std::endl; | |
} | |
// Macro entry point: runs the benchmark on the downloaded file and on each of
// its four copies, in that order, then calls gSystem->Exit(0).
void readOneBranch() {
  const char* const inputFiles[] = {
    "Mu_Run2010B-Apr21ReReco-v1_AOD.root",
    "copy2.root",
    "copy3.root",
    "copy4.root",
    "copy5.root"
  };
  const int numFiles = sizeof(inputFiles) / sizeof(inputFiles[0]);

  for (int k = 0; k < numFiles; ++k) {
    runfile(inputFiles[k]);
  }

  gSystem->Exit(0);
}
// SetMakeClass(1) | |
// GetLenType() * GetLenStatic() * GetLeafCount()->GetMaximum() | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hep.io.root.core.RootInput; | |
import hep.io.root.*; | |
import hep.io.root.interfaces.*; | |
public class ReadOneBranch { | |
final static long itemsPerPrint = 100000L; | |
static long lastTime = 0L; | |
static long items = 0; | |
private static void printout() { | |
long now = System.nanoTime(); | |
System.out.println(String.format("%g ns/item", 1.0 * (now - lastTime) / itemsPerPrint)); | |
lastTime = now; | |
} | |
public static void main(String[] args) throws java.io.IOException, RootClassNotFound { | |
String[] fileNames = {"Mu_Run2010B-Apr21ReReco-v1_AOD.root", "copy2.root", "copy3.root", "copy4.root", "copy5.root"}; | |
for (String fileName : fileNames) { | |
System.out.println(fileName); | |
double total = 0.0; | |
RootFileReader reader = new RootFileReader(fileName); | |
TTree tree = (TTree)reader.get("Events"); | |
TBranch branch = tree.getBranch("recoTracks_generalTracks__RECO.").getBranchForName("obj").getBranchForName("chi2_"); | |
TLeaf leaf = (TLeaf)branch.getLeaves().get(0); | |
long[] startingEntries = branch.getBasketEntry(); | |
System.out.println(String.format("number of TBaskets %d", startingEntries.length - 1)); | |
lastTime = System.nanoTime(); | |
items = 0; | |
for (int i = 0; i < startingEntries.length - 1; i++) { | |
long endEntry = startingEntries[i + 1]; | |
// all but the last one | |
for (long entry = startingEntries[i]; entry < endEntry - 1; entry++) { | |
RootInput in = branch.setPosition(leaf, entry + 1); | |
long endPosition = in.getPosition(); | |
in = branch.setPosition(leaf, entry); | |
while (in.getPosition() < endPosition) { | |
total += in.readFloat(); | |
items += 1; | |
if (items % itemsPerPrint == 0) printout(); | |
} | |
} | |
// the last one | |
RootInput in = branch.setPosition(leaf, endEntry - 1); | |
long endPosition = in.getLast(); | |
while (in.getPosition() < endPosition) { | |
total += in.readFloat(); | |
items += 1; | |
if (items % itemsPerPrint == 0) printout(); | |
} | |
} | |
System.out.println(String.format("check total %g == 1.55104e+07 (%s)", total, Math.abs(total - 1.55104e+07) > 1e-12*1.55104e+07 ? "true" : "false")); | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> | |
> Processing readOneBranch.cxx+... | |
Mu_Run2010B-Apr21ReReco-v1_AOD.root Mu_Run2010B-Apr21ReReco-v1_AOD.root | |
number of TBaskets 41 | number of TBaskets ??? | |
688.906 ns/item | 69.49 ns/item | |
320.936 ns/item | 51.83 ns/item | |
1796.14 ns/item | 50.64 ns/item | |
247.927 ns/item | 51.85 ns/item | |
269.145 ns/item | 64.4 ns/item | |
246.458 ns/item | 51.42 ns/item | |
238.634 ns/item | 51.86 ns/item | |
193.092 ns/item | 52.54 ns/item | |
check total 1.55104e+07 == 1.55104e+07 (true) check total 1.55104e+07 == 1.55104e+07 (true) | |
copy2.root copy2.root | |
number of TBaskets 41 | number of TBaskets ??? | |
636.489 ns/item | 65.28 ns/item | |
244.687 ns/item | 53.48 ns/item | |
265.210 ns/item | 50.44 ns/item | |
244.471 ns/item | 51.34 ns/item | |
593.997 ns/item | 63.25 ns/item | |
247.644 ns/item | 51.3 ns/item | |
245.790 ns/item | 51.23 ns/item | |
247.591 ns/item | 51.99 ns/item | |
check total 1.55104e+07 == 1.55104e+07 (true) check total 1.55104e+07 == 1.55104e+07 (true) | |
copy3.root copy3.root | |
number of TBaskets 41 | number of TBaskets ??? | |
348.744 ns/item | 64.75 ns/item | |
183.472 ns/item | 52.26 ns/item | |
183.461 ns/item | 50.96 ns/item | |
181.422 ns/item | 51.65 ns/item | |
194.688 ns/item | 62.68 ns/item | |
175.473 ns/item | 50.87 ns/item | |
185.986 ns/item | 51.31 ns/item | |
176.463 ns/item | 52.11 ns/item | |
check total 1.55104e+07 == 1.55104e+07 (true) check total 1.55104e+07 == 1.55104e+07 (true) | |
copy4.root copy4.root | |
number of TBaskets 41 | number of TBaskets ??? | |
499.383 ns/item | 64.82 ns/item | |
341.218 ns/item | 52.15 ns/item | |
346.253 ns/item | 50.94 ns/item | |
354.199 ns/item | 51.4 ns/item | |
236.469 ns/item | 61.88 ns/item | |
127.569 ns/item | 51.55 ns/item | |
127.590 ns/item | 51.54 ns/item | |
129.381 ns/item | 52.24 ns/item | |
check total 1.55104e+07 == 1.55104e+07 (true) check total 1.55104e+07 == 1.55104e+07 (true) | |
copy5.root copy5.root | |
number of TBaskets 41 | number of TBaskets ??? | |
413.388 ns/item | 64.8 ns/item | |
179.847 ns/item | 52.09 ns/item | |
185.854 ns/item | 51.96 ns/item | |
177.821 ns/item | 53.35 ns/item | |
192.151 ns/item | 62.77 ns/item | |
168.369 ns/item | 50.85 ns/item | |
128.449 ns/item | 51.09 ns/item | |
134.405 ns/item | 52.15 ns/item | |
check total 1.55104e+07 == 1.55104e+07 (true) check total 1.55104e+07 == 1.55104e+07 (true) |
Updated with some more tweaks and now C++ is 50-60 ns/item, or 60-70 MB/second! I've heard that the theoretical limit (due to gzip) was 45 MB/second, so this is getting hard to explain. Nevertheless, I don't see any errors in my timing code.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Ignoring the Java startup phase (while HotSpot is still training its optimizations), the Java time is 130-200 ns/item and the C++ time is 80 ns/item.
The compression factor on this branch is 1.09, so Java reads 15-25 MB/second and C++ reads 40 MB/second.
Java is 1.6 to 2 times slower than C++. That makes sense; it is typical of Java vs. C++ performance on the benchmarks game.