Last active
November 13, 2024 07:51
-
-
Save coderodde/fdabb2678c481a0ed709df89040f7470 to your computer and use it in GitHub Desktop.
A C++ program for counting byte histograms in files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <ios>
#include <iostream>
#include <istream>
#include <sstream>
#include <string>
#include <vector>
// Width budget, in character columns, for one printed histogram line.
static constexpr std::size_t screenWidth{80};

// Columns of a line that are neither the count field nor the bar:
// "0x" + two hex digits + " [" + one character + "]: " + one separating space.
static constexpr std::size_t linePreambleWidth{11};
// Storage for the per-byte counters: element i is the number of times the
// byte value i has been inserted.
using Histogram = std::vector<std::size_t>;

/**
 * Occurrence counter for the 256 possible byte values.
 *
 * Bytes can be inserted one at a time or drained from an input stream. The
 * counters can then be inspected through indexing or iteration (both const
 * and non-const).
 */
class ByteHistogram {
    // One counter per possible value of an 8-bit byte.
    static constexpr std::size_t histogramCapacity = 0x100;

    // Number of bytes requested from a stream per read() call.
    static constexpr std::size_t readChunkSize = 4096;

    Histogram histogram_;

public:
    // Creates a histogram with all counters set to zero.
    ByteHistogram() : histogram_(histogramCapacity, 0) {}

    // Records one occurrence of the byte value `ch`.
    void insert(unsigned char ch) {
        ++histogram_[ch];
    }

    Histogram::const_iterator cbegin() const {
        return histogram_.cbegin();
    }

    Histogram::const_iterator cend() const {
        return histogram_.cend();
    }

    Histogram::iterator begin() {
        return histogram_.begin();
    }

    Histogram::iterator end() {
        return histogram_.end();
    }

    // Const overloads so that a const ByteHistogram works in a range-for.
    Histogram::const_iterator begin() const {
        return histogram_.begin();
    }

    Histogram::const_iterator end() const {
        return histogram_.end();
    }

    // Number of counters (always histogramCapacity).
    std::size_t size() const {
        return histogram_.size();
    }

    // Largest counter value; zero when nothing has been inserted yet.
    std::size_t getMaximumCount() const {
        return *std::max_element(histogram_.cbegin(),
                                 histogram_.cend());
    }

    /**
     * Reads `is` until extraction fails and records every byte read.
     *
     * Data is pulled in chunks rather than one get() per byte. When the
     * input ends the stream is left with eofbit and failbit set, exactly as
     * a failing get() would leave it; badbit signals a real I/O error and is
     * left for the caller to inspect.
     *
     * @param is stream to drain.
     * @return `is`, to allow the caller to check its state.
     */
    std::istream& insert(std::istream& is) {
        char chunk[readChunkSize];

        for (;;) {
            is.read(chunk, static_cast<std::streamsize>(readChunkSize));
            const std::streamsize received = is.gcount();

            // A short final chunk still carries data; only an empty read
            // (end of input, error, or an already failed stream) ends the loop.
            if (received <= 0) {
                break;
            }

            for (std::streamsize k = 0; k < received; ++k) {
                insert(static_cast<unsigned char>(chunk[k]));
            }
        }

        return is;
    }

    // Count recorded for byte value `i`. Precondition: i < size().
    std::size_t operator[](std::size_t i) const {
        return histogram_[i];
    }
};
// Returns how many characters the decimal representation of `maximumCount`
// occupies (zero is one character wide).
static size_t computeCounterStringLength(const size_t maximumCount) {
    size_t digits = 1;

    // Every further division by ten that leaves something behind is one
    // more decimal digit.
    for (size_t rest = maximumCount / 10; rest != 0; rest /= 10) {
        ++digits;
    }

    return digits;
}
static std::size_t computeBarLength(const size_t count, | |
const size_t maximumCount, | |
const size_t maximumCountLength) { | |
auto const availableSpace = | |
static_cast<double>(screenWidth - linePreambleWidth | |
- maximumCountLength); | |
return static_cast<size_t>(availableSpace * count / maximumCount); | |
} | |
std::ostream& operator<<(std::ostream& os, | |
ByteHistogram& hist) { | |
const size_t maximumCount = hist.getMaximumCount(); | |
const size_t countStringLength = computeCounterStringLength(maximumCount); | |
const size_t maximumBarLength = screenWidth - linePreambleWidth | |
- countStringLength; | |
std::size_t i = 0; | |
for (auto const& count : hist) { | |
const double ratio = static_cast<double>(count) / | |
static_cast<double>(maximumCount); | |
const std::size_t barLength = | |
static_cast<std::size_t>(ratio * maximumBarLength); | |
const unsigned char ch = static_cast<char>(i); | |
std::cout << "0x" | |
<< std::hex | |
<< std::setfill('0') | |
<< std::setw(2) | |
<< i | |
<< " [" | |
<< (std::isprint(ch) ? (char) ch : '?') | |
<< "]: " | |
<< std::dec | |
<< std::setfill(' ') | |
<< std::setw(countStringLength) | |
<< count | |
<< ' ' | |
<< std::setfill('*') | |
<< std::setw(barLength) | |
<< '\n'; | |
i++; | |
} | |
return os; | |
} | |
/**
 * Builds a byte histogram and prints it to standard output.
 *
 * With no command-line arguments the bytes are read from standard input;
 * otherwise every argument is treated as a file name, each file is opened in
 * binary mode, and all files are accumulated into a single histogram.
 *
 * @return EXIT_SUCCESS, or EXIT_FAILURE if a file cannot be opened or a read
 *         error occurs (a message is written to standard error).
 */
int main(int argc, char* argv[]) {
    ByteHistogram hist;

    if (argc == 1) {
        hist.insert(std::cin);

        if (std::cin.bad()) {
            std::cerr << "Reading from standard input failed.\n";
            return EXIT_FAILURE;
        }
    } else {
        for (int i = 1; i < argc; ++i) {
            std::ifstream stream{ argv[i], std::ios::binary };

            // A failed open only sets failbit, which the bad() check below
            // would not notice; the file would silently count as empty.
            if (!stream.is_open()) {
                std::cerr << "Could not open \"" << argv[i]
                          << "\" for reading.\n";
                return EXIT_FAILURE;
            }

            hist.insert(stream);

            if (stream.bad()) {
                std::cerr << "Reading from \"" << argv[i] << "\" failed.\n";
                return EXIT_FAILURE;
            }
        }
    }

    std::cout << hist;
    return EXIT_SUCCESS;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment