Skip to content

Instantly share code, notes, and snippets.

@allenh1
Last active August 21, 2018 15:43
Show Gist options
  • Select an option

  • Save allenh1/06b2561c923aa6fc8d3d2d988940d3e6 to your computer and use it in GitHub Desktop.

Select an option

Save allenh1/06b2561c923aa6fc8d3d2d988940d3e6 to your computer and use it in GitHub Desktop.
Fix line endings for C++ files
// Copyright 2018 Hunter L. Allen
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#if __has_include(<filesystem>)
#include <filesystem>
/* compiler has std::filesystem */
namespace fs = std::filesystem;
#elif __has_include(<experimental/filesystem>)
#include <experimental/filesystem>
/* compiler has std::experimental::filesystem */
namespace fs = std::experimental::filesystem;
#endif
#include <algorithm>
#include <atomic>
#include <cstring>
#include <exception>
#include <execution>
#include <fstream>
#include <functional>
#include <iostream>
#include <iterator>
#include <list>
#include <memory>
#include <mutex>
#include <streambuf>
#include <string>
#include <string_view>
#include <thread>
#include <vector>
/*************************************** exceptions **********************************/
/* Exception types thrown by file_processor; all derive from std::exception
 * so callers can catch them generically and print what(). */
namespace file_processor_exception {
using std::exception; // inherit from std::exception

/* thrown when a file cannot be opened or read */
struct read_file_exception : public exception
{
  const char * what() const noexcept override
  {
    return "error reading file";
  }
};

/* thrown when a file cannot be opened for writing or written */
struct write_file_exception : public exception
{
  const char * what() const noexcept override
  {
    return "error writing file";
  }
};

/* thrown when the contents of a file cannot be parsed */
struct parse_file_exception : public exception
{
  const char * what() const noexcept override
  {
    return "error parsing file";
  }
};
}
/************************************** file processor *********************************/
/*
 * Processes one file: operator() reads it, drops every '\r' byte and hands
 * the result to output_file(), which writes it back over the same path.
 * The byte counters and the done flag are each guarded by their own mutex so
 * they can be polled while another thread runs operator().
 * The object is non-copyable (it owns mutexes).
 */
struct file_processor
{
public:
  /* Opens p_filename to check that it is readable;
   * throws file_processor_exception::read_file_exception if it is not. */
  file_processor(const std::string & p_filename);
  file_processor(const file_processor & other) = delete;
  file_processor & operator = (const file_processor & other) = delete;
  ~file_processor();

  /* Writes the buffered text back to the file and marks the processor done. */
  void output_file();
  /* Ratio of bytes read to the recorded file size. */
  double get_progress();
  /* Recorded total size in bytes (guarded by m_total_mutex). */
  size_t get_file_size();
  /* Number of bytes consumed so far (guarded by m_read_mutex). */
  size_t get_bytes_read();
  /* True once output_file() has finished (guarded by m_done_lock). */
  bool is_done();
  /* Formats "<filename>: <percent>%". */
  operator std::string();
  /* Reads the file, strips '\r' and calls output_file(). */
  void operator() ();

private:
  void set_file_size(const size_t & p_size);
  void inc_bytes_read(const size_t & p_bytes);

  bool m_done = false;       /* set by output_file() */
  size_t m_bytes_read = 0;   /* bytes consumed by operator() */
  size_t m_bytes_total = 0;  /* value stored by set_file_size() */
  std::string m_filename;    /* path read from and written back to */
  std::string m_text;        /* file contents with '\r' removed */
  std::mutex m_read_mutex;   /* guards m_bytes_read */
  std::mutex m_total_mutex;  /* guards m_bytes_total */
  std::mutex m_done_lock;    /* guards m_done */
};
size_t file_processor::get_file_size()
{
std::lock_guard<std::mutex> lock(m_total_mutex);
return m_bytes_total;
}
size_t file_processor::get_bytes_read()
{
std::lock_guard<std::mutex> lock(m_read_mutex);
return m_bytes_read;
}
void file_processor::set_file_size(const size_t & p_size)
{
std::lock_guard<std::mutex> lock(m_total_mutex);
m_bytes_total = p_size;
}
/*
 * Returns the fraction of the file consumed so far (bytes read / total size).
 * When the recorded total is zero no ratio can be formed, so 0.0 is returned
 * instead of dividing by zero and producing NaN or infinity.
 */
double file_processor::get_progress()
{
  const size_t total = get_file_size();
  if (total == 0) {
    return 0.0;
  }
  return static_cast<double>(get_bytes_read()) / static_cast<double>(total);
}
/*
 * Formats the current progress as "<filename>: <percent>%".
 * The percentage is clamped to [0, 100] before the integer conversion because
 * converting NaN, infinity or an out-of-range double to unsigned int is
 * undefined behaviour.
 */
file_processor::operator std::string()
{
  double percent = get_progress() * 100.0;
  if (!(percent >= 0.0)) {
    /* the negated comparison is also true for NaN */
    percent = 0.0;
  } else if (percent > 100.0) {
    percent = 100.0;
  }
  const unsigned int out_of_100 = static_cast<unsigned int>(percent);
  return m_filename + ": " + std::to_string(out_of_100) + "%";
}
void file_processor::inc_bytes_read(const size_t & p_bytes)
{
std::lock_guard<std::mutex> lock(m_read_mutex);
m_bytes_read += p_bytes;
}
bool file_processor::is_done()
{
std::lock_guard<std::mutex> lock(m_done_lock);
return m_done;
}
/*
 * Binds the processor to p_filename, checks that the file can be opened and
 * records its size so that get_progress() has a denominator.
 *
 * Throws file_processor_exception::read_file_exception when the file cannot
 * be opened. If the size cannot be determined the total stays at 0.
 */
file_processor::file_processor(const std::string & p_filename)
: m_filename(p_filename)
{
  std::ifstream stream(p_filename, std::ios::in | std::ios::binary);
  if (!stream.is_open()) {
    std::cerr << "failed to read from file '" << p_filename << "'" << std::endl;
    throw file_processor_exception::read_file_exception();
  }
  /* size = distance between the start position and the end of the stream */
  const std::streampos begin = stream.tellg();
  stream.seekg(0, std::ios::end);
  const std::streampos end = stream.tellg();
  const std::streamoff length = end - begin;
  /* tellg() yields -1 on failure; only store a size that is known to be valid */
  if (!stream.fail() && length > 0) {
    set_file_size(static_cast<size_t>(length));
  }
  stream.close();
}
/*
 * Reads the whole file in binary mode, buffering every byte except '\r', then
 * calls output_file() to write the result back. output_file() opens the file
 * in text mode, so each remaining '\n' is written with the platform's native
 * line ending.
 *
 * Throws file_processor_exception::read_file_exception when the file cannot
 * be opened or when reading stops before end-of-file. In the latter case the
 * file is left untouched rather than overwritten with partial content.
 */
void file_processor::operator() ()
{
  std::ifstream file(m_filename, std::ios::in | std::ios::binary);
  if (!file.is_open()) {
    std::cerr << "failed to read from file '" << m_filename << "'" << std::endl;
    throw file_processor_exception::read_file_exception();
  }
  /* we'll assume the file doesn't change */
  char c = '\0';
  /* stop on any stream failure, not only eof, so a read error cannot loop forever */
  while (file.get(c)) {
    if (c != '\r') {
      /* don't append '\r' */
      m_text += c;
    }
    inc_bytes_read(1);
  }
  if (!file.eof()) {
    /* the loop ended because of an error, not because the file was exhausted */
    std::cerr << "failed to read from file '" << m_filename << "'" << std::endl;
    throw file_processor_exception::read_file_exception();
  }
  file.close();
  output_file();
}
/* Nothing to release by hand: every member cleans up after itself. */
file_processor::~file_processor() = default;
/*
 * Prints the progress line, then writes the buffered text back over the
 * original file (text mode), adding a trailing newline when the text lacks one.
 * An empty buffer leaves the file untouched. In both cases the processor is
 * marked done afterwards.
 *
 * Throws file_processor_exception::write_file_exception when the file cannot
 * be opened for writing or when writing, flushing or closing fails.
 */
void file_processor::output_file()
{
  std::cout << std::string(*this) << std::endl;
  if (m_text.empty()) {
    std::cerr << "file is empty? proceeding like things are fine..." << std::endl;
  } else {
    std::ofstream file(m_filename);
    if (!file.good()) {
      std::cerr << "failed to output file '" << m_filename << "'" << std::endl;
      throw file_processor_exception::write_file_exception();
    }
    file << m_text;
    if (m_text.back() != '\n') {
      file << '\n'; /* terminate the last line if it's not terminated */
    }
    file.flush(); /* flush, because we should */
    file.close();
    if (file.fail()) {
      /* a short write or a failed close would otherwise go unnoticed */
      std::cerr << "failed to output file '" << m_filename << "'" << std::endl;
      throw file_processor_exception::write_file_exception();
    }
    m_text.clear();
  }
  {
    std::lock_guard<std::mutex> lock(m_done_lock);
    m_done = true;
  }
}
/************************************** utilities **************************************/
std::vector<std::unique_ptr<file_processor>> get_files_by_extension(
std::string_view path, std::string_view extension)
{
auto files = fs::recursive_directory_iterator(path);
std::vector<std::unique_ptr<file_processor>> ret;
for (auto f : files) {
auto p = fs::path(f);
if (p.extension() == extension) {
ret.emplace_back(std::make_unique<file_processor>(p.string()));
}
}
std::cout << "moving out ret with " << ret.size() << " files" << std::endl;
return std::move(ret);
}
/*
 * Runs every file_processor in `file_list`, using at most `thread_count`
 * worker threads, and returns once all files have been handled.
 *
 * - A `thread_count` of 0 is treated as 1; counts larger than the number of
 *   files are clamped so no idle threads are created.
 * - Workers pull the next unprocessed index from a shared atomic counter, so
 *   a slow file does not hold up the others.
 * - An exception thrown while processing one file is reported on stderr and
 *   that file is skipped; it must not escape the thread, because that would
 *   call std::terminate.
 */
void threaded_line_fixup(
  std::vector<std::unique_ptr<file_processor>> && file_list, const size_t & thread_count)
{
  const size_t file_count = file_list.size();
  if (file_count == 0) {
    return; /* nothing left to do */
  }
  const size_t worker_count = std::min(std::max<size_t>(thread_count, 1), file_count);

  std::atomic<size_t> next_index{0};
  auto worker = [&file_list, &next_index, file_count]() {
      for (;;) {
        const size_t index = next_index.fetch_add(1);
        if (index >= file_count) {
          break;
        }
        try {
          (*file_list.at(index))();
        } catch (const std::exception & e) {
          std::cerr << "skipping file: " << e.what() << std::endl;
        }
      }
    };

  std::vector<std::thread> threads;
  threads.reserve(worker_count);
  for (size_t i = 0; i < worker_count; ++i) {
    try {
      threads.emplace_back(worker);
    } catch (const std::exception & e) {
      /* keep going with the workers that did start */
      std::cerr << "failed to start worker thread: " << e.what() << std::endl;
      break;
    }
  }
  if (threads.empty()) {
    /* no thread could be started: do the work on the calling thread */
    worker();
  }
  for (auto & t : threads) {
    t.join();
  }
}
/*
 * Entry point: <program> [directory] [file extension] [thread count]
 *
 * Collects every matching file below the directory and rewrites each one.
 * Returns 0 on success and 1 on invalid arguments or on an error raised while
 * collecting or processing files.
 */
int main(int argc, char ** argv)
{
  std::string usage((argc > 0 && argv[0] != nullptr) ? argv[0] : "");
  usage += " [directory] [file extension] [thread count]";
  const auto invalid_arguments = [&usage]() {
      std::cerr << "Invalid arguments" << std::endl;
      std::cerr << "Usage: " << usage << std::endl;
      return 1;
    };

  if (argc != 4) {
    return invalid_arguments();
  }

  long thread_count = 0;
  try {
    thread_count = std::stol(argv[3]);
  } catch (const std::exception &) {
    /* std::stol throws on non-numeric or out-of-range input */
    return invalid_arguments();
  }
  if (thread_count < 1) {
    /* zero or negative worker counts make no sense */
    return invalid_arguments();
  }

  std::cout << "starting... recursive traversal into path '" << argv[1] << "'" << std::endl;
  try {
    threaded_line_fixup(
      get_files_by_extension(argv[1], argv[2]), static_cast<size_t>(thread_count));
  } catch (const std::exception & e) {
    /* e.g. the directory does not exist or a file cannot be opened */
    std::cerr << "error: " << e.what() << std::endl;
    return 1;
  }
  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment