Last active
August 21, 2018 15:43
-
-
Save allenh1/06b2561c923aa6fc8d3d2d988940d3e6 to your computer and use it in GitHub Desktop.
Fix line endings for C++ files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Copyright 2018 Hunter L. Allen | |
| // | |
| // Licensed under the Apache License, Version 2.0 (the "License"); | |
| // you may not use this file except in compliance with the License. | |
| // You may obtain a copy of the License at | |
| // | |
| // http://www.apache.org/licenses/LICENSE-2.0 | |
| // | |
| // Unless required by applicable law or agreed to in writing, software | |
| // distributed under the License is distributed on an "AS IS" BASIS, | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| // See the License for the specific language governing permissions and | |
| // limitations under the License. | |
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <exception>
#include <execution>
#include <fstream>
#include <functional>
#include <iostream>
#include <iterator>
#include <list>
#include <memory>
#include <mutex>
#include <streambuf>
#include <string>
#include <string_view>
#include <thread>
#include <vector>

#if __has_include(<filesystem>)
#include <filesystem>
/* compiler has std::filesystem */
namespace fs = std::filesystem;
#elif __has_include(<experimental/filesystem>)
#include <experimental/filesystem>
/* compiler has std::experimental::filesystem */
namespace fs = std::experimental::filesystem;
#endif
| /*************************************** exceptions **********************************/ | |
/**
 * Exception types used by the file processor.
 *
 * Every type derives from std::exception and returns a fixed message from what().
 */
namespace file_processor_exception {
using std::exception;  // inherit from std::exception

/** Raised when an input file cannot be opened for reading. */
struct read_file_exception : public exception
{
  const char * what() const noexcept override
  {
    return "error reading file";
  }
};

/** Raised when the output file cannot be opened for writing. */
struct write_file_exception : public exception
{
  const char * what() const noexcept override
  {
    return "error writing file";
  }
};

/** Parsing failure; not thrown by any code visible in this file. */
struct parse_file_exception : public exception
{
  const char * what() const noexcept override
  {
    return "error parsing file";
  }
};
}  // namespace file_processor_exception
| /************************************** file processor *********************************/ | |
/**
 * Processes a single file: operator() reads it into memory and output_file()
 * writes the buffered text back to the same path.
 *
 * The byte counters and the done flag are each guarded by their own mutex.
 * Instances cannot be copied.
 */
struct file_processor
{
private:
  /* --- state --- */
  bool m_done{false};            /* set by output_file() after the file is written */
  std::size_t m_bytes_read{0};   /* bytes consumed so far by operator() */
  std::size_t m_bytes_total{0};  /* total size used as the progress denominator */
  std::string m_filename;        /* path read from and written back to */
  std::string m_text;            /* buffered text awaiting output */
  std::mutex m_read_mutex;       /* guards m_bytes_read */
  std::mutex m_total_mutex;      /* guards m_bytes_total */
  std::mutex m_done_lock;        /* guards m_done */

  /* --- internal setters (lock the matching mutex) --- */
  void inc_bytes_read(const std::size_t & p_bytes);
  void set_file_size(const std::size_t & p_size);

public:
  /* --- lifetime --- */
  file_processor(const std::string & p_filename);
  ~file_processor();
  file_processor(const file_processor & other) = delete;
  file_processor & operator = (file_processor & other) = delete;

  /* --- work --- */
  void operator() ();  /* read the file, then call output_file() */
  void output_file();  /* write m_text back to m_filename */

  /* --- progress reporting --- */
  std::size_t get_bytes_read();
  std::size_t get_file_size();
  double get_progress();    /* bytes read divided by file size */
  bool is_done();
  operator std::string();   /* "<filename>: <percent>%" */
};
| size_t file_processor::get_file_size() | |
| { | |
| std::lock_guard<std::mutex> lock(m_total_mutex); | |
| return m_bytes_total; | |
| } | |
| size_t file_processor::get_bytes_read() | |
| { | |
| std::lock_guard<std::mutex> lock(m_read_mutex); | |
| return m_bytes_read; | |
| } | |
| void file_processor::set_file_size(const size_t & p_size) | |
| { | |
| std::lock_guard<std::mutex> lock(m_total_mutex); | |
| m_bytes_total = p_size; | |
| } | |
| double file_processor::get_progress() | |
| { | |
| double ret = | |
| static_cast<double>(get_bytes_read()) / static_cast<double>(get_file_size()); | |
| return ret; | |
| } | |
| file_processor::operator std::string() | |
| { | |
| unsigned int out_of_100 = static_cast<unsigned int>(get_progress() * 100); | |
| return m_filename + ": " + std::to_string(out_of_100) + "%"; | |
| } | |
| void file_processor::inc_bytes_read(const size_t & p_bytes) | |
| { | |
| std::lock_guard<std::mutex> lock(m_read_mutex); | |
| m_bytes_read += p_bytes; | |
| } | |
| bool file_processor::is_done() | |
| { | |
| std::lock_guard<std::mutex> lock(m_done_lock); | |
| return m_done; | |
| } | |
| file_processor::file_processor(const std::string & p_filename) | |
| : m_filename(p_filename) | |
| { | |
| std::ifstream stream(p_filename, std::ios::in | std::ios::binary); | |
| if (!stream.is_open()) { | |
| std::cerr << "failed to read from file '" << p_filename << "'" << std::endl; | |
| throw file_processor_exception::read_file_exception(); | |
| } | |
| std::streampos size = stream.tellg(); | |
| stream.seekg(0, std::ios::end); | |
| size = stream.tellg() - size; | |
| stream.close(); | |
| } | |
| void file_processor::operator() () | |
| { | |
| /* do processing on the file */ | |
| std::ifstream file(m_filename, std::ios::in | std::ios::binary); | |
| if (!file.is_open()) { | |
| std::cerr << "failed to read from file '" << m_filename << "'" << std::endl; | |
| throw file_processor_exception::read_file_exception(); | |
| } | |
| /* we'll assume the file doesn't change */ | |
| for (char c = '\0'; (file.get(c), !file.eof());) { | |
| if (c != '\r') { | |
| /* don't append '\r' */ | |
| m_text += c; | |
| } | |
| else if (c == '\n') { | |
| /* when you find '\n', write '\r\n' */ | |
| m_text += "\r\n"; | |
| } | |
| inc_bytes_read(1); | |
| } | |
| file.close(); | |
| output_file(); | |
| } | |
| file_processor::~file_processor() | |
| { | |
| /* NO-OP */ | |
| } | |
| void file_processor::output_file() | |
| { | |
| std::cout << std::string(*this) << std::endl; | |
| if (!m_text.size()) { | |
| std::cerr << "file is empty? proceeding like things are fine..." << std::endl; | |
| return; | |
| } | |
| std::ofstream file(m_filename); | |
| if (!file.good()) { | |
| std::cerr << "failed to output file '" << m_filename << "'" << std::endl; | |
| throw file_processor_exception::write_file_exception(); | |
| } | |
| file << m_text; | |
| if (m_text.back() != '\n') { | |
| file << std::endl; /* append '\r\n' if it's not there */ | |
| } | |
| m_text.clear(); | |
| file.flush(); /* flush, because we should */ | |
| file.close(); | |
| { | |
| std::lock_guard<std::mutex> lock(m_done_lock); | |
| m_done = true; | |
| } | |
| } | |
| /************************************** utilities **************************************/ | |
| std::vector<std::unique_ptr<file_processor>> get_files_by_extension( | |
| std::string_view path, std::string_view extension) | |
| { | |
| auto files = fs::recursive_directory_iterator(path); | |
| std::vector<std::unique_ptr<file_processor>> ret; | |
| for (auto f : files) { | |
| auto p = fs::path(f); | |
| if (p.extension() == extension) { | |
| ret.emplace_back(std::make_unique<file_processor>(p.string())); | |
| } | |
| } | |
| std::cout << "moving out ret with " << ret.size() << " files" << std::endl; | |
| return std::move(ret); | |
| } | |
| void threaded_line_fixup( | |
| std::vector<std::unique_ptr<file_processor>> && file_list, const size_t & thread_count) | |
| { | |
| std::vector<std::thread> threads; | |
| threads.resize(thread_count); | |
| for (size_t x = 0; x < file_list.size(); x += thread_count) { | |
| for (size_t y = 0; y < thread_count; ++y) { | |
| if (x + y == file_list.size()) { | |
| break; /* nothing left to do */ | |
| } | |
| threads.emplace_back(std::bind(&file_processor::operator(), file_list.at(x + y).get())); | |
| } | |
| for (auto & t : threads) { | |
| if (t.joinable()) { | |
| t.join(); /* check if the thread already died */ | |
| } | |
| } | |
| threads.clear(); | |
| } | |
| } | |
| int main(int argc, char ** argv) | |
| { | |
| std::string usage(argv[0]); | |
| usage += " [directory] [file extension] [thread count]"; | |
| if (argc != 4) { | |
| goto error; | |
| } | |
| std::cout << "starting... recursive traversal into path '" << argv[1] << "'" << std::endl; | |
| threaded_line_fixup(get_files_by_extension(argv[1], argv[2]), std::stol(argv[3])); | |
| return 0; | |
| error: | |
| std::cerr << "Invalid arguments" << std::endl; | |
| std::cerr << "Usage: " << usage << std::endl; | |
| return 1; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment