andyleejordan · March 20, 2022 15:32
diff --git a/main.cpp b/main.cpp
 /* Algorithm for Efficient Chunked File Reading in C++
 *
 * The MIT License (MIT)
 *
 * Copyright 2014 Andrew Schwartzmeyer
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

 #include <cstdlib>
 #include <fstream>
 #include <iostream>
 #include <vector>

 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>

 using namespace std;

 /* Reads the file named by argv[1] in fixed-size chunks and echoes each
  * chunk to stdout beneath a "chunk #N" header line.
  *
  * argv[1]  path of the input file (required)
  * argv[2]  optional chunk size in bytes; a value that does not parse to
  *          a positive number falls back to 16 KiB
  *
  * Returns 0 on success and 1 on a usage error, an open or stat failure,
  * or when the file turns out to be shorter than stat() reported.
  */
 int main(int argc, char* argv[])
 {
   /* basic CLI interface */
   if (argc < 2)
     {
       std::cerr << "usage: input_file [chunk_size]" << std::endl;
       return 1;
     }

   std::ifstream file(argv[1], std::ifstream::binary);

   /* basic sanity check */
   if (!file)
     {
       std::cerr << "file: " << argv[1] << " failed to open" << std::endl;
       return 1;
     }

   /* *NIX way to get file size without seeking to the end and back;
      on failure filestatus is not filled in, so it must not be read */
   struct stat filestatus;
   if (::stat(argv[1], &filestatus) != 0 || filestatus.st_size < 0)
     {
       std::cerr << "file: " << argv[1] << " failed to stat" << std::endl;
       return 1;
     }
   const std::size_t total_size =
     static_cast<std::size_t>(filestatus.st_size);

   /* strtoll instead of atoi: overflow saturates instead of being
      undefined, and the end pointer tells us whether anything parsed */
   std::size_t chunk_size = 0;
   if (argc >= 3)
     {
       char* parse_end = nullptr;
       const long long requested = std::strtoll(argv[2], &parse_end, 10);
       if (parse_end != argv[2] && requested > 0)
         { chunk_size = static_cast<std::size_t>(requested); }
     }

   /* missing, unparsable, zero or negative request: use the default */
   if (chunk_size == 0)
     { chunk_size = 16 * 1024; }
   std::cout << "using chunk size: " << chunk_size << '\n';

   /* on to the actual algorithm */
   std::size_t total_chunks = total_size / chunk_size;
   std::size_t last_chunk_size = total_size % chunk_size;

   if (last_chunk_size != 0) /* if the above division was uneven */
     {
       ++total_chunks; /* add an unfilled final chunk */
     }
   else /* if division was even, last chunk is full */
     {
       last_chunk_size = chunk_size;
     }

   /* one buffer reused by every iteration; no chunk is ever larger than
      the smaller of the chunk size and the whole file */
   std::vector<char> chunk_data(chunk_size < total_size ? chunk_size
                                                        : total_size);

   /* the loop of chunking */
   for (std::size_t chunk = 0; chunk < total_chunks; ++chunk)
     {
       const std::size_t this_chunk_size =
         chunk == total_chunks - 1 /* if last chunk */
         ? last_chunk_size /* then fill chunk with remaining bytes */
         : chunk_size; /* else fill entire chunk */

       /* if needed, we also have the position of this chunk in the file
          size_t start_of_chunk = chunk * chunk_size; */

       file.read(chunk_data.data(),
                 static_cast<std::streamsize>(this_chunk_size));
       /* gcount() is what was really read; it can be short if the file
          shrank after stat() */
       const std::streamsize bytes_read = file.gcount();

       /* do something with chunk_data before next iteration */
       std::cout << "chunk #" << chunk << '\n';
       std::cout.write(chunk_data.data(), bytes_read);
       std::cout << '\n';

       if (static_cast<std::size_t>(bytes_read) != this_chunk_size)
         {
           std::cerr << "file: " << argv[1] << " ended early in chunk #"
                     << chunk << std::endl;
           return 1;
         }
     }

   return 0;
 }
	/* Algorithm for Efficient Chunked File Reading in C++
	*
	* The MIT License (MIT)
	*
	* Copyright 2014 Andrew Schwartzmeyer
	*
	* Permission is hereby granted, free of charge, to any person
	* obtaining a copy of this software and associated documentation
	* files (the "Software"), to deal in the Software without
	* restriction, including without limitation the rights to use, copy,
	* modify, merge, publish, distribute, sublicense, and/or sell copies
	* of the Software, and to permit persons to whom the Software is
	* furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice shall be
	* included in all copies or substantial portions of the Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
	* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
	* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
	* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
	* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
	* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
	* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	* SOFTWARE.
	*/

	#include <cstdlib>
	#include <fstream>
	#include <iostream>
	#include <vector>

	#include <sys/types.h>
	#include <sys/stat.h>
	#include <unistd.h>

	using namespace std;

	/* Reads the file named by argv[1] in fixed-size chunks and echoes each
	 * chunk to stdout beneath a "chunk #N" header line.
	 *
	 * argv[1]  path of the input file (required)
	 * argv[2]  optional chunk size in bytes; a value that does not parse to
	 *          a positive number falls back to 16 KiB
	 *
	 * Returns 0 on success and 1 on a usage error, an open or stat failure,
	 * or when the file turns out to be shorter than stat() reported.
	 */
	int main(int argc, char* argv[])
	{
	  /* basic CLI interface */
	  if (argc < 2)
	    {
	      std::cerr << "usage: input_file [chunk_size]" << std::endl;
	      return 1;
	    }

	  std::ifstream file(argv[1], std::ifstream::binary);

	  /* basic sanity check */
	  if (!file)
	    {
	      std::cerr << "file: " << argv[1] << " failed to open" << std::endl;
	      return 1;
	    }

	  /* *NIX way to get file size without seeking to the end and back;
	     on failure filestatus is not filled in, so it must not be read */
	  struct stat filestatus;
	  if (::stat(argv[1], &filestatus) != 0 || filestatus.st_size < 0)
	    {
	      std::cerr << "file: " << argv[1] << " failed to stat" << std::endl;
	      return 1;
	    }
	  const std::size_t total_size =
	    static_cast<std::size_t>(filestatus.st_size);

	  /* strtoll instead of atoi: overflow saturates instead of being
	     undefined, and the end pointer tells us whether anything parsed */
	  std::size_t chunk_size = 0;
	  if (argc >= 3)
	    {
	      char* parse_end = nullptr;
	      const long long requested = std::strtoll(argv[2], &parse_end, 10);
	      if (parse_end != argv[2] && requested > 0)
	        { chunk_size = static_cast<std::size_t>(requested); }
	    }

	  /* missing, unparsable, zero or negative request: use the default */
	  if (chunk_size == 0)
	    { chunk_size = 16 * 1024; }
	  std::cout << "using chunk size: " << chunk_size << '\n';

	  /* on to the actual algorithm */
	  std::size_t total_chunks = total_size / chunk_size;
	  std::size_t last_chunk_size = total_size % chunk_size;

	  if (last_chunk_size != 0) /* if the above division was uneven */
	    {
	      ++total_chunks; /* add an unfilled final chunk */
	    }
	  else /* if division was even, last chunk is full */
	    {
	      last_chunk_size = chunk_size;
	    }

	  /* one buffer reused by every iteration; no chunk is ever larger than
	     the smaller of the chunk size and the whole file */
	  std::vector<char> chunk_data(chunk_size < total_size ? chunk_size
	                                                       : total_size);

	  /* the loop of chunking */
	  for (std::size_t chunk = 0; chunk < total_chunks; ++chunk)
	    {
	      const std::size_t this_chunk_size =
	        chunk == total_chunks - 1 /* if last chunk */
	        ? last_chunk_size /* then fill chunk with remaining bytes */
	        : chunk_size; /* else fill entire chunk */

	      /* if needed, we also have the position of this chunk in the file
	         size_t start_of_chunk = chunk * chunk_size; */

	      file.read(chunk_data.data(),
	                static_cast<std::streamsize>(this_chunk_size));
	      /* gcount() is what was really read; it can be short if the file
	         shrank after stat() */
	      const std::streamsize bytes_read = file.gcount();

	      /* do something with chunk_data before next iteration */
	      std::cout << "chunk #" << chunk << '\n';
	      std::cout.write(chunk_data.data(), bytes_read);
	      std::cout << '\n';

	      if (static_cast<std::size_t>(bytes_read) != this_chunk_size)
	        {
	          std::cerr << "file: " << argv[1] << " ended early in chunk #"
	                    << chunk << std::endl;
	          return 1;
	        }
	    }

	  return 0;
	}
No results found