Last active
December 31, 2018 16:16
-
-
Save adamfisher/e4a9a6b5a67d2401703c945ccb8035f4 to your computer and use it in GitHub Desktop.
Extension method for FileInfo to split a big file into chunks.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Splits a file into multiple files based on the specified chunk size of each file.
/// </summary>
/// <remarks>
/// Chunk files are written to <paramref name="targetPath"/> and are named
/// <c>{original file name}.CHUNK_{n}</c>, where <c>n</c> starts at 1.
/// When the file is not larger than <paramref name="chunkSize"/>, no chunk files are created:
/// the original file is returned as the only element and is not deleted.
/// </remarks>
/// <param name="file">The file to split.</param>
/// <param name="chunkSize">The maximum number of bytes to store in each file.
/// If a chunk size is not provided, files will be split into 1 MB (1,000,000 byte) chunks by default.
/// The <paramref name="breakOnNewlines"/> parameter can make a chunk larger than this value.</param>
/// <param name="targetPath">The destination where the split files will be saved.
/// Defaults to the directory that contains <paramref name="file"/>; it is created if it does not exist.</param>
/// <param name="deleteAfterSplit">If set to <c>true</c>, the original file is deleted after creating the newly split files.</param>
/// <param name="breakOnNewlines">If set to <c>true</c>, each chunk is extended up to and including the next
/// newline byte (<c>\n</c>) once the chunk size limit is reached, so that lines are never split across files.</param>
/// <returns>
/// An array of references to the split files, in the order in which they were written.
/// </returns>
/// <exception cref="ArgumentNullException">file</exception>
/// <exception cref="ArgumentOutOfRangeException">chunkSize - The chunk size must be larger than 0 bytes.</exception>
public static FileInfo[] SplitOnChunkSize(
    this FileInfo file,
    int chunkSize = 1000000,
    DirectoryInfo targetPath = null,
    bool deleteAfterSplit = false,
    bool breakOnNewlines = true
)
{
    if (file == null)
    {
        throw new ArgumentNullException(nameof(file));
    }

    if (chunkSize < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize,
            "The chunk size must be larger than 0 bytes.");
    }

    // Nothing to split: hand back the original file untouched.
    if (file.Length <= chunkSize)
    {
        return new[] {file};
    }

    targetPath = targetPath ?? file.Directory;

    // Make sure the destination exists; this is a no-op when it already does.
    targetPath.Create();

    // Pre-size the result list, clamping the estimate so a huge file combined with a
    // tiny chunk size can neither overflow the int cast nor request an absurd capacity.
    const int maxPresizedChunkCount = 4096;
    var estimatedChunkCount = (int)Math.Min(file.Length / chunkSize + 1, maxPresizedChunkCount);
    var chunkedFiles = new List<FileInfo>(estimatedChunkCount);

    var buffer = new byte[chunkSize];

    using (var input = file.OpenRead())
    {
        var index = 1;
        while (input.Position < input.Length)
        {
            var chunkFile = new FileInfo(Path.Combine(targetPath.FullName, $"{file.Name}.CHUNK_{index++}"));
            chunkedFiles.Add(chunkFile);

            using (var output = chunkFile.Create())
            {
                // Stream.Read may return fewer bytes than requested, so keep reading
                // until the buffer is full or the end of the input is reached.
                var chunkBytesRead = 0;
                while (chunkBytesRead < chunkSize)
                {
                    var bytesRead = input.Read(buffer, chunkBytesRead, chunkSize - chunkBytesRead);
                    if (bytesRead == 0)
                    {
                        break;
                    }

                    chunkBytesRead += bytesRead;
                }

                // Always write the chunk, regardless of the newline option.
                output.Write(buffer, 0, chunkBytesRead);

                // Extend the chunk to the end of the current line. Only the bytes actually read
                // for this chunk are inspected, never stale data left over from a previous chunk.
                if (breakOnNewlines && chunkBytesRead > 0 && buffer[chunkBytesRead - 1] != (byte)'\n')
                {
                    int next;
                    while ((next = input.ReadByte()) != -1)
                    {
                        output.WriteByte((byte)next);
                        if (next == '\n')
                        {
                            break;
                        }
                    }
                }
            }
        }
    }

    // The input stream is closed at this point, so the original can be removed safely.
    if (deleteAfterSplit)
    {
        file.Delete();
    }

    return chunkedFiles.ToArray();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment