Skip to content

Instantly share code, notes, and snippets.

@pszemraj
Last active July 24, 2022 23:34
Show Gist options
  • Save pszemraj/10db57c92386a6c87b3d6ab4f795b97b to your computer and use it in GitHub Desktop.
Save pszemraj/10db57c92386a6c87b3d6ab4f795b97b to your computer and use it in GitHub Desktop.
A Python function that, given a string path to a directory, converts it to a pathlib Path object and then loads all text files in the directory. The text files are sorted using the natsorted function, and then each file is read and appended to a new file that contains all the text files merged. Each file has its filename written to the merged file before its contents.
from natsort import natsorted
from pathlib import Path
import argparse
def merge_files(path, output_path=None, outname=None) -> Path:
    """
    Merge every ``*.txt`` file directly inside a directory into one text file.

    The input files are processed in natural sort order. For each one, its
    file name is written on a line of its own, followed by the file's
    contents and a trailing newline.

    Args:
        path: directory containing the text files (str or Path).
        output_path: directory to write the merged file into. Defaults to
            the parent of ``path``. It is created if it does not exist.
        outname: name of the merged file. Defaults to
            ``merged_<name of path>.txt``.

    Returns:
        Path of the merged output file (``output_path / outname``).

    Raises:
        NotADirectoryError: if ``path`` is not an existing directory.
    """
    path = Path(path)
    if not path.is_dir():
        # Globbing a missing directory yields nothing, which would otherwise
        # produce an empty merged file without any warning.
        raise NotADirectoryError(f"{path} is not a directory")

    output_path = Path(output_path) if output_path else path.parent
    outname = outname if outname else f"merged_{path.name}.txt"
    output_path.mkdir(parents=True, exist_ok=True)
    merged_file = output_path / outname

    # The merged file may itself live in the input directory and match
    # "*.txt" (e.g. left over from a previous run); merging it into itself
    # would read a file that is being written, so leave it out.
    merged_resolved = merged_file.resolve()
    candidates = [
        entry
        for entry in path.glob("*.txt")
        if entry.is_file() and entry.resolve() != merged_resolved
    ]
    files = natsorted(candidates)

    # No ``errors=`` here: "utf-8" is an encoding name, not an error handler.
    with open(merged_file, "w", encoding="utf-8") as outfile:
        for file in files:
            # Read as UTF-8 regardless of the platform default so the result
            # is the same everywhere; undecodable bytes become U+FFFD rather
            # than aborting the whole merge.
            with open(file, encoding="utf-8", errors="replace") as infile:
                outfile.write(f"{file.name}\n")
                outfile.write(infile.read())
                outfile.write("\n")
    return merged_file
def get_parser():
    """
    Build the command-line argument parser for the merge script.

    Returns:
        argparse.ArgumentParser: parser with a required ``-i/--input-path``
        option and optional ``-o/--output-path`` and ``-n/--outname``
        options, all parsed as strings and defaulting to ``None``.
    """
    cli = argparse.ArgumentParser(prog="merge_text_dir.py")

    # One row per option: (short flag, long flag, required?, help text).
    option_specs = (
        ("-i", "--input-path", True, "path to directory of text files"),
        ("-o", "--output-path", False, "path to output directory (optional)"),
        ("-n", "--outname", False, "name of output file (optional)"),
    )
    for short_flag, long_flag, is_required, help_text in option_specs:
        cli.add_argument(
            short_flag,
            long_flag,
            type=str,
            required=is_required,
            default=None,
            help=help_text,
        )
    return cli
if __name__ == "__main__":
    # Script entry point: parse the CLI options, merge the directory's text
    # files, then report the name of the file that was written.
    cli_args = get_parser().parse_args()
    merged = merge_files(
        cli_args.input_path,
        cli_args.output_path,
        cli_args.outname,
    )
    print(f"Merged files into {merged.name}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment