Last active
July 24, 2022 23:34
-
-
Save pszemraj/10db57c92386a6c87b3d6ab4f795b97b to your computer and use it in GitHub Desktop.
A function in Python that, given a string path to a directory, converts it to a pathlib Path object and then loads all text files in the directory. The text files are sorted using the natsorted function, and then each file is read and appended to a new file that contains all of the text files merged. Each file has its filename printed to the file bef…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from natsort import natsorted | |
from pathlib import Path | |
import argparse | |
def merge_files(path, output_path=None, outname=None):
    """
    Merge every ``*.txt`` file in a directory into a single text file.

    The files are processed in natural sort order (``natsorted``). For each
    file, its filename is written on its own line, followed by the file's
    contents and a trailing newline.

    Args:
        path: directory containing the text files (str or path-like).
        output_path: directory to write the merged file into. Defaults to
            the parent of ``path``. Created if it does not exist.
        outname: filename of the merged file. Defaults to
            ``merged_<directory name>.txt``.

    Returns:
        pathlib.Path: location of the merged file (``output_path / outname``).
    """
    path = Path(path)
    output_path = Path(output_path) if output_path else path.parent
    outname = outname if outname else f"merged_{path.name}.txt"

    # Make sure the destination exists instead of failing inside open().
    output_path.mkdir(parents=True, exist_ok=True)
    merged_file = output_path / outname
    merged_resolved = merged_file.resolve()

    # Skip directories that happen to match "*.txt", and never feed the
    # merged file back into itself: when the output lands in the input
    # directory (e.g. path="." -> parent is "." too), a file left by a
    # previous run would otherwise be picked up by the glob.
    candidates = [
        candidate
        for candidate in path.glob("*.txt")
        if candidate.is_file() and candidate.resolve() != merged_resolved
    ]
    files = natsorted(candidates)

    # "surrogateescape" is used on both sides so bytes that are not valid
    # UTF-8 are carried through unchanged rather than aborting the merge.
    # (The previous errors="utf-8" is not a registered error handler and
    # raised LookupError as soon as an encoding error had to be handled.)
    with open(
        merged_file, "w", encoding="utf-8", errors="surrogateescape"
    ) as outfile:
        for file in files:
            with open(
                file, encoding="utf-8", errors="surrogateescape"
            ) as infile:
                outfile.write(f"{file.name}\n")
                outfile.write(infile.read())
                outfile.write("\n")
    return merged_file
def get_parser() -> argparse.ArgumentParser:
    """
    Build the command-line argument parser for this script.

    Options:
        -i / --input-path   directory of text files to merge (required).
        -o / --output-path  directory to write the merged file to (optional,
                            defaults to None).
        -n / --outname      name of the merged file (optional, defaults to
                            None).

    Returns:
        argparse.ArgumentParser: the configured parser; call
        ``parse_args()`` on it to obtain the namespace.
    """
    # The first positional argument of ArgumentParser is ``prog``, i.e. the
    # program name shown in usage/help output.
    parser = argparse.ArgumentParser("merge_text_dir.py")
    parser.add_argument(
        "-i",
        "--input-path",
        type=str,
        help="path to directory of text files",
        required=True,
    )
    parser.add_argument(
        "-o",
        "--output-path",
        type=str,
        required=False,
        default=None,
        help="path to output directory (optional)",
    )
    parser.add_argument(
        "-n",
        "--outname",
        type=str,
        required=False,
        default=None,
        help="name of output file (optional)",
    )
    return parser
if __name__ == "__main__":
    # Script entry point: read the CLI options, merge the text files, and
    # report the name of the file that was written.
    args = get_parser().parse_args()
    input_path = args.input_path
    output_path = args.output_path
    outname = args.outname
    output_file = merge_files(input_path, output_path, outname)
    print(f"Merged files into {output_file.name}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment