Last active
July 13, 2020 01:46
-
-
Save ross-spencer/cc078c662fdeb161749b3bd02fc31302 to your computer and use it in GitHub Desktop.
Create files with old dates
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
"""Script to generate a sample set of files with a random distribution of | |
dates. Right now, this is very likely to be a uniform distribution so numpy | |
needs to be explored some more. | |
""" | |
import argparse | |
import atexit | |
import datetime | |
import logging | |
import os | |
import shutil | |
import time | |
from numpy import random | |
# Layout of each log record: timestamp, severity, then the message text.
LOGFORMAT = "%(asctime)-15s %(levelname)s: %(message)s"
# strftime pattern used to render the timestamp in each log record.
DATEFORMAT = "%m/%d/%Y %H:%M:%S"
def _make_parser(): | |
"""Create an argument parser for this script.""" | |
parser = argparse.ArgumentParser() | |
parser.description = "create a set of files with a random date range" | |
parser.add_argument( | |
"-i", "--input", | |
help="path to file to duplicate", | |
required=False) | |
parser.add_argument( | |
"-o", "--output", | |
help="path to the output folder", | |
default="output-dates", | |
required=False) | |
parser.add_argument( | |
"-n", "--number", | |
help="number of files to create", | |
default=100, | |
required=False) | |
return parser | |
def change_time(filepath, year):
    """Set a file's access and modification times to a random date.

    The new timestamp falls on a randomly chosen month and day of ``year``
    at the fixed local time 10:25:01. Days are limited to 1-28 so the date
    is valid in every month.

    :param filepath: path of the existing file whose times are rewritten
    :param year: year for the new timestamp; any value ``int()`` accepts,
        such as a numpy integer
    """
    # numpy's randint excludes the upper bound, so 13 and 29 are needed for
    # December and the 28th to be reachable. No size argument is given so a
    # scalar comes back instead of a one-element array, and int() hands
    # datetime the plain Python integers it expects.
    month = int(random.randint(1, 13))
    day = int(random.randint(1, 29))
    date = datetime.datetime(
        year=int(year), month=month, day=day,
        hour=10, minute=25, second=1)
    mod_time = time.mktime(date.timetuple())
    # The same value is used for both the access and modification time.
    os.utime(filepath, (mod_time, mod_time))
def copy_media_location(copy_path, output_path, date_range):
    """Create one dated copy of a sample file per entry in ``date_range``.

    Each copy is written into ``output_path`` and named
    ``<name>_<index>.<extension>`` after the source file. Its timestamp is
    then set by ``change_time`` using the year at the matching position in
    ``date_range``.

    :param copy_path: path to the file to duplicate
    :param output_path: folder that receives the copies
    :param date_range: sequence of years, one per copy to create
    """
    # splitext copes with names that contain several dots or none at all,
    # and working from the basename keeps every copy inside output_path
    # whatever directory the source file lives in.
    file_name, ext = os.path.splitext(os.path.basename(copy_path))
    # Drop the leading dot; use the placeholder "ext" when there is no
    # extension to reuse.
    ext = ext[1:] or "ext"
    for count, year in enumerate(date_range):
        new_file_name = '{}_{}.{}'.format(file_name, count, ext)
        new_path = os.path.join(output_path, new_file_name)
        shutil.copyfile(copy_path, new_path)
        change_time(new_path, year)
    logging.info("%s files created at %s", len(date_range), output_path)
def tmp_file_content():
    """Return placeholder text for the temporary sample file.

    The text is a heading line and a single sentence of body text,
    separated by one blank line.
    """
    parts = (
        "# Fake File Format Study Data",
        "Some data for the study.",
    )
    return "\n\n".join(parts)
def remove_tmp(file_path):
    """Delete ``file_path``, logging instead of raising if that fails.

    main() registers this with atexit when it has to create a temporary
    sample file because no input file was supplied.
    """
    try:
        os.unlink(file_path)
    except OSError:
        # Best effort only: report the leftover file and carry on.
        logging.info("Not removing %s at exit", file_path)
    return None
def main():
    """Primary entry point for the script.

    Parses the command-line arguments, makes sure the output folder exists,
    creates a temporary sample file when no input file was given, draws one
    random year per requested file and hands the work to
    ``copy_media_location``.
    """
    logging.basicConfig(format=LOGFORMAT, datefmt=DATEFORMAT, level="INFO")
    parser = _make_parser()
    args = parser.parse_args()
    if args.output:
        try:
            os.mkdir(args.output)
        except OSError:
            # A folder that already exists is fine; any other failure
            # (permissions, missing parent, a file in the way) is real.
            if not os.path.isdir(args.output):
                raise
    if not args.input:
        tmp_file_path = "file.md"
        # Registered before the file is written so that it is cleaned up
        # even if a later step fails.
        atexit.register(remove_tmp, file_path=tmp_file_path)
        # Text mode: tmp_file_content() returns str, which a binary-mode
        # file rejects on Python 3.
        with open(tmp_file_path, 'w') as tmp_file:
            tmp_file.write(tmp_file_content())
        args.input = tmp_file_path
    # int() guards against the count arriving as a string from the command
    # line. numpy's randint excludes the upper bound, so the years drawn
    # run from 1970 to 2017.
    dist = random.randint(1970, 2018, int(args.number))
    copy_media_location(args.input, args.output, dist)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment