Skip to content

Instantly share code, notes, and snippets.

@ross-spencer
Last active July 13, 2020 01:46
Show Gist options
  • Save ross-spencer/cc078c662fdeb161749b3bd02fc31302 to your computer and use it in GitHub Desktop.
Save ross-spencer/cc078c662fdeb161749b3bd02fc31302 to your computer and use it in GitHub Desktop.
Create files with old dates
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Script to generate a sample set of files with a random distribution of
dates. Right now, this is very likely to be a uniform distribution so numpy
needs to be explored some more.
"""
import argparse
import atexit
import datetime
import logging
import os
import shutil
import time
from numpy import random
# Log record layout and timestamp format handed to logging.basicConfig().
LOGFORMAT = '%(asctime)-15s %(levelname)s: %(message)s'
DATEFORMAT = '%m/%d/%Y %H:%M:%S'
def _make_parser():
"""Create an argument parser for this script."""
parser = argparse.ArgumentParser()
parser.description = "create a set of files with a random date range"
parser.add_argument(
"-i", "--input",
help="path to file to duplicate",
required=False)
parser.add_argument(
"-o", "--output",
help="path to the output folder",
default="output-dates",
required=False)
parser.add_argument(
"-n", "--number",
help="number of files to create",
default=100,
required=False)
return parser
def change_time(filepath, year):
    """Set a file's access and modification times to a random date.

    The month and day are drawn at random; the time of day is fixed at
    10:25:01 and interpreted as local time.

    Args:
        filepath: path of the file whose timestamps are overwritten.
        year: calendar year for the new timestamp (any integer-like value,
            including a numpy integer).
    """
    # numpy's randint excludes the upper bound, so 13 and 29 are needed to
    # make December and the 28th reachable. Days stop at 28 so the date is
    # valid in every month. Asking for a scalar (no size argument) and
    # wrapping in int() gives datetime the plain integers it requires.
    month = int(random.randint(1, 13))
    day = int(random.randint(1, 29))
    date = datetime.datetime(
        year=int(year), month=month, day=day, hour=10, minute=25, second=1)
    mod_time = time.mktime(date.timetuple())
    os.utime(filepath, (mod_time, mod_time))
def copy_media_location(copy_path, output_path, date_range):
    """Copy a sample file once per entry in ``date_range`` and back-date it.

    Each copy is written to ``output_path`` as ``<stem>_<index>.<ext>`` and
    then has its timestamps changed to a date within the matching year.

    Args:
        copy_path: path to the file to duplicate.
        output_path: folder that receives the copies.
        date_range: sequence of years, one per copy to create.
    """
    # splitext copes with names that contain several dots or none at all,
    # and working from the basename keeps every copy inside output_path
    # even when copy_path is absolute.
    file_name, ext = os.path.splitext(os.path.basename(copy_path))
    # Files without an extension keep the "ext" placeholder suffix.
    ext = ext.lstrip(".") or "ext"
    for count, year in enumerate(date_range):
        new_file_name = '{}_{}.{}'.format(file_name, count, ext)
        new_path = os.path.join(output_path, new_file_name)
        shutil.copyfile(copy_path, new_path)
        change_time(new_path, year)
    logging.info("%s files created at %s", len(date_range), output_path)
def tmp_file_content():
    """Return placeholder text for the temporary sample file.

    Used when the caller has not supplied a file to duplicate. The text is
    a heading line, a blank line, and a single sentence of body text.
    """
    parts = (
        "# Fake File Format Study Data",
        "Some data for the study.",
    )
    return "\n\n".join(parts)
def remove_tmp(file_path):
    """Delete ``file_path``, logging instead of failing if that is not possible.

    Intended to be registered with atexit when the script had to create its
    own temporary input file.
    """
    try:
        os.unlink(file_path)
    except OSError:
        logging.info("Not removing %s at exit", file_path)
    return None
def main():
    """Primary entry point for the script.

    Parses the command line, makes sure the output folder exists, creates a
    temporary sample file when no input file was given, draws one random
    year per requested file and creates the back-dated copies.

    Raises:
        OSError: if the output folder cannot be created and does not
            already exist.
    """
    logging.basicConfig(format=LOGFORMAT, datefmt=DATEFORMAT, level="INFO")
    args = _make_parser().parse_args()
    if args.output:
        try:
            os.mkdir(args.output)
        except OSError:
            # An already-existing folder is fine. Any other failure would
            # make every later copy fail, so surface it here instead.
            if not os.path.isdir(args.output):
                raise
    if not args.input:
        tmp_file_path = "file.md"
        # Register the clean-up first so the file is removed at exit even
        # if a later step fails.
        atexit.register(remove_tmp, file_path=tmp_file_path)
        # Text mode: the generated content is a str, which a binary-mode
        # file rejects on Python 3.
        with open(tmp_file_path, 'w') as tmp_file:
            tmp_file.write(tmp_file_content())
        args.input = tmp_file_path
    # int() guards against the count arriving as a string from the command
    # line. The upper bound is exclusive, so years span 1970 through 2017.
    dist = random.randint(1970, 2018, int(args.number))
    copy_media_location(args.input, args.output, dist)
# Run the script only when executed directly, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment