Created
August 19, 2019 14:16
-
-
Save Suor/006d52e6bd99479b8376845ea7c7508e to your computer and use it in GitHub Desktop.
Generate files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import os | |
import sys | |
import random | |
from funcy import re_find | |
from tqdm import tqdm | |
# Upper bound, in bytes, on the in-memory buffer that is repeated to fill a file.
CHUNK_SIZE = 512 * 1024

# Template for the repeated file content; ``%f`` receives a per-file random
# number so that no two generated files are byte-identical.
TEXT = "".join([
    "This is an autogenerated text, which would be repated many times\n",
    "This is a random number %f to make files different\n\n",
])
def main():
    """Parse command-line arguments and fill a directory with generated files.

    Creates ``DIR`` if it does not exist and writes ``N`` files named
    ``<index>.<ext>`` into it.  Each file's size is drawn with
    ``random.randint`` from the range ``size * (1 - vary/100)`` to
    ``size * (1 + vary/100)`` and the file is filled with repetitions of
    ``TEXT`` formatted with a per-file random number.

    Exits with a usage error (via ``parser.error``) when ``--vary`` is
    negative, since that would make the size range empty.
    """
    parser = argparse.ArgumentParser(description='Generate files.')
    parser.add_argument('dir', metavar="DIR", help='a directory path to fill')
    parser.add_argument('n', metavar="N", type=int, help='number of files')
    parser.add_argument('-s', '--size', type=parse_size, default='1m',
                        help='average file size')
    parser.add_argument('-v', '--vary', type=int, default=15,
                        help='file size variation percent')
    parser.add_argument('-e', '--ext', default='txt', help='file extension')
    args = parser.parse_args()

    if args.vary < 0:
        # A negative percentage would put the lower bound above the upper one
        # and make random.randint() raise; report it as a usage error instead.
        parser.error("--vary must be a non-negative percentage")

    # The size bounds do not depend on the file index, so compute them once.
    # The lower bound is clamped at zero: a variation above 100% would
    # otherwise request a negative file size.
    spread = args.vary / 100
    min_size = max(0, int(args.size * (1 - spread)))
    max_size = int(args.size * (1 + spread))

    os.makedirs(args.dir, exist_ok=True)

    for i in tqdm(range(args.n)):
        # A fresh random number per file keeps the file contents distinct.
        sample_bytes = (TEXT % random.random()).encode()
        # Largest whole number of samples that fits into CHUNK_SIZE bytes.
        chunk = sample_bytes * (CHUNK_SIZE // len(sample_bytes))

        size = random.randint(min_size, max_size)
        full_chunks, tail = divmod(size, len(chunk))

        with open(os.path.join(args.dir, f"{i}.{args.ext}"), "wb") as fd:
            for _ in range(full_chunks):
                fd.write(chunk)
            if tail:
                # Pad with a partial chunk to hit the exact target size.
                fd.write(chunk[:tail])
# Multipliers for the supported size suffixes (binary, 1 KB = 1024 bytes).
UNITS = {"B": 1, "KB": 1024, "MB": 1024**2, "GB": 1024**3, "TB": 1024**4}
# Single-letter aliases: "K" means "KB", and so on.
UNITS["K"] = UNITS["KB"]
UNITS["M"] = UNITS["MB"]
UNITS["G"] = UNITS["GB"]
UNITS["T"] = UNITS["TB"]


def parse_size(size):
    """Convert a human-readable size such as ``"1m"`` or ``"1.5 KB"`` to bytes.

    The input is a decimal number optionally followed by a unit from
    ``UNITS`` (case-insensitive); a missing unit means bytes.  The result is
    truncated to an integer.

    Raises:
        ValueError: if the text is not a number with an optional known unit.
            argparse turns ValueError from a ``type=`` callable into a clean
            usage error, so bad ``--size`` values no longer produce a
            traceback.
    """
    # Function-scope import keeps this block self-contained.
    import re

    # fullmatch rejects stray text around the number instead of silently
    # parsing whatever numeric fragment happens to appear in the input.
    match = re.fullmatch(r'\s*(\d+(?:\.\d*)?|\.\d+)\s*([A-Za-z]+)?\s*', size)
    if match is None:
        raise ValueError(f"invalid size: {size!r}")

    number, unit = match.groups()
    unit = (unit or 'B').upper()
    if unit not in UNITS:
        raise ValueError(f"unknown size unit: {unit!r}")
    return int(float(number) * UNITS[unit])
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment