Last active
June 3, 2025 08:40
-
-
Save tkojima0107/c8ad6e500da2d2fcb9265f6ecb856adb to your computer and use it in GitHub Desktop.
A file rename script for IEEE CPS proceedings
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import os
import re
import shutil
import sys
from argparse import SUPPRESS, ArgumentParser, RawDescriptionHelpFormatter

from pathvalidate import sanitize_filename
# Literal prefix the data file is required to start with. The script removes
# this prefix (and a trailing "};") and parses what remains as JSON.
START_STR = "var webpub = {data: "

# Long description handed to ArgumentParser(description=...); shown by --help.
HELP_MSG="""
The IEEE Conference Publishing Services (CPS) provides Proceedings in a webpub format which is fine for viewing in a browser. However, accessing them through any other means is inconvenient as the file names appear hashed. This script organizes the file names and directories to make them more readable.
Prerequisites:
This script should be run with Python 3.
It uses a package named 'pathvalidate' for checking file names.
Please install it by running: $ pip3 install pathvalidate.
Usage:
Execute this script in the directory where 'index.html' is located.
Before running, create a destination directory for the output.
Run the script using: $ python3 rename.py data/data.js export
The first argument specifies the JavaScript file containing the proceedings information, which is likely in 'data/data.js'.
The second argument is the path to the created destination directory, 'export' in this example.
The script will save the PDFs, organized by section, in the 'export' directory.
"""
def parse():
    """Parse the command-line arguments of the rename script.

    Returns:
        argparse.Namespace with three attributes:
        ``data`` (str) - path to the data.js file,
        ``export`` - path to the destination directory,
        ``conf_index`` (int or None) - value of ``--conf-index``; None when
        the option is not given, so the caller can tell "unspecified" apart
        from an explicit 0.
    """
    parser = ArgumentParser(
        description=HELP_MSG,
        # HELP_MSG is a hand-formatted, multi-section text. The default
        # formatter re-wraps the description into one paragraph, which
        # destroys its line breaks; the raw formatter prints it verbatim.
        formatter_class=RawDescriptionHelpFormatter,
    )
    parser.add_argument('data', type=str, help='path to data.js')
    parser.add_argument('export', help='export directory')
    parser.add_argument(
        '--conf-index',
        type=int,
        help="Specify the index of the conference to process (default: 0). This is useful if the data file contains multiple conferences.",
    )
    return parser.parse_args()
def copyfile(out_dir, element):
    """Copy one proceedings entry into ``out_dir`` under a readable name.

    Args:
        out_dir: directory path (str) that receives the copy.
        element: mapping with an ``"articleLocation"`` entry (path of the
            file to copy) and a ``"text"`` entry (used, after
            ``sanitize_filename``, as the new file name).

    The destination is ``<out_dir>/<sanitized text>.pdf``.
    """
    source_path = element["articleLocation"]
    pdf_name = sanitize_filename(element["text"]) + ".pdf"
    target_path = "/".join([out_dir, pdf_name])
    shutil.copyfile(source_path, target_path)
def _read_data_file(path):
    """Return the full text of ``path``; print a message and exit(1) if it cannot be opened."""
    try:
        with open(path, "r") as f:
            return f.read()
    except FileNotFoundError:
        print("No such file:", path)
        sys.exit(1)
    except PermissionError:
        print("Permission Error, Cannot open:", path)
        sys.exit(1)


def _check_export_dir(path):
    """Exit(1) unless ``path`` is an existing, writable directory."""
    if not os.path.exists(path):
        print("No such directory:", path)
        sys.exit(1)
    if not os.path.isdir(path):
        print("Not directory:", path)
        sys.exit(1)
    if not os.access(path, os.W_OK):
        print("Not writable:", path)
        sys.exit(1)


def _load_webpub(text, source_name):
    """Strip the JavaScript wrapper from ``text`` and parse the rest as JSON.

    ``text`` must start with START_STR; that prefix and a trailing "};" are
    removed before parsing. ``source_name`` is only used in error messages.
    Exits with status 1 when the prefix is missing or the payload is not
    valid JSON.
    """
    if not text.startswith(START_STR):
        print(source_name, "is not valid data file of CPS proceedings")
        sys.exit(1)
    # rstrip first so trailing blanks/newlines after "};" do not prevent
    # the closing wrapper from being removed.
    payload = text[len(START_STR):].rstrip()
    if payload.endswith("};"):
        payload = payload[:-2]
    try:
        return json.loads(payload)
    except json.JSONDecodeError as err:
        print(source_name, "is not valid data file of CPS proceedings")
        print("JSON parse error:", err)
        sys.exit(1)


def _ensure_dir(dir_name):
    """Create ``dir_name`` unless it already exists; exit(1) if it exists as a non-directory."""
    if not os.path.exists(dir_name):
        os.mkdir(dir_name)
    elif not os.path.isdir(dir_name):
        print(f"Error: {dir_name} is already exist but not a directory.")
        sys.exit(1)


def main():
    """Copy the PDFs listed in the data file into per-section directories.

    Steps: read the data file, validate the export directory, parse the
    embedded JSON, select one conference, then copy its front matter, back
    matter and every section's line items into sub-directories of the
    export directory.
    """
    args = parse()

    # Same order as before: read the file, check the export dir, then
    # validate the file contents.
    json_str = _read_data_file(args.data)
    _check_export_dir(args.export)
    data = _load_webpub(json_str, args.data)

    conferences = data["conferences"]
    if len(conferences) > 1 and args.conf_index is None:
        print("Warning: the data contains multiple conference data")
        print("Defaulting to the first conference data.")
        print("Please specify the index of the conference to process using --conf-index")
        print("This script use the first conference data")

    conf_index = 0 if args.conf_index is None else args.conf_index
    try:
        proc = conferences[conf_index]
    except IndexError:
        # Report a bad --conf-index (or an empty conference list) in the
        # script's usual style instead of a raw traceback.
        print(f"Error: conference index {conf_index} is out of range "
              f"(the data contains {len(conferences)} conference(s)).")
        sys.exit(1)

    title = proc["title"]
    print("Title:", title)

    # Front and back matter are optional. _ensure_dir makes a re-run into
    # the same export directory work instead of failing in os.mkdir.
    for matter in ("frontMatter", "backMatter"):
        if matter in proc:
            matter_dir = os.path.join(args.export, matter)
            _ensure_dir(matter_dir)
            for item in proc[matter]:
                copyfile(matter_dir, item)

    # Sections: guard on the "sections" key itself (this was previously
    # guarded by "backMatter", which skipped or crashed the section copy
    # depending on which keys were present).
    if "sections" in proc:
        for sec in proc["sections"]:
            sec_dir = os.path.join(args.export, sanitize_filename(sec["title"]))
            _ensure_dir(sec_dir)
            for item in sec["lineItems"]:
                copyfile(sec_dir, item)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment