Created
April 17, 2025 12:37
-
-
Save sanjeed5/2efded237e2e78c12e51b5ecb75fcf39 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import argparse | |
| import json | |
| import os | |
| import sys | |
def jsonl_to_json(jsonl_file_path: str, json_file_path: str) -> bool:
    """Convert a JSONL file into a JSON file holding one array of records.

    Each non-blank line of the input is parsed as a standalone JSON value;
    the parsed values are written, in order, as a single pretty-printed
    JSON array. Errors are reported on stderr rather than raised.

    Args:
        jsonl_file_path: Path to the input JSONL file.
        json_file_path: Path to the output JSON file.

    Returns:
        True if the output file was written, False if the input could not
        be read or parsed, or the output could not be written.
    """
    data = []
    try:
        # 'utf-8-sig' drops a leading UTF-8 byte-order mark if one is present
        # (and reads BOM-less files exactly like 'utf-8'). Without it the BOM
        # survives strip() and json.loads() rejects the first line.
        with open(jsonl_file_path, 'r', encoding='utf-8-sig') as infile:
            for line_number, raw_line in enumerate(infile, start=1):
                line = raw_line.strip()
                if not line:  # Skip blank lines
                    continue
                try:
                    data.append(json.loads(line))
                except json.JSONDecodeError as e:
                    # Include the line number so the bad record can be located.
                    print(f"Error decoding JSON on line {line_number}: {line}", file=sys.stderr)
                    print(f"Error: {e}", file=sys.stderr)
                    return False  # Indicate failure
    except FileNotFoundError:
        print(f"Error: Input file not found at {jsonl_file_path}", file=sys.stderr)
        return False
    except Exception as e:
        # Boundary handler: this function reports failures via its return
        # value, so any other read error is converted to False here.
        print(f"An unexpected error occurred while reading the file: {e}", file=sys.stderr)
        return False

    try:
        with open(json_file_path, 'w', encoding='utf-8') as outfile:
            json.dump(data, outfile, indent=2)  # Use indent=2 for pretty printing
        print(f"Successfully converted '{jsonl_file_path}' to '{json_file_path}'")
        return True  # Indicate success
    except Exception as e:
        print(f"An error occurred while writing the JSON file: {e}", file=sys.stderr)
        return False
def main():
    """Command-line entry point: convert the given JSONL file to JSON.

    The output path is the input path with its final extension replaced by
    '.json'. If that file already exists the user is asked to confirm the
    overwrite. Exits with status 1 when the conversion fails or when the
    derived output path would be the input file itself, and with status 0
    when the user declines to overwrite.
    """
    parser = argparse.ArgumentParser(description='Convert a JSONL file to a JSON file.')
    parser.add_argument('jsonl_file', help='Path to the input JSONL file.')
    args = parser.parse_args()

    input_path = args.jsonl_file
    if not input_path.lower().endswith('.jsonl'):
        print("Warning: Input file does not have a .jsonl extension.", file=sys.stderr)

    # Create output path by replacing the input's extension with .json
    base_name = os.path.splitext(input_path)[0]
    output_path = base_name + '.json'

    # An input that already ends in '.json' maps onto itself; converting
    # would overwrite the source file, so refuse instead.
    same_file = (
        os.path.normcase(os.path.abspath(output_path))
        == os.path.normcase(os.path.abspath(input_path))
    )
    if same_file:
        print(
            f"Error: Output path '{output_path}' is the same as the input file; "
            "refusing to overwrite it.",
            file=sys.stderr,
        )
        sys.exit(1)

    if os.path.exists(output_path):
        try:
            overwrite = input(f"Output file '{output_path}' already exists. Overwrite? (y/N): ")
        except EOFError:
            # No interactive stdin (e.g. piped or closed): take the safe default.
            overwrite = ''
        if overwrite.strip().lower() not in ('y', 'yes'):
            print("Operation cancelled.")
            sys.exit(0)

    if not jsonl_to_json(input_path, output_path):
        sys.exit(1)  # Exit with error code if conversion failed


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment