harishkotra · March 31, 2025 00:41
diff --git a/gitingest_to_csv.py b/gitingest_to_csv.py
 """
 # GitIngest Code Extractor

 ## Description
 This script parses text files containing code snippets extracted from gitingest.com 
 and converts them into a CSV format for easier analysis and manipulation.

 ## Purpose
 When downloading code from gitingest.com, files are often consolidated into a single
 text file with delimiters separating each code file. This script identifies these
 delimiters (lines of equal signs followed by file paths) and extracts each code file
 into a separate cell in a CSV file.

 ## Input Format
 The script expects text files structured as follows:
 ```
 ================================================
 File: path/to/file1.ext
 ================================================
 [Content of file1]

 ================================================
 File: path/to/file2.ext
 ================================================
 [Content of file2]
 ```

 ## Output Format
 The script produces a CSV file with a single column named 'content'.
 Each cell contains:
 1. The file path from the original file
 2. The complete code content of that file

 This format makes it easier to:
 - Import the code into spreadsheet applications
 - Perform further analysis on the codebase
 - Share code extracts in a structured format

 ## Usage
 1. Save your gitingest.com extracted text to a file (default: "gitingest_code.txt")
 2. Run this script
 3. Access the organized code in the output CSV (default: "gitingest_code.csv")
 """

 import csv
 import re

 def parse_code_to_csv(input_file, output_file):
    """Split a gitingest text dump into per-file sections and write them to a CSV.

    A section starts at a header made of a run of 48 '=' characters, the text
    ``File: <path>``, and a second run of 48 '='. Everything up to the next
    header (or the end of the input) is treated as that file's content.

    Args:
        input_file: Path of the UTF-8 text file to read.
        output_file: Path of the CSV file to create or overwrite.

    Returns:
        The number of file sections written (the 'content' header row is
        not counted).

    Raises:
        OSError: If either file cannot be opened.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # The lookahead ends a section only at the start of the next header
    # (a '=' run followed by "File:"), not at any bare run of 48 '='.
    # Otherwise code that contains its own '=====' banner line would be cut
    # short and the remainder silently dropped.
    pattern = r'={48}\s*File:\s*(.*?)\s*={48}(.*?)(?=={48}\s*File:|$)'
    matches = re.findall(pattern, content, re.DOTALL)

    # One single-cell row per file: the path line followed by the trimmed code.
    code_files = [
        [f"File: {file_path.strip()}\n{code_content.strip()}"]
        for file_path, code_content in matches
    ]

    # newline='' leaves line-ending handling to the csv module, so newlines
    # embedded in the quoted cells are written unchanged.
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['content'])
        writer.writerows(code_files)

    return len(code_files)

 if __name__ == "__main__":
    import sys

    # Optional positional arguments override the default paths, so the
    # script no longer has to be edited to process a different file:
    #   python gitingest_to_csv.py [input_file [output_file]]
    cli_args = sys.argv[1:]
    input_file = cli_args[0] if len(cli_args) > 0 else "gitingest_code.txt"
    output_file = cli_args[1] if len(cli_args) > 1 else "gitingest_code.csv"

    num_files = parse_code_to_csv(input_file, output_file)
    print(f"Successfully extracted {num_files} code files to {output_file}")
	"""
	# GitIngest Code Extractor

	## Description
	This script parses text files containing code snippets extracted from gitingest.com
	and converts them into a CSV format for easier analysis and manipulation.

	## Purpose
	When downloading code from gitingest.com, files are often consolidated into a single
	text file with delimiters separating each code file. This script identifies these
	delimiters (lines of equal signs followed by file paths) and extracts each code file
	into a separate cell in a CSV file.

	## Input Format
	The script expects text files structured as follows:
	```
	================================================
	File: path/to/file1.ext
	================================================
	[Content of file1]

	================================================
	File: path/to/file2.ext
	================================================
	[Content of file2]
	```

	## Output Format
	The script produces a CSV file with a single column named 'content'.
	Each cell contains:
	1. The file path from the original file
	2. The complete code content of that file

	This format makes it easier to:
	- Import the code into spreadsheet applications
	- Perform further analysis on the codebase
	- Share code extracts in a structured format

	## Usage
	1. Save your gitingest.com extracted text to a file (default: "gitingest_code.txt")
	2. Run this script
	3. Access the organized code in the output CSV (default: "gitingest_code.csv")
	"""

	import csv
	import re

	def parse_code_to_csv(input_file, output_file):
	    """Split a gitingest text dump into per-file sections and write them to a CSV.

	    A section starts at a header made of a run of 48 '=' characters, the text
	    ``File: <path>``, and a second run of 48 '='. Everything up to the next
	    header (or the end of the input) is treated as that file's content.

	    Args:
	        input_file: Path of the UTF-8 text file to read.
	        output_file: Path of the CSV file to create or overwrite.

	    Returns:
	        The number of file sections written (the 'content' header row is
	        not counted).

	    Raises:
	        OSError: If either file cannot be opened.
	    """
	    with open(input_file, 'r', encoding='utf-8') as f:
	        content = f.read()

	    # The quantifiers ('\s*', '.*?') are required: without them the path
	    # group can hold at most one character and no real header matches.
	    # The lookahead ends a section only at the start of the next header
	    # (a '=' run followed by "File:"), so a '=====' banner line inside a
	    # file's code does not cut that file short.
	    pattern = r'={48}\s*File:\s*(.*?)\s*={48}(.*?)(?=={48}\s*File:|$)'
	    matches = re.findall(pattern, content, re.DOTALL)

	    # One single-cell row per file: the path line followed by the trimmed code.
	    code_files = [
	        [f"File: {file_path.strip()}\n{code_content.strip()}"]
	        for file_path, code_content in matches
	    ]

	    # newline='' leaves line-ending handling to the csv module, so newlines
	    # embedded in the quoted cells are written unchanged.
	    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
	        writer = csv.writer(csvfile)
	        writer.writerow(['content'])
	        writer.writerows(code_files)

	    return len(code_files)

	if __name__ == "__main__":
	    import sys

	    # Optional positional arguments override the default paths, so the
	    # script does not have to be edited to process a different file:
	    #   python gitingest_to_csv.py [input_file [output_file]]
	    cli_args = sys.argv[1:]
	    input_file = cli_args[0] if len(cli_args) > 0 else "gitingest_code.txt"
	    output_file = cli_args[1] if len(cli_args) > 1 else "gitingest_code.csv"

	    num_files = parse_code_to_csv(input_file, output_file)
	    print(f"Successfully extracted {num_files} code files to {output_file}")
No results found