# ChatGPT:
#### conversations2/conversation_json_to_sessions_txt_html_sqlite_data

In [None]:
#!/home/jack/miniconda3/envs/cloned_base/bin/python
import glob
import html
import json
import logging
import os
import subprocess

def split_and_save_and_convert(conversations_file, output_folder):
    """Split a conversations export into per-conversation JSON, HTML and TXT files.

    Loads ``conversations_file`` (a JSON list of conversation dicts) and, for
    each conversation, writes ``<title>.json`` into ``output_folder`` and then
    calls ``convert_to_html`` and ``convert_to_txt`` on that JSON file to
    produce ``<title>.html`` and ``<title>.txt`` next to it.

    The file name is the conversation title with spaces, path separators and
    the characters ``: ' & * ( )`` replaced by underscores. A missing, null or
    empty title falls back to ``Unknown_Title``. Conversations whose titles
    sanitize to the same name overwrite each other.

    Errors are logged rather than raised; the first error stops processing.

    Args:
        conversations_file: Path to the exported conversations JSON file.
        output_folder: Existing directory that receives the generated files.
    """
    # '/' and '\\' are included so a title can never point into a
    # (non-existent) subdirectory of output_folder.
    unsafe_chars = " :'&*()/\\"
    to_underscore = str.maketrans(unsafe_chars, '_' * len(unsafe_chars))

    try:
        with open(conversations_file, 'r', encoding='utf-8') as file:
            data = json.load(file)

        for conversation in data:
            # `or` also covers an explicit null/empty title in the export.
            title = conversation.get('title') or 'Unknown_Title'
            title_with_underscores = title.translate(to_underscore)
            chapter_filepath = os.path.join(output_folder, f"{title_with_underscores}.json")

            logging.info(f"Saving data for conversation '{title}' to {chapter_filepath}")

            # Each per-conversation file keeps the export's list-of-conversations shape.
            with open(chapter_filepath, 'w', encoding='utf-8') as chapter_file:
                json.dump([conversation], chapter_file, indent=2)

            # Convert JSON to HTML
            html_output_file = os.path.join(output_folder, f"{title_with_underscores}.html")
            convert_to_html(chapter_filepath, html_output_file)

            # Convert JSON to TXT
            txt_output_file = os.path.join(output_folder, f"{title_with_underscores}.txt")
            convert_to_txt(chapter_filepath, txt_output_file)

    except FileNotFoundError:
        logging.error(f"File not found: {conversations_file}")
    except json.JSONDecodeError:
        logging.error(f"Error decoding JSON in file: {conversations_file}")
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")

def convert_to_html(json_file, html_output_file):
    """Render the conversations stored in ``json_file`` as a simple HTML file.

    The JSON file is loaded and flattened to text with
    ``get_conversation_result``. The text is HTML-escaped (``&``, ``<`` and
    ``>``) and every newline becomes ``<br />`` followed by a newline, then
    the result is written to ``html_output_file`` as UTF-8.

    Args:
        json_file: Path to a JSON file holding a list of conversation dicts.
        html_output_file: Path of the HTML file to create or overwrite.
    """
    with open(json_file, 'r', encoding='utf-8') as file:
        json_data = json.load(file)

    result_str = get_conversation_result(json_data)

    # Escape before inserting markup so the <br /> tags themselves stay intact.
    # quote=False: the text is element content, so quotes need no escaping.
    escaped = html.escape(result_str, quote=False)

    with open(html_output_file, "w", encoding='utf-8') as html_output:
        html_output.write(escaped.replace("\n", "<br />\n"))

def convert_to_txt(json_file, txt_output_file):
    """Write the conversations stored in ``json_file`` as plain text.

    The JSON file is loaded, flattened to text with
    ``get_conversation_result`` and written unchanged to ``txt_output_file``
    as UTF-8.

    Args:
        json_file: Path to a JSON file holding a list of conversation dicts.
        txt_output_file: Path of the text file to create or overwrite.
    """
    with open(json_file, 'r', encoding='utf-8') as file:
        json_data = json.load(file)

    result_str = get_conversation_result(json_data)

    # Written verbatim: no marker substitution, so conversation text that
    # happens to contain "/n" or "XXXXXXX" is preserved as-is.
    with open(txt_output_file, "w", encoding='utf-8') as txt_output:
        txt_output.write(result_str)

def get_conversation_result(json_data):
    """Flatten a list of conversations into one plain-text transcript.

    For each conversation the output contains the title on its own line,
    then for every message returned by ``get_conversation_messages`` the
    author line followed by the message text line, and finally a blank line.
    A missing or null title is rendered as an empty line.

    Args:
        json_data: Iterable of conversation dicts.

    Returns:
        The concatenated transcript; an empty string for no conversations.
    """
    pieces = []
    for conversation in json_data:
        # `or ''` guards against an explicit null title in the export.
        title = conversation.get('title') or ''
        pieces.append(title + '\n')
        for message in get_conversation_messages(conversation):
            pieces.append(message['author'] + '\n' + message['text'] + '\n')
        pieces.append('\n')

    return ''.join(pieces)

def get_conversation_messages(conversation):
    """Collect the displayable messages on the path ending at ``current_node``.

    Starting from ``conversation['current_node']``, follows each node's
    ``parent`` link through ``conversation['mapping']`` until a falsy node id
    is reached. A node contributes a message only when its content type is
    ``'text'``, its first part is non-empty, and it is not a system message
    (unless the metadata flag ``is_user_system_message`` is set).

    Returns:
        A list of ``{'author': ..., 'text': ...}`` dicts ordered from the
        oldest message to the newest. The ``assistant`` role is shown as
        ``'ChatGPT'`` and flagged system messages as ``'Custom user info'``;
        only the first entry of ``parts`` is used as the text.
    """
    collected = []
    node_id = conversation.get('current_node')

    while node_id:
        node = conversation['mapping'][node_id]
        # Advance first so the guard clauses below can simply `continue`.
        node_id = node.get('parent')

        message = node.get('message')
        if not message:
            continue

        content = message.get('content')
        if not content or content.get('content_type') != 'text':
            continue

        parts = content.get('parts', [])
        if len(parts) == 0 or len(parts[0]) == 0:
            continue

        role = message['author']['role']
        if role == 'system' and not message.get('metadata', {}).get('is_user_system_message'):
            continue

        if role == 'assistant':
            label = 'ChatGPT'
        elif role == 'system':
            # Only flagged system messages get past the filter above.
            label = 'Custom user info'
        else:
            label = role

        collected.append({'author': label, 'text': parts[0]})

    # The walk went newest -> oldest; callers want chronological order.
    collected.reverse()
    return collected

# Show INFO-level messages so each saved conversation is reported
logging.basicConfig(level=logging.INFO)

# Input export and destination directory for the generated files
conversations_file_path = 'CHATGPT/conversations.json'
output_folder = 'CHATGPT/output_all_in_one'

# Create the destination directory if it is not there yet
os.makedirs(output_folder, exist_ok=True)

# Split the export and write the JSON, HTML and TXT files
split_and_save_and_convert(conversations_file_path, output_folder)


In [None]:
import sqlite3
import os
import hashlib

# Connect to SQLite database (creates a new database if it doesn't exist)
db_path = 'chat_database.db'
conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()

    # Create the table that stores one row per inserted file: the raw bytes
    # (content), their decoded text (text_content), a hash and a format tag.
    # The SQL text is kept exactly as before, hence the unindented literal.
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS files (
        id INTEGER PRIMARY KEY,
        filename TEXT NOT NULL,
        content BLOB NOT NULL,
        text_content TEXT NOT NULL,
        hash_value TEXT NOT NULL,
        format TEXT NOT NULL
    )
''')

    conn.commit()
finally:
    # Always release the connection, even if creating the table failed.
    conn.close()

def calculate_hash(file_path):
    """Return the hexadecimal SHA-256 digest of the file at ``file_path``.

    The file is read in binary mode in 8 KB chunks so it is never held in
    memory as a whole.
    """
    digest = hashlib.sha256()
    with open(file_path, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(8192), b''):
            digest.update(block)
    return digest.hexdigest()

def insert_file(filename, content, text_content, hash_value, file_format):
    """Insert one row into the ``files`` table of the database at ``db_path``.

    A new connection is opened for the call, the row is committed, and the
    connection is closed again, including when the insert raises.

    Args:
        filename: Value for the ``filename`` column.
        content: Raw file bytes for the ``content`` BLOB column.
        text_content: Decoded text for the ``text_content`` column.
        hash_value: Value for the ``hash_value`` column.
        file_format: Value for the ``format`` column (e.g. ``'html'``).
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute('INSERT INTO files (filename, content, text_content, hash_value, format) VALUES (?, ?, ?, ?, ?)',
                       (filename, content, text_content, hash_value, file_format))
        conn.commit()
    finally:
        # Do not leak the connection if the INSERT or commit fails.
        conn.close()

def insert_html_files(directory):
    """Store every ``.html`` file found under ``directory`` in the database.

    Walks ``directory`` recursively. For each file whose name ends with
    ``.html`` it reads the raw bytes, decodes them as UTF-8 (undecodable
    bytes are dropped), computes the file hash with ``calculate_hash`` and
    passes everything to ``insert_file`` with the format ``'html'``. The bare
    file name, not the full path, is stored and printed.
    """
    for folder, _subdirs, names in os.walk(directory):
        for name in names:
            if not name.endswith('.html'):
                continue

            full_path = os.path.join(folder, name)
            with open(full_path, 'rb') as handle:
                raw_bytes = handle.read()

            # errors='ignore' silently drops bytes that are not valid UTF-8
            decoded = raw_bytes.decode('utf-8', errors='ignore')
            digest = calculate_hash(full_path)

            insert_file(name, raw_bytes, decoded, digest, 'html')
            print(f"Inserted: {name}")

# Example: insert HTML files recursively from the export directory.
# The path must match the directory the conversion step wrote to
# ('CHATGPT/...'); os.walk yields nothing for a directory that does not
# exist, so a misspelled path would insert no files without any error.
insert_html_files('CHATGPT/')

print('Insertion process completed.')


In [None]:
import sqlite3
import uuid

# Open a module-level connection and cursor on the SQLite database file.
# The lookup functions defined below read through this `cursor`, and the
# connection is closed after the example search has run.
db_path = 'chat_database.db'
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

def retrieve_file_content(filename):
    """Return the stored ``content`` of the first row named ``filename``.

    Uses the module-level ``cursor``. Returns ``None`` when no row in the
    ``files`` table has that file name.
    """
    cursor.execute('SELECT content FROM files WHERE filename = ?', (filename,))
    row = cursor.fetchone()
    if not row:
        return None
    return row[0]

def search_and_print_fourth_file(search_terms):
    """Print and return the content of the second file matching all terms.

    Despite the name, the row at index 1 (the second match) is selected. The
    query has no ORDER BY, so which file counts as "second" is whatever order
    the database returns rows in.

    A file matches when its ``text_content`` contains every term surrounded
    by a space on each side (``LIKE '% term %'``). The query runs on the
    module-level ``cursor`` and the content is fetched with
    ``retrieve_file_content``.

    Args:
        search_terms: Terms that must all occur in a file's text.

    Returns:
        The decoded content of the selected file, or ``None`` (after printing
        an error) when fewer than two files match or the content is empty or
        missing.
    """
    # One LIKE clause per term, AND-ed together
    query = '''
        SELECT filename
        FROM files
        WHERE {}
    '''.format(' AND '.join('text_content LIKE ?' for _ in search_terms))

    # Wrap each term as '% term %': a partial match with spaces around the term
    like_patterns = [f'% {term} %' for term in search_terms]

    cursor.execute(query, like_patterns)
    rows = cursor.fetchall()

    if len(rows) < 2:
        print('Error: No matching files found or less than two matching files.')
        return None

    # Second matching row, first (only) column: the file name
    chosen_name = rows[1][0]
    print(chosen_name)

    content = retrieve_file_content(chosen_name)
    if not content:
        print(f'Error: Content not found for {chosen_name}')
        return None

    # Stored as bytes; undecodable bytes are dropped
    Data = content.decode("utf-8", errors="ignore")
    print(Data)
    return Data

# Example: search for files whose text contains both 'Cephalux' and 'Morpholux'
search_terms = ['Cephalux','Morpholux']
DATA = search_and_print_fourth_file(search_terms)

# Close the connection to the database
conn.close()

# The search returns None when nothing suitable was found, so test for that
# before measuring the length. If data is found, create a unique filename and
# write the content to an HTML file.
if DATA and len(DATA) > 2:
    uid = str(uuid.uuid4())  # Generate a unique ID using uuid
    FileName = "_".join(search_terms) + "_" + uid + ".html"
    print(FileName)

    # Write as UTF-8 explicitly: the text was decoded from UTF-8 and the
    # platform default encoding may not be able to represent it.
    with open(FileName, "w", encoding="utf-8") as IN:
        # Split the data into lines and write each line to the file with "<br />" appended
        ndata = DATA.split("<br />\n")
        for line in ndata:
            print(line)
            IN.write(line + "<br />\n")
