Created
March 23, 2025 21:08
-
-
Save ckarnell/b70c663d45ff2a3e8958467ff2045091 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import html
import random
import re

import chardet
from flask import Flask
# Flask application object; the "/" route in this file is registered on it.
app = Flask(__name__)
def detect_encoding(filename):
    """Guess the text encoding of a file from its raw bytes.

    The whole file is read in binary mode and handed to ``chardet.detect``.

    Args:
        filename: Path of the file to inspect.

    Returns:
        The encoding name reported by chardet, or ``'utf-8'`` when chardet
        reports no encoding (a falsy value such as ``None``).
    """
    with open(filename, 'rb') as handle:
        payload = handle.read()
    guess = chardet.detect(payload)
    detected = guess['encoding']
    # Fall back to UTF-8 when detection yields nothing usable.
    return detected if detected else 'utf-8'
def read_messages(filename):
    """Load the usable message lines from a text file.

    The file is opened with the encoding reported by ``detect_encoding``;
    undecodable bytes are replaced rather than raising.

    Args:
        filename: Path of the log file to read.

    Returns:
        A list of whitespace-stripped lines, keeping only those whose
        stripped length is greater than 10 characters.
    """
    kept = []
    with open(filename, 'r', encoding=detect_encoding(filename),
              errors='replace') as source:
        for raw in source:
            text = raw.strip()
            # Short lines are skipped.
            if len(text) > 10:
                kept.append(text)
    return kept
def build_markov_chain(lines, n=2):
    """Build an order-``n`` Markov chain from lines of text.

    Each line is tokenized into word runs and the punctuation marks
    ``. , ! ? ;`` (all other characters are dropped), then wrapped in
    ``'<START>'`` and ``'<END>'`` markers. Every window of ``n`` consecutive
    tokens becomes a state, and the token that follows the window is
    recorded as one of its successors.

    Args:
        lines: Iterable of text lines.
        n: Number of tokens per state.

    Returns:
        A dict mapping each state tuple to the list of tokens observed
        immediately after it (duplicates kept, in encounter order).
    """
    transitions = {}
    for text in lines:
        tokens = ['<START>', *re.findall(r"\b\w+\b|[.,!?;]", text), '<END>']
        for start in range(len(tokens) - n):
            stop = start + n
            state = tuple(tokens[start:stop])
            follower = tokens[stop]
            successors = transitions.get(state)
            if successors is None:
                transitions[state] = [follower]
            else:
                successors.append(follower)
    return transitions
def generate_sentence(chain, n=2, max_words=50):
    """Generate one random sentence by walking a Markov chain.

    The walk starts at a randomly chosen state whose first token is
    ``'<START>'`` and repeatedly samples a successor of the current state
    until ``'<END>'`` is drawn, the current state has no recorded
    successors, or ``max_words`` successors have been appended.

    Args:
        chain: Mapping from state tuples to lists of next tokens, in the
            shape produced by ``build_markov_chain``.
        n: Not used by the walk; the state size comes from the chain's keys.
            Kept so existing callers that pass ``n`` keep working.
        max_words: Maximum number of sampled tokens appended after the
            tokens taken from the starting state.

    Returns:
        The generated sentence with its first character upper-cased and
        without whitespace before ``? . ! ; ,``. Returns an empty string
        when the chain has no start state or the walk produces no tokens.
    """
    start_states = [state for state in chain if state and state[0] == '<START>']
    if not start_states:
        # Empty or start-less chain: nothing to sample from.
        return ''

    current = random.choice(start_states)
    # Drop the '<START>' marker; the rest of the state opens the sentence.
    sentence = list(current[1:])
    for _ in range(max_words):
        next_words = chain.get(current)
        if not next_words:
            break
        next_word = random.choice(next_words)
        if next_word == '<END>':
            break
        sentence.append(next_word)
        # Slide the state window forward by one token.
        current = (*current[1:], next_word)

    result = ' '.join(sentence)
    # Re-attach punctuation to the preceding word ("foo ," -> "foo,").
    result = re.sub(r'\s([?.!;,])', r'\1', result)
    # Slicing (rather than indexing) keeps an empty result from raising.
    return result[:1].upper() + result[1:]
# Preload the model: the log file is read and the chain is built once, when
# this module is imported, and the results are kept in module-level names.
FILE_PATH = "kp_centi_twitch_logs.txt"
messages = read_messages(FILE_PATH)
# n=4: each chain state is a tuple of four consecutive tokens.
markov_chain = build_markov_chain(messages, n=4)
# Word-bounded patterns that put back apostrophes lost during tokenization
# (the tokenizer splits "it's" into the tokens "it" and "s"). The leading
# word is captured so its capitalisation is preserved.
_CONTRACTION_FIXES = (
    (re.compile(r"\b([Dd]idn) t\b"), r"\1't"),
    (re.compile(r"\b([Ii]t) s\b"), r"\1's"),
    (re.compile(r"\b([Tt]here) s\b"), r"\1's"),
    (re.compile(r"\bI m\b"), "I'm"),
)


def _restore_contractions(sentence):
    """Re-insert apostrophes into known split contractions in *sentence*.

    Matches are anchored on word boundaries so that unrelated text such as
    "submit something" (which contains the substring "it s") is left alone.
    """
    for pattern, replacement in _CONTRACTION_FIXES:
        sentence = pattern.sub(replacement, sentence)
    return sentence


@app.route("/")
def serve_quote():
    """Serve an HTML page showing one freshly generated sentence.

    A sentence is sampled from the module-level ``markov_chain``, the
    "Deleted by poostew" marker is wrapped in parentheses, known split
    contractions are repaired, and the text is HTML-escaped before being
    placed into the page.

    Returns:
        The complete HTML document as a string.
    """
    sentence = generate_sentence(markov_chain, n=4)
    sentence = sentence.replace("Deleted by poostew", "(Deleted by poostew)")
    sentence = _restore_contractions(sentence)
    # The text originates from chat logs; escape it so it can never be
    # interpreted as markup. quote=False leaves apostrophes untouched.
    sentence = html.escape(sentence, quote=False)
    return f"""
    <html>
    <head>
        <title>Generated AJ Sentence</title>
        <style>
            body {{
                font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
                background-color: #f9f9f9;
                color: #333;
                text-align: center;
                padding: 80px 20px;
                margin: 0;
            }}
            h1 {{
                font-size: 4.5em;
                margin-bottom: 30px;
                color: #222;
            }}
            .sentence {{
                font-size: 2.8em;
                font-weight: 500;
                margin-bottom: 20px;
            }}
            .refresh {{
                font-size: 1.1em;
                color: #666;
            }}
        </style>
    </head>
    <body>
        <h1>Generated AJ Sentence</h1>
        <div class="sentence">kp_centi: {sentence}</div>
        <div class="refresh">Refresh the page for a new one.</div>
    </body>
    </html>
    """
if __name__ == "__main__":
    # The server listens on all interfaces, so debug mode must stay off:
    # Flask's debug mode enables the Werkzeug interactive debugger, which
    # lets anyone who can reach the port execute arbitrary Python code.
    # For local debugging, bind to 127.0.0.1 and pass debug=True instead.
    app.run(host="0.0.0.0", port=5055, debug=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment