Skip to content

Instantly share code, notes, and snippets.

@Vido
Created October 30, 2024 18:38
Show Gist options
  • Save Vido/02724ccde0425d1d9a06376d3c95bfde to your computer and use it in GitHub Desktop.
Save Vido/02724ccde0425d1d9a06376d3c95bfde to your computer and use it in GitHub Desktop.
RAG (Retrieval Augmented Generation) proof-of-concept based on LangChain and OpenAI's ChatGPT. https://youtu.be/muFyRwfqVA0
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>🐍📈 Python e Finanças - Retrieval Augmented Generation 🦜️🔗</title>
</head>
<body>
{% with messages = get_flashed_messages() %}
{% if messages %}
<ul class="flashes">
{% for message in messages %}
<li>{{ message }}</li>
{% endfor %}
</ul>
{% endif %}
{% endwith %}
<h1>🐍📈 Python e Finanças - Retrieval Augmented Generation 🦜️🔗</h1>
<a href="https://youtu.be/muFyRwfqVA0">Watch the video on my YouTube Channel</a>
<h2>Ask a Question</h2>
<form method="post">
<!-- was type=input, which is not a valid input type; text is the intended control -->
<input type="text" name="question">
<input type="submit" value="Ask!">
</form>
{% if answer %}
<h3>{{ question }}</h3>
<div id="content" style="width:50%">{{ answer }}</div>
{% endif %}
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<script>
// Render the (Jinja-escaped) Markdown answer as HTML.
// Guard needed: #content only exists when an answer was produced, so the
// unguarded version threw a TypeError on every plain GET of this page.
const content = document.getElementById('content');
if (content) {
    content.innerHTML = marked.parse(content.innerHTML);
}
</script>
<hr>
<h2>UI Endpoints:</h2>
<ul>
<li><a href="/">Home -&gt; Chat and Ask a Question</a></li>
<li><a href="/embed">Embed Documents into the VectorStore</a></li>
</ul>
</body>
</html>
import os
from pathlib import Path

from flask import Flask, request, render_template, flash, redirect

app = Flask(__name__)
# NOTE(review): a hard-coded secret key is acceptable for a local PoC only.
# Allow overriding via the environment so a deployment can supply a real one;
# the fallback keeps the original behavior for local runs.
app.secret_key = os.environ.get('SECRET_KEY', 'super secret key')
app.config['SESSION_TYPE'] = 'filesystem'

# Lazily-initialised module-level singletons: the vector store and the
# embedding model, both created on first use by init_vector_db().
VS, EMBEDDINGS = None, None
def init_vector_db():
    """Lazily build and memoise the embedding model and vector store.

    Both objects are cached in the module-level globals EMBEDDINGS and VS,
    so only the first call pays the construction cost.  Returns the
    (vector store, embeddings) pair.
    """
    global VS, EMBEDDINGS
    if EMBEDDINGS is None:
        # Heavy import deferred until the model is actually needed.
        from gen_ai_hub.proxy.langchain.init_models import init_embedding_model
        EMBEDDINGS = init_embedding_model('text-embedding-3-small')
    if VS is None:
        # Other backends: https://python.langchain.com/docs/integrations/vectorstores/
        from langchain_core.vectorstores import InMemoryVectorStore
        VS = InMemoryVectorStore(embedding=EMBEDDINGS)
    return VS, EMBEDDINGS
@app.route('/embed', methods=['GET'])
def embed():
    """Index every file under ./upload into the vector store, then redirect home.

    Each PDF is converted page by page, split into overlapping character
    chunks (500 chars, 100 overlap) and added to the module-level store VS.
    """
    from langchain_community.document_loaders import PyPDFLoader
    from langchain.text_splitter import CharacterTextSplitter

    print('Loading Embeddings and Vectorstore...')
    init_vector_db()
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    print('... DONE!')

    uploaded_files = [p for p in Path('./upload').glob('**/*') if p.is_file()]
    for filepath in uploaded_files:
        # str() because some loader versions reject pathlib.Path objects.
        loader = PyPDFLoader(str(filepath))
        documents = []
        for n, doc_page in enumerate(loader.lazy_load()):
            print(f'PDF to Text {filepath}, page {n}')
            documents.append(doc_page)
        VS.add_documents(text_splitter.split_documents(documents))

    print('Uploading DONE!')
    # Fixed "embeded" typo and dropped the no-placeholder f-string prefix.
    flash('Files embedded into VectorStore! ✌️', 'success')
    return redirect('/')
def get_llm():
    """Construct the chat model, routed through the SAP Gen-AI-Hub proxy."""
    from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client
    from gen_ai_hub.proxy.langchain.openai import ChatOpenAI

    client = get_proxy_client('gen-ai-hub')
    return ChatOpenAI(proxy_model_name='gpt-4o-mini', proxy_client=client)
def get_prompt():
    """Build the RAG prompt template; it expects {context} and {question}."""
    from langchain.prompts import PromptTemplate

    template_text = ('You are a helpful finance advisor. '
                     'Context: {context} Question: {question}')
    return PromptTemplate(template=template_text,
                          input_variables=['context', 'question'])
@app.route('/', methods=['GET', 'POST'])
def chat():
    """Render the chat page; on POST answer the question via RetrievalQA.

    GET renders the empty form.  POST retrieves the 5 most similar chunks
    from the vector store, stuffs them into the prompt and calls the LLM.
    """
    from langchain.chains import RetrievalQA

    init_vector_db()
    question, answer = '', ''
    if request.method == 'POST':
        # .get() instead of ['question'] so a malformed POST renders the
        # empty form instead of raising KeyError (HTTP 400).
        question = request.form.get('question', '')
        if question:  # skip the (paid) LLM call for blank submissions
            retriever = VS.as_retriever(search_kwargs={'k': 5})
            qa = RetrievalQA.from_chain_type(llm=get_llm(),
                                             retriever=retriever,
                                             chain_type='stuff',
                                             chain_type_kwargs={'prompt': get_prompt()})
            result = qa.invoke(question)
            question, answer = result['query'], result['result']
    return render_template('chat.html',
                           question=question,
                           answer=answer)
if __name__ == '__main__':
    # Development server only: debug=True enables the Werkzeug debugger and
    # reloader, and host='0.0.0.0' binds every interface — never expose this
    # configuration on an untrusted network.
    app.run(host='0.0.0.0', port=5000, debug=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment