RAG (Retrieval Augmented Generation) proof-of-concept based on LangChain and OpenAI's chatGPT. https://youtu.be/muFyRwfqVA0
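The gist has two files: a Jinja template for the single-page UI and the Flask app itself. Running it assumes Flask, LangChain (with langchain-community and pypdf for the PDF loader), and SAP's Generative AI Hub SDK (the gen_ai_hub package used below) are installed; the app listens on port 5000 and indexes every PDF it finds under ./upload.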
templates/chat.html — the single-page UI:
<!doctype html>
<title>🐍📈 Python e Finanças - Retrieval Augmented Generation 🦜️🔗</title>
<body>
  {% with messages = get_flashed_messages() %}
    {% if messages %}
      <ul class=flashes>
        {% for message in messages %}
          <li>{{ message }}</li>
        {% endfor %}
      </ul>
    {% endif %}
  {% endwith %}
  <h1>🐍📈 Python e Finanças - Retrieval Augmented Generation 🦜️🔗</h1>
  <a href='https://youtu.be/muFyRwfqVA0'>Watch the video on my YouTube Channel</a>
  <h2>Ask a Question</h2>
  <form method=post>
    <input type=text name=question>
    <input type=submit value='Ask!'>
  </form>
  {% if answer %}
    <h3>{{ question }}</h3>
    <div id="content" style="width:50%">{{ answer }}</div>
    <!-- The LLM answers in Markdown; marked.js renders it as HTML. The
         script sits inside the if-block so it only runs when the #content
         element actually exists. -->
    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
    <script>
      let original_text = document.getElementById('content').innerHTML;
      document.getElementById('content').innerHTML = marked.parse(original_text);
    </script>
  {% endif %}
  <hr>
  <h2>UI Endpoints:</h2>
  <ul>
    <li><a href='/'>Home -> Chat and Ask a Question</a></li>
    <li><a href='/embed'>Embed Documents into the VectorStore</a></li>
  </ul>
</body>
The Flask application:
from pathlib import Path

from flask import Flask, request, render_template, flash, redirect

app = Flask(__name__)
app.secret_key = 'super secret key'
app.config['SESSION_TYPE'] = 'filesystem'

# Module-level singletons: the embedding model and the vector store are
# created once per process by init_vector_db().
VS, EMBEDDINGS = None, None


def init_vector_db():
    global VS, EMBEDDINGS
    if EMBEDDINGS is None:
        from gen_ai_hub.proxy.langchain.init_models import init_embedding_model
        EMBEDDINGS = init_embedding_model('text-embedding-3-small')
    if VS is None:
        # https://python.langchain.com/docs/integrations/vectorstores/
        from langchain_core.vectorstores import InMemoryVectorStore
        VS = InMemoryVectorStore(embedding=EMBEDDINGS)
    return VS, EMBEDDINGS
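# Note: InMemoryVectorStore is rebuilt from scratch on every restart. The
# LangChain page linked above lists persistent drop-in alternatives; a
# minimal sketch with Chroma (assumes the langchain-chroma package and a
# local ./chroma_db directory, neither of which is part of the original gist):
#
#   from langchain_chroma import Chroma
#   VS = Chroma(embedding_function=EMBEDDINGS, persist_directory='./chroma_db')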
@app.route('/embed', methods=['GET'])
def embed():
    from langchain_community.document_loaders import PyPDFLoader
    from langchain.text_splitter import CharacterTextSplitter
    print('Loading Embeddings and Vectorstore...')
    init_vector_db()
    # Split each page into 500-character chunks with 100 characters of overlap
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    print('... DONE!')
    uploaded_files = [x for x in Path('./upload').glob('**/*') if x.is_file()]
    for filepath in uploaded_files:
        documents = []
        loader = PyPDFLoader(filepath)
        for n, doc_page in enumerate(loader.lazy_load()):
            print(f'PDF to Text {filepath}, page {n}')
            documents.append(doc_page)
        text_chunks = text_splitter.split_documents(documents)
        VS.add_documents(text_chunks)
    print('Uploading DONE!')
    flash('Files embedded into the VectorStore! ✌️', 'success')
    return redirect('/')
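# An LLM-free way to sanity-check what /embed stored: similarity_search is
# part of LangChain's standard VectorStore interface (the query string here
# is just a hypothetical example):
#
#   docs = VS.similarity_search('What is diversification?', k=3)
#   for doc in docs:
#       print(doc.metadata, doc.page_content[:80])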
def get_llm():
    from gen_ai_hub.proxy.langchain.openai import ChatOpenAI
    from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client
    proxy_client = get_proxy_client('gen-ai-hub')
    return ChatOpenAI(proxy_model_name='gpt-4o-mini', proxy_client=proxy_client)
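# get_llm() goes through SAP's Generative AI Hub proxy. Outside that
# environment, the plain LangChain client should be a drop-in replacement;
# a sketch assuming the langchain-openai package and an OPENAI_API_KEY
# environment variable:
#
#   from langchain_openai import ChatOpenAI
#   return ChatOpenAI(model='gpt-4o-mini')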
def get_prompt():
    from langchain.prompts import PromptTemplate
    prompt_template = 'You are a helpful finance advisor. Context: {context} Question: {question}'
    return PromptTemplate(
        template=prompt_template,
        input_variables=['context', 'question'])
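# For reference, PromptTemplate.format() fills the two placeholders, e.g.
# (with made-up values):
#   get_prompt().format(context='<retrieved chunks>', question='<user question>')
#   -> 'You are a helpful finance advisor. Context: <retrieved chunks> Question: <user question>'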
@app.route('/', methods=['GET', 'POST'])
def chat():
    from langchain.chains import RetrievalQA
    init_vector_db()
    question, answer = '', ''
    if request.method == 'POST':
        # Retrieval: fetch the 5 chunks most similar to the question
        question = request.form['question']
        retriever = VS.as_retriever(search_kwargs={'k': 5})
        # API call: 'stuff' puts all retrieved chunks into the prompt's {context}
        qa = RetrievalQA.from_chain_type(llm=get_llm(),
                                         retriever=retriever,
                                         chain_type='stuff',
                                         chain_type_kwargs={'prompt': get_prompt()})
        ivk = qa.invoke(question)
        question, answer = ivk['query'], ivk['result']
    return render_template('chat.html',
                           question=question,
                           answer=answer)


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=True)
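RetrievalQA is LangChain's legacy chains API; newer releases favor composing the same stuff-the-context pipeline from LCEL runnables. A minimal sketch of the equivalent chain, assuming the retriever, get_prompt() and get_llm() helpers above (this is not part of the original gist):

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    # Concatenate the retrieved chunks into the prompt's {context} placeholder
    return '\n\n'.join(doc.page_content for doc in docs)

rag_chain = (
    {'context': retriever | format_docs, 'question': RunnablePassthrough()}
    | get_prompt()
    | get_llm()
    | StrOutputParser()
)
answer = rag_chain.invoke(question)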