Created
November 16, 2023 15:08
-
-
Save hwchase17/69a8cdef9b01760c244324339ab64f0c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from langchain.chat_models import ChatOpenAI | |
from langchain.prompts import ChatPromptTemplate | |
from langchain.schema.output_parser import StrOutputParser | |
import requests | |
from bs4 import BeautifulSoup | |
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda | |
from langchain.utilities import DuckDuckGoSearchAPIWrapper | |
import json | |
RESULTS_PER_QUESTION = 3 | |
ddg_search = DuckDuckGoSearchAPIWrapper() | |
def web_search(query: str, num_results: int = RESULTS_PER_QUESTION): | |
results = ddg_search.results(query, num_results) | |
return [r["link"] for r in results] | |
SUMMARY_TEMPLATE = """{text} | |
----------- | |
Using the above text, answer in short the following question: | |
> {question} | |
----------- | |
if the question cannot be answered using the text, imply summarize the text. Include all factual information, numbers, stats etc if available.""" # noqa: E501 | |
SUMMARY_PROMPT = ChatPromptTemplate.from_template(SUMMARY_TEMPLATE) | |
def scrape_text(url: str): | |
# Send a GET request to the webpage | |
try: | |
response = requests.get(url) | |
# Check if the request was successful | |
if response.status_code == 200: | |
# Parse the content of the request with BeautifulSoup | |
soup = BeautifulSoup(response.text, "html.parser") | |
# Extract all text from the webpage | |
page_text = soup.get_text(separator=" ", strip=True) | |
# Print the extracted text | |
return page_text | |
else: | |
return f"Failed to retrieve the webpage: Status code {response.status_code}" | |
except Exception as e: | |
print(e) | |
return f"Failed to retrieve the webpage: {e}" | |
url = "https://blog.langchain.dev/announcing-langsmith/" | |
scrape_and_summarize_chain = RunnablePassthrough.assign( | |
summary = RunnablePassthrough.assign( | |
text=lambda x: scrape_text(x["url"])[:10000] | |
) | SUMMARY_PROMPT | ChatOpenAI(model="gpt-3.5-turbo-1106") | StrOutputParser() | |
) | (lambda x: f"URL: {x['url']}\n\nSUMMARY: {x['summary']}") | |
web_search_chain = RunnablePassthrough.assign( | |
urls = lambda x: web_search(x["question"]) | |
) | (lambda x: [{"question": x["question"], "url": u} for u in x["urls"]]) | scrape_and_summarize_chain.map() | |
## This is for Arxiv | |
# from langchain.retrievers import ArxivRetriever | |
# | |
# retriever = ArxivRetriever() | |
# SUMMARY_TEMPLATE = """{doc} | |
# | |
# ----------- | |
# | |
# Using the above text, answer in short the following question: | |
# | |
# > {question} | |
# | |
# ----------- | |
# if the question cannot be answered using the text, imply summarize the text. Include all factual information, numbers, stats etc if available.""" # noqa: E501 | |
# SUMMARY_PROMPT = ChatPromptTemplate.from_template(SUMMARY_TEMPLATE) | |
# | |
# | |
# scrape_and_summarize_chain = RunnablePassthrough.assign( | |
# summary = SUMMARY_PROMPT | ChatOpenAI(model="gpt-3.5-turbo-1106") | StrOutputParser() | |
# ) | (lambda x: f"Title: {x['doc'].metadata['Title']}\n\nSUMMARY: {x['summary']}") | |
# | |
# web_search_chain = RunnablePassthrough.assign( | |
# docs = lambda x: retriever.get_summaries_as_docs(x["question"]) | |
# )| (lambda x: [{"question": x["question"], "doc": u} for u in x["docs"]]) | scrape_and_summarize_chain.map() | |
SEARCH_PROMPT = ChatPromptTemplate.from_messages( | |
[ | |
( | |
"user", | |
"Write 3 google search queries to search online that form an " | |
"objective opinion from the following: {question}\n" | |
"You must respond with a list of strings in the following format: " | |
'["query 1", "query 2", "query 3"].', | |
), | |
] | |
) | |
search_question_chain = SEARCH_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser() | json.loads | |
full_research_chain = search_question_chain | (lambda x: [{"question": q} for q in x]) | web_search_chain.map() | |
WRITER_SYSTEM_PROMPT = "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text." # noqa: E501 | |
# Report prompts from https://github.com/assafelovic/gpt-researcher/blob/master/gpt_researcher/master/prompts.py | |
RESEARCH_REPORT_TEMPLATE = """Information: | |
-------- | |
{research_summary} | |
-------- | |
Using the above information, answer the following question or topic: "{question}" in a detailed report -- \ | |
The report should focus on the answer to the question, should be well structured, informative, \ | |
in depth, with facts and numbers if available and a minimum of 1,200 words. | |
You should strive to write the report as long as you can using all relevant and necessary information provided. | |
You must write the report with markdown syntax. | |
You MUST determine your own concrete and valid opinion based on the given information. Do NOT deter to general and meaningless conclusions. | |
Write all used source urls at the end of the report, and make sure to not add duplicated sources, but only one reference for each. | |
You must write the report in apa format. | |
Please do your best, this is very important to my career.""" # noqa: E501 | |
prompt = ChatPromptTemplate.from_messages( | |
[ | |
("system", WRITER_SYSTEM_PROMPT), | |
("user", RESEARCH_REPORT_TEMPLATE), | |
] | |
) | |
def collapse_list_of_lists(list_of_lists): | |
content = [] | |
for l in list_of_lists: | |
content.append("\n\n".join(l)) | |
return "\n\n".join(content) | |
chain = RunnablePassthrough.assign( | |
research_summary= full_research_chain | collapse_list_of_lists | |
) | prompt | ChatOpenAI(model="gpt-3.5-turbo-1106") | StrOutputParser() | |
#!/usr/bin/env python | |
from fastapi import FastAPI | |
from langserve import add_routes | |
app = FastAPI( | |
title="LangChain Server", | |
version="1.0", | |
description="A simple api server using Langchain's Runnable interfaces", | |
) | |
add_routes( | |
app, | |
chain, | |
path="/research-assistant", | |
) | |
if __name__ == "__main__": | |
import uvicorn | |
uvicorn.run(app, host="localhost", port=8000) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment