Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save KrishnanSriram/eabff8ea9eae14a829b2495c513f838a to your computer and use it in GitHub Desktop.
import os
from typing import Type
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.tools import BaseTool
# from langchain.pydantic_v1 import BaseModel, Field
from pydantic import BaseModel, Field
import requests
from bs4 import BeautifulSoup
from langchain_ollama.chat_models import ChatOllama
from langgraph.prebuilt import create_react_agent
from langchain_openai import ChatOpenAI
from typing import TypedDict, Annotated
import operator
from urllib3.exceptions import InsecureRequestWarning
from urllib3 import disable_warnings
# =========================
# Global LLM (shared)
# =========================
# Shared chat model: local Ollama "llama3.2" at low temperature for focused, less creative output.
llm = ChatOllama(model="llama3.2", temperature=0.3)
# Silence the urllib3 warning emitted because requests.get(..., verify=False) is used below.
disable_warnings(InsecureRequestWarning)
# --- Tool Definition ---
class WebScraperInput(BaseModel):
    """Argument schema for WebScraperTool: a single URL string."""

    # Forwarded verbatim to requests.get by the tool.
    url: str = Field(..., description="The URL of the webpage to scrape.")
class WebScraperTool(BaseTool):
    """Tool that downloads a webpage and returns its visible text content."""

    name: str = "web_scraper"
    description: str = "Scrapes the text content from a given URL."
    args_schema: Type[BaseModel] = WebScraperInput

    def _run(self, url: str) -> str:
        """Fetch *url* and return its extracted text, or an error string on failure."""
        divider = "========================"
        print(divider)
        print("Fetching web content...", url)
        print(divider)
        try:
            # NOTE(review): verify=False disables TLS certificate validation —
            # fine for a local demo, but confirm before any production use.
            resp = requests.get(url, verify=False, timeout=120)
            resp.raise_for_status()
            # Strip markup and collapse the document into space-separated text.
            page = BeautifulSoup(resp.text, 'html.parser')
            extracted = page.get_text(separator=' ', strip=True)
            print("Content fetched and parsed successfully.")
            return extracted
        except requests.exceptions.RequestException as e:
            return f"Error fetching content: {e}"
        except Exception as e:
            return f"Error parsing HTML: {e}"
class SummarizeContentInput(BaseModel):
    """Argument schema for SummarizeContent: the raw text to condense."""

    # The full text handed to the summarization chain.
    content: str = Field(..., description="The text content to be summarized.")
class SummarizeContent(BaseTool):
    """Tool that produces a plain-English summary of arbitrary text via the shared LLM."""

    name: str = "summarize_text"
    description: str = "Read any text and provide a simple English summary of its contents"
    args_schema: Type[BaseModel] = SummarizeContentInput

    def _run(self, content: str) -> str:
        """Summarize *content* using the module-level `llm` and return plain text.

        Bug fix: the original signature was `_run(content: str)` with no `self`.
        Because BaseTool invokes `_run` as a bound method, the instance was
        bound to the parameter named `content` positionally, and supplying
        `content=` again as a keyword raised a TypeError — the tool could
        never be called successfully by the agent.
        """
        print("========================")
        print("Summarizing content...")
        print("========================")
        summary_prompt = PromptTemplate.from_template(
            "Please provide a concise summary of the following text in not more than 10 lines as plain continuous text only:\n\n{text}"
        )
        # LCEL pipeline: raw text -> prompt -> LLM -> plain string.
        summarization_chain = (
            {"text": RunnablePassthrough()} | summary_prompt | llm | StrOutputParser()
        )
        return summarization_chain.invoke(content)
# --- Agent State ---
class AgentState(TypedDict):
    """Graph state: the running conversation as a list of messages."""

    # Annotated with operator.add so graph updates concatenate message
    # lists instead of replacing them.
    messages: Annotated[list, operator.add]
# --- Main Application Logic ---
def main():
    """Run the ReAct agent: scrape a fixed URL, summarize it, print the answer.

    Assumes Ollama is running locally with the model used by the shared
    `llm` (pull it with e.g. `ollama pull llama3`).
    """
    toolbox = [WebScraperTool(), SummarizeContent()]
    agent_executor = create_react_agent(llm, toolbox)

    print("LangGraph Web Scraper Agent")
    print("---------------------------")

    url_to_scrape = "https://www.usbank.com/credit-cards/cash-plus-visa-signature-credit-card.html"
    task = f"""
Please help me with the following workflow:
1. Fetch content from this URL: {url_to_scrape} using WebScraperTool
2. Summarize the content using SummarizeContent
3. Finally present the summary as your final answer
Execute above steps in order.
"""
    print(f"\nAttempting to scrape: {url_to_scrape}\n")

    # invoke() runs the graph to completion and returns its final state.
    final_state = agent_executor.invoke({"messages": [HumanMessage(content=task)]})

    # The last message in the final state is the agent's answer.
    closing = final_state['messages'][-1]
    print("--- Final AI Response ---")
    print(closing.content)

    # The second-to-last message is typically the final tool's raw output;
    # show a 2000-character preview when that holds.
    penultimate = final_state['messages'][-2]
    if penultimate.type == 'tool':
        print("\n--- Scraped Content (from final state) ---")
        print(penultimate.content[:2000] + "...")
if __name__ == "__main__":
    # Required packages:
    # pip install langchain langgraph langchain_ollama beautifulsoup4 requests
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment