Last active
August 20, 2025 21:55
-
-
Save KrishnanSriram/eabff8ea9eae14a829b2495c513f838a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| from typing import Type | |
| from langchain_core.messages import HumanMessage | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_core.prompts import PromptTemplate | |
| from langchain_core.runnables import RunnablePassthrough | |
| from langchain_core.tools import BaseTool | |
| # from langchain.pydantic_v1 import BaseModel, Field | |
| from pydantic import BaseModel, Field | |
| import requests | |
| from bs4 import BeautifulSoup | |
| from langchain_ollama.chat_models import ChatOllama | |
| from langgraph.prebuilt import create_react_agent | |
| from langchain_openai import ChatOpenAI | |
| from typing import TypedDict, Annotated | |
| import operator | |
| from urllib3.exceptions import InsecureRequestWarning | |
| from urllib3 import disable_warnings | |
# =========================
# Global LLM (shared)
# =========================
# Module-level LLM shared by the SummarizeContent tool and the agent in main().
llm = ChatOllama(model="llama3.2", temperature=0.3)
# Silence the per-request InsecureRequestWarning triggered by the scraper's
# requests.get(..., verify=False) calls.
disable_warnings(InsecureRequestWarning)
| # --- Tool Definition --- | |
class WebScraperInput(BaseModel):
    """Input schema for WebScraperTool."""
    # Required (Field(...) has no default): the page to fetch.
    url: str = Field(..., description="The URL of the webpage to scrape.")
class WebScraperTool(BaseTool):
    """Tool that downloads a webpage and returns its visible text content."""

    name: str = "web_scraper"
    description: str = "Scrapes the text content from a given URL."
    args_schema: Type[BaseModel] = WebScraperInput

    def _run(self, url: str) -> str:
        """Fetch `url` over HTTP and return the page's plain text.

        On any failure an error string is returned instead of raising, so
        the agent sees what went wrong and can react.
        """
        print("========================")
        print("Fetching web content...", url)
        print("========================")
        try:
            # NOTE(review): verify=False disables TLS certificate checks
            # (warnings for this are silenced at module level) — confirm
            # this is intentional for the target hosts.
            resp = requests.get(url, verify=False, timeout=120)
            resp.raise_for_status()
        except requests.exceptions.RequestException as e:
            return f"Error fetching content: {e}"
        try:
            # Strip the markup and collapse the document into plain text.
            parsed = BeautifulSoup(resp.text, 'html.parser')
            page_text = parsed.get_text(separator=' ', strip=True)
        except Exception as e:
            return f"Error parsing HTML: {e}"
        print("Content fetched and parsed successfully.")
        return page_text
class SummarizeContentInput(BaseModel):
    """Input schema for the SummarizeContent tool."""
    # Required (Field(...) has no default): the raw text to summarize.
    content: str = Field(..., description="The text content to be summarized.")
class SummarizeContent(BaseTool):
    """Tool that produces a simple English summary of arbitrary text via the shared LLM."""

    name: str = "summarize_text"
    description: str = "Read any text and provide a simple English summary of its contents"
    args_schema: Type[BaseModel] = SummarizeContentInput

    def _run(self, content: str) -> str:
        """Summarize `content` using the module-level `llm`.

        Bug fix: the original signature was ``_run(content)`` with no
        ``self``; when the framework invoked the tool as a bound method the
        instance was bound to ``content`` and the real argument raised a
        TypeError.

        Args:
            content: The text to summarize.

        Returns:
            A plain-text summary (the prompt asks for at most 10 lines).
        """
        print("========================")
        print("Summarizing content...")
        print("========================")
        summary_prompt = PromptTemplate.from_template(
            "Please provide a concise summary of the following text in not more than 10 lines as plain continuous text only:\n\n{text}"
        )
        # Pipe the raw text straight into the prompt, then the shared LLM,
        # then strip the chat wrapper down to a plain string.
        summarization_chain = (
            {"text": RunnablePassthrough()} | summary_prompt | llm | StrOutputParser()
        )
        return summarization_chain.invoke(content)
| # --- Agent State --- | |
class AgentState(TypedDict):
    """
    Represents the state of our agent.

    `messages` is annotated with operator.add so graph steps accumulate
    (append to) the message history rather than replacing it.
    """
    messages: Annotated[list, operator.add]
| # --- Main Application Logic --- | |
def main():
    """Run the LangGraph ReAct agent: scrape a URL, summarize it, print results.

    Uses the module-level `llm` (ChatOllama, model "llama3.2") — an Ollama
    server with that model must be running.
    """
    # Give the agent both tools; the task prompt names them explicitly.
    tools = [WebScraperTool(), SummarizeContent()]
    agent_executor = create_react_agent(llm, tools)

    print("LangGraph Web Scraper Agent")
    print("---------------------------")

    # Example: scrape and summarize a credit-card product page.
    url_to_scrape = "https://www.usbank.com/credit-cards/cash-plus-visa-signature-credit-card.html"
    task = f"""
Please help me with the following workflow:
1. Fetch content from this URL: {url_to_scrape} using WebScraperTool
2. Summarize the content using SummarizeContent
3. Finally present the summary as your final answer
Execute above steps in order.
"""
    print(f"\nAttempting to scrape: {url_to_scrape}\n")

    # invoke() runs the graph to completion and returns its final state.
    final_state = agent_executor.invoke({"messages": [HumanMessage(content=task)]})

    messages = final_state['messages']
    final_message = messages[-1]
    print("--- Final AI Response ---")
    print(final_message.content)

    # Robustness fix: the original indexed messages[-2] unconditionally,
    # which raises IndexError when fewer than two messages come back.
    if len(messages) >= 2:
        tool_output_message = messages[-2]
        if tool_output_message.type == 'tool':
            print("\n--- Scraped Content (from final state) ---")
            print(tool_output_message.content[:2000] + "...")
if __name__ == "__main__":
    # To run this, you need to have the following packages installed:
    # pip install langchain langgraph langchain_ollama beautifulsoup4 requests
    # An Ollama server with the llama3.2 model must also be running.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment