rbiswasfc · October 2, 2024 05:02
diff --git a/06_refactoring_example.ipynb b/06_refactoring_example.ipynb
 {
  "cells": [
    {
      "metadata": {
        "trusted": true
      },
      "id": "9b8020b3",
      "cell_type": "code",
      "source": "%ai reset",
      "execution_count": 1,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "d726ed23",
      "cell_type": "code",
      "source": "%%aip 0\nWrite a function to extract structured data from *LLM responses*. Use claude api.\n\nSpecifically, I would like to extract named entities from a text. \nIt should have entity_type, entity_description and entity_text",
      "execution_count": 2,
      "outputs": []
    },
    {
      "metadata": {},
      "id": "7f49639c",
      "cell_type": "markdown",
      "source": "## V1: Using Anthropic API"
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "f6d8025a",
      "cell_type": "code",
      "source": "import anthropic\nimport json\n\ndef extract_entities(text):\n    client = anthropic.Anthropic()\n    \n    prompt = f\"\"\"Extract named entities from the following text. For each entity, provide:\n    - entity_type: The category of the entity (e.g., PERSON, ORGANIZATION, LOCATION, etc.)\n    - entity_description: A brief description of the entity\n    - entity_text: The exact text of the entity as it appears in the original text\n\n    Format the output as a JSON list of objects.\n\n    Text: {text}\n\n    Extracted entities:\"\"\"\n    \n    messages = [\n        {\"role\": \"user\", \"content\": prompt}\n    ]\n\n    response = client.messages.create(\n        model=\"claude-3-sonnet-20240229\",\n        max_tokens=512,\n        messages=messages\n    )\n    \n    # Parse the JSON response\n    try:\n        return json.loads(response.content[0].text)\n    except json.JSONDecodeError:\n        # Handle parsing errors\n        return {\"error\": \"Failed to parse structured data from LLM response\"}\n    \n\ntext = \"Apple Inc., founded by Steve Jobs, is headquartered in Cupertino, California.\"\nentities = extract_entities(text)\nprint(json.dumps(entities, indent=2))",
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "text": "[\n  {\n    \"entity_type\": \"ORGANIZATION\",\n    \"entity_description\": \"A technology company known for its consumer electronics, computer software, and online services.\",\n    \"entity_text\": \"Apple Inc.\"\n  },\n  {\n    \"entity_type\": \"PERSON\",\n    \"entity_description\": \"An American business magnate and investor who co-founded Apple Inc.\",\n    \"entity_text\": \"Steve Jobs\"\n  },\n  {\n    \"entity_type\": \"LOCATION\",\n    \"entity_description\": \"A city in Santa Clara County, California, United States, which serves as the corporate headquarters of Apple Inc.\",\n    \"entity_text\": \"Cupertino, California\"\n  }\n]\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {},
      "id": "6bd0116a",
      "cell_type": "markdown",
      "source": "## V2: Using instructor"
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "b4d9c71f",
      "cell_type": "code",
      "source": "from toolslm.download import read_docs\ninstructor_doc = read_docs(\"https://raw.githubusercontent.com/jxnl/instructor/refs/heads/main/docs/index.md\")",
      "execution_count": 5,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "40b7fa90",
      "cell_type": "code",
      "source": "%%aip 0 -c \nlet's use the instructor python library for this. This is the documentation: $`instructor_doc`. Use claude!",
      "execution_count": 6,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "aa83f7e9",
      "cell_type": "code",
      "source": "import instructor\nfrom anthropic import Anthropic\nfrom pydantic import BaseModel\n\nclass Entity(BaseModel):\n    entity_type: str\n    entity_description: str\n    entity_text: str\n\nclass EntityExtraction(BaseModel):\n    entities: list[Entity]\n\nclient = instructor.from_anthropic(Anthropic())\n\ndef extract_entities(text: str) -> EntityExtraction:\n    return client.messages.create(\n        model=\"claude-3-sonnet-20240229\",\n        max_tokens=1000,\n        response_model=EntityExtraction,\n        messages=[\n            {\n                \"role\": \"user\", \n                \"content\": f\"Extract named entities from this text: {text}\\n\\nProvide the entity type, a brief description, and the exact text for each entity.\"\n            }\n        ]\n    )\n\n# Test the function\ntext = \"Apple Inc., founded by Steve Jobs, is headquartered in Cupertino, California.\"\nresult = extract_entities(text)\nprint(result)\n\n# To access individual entities:\nfor entity in result.entities:\n    print(f\"Type: {entity.entity_type}\")\n    print(f\"Description: {entity.entity_description}\")\n    print(f\"Text: {entity.entity_text}\")\n    print()",
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "stream",
          "text": "entities=[Entity(entity_type='Organization', entity_description='A technology company that designs, develops, and sells consumer electronics, computer software, and online services.', entity_text='Apple Inc.'), Entity(entity_type='Person', entity_description='Co-founder and former CEO of Apple Inc.', entity_text='Steve Jobs'), Entity(entity_type='Location', entity_description='A city in California where Apple Inc. is headquartered.', entity_text='Cupertino, California')]\nType: Organization\nDescription: A technology company that designs, develops, and sells consumer electronics, computer software, and online services.\nText: Apple Inc.\n\nType: Person\nDescription: Co-founder and former CEO of Apple Inc.\nText: Steve Jobs\n\nType: Location\nDescription: A city in California where Apple Inc. is headquartered.\nText: Cupertino, California\n\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {},
      "id": "2692d75c",
      "cell_type": "markdown",
      "source": "## V3: Using claudette"
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "8054673c",
      "cell_type": "code",
      "source": "def read_gist(s): return read_docs('https://gist.githubusercontent.com/rbiswasfc/'+s)\nclaudette_core = read_gist(\n    \"c47c80d35fe035aaf213dfcf690c6083/raw/615180e5def68d722ec8c6eb0cadda651a870d33/claudette.md\"\n)",
      "execution_count": 8,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "55ea733d",
      "cell_type": "code",
      "source": "%%aip 0 -c\nFinally, let's use the claudette structured api (cli.structured) for this. \nThis is the documentation: $`claudette_core`. This is an example of how to use the api\n\n```\nfrom fastcore.utils import *\nfrom claudette import *\n\nclass Fact():\n    \"Fact on a given topic\"\n    def __init__(self, topic: str, summary: str, details: str): store_attr()\n    __repr__ = basic_repr([\"topic\", \"summary\", \"details\"])\n    \nmodel = models[1]\ncli = Client(model)\n\ncli.structured(\"Tell me about Neural Networks\", Fact)\n```",
      "execution_count": 10,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "188c2ba4",
      "cell_type": "code",
      "source": "from fastcore.utils import *\nfrom claudette import *\n\nclass Entity():\n    \"an entity\"\n    def __init__(self, entity_type: str, entity_description: str, entity_text: str): store_attr()\n    __repr__ = basic_repr([\"entity_type\", \"entity_description\", \"entity_text\"])\n\nclass EntityExtraction():\n    \"list of entities\"\n    def __init__(self, entities: list[Entity]):\n        store_attr()\n    __repr__ = basic_repr([\"entities\"])\n\nmodel = models[1]\ncli = Client(model)\n\ndef extract_entities(text: str) -> EntityExtraction:\n    prompt = f\"Extract named entities from this text: {text}\\n\\nProvide the entity type, a brief description, and the exact text for each entity.\"\n    return cli.structured(prompt, EntityExtraction)\n\n# Test the function\ntext = \"Apple Inc., founded by Steve Jobs, is headquartered in Cupertino, California.\"\nresult = extract_entities(text)\nprint(result)",
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "text": "[__main__.EntityExtraction(entities=[{'type': 'ORGANIZATION', 'description': 'A multinational technology company', 'text': 'Apple Inc.'}, {'type': 'PERSON', 'description': 'Co-founder of Apple Inc.', 'text': 'Steve Jobs'}, {'type': 'LOCATION', 'description': 'City in California where Apple Inc. is headquartered', 'text': 'Cupertino'}, {'type': 'LOCATION', 'description': 'U.S. state where Apple Inc. is headquartered', 'text': 'California'}])]\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "cabca8c8",
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "795c915b",
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "1bf18604",
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "00501b9d",
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "35755e8d",
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "f9bbaeae",
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "16c706f6",
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "42e43c40",
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "c03ec678",
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "e03f0920",
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "8521718e",
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "be2f6dbd",
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "79d9a0f8",
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "gist": {
      "id": "",
      "data": {
        "description": "refactoring using aimagic",
        "public": true
      }
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3 (ipykernel)",
      "language": "python"
    },
    "language_info": {
      "name": "python",
      "version": "3.10.15",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
 }
	{
	"cells": [
	{
	"metadata": {
	"trusted": true
	},
	"id": "9b8020b3",
	"cell_type": "code",
	"source": "%ai reset",
	"execution_count": 1,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "d726ed23",
	"cell_type": "code",
	"source": "%%aip 0\nWrite a function to extract structured data from *LLM responses*. Use claude api.\n\nSpecifically, I would like to extract named entities from a text. \nIt should have entity_type, entity_description and entity_text",
	"execution_count": 2,
	"outputs": []
	},
	{
	"metadata": {},
	"id": "7f49639c",
	"cell_type": "markdown",
	"source": "## V1: Using Anthropic API"
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "f6d8025a",
	"cell_type": "code",
	"source": "import anthropic\nimport json\n\ndef extract_entities(text):\n    client = anthropic.Anthropic()\n    \n    prompt = f\"\"\"Extract named entities from the following text. For each entity, provide:\n    - entity_type: The category of the entity (e.g., PERSON, ORGANIZATION, LOCATION, etc.)\n    - entity_description: A brief description of the entity\n    - entity_text: The exact text of the entity as it appears in the original text\n\n    Format the output as a JSON list of objects.\n\n    Text: {text}\n\n    Extracted entities:\"\"\"\n    \n    messages = [\n        {\"role\": \"user\", \"content\": prompt}\n    ]\n\n    response = client.messages.create(\n        model=\"claude-3-sonnet-20240229\",\n        max_tokens=512,\n        messages=messages\n    )\n    \n    # Parse the JSON response\n    try:\n        return json.loads(response.content[0].text)\n    except json.JSONDecodeError:\n        # Handle parsing errors\n        return {\"error\": \"Failed to parse structured data from LLM response\"}\n    \n\ntext = \"Apple Inc., founded by Steve Jobs, is headquartered in Cupertino, California.\"\nentities = extract_entities(text)\nprint(json.dumps(entities, indent=2))",
	"execution_count": 4,
	"outputs": [
	{
	"output_type": "stream",
	"text": "[\n {\n \"entity_type\": \"ORGANIZATION\",\n \"entity_description\": \"A technology company known for its consumer electronics, computer software, and online services.\",\n \"entity_text\": \"Apple Inc.\"\n },\n {\n \"entity_type\": \"PERSON\",\n \"entity_description\": \"An American business magnate and investor who co-founded Apple Inc.\",\n \"entity_text\": \"Steve Jobs\"\n },\n {\n \"entity_type\": \"LOCATION\",\n \"entity_description\": \"A city in Santa Clara County, California, United States, which serves as the corporate headquarters of Apple Inc.\",\n \"entity_text\": \"Cupertino, California\"\n }\n]\n",
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {},
	"id": "6bd0116a",
	"cell_type": "markdown",
	"source": "## V2: Using instructor"
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "b4d9c71f",
	"cell_type": "code",
	"source": "from toolslm.download import read_docs\ninstructor_doc = read_docs(\"https://raw.githubusercontent.com/jxnl/instructor/refs/heads/main/docs/index.md\")",
	"execution_count": 5,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "40b7fa90",
	"cell_type": "code",
	"source": "%%aip 0 -c \nlet's use the instructor python library for this. This is the documentation: $`instructor_doc`. Use claude!",
	"execution_count": 6,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "aa83f7e9",
	"cell_type": "code",
	"source": "import instructor\nfrom anthropic import Anthropic\nfrom pydantic import BaseModel\n\nclass Entity(BaseModel):\n    entity_type: str\n    entity_description: str\n    entity_text: str\n\nclass EntityExtraction(BaseModel):\n    entities: list[Entity]\n\nclient = instructor.from_anthropic(Anthropic())\n\ndef extract_entities(text: str) -> EntityExtraction:\n    return client.messages.create(\n        model=\"claude-3-sonnet-20240229\",\n        max_tokens=1000,\n        response_model=EntityExtraction,\n        messages=[\n            {\n                \"role\": \"user\", \n                \"content\": f\"Extract named entities from this text: {text}\\n\\nProvide the entity type, a brief description, and the exact text for each entity.\"\n            }\n        ]\n    )\n\n# Test the function\ntext = \"Apple Inc., founded by Steve Jobs, is headquartered in Cupertino, California.\"\nresult = extract_entities(text)\nprint(result)\n\n# To access individual entities:\nfor entity in result.entities:\n    print(f\"Type: {entity.entity_type}\")\n    print(f\"Description: {entity.entity_description}\")\n    print(f\"Text: {entity.entity_text}\")\n    print()",
	"execution_count": 7,
	"outputs": [
	{
	"output_type": "stream",
	"text": "entities=[Entity(entity_type='Organization', entity_description='A technology company that designs, develops, and sells consumer electronics, computer software, and online services.', entity_text='Apple Inc.'), Entity(entity_type='Person', entity_description='Co-founder and former CEO of Apple Inc.', entity_text='Steve Jobs'), Entity(entity_type='Location', entity_description='A city in California where Apple Inc. is headquartered.', entity_text='Cupertino, California')]\nType: Organization\nDescription: A technology company that designs, develops, and sells consumer electronics, computer software, and online services.\nText: Apple Inc.\n\nType: Person\nDescription: Co-founder and former CEO of Apple Inc.\nText: Steve Jobs\n\nType: Location\nDescription: A city in California where Apple Inc. is headquartered.\nText: Cupertino, California\n\n",
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {},
	"id": "2692d75c",
	"cell_type": "markdown",
	"source": "## V3: Using claudette"
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "8054673c",
	"cell_type": "code",
	"source": "def read_gist(s): return read_docs('https://gist.githubusercontent.com/rbiswasfc/'+s)\nclaudette_core = read_gist(\n \"c47c80d35fe035aaf213dfcf690c6083/raw/615180e5def68d722ec8c6eb0cadda651a870d33/claudette.md\"\n)",
	"execution_count": 8,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "55ea733d",
	"cell_type": "code",
	"source": "%%aip 0 -c\nFinally, let's use the claudette structured api (cli.structured) for this. \nThis is the documentation: $`claudette_core`. This is an example of how to use the api\n\n```\nfrom fastcore.utils import *\nfrom claudette import *\n\nclass Fact():\n    \"Fact on a given topic\"\n    def __init__(self, topic: str, summary: str, details: str): store_attr()\n    __repr__ = basic_repr([\"topic\", \"summary\", \"details\"])\n    \nmodel = models[1]\ncli = Client(model)\n\ncli.structured(\"Tell me about Neural Networks\", Fact)\n```",
	"execution_count": 10,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "188c2ba4",
	"cell_type": "code",
	"source": "from fastcore.utils import *\nfrom claudette import *\n\nclass Entity():\n    \"an entity\"\n    def __init__(self, entity_type: str, entity_description: str, entity_text: str): store_attr()\n    __repr__ = basic_repr([\"entity_type\", \"entity_description\", \"entity_text\"])\n\nclass EntityExtraction():\n    \"list of entities\"\n    def __init__(self, entities: list[Entity]):\n        store_attr()\n    __repr__ = basic_repr([\"entities\"])\n\nmodel = models[1]\ncli = Client(model)\n\ndef extract_entities(text: str) -> EntityExtraction:\n    prompt = f\"Extract named entities from this text: {text}\\n\\nProvide the entity type, a brief description, and the exact text for each entity.\"\n    return cli.structured(prompt, EntityExtraction)\n\n# Test the function\ntext = \"Apple Inc., founded by Steve Jobs, is headquartered in Cupertino, California.\"\nresult = extract_entities(text)\nprint(result)",
	"execution_count": 11,
	"outputs": [
	{
	"output_type": "stream",
	"text": "[__main__.EntityExtraction(entities=[{'type': 'ORGANIZATION', 'description': 'A multinational technology company', 'text': 'Apple Inc.'}, {'type': 'PERSON', 'description': 'Co-founder of Apple Inc.', 'text': 'Steve Jobs'}, {'type': 'LOCATION', 'description': 'City in California where Apple Inc. is headquartered', 'text': 'Cupertino'}, {'type': 'LOCATION', 'description': 'U.S. state where Apple Inc. is headquartered', 'text': 'California'}])]\n",
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "cabca8c8",
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "795c915b",
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "1bf18604",
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "00501b9d",
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "35755e8d",
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "f9bbaeae",
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "16c706f6",
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "42e43c40",
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "c03ec678",
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "e03f0920",
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "8521718e",
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "be2f6dbd",
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "79d9a0f8",
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	}
	],
	"metadata": {
	"gist": {
	"id": "",
	"data": {
	"description": "refactoring using aimagic",
	"public": true
	}
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3 (ipykernel)",
	"language": "python"
	},
	"language_info": {
	"name": "python",
	"version": "3.10.15",
	"mimetype": "text/x-python",
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"pygments_lexer": "ipython3",
	"nbconvert_exporter": "python",
	"file_extension": ".py"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}