Created
October 2, 2024 05:02
-
-
Save rbiswasfc/fb86f410da8ed9b3daf8e99b069aa833 to your computer and use it in GitHub Desktop.
refactoring using aimagic
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "9b8020b3", | |
"cell_type": "code", | |
"source": "%ai reset", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "d726ed23", | |
"cell_type": "code", | |
"source": "%%aip 0\nWrite a function to extract structured data from *LLM responses*. Use claude api.\n\nSpecifically, I would like to extract named entities from a text. \nIt should have entity_type, entity_description and entity_text", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"id": "7f49639c", | |
"cell_type": "markdown", | |
"source": "## V1: Using Anthropic API" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "f6d8025a", | |
"cell_type": "code", | |
"source": "import anthropic\nimport json\n\ndef extract_entities(text):\n client = anthropic.Anthropic()\n \n prompt = f\"\"\"Extract named entities from the following text. For each entity, provide:\n - entity_type: The category of the entity (e.g., PERSON, ORGANIZATION, LOCATION, etc.)\n - entity_description: A brief description of the entity\n - entity_text: The exact text of the entity as it appears in the original text\n\n Format the output as a JSON list of objects.\n\n Text: {text}\n\n Extracted entities:\"\"\"\n \n messages = [\n {\"role\": \"user\", \"content\": prompt}\n ]\n\n response = client.messages.create(\n model=\"claude-3-sonnet-20240229\",\n max_tokens=512,\n messages=messages\n )\n \n # Parse the JSON response\n try:\n return json.loads(response.content[0].text)\n except json.JSONDecodeError:\n # Handle parsing errors\n return {\"error\": \"Failed to parse structured data from LLM response\"}\n \n\ntext = \"Apple Inc., founded by Steve Jobs, is headquartered in Cupertino, California.\"\nentities = extract_entities(text)\nprint(json.dumps(entities, indent=2))", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "[\n {\n \"entity_type\": \"ORGANIZATION\",\n \"entity_description\": \"A technology company known for its consumer electronics, computer software, and online services.\",\n \"entity_text\": \"Apple Inc.\"\n },\n {\n \"entity_type\": \"PERSON\",\n \"entity_description\": \"An American business magnate and investor who co-founded Apple Inc.\",\n \"entity_text\": \"Steve Jobs\"\n },\n {\n \"entity_type\": \"LOCATION\",\n \"entity_description\": \"A city in Santa Clara County, California, United States, which serves as the corporate headquarters of Apple Inc.\",\n \"entity_text\": \"Cupertino, California\"\n }\n]\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"id": "6bd0116a", | |
"cell_type": "markdown", | |
"source": "## V2: Using instructor" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "b4d9c71f", | |
"cell_type": "code", | |
"source": "from toolslm.download import read_docs\ninstructor_doc = read_docs(\"https://raw.githubusercontent.com/jxnl/instructor/refs/heads/main/docs/index.md\")", | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "40b7fa90", | |
"cell_type": "code", | |
"source": "%%aip 0 -c \nlet's use the instructor python library for this. This is the documentation: $`instructor_doc`. Use claude!", | |
"execution_count": 6, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "aa83f7e9", | |
"cell_type": "code", | |
"source": "import instructor\nfrom anthropic import Anthropic\nfrom pydantic import BaseModel\n\nclass Entity(BaseModel):\n entity_type: str\n entity_description: str\n entity_text: str\n\nclass EntityExtraction(BaseModel):\n entities: list[Entity]\n\nclient = instructor.from_anthropic(Anthropic())\n\ndef extract_entities(text: str) -> EntityExtraction:\n return client.messages.create(\n model=\"claude-3-sonnet-20240229\",\n max_tokens=1000,\n response_model=EntityExtraction,\n messages=[\n {\n \"role\": \"user\", \n \"content\": f\"Extract named entities from this text: {text}\\n\\nProvide the entity type, a brief description, and the exact text for each entity.\"\n }\n ]\n )\n\n# Test the function\ntext = \"Apple Inc., founded by Steve Jobs, is headquartered in Cupertino, California.\"\nresult = extract_entities(text)\nprint(result)\n\n# To access individual entities:\nfor entity in result.entities:\n print(f\"Type: {entity.entity_type}\")\n print(f\"Description: {entity.entity_description}\")\n print(f\"Text: {entity.entity_text}\")\n print()", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "entities=[Entity(entity_type='Organization', entity_description='A technology company that designs, develops, and sells consumer electronics, computer software, and online services.', entity_text='Apple Inc.'), Entity(entity_type='Person', entity_description='Co-founder and former CEO of Apple Inc.', entity_text='Steve Jobs'), Entity(entity_type='Location', entity_description='A city in California where Apple Inc. is headquartered.', entity_text='Cupertino, California')]\nType: Organization\nDescription: A technology company that designs, develops, and sells consumer electronics, computer software, and online services.\nText: Apple Inc.\n\nType: Person\nDescription: Co-founder and former CEO of Apple Inc.\nText: Steve Jobs\n\nType: Location\nDescription: A city in California where Apple Inc. is headquartered.\nText: Cupertino, California\n\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"id": "2692d75c", | |
"cell_type": "markdown", | |
"source": "## V3: Using claudette" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "8054673c", | |
"cell_type": "code", | |
"source": "def read_gist(s): return read_docs('https://gist.githubusercontent.com/rbiswasfc/'+s)\nclaudette_core = read_gist(\n \"c47c80d35fe035aaf213dfcf690c6083/raw/615180e5def68d722ec8c6eb0cadda651a870d33/claudette.md\"\n)", | |
"execution_count": 8, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "55ea733d", | |
"cell_type": "code", | |
"source": "%%aip 0 -c\nFinally, let's use the claudette structured api (cli.structured) for this. \nThis is the documentation: $`claudette_core`. This is an example of how to use the api\n\n```\nfrom fastcore.utils import *\nfrom claudette import *\n\nclass Fact():\n \"Fact on a given topic\"\n def __init__(self, topic: str, summary: str, details: str): store_attr()\n __repr__ = basic_repr([\"topic\", \"summary\", \"details\"])\n \nmodel = models[1]\ncli = Client(model)\n\ncli.structured(\"Tell me about Neural Networks\", Fact)\n```", | |
"execution_count": 10, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "188c2ba4", | |
"cell_type": "code", | |
"source": "from fastcore.utils import *\nfrom claudette import *\n\nclass Entity():\n \"an entity\"\n def __init__(self, entity_type: str, entity_description: str, entity_text: str): store_attr()\n __repr__ = basic_repr([\"entity_type\", \"entity_description\", \"entity_text\"])\n\nclass EntityExtraction():\n \"list of entities\"\n def __init__(self, entities: list[Entity]):\n store_attr()\n __repr__ = basic_repr([\"entities\"])\n\nmodel = models[1]\ncli = Client(model)\n\ndef extract_entities(text: str) -> EntityExtraction:\n prompt = f\"Extract named entities from this text: {text}\\n\\nProvide the entity type, a brief description, and the exact text for each entity.\"\n return cli.structured(prompt, EntityExtraction)\n\n# Test the function\ntext = \"Apple Inc., founded by Steve Jobs, is headquartered in Cupertino, California.\"\nresult = extract_entities(text)\nprint(result)", | |
"execution_count": 11, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "[__main__.EntityExtraction(entities=[{'type': 'ORGANIZATION', 'description': 'A multinational technology company', 'text': 'Apple Inc.'}, {'type': 'PERSON', 'description': 'Co-founder of Apple Inc.', 'text': 'Steve Jobs'}, {'type': 'LOCATION', 'description': 'City in California where Apple Inc. is headquartered', 'text': 'Cupertino'}, {'type': 'LOCATION', 'description': 'U.S. state where Apple Inc. is headquartered', 'text': 'California'}])]\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "cabca8c8", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "795c915b", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "1bf18604", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "00501b9d", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "35755e8d", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "f9bbaeae", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "16c706f6", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "42e43c40", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "c03ec678", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "e03f0920", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "8521718e", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "be2f6dbd", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "79d9a0f8", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "refactoring using aimagic", | |
"public": true | |
} | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.10.15", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment