
@rajivmehtaflex
Last active December 26, 2024 13:54
Demonstrates the use of Google's Gemini 2.0 Flash Thinking model with "thinking" output enabled, showing how the model reveals its thought process while solving a simple math problem. The gist also includes a Gradio chat UI for Gemini 2.0 Flash, a second thinking example, a pyproject.toml listing the dependencies, and a llama-agents multi-agent notebook.

File: gemini_thinking_deep.py

from google import genai
import os
os.environ['GEMINI_API_KEY'] = '<KEY>'

client = genai.Client(
    api_key=os.environ['GEMINI_API_KEY'],
    http_options={
        'api_version': 'v1alpha',
    })

stream = client.models.generate_content_stream(
    model='gemini-2.0-flash-thinking-exp-1219', 
    contents="What is 2*2-1*4^4"
)
is_thinking = True  # the model starts in "thinking" mode
print("****** Start thinking... ******")
for chunk in stream:
    for candidate in chunk.candidates:
        for part in candidate.content.parts:
            if part.thought:
                is_thinking = True
            elif is_thinking:  # prints "Finished thinking" when transitioning from thinking to not thinking
                is_thinking = False
                print("\n")
                print("****** Finished thinking... ******")
                print("\n")
        
            print(part.text, end="", flush=True)
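
If you want the whole reply at once rather than a stream, a minimal non-streaming sketch (reusing the client and model above) could look like this; part.thought marks the "thinking" parts, everything else is the answer:

# Non-streaming sketch: one request, then separate thought parts from answer parts.
response = client.models.generate_content(
    model='gemini-2.0-flash-thinking-exp-1219',
    contents="What is 2*2-1*4^4",
)
for part in response.candidates[0].content.parts:
    label = "THOUGHT" if part.thought else "ANSWER"
    print(f"[{label}] {part.text}")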

File: gemini_gradio.py

import os
os.environ['GEMINI_API_KEY'] = '<KEY>'  # replace with your own key; never commit a real key
import gradio as gr
import gemini_gradio

gr.load(
    name='gemini-2.0-flash-exp',
    src=gemini_gradio.registry,
    enable_voice=False,
    title='Gemini 2.0 Flash',
    description='Gemini 2.0 Flash is a model that can understand and generate text, images, and code. It is a large language model trained by Google DeepMind.'
).launch(share=True)
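
This relies on the gemini-gradio package (declared in pyproject.toml below). If you'd rather not create a public share link, a small variant of the same call can launch the UI locally on a fixed port, for example:

# Sketch: same registry-based UI, but served locally on port 7860 instead of a public share link.
gr.load(
    name='gemini-2.0-flash-exp',
    src=gemini_gradio.registry,
    title='Gemini 2.0 Flash',
).launch(server_port=7860)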

File: gemini_thinking.py

from google import genai
import os
os.environ['GEMINI_API_KEY'] = '<KEY>'  # replace with your own key; never commit a real key

# create client
client = genai.Client(api_key=os.environ['GEMINI_API_KEY'])

# use Gemini 2.0 with Flash Thinking 
stream = client.models.generate_content_stream(
    model='gemini-2.0-flash-thinking-exp-1219', 
    contents=f"""Can you crack the code? 
9 2 8 5 (One number is correct but in the wrong position) 
1 9 3 7 (Two numbers are correct but in the wrong positions) 
5 2 0 1 (one number is correct and in the right position) 
6 5 0 7 (nothing is correct) 
8 5 24 (two numbers are correct but in the wrong"""
)
for chunk in stream:
    print(chunk.text, end="", flush=True)
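
If you want to post-process the reply instead of printing it chunk by chunk, you can accumulate the stream into a single string first (a sketch that replaces the loop above):

# Sketch: collect the streamed chunks into one string; some chunks may carry no text.
answer = "".join(chunk.text for chunk in stream if chunk.text)
print(answer)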

File: pyproject.toml

[project]
name = "gdata"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
    "gemini-gradio>=0.0.3",
    "google-genai>=0.3.0",
]
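
After installing the dependencies (for example with uv sync, or pip install google-genai gemini-gradio), a quick sanity check that both packages resolve might look like:

# Sketch: confirm the two declared dependencies are importable and report their installed versions.
from importlib.metadata import version

from google import genai  # noqa: F401
import gemini_gradio      # noqa: F401

print("google-genai:", version("google-genai"))
print("gemini-gradio:", version("gemini-gradio"))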

How to use these in gist.github.com:

  1. Go to gist.github.com.
  2. Sign in to your GitHub account (anonymous gists are no longer supported); each gist can be either public or secret.
  3. For each file:
    • Copy the entire code block for the given file.
    • Paste it into the gist editor.
    • Give the file a name that matches the .py or .toml file above (e.g., gemini_thinking_deep.py, pyproject.toml).
  4. Add a description for the gist if needed.
  5. Click "Create public gist" or "Create secret gist".

Now you have your gists hosted on GitHub!

Attached notebook: a llama-agents multi-agent demo (the .ipynb cells, shown as Python):

# --- Cell 1 ---
%pip install -U llama-index-core

# --- Cell 2 ---
%pip install llama-index-embeddings-huggingface llama-agents

# --- Cell 3 ---
import logging

# Create a logger instance
logger = logging.getLogger(__name__)

# Set the log level to INFO for informational messages
logger.setLevel(logging.INFO)

# --- Cell 4 ---
import nest_asyncio

nest_asyncio.apply()

# import os
# os.environ["OPENAI_API_KEY"] = "sk-proj-..."

# --- Cell 5 ---
from llama_agents import (
    AgentService,
    AgentOrchestrator,
    ControlPlaneServer,
    LocalLauncher,
    SimpleMessageQueue,
)

from llama_index.core.agent import ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.agent import FunctionCallingAgentWorker


# Calculator tool
def calculator(operation: str, a: float, b: float) -> str:
    """
    Perform basic arithmetic operations.

    Args:
        operation (str): One of 'add', 'subtract', 'multiply', or 'divide'.
        a (float): First number.
        b (float): Second number.

    Returns:
        str: Result of the operation as a string.
    """
    try:
        if operation == "add":
            result = a + b
        elif operation == "subtract":
            result = a - b
        elif operation == "multiply":
            result = a * b
        elif operation == "divide":
            if b == 0:
                return "Error: Cannot divide by zero."
            result = a / b
        else:
            return f"Error: Invalid operation '{operation}'. Choose 'add', 'subtract', 'multiply', or 'divide'."

        return f"The result of {a} {operation} {b} is {result}"
    except Exception as e:
        return f"Error in calculation: {str(e)}"

# New text analysis tool
def text_analyzer(text: str) -> str:
    """
    Perform basic text analysis.

    Args:
        text (str): The text to analyze.

    Returns:
        str: Analysis results as a string.
    """
    try:
        word_count = len(text.split())
        char_count = len(text)
        sentence_count = text.count('.') + text.count('!') + text.count('?')

        analysis = f"Text Analysis Results:\n"
        analysis += f"- Word count: {word_count}\n"
        analysis += f"- Character count: {char_count}\n"
        analysis += f"- Approximate sentence count: {sentence_count}\n"

        return analysis
    except Exception as e:
        return f"Error in text analysis: {str(e)}"

calculator_tool = FunctionTool.from_defaults(fn=calculator)
text_tool = FunctionTool.from_defaults(fn=text_analyzer)

llm = Ollama(model="arcee-ai/arcee-agent", request_timeout=120.0)

Settings.llm = llm

agent1 = ReActAgent.from_tools(tools=[calculator_tool], llm=llm, verbose=True)
agent2 = ReActAgent.from_tools([text_tool], llm=Ollama(model="mistral", request_timeout=120.0), verbose=True)

# worker2 = FunctionCallingAgentWorker.from_tools([text_tool], llm=llm)
# agent2 = worker2.as_agent()

Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

# create our multi-agent framework components
message_queue = SimpleMessageQueue(port=8000)
control_plane = ControlPlaneServer(
    message_queue=message_queue,
    orchestrator=AgentOrchestrator(llm=llm),
    port=8001,
)
agent_server_1 = AgentService(
    agent=agent1,
    message_queue=message_queue,
    description="Useful for performing basic arithmetic operations like calculations.",
    service_name="calculator_agent",
    port=8002,
)
agent_server_2 = AgentService(
    agent=agent2,
    message_queue=message_queue,
    description="Useful for performing NLP, Text Analysis and Text Processing.",
    service_name="nlp_agent",
    port=8003,
)

# launch it
launcher = LocalLauncher([agent_server_1, agent_server_2], control_plane, message_queue)

try:
    result = launcher.launch_single("can you divide 100 by 20?")
    print(f"Result: {result}")
except Exception as e:
    print(f"An error occurred: {str(e)}")
    print("Please check the agent logs for more details.")

# --- Cell 6 ---
# from llama_agents import ServerLauncher, CallableMessageConsumer


# # Additional human consumer
# def handle_result(message) -> None:
#     print(f"Got result:", message.data)


# human_consumer = CallableMessageConsumer(
#     handler=handle_result, message_type="human"
# )

# # Define Launcher
# launcher = ServerLauncher(
#     [agent_server_1, agent_server_2],
#     control_plane,
#     message_queue,
#     additional_consumers=[human_consumer],
# )

# # Launch it!
# launcher.launch_servers()

# --- Cell 7 ---
# from llama_agents import LlamaAgentsClient, AsyncLlamaAgentsClient

# client = LlamaAgentsClient("http://127.0.0.1:8001")  # i.e. http://127.0.0.1:8001
# task_id = client.create_task("can you divide 100 by 20?")
# # <Wait a few seconds>
# # returns TaskResult or None if not finished
# result = client.get_task_result(task_id)

(Notebook metadata: Python 3 ipykernel, Python 3.9.6, nbformat 4.)