@mahenzon
Created September 21, 2025 11:15
Python AI LLM example using langchain
import requests

# from pprint import pprint  # uncomment together with the pprint() call below

API_KEY = "EMPTY"
BASE_URL = "http://localhost:12345/engines/llama.cpp/v1"
URL = f"{BASE_URL}/chat/completions"

# MODEL = "ai/gemma3"
MODEL = "ai/gemma3n"

headers = {
    "Authorization": f"Bearer {API_KEY}",
}


def main():
    system_message = {
        "role": "system",
        "content": (
            "You are a helpful assistant. "
            "Answer questions clearly and concisely. "
            "If you don't know the answer, you have to say so. "
            "Otherwise provide a concise answer; don't add any extra info that the user didn't ask for."
        ),
    }
    user_message = {
        "role": "user",
        # "content": "Hello, who are you?",
        # "content": "What is the capital of Russia?",
        # "content": "What is the capital of Russia? Also give one of the most important facts about the capital.",
        "content": (
            "Context: square is red, triangle is yellow, hexagon is blue, circle is green\n"
            "Question: what color is hex?"
            # "Question: what color is circle?"
        ),
    }
    data = {
        "model": MODEL,
        "messages": [
            system_message,
            user_message,
        ],
    }
    response = requests.post(URL, json=data, headers=headers)
    if response.ok:
        # pprint(response.json())
        print(response.json()["choices"][0]["message"]["content"])
    else:
        print(response)
        print(response.text)


if __name__ == "__main__":
    main()
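The endpoint above speaks the OpenAI wire protocol, so the same request can be made with the official openai client instead of raw requests. A minimal sketch under that assumption (the openai package is not listed in pyproject.toml below, but langchain-openai pulls it in):

# Sketch: the same chat-completions request via the official `openai` client.
# Assumes the local server accepts any API key, as above.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:12345/engines/llama.cpp/v1",
    api_key="EMPTY",
)
completion = client.chat.completions.create(
    model="ai/gemma3n",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of Russia?"},
    ],
)
print(completion.choices[0].message.content)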
In [4]:
OPENAI_API_KEY = "EMPTY"
OPEN_AI_BASE_URL = "http://localhost:12345/engines/llama.cpp/v1"
MODEL = "ai/gemma3n"
In [5]:
from langchain_openai import OpenAI

llm = OpenAI(
    model=MODEL,
    base_url=OPEN_AI_BASE_URL,
    api_key=OPENAI_API_KEY,
)
llm

Out[5]:
OpenAI(client=<openai.resources.completions.Completions object at 0x11af86350>, async_client=<openai.resources.completions.AsyncCompletions object at 0x11af86c10>, model_name='ai/gemma3n', model_kwargs={}, openai_api_key=SecretStr('**********'), openai_api_base='http://localhost:12345/engines/llama.cpp/v1')

In [6]:
llm.invoke("What's the capital of Russia?")

Out[6]:
'\nMoscow.\n'
In [7]:
system_prompt = """
You are a helpful assistant. Answer the user's questions concisely and accurately.
Answer directly to the user's question.
Don't add any extra information if not asked. Be focused on the human context and input.
If you don't know the answer, say so. Answer should be concise.
"""

In [14]:
from langchain_core.prompts import ChatPromptTemplate

prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("user", "{user_input}"),
    ],
)
prompt_template

Out[14]:
ChatPromptTemplate(input_variables=['user_input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template="\nYou are a helpful assistant. Answer the user's questions concisely and accurately.\nAnswer directly to the user's question.\nDon't add any extra information if not asked. Be focused on the human context and input.\nIf you don't know the answer, say so. Answer should be concise.\n"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['user_input'], input_types={}, partial_variables={}, template='{user_input}'), additional_kwargs={})])

In [15]:
prepared_prompt = prompt_template.invoke("What's the capital of Russia?")

print(prepared_prompt)

messages=[SystemMessage(content="\nYou are a helpful assistant. Answer the user's questions concisely and accurately.\nAnswer directly to the user's question.\nDon't add any extra information if not asked. Be focused on the human context and input.\nIf you don't know the answer, say so. Answer should be concise.\n", additional_kwargs={}, response_metadata={}), HumanMessage(content="What's the capital of Russia?", additional_kwargs={}, response_metadata={})]

In [16]:
llm.invoke(prepared_prompt)

Out[16]:
'\nAssistant: Moscow.\n'

In [17]:
llm.invoke(prompt_template.invoke("What's the capital of Russia? Also give one of the most important facts about it."))

Out[17]:
'\nAssistant: The capital of Russia is Moscow. One of the most important facts about Moscow is that it is home to the Kremlin, a historic fortified complex.\n'

In [18]:
llm.invoke(prompt_template.invoke("what color is hex?"))

Out[18]:
"\nAnswer: Hex is a color model, not a color itself. It's a way of representing colors using a six-digit hexadecimal code. The code indicates the amount of red, green, and blue light that make up the color."
In [19]:
from langchain_core.prompts import ChatPromptTemplate

prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("user", "Context: {context}\nQuery: {user_input}"),
    ],
)
prompt_template

Out[19]:
ChatPromptTemplate(input_variables=['context', 'user_input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template="\nYou are a helpful assistant. Answer the user's questions concisely and accurately.\nAnswer directly to the user's question.\nDon't add any extra information if not asked. Be focused on the human context and input.\nIf you don't know the answer, say so. Answer should be concise.\n"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'user_input'], input_types={}, partial_variables={}, template='Context: {context}\nQuery: {user_input}'), additional_kwargs={})])

In [22]:
human_context = "square is red, triangle is yellow, hexagon is purple, circle is green"
human_prompt = "what color is hex?"

prepared_prompt = prompt_template.invoke({
    "context": human_context,
    "user_input": human_prompt,
})
prepared_prompt

Out[22]:
ChatPromptValue(messages=[SystemMessage(content="\nYou are a helpful assistant. Answer the user's questions concisely and accurately.\nAnswer directly to the user's question.\nDon't add any extra information if not asked. Be focused on the human context and input.\nIf you don't know the answer, say so. Answer should be concise.\n", additional_kwargs={}, response_metadata={}), HumanMessage(content='Context: square is red, triangle is yellow, hexagon is purple, circle is green\nQuery: what color is hex?', additional_kwargs={}, response_metadata={})])

In [23]:
llm.invoke(prepared_prompt)

Out[23]:
'\nResponse: purple'

In [24]:
chain = prompt_template | llm

chain

Out[24]:
ChatPromptTemplate(input_variables=['context', 'user_input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template="\nYou are a helpful assistant. Answer the user's questions concisely and accurately.\nAnswer directly to the user's question.\nDon't add any extra information if not asked. Be focused on the human context and input.\nIf you don't know the answer, say so. Answer should be concise.\n"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'user_input'], input_types={}, partial_variables={}, template='Context: {context}\nQuery: {user_input}'), additional_kwargs={})])
| OpenAI(client=<openai.resources.completions.Completions object at 0x11af86350>, async_client=<openai.resources.completions.AsyncCompletions object at 0x11af86c10>, model_name='ai/gemma3n', model_kwargs={}, openai_api_key=SecretStr('**********'), openai_api_base='http://localhost:12345/engines/llama.cpp/v1')

In [25]:
chain.invoke({
    "context": human_context,
    "user_input": human_prompt,
})

Out[25]:
'\nAnswer: purple'
In [26]:
from langchain_openai import ChatOpenAI

model = ChatOpenAI(
    model=MODEL,
    base_url=OPEN_AI_BASE_URL,
    api_key=OPENAI_API_KEY,
)

In [27]:
model.invoke("What's the capital of Russia?")

Out[27]:
AIMessage(content='The capital of Russia is **Moscow**. \n', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 17, 'total_tokens': 28, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'ai/gemma3n', 'system_fingerprint': 'b1-c610b6c', 'id': 'chatcmpl-MsQ7wG9mok6D9xszdT1ZcmHpACkoIqgA', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--bce333f1-8e2e-440c-926a-3422c4e0344a-0', usage_metadata={'input_tokens': 17, 'output_tokens': 11, 'total_tokens': 28, 'input_token_details': {}, 'output_token_details': {}})

In [28]:
model.invoke("Who are you?")

Out[28]:
AIMessage(content="I'm Gemma, an open-weights AI assistant. I'm a large language model trained by Google DeepMind. I take text and image as inputs and output text only. \n\nThe Gemma team are my creators! I'm widely available to the public.\n", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 57, 'prompt_tokens': 13, 'total_tokens': 70, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'ai/gemma3n', 'system_fingerprint': 'b1-c610b6c', 'id': 'chatcmpl-mTeDtGONDZ7o2yrqb2gFHc0qvq6DL8zY', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--22ddf7fa-9e23-4e3b-9e93-dfda89d9b3b9-0', usage_metadata={'input_tokens': 13, 'output_tokens': 57, 'total_tokens': 70, 'input_token_details': {}, 'output_token_details': {}})
In [29]:
from pydantic import BaseModel, Field


class ResponseFormat(BaseModel):
    answer: str = Field(description="The answer to the user's question.")

In [30]:
model_with_struct = model.with_structured_output(ResponseFormat)

model_with_struct

Out[30]:
RunnableBinding(bound=ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x11b106900>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x11b106cf0>, root_client=<openai.OpenAI object at 0x11bc5bc50>, root_async_client=<openai.AsyncOpenAI object at 0x11bd4c2d0>, model_name='ai/gemma3n', model_kwargs={}, openai_api_key=SecretStr('**********'), openai_api_base='http://localhost:12345/engines/llama.cpp/v1'), kwargs={'response_format': <class '__main__.ResponseFormat'>, 'ls_structured_output_format': {'kwargs': {'method': 'json_schema', 'strict': None}, 'schema': {'type': 'function', 'function': {'name': 'ResponseFormat', 'description': '', 'parameters': {'properties': {'answer': {'description': "The answer to the user's question.", 'type': 'string'}}, 'required': ['answer'], 'type': 'object'}}}}}, config={}, config_factories=[])
| RunnableBinding(bound=RunnableLambda(...), kwargs={}, config={}, config_factories=[], custom_output_type=<class '__main__.ResponseFormat'>)

In [31]:
model_with_struct.invoke("Who are you?")

Out[31]:
ResponseFormat(answer='I am Gemma, an open-weights AI assistant. I am a large language model trained by Google DeepMind. I take text and image as inputs and output text only.')

In [32]:
chain = prompt_template | model_with_struct

chain.invoke({
    "context": human_context,
    "user_input": human_prompt,
})

Out[32]:
ResponseFormat(answer='purple')
In [33]:
from pydantic import BaseModel, Field


class VideoIdea(BaseModel):
    """
    This model describes a short video idea for YouTube.
    The video should be entertaining and useful for the viewer.
    """

    title: str = Field(description="The title of the video.")
    short_description: str = Field(
        description="The short description of the video: should contain a couple of sentences.",
    )
    topics: list[str] = Field(
        description="A list of topics describing the video. From 1 to 4 topics discussed in the video.",
    )


class VideoIdeasResponse(BaseModel):
    ideas: list[VideoIdea]
In [34]:
model_for_video_ideas = model.with_structured_output(VideoIdeasResponse)

model_for_video_ideas

Out[34]:
RunnableBinding(bound=ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x11b106900>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x11b106cf0>, root_client=<openai.OpenAI object at 0x11bc5bc50>, root_async_client=<openai.AsyncOpenAI object at 0x11bd4c2d0>, model_name='ai/gemma3n', model_kwargs={}, openai_api_key=SecretStr('**********'), openai_api_base='http://localhost:12345/engines/llama.cpp/v1'), kwargs={'response_format': <class '__main__.VideoIdeasResponse'>, 'ls_structured_output_format': {'kwargs': {'method': 'json_schema', 'strict': None}, 'schema': {'type': 'function', 'function': {'name': 'VideoIdeasResponse', 'description': '', 'parameters': {'properties': {'ideas': {'items': {'description': 'This model describes a short video idea for YouTube.\nThe video should be entertaining and useful for the viewer.', 'properties': {'title': {'description': 'The title of the video.', 'type': 'string'}, 'short_description': {'description': 'The short description of the video: should contain a couple of sentences.', 'type': 'string'}, 'topics': {'description': 'A list of topics describing the video. From 1 to 4 topics discussed in the video.', 'items': {'type': 'string'}, 'type': 'array'}}, 'required': ['title', 'short_description', 'topics'], 'type': 'object'}, 'type': 'array'}}, 'required': ['ideas'], 'type': 'object'}}}}}, config={}, config_factories=[])
| RunnableBinding(bound=RunnableLambda(...), kwargs={}, config={}, config_factories=[], custom_output_type=<class '__main__.VideoIdeasResponse'>)

In [35]:
video_ideas_chain = prompt_template | model_for_video_ideas

video_ideas_chain

Out[35]:
ChatPromptTemplate(input_variables=['context', 'user_input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template="\nYou are a helpful assistant. Answer the user's questions concisely and accurately.\nAnswer directly to the user's question.\nDon't add any extra information if not asked. Be focused on the human context and input.\nIf you don't know the answer, say so. Answer should be concise.\n"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'user_input'], input_types={}, partial_variables={}, template='Context: {context}\nQuery: {user_input}'), additional_kwargs={})])
| RunnableBinding(bound=ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x11b106900>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x11b106cf0>, root_client=<openai.OpenAI object at 0x11bc5bc50>, root_async_client=<openai.AsyncOpenAI object at 0x11bd4c2d0>, model_name='ai/gemma3n', model_kwargs={}, openai_api_key=SecretStr('**********'), openai_api_base='http://localhost:12345/engines/llama.cpp/v1'), kwargs={'response_format': <class '__main__.VideoIdeasResponse'>, 'ls_structured_output_format': {'kwargs': {'method': 'json_schema', 'strict': None}, 'schema': {'type': 'function', 'function': {'name': 'VideoIdeasResponse', 'description': '', 'parameters': {'properties': {'ideas': {'items': {'description': 'This model describes a short video idea for YouTube.\nThe video should be entertaining and useful for the viewer.', 'properties': {'title': {'description': 'The title of the video.', 'type': 'string'}, 'short_description': {'description': 'The short description of the video: should contain a couple of sentences.', 'type': 'string'}, 'topics': {'description': 'A list of topics describing the video. From 1 to 4 topics discussed in the video.', 'items': {'type': 'string'}, 'type': 'array'}}, 'required': ['title', 'short_description', 'topics'], 'type': 'object'}, 'type': 'array'}}, 'required': ['ideas'], 'type': 'object'}}}}}, config={}, config_factories=[])
| RunnableBinding(bound=RunnableLambda(...), kwargs={}, config={}, config_factories=[], custom_output_type=<class '__main__.VideoIdeasResponse'>)

In [36]:
ideas_context = "Programming, Python, LLM, AI, FastAPI, SQLAlchemy, Django. Web development"
ideas_query = "Please create three video ideas about using LLM in Python web applications using langchain and fastapi or Django"

new_ideas = video_ideas_chain.invoke({
    "context": ideas_context,
    "user_input": ideas_query,
})
new_ideas

Out[36]:
VideoIdeasResponse(ideas=[VideoIdea(title='LLM-Powered Chatbot with Langchain & FastAPI', short_description='Build a conversational chatbot using Langchain for prompt management and FastAPI for a REST API. Focus on user input, LLM response, and API integration.', topics=['Langchain', 'FastAPI', 'LLM Chatbot', 'API Development']), VideoIdea(title='Automated Content Generation with LLM & Django', short_description="Develop a web application with Django that leverages an LLM to generate content (e.g., articles, summaries) based on user prompts. Showcase Django's templating and form handling.", topics=['Django', 'LLM Content Generation', 'Web Forms', 'Templating']), VideoIdea(title='LLM Integration for Data Analysis with Langchain & FastAPI', short_description='Demonstrate how to use an LLM with Langchain to analyze data (e.g., from a database via SQLAlchemy) and provide insights. Use FastAPI to expose the analysis as an API.', topics=['Langchain', 'FastAPI', 'SQLAlchemy', 'Data Analysis', 'LLM Insights'])])

In [37]:
for idea in new_ideas.ideas:
    print()
    print(idea.title)
    print(idea.short_description)
    print(idea.topics)

LLM-Powered Chatbot with Langchain & FastAPI
Build a conversational chatbot using Langchain for prompt management and FastAPI for a REST API. Focus on user input, LLM response, and API integration.
['Langchain', 'FastAPI', 'LLM Chatbot', 'API Development']

Automated Content Generation with LLM & Django
Develop a web application with Django that leverages an LLM to generate content (e.g., articles, summaries) based on user prompts. Showcase Django's templating and form handling.
['Django', 'LLM Content Generation', 'Web Forms', 'Templating']

LLM Integration for Data Analysis with Langchain & FastAPI
Demonstrate how to use an LLM with Langchain to analyze data (e.g., from a database via SQLAlchemy) and provide insights. Use FastAPI to expose the analysis as an API.
['Langchain', 'FastAPI', 'SQLAlchemy', 'Data Analysis', 'LLM Insights']
[project]
name = "075-ai-llm-how-to"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"langchain>=0.3.27",
"langchain-openai>=0.3.33",
"notebook>=7.4.5",
"requests>=2.32.5",
]