Last active
June 3, 2025 21:22
-
-
Save FinanceData/d7d2560824afa8c3f34c31e9ee7cd040 to your computer and use it in GitHub Desktop.
reponse-api-rag-file-search.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "provenance": [], | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| } | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/FinanceData/d7d2560824afa8c3f34c31e9ee7cd040/reponse-api-rag-file-search.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "# Responses API - RAG 구현\n", | |
| "\n", | |
| "\n", | |
| "**2024~2025 FinanceData.KR**" | |
| ], | |
| "metadata": { | |
| "id": "LOooAvo9JCPv" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "import os\n", | |
| "from getpass import getpass\n", | |
| "\n", | |
| "# Never store a real key in the notebook: reuse a key already present in the\n", | |
| "# environment, otherwise prompt for it without echoing the input.\n", | |
| "if not os.environ.get(\"OPENAI_API_KEY\"):\n", | |
| "    os.environ[\"OPENAI_API_KEY\"] = getpass(\"OPENAI_API_KEY: \")\n" | |
| ], | |
| "metadata": { | |
| "id": "7eLEt9wE66sB" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "## 시총상위 10개사 사업보고서(2024년 반기)" | |
| ], | |
| "metadata": { | |
| "id": "rnF4L61eLyxP" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "!gdown \"1T5tcPTQSi_jjgiP7HMwzDdvJul1zE4Vt\"" | |
| ], | |
| "metadata": { | |
| "id": "04kEt_grLsso" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "!unzip \"2024_반기사업보고서.zip\"" | |
| ], | |
| "metadata": { | |
| "id": "EcTa8TlhLsnc" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "import glob\n", | |
| "\n", | |
| "pdf_files = glob.glob(\"*.pdf\")\n", | |
| "pdf_files" | |
| ], | |
| "metadata": { | |
| "collapsed": true, | |
| "id": "Qwbwl3wBL69y" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "id": "oZKfngV5lJZJ", | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "!pip install -q openai" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "from openai import OpenAI\n", | |
| "client = OpenAI()" | |
| ], | |
| "metadata": { | |
| "id": "r7OHRdioOaDt" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "## 1단계: 벡터스토어 생성 및 파일 업로드\n" | |
| ], | |
| "metadata": { | |
| "id": "Hinhl7q1SL6F" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "from openai import OpenAI\n", | |
| "client = OpenAI()\n", | |
| "\n", | |
| "vector_store = client.vector_stores.create( # Create vector store\n", | |
| " name=\"dart-report\",\n", | |
| ")" | |
| ], | |
| "metadata": { | |
| "id": "J-xAKtB77ZMO" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "vector_store" | |
| ], | |
| "metadata": { | |
| "id": "BXKHpN5s8lZr" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "pdf_files" | |
| ], | |
| "metadata": { | |
| "id": "jVnBz9WP-Cbb" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Upload every PDF into the vector store; upload_and_poll is called once per file.\n", | |
| "# The context manager closes each file handle as soon as its upload call returns,\n", | |
| "# instead of leaking one open handle per PDF.\n", | |
| "for pdf_file in pdf_files:\n", | |
| "    with open(pdf_file, \"rb\") as pdf_handle:\n", | |
| "        client.vector_stores.files.upload_and_poll(\n", | |
| "            vector_store_id=vector_store.id,\n", | |
| "            file=pdf_handle,\n", | |
| "        )" | |
| ], | |
| "metadata": { | |
| "id": "evxhXXF4-Bnm" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "user_query = \"셀트리온의 재무제표를 표로 정리해서 출력해줘\"\n", | |
| "\n", | |
| "results = client.vector_stores.search(\n", | |
| " vector_store_id=vector_store.id,\n", | |
| " query=user_query,\n", | |
| ")\n", | |
| "\n", | |
| "results.to_dict() # 확인" | |
| ], | |
| "metadata": { | |
| "id": "hxltltLx-R96" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "## 2단계: 벡터스토어에 RAG 수행" | |
| ], | |
| "metadata": { | |
| "id": "iPcrDre2-aCt" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "from openai import OpenAI\n", | |
| "client = OpenAI()\n", | |
| "\n", | |
| "response = client.responses.create(\n", | |
| " model=\"gpt-4o-mini\",\n", | |
| " input=\"최근 3년 셀트리온의 재무제표를 상세하게 출력해줘\",\n", | |
| " tools=[{\n", | |
| " \"type\": \"file_search\",\n", | |
| " \"vector_store_ids\": [vector_store.id]\n", | |
| " }]\n", | |
| ")\n", | |
| "\n", | |
| "print(response.output_text)" | |
| ], | |
| "metadata": { | |
| "id": "-y8M7D0i-Z9b" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "from openai import OpenAI\n", | |
| "client = OpenAI()\n", | |
| "\n", | |
| "response = client.responses.create(\n", | |
| " model=\"gpt-4o-mini\",\n", | |
| " input=\"현대자동차의 재무제표를 투자 관점에서 평가해줘, 출처도 함께 표기해줘\",\n", | |
| " tools=[{\n", | |
| " \"type\": \"file_search\",\n", | |
| " \"vector_store_ids\": [vector_store.id]\n", | |
| " }]\n", | |
| ")\n", | |
| "\n", | |
| "print(response.output_text)" | |
| ], | |
| "metadata": { | |
| "id": "FP8lQSqGAavB" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "\n", | |
| "**2024~2025 FinanceData.KR**" | |
| ], | |
| "metadata": { | |
| "id": "YzrtfGI0XdNG" | |
| } | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment