-
-
Save ninehills/ecf7107574c83016e8b68965bf9a51c4 to your computer and use it in GitHub Desktop.
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [] | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# Chat with a PDF file" | |
], | |
"metadata": { | |
"id": "4Sw_ysmQlk-8" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Recommended: keep the PDF file on Google Drive, which is mounted here.\n", | |
"from google.colab import drive\n", | |
"\n", | |
"drive.mount('/content/drive')" | |
], | |
"metadata": { | |
"id": "WKhC2AZRjyok", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "65b23095-a911-4efa-b7f4-c9eec3a034a2" | |
}, | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Working directory on Google Drive\n", | |
"WORK_DIR = \"/content/drive/MyDrive/ChatGPT/Notebooks/ChatPDF/\"\n", | |
"# Name of the env file that stores OPENAI_API_KEY\n", | |
"ENV_FILE = \".env\"\n", | |
"# Source file to be processed\n", | |
"SRC_FILE = \"jianshang.pdf\"\n", | |
"# Cached vector index file, named after the source file\n", | |
"INDEX_FILE = f\"{SRC_FILE}.index\"" | |
], | |
"metadata": { | |
"id": "UXw1TWw_nj_F" | |
}, | |
"execution_count": 6, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"%%capture\n", | |
"# Install or update the required libraries; llama_index stays pinned to 0.4.40.\n", | |
"# %pip (rather than !pip) installs into the environment of the running kernel, and a single\n", | |
"# invocation lets pip resolve the three requirements together instead of letting a later\n", | |
"# --upgrade override what the pinned llama_index needs.\n", | |
"%pip install --upgrade llama_index==0.4.40 langchain python-dotenv\n" | |
], | |
"metadata": { | |
"id": "Aqef8N2RlUpo" | |
}, | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import logging\n", | |
"import sys\n", | |
"\n", | |
"# Route INFO-level log records to stdout so they appear in the notebook output.\n", | |
"# force=True (Python 3.8+) removes handlers already attached to the root logger and installs\n", | |
"# exactly one stdout handler, so each record is printed once even when this cell is re-run.\n", | |
"logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)" | |
], | |
"metadata": { | |
"id": "GybUa0BykExM" | |
}, | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from llama_index import GPTSimpleVectorIndex, LLMPredictor, PromptHelper\n", | |
"from llama_index.response.notebook_utils import display_response\n", | |
"from llama_index.prompts.prompts import QuestionAnswerPrompt\n", | |
"from langchain.chat_models import ChatOpenAI\n", | |
"from IPython.display import Markdown, display\n", | |
"from langchain.callbacks.base import CallbackManager\n", | |
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler" | |
], | |
"metadata": { | |
"id": "Vp6JcErhmt_w" | |
}, | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Load environment variables (OPENAI_API_KEY)\n", | |
"\n", | |
"import os\n", | |
"import shutil\n", | |
"from dotenv import load_dotenv\n", | |
"\n", | |
"# Copy the env file from the Drive working directory into the current directory so the\n", | |
"# argument-less load_dotenv() call below can pick it up.\n", | |
"shutil.copyfile(os.path.join(WORK_DIR, ENV_FILE), \".env\")\n", | |
"\n", | |
"load_dotenv()\n", | |
"\n", | |
"# API configuration\n", | |
"OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", | |
"\n", | |
"# os.getenv returns None (not \"\") when the variable is missing, so check for falsiness:\n", | |
"# this rejects both an unset and an empty key.\n", | |
"if not OPENAI_API_KEY:\n", | |
"    raise Exception(\"Need set OPENAI_API_KEY\")" | |
], | |
"metadata": { | |
"id": "WKoA2bzul7Gz" | |
}, | |
"execution_count": 7, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"准备 Index 文件,为了避免重复索引,增加缓存\n", | |
"\n", | |
"\n" | |
], | |
"metadata": { | |
"id": "SApFHwHCpEGJ" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Build the vector index for the PDF, or load it from the cached index file\n", | |
"\n", | |
"from pathlib import Path\n", | |
"from llama_index import download_loader\n", | |
"\n", | |
"index_file = os.path.join(WORK_DIR, INDEX_FILE)\n", | |
"\n", | |
"if os.path.exists(index_file):\n", | |
"    # Cache hit: reuse the saved index instead of indexing the document again.\n", | |
"    index = GPTSimpleVectorIndex.load_from_disk(index_file)\n", | |
"else:\n", | |
"    # CJKPDFReader is recommended for Chinese PDFs, PDFReader for English ones.\n", | |
"    # For other file types, look for a suitable loader at https://llamahub.ai/\n", | |
"    # The loader is fetched only here, when the index actually has to be built.\n", | |
"    CJKPDFReader = download_loader(\"CJKPDFReader\")\n", | |
"    loader = CJKPDFReader()\n", | |
"    documents = loader.load_data(file=os.path.join(WORK_DIR, SRC_FILE))\n", | |
"    # The default is chunk_size_limit=4096. A smaller chunk size lowers token usage, but less\n", | |
"    # context is handed to GPT in the end, which can hurt answer quality.\n", | |
"    index = GPTSimpleVectorIndex(documents, chunk_size_limit=1024)\n", | |
"    index.save_to_disk(index_file)\n" | |
], | |
"metadata": { | |
"id": "Cb98YMtrnTxU" | |
}, | |
"execution_count": 8, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# temperature controls randomness: the closer to 0, the more deterministic the output;\n", | |
"# higher values make the answers more creative.\n", | |
"# Typical values: 0 (arc53/DocsGPT), 0.2 (madawei2699/myGPTReader)\n", | |
"llm_predictor = LLMPredictor(\n", | |
"    llm=ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0),\n", | |
")\n", | |
"\n", | |
"# Minimal question-answer template: the retrieved context followed by the question.\n", | |
"QUESTION_ANSWER_PROMPT_TMPL = (\n", | |
"    \"Context information is below. \\n\"\n", | |
"    \"---------------------\\n\"\n", | |
"    \"{context_str}\"\n", | |
"    \"\\n---------------------\\n\"\n", | |
"    \"{query_str}\\n\"\n", | |
")\n", | |
"\n", | |
"# Stricter template: tells the model to say so when the context does not contain the answer.\n", | |
"QUESTION_ANSWER_PROMPT_TMPL_2 = \"\"\"\n", | |
"You are an AI assistant providing helpful advice. You are given the following extracted parts of a long document and a question. Provide a conversational answer based on the context provided.\n", | |
"If you can't find the answer in the context below, just say \"Hmm, I'm not sure.\" Don't try to make up an answer.\n", | |
"If the question is not related to the context, politely respond that you are tuned to only answer questions that are related to the context.\n", | |
"\n", | |
"Context information is below.\n", | |
"=========\n", | |
"{context_str}\n", | |
"=========\n", | |
"{query_str}\n", | |
"\"\"\"\n", | |
"\n", | |
"# The stricter template is the one wrapped into the prompt object used by the queries.\n", | |
"QUESTION_ANSWER_PROMPT = QuestionAnswerPrompt(QUESTION_ANSWER_PROMPT_TMPL_2)\n", | |
"\n", | |
"def chat(query):\n", | |
"    \"\"\"Query the document index and return the response.\n", | |
"\n", | |
"    A request to answer in Chinese is appended to ``query`` before it is sent, and the\n", | |
"    token usage reported by ``llm_predictor`` is printed after the call.\n", | |
"    \"\"\"\n", | |
"    # With a small chunk size and a terse question the answer language is hard to control.\n", | |
"    # Several attempts to steer it through the prompt had no effect, so the instruction\n", | |
"    # is appended to the query itself.\n", | |
"    query += \" 请使用中文回答。\"\n", | |
"    query_options = dict(\n", | |
"        llm_predictor=llm_predictor,\n", | |
"        text_qa_template=QUESTION_ANSWER_PROMPT,\n", | |
"        # response_mode:\n", | |
"        #   default: 'create and refine' an answer by going through each Node sequentially,\n", | |
"        #     with a separate LLM call per Node. Good for more detailed answers.\n", | |
"        #   compact: stuff as many Node text chunks as fit within the maximum prompt size\n", | |
"        #     into each LLM call; if they do not fit in one prompt, 'create and refine'\n", | |
"        #     an answer across multiple prompts.\n", | |
"        #   tree_summarize: recursively construct a tree from the Nodes and the query and\n", | |
"        #     return the root node as the response. Good for summarization purposes.\n", | |
"        response_mode=\"tree_summarize\",\n", | |
"        similarity_top_k=3,\n", | |
"        # mode:\n", | |
"        #   default: create and refine an answer sequentially through the nodes of the list.\n", | |
"        #   embedding: synthesize an answer by fetching the top-k nodes by embedding similarity.\n", | |
"        mode=\"embedding\",\n", | |
"    )\n", | |
"    response = index.query(query, **query_options)\n", | |
"    print(f\"Token used: {llm_predictor.last_token_usage}, total used: {llm_predictor.total_tokens_used}\")\n", | |
"    return response\n", | |
"\n", | |
"# It's not work now, please don't use it.\n", | |
"# Bug: https://github.com/jerryjliu/llama_index/issues/831\n", | |
"def chat_stream(query):\n", | |
"    \"\"\"Run the query with ``streaming=True`` and return whatever ``index.query`` returns.\n", | |
"\n", | |
"    Takes the same retrieval options as ``chat`` but does not append the Chinese-answer\n", | |
"    instruction and does not print token usage.\n", | |
"    \"\"\"\n", | |
"    stream_options = dict(\n", | |
"        llm_predictor=llm_predictor,\n", | |
"        text_qa_template=QUESTION_ANSWER_PROMPT,\n", | |
"        response_mode=\"tree_summarize\",\n", | |
"        similarity_top_k=3,\n", | |
"        streaming=True,\n", | |
"        mode=\"embedding\",\n", | |
"    )\n", | |
"    return index.query(query, **stream_options)\n", | |
"\n", | |
"# response_stream = chat_stream(\"这本书讲了什么?\")\n", | |
"# response_stream.print_response_stream()" | |
], | |
"metadata": { | |
"id": "6ddjxclno8tg" | |
}, | |
"execution_count": 64, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Ask what the book is about, then render the response.\n", | |
"resp = chat(query=\"这本书讲了什么?\")\n", | |
"display_response(response=resp)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 457 | |
}, | |
"id": "psvZXfWirq31", | |
"outputId": "a90d5eb2-8db5-4cbe-d651-693b84deb5f3" | |
}, | |
"execution_count": 65, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Token used: 4486, total used: 4486\n" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**`Final Response:`** 这本书讲述了商周变革的历史背景和过程,包括周灭商等事件,同时也描绘了许多熟视无睹的场景,让读者可以更好地理解古代中国的思想、信仰、伦理、心态、风俗,以及军事、政治、制度、规则等方面。根据历史学家和教授的评价,这场变革对于华夏文明的意义更深、更远,是中国历史上的重要事件。作者的视角和写法独特,让人耳目一新。对于对古代中国有兴趣的研究者或普通读者来说,是一个很好的探索起点。" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "---" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**`Source Node 1/3`**" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.8053380387024119<br>**Text:** 在视频和音频节目中,呈现得肯定都是有限的。 说起来,李硕在本书中所描述的,都是我这个在新石器时代至夏 商周考古领域熬至“资深”的学者所耳熟能详的,但他的视角和写法 却又使我耳目一新:他赋予了我...<br>" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "---" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**`Source Node 2/3`**" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.8041932654562923<br>**Text:** 的支持,其实是心理上的,让我意识到除了祭祀坑里的尸骨,这世界 上还有别的东西。 也许,人不应当凝视深渊;虽然深渊就在那里。 \f \f 始于一页,抵达世界 Humanities ■ Histor...<br>" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "---" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**`Source Node 3/3`**" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.7992281260807114<br>**Text:** 们再次进入幽暗的历史通道前,一窺我们这群人何以如此,何以至今。它将予我们 \n鼓励,认识自己,直面未来。\n\n刘苏里万圣书园创办人\n\n-部好的历史著作,不仅要数学家的逻辑,迁要文学家的想象。由此观之...<br>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"从下面这个问题的回答可以看到,当 chunk_size 从默认的 4096 缩减为 1024 后,因为缺乏上下文,回答不够完整,只有牧野之战的前半部分。缩减之前(chunk_size 为 4096 时)的回答如下:\n", | |
"\n", | |
"在牧野之战开始时,武王率领西土联军面对着数量远超自己的商军。武王的前提是有殷都内部联络人的密约,但局势不断变化,没有商人助战,西土联军将被一边倒地屠杀。武王没有别的选择,他只能相信父亲描述的那位上帝站在自己一边,只要全心信任他,父亲开启的翦商事业就能成功。在战斗开始时,武王一方没有任何章法和战术可言,但商军阵列却突然自行解体,变成了互相砍杀的人群。或许是看到周军义无反顾的冲锋,商军中的密谋者终于鼓起勇气,倒戈杀向纣王中军。接着,西土联军全部投入了混战。后世的周人史诗说,“商庶若化”,即是说,商军队伍就像滚水冲刷的油脂,瞬间溃散,融化。最终,武王率领的西土联军获胜,商王朝终结。" | |
], | |
"metadata": { | |
"id": "NIF3KL1ttYhw" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"display_response(response=chat(query=\"牧野之战的具体过程是什么?\"))" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 445 | |
}, | |
"id": "bS5LAJkuqR4U", | |
"outputId": "ca529dfd-7137-43f6-fcf7-6f90c44fca4c" | |
}, | |
"execution_count": 66, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Token used: 4788, total used: 9274\n" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**`Final Response:`** 根据新的上下文,牧野之战的具体过程是:盟军经过六天加急行军,于二月二十一日夜间抵达殷都南郊的牧野,两军都已侦知对方主力的位置,开始连夜整队列阵,准备天亮时一举消灭对手。二十二日甲子凌晨,规模较小的周军首先列队完毕,武王全身盔甲戎装,在阵前宣誓,这便是著名的《尚书•牧誓》。武王手持白旄,高声宣誓:“逖矣,西土之人!”,然后一一点名麾下的盟友、将领、军官,直到“百夫长”,命令他们:“拿起你们的戈,连接好你们的盾牌,立起你们的长矛,现在,我要立誓!”" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "---" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**`Source Node 1/3`**" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.8107165489961021<br>**Text:** 的宗教思维,知道必须用法术对抗法术,化解纣王自我献祭可能带来 的后果与流言,方法则是表演一次战斗和处斩,展现纣王被俘和被杀 的全过程:周军直入鹿台宫,武王在战车上对着纣王尸体连射三箭, 然后跳下...<br>" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "---" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**`Source Node 2/3`**" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.8084797679435898<br>**Text:** 事据点可以保持有效联络,一旦某个城邑遭到土著部落威胁,周邻据 点可以尽快参战,战报也可以迅速送到殷都,以便后方组织增援力量。 马拉战车比徒步快三倍以上,这意味着传递战报和命令的时间只需原 来的四...<br>" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "---" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**`Source Node 3/3`**" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.8057765987066442<br>**Text:** 商人据点,一路向北直指殷都。经过六天加急行军,二月二十一日癸 \n丑夜间,盟军抵达殷都南郊的牧野。6这里是商王室蓄养牛羊的草原,\n地形平坦,商军集结地的营火已经遥遥在望。此时,两军都已侦知对\n方主...<br>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"display_response(response=chat(query=\"对商朝人祭文化做一个总结。\"))" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 433 | |
}, | |
"id": "7c1Kwsj2waEB", | |
"outputId": "15d54fca-9c21-43e4-e34f-6b0b546f3af3" | |
}, | |
"execution_count": 68, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Token used: 3115, total used: 16625\n" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**`Final Response:`** 商朝人祭文化是一种漫长而顽固的风习,从新石器时代晚期以来算起,已经延续两三千年,商朝更是将其吸收到了王朝制度之中。人祭是商朝的国家宗教,也是商族人的全民宗教,王室成为人祭活动最大的主办者,代表着王权和神权的高度融合。人祭行为不仅出现在宫廷与民间,也被商人带到了各殖民城邑。周公与召公的谈话中,周公认为想要根除上千年的积习,谈何容易。" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "---" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**`Source Node 1/3`**" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.8775562794009867<br>**Text:** 祭材料。 另外,商人的人祭宗教也和他们的复杂来源有关。灭夏初期,来 自多个文化的人群融合成新兴的“王朝商族”,因此,他们需要构建 一种维系自我认同的宗教文化,而用人献祭是最为明晰和便捷的方式:...<br>" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "---" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**`Source Node 2/3`**" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.8724770531874126<br>**Text:** 和统治列族的权柄。 在商人的人祭宗教兴盛之际,王室成为人祭活动最大的主办者。 \n\n这代表着王权和神权的高度融合。比起二里头一夏朝,这是一个新\n变化:人祭是商朝的国家宗教,也是商族人的全民宗教。...<br>" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "---" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**`Source Node 3/3`**" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Markdown object>" | |
], | |
"text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.871134373879794<br>**Text:** 其实,此时周公真正关心的问题是商人的人祭文化。商王朝虽然 终结了,但他们用人牲祭祀、奠基和殉葬的传统并没有终止;而且 武王在位期间还曾举行商式献祭,甚至比商人更变本加厉。人祭是 一种漫长而顽固的...<br>" | |
}, | |
"metadata": {} | |
} | |
] | |
} | |
] | |
} |
LICENSE: MIT
COOOOL,有没有一个问题平均需要token的估计?
COOOOL,有没有一个问题平均需要token的估计?
这个取决于你使用的参数。我更新了一个版本,可以统计每次查询消耗的 Token。
针对如下参数:
response_mode="tree_summarize",
similarity_top_k=3,
mode="embedding",
的情况下,每次问题估计消耗 10000 token,也就是相当于 $0.02 = 0.14 元人民币。
如果将 similarity_top_k 改成默认值 1,那么价格将会降低为原来的 1/3。
I did not know what to do, would u please help me with these error?
TypeError Traceback (most recent call last)
in <cell line: 1>()
----> 1 resp = chat("这本书讲了什么?")
2 display_response(resp)
7 frames
/usr/local/lib/python3.9/dist-packages/llama_index/indices/vector_store/base_query.py in __init__(self, index_struct, service_context, vector_store, similarity_top_k, **kwargs)
35 ) -> None:
36 """Initialize params."""
---> 37 super().__init__(
38 index_struct=index_struct, service_context=service_context, **kwargs
39 )
TypeError: __init__() got an unexpected keyword argument 'llm_predictor'
llama_index 0.5 introduced breaking API changes!
So please install the older, pinned version instead: pip install llama_index==0.4.40
.
I did not know what to do, would u please help me with these error?
TypeError: __init__() got an unexpected keyword argument 'llm_predictor'
提示语改成如下会避免错误的杜撰:(参考 mayooear/gpt4-pdf-chatbot-langchain 项目)