Last active
November 9, 2023 15:46
-
-
Save sasasin/17ef93423f32a97c87cb5b33492a03b5 to your computer and use it in GitHub Desktop.
OpenAI GPT-4 で標準入力の文章を15分の1の文字数に要約するPythonスクリプト
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# OpenAI GPT-4 で標準入力の文章を15分の1の文字数に要約するPythonスクリプト
# つかいかた;
# pip3 install -U openai
# export OPENAI_API_KEY="......."
## デフォの15以外の割合に設定したければ
# export SUMMERIZE_RATE=10
## デフォの gpt-4-1106-preview 以外のモデルを使うなら
# export MODEL_NAME="gpt-3.5-turbo-1106"
# cat hoge.txt | python3 ./summerize-stdin-by-openai.py
import os
import sys
import io
import unicodedata
import openai
from openai import OpenAI

# Re-wrap the standard streams as UTF-8 so the text piped through this
# script is decoded and encoded the same way regardless of the locale.
sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')

# Configure the API client. Report a missing key as a readable message on
# stderr instead of letting a KeyError traceback escape.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    print("OPENAI_API_KEY is not set", file=sys.stderr)
    sys.exit(1)
client = OpenAI(
    api_key=api_key,
)

# The text to summarize is read in full from stdin.
text = sys.stdin.read()

# Text transcribed by OpenAI Whisper can contain odd character sequences,
# so apply Unicode NFKC normalization before further processing.
# https://docs.python.org/3/library/unicodedata.html
text = unicodedata.normalize('NFKC', text)

# Nothing to summarize: stop before spending an API call on empty input.
if not text.strip():
    print("No input text was received on stdin", file=sys.stderr)
    sys.exit(1)

# The summary length target is the input length divided by SUMMERIZE_RATE
# (default 15). Reject zero, negative, and non-integer rates up front: they
# would otherwise cause a ZeroDivisionError/ValueError traceback or a
# meaningless (non-positive) character budget.
raw_rate = os.environ.get('SUMMERIZE_RATE', '15')
try:
    summerize_rate = int(raw_rate)
except ValueError:
    summerize_rate = 0
if summerize_rate <= 0:
    print(
        f"SUMMERIZE_RATE must be a positive integer, got {raw_rate!r}",
        file=sys.stderr,
    )
    sys.exit(1)

# Clamp to at least 1 so inputs shorter than the rate do not ask the model
# for a summary of "0 characters".
summerize_count = max(1, len(text) // summerize_rate)

# Build the prompt sent to the model: the instruction (with the character
# budget), a "###" separator, then the normalized input text.
prompt = "以下の内容を" + str(summerize_count) + "文字以内で要約してください\n\n###\n\n" + text

# Only the API call sits inside the try block. Each handler reports to
# stderr (keeping stdout clean for the summary) and exits non-zero; the
# original fell through to print(result) with `result` unbound, raising
# NameError. RateLimitError must be listed before APIStatusError because
# handlers are tried in order.
try:
    response = client.chat.completions.create(
        model=os.environ.get('MODEL_NAME', 'gpt-4-1106-preview'),
        temperature=0,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
except openai.APIConnectionError as e:
    print("The server could not be reached", file=sys.stderr)
    # The underlying exception, likely raised within httpx.
    print(e.__cause__, file=sys.stderr)
    sys.exit(1)
except openai.RateLimitError:
    print(
        "A 429 status code was received; we should back off a bit.",
        file=sys.stderr,
    )
    sys.exit(1)
except openai.APIStatusError as e:
    print("Another non-200-range status code was received", file=sys.stderr)
    print(e.status_code, file=sys.stderr)
    print(e.response, file=sys.stderr)
    sys.exit(1)

# Success path: emit the first choice's message content on stdout.
result = response.choices[0].message.content
print(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment