Script to summarize a text file by splitting it into chunks and sending each chunk to the OpenAI API.
import openai

# Set up your OpenAI API credentials
openai.api_key = '<apikey>'

# Split the text into chunks of at most `max_tokens` characters.
# The text is split on whitespace and chunk size is measured in characters,
# which is only a rough proxy for actual tokens but keeps each chunk well
# within the model's context window.
def chunk_text(text, max_tokens):
    tokens = text.split()
    chunks = []
    current_chunk = ""
    for token in tokens:
        if len(current_chunk) + len(token) + 1 <= max_tokens:
            current_chunk += token + " "
        else:
            chunks.append(current_chunk.strip())
            current_chunk = token + " "
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks

# Send each chunk to the OpenAI API and append the completion to a file
def send_chunks_to_api(chunks, output_file):
    with open(output_file, 'a', encoding='utf-8') as file:
        for chunk in chunks:
            response = openai.Completion.create(
                engine="text-davinci-003",
                prompt=chunk,
                max_tokens=100,
                temperature=0.7
            )
            file.write(response.choices[0].text + "\n")

# Read the text file, chunk it, and send the chunks to the API
def process_text_file(filename, output_file):
    with open(filename, 'r', encoding='utf-8') as file:
        text = file.read()
    chunks = chunk_text(text, 4000)
    send_chunks_to_api(chunks, output_file)

# Example usage
filename = 'openai-input.txt'
output_file = 'output.txt'
process_text_file(filename, output_file)
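
Note: the legacy openai.Completion endpoint and the text-davinci-003 engine used above have since been deprecated in newer versions of the openai Python library. Below is a minimal sketch of an equivalent per-chunk call against the openai>=1.0 client; the gpt-3.5-turbo model, the summarize_chunk helper name, and the explicit summarization instruction are assumptions and not part of the original script.

from openai import OpenAI

# Hypothetical sketch for openai>=1.0: the chat completions endpoint replaces
# openai.Completion. Model name and prompt wording are assumptions.
client = OpenAI(api_key='<apikey>')

def summarize_chunk(chunk):
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": "Summarize the following text:\n\n" + chunk}
        ],
        max_tokens=100,
        temperature=0.7,
    )
    return response.choices[0].message.content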