Translation of grist-help into French using GPT-4o
# !IMPORTANT
# Switch the model from gpt-4o to gpt-4o-mini when the translated file is larger than 4096 tokens
# (a sketch of automating this choice follows the script)
import os
import openai
from pathlib import Path
import json
import argparse
import datetime
from openai.types.batch import Batch

client = openai.Client(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

def create_batch_jsonl_item_for_markdown_to_translate(markdown_file: Path) -> str:
    """Create a single jsonl item, consumable by the OpenAI batch API, for each markdown file in the source directory."""
    system_prompt = """
You will be provided with markdown content written in English, and your task is to translate it into French. You must take into account the following glossary in TSV format:
```
user attributes	propriété d'utilisateur
lookup column	cible
Lookup table	table d'appairage
special rules	Règles avancées
access rules	permissions avancées
seed rules	règles par défaut
personal site	espace personnel
team site	espace d'équipe
range	intervalle
raw data tables	données sources
record card	vue fiche
widget	vue
currency	devise
sandbox	sandbox
workspace	dossier
table	table
trigger formula	formule d'initialisation
```
"""
    markdown_content = markdown_file.read_text(encoding='utf-8')
    messages = [{
        "role": "system",
        "content": system_prompt
    }, {
        "role": "user",
        "content": markdown_content,
    }]
    return json.dumps({
        "custom_id": str(markdown_file).replace('en', 'fr', 1),
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": "gpt-4o",
            "temperature": 0.3,
            "top_p": 1,
            "messages": messages,
        }
    })

def create_translation_batch(markdown_files: list[str]):
    """Send a translation jsonl file to the OpenAI batch API."""
    jsonl_items = [
        create_batch_jsonl_item_for_markdown_to_translate(Path(markdown_file)) for markdown_file in markdown_files
    ]
    jsonl_content = "\n".join(jsonl_items)
    batch_input_file = client.files.create(
        file=bytes(jsonl_content, encoding="utf-8"),
        purpose="batch",
    )
    batch_input_file_id = batch_input_file.id
    print('Batch input file ID:', batch_input_file_id)
    input_file_path = f'batch_input-{batch_input_file_id}.jsonl'
    Path(input_file_path).write_text(jsonl_content, encoding='utf-8')
    print(f'Saved jsonl content to {input_file_path}')
    client.batches.create(
        input_file_id=batch_input_file_id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
    )

def print_batch_info(batch: Batch):
    print('-----------------')
    print("Batch ID:", batch.id)
    print("Created At:", datetime.datetime.fromtimestamp(batch.created_at).isoformat().replace('T', ' '))
    print("Status:", batch.status)
    print('Output file ID:', batch.output_file_id)
    print('Errors:', batch.errors)
    print('-----------------')
    return client.batches.retrieve(batch.id)

if __name__ == "__main__":
    """
    Usage:
        python batch.py upload md_file1 md_file2 md_file3...
        python batch.py check batch_id|all
        python batch.py download file_id --write
    """
    parser = argparse.ArgumentParser(description='Translate grist-help markdown files into French through the OpenAI batch API.')
    parser.add_argument('command', type=str, help='upload|check|download')
    parser.add_argument('args', nargs='+', help='List of markdown files or batch id')
    parser.add_argument('--write', action='store_true', help='Write the downloaded content to a file', default=False)
    parser.add_argument('--raw', action='store_true', help='Show raw batch info', default=False)
    args = parser.parse_args()
    if args.command == 'upload':
        create_translation_batch(args.args)
    elif args.command == 'check':
        if args.args[0] == 'all':
            batches = client.batches.list().data
            batches.sort(key=lambda x: x.created_at, reverse=False)
            for batch in batches:
                print_batch_info(batch)
        else:
            if args.raw:
                print(client.batches.retrieve(args.args[0]))
            else:
                print_batch_info(client.batches.retrieve(args.args[0]))
    elif args.command == 'download':
        file_response = client.files.content(args.args[0])
        responses = [json.loads(line) for line in file_response.text.split('\n') if line]
        print('-----------------')
        for response in responses:
            if response['response']['status_code'] == 200:
                print(f'Treating {response["custom_id"]}')
                if args.write:
                    with open(response['custom_id'], 'w', encoding='utf-8') as f:
                        f.write(response['response']['body']['choices'][0]['message']['content'])
                    print(f'Saved to {response["custom_id"]}')
                else:
                    print(response['response']['body']['choices'][0]['message']['content'])
            else:
                print('Error:', response['response']['error'])
            print('-----------------')
    else:
        print('Invalid command')
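
The note at the top of the script asks you to swap the model by hand when a file is too long for gpt-4o. If you would rather automate that choice, a minimal sketch along the following lines could feed the "model" field in create_batch_jsonl_item_for_markdown_to_translate. The pick_model helper, the 4096-token threshold, and the use of tiktoken for counting are illustrative assumptions, not part of the original gist; tiktoken has to be installed separately (pip install tiktoken).

```
import tiktoken
from pathlib import Path


def pick_model(markdown_file: Path, threshold: int = 4096) -> str:
    """Return "gpt-4o-mini" when the source file alone already exceeds the assumed
    token budget for gpt-4o, otherwise "gpt-4o". The threshold is an assumption."""
    # gpt-4o is mapped to the o200k_base encoding in recent tiktoken releases.
    encoding = tiktoken.encoding_for_model("gpt-4o")
    token_count = len(encoding.encode(markdown_file.read_text(encoding="utf-8")))
    return "gpt-4o-mini" if token_count > threshold else "gpt-4o"
```

Counting the English source only approximates the length of the French output, so a file just under the threshold may still produce a translation longer than the smaller output window.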