translate LLM poc (api)
requirements.txt:
aiohttp==3.8.5
aiosignal==1.3.1
async-timeout==4.0.2
attrs==23.1.0
certifi==2023.7.22
charset-normalizer==3.2.0
colorama==0.4.6
filelock==3.12.2
frozenlist==1.4.0
fsspec==2023.6.0
huggingface-hub==0.16.4
idna==3.4
multidict==6.0.4
numpy==1.25.1
openai==0.27.8
packaging==23.1
PyYAML==6.0.1
regex==2023.6.3
requests==2.31.0
safetensors==0.3.1
tokenizers==0.13.3
tqdm==4.65.0
transformers==4.31.0
typing_extensions==4.7.1
urllib3==2.0.4
yarl==1.9.2
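Install the pinned dependencies with "pip install -r requirements.txt" before running the script.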
system.txt:
You are a translator that translates C code to C#. Don't give hints, just generate the code; don't embed the code in markdown or format the code.
Refactor functions to CamelCase.
Pointers to primitive types should be converted to SpanPtr<>.
Pointers to non-primitive types are references.
Don't use the method Slice; SpanPtr has overloads for "+", "-", "++" and "--".
Keep memcpy and memmove functions.
Don't use unsafe.
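For instance, under these rules a C parameter like "char *buf" should come back as "SpanPtr<char> buf", advancing it would be written "buf++" rather than "buf = buf.Slice(1)", and a function named "my_func" would be renamed "MyFunc". (This is an illustrative expectation only; the actual output depends on the model.)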
The translation script (Python):
import os
import openai
from transformers import GPT2Tokenizer

# The OpenAI GPT-2 tokenizer is the same as the GPT-3 tokenizer;
# we use it to count the number of tokens in the text.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
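# e.g. len(tokenizer.encode("int x = 0;")) gives the token count of that
# line; these counts drive the batching in group_chunks below.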
openai.api_key = os.getenv("OPENAI_API_KEY")

# ANSI escape sequences for colored output and cursor control
esc = "\x1b"
red = f"{esc}[31m"
green = f"{esc}[92m"
blue = f"{esc}[94m"
white = f"{esc}[37m"
yellow = f"{esc}[33m"
zero_cursor = f"{esc}[2;0H"
save_cursor = f"{esc}[s"
restore_cursor = f"{esc}[u"

with open("system.txt", "r") as system_file:
    # read the whole prompt as a single string
    system = system_file.read()
def prompter(argument):
    cur_prompt = {"role": "user", "content": argument}
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        # model="gpt-4",
        messages=[
            {"role": "system", "content": system},
            cur_prompt,
        ],
    )
    # retrieve the model's message from the response
    return response["choices"][0]["message"]["content"]
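# Note: this uses the legacy pre-1.0 OpenAI client (openai==0.27.8 in
# requirements.txt); openai.ChatCompletion and the dict-style response
# access above were removed in the 1.x library.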
# adapted from:
# https://github.com/openai/openai-cookbook/blob/main/examples/book_translation/translate_latex_book.ipynb
def group_chunks(chunks, ntokens, max_len=1000, hard_max_len=3000):
    """
    Group very short chunks, to form approximately page-long chunks.
    """
    batches = []
    cur_batch = ""
    cur_tokens = 0
    # iterate over chunks, and group the short ones together
    for chunk, ntoken in zip(chunks, ntokens):
        # discard chunks that exceed the hard max length
        if ntoken > hard_max_len:
            print(f"Warning: chunk discarded for being too long ({ntoken} tokens > {hard_max_len} token limit). Preview: '{chunk[:50]}...'")
            continue
        # if there is room in the current batch, add the new chunk
        if cur_tokens + 1 + ntoken <= max_len:
            cur_batch += chunk
            cur_tokens += 1 + ntoken  # adds 1 token for the newline
        # otherwise, record the batch and start a new one
        else:
            batches.append(cur_batch)
            cur_batch = chunk
            cur_tokens = ntoken
    if cur_batch:  # add the last batch if it's not empty
        batches.append(cur_batch)
    return batches
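# Illustrative example (hypothetical inputs): with max_len=4, two one-token
# lines are grouped together (each costs its tokens plus 1 for the newline),
# while a 10-token line starts a new batch:
#   group_chunks(["a\n", "b\n", "c\n"], [1, 1, 10], max_len=4)
#   -> ["a\nb\n", "c\n"]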
def main():
    with open("source.c", "r") as source_file:
        chunks = source_file.readlines()
    with open("target.cs", "w") as target_file:
        target_file.write("/* automatic code translation from C to C# */\n")
    ntokens = []
    for chunk in chunks:
        ntokens.append(len(tokenizer.encode(chunk)))
    print(f"{white}longest number of tokens in a single line: {max(ntokens)}")
    chunks = group_chunks(chunks, ntokens)

    def status():
        # reads the loop variable i from the enclosing scope at call time
        print(f"{save_cursor}{zero_cursor}{yellow}chunk {i} / {len(chunks)}{restore_cursor}")

    for i in range(len(chunks)):
        code_slice = chunks[i]
        print(f"{green}{code_slice}\n")
        status()
        translation = prompter(code_slice)
        print(f"{blue}{translation}\n")
        status()
        if translation:
            with open("target.cs", "a") as target_file:
                target_file.write(f"{translation}\n")
        else:
            print(f"{red}Failed to translate chunk {i}")
            break

if __name__ == "__main__":
    main()
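To run the script, set the OPENAI_API_KEY environment variable, put the C source in source.c and the prompt above in system.txt next to the script, and run it with Python 3; the translated C# is appended chunk by chunk to target.cs.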