Created
September 18, 2024 09:10
-
-
Save davidgilbertson/fcabb55478b4a4e1537a706f808b8b09 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from io import StringIO | |
import re | |
from time import perf_counter | |
import json | |
import yaml | |
import pandas as pd | |
import tomlkit | |
from openai import OpenAI | |
# Shared API client used for every request below. It is built with no
# arguments, so credentials come from the library's defaults
# (presumably the OPENAI_API_KEY environment variable -- TODO confirm).
client = OpenAI()
def _parse_tsv(text):
    """Read tab-separated text into a DataFrame."""
    return pd.read_csv(StringIO(text), sep="\t")


def _parse_csv(text):
    """Read comma-separated text into a DataFrame."""
    return pd.read_csv(StringIO(text))


def _parse_json(text):
    """Decode JSON text and hand the result to the DataFrame constructor."""
    return pd.DataFrame(json.loads(text))


def _parse_yaml(text):
    """Decode YAML text with the safe loader and build a DataFrame from it."""
    return pd.DataFrame(yaml.safe_load(text))


def _parse_toml(text):
    """Decode TOML text and build a DataFrame from its 'countries' entry."""
    return pd.DataFrame(tomlkit.loads(text)["countries"])


# One entry per output format under test:
#   name   -- format name substituted into the prompt
#   hint   -- optional extra instruction for the model (key absent when unused)
#   parser -- callable turning the extracted response text into a DataFrame
data_formats = [
    {
        "name": "TSV",
        "parser": _parse_tsv,
    },
    {
        "name": "CSV",
        "hint": "Wrap any values with commas in quotes.",
        "parser": _parse_csv,
    },
    {
        "name": "columnar JSON",
        "hint": "Return a top-level object with fields as keys and values as lists.",
        "parser": _parse_json,
    },
    {
        "name": "YAML",
        "hint": "Return a top-level list.",
        "parser": _parse_yaml,
    },
    {
        "name": "TOML",
        "hint": "Return an array of tables with the name 'countries'.",
        "parser": _parse_toml,
    },
    {
        "name": "JSON",
        "parser": _parse_json,
    },
]
# Prompt template sent once per entry in `data_formats`. `str.format` fills
# `{data_format}` with the entry's name and `{hint}` with its optional
# format-specific instruction (an empty string when the entry has no hint).
# The backslash right after the opening quotes keeps the template from
# starting with a newline. The text must not contain any other curly braces,
# since `str.format` would treat them as replacement fields.
prompt = """\
Convert the below source text into a structure formatted as {data_format}. {hint}
Include the following fields:
- `name`, the country name in English
- `leaderName`, the name of the leader
- `leaderDOB`, the date of birth as a string like 'mmmm dd, yyyy'
- `leaderSO`, the leader's significant other
- `population`, as an integer
- `area`, as an integer in km²
Use the string 'Unknown' if data is missing.
<source_text>
Austria (Österreich), led by Chancellor Karl Nehammer (born October 18, 1972), is married to Katharina Nehammer. Austria has a population of approximately 9 million people and covers an area of 83,879 square kilometers.
Belgium (Belgique/België), under Prime Minister Alexander De Croo (born November 3, 1975), who is married to Annik Penders, has a population of around 11.6 million and an area of 30,528 square kilometers.
Bulgaria (България), led by acting Prime Minister Dimitar Glavchev, has a population of about 6.9 million and spans an area of 110,879 square kilometers. There is no publicly available information about Glavchev’s significant other.
Croatia (Hrvatska), where Prime Minister Andrej Plenković (born April 8, 1970) is married to Ana Maslać Plenković, has a population of around 3.9 million and covers an area of 56,594 square kilometers.
Cyprus (Κύπρος/Kıbrıs) is led by President Nikos Christodoulides (born December 6, 1973), who is married to Philippa Karsera. Cyprus has a population of approximately 1.2 million and an area of 9,251 square kilometers.
Czech Republic (Česká republika), under the leadership of Prime Minister Petr Fiala (born September 1, 1964), who is married to Jana Fialová, has a population of about 10.7 million and covers an area of 78,866 square kilometers.
Denmark (Danmark), led by Prime Minister Mette Frederiksen (born November 19, 1977), who is married to Bo Tengberg, has a population of approximately 5.8 million and an area of 42,933 square kilometers.
Estonia (Eesti), with Prime Minister Kaja Kallas (born June 18, 1977), married to Arvo Hallik, has a population of around 1.3 million and covers an area of 45,227 square kilometers.
Finland (Suomi), led by Prime Minister Petteri Orpo (born November 3, 1969), who is married to Niina Kanniainen, has a population of about 5.5 million and an area of 338,145 square kilometers.
France (France), where President Emmanuel Macron (born December 21, 1977) is married to Brigitte Macron, has a population of approximately 67 million and spans an area of 551,695 square kilometers.
Germany (Deutschland), led by Chancellor Olaf Scholz (born June 14, 1958), married to Britta Ernst, has a population of about 83 million and covers an area of 357,022 square kilometers.
Greece (Ελλάδα), under Prime Minister Kyriakos Mitsotakis (born March 4, 1968), who is married to Mareva Grabowski-Mitsotakis, has a population of approximately 10.4 million and an area of 131,957 square kilometers.
Hungary (Magyarország), led by Prime Minister Viktor Orbán (born May 31, 1963), married to Anikó Lévai, has a population of around 9.6 million and an area of 93,028 square kilometers.
Ireland (Éire), under Taoiseach Leo Varadkar (born January 18, 1979), who is in a relationship with Matthew Barrett, has a population of about 5 million and covers an area of 70,273 square kilometers.
Italy (Italia), led by Prime Minister Giorgia Meloni (born January 15, 1977), who is partnered with Andrea Giambruno, has a population of approximately 60 million and an area of 301,340 square kilometers.
Latvia (Latvija), where Prime Minister Evika Siliņa (born August 15, 1975) is married to Jānis Siliņš, has a population of about 1.8 million and covers an area of 64,589 square kilometers.
Lithuania (Lietuva), led by Prime Minister Ingrida Šimonytė (born November 15, 1974), has a population of around 2.8 million and an area of 65,300 square kilometers. There is no publicly known information about her significant other.
Luxembourg (Lëtzebuerg), where Prime Minister Xavier Bettel (born March 3, 1973) is married to Gauthier Destenay, has a population of approximately 640,000 and covers an area of 2,586 square kilometers.
Malta (Malta), led by Prime Minister Robert Abela (born December 7, 1977), who is married to Lydia Abela, has a population of around 514,000 and spans an area of 316 square kilometers.
Netherlands (Nederland), under the leadership of Prime Minister Mark Rutte (born February 14, 1967), who is not publicly known to have a significant other, has a population of about 17.4 million and covers an area of 41,543 square kilometers.
Poland (Polska), led by Prime Minister Mateusz Morawiecki (born June 20, 1968), married to Iwona Morawiecka, has a population of approximately 38 million and spans an area of 312,696 square kilometers.
Portugal (Portugal), where Prime Minister António Costa (born July 17, 1961) is married to Fernanda Maria Gonçalves Tadeu, has a population of about 10 million and an area of 92,090 square kilometers.
Romania (România), led by President Klaus Iohannis (born June 13, 1959), who is married to Carmen Iohannis, has a population of around 19 million and covers an area of 238,397 square kilometers.
Slovakia (Slovensko), where Prime Minister Robert Fico (born September 15, 1964) is married to Svetlana Ficová, has a population of about 5.4 million and an area of 49,035 square kilometers.
Slovenia (Slovenija), under the leadership of Prime Minister Robert Golob (born January 23, 1967), has a population of around 2.1 million and covers an area of 20,273 square kilometers. There is no publicly known information about his significant other.
Spain (España), led by Prime Minister Pedro Sánchez (born February 29, 1972), married to Begoña Gómez, has a population of approximately 47 million and an area of 505,990 square kilometers.
Sweden (Sverige), under Prime Minister Ulf Kristersson (born December 29, 1963), who is married to Birgitta Ed, has a population of about 10.4 million and covers an area of 450,295 square kilometers.
</source_text>
"""
# Matches the first fenced code block in a reply: an opening fence, whatever
# follows it on that line (e.g. a language tag), then the payload up to the
# next fence. Compiled once instead of on every loop iteration.
_FENCED_BLOCK = re.compile(r"```.*?\n(.*?)```", re.DOTALL)


def _extract_payload(reply, format_name):
    """Return the structured-data text contained in a model reply.

    Args:
        reply: The message content returned by the model (may be None/empty).
        format_name: Name of the requested format, used only in error text.

    Returns:
        The body of the first fenced code block, or the whole reply unchanged
        when the model answered without fences.

    Raises:
        ValueError: If the reply has no text content at all.
    """
    if not reply:
        raise ValueError(
            f"Model returned no text content for format {format_name!r}"
        )
    fenced = _FENCED_BLOCK.search(reply)
    if fenced is None:
        # No fences: let the format's parser try the raw reply instead of
        # failing with an AttributeError on a missing regex match.
        return reply
    return fenced.group(1)


# Ask the model for each format in turn and record token usage, latency,
# the raw payload text and the parsed DataFrame.
df_data = []
for data_format in data_formats:
    start = perf_counter()
    response_raw = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            dict(
                role="user",
                content=prompt.format(
                    data_format=data_format["name"],
                    hint=data_format.get("hint", ""),
                ),
            )
        ],
        temperature=0,
    )
    # Stop the clock as soon as the request returns so that only the API call
    # is timed, not the extraction/parsing below.
    response_time = perf_counter() - start

    response = response_raw.choices[0].message.content
    object_text = _extract_payload(response, data_format["name"])
    response_object = data_format["parser"](object_text)

    df_data.append(
        dict(
            Format=data_format["name"],
            ResponseTokenCount=response_raw.usage.completion_tokens,
            ResponseTime=response_time,
            ResponseObjectText=object_text,
            ResponseObject=response_object,
        )
    )

df = pd.DataFrame.from_records(df_data)
df = df.sort_values(by="ResponseTokenCount", ascending=False)

# Check that every format parsed to the same table. The comparison is done on
# the printed (str) form of each DataFrame, exactly as before. An explicit
# raise is used instead of `assert` so the check still runs under `python -O`.
distinct_outputs = df.ResponseObject.apply(str).nunique()
if distinct_outputs != 1:
    raise AssertionError(
        f"Expected identical parsed results for all formats, "
        f"found {distinct_outputs} distinct outputs"
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment