Last active
December 10, 2024 03:54
-
-
Save huydhoang/ef2ae44cda7aee18873dd18cb4a3fab1 to your computer and use it in GitHub Desktop.
Test cases for _parse_response()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample response strings that the loop at the bottom of the file feeds to _parse_response().
# text1 starts with prose placed right after a bare ``` fence and later has three fences on one line
# ("this:``` ``` ```python"); text2 starts with a ```python fence whose next line is a ``` fence followed by prose.
text1 = " ``` To extract the annual inflation rates for Vietnam from 1986 until 2023, we need to follow these steps:\n\n1. **Filter the DataFrame**: We need to filter the DataFrame to only include rows where the `Country Name` is 'Vietnam'.\n2. **Select the Relevant Columns**: After filtering, we select the columns for the years from 1986 to 2023.\n3. **Extract and Print the Data**: Finally, we extract the inflation rates and print them in a readable format.\n\nLet's write the Python code to accomplish this:``` ``` ```python\nimport pandas as pd\n\n# Assuming df is already loaded and parsed correctly\n\n# Step 1: Filter the DataFrame for Vietnam\nvietnam_df = df[df['Country Name'] == 'Vietnam']\n\n# Step 2: Select the relevant columns (years from 1986 to 2023)\nselected_columns = ['1986', '1987', '1988', '1989', '1990',\n '1991', '1992', '1993', '1994', '1995',\n '1996', '1997', '1998', '1999', '2000',\n '2001', '2002', '2003', '2004', '2005',\n '2006', '2007', '2008', '2009', '2010',\n '2011', '2012', '2013', '2014', '2015',\n '2016', '2017', '2018', '2019', '2020',\n '2021', '2022', '2023']\nvietnam_inflation = vietnam_df[selected_columns]\n\n# Step 3: Extract and print the data\ninflation_rates = vietnam_inflation.iloc[0] # Assuming there's only one row for Vietnam\n\nprint(\"Annual Inflation Rates for Vietnam from 1986 to 2023:\")\nfor year, rate in inflation_rates.items():\n print(f\"{year}: {rate}\")\n```\n\nThis code will filter the DataFrame for Vietnam and extract the annual inflation rates for the specified years. It then prints these rates in a readable format. Make sure that the `Country Name` column contains 'Vietnam' exactly as specified to avoid any filtering issues." | |
text2 = " ```python\n``` To solve this problem, we need to perform several steps:\n\n1. Filter the DataFrame to extract rows corresponding to Vietnam.\n2. Extract the annual inflation rates for Vietnam from 1986 to 2023.\n3. Print these inflation rates.\n4. Calculate the compound inflation rate from 2013 to 2023.\n\nLet's assume that the indicator name for inflation is \"Consumer Price Index (CPI): Inflation, average (%)\". We'll use this assumption to filter the data.\n\nHere's the Python code to achieve this:\n\n```python\nimport pandas as pd\n\n# Assuming df is already read and parsed correctly\n\n# Step 1: Filter the DataFrame for Vietnam\nvietnam_df = df[df['Country Name'] == 'Vietnam']\n\n# Step 2: Extract inflation rates from 1986 to 2023\ninflation_rates = vietnam_df[vietnam_df['Indicator Name'] == 'Consumer Price Index (CPI): Inflation, average (%)']\ninflation_rates = inflation_rates.iloc[:, 4:-1] # Select columns from 1986 to 2023\n\n# Step 3: Print the inflation rates\nprint(inflation_rates.to_markdown())\n\n# Step 4: Calculate the compound inflation rate from 2013 to 2023\ncompound_inflation_rate = 1.0\nfor year in range(2013, 2024):\n rate = inflation_rates[str(year)].values[0]\n if pd.notna(rate):\n compound_inflation_rate *= (1 + rate / 100)\n\n# Convert the compound inflation rate to a percentage and subtract 100%\ncompound_inflation_rate_percentage = (compound_inflation_rate - 1) * 100\n\nprint(f\"Compound Inflation Rate from 2013 to 2023: {compound_inflation_rate_percentage:.2f}%\")\n```\n\n### Explanation:\n\n1. **Filtering for Vietnam**: We filter the DataFrame to get rows where the 'Country Name' is 'Vietnam'.\n2. **Extracting Inflation Rates**: We further filter to get the row with the indicator name \"Consumer Price Index (CPI): Inflation, average (%)\". Then, we select columns from 1986 to 2023.\n3. **Printing Inflation Rates**: We print the extracted inflation rates in a markdown format for better readability.\n4. 
**Calculating Compound Inflation Rate**: We iterate over each year from 2013 to 2023, multiply the compound inflation rate by `(1 + rate / 100)` if the rate is not NaN, and finally convert it to a percentage.\n\nMake sure to adjust the indicator name if it differs in your dataset." | |
def _parse_response(response: str) -> list:
    """
    Parses the response to separate text and code blocks, capturing valid language identifiers.

    The response is processed line by line and every line is split on the
    triple-backtick fence marker. Pieces at even positions of the split are
    content for the current state (text or code). Pieces at odd positions
    directly follow a fence: that fence toggles between text and code, and the
    piece itself is interpreted as follows:

    * a single token is treated as a language tag; it is recorded when it is
      one of the known languages and discarded otherwise;
    * multi-word text is kept as content of the state the fence switched
      into, so a sentence sharing a line with a fence is not lost.

    Lines (or pieces) that are empty or whitespace-only are not emitted.

    Note: because only odd-positioned pieces toggle the state, a second fence
    on the same line does not toggle by itself; this tolerant behavior is kept
    so that runs such as "``` ``` ```python" still open a python block.

    Args:
        response (str): The input string containing text and code.

    Returns:
        list: A list of tuples where each tuple contains a segment of text/code,
        a boolean indicating whether it's a code block, and an optional language
        identifier (None for text and for code without a known language).
        Adjacent segments with the same type and language are merged.
    """
    known_languages = {
        "python", "javascript", "java", "cpp", "c", "c#", "ruby", "go", "php",
        "typescript", "shell", "bash", "html", "css", "json", "yaml", "xml", "sql",
        "kotlin", "swift", "r", "perl", "scala", "lua", "rust", "dart", "haskell",
    }

    segments = []
    current_segment = []
    in_code_block = False
    current_language = None  # Language of the code block currently open, if any

    for line in response.splitlines():
        parts = line.split("```")
        for i, part in enumerate(parts):
            if i % 2 == 0:
                # Plain content in the current state; blank pieces are skipped.
                if part.strip():
                    current_segment.append(part)
                continue

            # This piece follows a fence: close out whatever was collected
            # under the state that is about to end.
            if current_segment:
                segments.append(
                    ("\n".join(current_segment), in_code_block, current_language)
                )
                current_segment = []

            candidate = part.strip()
            if in_code_block:
                # Closing fence.
                in_code_block = False
                current_language = None
            else:
                # Opening fence; only an exact known language is recorded.
                in_code_block = True
                current_language = (
                    candidate if candidate in known_languages else None
                )

            # A lone token next to a fence is a language tag (known or not) and
            # is never content. Multi-word text is real content that merely
            # shares the line with the fence, so keep it instead of dropping it.
            if len(candidate.split()) > 1:
                current_segment.append(part)

        # Inside a code block each line is flushed on its own; the per-line
        # segments are merged again by the rejoin step below.
        if in_code_block and current_segment:
            segments.append(("\n".join(current_segment), True, current_language))
            current_segment = []

    # Add any remaining text or code (covers an unterminated code block too).
    if current_segment:
        segments.append(("\n".join(current_segment), in_code_block, current_language))

    # Rejoin adjacent blocks that share type and language.
    return _rejoin_adjacent_blocks_with_language(segments)
def _rejoin_adjacent_blocks_with_language(segments):
    """
    Collapse runs of neighbouring segments that agree on both the `is_code`
    flag and the language identifier into a single segment.

    Args:
        segments (list): List of (content, is_code, language) tuples.

    Returns:
        list: A new list in which each run of compatible neighbours has been
        replaced by one tuple whose content is the run's contents joined with
        newlines. An empty input is returned unchanged.
    """
    if not segments:
        return segments

    remaining = iter(segments)
    # The first segment seeds the result untouched.
    combined = [next(remaining)]

    for text, code_flag, lang in remaining:
        prev_text, prev_flag, prev_lang = combined[-1]
        if prev_flag != code_flag or prev_lang != lang:
            # Type or language changed: start a new segment.
            combined.append((text, code_flag, lang))
            continue
        # Same type and language: extend the previous segment.
        combined[-1] = (prev_text + "\n" + text, code_flag, lang)

    return combined
# Run the parser over each sample and print every segment wrapped in a
# [text]...[/text] or [code (lang)]...[/code] marker pair.
texts = [text1, text2]
for sample in texts:
    for body, code_flag, lang in _parse_response(sample):
        trimmed = body.strip()
        if not code_flag:
            print("[text]", trimmed, "[/text]")
            continue
        # The language suffix is shown only when one was recognised.
        suffix = f" ({lang})" if lang else ""
        print(f"[code{suffix}]", trimmed, "[/code]")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment