Created
February 13, 2023 18:45
-
-
Save wilsonusman/53179926d00b871a0c8b662f540fbdda to your computer and use it in GitHub Desktop.
How to parse an ACES XML file in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Parse an ACES XML file into a pandas DataFrame and explore it.

The script reads the XML with xmltodict, flattens the ``App`` records with
``pandas.json_normalize`` and then prints a few quick views of the data.
"""
import xml.etree.ElementTree as ET
from timeit import default_timer as timer

import pandas as pd
import xmltodict

# Location of the ACES file to load; point this at your own file.
XML_PATH = '../path/to/file.xml'

# Part number used by the filtering example at the bottom of the script.
PART_NUMBER = '5212 3521AD'

# Open your XML and convert it to a dictionary.
timer_start = timer()
# Binary mode lets the XML parser honor the encoding declared in the document
# instead of decoding with the platform's default text encoding.
with open(XML_PATH, 'rb') as xml_file:
    # xmltodict yields a plain dict for a single child element and a list for
    # repeated ones; force_list keeps 'App' a list in both cases.
    document = xmltodict.parse(xml_file, force_list=('App',))
apps = document['ACES']['App']
seconds = timer() - timer_start
print(f'Finished parsing XML in {seconds} seconds')

# Convert the parsed records to a DataFrame.
# json_normalize() flattens the nested dictionaries into dotted column names.
df = pd.json_normalize(apps)

# Summary
# describe() gives a quick overview of the counts and unique values.
print(df.describe())

# View all columns and their type, then pick the ones you want to look at.
print(df.dtypes)

# Select the first few rows.
# `columns` is the subset of columns to preview; the first five are used here
# as a placeholder, replace the list with the column names you care about.
columns = df.columns[:5].tolist()
print(df[columns].head())

# Filter any column based on criteria.
# Select all rows whose Part equals the given part number and preview them.
print(df[df['Part'] == PART_NUMBER].head())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment