Last active
May 4, 2023 16:34
-
-
Save mnoah66/19f54d471ae9c483b9b4832c475df269 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################################################## | |
############################################################################################## | |
# Parse out event data from a M365 Compliance audit report to see, for example, what files and folders were moved. | |
# Creates columns in a dataframe to show source, destination, and item names. | |
# This was created to help inexperienced staff troubleshoot issues they have when moving files or folders around in | |
# OneDrive, especially if they are from/to shared locations and personal OneDrives. | |
############################################################################################## | |
############################################################################################## | |
import pandas as pd | |
import json | |
df = pd.read_csv("M365_EXPORTED_FILE.csv") | |
# Extract the EventData XML from within the AuditData json/dictionary | |
df['EventDataXML'] = df.apply(lambda x: json.loads(x['AuditData'])['EventData'] if 'EventData' in x['AuditData'] else "", axis = 1) | |
# Regex to extract text between SourceFileUrl and TargetWebUrl tags and put in their own columns | |
df['SourceFileUrl'] = df['EventDataXML'].str.extract(r"<SourceFileUrl>(.*)</SourceFileUrl>") | |
df['TargetFileUrl'] = df['EventDataXML'].str.extract(r"<TargetFileUrl>(.*)</TargetWebUrl>") | |
# Get the value after the last forward-slash from the URL | |
# E.g. https://contoso.sharepoint.com/sites/payroll/Shared Documents/General/Reports/2020/data.xlsx >>> data.xlsx | |
df['File or folder'] = df['SourceFileUrl'].str.rsplit("/", n=1, expand=False).str[-1] | |
# Save df to a csv in the current directory | |
df.to_csv("file.csv") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment