Created
July 12, 2023 03:50
-
-
Save bjulius/d97b8824c86996ae7007a2fd6e21af93 to your computer and use it in GitHub Desktop.
Code Interpreter Python, M and R Code for Enterprise DNA Power Query Workout 10
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
PYTHON CODE | |
*********** | |
import pandas as pd | |
# Load the data | |
data = pd.read_csv('/mnt/data/Matthias Challenge.csv') | |
# Convert 'Date' to datetime format | |
data['Date'] = pd.to_datetime(data['Date']) | |
# Remove commas from 'Unit Value' and 'Value Total' and convert to numeric | |
data['Unit Value'] = data['Unit Value'].str.replace(',', '').astype(float) | |
data['Value Total'] = data['Value Total'].str.replace(',', '').astype(float) | |
# Separate sales and returns | |
sales = data[data['Qty'] > 0].sort_values(by='Date') | |
returns = data[data['Qty'] < 0].sort_values(by='Date') | |
# Create a list to store the IDs of sales to be eliminated | |
sales_to_eliminate = [] | |
# For each return, find the corresponding sale | |
for _, return_row in returns.iterrows(): | |
product = return_row['ID_Product'] | |
qty = abs(return_row['Qty']) | |
return_date = return_row['Date'] | |
# Find sales of the same product and quantity before the return date | |
matching_sales = sales[(sales['ID_Product'] == product) & (sales['Qty'] == qty) & (sales['Date'] < return_date)] | |
# If there are matching sales, take the one with the latest date (nearest to the return) | |
if not matching_sales.empty: | |
sale_to_eliminate = matching_sales.iloc[-1] | |
sales_to_eliminate.append(sale_to_eliminate['ID_Sales']) | |
# Remove this sale from the sales dataframe to avoid using it for another return | |
sales = sales[sales['ID_Sales'] != sale_to_eliminate['ID_Sales']] | |
# Create a list of all IDs to eliminate (returns and their corresponding sales) | |
ids_to_eliminate = list(returns['ID_Sales']) + sales_to_eliminate | |
# Eliminate these IDs from the original dataframe | |
final_data = data[~data['ID_Sales'].isin(ids_to_eliminate)] | |
final_data | |
PQ/M CODE | |
********** | |
let | |
// Load the data | |
Source = Csv.Document(File.Contents("/mnt/data/Matthias Challenge.csv"),[Delimiter=",", Columns=7, Encoding=1252, QuoteStyle=QuoteStyle.None]), | |
#"Promoted Headers" = Table.PromoteHeaders(Source, [PromoteAllScalars=true]), | |
#"Changed Type" = Table.TransformColumnTypes(#"Promoted Headers",{{"ID_Sales", Int64.Type}, {"Date", type date}, {"ID_Product", Int64.Type}, {"Product", type text}, {"Qty", Int64.Type}, {"Unit Value", Currency.Type}, {"Value Total", Currency.Type}}), | |
// Separate sales and returns | |
Sales = Table.SelectRows(#"Changed Type", each ([Qty] > 0)), | |
Returns = Table.SelectRows(#"Changed Type", each ([Qty] < 0)), | |
// Sort sales and returns by date | |
SortedSales = Table.Sort(Sales,{{"Date", Order.Ascending}}), | |
SortedReturns = Table.Sort(Returns,{{"Date", Order.Ascending}}), | |
// Join sales and returns on product and quantity | |
Joined = Table.NestedJoin(SortedReturns,{"ID_Product", "Qty"},SortedSales,{"ID_Product", "Qty"},"NewColumn",JoinKind.Inner), | |
// Filter out sales that occurred after the return | |
Filtered = Table.SelectRows(Joined, each [Date] > [NewColumn.Date]), | |
// Group by sales ID and keep the one with the latest date | |
Grouped = Table.Group(Filtered, {"ID_Sales"}, {{"LatestDate", each List.Max([Date]), type date}}), | |
// Remove returns and their corresponding sales from the original data | |
Final = Table.SelectRows(#"Changed Type", each ([ID_Sales] <> Grouped[ID_Sales])) | |
in | |
Final | |
R CODE | |
******* | |
# Load necessary libraries | |
library(dplyr) | |
library(lubridate) | |
# Load the data | |
data <- read.csv("/mnt/data/Matthias Challenge.csv") | |
# Convert 'Date' to date format | |
data$Date <- mdy(data$Date) | |
# Separate sales and returns | |
sales <- filter(data, Qty > 0) %>% arrange(Date) | |
returns <- filter(data, Qty < 0) %>% arrange(Date) | |
# Initialize an empty vector to store the IDs of sales to be eliminated | |
sales_to_eliminate <- c() | |
# For each return, find the corresponding sale | |
for (i in 1:nrow(returns)) { | |
# Find sales of the same product and quantity before the return date | |
matching_sales <- filter(sales, ID_Product == returns$ID_Product[i] & Qty == abs(returns$Qty[i]) & Date < returns$Date[i]) | |
# If there are matching sales, take the one with the latest date (nearest to the return) | |
if (nrow(matching_sales) > 0) { | |
sale_to_eliminate <- tail(matching_sales, n=1)$ID_Sales | |
sales_to_eliminate <- c(sales_to_eliminate, sale_to_eliminate) | |
# Remove this sale from the sales dataframe to avoid using it for another return | |
sales <- filter(sales, ID_Sales != sale_to_eliminate) | |
} | |
} | |
# Create a vector of all IDs to eliminate (returns and their corresponding sales) | |
ids_to_eliminate <- c(returns$ID_Sales, sales_to_eliminate) | |
# Eliminate these IDs from the original dataframe | |
final_data <- filter(data, !(ID_Sales %in% ids_to_eliminate)) | |
final_data | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
M code does not produce proper result due to a coding error by Code Interpreter in the Table.NestedJoin step.