Last active
September 12, 2019 01:36
-
-
Save blisst/ac798ec9563615fcd5bf9b10dca0e245 to your computer and use it in GitHub Desktop.
Ryan Johnson - Moloco String Equals when One Char Removed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def equalsWhenOneCharRemoved(x, y): | |
# Can't be different in length by more than 1 | |
if (abs(len(x) - len(y)) > 1): | |
return False | |
# Iterate through X and Y together | |
diffFound = 0; | |
lenX = len(x) | |
lenY = len(y) | |
for i in range(max(lenX, lenY)): | |
# If we have reached the end of either string, break | |
if (i >= lenX - 1 or i >= lenY -1): | |
break | |
# If they are different at an index, we add a difference | |
if (x[i] != y[i]): | |
diffFound += 1 | |
# If there is more than one difference, they are not similar enough | |
if (diffFound > 1): | |
return False | |
# If we get here, check the number of differences is less than 1 | |
return diffFound <= 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
def rankProducts(filePath): | |
""" | |
Loop through each line of file at filePath, parsing as JSON for each line, and loading it into a dictionary. | |
Structure of dictionary 'products' looks like: | |
{ | |
product_id_1: { | |
user_id_1: quantity_for_user_id_1 | |
user_id_2: quantity_for_user_id_2 | |
user_id_3: quantity_for_user_id_3 | |
} | |
product_id_2: { | |
user_id_1: quantity_for_user_id_1 | |
user_id_2: quantity_for_user_id_2 | |
user_id_3: quantity_for_user_id_3 | |
} | |
} | |
""" | |
products = {} | |
# Iterate through each line | |
for line in open(filePath): | |
lineJson = json.loads(line.rstrip('\n')) | |
productId = lineJson["product_id"] | |
userId = lineJson["user_id"] | |
quantity = lineJson["quantity"] | |
# Add product to dict if not in it already | |
if productId not in products: | |
products[productId] = {} | |
# Add user_id to the product if not there already | |
if userId not in products[productId]: | |
products[productId][userId] = 0 | |
# Add the quantity of that product for that user | |
products[productId][userId] += quantity | |
# Go through each product_id and check which has sold the most quantity and which had the most unique purchasers, allowing for ties | |
mostSoldMax = 0 | |
mostUsersMax = 0 | |
# Lists so we can have multiple entries for ties | |
mostSold = [] | |
mostUniqueUsers = [] | |
for product_id in products.keys(): | |
# Number of unique users for a product_id is the number of user_ids for that product | |
numUsers = len(products[product_id].keys()) | |
# The total sold is the sum of the quantities over all the users for a given product | |
productSum = sum(products[product_id].values()) | |
# Compute the maximum total quantity sold, filling a list with element product_ids that are tied | |
if productSum > mostSoldMax: | |
mostSold = [product_id] | |
mostSoldMax = productSum | |
elif productSum == mostSoldMax: | |
mostSold.append(product_id) | |
# Compute the product with the most unique users, filling a list with element product_ids that are tied | |
if numUsers > mostUsersMax: | |
mostUniqueUsers = [product_id] | |
mostUsersMax = numUsers | |
elif numUsers == mostUsersMax: | |
mostUniqueUsers.append(product_id) | |
print("Most popular product(s) based on the number of purchasers: " + ", ".join(mostUniqueUsers)) | |
print("Most popular product(s) based on the quantity of goods sold: " + ", ".join(mostSold)) | |
# If we didn't have to support ties, we could use the following: | |
# mostSold = max(products, key=lambda productId:sum(products[productId].values())) | |
# mostUniqueUsers = max(products, key=lambda productId:len(products[productId].keys())) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- LARGEST NUMBER OF UNIQUE USERS FOR COUNTRY BDV | |
SELECT *, COUNT(user_id) as counts FROM timeseries | |
WHERE country_id="BDV" | |
GROUP BY site_id, user_id | |
ORDER BY counts DESC; | |
-- 4 users who visited sites more than 10 times | |
SELECT user_id, site_id, COUNT(user_id) as user_vists FROM timeseries | |
WHERE ts > '2019-02-03 00:00:00' AND ts < '2019-02-04 23:59:59' | |
GROUP BY site_id, user_id | |
HAVING COUNT(user_id) > 10 | |
ORDER BY user_vists DESC; | |
-- Which site was the most last-visited by users | |
SELECT a.*, COUNT(*) | |
FROM timeseries a | |
INNER JOIN | |
( | |
SELECT user_id, MAX(ts) maxTime | |
FROM timeseries | |
GROUP BY user_id | |
) b on a.user_id = b.user_id AND | |
a.ts = maxTime | |
GROUP BY site_id | |
ORDER BY COUNT(*) DESC; | |
-- HOW MANY user's first/last visits were to the same site | |
SELECT COUNT(*), mnQ.user_id, mxQ.site_id last_site, mnQ.site_id first_site FROM | |
( | |
SELECT a.* | |
FROM timeseries a | |
INNER JOIN | |
( | |
SELECT user_id, MAX(ts) maxTime | |
FROM timeseries | |
GROUP BY user_id | |
) b on a.user_id = b.user_id AND | |
a.ts = maxTime | |
) mxQ | |
JOIN | |
( | |
SELECT c.* | |
FROM timeseries c | |
INNER JOIN | |
( | |
SELECT user_id, MIN(ts) minTime | |
FROM timeseries | |
GROUP BY user_id | |
) d on c.user_id = d.user_id AND | |
c.ts = minTime | |
) mnQ | |
WHERE mxQ.user_id = mnQ.user_id and mxQ.site_id = mnQ.site_id; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment