Skip to content

Instantly share code, notes, and snippets.

@RedaAffane
RedaAffane / coalition_worth.sql
Last active March 6, 2018 09:36
Compute the worth of each coalition
SELECT "channels_subset",
sum("conversion") as "conversions_sum"
FROM
(
SELECT "user_id",
string_agg(DISTINCT("channel"), ',') as "channels_subset",
max("conversion") as "conversion"
FROM
(
SELECT "user_id",
# First, let's convert the dataframe "subsets_conversions" into a dictionnary
C_values = user_logs_aggr.set_index("channels").to_dict()["conversions"]
#For each possible combination of channels A, we compute the total number of conversions yielded by every subset of A.
# Example : if A = {c1,c2}, then v(A) = C({c1}) + C({c2}) + C({c1,c2})
v_values = {}
for A in subsets(channels):
v_values[A] = v_function(A,C_values)
from collections import defaultdict
n=len(channels)
shapley_values = defaultdict(int)
for channel in channels:
for A in v_values.keys():
if channel not in A.split(","):
cardinal_A=len(A.split(","))
A_with_channel = A.split(",")
def v_function(A,C_values):
'''
This function computes the worth of each coalition.
inputs:
- A : a coalition of channels.
- C_values : A dictionnary containing the number of conversions that each subset of channels has yielded.
'''
subsets_of_A = subsets(A.split(","))
worth_of_A=0
for subset in subsets_of_A:
SELECT
*,
extract(epoch FROM "timestamp")
- lag(
extract(epoch from "timestamp")
) over (
PARTITION BY "user_id"
order by "timestamp"
) as "time_difference"
FROM log_data
SELECT
*,
CASE WHEN
EXTRACT(epoch FROM "timestamp")
- LAG(
EXTRACT(epoch FROM "timestamp")
) OVER (
PARTITION BY "user_id"
ORDER BY "timestamp"
) >= 30 * 60 THEN 1 ELSE 0 END as new_micro_session
SELECT *,
"user_id" || '_' || SUM("new_micro_session")
OVER (PARTITION BY "user_id"
ORDER BY "timestamp") AS "user_micro_session"
FROM (
SELECT
*,
CASE WHEN
EXTRACT(epoch FROM "timestamp")
- LAG(
SELECT "user_id",
"channel",
"timestamp"
FROM(
SELECT *,
FIRST_VALUE("touchpoint") over(
PARTITION BY "user_id", "user_micro_session"
ORDER BY "timestamp") as "channel"
FROM(
SELECT *,
SELECT "conversions".user_id,
"conversions".conversion_timestamp,
"touchpoints".channel,
"touchpoints".timestamp,
"conversions".user_id || extract(epoch from "conversions"."conversion_timestamp") AS "macro_user_session"
FROM "conversions" "conversions"
LEFT JOIN "user_touchpoints" "touchpoints"
ON "conversions"."user_id" = "touchpoints"."user_id"
SELECT *,
CASE WHEN
"timestamp" >= "conversion_timestamp" - interval '30 days'
AND "timestamp" <= "conversion_timestamp"
THEN 1 else 0 END as "keep_touchpoint"
FROM(
SELECT "conversions".user_id,
"conversions".conversion_timestamp,
"touchpoints".channel,
"touchpoints".timestamp,