Skip to content

Instantly share code, notes, and snippets.

@reallytiredofclowns
Created March 29, 2024 15:04
Show Gist options
  • Save reallytiredofclowns/c14d1e3d31eda37863dd410eae025065 to your computer and use it in GitHub Desktop.
Save reallytiredofclowns/c14d1e3d31eda37863dd410eae025065 to your computer and use it in GitHub Desktop.
Charting active Discuit daily, 2-daily, weekly users
import pandas
# CSV export read by the script; it must provide `user` and `createdAt` columns.
DATA_PATH = "c:/users/bobloblaw/downloads/datadump.csv"
# Last calendar day (YYYYMMDD) included in the chart.
END_DATE = "20240229"
# Look-back windows in days: daily, 2-daily and weekly activity.
WINDOWS = (1, 2, 7)


def count_active_users(events, windows=WINDOWS, end_date=END_DATE):
    """Count distinct users active within each trailing window of days.

    A user is "active" on a date when at least one row of *events* carries
    that user and a ``createdAt`` value whose first ten characters are that
    date (``YYYY-MM-DD``).

    Args:
        events: DataFrame with string columns ``user`` and ``createdAt``.
            It is not modified.
        windows: Iterable of window lengths in days. Each length ``w``
            yields a column ``userCount<w>`` holding the number of users
            with activity on the current day or the ``w - 1`` days before.
        end_date: Inclusive upper bound as a ``YYYYMMDD`` string, or
            ``None`` to keep every date.

    Returns:
        DataFrame indexed by ``YYYYMMDD`` date strings, one row per
        calendar day between the first and last retained date, with one
        integer column per requested window. Empty (columns only) when no
        row survives the date filter.
    """
    windows = tuple(windows)
    column_names = [f"userCount{window}" for window in windows]

    # One row per (user, date); rows without a timestamp cannot be placed.
    user_days = pandas.DataFrame({
        "user": events["user"],
        "date": events["createdAt"].str.slice(0, 10).str.replace("-", ""),
    }).dropna(subset=["date"]).drop_duplicates()
    if end_date is not None:
        # YYYYMMDD strings sort chronologically, so string comparison works.
        user_days = user_days[user_days["date"] <= end_date]
    if user_days.empty:
        return pandas.DataFrame(columns=column_names, dtype=int)

    # Wide 0/1 matrix: one row per date, one column per user.
    user_days = user_days.assign(active=1)
    presence = user_days.pivot(
        index="date", columns="user", values="active"
    ).fillna(0)

    # rolling(n) counts rows, not days, so every calendar day must be
    # present; days without any activity become all-zero rows.
    first_day = pandas.to_datetime(presence.index.min(), format="%Y%m%d")
    last_day = pandas.to_datetime(presence.index.max(), format="%Y%m%d")
    all_days = (
        pandas.date_range(first_day, last_day, freq="D")
        .strftime("%Y%m%d")
        .rename("date")
    )
    presence = presence.reindex(all_days, fill_value=0)

    counts = {}
    for window, column in zip(windows, column_names):
        # min_periods=1 lets the first window - 1 days use the partial
        # history available instead of turning into NaN (i.e. zero users).
        hits = presence.rolling(window, min_periods=1).sum()
        counts[column] = (hits > 0).sum(axis=1)
    return pandas.DataFrame(counts, index=presence.index)


if __name__ == "__main__":
    # Names kept from the original script so interactive follow-ups on
    # `fullDF` and `mergedDFs` still work.
    fullDF = pandas.read_csv(DATA_PATH)
    mergedDFs = count_active_users(fullDF)
    mergedDFs.plot(figsize=(20, 10))
# are the DFs sorted by date on the index?
# In [69]: mergedDFs.equals(mergedDFs.sort_index())
# Out[69]: True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment