soerface · May 12, 2019 18:59
diff --git a/NetflixViewingHistory.ipynb b/NetflixViewingHistory.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib notebook "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each exported history writes its dates in a different notation, so every\n",
    "# notation gets its own parser. pd.to_datetime with an explicit format is used\n",
    "# instead of pd.datetime.strptime: the pd.datetime alias is deprecated since\n",
    "# pandas 1.0 and removed in 2.0, and to_datetime also accepts a whole column\n",
    "# when read_csv hands the parser an array of strings.\n",
    "def dateparse_en1(x):\n",
    "    return pd.to_datetime(x, format='%m/%d/%y')  # month/day/2-digit year\n",
    "\n",
    "\n",
    "def dateparse_en2(x):\n",
    "    return pd.to_datetime(x, format='%d/%m/%Y')  # day/month/4-digit year\n",
    "\n",
    "\n",
    "def dateparse_de(x):\n",
    "    return pd.to_datetime(x, format='%d.%m.%y')  # day.month.2-digit year\n",
    "\n",
    "\n",
    "# One CSV per profile, keyed by profile name; the 'Date' column is parsed\n",
    "# with the parser passed for that file.\n",
    "history = {\n",
    "    'profile1': pd.read_csv('data/NetflixViewingHistoryProfile1.csv', parse_dates=['Date'], date_parser=dateparse_en1),\n",
    "    'profile2': pd.read_csv('data/NetflixViewingHistoryProfile2.csv', parse_dates=['Date'], date_parser=dateparse_en2),\n",
    "    'profile3': pd.read_csv('data/NetflixViewingHistoryProfile3.csv', parse_dates=['Date'], date_parser=dateparse_de),\n",
    "    'profile4': pd.read_csv('data/NetflixViewingHistoryProfile4.csv', parse_dates=['Date'], date_parser=dateparse_de)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label every row with the key of the profile it was loaded for, so the\n",
    "# origin is still known once the frames are combined.\n",
    "for profile_name, frame in history.items():\n",
    "    frame['Name'] = profile_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack all profile frames (in dict insertion order) with a single concat.\n",
    "# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0, and\n",
    "# chaining it copies the accumulated frame on every call. Row indexes of the\n",
    "# individual frames are kept as they are, exactly like append did.\n",
    "df = pd.concat(list(history.values()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Calendar covering every day between the first and the last recorded view.\n",
    "# Series.min()/max() are vectorised and skip NaT, unlike the builtins.\n",
    "idx = pd.date_range(df['Date'].min(), df['Date'].max())\n",
    "# Keep one row per (date, profile) pair, then count the rows per date:\n",
    "# the result is the number of distinct profiles that watched on that day.\n",
    "daily = df.drop_duplicates(['Date', 'Name']).groupby('Date').size()\n",
    "# Days without any view are absent from the groupby result; add them as 0.\n",
    "daily = daily.reindex(idx, fill_value=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Per-day series as a frame, with the counts in a 'streams' column.\n",
    "count = daily.reset_index(name='streams')\n",
    "# A day can have anywhere from 0 up to len(history) profiles watching, so\n",
    "# the range is derived from the loaded profiles instead of a hard-coded 5.\n",
    "for i in range(len(history) + 1):\n",
    "    print(\n",
    "        f'Days, where {i} profile{\"s\" if i != 1 else \" \"} watched: ',\n",
    "        int((count['streams'] == i).sum())  # number of days with exactly i profiles\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the Axes so the figure gets a title and axis labels; the trailing\n",
    "# semicolon suppresses the repr of the last expression.\n",
    "ax = daily.plot()\n",
    "ax.set(title='Profiles watching per day', xlabel='Date', ylabel='Number of profiles');"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"%matplotlib notebook "
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Each exported history writes its dates in a different notation, so every\n",
	"# notation gets its own parser. pd.to_datetime with an explicit format is used\n",
	"# instead of pd.datetime.strptime: the pd.datetime alias is deprecated since\n",
	"# pandas 1.0 and removed in 2.0, and to_datetime also accepts a whole column\n",
	"# when read_csv hands the parser an array of strings.\n",
	"def dateparse_en1(x):\n",
	"    return pd.to_datetime(x, format='%m/%d/%y')  # month/day/2-digit year\n",
	"\n",
	"\n",
	"def dateparse_en2(x):\n",
	"    return pd.to_datetime(x, format='%d/%m/%Y')  # day/month/4-digit year\n",
	"\n",
	"\n",
	"def dateparse_de(x):\n",
	"    return pd.to_datetime(x, format='%d.%m.%y')  # day.month.2-digit year\n",
	"\n",
	"\n",
	"# One CSV per profile, keyed by profile name; the 'Date' column is parsed\n",
	"# with the parser passed for that file.\n",
	"history = {\n",
	"    'profile1': pd.read_csv('data/NetflixViewingHistoryProfile1.csv', parse_dates=['Date'], date_parser=dateparse_en1),\n",
	"    'profile2': pd.read_csv('data/NetflixViewingHistoryProfile2.csv', parse_dates=['Date'], date_parser=dateparse_en2),\n",
	"    'profile3': pd.read_csv('data/NetflixViewingHistoryProfile3.csv', parse_dates=['Date'], date_parser=dateparse_de),\n",
	"    'profile4': pd.read_csv('data/NetflixViewingHistoryProfile4.csv', parse_dates=['Date'], date_parser=dateparse_de)\n",
	"}"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Label every row with the key of the profile it was loaded for, so the\n",
	"# origin is still known once the frames are combined.\n",
	"for profile_name, frame in history.items():\n",
	"    frame['Name'] = profile_name"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Stack all profile frames (in dict insertion order) with a single concat.\n",
	"# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0, and\n",
	"# chaining it copies the accumulated frame on every call. Row indexes of the\n",
	"# individual frames are kept as they are, exactly like append did.\n",
	"df = pd.concat(list(history.values()))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"scrolled": false
	},
	"outputs": [],
	"source": [
	"# Calendar covering every day between the first and the last recorded view.\n",
	"# Series.min()/max() are vectorised and skip NaT, unlike the builtins.\n",
	"idx = pd.date_range(df['Date'].min(), df['Date'].max())\n",
	"# Keep one row per (date, profile) pair, then count the rows per date:\n",
	"# the result is the number of distinct profiles that watched on that day.\n",
	"daily = df.drop_duplicates(['Date', 'Name']).groupby('Date').size()\n",
	"# Days without any view are absent from the groupby result; add them as 0.\n",
	"daily = daily.reindex(idx, fill_value=0)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"scrolled": true
	},
	"outputs": [],
	"source": [
	"# Per-day series as a frame, with the counts in a 'streams' column.\n",
	"count = daily.reset_index(name='streams')\n",
	"# A day can have anywhere from 0 up to len(history) profiles watching, so\n",
	"# the range is derived from the loaded profiles instead of a hard-coded 5.\n",
	"for i in range(len(history) + 1):\n",
	"    print(\n",
	"        f'Days, where {i} profile{\"s\" if i != 1 else \" \"} watched: ',\n",
	"        int((count['streams'] == i).sum())  # number of days with exactly i profiles\n",
	"    )"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Keep the Axes so the figure gets a title and axis labels; the trailing\n",
	"# semicolon suppresses the repr of the last expression.\n",
	"ax = daily.plot()\n",
	"ax.set(title='Profiles watching per day', xlabel='Date', ylabel='Number of profiles');"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.1"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}
No results found