Skip to content

Instantly share code, notes, and snippets.

@anselmobd
Created May 13, 2020 19:48
Show Gist options
  • Save anselmobd/0c08e2a9e5ca7ab21db94d512656bc95 to your computer and use it in GitHub Desktop.
Save anselmobd/0c08e2a9e5ca7ab21db94d512656bc95 to your computer and use it in GitHub Desktop.
A test notebook
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Data Science: Introdução a Ciência de Dados.ipynb",
"provenance": [],
"collapsed_sections": [],
"toc_visible": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "6rmT0VYmYDqt",
"colab_type": "code",
"colab": {}
},
"source": [
"import pandas as pd"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "F6mXF3ITd_XW",
"colab_type": "code",
"outputId": "a3bc087e-4132-4720-a19c-b3f10b37accb",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
}
},
"source": [
"uri = \"https://github.com/alura-cursos/introducao-a-data-science/raw/master/aula3.1/movies.csv\"\n",
"filmes = pd.read_csv(uri)\n",
"filmes.head()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movieId</th>\n",
" <th>title</th>\n",
" <th>genres</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Toy Story (1995)</td>\n",
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Jumanji (1995)</td>\n",
" <td>Adventure|Children|Fantasy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Grumpier Old Men (1995)</td>\n",
" <td>Comedy|Romance</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>Waiting to Exhale (1995)</td>\n",
" <td>Comedy|Drama|Romance</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>Father of the Bride Part II (1995)</td>\n",
" <td>Comedy</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movieId ... genres\n",
"0 1 ... Adventure|Animation|Children|Comedy|Fantasy\n",
"1 2 ... Adventure|Children|Fantasy\n",
"2 3 ... Comedy|Romance\n",
"3 4 ... Comedy|Drama|Romance\n",
"4 5 ... Comedy\n",
"\n",
"[5 rows x 3 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "oW4lGFKmeK48",
"colab_type": "code",
"outputId": "5c40045b-9426-4bc3-ecc6-c7560b0a1c9e",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
}
},
"source": [
"filmes.columns"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Index(['movieId', 'title', 'genres'], dtype='object')"
]
},
"metadata": {
"tags": []
},
"execution_count": 5
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Rk2xOFU5e4HI",
"colab_type": "code",
"colab": {}
},
"source": [
"filmes.columns = ['filmeId', 'titulo', 'generos']"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "QDQ653f3fHio",
"colab_type": "code",
"outputId": "2b05c7af-ae39-41c6-dea6-8d6ef42636e2",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
}
},
"source": [
"filmes.head()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>filmeId</th>\n",
" <th>titulo</th>\n",
" <th>generos</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Toy Story (1995)</td>\n",
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Jumanji (1995)</td>\n",
" <td>Adventure|Children|Fantasy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Grumpier Old Men (1995)</td>\n",
" <td>Comedy|Romance</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>Waiting to Exhale (1995)</td>\n",
" <td>Comedy|Drama|Romance</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>Father of the Bride Part II (1995)</td>\n",
" <td>Comedy</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" filmeId ... generos\n",
"0 1 ... Adventure|Animation|Children|Comedy|Fantasy\n",
"1 2 ... Adventure|Children|Fantasy\n",
"2 3 ... Comedy|Romance\n",
"3 4 ... Comedy|Drama|Romance\n",
"4 5 ... Comedy\n",
"\n",
"[5 rows x 3 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Gk_iH5tVfJWO",
"colab_type": "code",
"outputId": "a0317de9-1bf2-4a12-e0c7-62f1a1585f5f",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
}
},
"source": [
"uri = \"https://raw.githubusercontent.com/alura-cursos/introducao-a-data-science/master/aula1.2/ratings.csv\"\n",
"notas = pd.read_csv(uri) # dataFrame\n",
"notas.head()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>userId</th>\n",
" <th>movieId</th>\n",
" <th>rating</th>\n",
" <th>timestamp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>4.0</td>\n",
" <td>964982703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>4.0</td>\n",
" <td>964981247</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>4.0</td>\n",
" <td>964982224</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>47</td>\n",
" <td>5.0</td>\n",
" <td>964983815</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>50</td>\n",
" <td>5.0</td>\n",
" <td>964982931</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" userId movieId rating timestamp\n",
"0 1 1 4.0 964982703\n",
"1 1 3 4.0 964981247\n",
"2 1 6 4.0 964982224\n",
"3 1 47 5.0 964983815\n",
"4 1 50 5.0 964982931"
]
},
"metadata": {
"tags": []
},
"execution_count": 30
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "xoHEY9AEfg-6",
"colab_type": "code",
"outputId": "3ff3e544-1cfc-4e8b-824b-d62ed4054482",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
}
},
"source": [
"notas.columns"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Index(['userId', 'movieId', 'rating', 'timestamp'], dtype='object')"
]
},
"metadata": {
"tags": []
},
"execution_count": 31
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "D5zOBzUJfxjD",
"colab_type": "code",
"outputId": "47ff52c6-b2da-4314-f642-670ebb814bcf",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
}
},
"source": [
"notas.columns = ['usuarioId', 'filmeId', 'nota', 'momento']\n",
"notas.head()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>usuarioId</th>\n",
" <th>filmeId</th>\n",
" <th>nota</th>\n",
" <th>momento</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>4.0</td>\n",
" <td>964982703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>4.0</td>\n",
" <td>964981247</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>4.0</td>\n",
" <td>964982224</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>47</td>\n",
" <td>5.0</td>\n",
" <td>964983815</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>50</td>\n",
" <td>5.0</td>\n",
" <td>964982931</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" usuarioId filmeId nota momento\n",
"0 1 1 4.0 964982703\n",
"1 1 3 4.0 964981247\n",
"2 1 6 4.0 964982224\n",
"3 1 47 5.0 964983815\n",
"4 1 50 5.0 964982931"
]
},
"metadata": {
"tags": []
},
"execution_count": 32
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "MS1-W2HUf9mA",
"colab_type": "code",
"outputId": "63a15ffb-5153-4fbc-fd28-28ee3f053421",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 127
}
},
"source": [
"notas['nota'].head()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0 4.0\n",
"1 4.0\n",
"2 4.0\n",
"3 5.0\n",
"4 5.0\n",
"Name: nota, dtype: float64"
]
},
"metadata": {
"tags": []
},
"execution_count": 33
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Qzx76nufgkqU",
"colab_type": "code",
"outputId": "277bbf6c-8ec2-447f-9412-0059b36b2f38",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
}
},
"source": [
"notas['nota'].unique()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([4. , 5. , 3. , 2. , 1. , 4.5, 3.5, 2.5, 0.5, 1.5])"
]
},
"metadata": {
"tags": []
},
"execution_count": 34
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "5GTOnBe0hSQH",
"colab_type": "code",
"outputId": "fa857781-2612-4d6e-ef5f-0f1e46a1d9a7",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
}
},
"source": [
"notas['nota'].mean()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"3.501556983616962"
]
},
"metadata": {
"tags": []
},
"execution_count": 35
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "7Dy-lfhIhZMk",
"colab_type": "code",
"outputId": "a20f3172-e541-4b29-ba45-0f04b7982bb7",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 300
}
},
"source": [
"notas.describe()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>usuarioId</th>\n",
" <th>filmeId</th>\n",
" <th>nota</th>\n",
" <th>momento</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>100836.000000</td>\n",
" <td>100836.000000</td>\n",
" <td>100836.000000</td>\n",
" <td>1.008360e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>326.127564</td>\n",
" <td>19435.295718</td>\n",
" <td>3.501557</td>\n",
" <td>1.205946e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>182.618491</td>\n",
" <td>35530.987199</td>\n",
" <td>1.042529</td>\n",
" <td>2.162610e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.500000</td>\n",
" <td>8.281246e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>177.000000</td>\n",
" <td>1199.000000</td>\n",
" <td>3.000000</td>\n",
" <td>1.019124e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>325.000000</td>\n",
" <td>2991.000000</td>\n",
" <td>3.500000</td>\n",
" <td>1.186087e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>477.000000</td>\n",
" <td>8122.000000</td>\n",
" <td>4.000000</td>\n",
" <td>1.435994e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>610.000000</td>\n",
" <td>193609.000000</td>\n",
" <td>5.000000</td>\n",
" <td>1.537799e+09</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" usuarioId filmeId nota momento\n",
"count 100836.000000 100836.000000 100836.000000 1.008360e+05\n",
"mean 326.127564 19435.295718 3.501557 1.205946e+09\n",
"std 182.618491 35530.987199 1.042529 2.162610e+08\n",
"min 1.000000 1.000000 0.500000 8.281246e+08\n",
"25% 177.000000 1199.000000 3.000000 1.019124e+09\n",
"50% 325.000000 2991.000000 3.500000 1.186087e+09\n",
"75% 477.000000 8122.000000 4.000000 1.435994e+09\n",
"max 610.000000 193609.000000 5.000000 1.537799e+09"
]
},
"metadata": {
"tags": []
},
"execution_count": 36
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Zqi7cTQfhf6A",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment