Created
May 13, 2020 19:48
-
-
Save anselmobd/0c08e2a9e5ca7ab21db94d512656bc95 to your computer and use it in GitHub Desktop.
A test notebook
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Data Science: Introdução a Ciência de Dados.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"toc_visible": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "6rmT0VYmYDqt", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"import pandas as pd" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "F6mXF3ITd_XW", | |
"colab_type": "code", | |
"outputId": "a3bc087e-4132-4720-a19c-b3f10b37accb", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 206 | |
} | |
}, | |
"source": [ | |
"uri = \"https://github.com/alura-cursos/introducao-a-data-science/raw/master/aula3.1/movies.csv\"\n", | |
"filmes = pd.read_csv(uri)\n", | |
"filmes.head()" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>movieId</th>\n", | |
" <th>title</th>\n", | |
" <th>genres</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>Toy Story (1995)</td>\n", | |
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>Jumanji (1995)</td>\n", | |
" <td>Adventure|Children|Fantasy</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>Grumpier Old Men (1995)</td>\n", | |
" <td>Comedy|Romance</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>Waiting to Exhale (1995)</td>\n", | |
" <td>Comedy|Drama|Romance</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>Father of the Bride Part II (1995)</td>\n", | |
" <td>Comedy</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" movieId ... genres\n", | |
"0 1 ... Adventure|Animation|Children|Comedy|Fantasy\n", | |
"1 2 ... Adventure|Children|Fantasy\n", | |
"2 3 ... Comedy|Romance\n", | |
"3 4 ... Comedy|Drama|Romance\n", | |
"4 5 ... Comedy\n", | |
"\n", | |
"[5 rows x 3 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 4 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "oW4lGFKmeK48", | |
"colab_type": "code", | |
"outputId": "5c40045b-9426-4bc3-ecc6-c7560b0a1c9e", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 35 | |
} | |
}, | |
"source": [ | |
"filmes.columns" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"Index(['movieId', 'title', 'genres'], dtype='object')" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 5 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Rk2xOFU5e4HI", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"filmes.columns = ['filmeId', 'titulo', 'generos']" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "QDQ653f3fHio", | |
"colab_type": "code", | |
"outputId": "2b05c7af-ae39-41c6-dea6-8d6ef42636e2", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 206 | |
} | |
}, | |
"source": [ | |
"filmes.head()" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>filmeId</th>\n", | |
" <th>titulo</th>\n", | |
" <th>generos</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>Toy Story (1995)</td>\n", | |
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>Jumanji (1995)</td>\n", | |
" <td>Adventure|Children|Fantasy</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>Grumpier Old Men (1995)</td>\n", | |
" <td>Comedy|Romance</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>Waiting to Exhale (1995)</td>\n", | |
" <td>Comedy|Drama|Romance</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>Father of the Bride Part II (1995)</td>\n", | |
" <td>Comedy</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" filmeId ... generos\n", | |
"0 1 ... Adventure|Animation|Children|Comedy|Fantasy\n", | |
"1 2 ... Adventure|Children|Fantasy\n", | |
"2 3 ... Comedy|Romance\n", | |
"3 4 ... Comedy|Drama|Romance\n", | |
"4 5 ... Comedy\n", | |
"\n", | |
"[5 rows x 3 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 7 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Gk_iH5tVfJWO", | |
"colab_type": "code", | |
"outputId": "a0317de9-1bf2-4a12-e0c7-62f1a1585f5f", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 206 | |
} | |
}, | |
"source": [ | |
"uri = \"https://raw.githubusercontent.com/alura-cursos/introducao-a-data-science/master/aula1.2/ratings.csv\"\n", | |
"notas = pd.read_csv(uri) # dataFrame\n", | |
"notas.head()" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>userId</th>\n", | |
" <th>movieId</th>\n", | |
" <th>rating</th>\n", | |
" <th>timestamp</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>4.0</td>\n", | |
" <td>964982703</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>4.0</td>\n", | |
" <td>964981247</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>6</td>\n", | |
" <td>4.0</td>\n", | |
" <td>964982224</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>47</td>\n", | |
" <td>5.0</td>\n", | |
" <td>964983815</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>50</td>\n", | |
" <td>5.0</td>\n", | |
" <td>964982931</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" userId movieId rating timestamp\n", | |
"0 1 1 4.0 964982703\n", | |
"1 1 3 4.0 964981247\n", | |
"2 1 6 4.0 964982224\n", | |
"3 1 47 5.0 964983815\n", | |
"4 1 50 5.0 964982931" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 30 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "xoHEY9AEfg-6", | |
"colab_type": "code", | |
"outputId": "3ff3e544-1cfc-4e8b-824b-d62ed4054482", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 35 | |
} | |
}, | |
"source": [ | |
"notas.columns" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"Index(['userId', 'movieId', 'rating', 'timestamp'], dtype='object')" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 31 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "D5zOBzUJfxjD", | |
"colab_type": "code", | |
"outputId": "47ff52c6-b2da-4314-f642-670ebb814bcf", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 206 | |
} | |
}, | |
"source": [ | |
"notas.columns = ['usuarioId', 'filmeId', 'nota', 'momento']\n", | |
"notas.head()" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>usuarioId</th>\n", | |
" <th>filmeId</th>\n", | |
" <th>nota</th>\n", | |
" <th>momento</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>4.0</td>\n", | |
" <td>964982703</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>4.0</td>\n", | |
" <td>964981247</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>6</td>\n", | |
" <td>4.0</td>\n", | |
" <td>964982224</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>47</td>\n", | |
" <td>5.0</td>\n", | |
" <td>964983815</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>50</td>\n", | |
" <td>5.0</td>\n", | |
" <td>964982931</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" usuarioId filmeId nota momento\n", | |
"0 1 1 4.0 964982703\n", | |
"1 1 3 4.0 964981247\n", | |
"2 1 6 4.0 964982224\n", | |
"3 1 47 5.0 964983815\n", | |
"4 1 50 5.0 964982931" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 32 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "MS1-W2HUf9mA", | |
"colab_type": "code", | |
"outputId": "63a15ffb-5153-4fbc-fd28-28ee3f053421", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 127 | |
} | |
}, | |
"source": [ | |
"notas['nota'].head()" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0 4.0\n", | |
"1 4.0\n", | |
"2 4.0\n", | |
"3 5.0\n", | |
"4 5.0\n", | |
"Name: nota, dtype: float64" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 33 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Qzx76nufgkqU", | |
"colab_type": "code", | |
"outputId": "277bbf6c-8ec2-447f-9412-0059b36b2f38", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 35 | |
} | |
}, | |
"source": [ | |
"notas['nota'].unique()" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"array([4. , 5. , 3. , 2. , 1. , 4.5, 3.5, 2.5, 0.5, 1.5])" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 34 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "5GTOnBe0hSQH", | |
"colab_type": "code", | |
"outputId": "fa857781-2612-4d6e-ef5f-0f1e46a1d9a7", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 35 | |
} | |
}, | |
"source": [ | |
"notas['nota'].mean()" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"3.501556983616962" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 35 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "7Dy-lfhIhZMk", | |
"colab_type": "code", | |
"outputId": "a20f3172-e541-4b29-ba45-0f04b7982bb7", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 300 | |
} | |
}, | |
"source": [ | |
"notas.describe()" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>usuarioId</th>\n", | |
" <th>filmeId</th>\n", | |
" <th>nota</th>\n", | |
" <th>momento</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td>100836.000000</td>\n", | |
" <td>100836.000000</td>\n", | |
" <td>100836.000000</td>\n", | |
" <td>1.008360e+05</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td>326.127564</td>\n", | |
" <td>19435.295718</td>\n", | |
" <td>3.501557</td>\n", | |
" <td>1.205946e+09</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td>182.618491</td>\n", | |
" <td>35530.987199</td>\n", | |
" <td>1.042529</td>\n", | |
" <td>2.162610e+08</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.500000</td>\n", | |
" <td>8.281246e+08</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td>177.000000</td>\n", | |
" <td>1199.000000</td>\n", | |
" <td>3.000000</td>\n", | |
" <td>1.019124e+09</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td>325.000000</td>\n", | |
" <td>2991.000000</td>\n", | |
" <td>3.500000</td>\n", | |
" <td>1.186087e+09</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td>477.000000</td>\n", | |
" <td>8122.000000</td>\n", | |
" <td>4.000000</td>\n", | |
" <td>1.435994e+09</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td>610.000000</td>\n", | |
" <td>193609.000000</td>\n", | |
" <td>5.000000</td>\n", | |
" <td>1.537799e+09</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" usuarioId filmeId nota momento\n", | |
"count 100836.000000 100836.000000 100836.000000 1.008360e+05\n", | |
"mean 326.127564 19435.295718 3.501557 1.205946e+09\n", | |
"std 182.618491 35530.987199 1.042529 2.162610e+08\n", | |
"min 1.000000 1.000000 0.500000 8.281246e+08\n", | |
"25% 177.000000 1199.000000 3.000000 1.019124e+09\n", | |
"50% 325.000000 2991.000000 3.500000 1.186087e+09\n", | |
"75% 477.000000 8122.000000 4.000000 1.435994e+09\n", | |
"max 610.000000 193609.000000 5.000000 1.537799e+09" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 36 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Zqi7cTQfhf6A", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment