We can't make this file beautiful and searchable because it's too large.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
index,submission_date,reviewer_id,product_id,product_name,product_brand,site_category_lv1,site_category_lv2,review_title,overall_rating,recommend_to_a_friend,review_text,reviewer_birth_year,reviewer_gender,reviewer_state | |
45,2018-01-01 06:04:46,3e8a71fb5dd1b1ccea3cd139908d759d83124a7757d69f8392f28b404c8a480f,118212404,Livro - Primeiro Amor,,Livros,Literatura Estrangeira,Nao chegou,2,No,Nao veio esse produto nao chegou entao não tem como eu avalia,1992.0,F,SP | |
48,2018-01-01 06:07:02,2bd5d5aca0691c9dd0c12cc66260d2ddeb7151262e99f3da647972f004c5fb20,124499501,Livro - Física Conceitual,,Livros,Ciências Exatas,o produto foi entregue dentro do prazo,5,Yes,"O produto chegou no prazo e em ótimo estado, tenho sido muito bem atendida quando solicito algo no site.",1957.0,F,MG | |
145,2018-01-01 07:21:09,263617650a44b0d2dae824f83cca33a0e73f913013ea0f35ef13826983951ef8,277621,Livro - Redescobrindo A Sua Beleza,,Livros,Moda e Beleza,Comprei e não recebi,1,No,"Como assim? Comprei e não recebi. Questionei aqui nas lojas americanas |
We can make this file beautiful and searchable if this error is corrected: It looks like row 9 should actually have 15 columns, instead of 14 in line 8.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Continente,ISO 3166-1 alpha-3,País,Respondentes,name,alpha-2,alpha-3,country-code,iso_3166-2,region,sub-region,intermediate-region,region-code,sub-region-code,intermediate-region-code | |
África,ZAF,África do Sul,1,South Africa,ZA,ZAF,710,ISO 3166-2:ZA,Africa,Sub-Saharan Africa,Southern Africa,2.0,202.0,18.0 | |
Europa,ALB,Albânia,1,Albania,AL,ALB,8,ISO 3166-2:AL,Europe,Southern Europe,,150.0,39.0, | |
Europa,DEU,Alemanha,5,Germany,DE,DEU,276,ISO 3166-2:DE,Europe,Western Europe,,150.0,155.0, | |
América do Sul,ARG,Argentina,2,Argentina,AR,ARG,32,ISO 3166-2:AR,Americas,Latin America and the Caribbean,South America,19.0,419.0,5.0 | |
Oceania,AUS,Austrália,12,Australia,AU,AUS,36,ISO 3166-2:AU,Oceania,Australia and New Zealand,,9.0,53.0, | |
Ásia,BGD,Bangladesh,1,Bangladesh,BD,BGD,50,ISO 3166-2:BD,Asia,Southern Asia,,142.0,34.0, | |
Europa,BEL,Bélgica,3,Belgium,BE,BEL,56,ISO 3166-2:BE,Europe,Western Europe,,150.0,155.0, | |
América do Sul,BRA,Brasil,21,Brazil,BR,BRA,76,ISO 3166-2:BR,Americas,Latin America and the Caribbean,South America,19.0,41 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import os | |
import yaml | |
from sklearn.naive_bayes import MultinomialNB | |
import pickle | |
# read the command line params | |
if len(sys.argv) != 3: | |
sys.stderr.write('Arguments error. Usage:\n') | |
sys.stderr.write( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import os | |
from sklearn.metrics import precision_recall_curve, auc | |
import pickle | |
import json | |
# read command line parameters | |
if len(sys.argv) != 5: | |
sys.stderr.write('Arguments error. Usage:\n') | |
sys.stderr.write( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import os | |
import yaml | |
import pandas as pd | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
import pickle | |
# read command line params | |
if len(sys.argv) != 3: | |
sys.stderr.write('Arguments error. Usage:\n') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import yaml | |
from sklearn.datasets import fetch_20newsgroups | |
import pandas as pd | |
# read params | |
params = yaml.safe_load(open('params.yaml'))['prepare'] | |
categories = params['categories'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.datasets import fetch_20newsgroups | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.naive_bayes import MultinomialNB | |
from sklearn.metrics import precision_recall_curve, auc | |
categories = ["comp.graphics","sci.space"] | |
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories) | |
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories) | |
newsgroups_all = fetch_20newsgroups(subset='all', categories=categories) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import modin.pandas as pd_modin | |
import cudf as pd_cudf | |
results_groupby = [] | |
### Read in the data with Pandas | |
for run in range(0,30): | |
df = pd.read_csv("../inep/dados/microdados_educacao_superior_2018//microdados_ed_superior_2018/dados/DM_ALUNO.CSV", | |
delimiter="|", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import modin.pandas as pd_modin | |
import cudf as pd_cudf | |
results_fillna = [] | |
### Read in the data with Pandas | |
for run in range(0,30): | |
df = pd.read_csv("../inep/dados/microdados_educacao_superior_2018//microdados_ed_superior_2018/dados/DM_ALUNO.CSV") | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import modin.pandas as pd_modin | |
import cudf as pd_cudf | |
results_loading = [] | |
### Read in the data with Pandas | |
for run in range(0,30): | |
s = time.time() | |
df = pd.read_csv("../inep/dados/microdados_educacao_superior_2018//microdados_ed_superior_2018/dados/DM_ALUNO.CSV") |
NewerOlder