Skip to content

Instantly share code, notes, and snippets.

View rodrigols89's full-sized avatar
🎯
Creating Things & Solving Problems

Rodrigo Leite rodrigols89

🎯
Creating Things & Solving Problems
View GitHub Profile
import pandas as pd
# Let pandas print up to 42 columns before truncating the output.
pd.set_option('display.max_columns', 42)
data = pd.read_csv('../datasets/2015-building-energy-benchmarking.csv')

# Print the percentage of missing values in each column (this is NOT the
# column mean). The row count is taken from the frame itself so the
# computation does not depend on any particular column being present.
print((data.isnull().sum() / len(data)) * 100, '\n')

# Replace the missing ENERGYSTARScore entries with the column median.
data['ENERGYSTARScore'] = data['ENERGYSTARScore'].fillna(
    data['ENERGYSTARScore'].median()
)
import pandas as pd
# Let pandas print up to 18 columns before truncating the output.
pd.set_option('display.max_columns', 18)
data = pd.read_csv('../datasets/athlete_events.csv')

# Fill the gaps in each numeric column with that column's own mean.
for column in ('Height', 'Weight'):
    data[column] = data[column].fillna(data[column].mean())

# Show the first 20 rows of the two columns that were just filled.
print(data[['Height', 'Weight']].head(20))
import pandas as pd
# Let pandas print up to 18 columns before truncating the output.
pd.set_option('display.max_columns', 18)
data = pd.read_csv('../datasets/athlete_events.csv')

# Rows with no medal are stored as NaN; replace them with the
# placeholder label 'Nenhuma' and write the column back.
medals = data['Medal'].fillna('Nenhuma')
data['Medal'] = medals

# Show the first 10 values of the filled column.
print(medals.head(10))
import pandas as pd
# Let pandas print up to 18 columns before truncating the output.
pd.set_option('display.max_columns', 18)
data = pd.read_csv('../datasets/athlete_events.csv')

# Percentage of missing values per column. The row count comes from the
# frame itself rather than from the 'ID' column, so the computation does
# not break if that column is renamed or dropped.
percentMissing = (data.isnull().sum() / len(data)) * 100
print(percentMissing)
import pandas as pd
# Let pandas print up to 18 columns before truncating the output.
pd.set_option('display.max_columns', 18)
data = pd.read_csv('../datasets/athlete_events.csv')

# Build the boolean missing-value mask, then count the True cells in
# each column to get the number of missing values per column.
missing_mask = data.isna()
isNullSum = missing_mask.sum()
print(isNullSum)
import pandas as pd
# Let pandas print up to 18 columns before truncating the output.
pd.set_option('display.max_columns', 18)
data = pd.read_csv('../datasets/athlete_events.csv')

# Boolean frame with the same shape as `data`: True where a cell is
# missing, False otherwise.
isnull = data.isna()
print(isnull)
import pandas as pd
# Let pandas print up to 18 columns before truncating the output.
pd.set_option('display.max_columns', 18)
data = pd.read_csv('../datasets/athlete_events.csv')

# Keep only the rows that have no missing value in any column.
dt = data.dropna()

# Report the (rows, columns) shape before and after dropping.
for template, frame in (
    ("Full sample: {0}", data),
    ("Sample without NaN: {0}", dt),
):
    print(template.format(frame.shape))
import pandas as pd
# Let pandas print up to 18 columns before truncating the output.
pd.set_option('display.max_columns', 18)
data = pd.read_csv('../datasets/athlete_events.csv')

# Discard every row that contains at least one missing value.
dt = data.dropna()

# Preview the first five complete rows.
preview = dt.head(5)
print(preview)
import pandas as pd
# Let pandas print up to 18 columns before truncating the output.
pd.set_option('display.max_columns', 18)
data = pd.read_csv('../datasets/athlete_events.csv')

# Print the first five rows followed by the dtype of every column, one
# after the other on separate lines.
print(data.head(), data.dtypes, sep='\n')
import pandas as pd
# Let pandas print up to 42 columns before truncating the output.
pd.set_option('display.max_columns', 42)
data = pd.read_csv('../datasets/2015-building-energy-benchmarking.csv')

# Cast only the DataYear column to the generic object dtype; every other
# column keeps the dtype inferred by read_csv.
data = data.astype({'DataYear': object})

# Show the resulting dtype of each column.
print(data.dtypes)