Created
July 24, 2014 16:47
-
-
Save jaidevd/164a5d410da51afcefba to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
# Disabled: attendance data is currently not loaded.
# attendance_data = pd.read_csv('AttendanceData.csv', index_col=2,
#                               parse_dates=True)
# attendance_data.columns = ['event_id', 'user_id']


def _read_coordinates(csv_path):
    """
    Load a two-column coordinate table from ``csv_path``.

    The first CSV column becomes the index, the two remaining columns are
    renamed to ``latitude`` and ``longitude``, and rows containing missing
    values are removed.
    """
    # NOTE: the skipfooter=1 option was disabled in the original script.
    frame = pd.read_csv(csv_path, index_col=0)
    frame.columns = ['latitude', 'longitude']
    frame.dropna(inplace=True)
    return frame


# Coordinates of events and users, each indexed by the first CSV column.
event_data = _read_coordinates('EventData.csv')
user_data = _read_coordinates('UserData.csv')

# Ratings used for training; rows with any missing field are discarded.
# NOTE: the skipfooter=1 option was disabled in the original script.
training_data = pd.read_csv('ratings_for_train.csv')
training_data.columns = ['user_id', 'event_id', 'rating']
training_data.dropna(inplace=True)

# Disabled: evaluation data is currently not loaded.
# testing_data = pd.read_csv('ratings_for_eval.csv',
#                            usecols=['User ID', 'Event ID'])
# testing_data.columns = ['user_id', 'event_id']
def clean_missing(training_data, user_data, event_data):
    """
    Drop training examples for users and events whose data isn't available.

    A row of ``training_data`` is kept only when its ``user_id`` is a label
    in ``user_data.index`` and its ``event_id`` is a label in
    ``event_data.index``. Rows whose ID is missing (NaN) are dropped as
    well unless the corresponding index itself contains NaN.

    ``training_data`` is modified in place; ``user_data`` and ``event_data``
    are only read.

    Returns the tuple ``(training_data, user_data, event_data)``, i.e. the
    same three objects that were passed in.
    """
    # Vectorized membership tests replace a Python loop over every unique
    # ID, each of which rescanned the whole frame and dropped rows one ID
    # at a time.
    has_user = training_data['user_id'].isin(user_data.index)
    has_event = training_data['event_id'].isin(event_data.index)
    unavailable = ~(has_user & has_event)

    # Drop by index label in a single pass, keeping the in-place contract
    # callers rely on.
    training_data.drop(training_data.index[unavailable.values], inplace=True)
    return training_data, user_data, event_data
#def fill_coordinates(training_data, user_data, event_data): | |
# """ | |
# Add latitude and longitude columns to the training data containing the | |
# coordinates for each row. | |
# """ | |
# training_data['ulat'] = np.zeros((training_data.shape[0],)) | |
# training_data['ulong'] = np.zeros((training_data.shape[0],)) | |
# training_data['elat'] = np.zeros((training_data.shape[0],)) | |
# training_data['elong'] = np.zeros((training_data.shape[0],)) | |
# event_ids = event_data.index.unique() | |
# user_ids = user_data.index.unique() | |
# for uid in user_ids: | |
# ulat, ulong = user_data.ix[uid] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment