# Skip to content

# Instantly share code, notes, and snippets.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import matthews_corrcoef as mc
import operator
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import RFE
from sklearn.cross_validation import cross_val_score, train_test_split
# Load the training data.
# `header=0` consumes the first row of the file as a header line, and `names`
# then replaces those labels with the integers 0..377.
# The call previously passed `header=False`; current pandas rejects a bool for
# `header` with a TypeError. Older releases presumably treated False as 0, so
# `header=0` keeps that behaviour.
# NOTE(review): assumes train.csv begins with a header row and has 378
# columns -- confirm against the data file (use header=None if the file has
# no header row).
data0 = pd.read_csv('./data/train.csv', sep=',', header=0, names=range(378))
# `data` is a second name bound to the same DataFrame object (no copy is
# made), so in-place changes made through either name are visible via both.
data = data0
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import RFE
from sklearn.cross_validation import cross_val_score, train_test_split
import xgboost as xgb
from sklearn.metrics import r2_score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import seaborn as sns
#matplotlib inline
from sklearn import model_selection, preprocessing
import xgboost as xgb
import datetime
import operator
from sklearn.cross_validation import train_test_split
# Use one-hot encoding, random forest, and XGBoost to predict the outliers.
import numpy as np
from sklearn.base import BaseEstimator,TransformerMixin, ClassifierMixin
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.ensemble import GradientBoostingClassifier
import time
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier