Kaggle_CrowdFlower
git clone https://github.com/ChenglongChen/Kaggle_CrowdFlower.git
cd Kaggle_CrowdFlower
cd Data
cp ~/Downloads/*.csv .
Install pip (see https://pip.pypa.io/en/stable/installing/):
wget https://bootstrap.pypa.io/get-pip.py
sudo python get-pip.py
sudo pip install --upgrade pip
Install the required Python modules:
sudo pip install numpy scipy pandas nltk bs4 sklearn hyperopt keras xgboost ml_metrics
sudo yum install rgf libfm
python
>>> import nltk
>>> nltk.download()
>>> d
>>> l
>>> all // download all of the word lists here
cd Code/Feat
python run_all.py
This may take a few hours.
sudo pip install pymongo networkx h5py
// do not install the standalone bson package from PyPI: pymongo ships its own bson module and the two conflict
cd Code/Model
python generate_best_single_model.py