This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk | |
from nltk.corpus import stopwords | |
class PhraseExtractor(object): | |
def __init__(self): | |
self.sentence_re = r'(?:(?:[A-Z])(?:.[A-Z])+.?)|(?:\w+(?:-\w+)*)|(?:\$?\d+(?:.\d+)?%?)|(?:...|)(?:[][.,;"\'?():-_`])' | |
self.lemmatizer = nltk.WordNetLemmatizer() | |
self.stemmer = nltk.stem.porter.PorterStemmer() | |
self.grammar = r""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Function to read xlsx files | |
def readExcel(self): | |
from xlrd import open_workbook | |
wb = open_workbook('Shortlisted Rumours Jan to June 2016.xlsx') | |
for s in wb.sheets(): | |
for row in range(1, s.nrows): | |
# pdb.set_trace() | |
col_names = s.row(0) | |
col_value = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk | |
from nltk.corpus import stopwords | |
text = raw_input("Enter the text please ...") | |
print text | |
sentence_re = r'(?:(?:[A-Z])(?:.[A-Z])+.?)|(?:\w+(?:-\w+)*)|(?:\$?\d+(?:.\d+)?%?)|(?:...|)(?:[][.,;"\'?():-_`])' | |
lemmatizer = nltk.WordNetLemmatizer() | |
stemmer = nltk.stem.porter.PorterStemmer() | |
grammar = r""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sudo apt-get install build-essential | |
wget https://www.python.org/ftp/python/2.7.6/Python-2.7.6.tar.xz | |
tar -xf Python-2.7.6.tar.xz | |
cd Python-2.7.6 | |
./configure | |
make | |
make install | |
sudo apt-get install build-essential python-dev python-setuptools python-numpy python-scipy |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Read basics about Python - http://www.tutorialspoint.com/python/python_overview.htm | |
Tutorials | |
http://www.tutorialspoint.com/python/index.htm | |
http://askpython.com/ | |
More resources | |
https://wiki.python.org/moin/BeginnersGuide/Programmers |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
""" | |
NltkSentTokenize Class for all nltk sent tokenize | |
""" | |
class NltkSentTokenize(object): | |
""" | |
Initialization function of NltkSentTokenize Class | |
""" | |
def __init__(self): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Using the words as features removing stopwords | |
""" | |
from sklearn.utils import check_random_state | |
from sklearn.datasets import load_files | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.feature_extraction.text import HashingVectorizer | |
from sklearn.naive_bayes import MultinomialNB | |
from sklearn.feature_extraction.text import CountVectorizer | |
from sklearn.metrics import accuracy_score, average_precision_score, f1_score, precision_score, recall_score |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Libraries: | |
https://www.tensorflow.org/ | |
Articles: | |
https://www.kaggle.com/c/titanic/details/getting-started-with-python | |
https://www.kaggle.com/c/titanic/details/getting-started-with-python-ii |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
from math import log, exp | |
from operator import mul | |
from collections import Counter | |
import os | |
import pylab | |
import cPickle | |
class MyDict(dict): |
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
13 15 15 0 0 0 14 0 0 0 14 16 14 16 0 0 13 15 0 0 14 16 14 15 0 13 16 13 0 16 13 16 14 0 15 0 0 16 0 16 13 15 15 15 13 0 13 15 13 14 0 0 14 14 16 0 14 0 0 0 0 13 0 0 0 0 0 13 14 0 0 15 13 0 14 0 16 13 13 14 15 0 14 13 0 16 16 13 15 16 0 15 16 14 0 0 0 0 16 14 0 14 0 0 13 16 14 13 13 15 16 0 0 14 15 0 0 15 0 15 13 16 0 0 13 15 16 0 0 14 16 13 13 15 0 0 15 0 0 0 14 0 15 14 16 16 0 0 0 13 0 14 13 0 15 15 14 0 15 0 0 0 14 0 0 0 16 0 16 13 14 0 0 14 0 16 13 16 14 13 15 0 14 14 14 0 13 13 15 16 0 0 0 13 14 13 13 0 0 15 0 14 14 13 13 0 15 14 0 0 14 0 0 13 0 0 14 0 13 0 15 14 0 0 15 0 15 16 14 0 0 16 15 0 0 0 16 0 0 15 13 15 15 13 0 14 14 0 14 15 15 15 14 14 16 15 0 0 16 0 0 0 0 13 13 15 15 15 0 0 15 13 0 0 0 14 0 14 0 15 16 16 14 13 14 15 0 13 0 13 15 0 14 16 16 14 14 0 15 0 15 0 0 0 14 13 0 13 0 0 0 0 16 14 13 14 0 13 0 16 15 0 14 16 16 13 15 0 13 14 14 13 0 0 14 0 14 16 16 15 13 13 14 13 0 13 0 0 13 16 13 0 0 16 14 0 0 15 15 13 0 16 0 15 16 13 0 13 0 15 16 16 14 0 16 0 15 15 0 0 0 13 15 14 14 0 0 14 0 15 14 16 0 1 |