This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
# Integer-encode every token as its position in the sorted vocabulary.
docs = "Can I eat the Pizza".lower().split()

# Sorted, de-duplicated vocabulary; sorting keeps the ids reproducible.
doc1 = sorted(set(docs))
print("\nvalues: ", doc1)

# Build the word -> id table once instead of converting the vocabulary to an
# array and scanning it with np.where for every single token.
word_to_id = {word: idx for idx, word in enumerate(doc1)}
integer_encoded = [word_to_id[word] for word in docs]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from numpy import array | |
from numpy import argmax | |
from sklearn.preprocessing import LabelEncoder | |
from sklearn.preprocessing import OneHotEncoder | |
# define example | |
# data = ['cold', 'cold', 'warm', 'cold', 'hot', 'hot', 'warm', 'cold', 'warm', 'hot'] | |
# Two example sentences, lower-cased so capitalisation does not distinguish
# otherwise identical words ("Can" vs. "can").
doc1 = "Can I eat the Pizza".lower()
doc2 = "You can eat the Pizza".lower()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.preprocessing.text import Tokenizer | |
from numpy import array | |
from numpy import argmax | |
from keras.utils import to_categorical | |
# Lower-cased, whitespace-split tokens of the example sentence.
doc = "Can I eat the Pizza".lower().split()
def using_Tokenizer(doc): | |
# create the tokenizer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
import pandas as pd | |
# Map every distinct token to an integer id, then encode the sentence.
text = 'My cat is a great cat'
tokens = text.lower().split()

# Sort the distinct tokens before numbering them: iterating a bare set of
# strings yields a different order from run to run (hash randomisation), which
# would make the ids irreproducible.
vocab = sorted(set(tokens))
vocab = pd.Series(range(len(vocab)), index=vocab)

# One id per token, in sentence order; a repeated word repeats its id.
word_ids = vocab.loc[tokens].values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Get the total of each row and of each column of a DataFrame.
""" | |
def totalcount(data):
    """Return a copy of ``data`` with a 'Total' column holding the row sums.

    A column already named 'Total' is left out of the sum and then
    overwritten, so existing totals are not added into the new ones.
    """
    without_total = data.drop('Total', errors='ignore', axis=1)
    return data.assign(Total=without_total.sum(1))


def pandas_get_total_row(df):
    """Return ``df`` with both a 'Total' column and a 'Total' row.

    The column is added first; the frame is then transposed so the same helper
    can add the row, and transposed back to the original orientation.
    """
    return df.pipe(totalcount).T.pipe(totalcount).T
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_total(df, column='Marks'):
    """Add (or refresh) a row labelled 'Total' holding the sum of ``column``.

    ``df`` is modified in place and is also returned. Every column other than
    ``column`` is left as NaN in the 'Total' row.

    Args:
        df: DataFrame that contains ``column``.
        column: Name of the column to total; defaults to 'Marks'.

    Returns:
        The same ``df`` object, now with a row labelled 'Total'.
    """
    # Leave out a 'Total' row written by an earlier call so that the previous
    # total is not counted into the new one.
    values = df[column].drop('Total', errors='ignore')
    df.loc['Total'] = pd.Series(values.sum(), index=[column])
    return df
# Example: three subjects with their marks, plus a 'Total' row.
df = pd.DataFrame({
    'Subjects': ["Maths", "Science", "English"],
    'Marks': [80, 90, 75],
})
# Pin the column order explicitly.
df = df.reindex(columns=['Subjects', 'Marks'])
df = get_total(df)
# Bare expression: presumably there to display the frame in a notebook cell;
# it has no effect when the file runs as a plain script.
df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_extraction.text import TfidfVectorizer | |
import pandas as pd | |
from sklearn import svm | |
from sklearn.model_selection import train_test_split | |
from sklearn.metrics import accuracy_score | |
import pickle | |
from sklearn import linear_model | |
Project_path = "<path to the project folder>" | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle | |
Project_path = "<path to project>"
models_dir = Project_path + "/08. Multi-class_text_classification/models"
model_path = models_dir + "/model.pickle"
vectorizer_path = models_dir + "/vectorizer.pickle"

# NOTE(security): pickle.load can execute arbitrary code contained in the
# file; only load pickles you produced yourself or otherwise trust.
# The with-blocks close each file as soon as it has been read.
with open(vectorizer_path, 'rb') as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)

# A one-element list goes in; [0] picks the first entry of the result.
pred = model.predict(vectorizer.transform(["i have got a new phone. its from Apple.. and i love it!"]))[0]
print("predicted class:", pred)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Regex-based check of whether a URL is an inner page, the home page, or a category page.
""" | |
import re | |
def url_check(url): | |
url = url.split("/") | |
url = list(filter(None, url)) | |
if "http" in url[0]: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
In audio production, a sample rate (or "sampling rate") defines how many times per second a sound is sampled. | |
Technically speaking, it is the frequency of samples used in a digital recording. | |
""" | |
import numpy as np | |
from scipy.io import wavfile | |
sampleRate = 100  # samples per second
frequency = 10
audio_length = 1  # second
OlderNewer