working...

Pema Gurung pemagrg1

working...

Studying MSc in Computational Linguistics at Stuttgart University, Germany

pemagrg1 / one hot encoding using numpy

Created January 9, 2019 04:31

one hot encoding using numpy

	import numpy as np
	# Tokenise the sample sentence into lower-cased words.
	docs = "Can I eat the Pizza".lower().split()
	# Sorted unique words; a word's position in this list is its integer code.
	doc1 = sorted(set(docs))
	print ("\nvalues: ", doc1)

	# Build the vocabulary array once instead of on every loop iteration.
	vocabulary = np.array(doc1)
	integer_encoded = []
	for i in docs:
	    # Words in the vocabulary are unique, so the first match is the only one.
	    v = np.where(vocabulary == i)[0][0]
	    integer_encoded.append(v)

pemagrg1 / one hot encoding using sklearn

Created January 9, 2019 04:36

one hot encoding using sklearn

	from numpy import array
	from numpy import argmax
	from sklearn.preprocessing import LabelEncoder
	from sklearn.preprocessing import OneHotEncoder
	# define example
	# data = ['cold', 'cold', 'warm', 'cold', 'hot', 'hot', 'warm', 'cold', 'warm', 'hot']


	# Two lower-cased sample sentences used as the documents to encode.
	doc1, doc2 = (
	    sentence.lower()
	    for sentence in ("Can I eat the Pizza", "You can eat the Pizza")
	)

pemagrg1 / one hot encoding using Keras

Created January 9, 2019 04:37

one hot encoding using Keras

	from keras.preprocessing.text import Tokenizer
	from numpy import array
	from numpy import argmax
	from keras.utils import to_categorical


	# Sample sentence, lower-cased and split on whitespace into a list of words.
	doc = "Can I eat the Pizza".lower().split()

	def using_Tokenizer(doc):
	# create the tokenizer

pemagrg1 / one hot encoding using Tensorflow

Created January 10, 2019 10:52

one hot encoding using Tensorflow

	import tensorflow as tf
	import pandas as pd

	text = 'My cat is a great cat'
	tokens = text.lower().split()

	# Sort the unique tokens so every word gets the same id on every run;
	# iterating a bare set of strings follows no fixed order.
	vocab = sorted(set(tokens))
	# Map each word (index) to its integer id (value).
	vocab = pd.Series(range(len(vocab)), index=vocab)

	# Integer id of every token, in sentence order (repeated words repeat ids).
	word_ids = vocab.loc[tokens].values

pemagrg1 / pandas_get_total_row.py

Last active July 9, 2019 06:31

	"""
	Add a 'Total' column and a 'Total' row holding the sums of a DataFrame's values.
	"""
	def totalcount(data):
	    """Return a copy of ``data`` with a ``Total`` column of per-row sums.

	    An existing ``Total`` column is left out of the sum and replaced, so
	    applying the function twice does not double-count.
	    """
	    without_total = data.drop('Total', errors='ignore', axis=1)
	    row_sums = without_total.sum(axis=1)
	    return data.assign(Total=row_sums)

	def pandas_get_total_row(df):
	    """Return ``df`` extended with both a ``Total`` column and a ``Total`` row.

	    The column is added first; the frame is then transposed so the same
	    helper can add the row, and transposed back.
	    """
	    with_total_column = totalcount(df)
	    transposed_with_total = totalcount(with_total_column.T)
	    return transposed_with_total.T

pemagrg1 / pandas_calculate_total.py

Created July 9, 2019 06:34


	def get_total(df, column='Marks'):
	    """Append a ``Total`` row holding the sum of one column.

	    The frame is modified in place and also returned. Every other column
	    of the new row is left empty (NaN).

	    Args:
	        df: DataFrame to extend; it must contain ``column``.
	        column: Name of the column to sum. Defaults to ``'Marks'``.

	    Returns:
	        The same ``df`` object, now with a row labelled ``'Total'``.
	    """
	    # A one-element Series keyed by the column name aligns with the frame's
	    # columns, so only that column receives a value in the new row.
	    df.loc['Total'] = pd.Series(df[column].sum(), index=[column])
	    return df

	# Build the sample marks table with an explicit column order, then append
	# the 'Total' row via get_total().
	df = get_total(
	    pd.DataFrame(
	        {'Subjects': ["Maths","Science","English"], 'Marks': [80,90,75]}
	    ).reindex(columns=['Subjects','Marks'])
	)
	# Bare expression: displays the frame in a notebook/REPL; no effect in a script.
	df

pemagrg1 / sklearn-model-to-pickle.py

Last active September 4, 2019 11:38

	from sklearn.feature_extraction.text import TfidfVectorizer
	import pandas as pd
	from sklearn import svm
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import accuracy_score
	import pickle
	from sklearn import linear_model
	# Placeholder value: replace with the path to the project folder before running.
	Project_path = "<path to the project folder>"

pemagrg1 / load-sklearn-pickle-and-predict.py

Created September 4, 2019 11:39

	import pickle

	# Placeholder value: replace with the path to the project before running.
	Project_path = "<path to project>"
	model_path = Project_path + "/08. Multi-class_text_classification/models/model.pickle"
	vectorizer_path = Project_path + "/08. Multi-class_text_classification/models/vectorizer.pickle"

	# NOTE: unpickling can execute arbitrary code; only load pickle files that
	# come from a trusted source.
	# The context managers close both file handles as soon as loading finishes.
	with open(vectorizer_path, 'rb') as vectorizer_file:
	    vectorizer = pickle.load(vectorizer_file)
	with open(model_path, 'rb') as model_file:
	    model = pickle.load(model_file)

	# Vectorise the single sample sentence and take the prediction for it.
	pred = model.predict(vectorizer.transform(["i have got a new phone. its from Apple.. and i love it!"]))[0]
	print ("predicted class:", pred)

pemagrg1 / url_check

Created October 23, 2019 11:11

	"""
	Regex-based check of whether a URL points to an inner page, a home page, or a category page.
	"""
	import re


	def url_check(url):
	url = url.split("/")
	url = list(filter(None, url))
	if "http" in url[0]:

pemagrg1 / generating_sinewaves.py

Created March 16, 2020 08:06

	"""
	In audio production, a sample rate (or "sampling rate") defines how many times per second a sound is sampled.
	Technically speaking, it is the frequency of samples used in a digital recording.
	"""
	import numpy as np
	from scipy.io import wavfile

	sampleRate = 100  # samples per second (see the definition in the docstring above)
	frequency = 10  # presumably the sine wave's frequency in Hz — TODO confirm against its use
	audio_length = 1 #second