This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import urllib.request | |
import zipfile | |
glove_url = "http://nlp.stanford.edu/data/glove.6B.zip" | |
glove_zip_path = "glove.6B.zip" | |
glove_dir = "glove.6B" | |
if not os.path.exists(glove_dir): | |
urllib.request.urlretrieve(glove_url, glove_zip_path) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import tensorflow as tf | |
from tensorflow.keras.preprocessing.text import Tokenizer | |
from tensorflow.keras.models import Sequential | |
from tensorflow.keras.layers import Embedding, LSTM, Dense | |
from tensorflow.keras.preprocessing.sequence import pad_sequences | |
x = "தமிழ்நாடு இந்தியாவின் தெற்கே அமைந்த ஒரு அழகிய மாநிலமாகும். இது பல்வேறு கலாச்சார பாரம்பரியங்களையும், செழிப்பான சாகுபடிமுறையையும் கொண்டுள்ளது. தமிழ்நாட்டின் தலைநகரமான சென்னை, தொழில்நுட்பம் மற்றும் கல்வியில் முன்னணி வகிக்கிறது. மாமல்லபுரம், தஞ்சாவூர் பெரிய கோயில் போன்ற வரலாற்று முக்கியத்துவம் வாய்ந்த இடங்கள் சுற்றுலாப் பயணிகளை ஈர்க்கின்றன. தமிழ்நாட்டின் கலை, இலக்கியம் மற்றும் இசை உலகளாவிய புகழ் பெற்றவை" | |
tokens = Tokenizer() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import tensorflow as tf | |
from tensorflow.keras.preprocessing.text import Tokenizer | |
from tensorflow.keras.models import Sequential | |
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, SimpleRNN | |
from tensorflow.keras.preprocessing.sequence import pad_sequences | |
x = "தமிழ்நாடு இந்தியாவின் தெற்கே அமைந்த ஒரு அழகிய மாநிலமாகும். இது பல்வேறு கலாச்சார பாரம்பரியங்களையும், செழிப்பான சாகுபடிமுறையையும் கொண்டுள்ளது. தமிழ்நாட்டின் தலைநகரமான சென்னை, தொழில்நுட்பம் மற்றும் கல்வியில் முன்னணி வகிக்கிறது. மாமல்லபுரம், தஞ்சாவூர் பெரிய கோயில் போன்ற வரலாற்று முக்கியத்துவம் வாய்ந்த இடங்கள் சுற்றுலாப் பயணிகளை ஈர்க்கின்றன. தமிழ்நாட்டின் கலை, இலக்கியம் மற்றும் இசை உலகளாவிய புகழ் பெற்றவை" | |
tokens = Tokenizer() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tensorflow.keras.models import model_from_json | |
from tensorflow.keras.preprocessing.sequence import pad_sequences | |
import pickle | |
tokens = pickle.load(open(r'/content/Ilayaraja_book_tokens.pkl', 'rb')) | |
model_file = pickle.load(open(r'/content/Ilayaraja_book_model.pkl', 'rb')) | |
model = model_from_json(model_file['model_json']) | |
model.set_weights(model_file['model_weights']) | |
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import tensorflow as tf | |
from tensorflow.keras.preprocessing.text import Tokenizer | |
from tensorflow.keras.models import Sequential | |
from tensorflow.keras.layers import Embedding, LSTM, Dense | |
from tensorflow.keras.preprocessing.sequence import pad_sequences | |
import pickle | |
x = open(r'/content/இசை_ஜீனியஸ்_ராஜா_ரவி_நடராஜன்.txt', 'rb').read().decode(encoding='utf-8') | |
x = x.replace('\n', '').replace('\r', '').replace('\ufeff', '').replace('“','').replace('”','') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from gensim.models.fasttext import FastText | |
paragraph = "Periyar was a social reformer in Tamil Nadu. He founded the Self-Respect Movement. This movement aimed to promote equality and end caste discrimination. Today, he is celebrated as a key figure in the fight for social justice and equality in Tamil Nadu" | |
lines = [i for i in paragraph.split('.')] | |
x= [[word for word in nltk.word_tokenize(each_line) if word.lower() not in nltk.corpus.stopwords.words('english')] for each_line in lines] | |
model = FastText(x, window=20, min_count=1, sg=1, sample=1e-3) | |
print (model.wv.index_to_key) | |
print (model.wv['Periyar']) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!pip install gensim | |
pip install numpy==1.23.5 | |
from gensim.models import word2vec | |
import nltk | |
nltk.download('stopwords') | |
nltk.download('punkt_tab') | |
paragraph = "Periyar was a social reformer in Tamil Nadu. He founded the Self-Respect Movement. This movement aimed to promote equality and end caste discrimination. Today, he is celebrated as a key figure in the fight for social justice and equality in Tamil Nadu" | |
lines = [i for i in paragraph.split('.')] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.preprocessing import LabelEncoder | |
import numpy as np | |
paragraph = "Periyar was a social reformer in Tamil Nadu. He founded the Self-Respect Movement. This movement aimed to promote equality and end caste discrimination. Today, he is celebrated as a key figure in the fight for social justice and equality in Tamil Nadu." | |
x = [i for i in paragraph.split('.')] | |
l1 = [] | |
for i in x: | |
l1.append(LabelEncoder().fit_transform(i.split())) | |
padded_arrays = [np.pad(i, (0, max(len(i) for i in l1) - len(i)), 'constant', constant_values=99) for i in l1] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk | |
nltk.download('stopwords') | |
nltk.download('punkt') | |
nltk.download('punkt_tab') | |
from sklearn.feature_extraction.text import CountVectorizer | |
paragraph = "Periyar was a social reformer in Tamil Nadu. He founded the Self-Respect Movement. This movement aimed to promote equality and end caste discrimination. Today, he is celebrated as a key figure in the fight for social justice and equality in Tamil Nadu." | |
x = [i for i in paragraph.split('.')] | |
tokens = CountVectorizer() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import tensorflow as tf | |
from tensorflow.keras.preprocessing.text import Tokenizer | |
from tensorflow.keras.models import Sequential | |
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, SimpleRNN | |
from tensorflow.keras.preprocessing.sequence import pad_sequences | |
x = "தமிழ்நாடு இந்தியாவின் தெற்கே அமைந்த ஒரு அழகிய மாநிலமாகும். இது பல்வேறு கலாச்சார பாரம்பரியங்களையும், செழிப்பான சாகுபடிமுறையையும் கொண்டுள்ளது. தமிழ்நாட்டின் தலைநகரமான சென்னை, தொழில்நுட்பம் மற்றும் கல்வியில் முன்னணி வகிக்கிறது. மாமல்லபுரம், தஞ்சாவூர் பெரிய கோயில் போன்ற வரலாற்று முக்கியத்துவம் வாய்ந்த இடங்கள் சுற்றுலாப் பயணிகளை ஈர்க்கின்றன. தமிழ்நாட்டின் கலை, இலக்கியம் மற்றும் இசை உலகளாவிய புகழ் பெற்றவை" | |
tokens = Tokenizer() |
NewerOlder