Skip to content

Instantly share code, notes, and snippets.

@nithyadurai87
Created March 7, 2025 18:35
Show Gist options
  • Save nithyadurai87/62c94526e4b8ab25b6c54633ce1036b9 to your computer and use it in GitHub Desktop.
Save nithyadurai87/62c94526e4b8ab25b6c54633ce1036b9 to your computer and use it in GitHub Desktop.
08_Modelbuilding_on_GPU.py
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
# Tamil-language source books (UTF-8 .txt files) used as the training corpus.
files = [r'/content/இசை_ஜீனியஸ்_ராஜா_ரவி_நடராஜன்.txt',r'/content/தமிழின்_எதிர்காலமும்_தகவல்_தொழில்நுட்பமும்_இரா_அசோகன்.txt',r'/content/திறந்த_மூல_மென்பொருளில்_முதல்_அடி_எடுத்து_வைக்கலாம்_வாங்க_இரா_அசோகன்.txt',r'/content/தொழிலியல்_விஞ்ஞானி_ஜி_டி_நாயுடு_என்_வி_கலைமணி.txt',r'/content/நான்_இந்துவல்ல_நீங்கள்_தொ_பரமசிவம்.txt']

# Concatenate every book into one text blob.
x = ""
for path in files:
    # Read as bytes and decode explicitly so the platform default encoding
    # never garbles the Tamil text; `with` guarantees the handle is closed.
    with open(path, 'rb') as f:
        x += f.read().decode(encoding='utf-8')

# Strip line breaks, the UTF-8 BOM, and curly quotes so only sentence text remains.
x = x.replace('\n', '').replace('\r', '').replace('\ufeff', '').replace('“','').replace('”','')
# Fit a word-level tokenizer on the whole corpus and persist it so the
# exact same vocabulary can be reloaded at inference time.
tokens = Tokenizer()
tokens.fit_on_texts([x])
# Context manager closes the pickle file (the original leaked the handle).
with open('தமிழ்_புத்தகங்கள்_டோக்கன்.pkl', 'wb') as f:
    pickle.dump(tokens, f)

# word -> integer index mapping learned from the corpus.
dictionary = tokens.word_index
# Build n-gram prefixes: for every '.'-delimited sentence, each token
# prefix [w0..wi] (i >= 1) becomes one training example whose final
# token will be the prediction target.
x_n_grams = []
for line in x.split('.'):
    line_tokens = tokens.texts_to_sequences([line])[0]
    for i in range(1, len(line_tokens)):
        x_n_grams.append(line_tokens[:i + 1])

# Left-pad every sequence to the longest one so they stack into a 2-D matrix
# (generator avoids materializing a throwaway list of lengths).
max_line_len = max(len(seq) for seq in x_n_grams)
training_data = np.array(pad_sequences(x_n_grams, maxlen=max_line_len, padding='pre'))

# Everything but the last column is the input; the last token is the label.
train_X = training_data[:, :-1]
train_y = training_data[:, -1]
# Vocabulary size (+1 because Keras reserves index 0 for the padding token).
total_words = len(dictionary) + 1
print(total_words)

# One-hot encode the label tokens for categorical cross-entropy training.
y = np.array(tf.keras.utils.to_categorical(train_y, num_classes=total_words))
# Next-word prediction model: Embedding -> LSTM -> softmax over the vocabulary.
model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_line_len-1))
model.add(LSTM(150))
model.add(Dense(total_words, activation='softmax'))
# Explicit build pins the input shape (each example is max_line_len-1 tokens).
model.build(input_shape=(None, max_line_len-1))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(train_X, y, epochs=500, verbose=1)

# Persist the architecture (JSON) and weights together so the model can be
# rebuilt later without rerunning this training script.
books_model = {'model_json': model.to_json(), 'model_weights': model.get_weights()}
# Context manager closes the pickle file (the original leaked the handle).
with open('தமிழ்_புத்தகங்கள்_மாடல்.pkl', 'wb') as f:
    pickle.dump(books_model, f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment