Sagor Sarker sagorbrur

🎯

Focusing

An enthusiastic NLP/AI/ML practitioner.

sagorbrur / process_wiki_corpus.py

Created January 16, 2021 03:59

	import re
	import glob
	import json
	from tqdm import tqdm

	def cleanhtml(raw_html):
	    """Return ``raw_html`` with every ``<...>`` span removed.

	    The pattern ``<[^>]*>`` matches from a ``<`` up to the first following
	    ``>``. Unlike ``<.*?>`` without ``re.DOTALL``, the negated character
	    class also matches newlines, so a tag that is broken across several
	    lines is stripped as well.

	    This is a plain regex substitution, not an HTML parser: text between
	    tags (including the contents of script/style elements) is kept, and a
	    ``>`` inside an attribute value ends the match early.

	    Args:
	        raw_html: String that may contain markup tags.

	    Returns:
	        The input string with all matched tag spans replaced by ``''``.
	    """
	    tag_pattern = re.compile(r'<[^>]*>')
	    return tag_pattern.sub('', raw_html)

sagorbrur / visualize_word2vec.py

Created December 6, 2020 05:41

	import itertools
	from gensim.models import Word2Vec
	from sklearn.manifold import TSNE
	import matplotlib.pyplot as plt
	import matplotlib.font_manager as fm



	def tsne_plot(model, vocab):
	"Creates a TSNE model and plots it"

sagorbrur / load_json_to_pandas.py

Last active March 7, 2020 14:02

	import pandas as pd

	def get_meta_from_json(json_file):
	    """Read ``json_file`` with ``pandas.read_json`` and return it transposed.

	    Args:
	        json_file: Anything ``pandas.read_json`` accepts as its input.

	    Returns:
	        The transpose (``.T``) of the DataFrame produced by ``read_json``.
	    """
	    return pd.read_json(json_file).T


	if __name__=="__main__":
	json_file = "myjson.json"

sagorbrur / draw_bengali_text_using_pillow.py

Created January 14, 2020 18:03

	"""
	Dependencies:
	$sudo apt-get install libfreetype6-dev libharfbuzz-dev libfribidi-dev gtk-doc-tools
	$git clone https://github.com/python-pillow/Pillow.git
	$cd Pillow/depends
	$chmod +x install_raqm.sh
	$./install_raqm.sh
	$pip install pillow

	"""

sagorbrur / replace_multiple_newline_with_single_newline.py

Created December 23, 2019 07:38

	import re
	# Sample text containing runs of two and three consecutive newlines.
	text = (
	    "I live in Bangladesh.\n\n\n"
	    "Bangladesh is a beautiful country.\n\n"
	    "I love my country."
	)
	# Collapse every run of one or more newlines into a single newline.
	newline_run = re.compile(r'\n+')
	res = newline_run.sub('\n', text)
	print(res)

sagorbrur / select_all_using_ext.py

Created December 3, 2019 03:56

	import os
	from tqdm import tqdm
	count = 0

	for root, dirs, files in tqdm(os.walk("/path")):
	for file in files:
	if file.endswith(".txt"):
	# print(file)
	filename = "files/text_{}".format(count)
	output = open(filename, "w")

sagorbrur / remove_html_tag.py

Created November 30, 2019 09:01

	# Removing HTML Tag from text using regex

	# code ref: https://stackoverflow.com/questions/9662346/python-code-to-remove-html-tags-from-a-string

	import re

	def cleanhtml(raw_html):
	    """Strip every ``<...>`` span from ``raw_html`` and return the rest.

	    Uses ``<[^>]*>`` rather than ``<.*?>``: both stop at the first ``>``,
	    but the negated character class also crosses newlines, so tags whose
	    attributes are wrapped over several lines are removed too.

	    This is a regex substitution only, not HTML parsing: text between tags
	    is left untouched, and a literal ``>`` inside an attribute value
	    terminates the match early.

	    Args:
	        raw_html: String that may contain markup tags.

	    Returns:
	        ``raw_html`` with each matched tag span replaced by ``''``.
	    """
	    tag_pattern = re.compile(r'<[^>]*>')
	    return tag_pattern.sub('', raw_html)

sagorbrur / digit_separate_regex.py

Created November 19, 2019 18:05

sagorbrur / variable_regex.py

Created October 31, 2019 08:47

	import re
	import sys

	# Literal text to search for, taken from the first command-line argument.
	TEXTO = sys.argv[1]
	# re.escape() makes TEXTO match literally. The prefix requires a word
	# boundary followed by a word character; the suffix requires a word boundary
	# that is not followed by a word character.
	my_regex = r"\b(?=\w)" + re.escape(TEXTO) + r"\b(?!\w)"

	# NOTE(review): `subject` (the string being searched) is not defined anywhere
	# in this snippet; it must be assigned before this line runs -- confirm
	# where it is meant to come from.
	result = re.search(my_regex, subject, re.IGNORECASE)
	print(result)

	# ref
	# https://stackoverflow.com/questions/6930982/how-to-use-a-variable-inside-a-regular-expression

sagorbrur / image_2_pdf.py

Created October 20, 2019 09:20