Skip to content

Instantly share code, notes, and snippets.

View sagorbrur's full-sized avatar
🎯
Focusing

Sagor Sarker sagorbrur

🎯
Focusing
View GitHub Profile
import re
import glob
import json
from tqdm import tqdm
def cleanhtml(raw_html):
    """Return *raw_html* with every ``<...>`` span removed.

    Each match runs from a ``<`` to the nearest following ``>``.
    ``re.DOTALL`` lets that span cross line breaks, so a tag whose
    attributes are wrapped over several lines is stripped as well.

    Args:
        raw_html: Text that may contain HTML tags.

    Returns:
        The text with all tags deleted; text between tags is kept.
    """
    return re.sub(r'<.*?>', '', raw_html, flags=re.DOTALL)
import itertools
from gensim.models import Word2Vec
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
def tsne_plot(model, vocab):
    """Create a t-SNE model and plot it."""
    # NOTE(review): only the docstring is visible in this excerpt; the
    # rest of the function body is not shown here.
import pandas as pd
def get_meta_from_json(json_file):
    """Read *json_file* with pandas and return the transposed DataFrame.

    The frame produced by ``pd.read_json`` is flipped so that its
    columns become the index and its index becomes the columns.
    """
    return pd.read_json(json_file).transpose()
# Script entry point; only the input-path assignment is visible in this excerpt.
if __name__=="__main__":
    # Path of the JSON file to load — presumably passed to
    # get_meta_from_json; TODO confirm against the rest of the script.
    json_file = "myjson.json"
"""
Dependencies:
$sudo apt-get install libfreetype6-dev libharfbuzz-dev libfribidi-dev gtk-doc-tools
$git clone https://github.com/python-pillow/Pillow.git
$cd Pillow/depends
$chmod +x install_raqm.sh
$./install_raqm.sh
$pip install pillow
"""
import re

# Sample text containing runs of two and three consecutive line breaks.
text = "I live in Bangladesh.\n\n\nBangladesh is a beautiful country.\n\nI love my country."
# Replace every run of one or more newlines with a single newline.
res = re.compile(r'\n+').sub('\n', text)
print(res)
import os
from tqdm import tqdm
# Number substituted into the output file name below.
count = 0
# Walk the tree under "/path"; tqdm wraps the os.walk generator, so the
# progress bar advances once per directory visited.
for root, dirs, files in tqdm(os.walk("/path")):
    for file in files:
        # Only names ending in ".txt" are handled.
        if file.endswith(".txt"):
            # print(file)
            filename = "files/text_{}".format(count)
            # NOTE(review): the visible lines never close this handle and
            # never increment `count`, so every match would reopen
            # "files/text_0" — confirm against the rest of the script.
            output = open(filename, "w")
# Removing HTML Tag from text using regex
# code ref: https://stackoverflow.com/questions/9662346/python-code-to-remove-html-tags-from-a-string
import re
def cleanhtml(raw_html):
    """Strip every ``<...>`` span from *raw_html* and return the rest.

    A span starts at ``<`` and ends at the nearest following ``>``.
    ``re.DOTALL`` is set so that a tag broken across several lines is
    removed too, instead of being left in the output.

    Args:
        raw_html: Text that may contain HTML tags.

    Returns:
        The input with all tags deleted; text between tags is kept.
    """
    return re.sub(r'<.*?>', '', raw_html, flags=re.DOTALL)
import re

text = "Hello123 with 563"
# Collect each maximal run of digits, scanning left to right.
result = [match.group() for match in re.finditer(r"\d+", text)]
print(result)
# output: ['123', '563']
import re
import sys


def search_whole_word(word, subject):
    """Search *subject* for the literal text *word*, ignoring case.

    ``re.escape`` makes *word* match literally, so regex metacharacters
    in it are not treated as pattern syntax. The surrounding assertions
    require the match to begin at a word boundary followed by a word
    character and to end at a word boundary not followed by one.

    Args:
        word: Literal text to look for.
        subject: Text to search in.

    Returns:
        The first ``re.Match`` found, or ``None`` when there is none.
    """
    my_regex = r"\b(?=\w)" + re.escape(word) + r"\b(?!\w)"
    return re.search(my_regex, subject, re.IGNORECASE)


if __name__ == "__main__":
    # The earlier version read sys.argv without importing sys and searched
    # an undefined `subject`; both inputs now come from the command line.
    if len(sys.argv) != 3:
        sys.exit("usage: {} WORD TEXT".format(sys.argv[0]))
    TEXTO = sys.argv[1]
    subject = sys.argv[2]
    result = search_whole_word(TEXTO, subject)
    print(result)
# ref
# https://stackoverflow.com/questions/6930982/how-to-use-a-variable-inside-a-regular-expression
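# Dependencies
# python 3
# pip install fpdf
# to run: python fpdf.py
# NOTE: a script saved as fpdf.py shadows the installed fpdf package and
# makes `from fpdf import FPDF` fail; save it under a different file name.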
from fpdf import FPDF
from PIL import Image
import os
# Names of the entries in the "images" directory. os.listdir returns them
# in arbitrary order — NOTE(review): sort if page order matters; confirm
# against the rest of the script.
listPages = os.listdir("images")