Skip to content

Instantly share code, notes, and snippets.

View sagorbrur's full-sized avatar
🎯
Focusing

Sagor Sarker sagorbrur

🎯
Focusing
View GitHub Profile
"""
----------------------------------------------
| Convert XML file to JSON file Using Python |
| Writer: Sagor Sarker |
| Date : 27 June 2019 |
----------------------------------------------
"""
"""
Name: Merging multiple json file into one json file
Date: 29/06/2019
Writer: Sagor Sarker
"""
import glob
import json
# Removing english word from non english document
import re
# Sample document: Bengali text with a few English words mixed in.
doc = """
নতুন করে কিছু english শব্দ নিয়ে সমস্যা তৈরি হয়েছে। বাক্যের মধ্যে এই English শব্দ খুবেই বিরক্তির উদ্রেক করছে।
Just figure out to remove these words
"""
import re

# Drop every ASCII letter; whitespace and all non-Latin characters are kept.
ascii_letter = re.compile(r'[A-Za-z]')
result = ascii_letter.sub('', doc)
# glob returns paths in arbitrary order and a plain sort is lexicographic
# ("10.txt" sorts before "2.txt"). This script sorts the paths naturally, so
# runs of digits are compared by numeric value.
import glob


def natural_sort_key(path):
    """Return a sort key for *path* that compares digit runs numerically.

    The path is split into runs of ASCII digits and single non-digit
    characters. Each token becomes a tagged tuple so that a number and a
    piece of text found at the same position are never compared directly
    (that raises TypeError on Python 3); numbers order before text.
    """
    return [
        (0, int(digits), '') if digits else (1, 0, other)
        for digits, other in re.findall(r'([0-9]+)|([^0-9])', path)
    ]


files = glob.glob('test/*.txt')
files.sort(key=natural_sort_key)
for file in files:
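# Dependencies
# python 3
# pip install fpdf
# to run: python fpdf.py
# NOTE(review): a script literally named fpdf.py shadows the installed fpdf
# package, so `from fpdf import FPDF` would import this file instead — consider
# saving the script under a different name.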
from fpdf import FPDF
from PIL import Image
import os
# Names of the entries inside the "images" directory (relative to the current
# working directory). NOTE(review): os.listdir documents its result as being
# in arbitrary order — sort it if page order matters.
listPages = os.listdir("images")
import re
# Search for a word supplied on the command line as a whole word, ignoring case.
# NOTE(review): `sys` is not imported in the lines visible here — confirm it is
# imported elsewhere.
TEXTO = sys.argv[1]
# re.escape makes the word match literally. The \b anchors combined with the
# lookarounds require the match to begin at the start of a word and to finish
# at the end of a word.
my_regex = r"\b(?=\w)" + re.escape(TEXTO) + r"\b(?!\w)"
# NOTE(review): `subject` is not defined in the lines visible here — presumably
# the text to search; confirm against the full script.
result = re.search(my_regex, subject, re.IGNORECASE)
# Prints the match object, or None when the word is not found.
print(result)
# ref
# https://stackoverflow.com/questions/6930982/how-to-use-a-variable-inside-a-regular-expression
import re
# Collect every run of consecutive digits found in the sample string.
text = "Hello123 with 563"
digit_runs = re.compile(r"\d+")
result = digit_runs.findall(text)
print(result)
# output: ['123', '563']
# Removing HTML Tag from text using regex
# code ref: https://stackoverflow.com/questions/9662346/python-code-to-remove-html-tags-from-a-string
import re
# Compiled once at import time. DOTALL lets "." cross line breaks, so a tag
# whose attributes are wrapped onto several lines is still removed.
_TAG_PATTERN = re.compile(r'<.*?>', re.DOTALL)


def cleanhtml(raw_html):
    """Return *raw_html* with every ``<...>`` tag removed.

    Each span from a ``<`` up to the nearest following ``>`` is deleted,
    including spans that contain newlines. The text between tags is kept
    as-is; HTML entities are not decoded.

    Args:
        raw_html: String that may contain HTML markup.

    Returns:
        The input string with the tags stripped out.
    """
    return _TAG_PATTERN.sub('', raw_html)
import os
from tqdm import tqdm
# Counter used to number the output files. NOTE(review): it is not incremented
# anywhere in the lines visible here, so every output name would be
# "files/text_0" — confirm against the full script.
count = 0
# Walk the directory tree under "/path"; tqdm wraps the os.walk iterator.
for root, dirs, files in tqdm(os.walk("/path")):
for file in files:
# Only names ending in ".txt" are handled.
if file.endswith(".txt"):
# print(file)
# The output path is derived from the counter.
filename = "files/text_{}".format(count)
# NOTE(review): opened for writing; no matching close is visible in this
# excerpt — confirm the handle is closed later (or use a `with` block).
output = open(filename, "w")
import re
# Collapse each run of consecutive newlines into a single newline.
text = "I live in Bangladesh.\n\n\nBangladesh is a beautiful country.\n\nI love my country."
newline_runs = re.compile(r'\n+')
res = newline_runs.sub('\n', text)
print(res)