dardanxhymshiti’s gists

dardanxhymshiti / generate_tables_in_msword.py

Last active March 23, 2023 08:10

	from docx import Document
	from docx.shared import Cm, Pt


	article_1 = """Bayern Munich came out on top in a thrilling German Cup final, beating Bayer Leverkusen 4-2 to secure its 20th title and remain on course for an historic treble.
	David Alaba's stunning free kick and Serge Gnabry's clinical finish gave Bayern a commanding lead heading into half time and Hans-Dieter Flick's side seemingly already had one hand on the trophy.
	However, Leverkusen responded well early in the second half and had a golden opportunity to halve the deficit through substitute Kevin Volland."""

	article_2 = """(CNN)Liverpool got its Premier League title-winning celebrations back on track with a 2-0 win over Aston Villa, just days after being on the receiving end of a record-equaling defeat.
	Many had suggested Jurgen Klopp's side was suffering from something of a hangover during Thursday's 4-0 demolition at the hands of Manchester City -- the joint-heaviest defeat by a team already crowned Premier League champion -- but Liverpool re

dardanxhymshiti / describe_text.py

Last active July 3, 2020 14:39

	def describe_text(text):
	import re, string

	description = dict()

	# remove punctuation marks
	text_wo_punctuation_marks = re.sub(f'[%s]' % re.escape(string.punctuation), '', text)

	# tokens of the text without punctuation marks
	tokens_of_text_wo_punctuation_marks = text_wo_punctuation_marks.split(' ')

dardanxhymshiti / remove_punctuation_marks.py

Last active July 3, 2020 14:34

	def remove_punctuation_marks(text):
	    """Return ``text`` with every ASCII punctuation character removed.

	    The characters removed are exactly those listed in
	    ``string.punctuation``; letters, digits, whitespace and non-ASCII
	    symbols are left untouched.

	    Args:
	        text: The string to clean.

	    Returns:
	        A copy of ``text`` without punctuation marks.
	    """
	    import string

	    # A deletion table strips every punctuation character in one pass and
	    # needs no regex escaping.
	    return text.translate(str.maketrans('', '', string.punctuation))


	# Test
	text = """Hello, World!"""

dardanxhymshiti / get_text_within_brackets.py

Last active July 3, 2020 14:30

	def get_text_within_brackets(text):
	    """Return the substrings enclosed in (), [] or {} within ``text``.

	    Matching is non-greedy, so each opener is paired with the nearest
	    closer of the same bracket type. Content spanning a line break is not
	    matched because ``.`` does not match a newline here.

	    Args:
	        text: The string to search.

	    Returns:
	        A list with the text found inside each bracket pair (without the
	        brackets themselves), in order of appearance.
	    """
	    import re

	    # One alternative per bracket type so an opener only pairs with its own
	    # closer. A character class such as [\(\|\[\|\{] would also contain a
	    # literal '|', which wrongly treats pipes as brackets.
	    pattern = r"\((.*?)\)|\[(.*?)\]|\{(.*?)\}"
	    # Exactly one group participates in each match; lastindex identifies it.
	    return [match.group(match.lastindex) for match in re.finditer(pattern, text)]


	# Test: returns ["and it's pretty hard to surprise me!", 'Felix']
	text = '''I was very surprised (and it's pretty hard to surprise me!)... He [Felix] is a gret friends of me...'''
	get_text_within_brackets(text)

dardanxhymshiti / get_consequent_title_words.py

Last active July 3, 2020 15:34

	def get_consequent_title_words(text):
	import re
	pattern_compiled = re.compile(r'([A-Z][^\.!?]*[\.!?])', re.M)
	list_of_sentences = re.findall(pattern_compiled, text)
	list_of_sentence_tokens = [sentence.split(' ') for sentence in list_of_sentences]

	list_of_consequent_tokens = list()
	for tokens in list_of_sentence_tokens:
	temp_list_of_title_tokens = list()
	for index, t in enumerate(tokens):

dardanxhymshiti / get_context.py

Last active July 3, 2020 14:31

	def get_context(text, list_of_tokens, context_span=20):
	import re
	context = []
	for token in list_of_tokens:
	all_occurences_indices = [m.start() for m in re.finditer(token, text)]
	for index in all_occurences_indices:
	left_index = max(index - context_span, 0)
	right_index = min(index + context_span, len(text))
	substring = text[left_index: right_index].strip()

dardanxhymshiti / get_sentences.py

Last active July 3, 2020 14:32

	def get_sentences(text):
	    """Split ``text`` into sentences using a simple regular expression.

	    A sentence is taken to be an uppercase ASCII letter followed by any run
	    of characters up to and including the first '.', '!' or '?'. Text before
	    the first capital letter, and trailing text that has no terminating
	    mark, is not returned.

	    Args:
	        text: The string to split.

	    Returns:
	        A list of the matched sentences, in order of appearance.
	    """
	    import re

	    # Use the compiled pattern directly; no MULTILINE flag is needed because
	    # the pattern has no ^ or $ anchors.
	    sentence_pattern = re.compile(r'([A-Z][^.!?]*[.!?])')
	    return sentence_pattern.findall(text)


	# Test
	text = """This is the most frequent question we're asked by prospective students. And our response? Absolutely! We've trained people from all walks of life."""

dardanxhymshiti / get_capital_words.py

Last active July 3, 2020 14:32

	def get_capital_words(text):
	    """Return the words in ``text`` written entirely in capital letters.

	    A word qualifies when it is a run of two or more uppercase ASCII
	    letters delimited by word boundaries, so single capitals ("A") and
	    mixed tokens ("USA2", "iPhone") are not returned.

	    Args:
	        text: The string to search.

	    Returns:
	        A list of the all-caps words, in order of appearance.
	    """
	    import re

	    pattern = r'(\b[A-Z]{2,}\b)'
	    return re.findall(pattern, text)


	# Test: returns ['EOD']
	text = """Thank you! Your customer service request has been logged. A specialist will reach out by EOD"""
	get_capital_words(text)

dardanxhymshiti / get_text_within_quotes.py

Last active July 3, 2020 14:32

	def get_text_within_quotes(text):
	    """Return the substrings enclosed in double quotes within ``text``.

	    Only the straight ASCII double quote (") is recognised. Matching is
	    non-greedy, so quotes are paired left to right, and quoted text that
	    spans a line break is not matched.

	    Args:
	        text: The string to search.

	    Returns:
	        A list with the text found between each pair of quotes (without
	        the quotes themselves), in order of appearance.
	    """
	    import re

	    pattern = r'"(.*?)"'
	    return re.findall(pattern, text)


	# Test: returns ['Walk', "Don't Walk", 'Walk']
	text = """The sign said, "Walk". Then it said, "Don't Walk" then, "Walk" all within thirty seconds"""
	get_text_within_quotes(text)

dardanxhymshiti / get_numbers_from_text.py

Last active July 3, 2020 15:40

	def get_numbers_from_text(text):
	    """Extract the numeric literals that appear in ``text``.

	    Recognised forms: an optional sign, then either digits with optional
	    thousands groups (",ddd") and an optional fractional part, or a bare
	    fraction such as ".5"; an optional exponent ("e-3") may follow.

	    Args:
	        text: The string to search.

	    Returns:
	        A list of the matched numbers as strings, in order of appearance.
	    """
	    import re

	    # The thousands groups, fraction and exponent are all optional, so plain
	    # integers like "36" match as well as "9,585". The fractional part needs
	    # at least one digit after the dot, which keeps a sentence-ending period
	    # out of the match.
	    pattern = r'[-+]?(?:\d+(?:,\d{3})*(?:\.\d+)?|\.\d+)(?:[eE][-+]?\d+)?'
	    return re.findall(pattern, text)


	# Test: returns ['36', '9,585']
	text = """A rise in cases was re[prted acrpss a staggering 36 US states last week. In Florida, officals recorded 9,585 new cases on Saturday."""
	get_numbers_from_text(text)

DardanX dardanxhymshiti