LouisdeBruijn’s gists

LouisdeBruijn / os_path.py

Last active April 13, 2021 07:33

	## use pathlib for Python >= 3.4
	from pathlib import Path

	# Build the directory path from its components; pathlib inserts the
	# separator that is correct for the current platform.
	directory = Path('main_dir', 'sub_dir')
	file = 'example.json'

	# Passing an existing Path plus a further component yields the joined path.
	print(Path(directory, file))
	# Output (on POSIX; print shows the path without quotes):
	# main_dir/sub_dir/example.json

	## use os for Python < 3.4

LouisdeBruijn / jsonl.py

Created December 31, 2020 11:31

	import jsonlines

	def write_jsonl(json_data, file_loc=False):
	"""Write list object to newline-delimited JSON format.

	:param json_data: data in an object that is to be converted to JSONL format
	:type json_data: list
	:param file_loc: location of file to write to
	:type file_loc: str, bool
	"""

LouisdeBruijn / json_encoding_decoding.py

Created December 31, 2020 11:06

	import json

	# JSON encoding
	string = "I am a data-scientist and love working with Natural Language Processing."

	# json.dumps serializes the object to a str held in memory.
	s = json.dumps(string)

	# json.dump serializes straight to a file, so it needs the open file
	# object as its second argument (calling it with only the data raises
	# TypeError).
	with open("data.json", "w", encoding="utf-8") as json_out:
	    json.dump(string, json_out)

	# JSON decoding

LouisdeBruijn / nested_text_data.py

Last active December 31, 2020 11:01

	# Each record pairs a text with the entities annotated in it. 'start' and
	# 'end' are character offsets into 'text', i.e.
	# record['text'][start:end] == entity['text'].
	data = [
	    {
	        'text': 'I am currently an employee of ING bank.',
	        'entities': [
	            {'start': 30, 'end': 38, 'label': 'ORGANIZATION', 'text': 'ING bank'},
	        ],
	    },
	    {
	        'text': 'Named Entity Recognitions finds all named entities, such as the Netherlands, in a given sentence.',
	        'entities': [
	            {'start': 60, 'end': 75, 'label': 'LOCATION', 'text': 'the Netherlands'},
	        ],
	    },
	]

LouisdeBruijn / out_of_bounds.py

Last active December 31, 2020 10:52

	# One row per tuple; note that the first field itself contains commas.
	data = [
	    ("Hi my name is Louis, and I write compound sentences, often delimited by a comma.", "26", "de Bruijn")
	]

	# Write every row as one comma-joined line.
	# NOTE(review): the plain ",".join does no quoting, so commas inside a
	# field become indistinguishable from delimiters when the file is read
	# back. This looks deliberate for the demonstration -- confirm; real code
	# should use csv.writer instead.
	with open("texts.csv", "w") as csv_out:
	    for element in data:
	        csv_out.write("{0}\n".format(",".join(element)))

	objects = []
	with open("texts.csv", "r") as csv_in:
	for line in csv_in:

LouisdeBruijn / unescape_html_wrapper.py

Created December 31, 2020 10:27

	def unescape_html_wrapper(json_data):
	"""Escapes HTML entities from strings in data to be saved in JSON format.

	:param json_data: The data that is going to be saved in JSON format.
	:type json_data: list

	:return: The data with HTML entities unescaped
	:rtype: list
	"""
	if isinstance(json_data, list):

LouisdeBruijn / json_html.py

Last active December 31, 2020 09:42

	import json

	# Sample utterance; the \u2014 escape in the literal is an em dash.
	s = "Ik wil de te naamstelling van   mijn betaalrekening & pas aanpassen Mej. \u2014-> Mw."

	# ensure_ascii=True (the default) escapes every non-ASCII character.
	print(json.dumps(s, ensure_ascii=True)) # default parameter setting
	# Output: "Ik wil de te naamstelling van   mijn betaalrekening & pas aanpassen Mej. \u2014-> Mw."

	# ensure_ascii=False writes non-ASCII characters through unchanged.
	print(json.dumps(s, ensure_ascii=False))
	# Output: "Ik wil de te naamstelling van   mijn betaalrekening & pas aanpassen Mej. —-> Mw."

LouisdeBruijn / unescape_html.py

Last active December 31, 2020 10:22

	import html

	def unescape_html(
	text: str) -> str:
	"""Converts any HTML entities found in text to their textual representation.

	:param text: utterance that may contain HTML entities
	:type text: str

	Example of HTML entities found during annotations

LouisdeBruijn / json_to_file.py

Last active December 31, 2020 11:15

	import os
	import json
	from datetime import datetime
	from logging import info

	def json_to_file(file_loc=False, json_data=None, create_indexes=False, unescape_html=False):
	"""Create JSON file object.

	:param file_loc: location of file to write to
	:type file_loc: str, bool

LouisdeBruijn / asterisk_operators.py

Last active September 2, 2020 08:19

	# Example 1: shuffle data to ensure random class distribution in train/test split
	import random
	documents = ["positive tweet message", "negative tweet message"]
	labels = ["pos", "neg"]

	# Pair every document with its label so both are shuffled together.
	# random.shuffle works in place and needs a mutable sequence, hence list().
	tuples = list(zip(documents, labels))
	random.shuffle(tuples)

	# zip(*tuples) unpacks the pairs again: X holds the documents and Y the
	# labels, both as tuples in the same shuffled order.
	X, Y = zip(*tuples)