Skip to content

Instantly share code, notes, and snippets.

## use pathlib for Python >= 3.4
from pathlib import Path

# Build the path from its components; pathlib joins them with the
# separator of the platform the script runs on.
directory = Path('main_dir', 'sub_dir')
file = 'example.json'
print(Path(directory, file))
# Output (on POSIX; print shows no surrounding quotes):
# main_dir/sub_dir/example.json
## use os for Python < 3.4
import jsonlines
def write_jsonl(json_data, file_loc=False):
"""Write list object to newline-delimited JSON format.
:param json_data: data in an object that is to be converted to JSONL format
:type json_data: list
:param file_loc: location of file to write to
:type file_loc: str, bool
"""
import json

# JSON encoding
string = "I am a data-scientist and love working with Natural Language Processing."
# json.dumps returns the encoded JSON document as a str.
s = json.dumps(string)
# json.dump writes the encoded document to an open file, so the file
# object has to be passed as its second argument.
with open("data.json", "w") as json_out:
    json.dump(string, json_out)
# JSON decoding
# Annotated example records: each holds the raw text plus a list of entity
# spans. `start`/`end` are character offsets into `text`, chosen so that
# text[start:end] equals the entity's own `text` value.
data = [
    {
        'text': 'I am currently an employee of ING bank.',
        'entities': [
            {'start': 30, 'end': 38, 'label': 'ORGANIZATION', 'text': 'ING bank'},
        ],
    },
    {
        'text': 'Named Entity Recognitions finds all named entities, such as the Netherlands, in a given sentence.',
        'entities': [
            {'start': 60, 'end': 75, 'label': 'LOCATION', 'text': 'the Netherlands'},
        ],
    },
]
# Rows to serialise; every element of a row is already a string.
data = [
    ("Hi my name is Louis, and I write compound sentences, often delimited by a comma.", "26", "de Bruijn"),
]
# NOTE(review): the fields are joined with a bare comma and are not quoted,
# so the commas inside the first field cannot be told apart from delimiters
# when the file is read back. Presumably this is a deliberate demonstration
# of that pitfall -- confirm; if not, write the rows with csv.writer instead.
with open("texts.csv", "w") as csv_out:
    for element in data:
        # One line per row, terminated by a newline.
        csv_out.write("{0}\n".format(",".join(element)))
objects = []
with open("texts.csv", "r") as csv_in:
for line in csv_in:
def unescape_html_wrapper(json_data):
"""Escapes HTML entities from strings in data to be saved in JSON format.
:param json_data: The data that is going to be saved in JSON format.
:type json_data: list
:return: The data with HTML entities unescaped
:rtype: list
"""
if isinstance(json_data, list):
import json

s = "Ik wil de te naamstelling van &nbsp; mijn betaalrekening &amp; pas aanpassen Mej. \u2014-&gt; Mw."
# ensure_ascii=True (the default) escapes every non-ASCII character, so the
# em dash comes out as the escape sequence \u2014.
print(json.dumps(s, ensure_ascii=True))  # default parameter setting
# Output:
# "Ik wil de te naamstelling van &nbsp; mijn betaalrekening &amp; pas aanpassen Mej. \u2014-&gt; Mw."
# ensure_ascii=False writes non-ASCII characters through unchanged.
print(json.dumps(s, ensure_ascii=False))
# Output:
# "Ik wil de te naamstelling van &nbsp; mijn betaalrekening &amp; pas aanpassen Mej. —-&gt; Mw."
import html
def unescape_html(
text: str) -> str:
"""Converts any HTML entities found in text to their textual representation.
:param text: utterance that may contain HTML entities
:type text: str
Example of HTML entities found during annotations
import os
import json
from datetime import datetime
from logging import info
def json_to_file(file_loc=False, json_data=None, create_indexes=False, unescape_html=False):
"""Create JSON file object.
:param file_loc: location of file to write to
:type file_loc: str, bool
# Example 1: shuffle data to ensure random class distribution in train/test split
import random

documents = ["positive tweet message", "negative tweet message"]
labels = ["pos", "neg"]
# Pair each document with its label so that a single shuffle keeps the two
# aligned; random.shuffle works in place and therefore needs a list.
tuples = list(zip(documents, labels))
random.shuffle(tuples)
# Unzip back into two parallel sequences (zip yields tuples, not lists).
X, Y = zip(*tuples)