I hereby claim:
- I am psorianom on github.
- I am psoriano (https://keybase.io/psoriano) on keybase.
- I have a public key ASBEtv4RYHXAyi-Dzj24fMUzLFjCWqwBS88Cg8Oxw0AY4Qo
To claim this, I am signing this object:
I hereby claim:
To claim this, I am signing this object:
from importlib import import_module | |
from pprint import pprint | |
from typing import List, Dict | |
from dash.development.base_component import Component | |
from dash_html_components import Div | |
from dash_html_components import P ,Mark | |
from dash_interface.helper import serialize_components |
""" | |
Class that inherits MosesTokenizer and adds a method which returns the spans. Kinda flaky with the escape, unescape, | |
detokenize situation, so watch out! | |
""" | |
from sacremoses import MosesTokenizer, MosesDetokenizer | |
class MosesTokenizerSpans(MosesTokenizer): | |
def __init__(self, lang="en", custom_nonbreaking_prefixes_file=None): | |
MosesTokenizer.__init__(self, lang=lang, |
'''Generates a synthetic dataset (csv) of persons to test the SNU_assignator | |
Usage: | |
SNU_gen.py <o> [options] | |
Arguments: | |
<o> An output path to store the synthetic data csv | |
-n PER Number of persons to generate [default: 2000:int] | |
-f FIL Representation proportion of the filiere. Ex: "0.1,0.1,...,0.1" (default: None) | |
-r RES Representation proportion of the residence Ex: "0.1,0.1,...,0.1" (default: None) |
import xml.etree.ElementTree | |
import glob | |
texts = [] | |
all_files = list(glob.glob('./extracted/*.xml')) | |
n_files = len(all_files) | |
with open("all_capp_new.txt", "w") as filo: | |
for i,f in enumerate(all_files): | |
print("Treating file {0} => {1}/{2}\n".format(f, i+1 , n_files)) | |
e = xml.etree.ElementTree.parse(f).getroot() |
# -*- coding: utf-8 -*- | |
# Authors: Olivier Grisel <[email protected]> | |
# Mathieu Blondel <[email protected]> | |
# Lars Buitinck <[email protected]> | |
# Robert Layton <[email protected]> | |
# Jochen Wersdörfer <[email protected]> | |
# Roman Sinayev <[email protected]> | |
# | |
# License: BSD 3 clause | |
""" |