Goals: Add links to reasonable, well-written explanations of how things work. No hype and, where possible, no vendor content. Practical first-hand accounts of models in production are eagerly sought.

SOCCER | O | |
---|---|---|
- | O | |
JAPAN | B-LOC | |
GET | O | |
LUCKY | O | |
WIN | O | |
, | O | |
CHINA | B-PER | |
IN | O | |
SURPRISE | O |
import torch | |
from torch.nn.utils.rnn import pack_sequence, pad_packed_sequence | |
def unpack_sequence(packed_sequences): | |
"""Unpacks PackedSequence into a list of variable length Tensors""" | |
unpacked_sequences = [] | |
padded_sequences, lengths = pad_packed_sequence(packed_sequences, batch_first=True) |
from itertools import permutations | |
import numpy as np | |
from sklearn.metrics import accuracy_score | |
np.random.seed(42) | |
y_true = np.random.randint(low=0, high=3, size=100) | |
noize_mapper = {0: 1, 1: 2, 2: 0} |
from sklearn.feature_extraction.text import TfidfVectorizer | |
# pymorphy2 lemmatizer | |
import pymorphy2 | |
class Lemmatizer: | |
def __init__(self): | |
self.morph = pymorphy2.MorphAnalyzer() | |
def __call__(self, x: str) -> str: |
from sklearn.feature_extraction.text import TfidfVectorizer | |
# data | |
corpus = [ | |
'This is the first document.', | |
'This document is the second document.', | |
'And this is the third one.', | |
'Is this the first document?', | |
] |
import pymorphy2 | |
class Lemmatizer: | |
""" | |
Pymorphy2 lemmatizer class. | |
""" | |
def __init__(self): | |
""" |
def humanize_bytes(bytes: int, suffix: str = "B") -> str: | |
""" | |
Convert bytes to human readable format. | |
:param int bytes: number of bytes. | |
:param str suffix: bytes suffix. | |
:return: human readable size. | |
:rtype: str | |
""" |
FROM cr.msk.sbercloud.ru/aicloud-jupyter/jupyter-cuda10.1-tf2.2.0-mlspace:latest | |
MAINTAINER Dani El-Ayyass <[email protected]> | |
USER root | |
# Docker | |
# Set up the repository | |
RUN apt-get update | |
RUN apt-get -y install apt-transport-https ca-certificates curl gnupg lsb-release |