From https://unix.stackexchange.com/a/93158
gzip a folder:
tar -zcvf archive.tar.gz directory/
unzip the folder:
tar -zxvf archive.tar.gz
| import re | |
| import codecs | |
| class WarcHeader(dict): | |
| def __init__(self): | |
| dict.__init__(self) | |
| self.__dict__ = self | |
| # This script is largely based on this forum post from the elastic blog | |
| # https://www.elastic.co/blog/loading-wikipedia | |
| # variables | |
| es="$ES_HOST:$ES_PORT" | |
| site="en.wikipedia.org" | |
| index="enwiki" | |
| indexDate="20160919" | |
| indexType="content" |
| import os | |
| import json | |
| from bs4 import BeautifulSoup | |
| def get_extracted_concepts(doc, ctakes_doc_content): | |
| ctakes_doc = BeautifulSoup(ctakes_doc_content, 'xml') | |
| umls_concepts = [] | |
| for cas_FSArray in ctakes_doc.find_all('uima.cas.FSArray'): |
| {"lastUpload":"2020-09-23T06:02:58.659Z","extensionVersion":"v3.4.3"} |
| from collections import namedtuple | |
| def NamedTuple(key, data): | |
| data = [k[0] for k in data] | |
| return namedtuple(key, data) | |
| class __List: | |
| def __getitem__(_, elem): | |
| return None |
| function prompt | |
| { | |
| # How many characters of the $PWD should be kept | |
| local pwdmaxlen=30 | |
| # Indicator that there has been directory truncation: | |
| local trunc_symbol="..." | |
| if [ ${#PWD} -gt $pwdmaxlen ] | |
| then | |
| local pwdoffset=$(( ${#PWD} - $pwdmaxlen )) | |
| newPWD="${trunc_symbol}${PWD:$pwdoffset:$pwdmaxlen}" |
| #!/usr/bin/env python | |
| """ | |
| Call any command line process from Python. | |
| Useful as a wrapper in PyCharm or other IDEs that | |
| do not support running scripts in any other language | |
| other than the one(s) supported by the IDE. | |
| """ |
From https://unix.stackexchange.com/a/93158
gzip a folder:
tar -zcvf archive.tar.gz directory/
unzip the folder:
tar -zxvf archive.tar.gz
From https://unix.stackexchange.com/a/93158
gzip a folder:
tar -zcvf archive.tar.gz directory/
unzip the folder:
tar -zxvf archive.tar.gz
| # coding: utf-8 | |
| # In[1]: | |
| from thinc.extra import datasets | |
| import mxnet as mx | |
| import random | |
| import re | |
| import tqdm |