I hereby claim:
- I am nbroad1881 on github.
- I am nicholasbroad (https://keybase.io/nicholasbroad) on keybase.
- I have a public key ASB10K5suwte9WvhBvNox4bXW95vszH1jaJXZ54ejZAeUAo
To claim this, I am signing this object:
I hereby claim:
To claim this, I am signing this object:
| !split -l 250000 text_file.txt smaller_ | |
| ### split [options] filename prefix | |
| ### -l linenumber | |
| ### -b bytes | |
| import glob | |
| file_list = glob.glob("smaller_*") |
| # help for a function | |
| %timeit? | |
| # run code block multiple times to get average time | |
| %%timeit | |
| L = [n ** 2 for n in range(1000)] | |
| # paste multi-line code to cell | |
| %paste | |
| >>> def donothing(x): |
| # ************************************************** | |
| # Commands to run this dockerfile | |
| # $docker build -t name_of_image directory | |
| # | |
| # $docker run -v ~/path/to/local/dir:/root/work -it --name my_container -p 8888:8888 --rm name_of_image | |
| # (-v stands for volumes. This mounts a local dir to a dir in the container) | |
| # -v ~/path/to/local/dir:/root/work -it \ | |
| # (-it stands for interactive: -i keeps STDIN open and -t allocates a terminal. Because of the -v mount, any changes to local dir will then be seen in the connected dir in the container) | |
| # --name my_container \ |
| FROM ubuntu:18.04 | |
| # Set character encoding environment variables | |
| ENV LC_ALL=C.UTF-8 LANG=C.UTF-8 | |
| # Allow apt-get install without interaction from console | |
| ENV DEBIAN_FRONTEND=noninteractive | |
| # Set the working dir to the root user home folder | |
| WORKDIR /root |
| import os | |
| from pathlib import Path | |
| # Absolute path of the directory containing this file | |
| absolute_path = os.path.dirname(os.path.abspath(__file__)) | |
| # OR, with pathlib (note: this gives the absolute path of the file itself, not its directory) | |
| absolute_path = Path(__file__).resolve() | |
| # List contents of directory | |
| os.listdir('dirname-or-blank-for-current-dir') |
| # see here https://huggingface.co/docs/datasets/faiss_and_ea.html#adding-a-faiss-index | |
| # I loaded my dataset from a Pandas dataframe | |
| import pandas as pd | |
| df = pd.read_csv("dataset.csv") | |
| from transformers import DPRContextEncoder, DPRContextEncoderTokenizerFast | |
| import torch | |
| torch.set_grad_enabled(False) |
| # full name to abbreviation here: https://gist.github.com/mshafrir/2646763 | |
| ste_to_reg = { | |
| 'AA': 'Other', | |
| 'AE': 'Other', | |
| 'AP': 'Other', | |
| 'AK': 'West', | |
| 'AL': 'South', | |
| 'AR': 'South', | |
| 'AS': 'Other', |
| from torch import nn | |
| from transformers import AutoModel | |
| class Model(nn.Module): | |
| def __init__(self, config): | |
| super().__init__() | |
| self.model = AutoModel.from_pretrained(...) | |
| self.dropout = nn.Dropout(config.hidden_dropout_prob) |
| def reinit_model_weights(model, n_layers, config): | |
| # use whatever you named your transformer module | |
| backbone = model.backbone | |
| encoder_layers = backbone.encoder.layer | |
| reinit_layers(encoder_layers, n_layers, std) | |
| # use whatever you named the output | |
| reinit_modules([model.output], std) |