Created
February 9, 2023 08:53
-
-
Save raven44099/445056d1d77c89b12cf56f8953c3a7ed to your computer and use it in GitHub Desktop.
BioGPT_230208.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"machine_shape": "hm", | |
"mount_file_id": "1hVzHfVzRq14XUfWECEf_btm-g1x6HpfJ", | |
"authorship_tag": "ABX9TyMvLOcgn3+3iC2t4110dfzv", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
}, | |
"accelerator": "GPU", | |
"gpuClass": "premium" | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/raven44099/445056d1d77c89b12cf56f8953c3a7ed/biogpt_230208.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## start\n", | |
"https://github.com/microsoft/BioGPT" | |
], | |
"metadata": { | |
"id": "HoJ61YPQ_IxB" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 36 | |
}, | |
"id": "em4vMY2N_BmE", | |
"outputId": "fc7efd6e-040b-4a18-cba4-e1c88ee2824c" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"'1.13.1+cu116'" | |
], | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "string" | |
} | |
}, | |
"metadata": {}, | |
"execution_count": 1 | |
} | |
], | |
"source": [ | |
"import torch\n", | |
"torch.__version__" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# !pip install fairseq\n", | |
"\n", | |
"!git clone https://github.com/pytorch/fairseq\n", | |
"%cd fairseq\n", | |
"!git checkout v0.12.0\n", | |
"!pip install .\n", | |
"!python setup.py build_ext --inplace\n", | |
"%cd .." | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "lQ7hdoaD_UUI", | |
"outputId": "b9c3d12d-6ac9-48fa-c6a6-0b2305ea0b9f" | |
}, | |
"execution_count": 19, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", | |
"Requirement already satisfied: fairseq in /usr/local/lib/python3.8/dist-packages (0.12.0)\n", | |
"Requirement already satisfied: cython in /usr/local/lib/python3.8/dist-packages (from fairseq) (0.29.33)\n", | |
"Requirement already satisfied: sacrebleu>=1.4.12 in /usr/local/lib/python3.8/dist-packages (from fairseq) (2.3.1)\n", | |
"Requirement already satisfied: hydra-core<1.1,>=1.0.7 in /usr/local/lib/python3.8/dist-packages (from fairseq) (1.0.7)\n", | |
"Requirement already satisfied: cffi in /usr/local/lib/python3.8/dist-packages (from fairseq) (1.15.1)\n", | |
"Requirement already satisfied: bitarray in /usr/local/lib/python3.8/dist-packages (from fairseq) (2.7.0)\n", | |
"Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from fairseq) (4.64.1)\n", | |
"Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from fairseq) (1.21.6)\n", | |
"Requirement already satisfied: regex in /usr/local/lib/python3.8/dist-packages (from fairseq) (2022.6.2)\n", | |
"Requirement already satisfied: torch in /usr/local/lib/python3.8/dist-packages (from fairseq) (1.13.1+cu116)\n", | |
"Requirement already satisfied: omegaconf<2.1 in /usr/local/lib/python3.8/dist-packages (from fairseq) (2.0.6)\n", | |
"Requirement already satisfied: torchaudio>=0.8.0 in /usr/local/lib/python3.8/dist-packages (from fairseq) (0.13.1+cu116)\n", | |
"Requirement already satisfied: importlib-resources in /usr/local/lib/python3.8/dist-packages (from hydra-core<1.1,>=1.0.7->fairseq) (5.10.2)\n", | |
"Requirement already satisfied: antlr4-python3-runtime==4.8 in /usr/local/lib/python3.8/dist-packages (from hydra-core<1.1,>=1.0.7->fairseq) (4.8)\n", | |
"Requirement already satisfied: PyYAML>=5.1.* in /usr/local/lib/python3.8/dist-packages (from omegaconf<2.1->fairseq) (6.0)\n", | |
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.8/dist-packages (from omegaconf<2.1->fairseq) (4.4.0)\n", | |
"Requirement already satisfied: lxml in /usr/local/lib/python3.8/dist-packages (from sacrebleu>=1.4.12->fairseq) (4.9.2)\n", | |
"Requirement already satisfied: tabulate>=0.8.9 in /usr/local/lib/python3.8/dist-packages (from sacrebleu>=1.4.12->fairseq) (0.8.10)\n", | |
"Requirement already satisfied: portalocker in /usr/local/lib/python3.8/dist-packages (from sacrebleu>=1.4.12->fairseq) (2.7.0)\n", | |
"Requirement already satisfied: colorama in /usr/local/lib/python3.8/dist-packages (from sacrebleu>=1.4.12->fairseq) (0.4.6)\n", | |
"Requirement already satisfied: pycparser in /usr/local/lib/python3.8/dist-packages (from cffi->fairseq) (2.21)\n", | |
"Requirement already satisfied: zipp>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from importlib-resources->hydra-core<1.1,>=1.0.7->fairseq) (3.12.0)\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"#@title prepare package Moses\n", | |
"import os\n", | |
"\n", | |
"!git clone https://github.com/moses-smt/mosesdecoder.git\n", | |
"# `!export` only changes a throwaway subshell, so the variable would be lost immediately.\n", | |
"# Setting it on the kernel process makes it visible to later cells and to later `!` commands.\n", | |
"os.environ['MOSES'] = os.path.join(os.getcwd(), 'mosesdecoder')" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Jy4XY-zwAesc", | |
"outputId": "8ebfb84e-bf0e-4179-a038-5b15fd63da3c" | |
}, | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Cloning into 'mosesdecoder'...\n", | |
"remote: Enumerating objects: 148097, done.\u001b[K\n", | |
"remote: Counting objects: 100% (525/525), done.\u001b[K\n", | |
"remote: Compressing objects: 100% (229/229), done.\u001b[K\n", | |
"remote: Total 148097 (delta 323), reused 441 (delta 292), pack-reused 147572\u001b[K\n", | |
"Receiving objects: 100% (148097/148097), 129.88 MiB | 25.47 MiB/s, done.\n", | |
"Resolving deltas: 100% (114349/114349), done.\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"#@title setup package fastBPE\n", | |
"import os\n", | |
"\n", | |
"!git clone https://github.com/glample/fastBPE.git\n", | |
"# `!export` only changes a throwaway subshell, so the variable would be lost immediately.\n", | |
"# Setting it on the kernel process makes it visible to later cells and to later `!` commands.\n", | |
"os.environ['FASTBPE'] = os.path.join(os.getcwd(), 'fastBPE')\n", | |
"%cd fastBPE\n", | |
"!g++ -std=c++11 -pthread -O3 fastBPE/main.cc -IfastBPE -o fast" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "XEfxn6icAnYX", | |
"outputId": "f346017b-8d80-485e-d685-1bc2004dc180" | |
}, | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Cloning into 'fastBPE'...\n", | |
"remote: Enumerating objects: 59, done.\u001b[K\n", | |
"Unpacking objects: 1% (1/59)\rUnpacking objects: 3% (2/59)\rUnpacking objects: 5% (3/59)\rUnpacking objects: 6% (4/59)\rUnpacking objects: 8% (5/59)\rUnpacking objects: 10% (6/59)\rUnpacking objects: 11% (7/59)\rUnpacking objects: 13% (8/59)\rUnpacking objects: 15% (9/59)\rUnpacking objects: 16% (10/59)\rUnpacking objects: 18% (11/59)\rUnpacking objects: 20% (12/59)\rUnpacking objects: 22% (13/59)\rUnpacking objects: 23% (14/59)\rUnpacking objects: 25% (15/59)\rUnpacking objects: 27% (16/59)\rUnpacking objects: 28% (17/59)\rUnpacking objects: 30% (18/59)\rUnpacking objects: 32% (19/59)\rUnpacking objects: 33% (20/59)\rUnpacking objects: 35% (21/59)\rUnpacking objects: 37% (22/59)\rremote: Total 59 (delta 0), reused 0 (delta 0), pack-reused 59\u001b[K\n", | |
"Unpacking objects: 38% (23/59)\rUnpacking objects: 40% (24/59)\rUnpacking objects: 42% (25/59)\rUnpacking objects: 44% (26/59)\rUnpacking objects: 45% (27/59)\rUnpacking objects: 47% (28/59)\rUnpacking objects: 49% (29/59)\rUnpacking objects: 50% (30/59)\rUnpacking objects: 52% (31/59)\rUnpacking objects: 54% (32/59)\rUnpacking objects: 55% (33/59)\rUnpacking objects: 57% (34/59)\rUnpacking objects: 59% (35/59)\rUnpacking objects: 61% (36/59)\rUnpacking objects: 62% (37/59)\rUnpacking objects: 64% (38/59)\rUnpacking objects: 66% (39/59)\rUnpacking objects: 67% (40/59)\rUnpacking objects: 69% (41/59)\rUnpacking objects: 71% (42/59)\rUnpacking objects: 72% (43/59)\rUnpacking objects: 74% (44/59)\rUnpacking objects: 76% (45/59)\rUnpacking objects: 77% (46/59)\rUnpacking objects: 79% (47/59)\rUnpacking objects: 81% (48/59)\rUnpacking objects: 83% (49/59)\rUnpacking objects: 84% (50/59)\rUnpacking objects: 86% (51/59)\rUnpacking objects: 88% (52/59)\rUnpacking objects: 89% (53/59)\rUnpacking objects: 91% (54/59)\rUnpacking objects: 93% (55/59)\rUnpacking objects: 94% (56/59)\rUnpacking objects: 96% (57/59)\rUnpacking objects: 98% (58/59)\rUnpacking objects: 100% (59/59)\rUnpacking objects: 100% (59/59), 29.97 KiB | 1.67 MiB/s, done.\n", | |
"/content/fastBPE\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"%cd ..\n", | |
"!pip install sacremoses\n", | |
"!pip install scikit-learn" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "whLrxejtBmWg", | |
"outputId": "55aa3df6-5e50-4ff2-da4d-1273779037b1" | |
}, | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"/content\n", | |
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", | |
"Collecting sacremoses\n", | |
" Downloading sacremoses-0.0.53.tar.gz (880 kB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m880.6/880.6 KB\u001b[0m \u001b[31m15.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
"Requirement already satisfied: regex in /usr/local/lib/python3.8/dist-packages (from sacremoses) (2022.6.2)\n", | |
"Requirement already satisfied: six in /usr/local/lib/python3.8/dist-packages (from sacremoses) (1.15.0)\n", | |
"Requirement already satisfied: click in /usr/local/lib/python3.8/dist-packages (from sacremoses) (7.1.2)\n", | |
"Requirement already satisfied: joblib in /usr/local/lib/python3.8/dist-packages (from sacremoses) (1.2.0)\n", | |
"Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from sacremoses) (4.64.1)\n", | |
"Building wheels for collected packages: sacremoses\n", | |
" Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for sacremoses: filename=sacremoses-0.0.53-py3-none-any.whl size=895260 sha256=855ea489e7887ff5678e3dd944909791621679099c5b38912fa960ef65dd9e98\n", | |
" Stored in directory: /root/.cache/pip/wheels/82/ab/9b/c15899bf659ba74f623ac776e861cf2eb8608c1825ddec66a4\n", | |
"Successfully built sacremoses\n", | |
"Installing collected packages: sacremoses\n", | |
"Successfully installed sacremoses-0.0.53\n", | |
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", | |
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.8/dist-packages (1.0.2)\n", | |
"Requirement already satisfied: numpy>=1.14.6 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.21.6)\n", | |
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.2.0)\n", | |
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (3.1.0)\n", | |
"Requirement already satisfied: scipy>=1.1.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.7.3)\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"%cd /content\n", | |
"!git clone https://github.com/microsoft/BioGPT.git" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "fVThQat1DQIJ", | |
"outputId": "c8147df9-d1f8-4e11-b2ca-3c43d42b7b0a" | |
}, | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"/content\n", | |
"Cloning into 'BioGPT'...\n", | |
"remote: Enumerating objects: 320, done.\u001b[K\n", | |
"remote: Counting objects: 100% (39/39), done.\u001b[K\n", | |
"remote: Compressing objects: 100% (20/20), done.\u001b[K\n", | |
"remote: Total 320 (delta 26), reused 22 (delta 19), pack-reused 281\u001b[K\n", | |
"Receiving objects: 100% (320/320), 31.44 MiB | 36.17 MiB/s, done.\n", | |
"Resolving deltas: 100% (158/158), done.\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"#@title Download them and extract them to the checkpoints folder of this project.\n", | |
"%cd /content/BioGPT\n", | |
"# -p: do not fail when the folder already exists, so the cell can be re-run.\n", | |
"!mkdir -p checkpoints\n", | |
"# Absolute path: the download must not end up in a stray folder if an earlier %cd failed.\n", | |
"%cd /content/BioGPT/checkpoints\n", | |
"# !wget https://msramllasc.blob.core.windows.net/modelrelease/BioGPT/checkpoints/Pre-trained-BioGPT.tgz\n", | |
"# RE-DTI-BioGPT.tgz is about 3.7G; -c resumes a partial download instead of starting over.\n", | |
"!wget -c https://msramllasc.blob.core.windows.net/modelrelease/BioGPT/checkpoints/RE-DTI-BioGPT.tgz\n" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "RmaF_L8eB4eI", | |
"outputId": "a60ab8a7-e610-4723-d8b5-c45d9d383d46" | |
}, | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"/content/checkpoints/checkpoints\n", | |
"--2023-02-09 07:53:36-- https://msramllasc.blob.core.windows.net/modelrelease/BioGPT/checkpoints/RE-DTI-BioGPT.tgz\n", | |
"Resolving msramllasc.blob.core.windows.net (msramllasc.blob.core.windows.net)... 20.209.34.164\n", | |
"Connecting to msramllasc.blob.core.windows.net (msramllasc.blob.core.windows.net)|20.209.34.164|:443... connected.\n", | |
"HTTP request sent, awaiting response... 200 OK\n", | |
"Length: 3960967808 (3.7G) [application/octet-stream]\n", | |
"Saving to: ‘RE-DTI-BioGPT.tgz’\n", | |
"\n", | |
"RE-DTI-BioGPT.tgz 100%[===================>] 3.69G 22.1MB/s in 4m 57s \n", | |
"\n", | |
"2023-02-09 07:58:33 (12.7 MB/s) - ‘RE-DTI-BioGPT.tgz’ saved [3960967808/3960967808]\n", | |
"\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# !cp /content/checkpoints/RE-DTI-BioGPT.tgz /content/drive/MyDrive/work/public/language_model" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "jYCjLdOhKn50", | |
"outputId": "47ffc020-c0de-4e1d-b41e-1384a253ab27" | |
}, | |
"execution_count": 13, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"cp: cannot stat '/content/checkpoints/RE-DTI-BioGPT.tgz': No such file or directory\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import time\n", | |
"start_time = time.time()\n", | |
"# !tar -zxvf Pre-trained-BioGPT.tgz\n", | |
"# -C pins the extraction target so the result does not depend on the current directory.\n", | |
"!tar -zxvf /content/BioGPT/checkpoints/RE-DTI-BioGPT.tgz -C /content/BioGPT/checkpoints\n", | |
"\n", | |
"end_time = time.time()\n", | |
"print(f'time was: {end_time - start_time} s')" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "gx7qX2mAFT7s", | |
"outputId": "89686bcb-4e50-409d-e20f-21126c08aef2" | |
}, | |
"execution_count": 12, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"time was: 44.21730136871338 s\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# %cd /content/BioGPT/checkpoints\n", | |
"%cd /content/BioGPT" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "_JGH8HlaDdYp", | |
"outputId": "fd22425d-42e6-4616-ffb4-73c6808a526b" | |
}, | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"/content/BioGPT\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"text3 = 'The increased prevalence of obesity and diabetes, with the attendant increase in morbidity and mortality, pose a substantial therapeutic challenge. Genetic screens in lower organisms provide evidence that gain-of-function of the deacetylase Sir2 results in beneficial metabolic effects and lifespan extension. Sirtuin agonists increase metabolic efficiency in rodents through a mechanism bearing similarity with calorie restriction. However, the specificity of these compounds remains undefined.'" | |
], | |
"metadata": { | |
"id": "7MAy9Q8-D5T2" | |
}, | |
"execution_count": 15, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import os\n", | |
"\n", | |
"path1 = '/content/BioGPT/'\n", | |
"\n", | |
"from src.transformer_lm_prompt import TransformerLanguageModelPrompt\n", | |
"\n", | |
"# Fail early with a readable message. Without this check the loader raised an opaque\n", | |
"# 'expected str, bytes or os.PathLike object, not NoneType' (presumably because the\n", | |
"# checkpoint folder could not be resolved -- confirm against the fairseq loader).\n", | |
"model_dir = path1 + 'checkpoints/RE-DTI-BioGPT'\n", | |
"if not os.path.isdir(model_dir):\n", | |
"    raise FileNotFoundError(f'checkpoint folder not found: {model_dir}')\n", | |
"\n", | |
"m = TransformerLanguageModelPrompt.from_pretrained(\n", | |
"    model_dir,\n", | |
"    \"checkpoint_avg.pt\",\n", | |
"    path1 + \"data/KD-DTI/relis-bin\",\n", | |
"    tokenizer='moses',\n", | |
"    bpe='fastbpe',\n", | |
"    bpe_codes=path1 + \"data/bpecodes\",  # absolute, like the other paths\n", | |
"    max_len_b=1024,\n", | |
"    beam=1)\n", | |
"m.cuda()\n", | |
"src_text = [text3, text3]  # input texts, e.g., PubMed abstracts\n", | |
"# `args` is not defined in this notebook, so the beam size is given directly\n", | |
"# (same value as in from_pretrained above).\n", | |
"# Each text is encoded on its own; assumes encode() expects a single string -- TODO confirm.\n", | |
"for text in src_text:\n", | |
"    src_tokens = m.encode(text)\n", | |
"    generate = m.generate([src_tokens], beam=1)[0]\n", | |
"    output = m.decode(generate[0][\"tokens\"])\n", | |
"    print(output)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 688 | |
}, | |
"id": "CEFyAg4RGrHu", | |
"outputId": "657f66db-e17d-4877-9454-33387942140d" | |
}, | |
"execution_count": 21, | |
"outputs": [ | |
{ | |
"output_type": "error", | |
"ename": "TypeError", | |
"evalue": "ignored", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-21-41726c6a2fdf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msrc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransformer_lm_prompt\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mTransformerLanguageModelPrompt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m m = TransformerLanguageModelPrompt.from_pretrained(\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mpath1\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"checkpoints/RE-DTI-BioGPT\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\"checkpoint_avg.pt\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.8/dist-packages/fairseq/models/fairseq_model.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, model_name_or_path, checkpoint_file, data_name_or_path, **kwargs)\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mfairseq\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mhub_utils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m x = hub_utils.from_pretrained(\n\u001b[0m\u001b[1;32m 268\u001b[0m \u001b[0mmodel_name_or_path\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 269\u001b[0m \u001b[0mcheckpoint_file\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.8/dist-packages/fairseq/hub_utils.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(model_name_or_path, checkpoint_file, data_name_or_path, archive_map, **kwargs)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0;34m\"vocab.json\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"bpe_vocab\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m }.items():\n\u001b[0;32m---> 66\u001b[0;31m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 67\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/lib/python3.8/posixpath.py\u001b[0m in \u001b[0;36mjoin\u001b[0;34m(a, *p)\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0mwill\u001b[0m \u001b[0mbe\u001b[0m \u001b[0mdiscarded\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mAn\u001b[0m \u001b[0mempty\u001b[0m \u001b[0mlast\u001b[0m \u001b[0mpart\u001b[0m \u001b[0mwill\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32min\u001b[0m \u001b[0ma\u001b[0m \u001b[0mpath\u001b[0m \u001b[0mthat\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m ends with a separator.\"\"\"\n\u001b[0;32m---> 76\u001b[0;31m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfspath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 77\u001b[0m \u001b[0msep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_get_sep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 78\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mTypeError\u001b[0m: expected str, bytes or os.PathLike object, not NoneType" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from src.transformer_lm_prompt import TransformerLanguageModelPrompt\n", | |
"\n", | |
"# The relative paths below are resolved against the current directory.\n", | |
"m = TransformerLanguageModelPrompt.from_pretrained(\n", | |
"    \"checkpoints/RE-DTI-BioGPT\",\n", | |
"    \"checkpoint_avg.pt\",\n", | |
"    \"data/KD-DTI/relis-bin\",\n", | |
"    tokenizer='moses',\n", | |
"    bpe='fastbpe',\n", | |
"    bpe_codes=\"data/bpecodes\",\n", | |
"    max_len_b=1024,\n", | |
"    beam=1)\n", | |
"m.cuda()\n", | |
"# `textius` was never defined; `text3` is the abstract assigned earlier in this notebook.\n", | |
"src_text = [text3, text3]  # input texts, e.g., PubMed abstracts\n", | |
"# `args` is not defined in this notebook, so the beam size is given directly\n", | |
"# (same value as in from_pretrained above).\n", | |
"# Each text is encoded on its own; assumes encode() expects a single string -- TODO confirm.\n", | |
"for text in src_text:\n", | |
"    src_tokens = m.encode(text)\n", | |
"    generate = m.generate([src_tokens], beam=1)[0]\n", | |
"    output = m.decode(generate[0][\"tokens\"])\n", | |
"    print(output)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 396 | |
}, | |
"id": "hbHUGUaJCdX3", | |
"outputId": "e05db92c-477b-4864-f9aa-1e814dff8be4" | |
}, | |
"execution_count": 20, | |
"outputs": [ | |
{ | |
"output_type": "error", | |
"ename": "TypeError", | |
"evalue": "ignored", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-20-2e11355943d6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msrc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransformer_lm_prompt\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mTransformerLanguageModelPrompt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m m = TransformerLanguageModelPrompt.from_pretrained(\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;34m\"checkpoints/RE-DTI-BioGPT\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\"checkpoint_avg.pt\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.8/dist-packages/fairseq/models/fairseq_model.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, model_name_or_path, checkpoint_file, data_name_or_path, **kwargs)\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mfairseq\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mhub_utils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m x = hub_utils.from_pretrained(\n\u001b[0m\u001b[1;32m 268\u001b[0m \u001b[0mmodel_name_or_path\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 269\u001b[0m \u001b[0mcheckpoint_file\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.8/dist-packages/fairseq/hub_utils.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(model_name_or_path, checkpoint_file, data_name_or_path, archive_map, **kwargs)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0;34m\"vocab.json\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"bpe_vocab\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m }.items():\n\u001b[0;32m---> 66\u001b[0;31m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 67\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/lib/python3.8/posixpath.py\u001b[0m in \u001b[0;36mjoin\u001b[0;34m(a, *p)\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0mwill\u001b[0m \u001b[0mbe\u001b[0m \u001b[0mdiscarded\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mAn\u001b[0m \u001b[0mempty\u001b[0m \u001b[0mlast\u001b[0m \u001b[0mpart\u001b[0m \u001b[0mwill\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32min\u001b[0m \u001b[0ma\u001b[0m \u001b[0mpath\u001b[0m \u001b[0mthat\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m ends with a separator.\"\"\"\n\u001b[0;32m---> 76\u001b[0;31m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfspath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 77\u001b[0m \u001b[0msep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_get_sep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 78\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mTypeError\u001b[0m: expected str, bytes or os.PathLike object, not NoneType" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [], | |
"metadata": { | |
"id": "bLGKobf4UsH5" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## 3. Hugging Face version\n", | |
"TODO: not implemented yet (the code cell below is still empty). The Hugging Face route might run." | |
], | |
"metadata": { | |
"id": "XpvMW7LVUteY" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [], | |
"metadata": { | |
"id": "-r8vuZzaUvvg" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment