Skip to content

Instantly share code, notes, and snippets.

@raven44099
Created February 9, 2023 08:53
Show Gist options
  • Save raven44099/445056d1d77c89b12cf56f8953c3a7ed to your computer and use it in GitHub Desktop.
Save raven44099/445056d1d77c89b12cf56f8953c3a7ed to your computer and use it in GitHub Desktop.
BioGPT_230208.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"machine_shape": "hm",
"mount_file_id": "1hVzHfVzRq14XUfWECEf_btm-g1x6HpfJ",
"authorship_tag": "ABX9TyMvLOcgn3+3iC2t4110dfzv",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"gpuClass": "premium"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/raven44099/445056d1d77c89b12cf56f8953c3a7ed/biogpt_230208.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"## Start: BioGPT setup\n",
"Reference repository: https://github.com/microsoft/BioGPT"
],
"metadata": {
"id": "HoJ61YPQ_IxB"
}
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 36
},
"id": "em4vMY2N_BmE",
"outputId": "fc7efd6e-040b-4a18-cba4-e1c88ee2824c"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'1.13.1+cu116'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 1
}
],
"source": [
"# Display the version string of the PyTorch build available in this runtime.\n",
"import torch\n",
"torch.__version__"
]
},
{
"cell_type": "code",
"source": [
"# !pip install fairseq\n",
"\n",
"!git clone https://github.com/pytorch/fairseq\n",
"%cd fairseq\n",
"!git checkout v0.12.0\n",
"!pip install .\n",
"!python setup.py build_ext --inplace\n",
"%cd .."
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "lQ7hdoaD_UUI",
"outputId": "b9c3d12d-6ac9-48fa-c6a6-0b2305ea0b9f"
},
"execution_count": 19,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: fairseq in /usr/local/lib/python3.8/dist-packages (0.12.0)\n",
"Requirement already satisfied: cython in /usr/local/lib/python3.8/dist-packages (from fairseq) (0.29.33)\n",
"Requirement already satisfied: sacrebleu>=1.4.12 in /usr/local/lib/python3.8/dist-packages (from fairseq) (2.3.1)\n",
"Requirement already satisfied: hydra-core<1.1,>=1.0.7 in /usr/local/lib/python3.8/dist-packages (from fairseq) (1.0.7)\n",
"Requirement already satisfied: cffi in /usr/local/lib/python3.8/dist-packages (from fairseq) (1.15.1)\n",
"Requirement already satisfied: bitarray in /usr/local/lib/python3.8/dist-packages (from fairseq) (2.7.0)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from fairseq) (4.64.1)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from fairseq) (1.21.6)\n",
"Requirement already satisfied: regex in /usr/local/lib/python3.8/dist-packages (from fairseq) (2022.6.2)\n",
"Requirement already satisfied: torch in /usr/local/lib/python3.8/dist-packages (from fairseq) (1.13.1+cu116)\n",
"Requirement already satisfied: omegaconf<2.1 in /usr/local/lib/python3.8/dist-packages (from fairseq) (2.0.6)\n",
"Requirement already satisfied: torchaudio>=0.8.0 in /usr/local/lib/python3.8/dist-packages (from fairseq) (0.13.1+cu116)\n",
"Requirement already satisfied: importlib-resources in /usr/local/lib/python3.8/dist-packages (from hydra-core<1.1,>=1.0.7->fairseq) (5.10.2)\n",
"Requirement already satisfied: antlr4-python3-runtime==4.8 in /usr/local/lib/python3.8/dist-packages (from hydra-core<1.1,>=1.0.7->fairseq) (4.8)\n",
"Requirement already satisfied: PyYAML>=5.1.* in /usr/local/lib/python3.8/dist-packages (from omegaconf<2.1->fairseq) (6.0)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.8/dist-packages (from omegaconf<2.1->fairseq) (4.4.0)\n",
"Requirement already satisfied: lxml in /usr/local/lib/python3.8/dist-packages (from sacrebleu>=1.4.12->fairseq) (4.9.2)\n",
"Requirement already satisfied: tabulate>=0.8.9 in /usr/local/lib/python3.8/dist-packages (from sacrebleu>=1.4.12->fairseq) (0.8.10)\n",
"Requirement already satisfied: portalocker in /usr/local/lib/python3.8/dist-packages (from sacrebleu>=1.4.12->fairseq) (2.7.0)\n",
"Requirement already satisfied: colorama in /usr/local/lib/python3.8/dist-packages (from sacrebleu>=1.4.12->fairseq) (0.4.6)\n",
"Requirement already satisfied: pycparser in /usr/local/lib/python3.8/dist-packages (from cffi->fairseq) (2.21)\n",
"Requirement already satisfied: zipp>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from importlib-resources->hydra-core<1.1,>=1.0.7->fairseq) (3.12.0)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"#@title prepare package Moses\n",
"!git clone https://github.com/moses-smt/mosesdecoder.git\n",
"!export MOSES=${PWD}/mosesdecoder"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Jy4XY-zwAesc",
"outputId": "8ebfb84e-bf0e-4179-a038-5b15fd63da3c"
},
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Cloning into 'mosesdecoder'...\n",
"remote: Enumerating objects: 148097, done.\u001b[K\n",
"remote: Counting objects: 100% (525/525), done.\u001b[K\n",
"remote: Compressing objects: 100% (229/229), done.\u001b[K\n",
"remote: Total 148097 (delta 323), reused 441 (delta 292), pack-reused 147572\u001b[K\n",
"Receiving objects: 100% (148097/148097), 129.88 MiB | 25.47 MiB/s, done.\n",
"Resolving deltas: 100% (114349/114349), done.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"#@title setup package fasBPE\n",
"!git clone https://github.com/glample/fastBPE.git\n",
"!export FASTBPE=${PWD}/fastBPE\n",
"%cd fastBPE\n",
"!g++ -std=c++11 -pthread -O3 fastBPE/main.cc -IfastBPE -o fast"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XEfxn6icAnYX",
"outputId": "f346017b-8d80-485e-d685-1bc2004dc180"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Cloning into 'fastBPE'...\n",
"remote: Enumerating objects: 59, done.\u001b[K\n",
"Unpacking objects: 1% (1/59)\rUnpacking objects: 3% (2/59)\rUnpacking objects: 5% (3/59)\rUnpacking objects: 6% (4/59)\rUnpacking objects: 8% (5/59)\rUnpacking objects: 10% (6/59)\rUnpacking objects: 11% (7/59)\rUnpacking objects: 13% (8/59)\rUnpacking objects: 15% (9/59)\rUnpacking objects: 16% (10/59)\rUnpacking objects: 18% (11/59)\rUnpacking objects: 20% (12/59)\rUnpacking objects: 22% (13/59)\rUnpacking objects: 23% (14/59)\rUnpacking objects: 25% (15/59)\rUnpacking objects: 27% (16/59)\rUnpacking objects: 28% (17/59)\rUnpacking objects: 30% (18/59)\rUnpacking objects: 32% (19/59)\rUnpacking objects: 33% (20/59)\rUnpacking objects: 35% (21/59)\rUnpacking objects: 37% (22/59)\rremote: Total 59 (delta 0), reused 0 (delta 0), pack-reused 59\u001b[K\n",
"Unpacking objects: 38% (23/59)\rUnpacking objects: 40% (24/59)\rUnpacking objects: 42% (25/59)\rUnpacking objects: 44% (26/59)\rUnpacking objects: 45% (27/59)\rUnpacking objects: 47% (28/59)\rUnpacking objects: 49% (29/59)\rUnpacking objects: 50% (30/59)\rUnpacking objects: 52% (31/59)\rUnpacking objects: 54% (32/59)\rUnpacking objects: 55% (33/59)\rUnpacking objects: 57% (34/59)\rUnpacking objects: 59% (35/59)\rUnpacking objects: 61% (36/59)\rUnpacking objects: 62% (37/59)\rUnpacking objects: 64% (38/59)\rUnpacking objects: 66% (39/59)\rUnpacking objects: 67% (40/59)\rUnpacking objects: 69% (41/59)\rUnpacking objects: 71% (42/59)\rUnpacking objects: 72% (43/59)\rUnpacking objects: 74% (44/59)\rUnpacking objects: 76% (45/59)\rUnpacking objects: 77% (46/59)\rUnpacking objects: 79% (47/59)\rUnpacking objects: 81% (48/59)\rUnpacking objects: 83% (49/59)\rUnpacking objects: 84% (50/59)\rUnpacking objects: 86% (51/59)\rUnpacking objects: 88% (52/59)\rUnpacking objects: 89% (53/59)\rUnpacking objects: 91% (54/59)\rUnpacking objects: 93% (55/59)\rUnpacking objects: 94% (56/59)\rUnpacking objects: 96% (57/59)\rUnpacking objects: 98% (58/59)\rUnpacking objects: 100% (59/59)\rUnpacking objects: 100% (59/59), 29.97 KiB | 1.67 MiB/s, done.\n",
"/content/fastBPE\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"%cd ..\n",
"!pip install sacremoses\n",
"!pip install scikit-learn"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "whLrxejtBmWg",
"outputId": "55aa3df6-5e50-4ff2-da4d-1273779037b1"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting sacremoses\n",
" Downloading sacremoses-0.0.53.tar.gz (880 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m880.6/880.6 KB\u001b[0m \u001b[31m15.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Requirement already satisfied: regex in /usr/local/lib/python3.8/dist-packages (from sacremoses) (2022.6.2)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.8/dist-packages (from sacremoses) (1.15.0)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.8/dist-packages (from sacremoses) (7.1.2)\n",
"Requirement already satisfied: joblib in /usr/local/lib/python3.8/dist-packages (from sacremoses) (1.2.0)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from sacremoses) (4.64.1)\n",
"Building wheels for collected packages: sacremoses\n",
" Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for sacremoses: filename=sacremoses-0.0.53-py3-none-any.whl size=895260 sha256=855ea489e7887ff5678e3dd944909791621679099c5b38912fa960ef65dd9e98\n",
" Stored in directory: /root/.cache/pip/wheels/82/ab/9b/c15899bf659ba74f623ac776e861cf2eb8608c1825ddec66a4\n",
"Successfully built sacremoses\n",
"Installing collected packages: sacremoses\n",
"Successfully installed sacremoses-0.0.53\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.8/dist-packages (1.0.2)\n",
"Requirement already satisfied: numpy>=1.14.6 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.21.6)\n",
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.2.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (3.1.0)\n",
"Requirement already satisfied: scipy>=1.1.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.7.3)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"%cd /content\n",
"!git clone https://github.com/microsoft/BioGPT.git"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fVThQat1DQIJ",
"outputId": "c8147df9-d1f8-4e11-b2ca-3c43d42b7b0a"
},
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content\n",
"Cloning into 'BioGPT'...\n",
"remote: Enumerating objects: 320, done.\u001b[K\n",
"remote: Counting objects: 100% (39/39), done.\u001b[K\n",
"remote: Compressing objects: 100% (20/20), done.\u001b[K\n",
"remote: Total 320 (delta 26), reused 22 (delta 19), pack-reused 281\u001b[K\n",
"Receiving objects: 100% (320/320), 31.44 MiB | 36.17 MiB/s, done.\n",
"Resolving deltas: 100% (158/158), done.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"#@title Download them and extract them to the checkpoints folder of this project.\n",
"%cd /content/BioGPT\n",
"!mkdir checkpoints\n",
"%cd checkpoints\n",
"# !wget https://msramllasc.blob.core.windows.net/modelrelease/BioGPT/checkpoints/Pre-trained-BioGPT.tgz\n",
"'''RE-DTI-BioGPT.tgz = 3.7G.'''\n",
"!wget https://msramllasc.blob.core.windows.net/modelrelease/BioGPT/checkpoints/RE-DTI-BioGPT.tgz\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "RmaF_L8eB4eI",
"outputId": "a60ab8a7-e610-4723-d8b5-c45d9d383d46"
},
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content/checkpoints/checkpoints\n",
"--2023-02-09 07:53:36-- https://msramllasc.blob.core.windows.net/modelrelease/BioGPT/checkpoints/RE-DTI-BioGPT.tgz\n",
"Resolving msramllasc.blob.core.windows.net (msramllasc.blob.core.windows.net)... 20.209.34.164\n",
"Connecting to msramllasc.blob.core.windows.net (msramllasc.blob.core.windows.net)|20.209.34.164|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 3960967808 (3.7G) [application/octet-stream]\n",
"Saving to: ‘RE-DTI-BioGPT.tgz’\n",
"\n",
"RE-DTI-BioGPT.tgz 100%[===================>] 3.69G 22.1MB/s in 4m 57s \n",
"\n",
"2023-02-09 07:58:33 (12.7 MB/s) - ‘RE-DTI-BioGPT.tgz’ saved [3960967808/3960967808]\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Optional backup of the downloaded archive to a Google Drive folder (left disabled).\n",
"# NOTE(review): the download cell targets /content/BioGPT/checkpoints, so the source path\n",
"# here probably needs that prefix before this is re-enabled - confirm where the .tgz landed.\n",
"# !cp /content/checkpoints/RE-DTI-BioGPT.tgz /content/drive/MyDrive/work/public/language_model"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "jYCjLdOhKn50",
"outputId": "47ffc020-c0de-4e1d-b41e-1384a253ab27"
},
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"cp: cannot stat '/content/checkpoints/RE-DTI-BioGPT.tgz': No such file or directory\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import time\n",
"start_time = time.time()\n",
"# !tar -zxvf Pre-trained-BioGPT.tgz\n",
"!tar -zxvf /content/BioGPT/checkpoints/RE-DTI-BioGPT.tgz\n",
"\n",
"end_time = time.time()\n",
"print(f'time was: {end_time - start_time} s')#.format(ent_time - start_time))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gx7qX2mAFT7s",
"outputId": "89686bcb-4e50-409d-e20f-21126c08aef2"
},
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"time was: 44.21730136871338 s\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# %cd /content/BioGPT/checkpoints\n",
"# Work from the repository root: the loading cells import `src.transformer_lm_prompt`\n",
"# and pass repo-relative paths such as data/bpecodes.\n",
"%cd /content/BioGPT"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "_JGH8HlaDdYp",
"outputId": "fd22425d-42e6-4616-ffb4-73c6808a526b"
},
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content/BioGPT\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Sample passage used as the input text for the generation cells.\n",
"text3 = 'The increased prevalence of obesity and diabetes, with the attendant increase in morbidity and mortality, pose a substantial therapeutic challenge. Genetic screens in lower organisms provide evidence that gain-of-function of the deacetylase Sir2 results in beneficial metabolic effects and lifespan extension. Sirtuin agonists increase metabolic efficiency in rodents through a mechanism bearing similarity with calorie restriction. However, the specificity of these compounds remains undefined.'"
],
"metadata": {
"id": "7MAy9Q8-D5T2"
},
"execution_count": 15,
"outputs": []
},
{
"cell_type": "code",
"source": [
"path1 = '/content/BioGPT/'\n",
"\n",
"from src.transformer_lm_prompt import TransformerLanguageModelPrompt\n",
"\n",
"m = TransformerLanguageModelPrompt.from_pretrained(\n",
" path1 + \"checkpoints/RE-DTI-BioGPT\", \n",
" \"checkpoint_avg.pt\", \n",
" path1 + \"data/KD-DTI/relis-bin\",\n",
" tokenizer='moses', \n",
" bpe='fastbpe', \n",
" bpe_codes=\"data/bpecodes\",\n",
" max_len_b=1024,\n",
" beam=1)\n",
"m.cuda()\n",
"src_text= [text3,text3] # input text, e.g., a PubMed abstract\n",
"src_tokens = m.encode(src_text)\n",
"generate = m.generate([src_tokens], beam=args.beam)[0]\n",
"output = m.decode(generate[0][\"tokens\"])\n",
"print(output)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 688
},
"id": "CEFyAg4RGrHu",
"outputId": "657f66db-e17d-4877-9454-33387942140d"
},
"execution_count": 21,
"outputs": [
{
"output_type": "error",
"ename": "TypeError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-21-41726c6a2fdf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msrc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransformer_lm_prompt\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mTransformerLanguageModelPrompt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m m = TransformerLanguageModelPrompt.from_pretrained(\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mpath1\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"checkpoints/RE-DTI-BioGPT\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\"checkpoint_avg.pt\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.8/dist-packages/fairseq/models/fairseq_model.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, model_name_or_path, checkpoint_file, data_name_or_path, **kwargs)\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mfairseq\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mhub_utils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m x = hub_utils.from_pretrained(\n\u001b[0m\u001b[1;32m 268\u001b[0m \u001b[0mmodel_name_or_path\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 269\u001b[0m \u001b[0mcheckpoint_file\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.8/dist-packages/fairseq/hub_utils.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(model_name_or_path, checkpoint_file, data_name_or_path, archive_map, **kwargs)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0;34m\"vocab.json\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"bpe_vocab\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m }.items():\n\u001b[0;32m---> 66\u001b[0;31m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 67\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.8/posixpath.py\u001b[0m in \u001b[0;36mjoin\u001b[0;34m(a, *p)\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0mwill\u001b[0m \u001b[0mbe\u001b[0m \u001b[0mdiscarded\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mAn\u001b[0m \u001b[0mempty\u001b[0m \u001b[0mlast\u001b[0m \u001b[0mpart\u001b[0m \u001b[0mwill\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32min\u001b[0m \u001b[0ma\u001b[0m \u001b[0mpath\u001b[0m \u001b[0mthat\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m ends with a separator.\"\"\"\n\u001b[0;32m---> 76\u001b[0;31m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfspath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 77\u001b[0m \u001b[0msep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_get_sep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 78\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: expected str, bytes or os.PathLike object, not NoneType"
]
}
]
},
{
"cell_type": "code",
"source": [
"from src.transformer_lm_prompt import TransformerLanguageModelPrompt\n",
"\n",
"m = TransformerLanguageModelPrompt.from_pretrained(\n",
" \"checkpoints/RE-DTI-BioGPT\", \n",
" \"checkpoint_avg.pt\", \n",
" \"data/KD-DTI/relis-bin\",\n",
" tokenizer='moses', \n",
" bpe='fastbpe', \n",
" bpe_codes=\"data/bpecodes\",\n",
" max_len_b=1024,\n",
" beam=1)\n",
"m.cuda()\n",
"src_text= [textius,textius] # input text, e.g., a PubMed abstract\n",
"src_tokens = m.encode(src_text)\n",
"generate = m.generate([src_tokens], beam=args.beam)[0]\n",
"output = m.decode(generate[0][\"tokens\"])\n",
"print(output)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 396
},
"id": "hbHUGUaJCdX3",
"outputId": "e05db92c-477b-4864-f9aa-1e814dff8be4"
},
"execution_count": 20,
"outputs": [
{
"output_type": "error",
"ename": "TypeError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-20-2e11355943d6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msrc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransformer_lm_prompt\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mTransformerLanguageModelPrompt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m m = TransformerLanguageModelPrompt.from_pretrained(\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;34m\"checkpoints/RE-DTI-BioGPT\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\"checkpoint_avg.pt\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.8/dist-packages/fairseq/models/fairseq_model.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, model_name_or_path, checkpoint_file, data_name_or_path, **kwargs)\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mfairseq\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mhub_utils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m x = hub_utils.from_pretrained(\n\u001b[0m\u001b[1;32m 268\u001b[0m \u001b[0mmodel_name_or_path\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 269\u001b[0m \u001b[0mcheckpoint_file\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.8/dist-packages/fairseq/hub_utils.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(model_name_or_path, checkpoint_file, data_name_or_path, archive_map, **kwargs)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0;34m\"vocab.json\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"bpe_vocab\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m }.items():\n\u001b[0;32m---> 66\u001b[0;31m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 67\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.8/posixpath.py\u001b[0m in \u001b[0;36mjoin\u001b[0;34m(a, *p)\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0mwill\u001b[0m \u001b[0mbe\u001b[0m \u001b[0mdiscarded\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mAn\u001b[0m \u001b[0mempty\u001b[0m \u001b[0mlast\u001b[0m \u001b[0mpart\u001b[0m \u001b[0mwill\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32min\u001b[0m \u001b[0ma\u001b[0m \u001b[0mpath\u001b[0m \u001b[0mthat\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m ends with a separator.\"\"\"\n\u001b[0;32m---> 76\u001b[0;31m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfspath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 77\u001b[0m \u001b[0msep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_get_sep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 78\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: expected str, bytes or os.PathLike object, not NoneType"
]
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "bLGKobf4UsH5"
},
"execution_count": null,
"outputs": []
},
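{
"cell_type": "markdown",
"source": [
"## 3. Hugging Face version\n",
"TODO: not implemented yet. Loading BioGPT through Hugging Face might run where the fairseq loading cells above fail."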
],
"metadata": {
"id": "XpvMW7LVUteY"
}
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "-r8vuZzaUvvg"
},
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment