# Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# See LICENSE for license information.
"""
Wrapper module for Transformer related layers with FP8 support.
"""
import functools
from enum import Enum
from math import sqrt
import os
  
    
# Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# See LICENSE for license information.
"""JAX/TE custom ops for attention"""
import operator
import os
import warnings
from dataclasses import dataclass, replace
from functools import partial, reduce
from typing import Optional, Tuple
  
    
from typing import Generator
import numpy as np


def generate_batch(
    *,
    vocab_size: int,
    sequence_length: int,
    num_instances: int,
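The snippet above is cut off at the function signature. Below is a minimal sketch of how such a generator might be completed, assuming it should yield batches of uniformly random token IDs; the body, the batch_size parameter, and the use of np.random.default_rng are my assumptions, not the original implementation. It relies on the Generator and numpy imports shown above.

def generate_batch_sketch(
    *,
    vocab_size: int,
    sequence_length: int,
    num_instances: int,
    batch_size: int = 8,  # assumed parameter, not present in the original signature
) -> Generator[np.ndarray, None, None]:
    # Hypothetical completion: yield (batch, sequence_length) arrays of random token ids.
    rng = np.random.default_rng()
    for start in range(0, num_instances, batch_size):
        n = min(batch_size, num_instances - start)
        # Each batch holds n sequences of ids drawn uniformly from [0, vocab_size).
        yield rng.integers(0, vocab_size, size=(n, sequence_length))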
  
    
import numpy as np


def num_bits(max_i: int) -> int:
    return len(bin(max_i)[2:])


def int_to_bits(i: int, n: int, dtype=int) -> list[int]:
    return [dtype(int(b)) for b in bin(i)[2:].zfill(n)]
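A quick usage check for these two helpers, added here for illustration; the expected values follow directly from the definitions above.

assert num_bits(5) == 3                    # bin(5) == '0b101', three digits
assert num_bits(16) == 5                   # bin(16) == '0b10000'
assert int_to_bits(5, 4) == [0, 1, 0, 1]   # zero-padded to 4 digits
assert int_to_bits(5, num_bits(5)) == [1, 0, 1]
# The dtype argument lets callers get, e.g., numpy scalars instead of Python ints:
print(int_to_bits(9, 8, dtype=np.uint8))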
  
    
# Step 1: Create and activate a new virtual environment (needs Python 3.7 or newer)
virtualenv .venv
. .venv/bin/activate

# Step 2: Install the latest PyTorch
# This assumes your drivers are compatible with CUDA 11.*. If not, see https://pytorch.org/
# for alternate install instructions.
pip install torch==1.10.2+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html

# Step 3: Clone and install the "tango" repo, which has the GPT-J example.
  
    
import signal
import logging
import time

from transformers import AutoTokenizer

from allennlp.data.instance import Instance
from allennlp.data.dataset_readers import DatasetReader
from allennlp.data.data_loaders import MultiProcessDataLoader
from allennlp.data.fields import TransformerTextField
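One way these imports typically fit together is a dataset reader that tokenizes with the Hugging Face tokenizer and stores the result in a TransformerTextField. The sketch below is my own illustration, not code from the original file: the reader name, the one-example-per-line file format, and the bert-base-uncased model are hypothetical, and it assumes TransformerTextField accepts input_ids/attention_mask keyword arguments as in AllenNLP 2.x.

import torch


class PlainTextReader(DatasetReader):  # hypothetical reader, for illustration only
    def __init__(self, transformer_model: str = "bert-base-uncased", **kwargs):
        super().__init__(**kwargs)
        self.tokenizer = AutoTokenizer.from_pretrained(transformer_model)

    def _read(self, file_path: str):
        # Assumes a plain text file with one example per line.
        with open(file_path) as lines:
            for line in lines:
                yield self.text_to_instance(line.strip())

    def text_to_instance(self, text: str) -> Instance:
        encoded = self.tokenizer(text, truncation=True)
        return Instance({
            "text": TransformerTextField(
                input_ids=torch.tensor(encoded["input_ids"]),
                attention_mask=torch.tensor(encoded["attention_mask"]),
            )
        })


# Hypothetical usage with the multi-process loader imported above:
# loader = MultiProcessDataLoader(PlainTextReader(), "train.txt", batch_size=32, num_workers=2)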
  
    
# Requires allennlp>=2.4.0, allennlp_models>=2.4.0
from allennlp_models.generation.predictors import Seq2SeqPredictor

ARTICLE_TO_SUMMARIZE = '''
summarize: The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building,
and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side.
During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest
man-made structure in the world, a title it held for 41 years until the Chrysler Building in
New York City was finished in 1930. It was the first structure to reach a height of 300 metres.
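A rough sketch of how the predictor might be applied to this text once the string is closed; the archive path below is a placeholder, and the exact keys in the output dict depend on which seq2seq model was trained.

# "model.tar.gz" is a placeholder; substitute a real trained seq2seq archive.
predictor = Seq2SeqPredictor.from_path("model.tar.gz", predictor_name="seq2seq")
result = predictor.predict(ARTICLE_TO_SUMMARIZE)
# Simple/composed seq2seq models usually return the summary as "predicted_tokens".
print(result.get("predicted_tokens"))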
  
    
local target_namespace = "target_tokens";

{
    "dataset_reader": {
        "target_namespace": target_namespace,
        "type": "copynet_seq2seq",
        "source_token_indexers": {
            "tokens": {
                "type": "single_id",
                "namespace": "source_tokens"
  
    
from typing import List, Tuple, Dict, Any

import torch

from allennlp.common.lazy import Lazy
from allennlp.common.params import Params
from allennlp.training.optimizers import Optimizer

ParameterGroupType = List[Tuple[List[str], Dict[str, Any]]]
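For context, this alias matches AllenNLP's parameter_groups convention: each tuple pairs a list of regexes, matched against parameter names, with a dict of optimizer overrides for the matching parameters. A small illustrative value (the regexes and hyperparameters here are made up):

parameter_groups: ParameterGroupType = [
    # No weight decay for biases and LayerNorm weights.
    ([r"bias", r"LayerNorm\.weight"], {"weight_decay": 0.0}),
    # A smaller learning rate for the pretrained transformer parameters.
    ([r"transformer_model\."], {"lr": 1e-5}),
]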
  
    
- uses: actions/cache@v2
  with:
    path: ${{ env.pythonLocation }}
    key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('dev-requirements.txt') }}