Step 1: Fork the repo.
Step 2: Clone your fork locally.
git clone https://github.com/USERNAME/REPO.git
| class CopyNetSeq2Seq(Model): | |
| # snip... | |
| def _decoder_step(self, | |
| last_predictions: torch.Tensor, | |
| selective_weights: torch.Tensor, | |
| state: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: | |
| # shape: (group_size, max_input_sequence_length, encoder_output_dim) | |
| encoder_outputs_mask = state["source_mask"].float() |
| class CopyNetSeq2Seq(Model): | |
| # snip... | |
| def _get_copy_scores(self, state: Dict[str, torch.Tensor]) -> torch.Tensor: | |
| # NOTE: here `trimmed_source_length` refers to the input sequence length minus 2, | |
| # so that the special START and END tokens in the source are ignored. We also need to | |
| # ignore PAD tokens, but that happens elsewhere using a mask. | |
| # shape: (batch_size, trimmed_source_length, encoder_output_dim) | |
| trimmed_encoder_outputs = state["encoder_outputs"][:, 1:-1] |
class CopyNetSeq2Seq(Model):
    # snip...
    def _get_generation_scores(self, state: Dict[str, torch.Tensor]) -> torch.Tensor:
        """
        Project the current decoder hidden state onto the target vocabulary.

        `self._output_generation_layer` is a plain PyTorch linear layer whose
        input dimension matches the decoder hidden state size and whose output
        dimension is the size of the target vocabulary, so the result holds one
        unnormalized generation score per target-vocabulary token.

        # Parameters

        state : `Dict[str, torch.Tensor]`
            Decoder state dictionary; only `state["decoder_hidden"]` is read.

        # Returns

        `torch.Tensor`
            The generation scores (presumably shape
            `(group_size, target_vocab_size)` — confirm against callers).
        """
        decoder_hidden = state["decoder_hidden"]
        return self._output_generation_layer(decoder_hidden)
| { | |
| "trainer": { | |
| "cuda_device": 0, | |
| "learning_rate_scheduler": { | |
| "type": "triangular", | |
| // total number of epochs, should match the trainer param `num_epochs` below | |
| "num_epochs": 80, | |
| // increase LR linearly for 20 epochs | |
| "warm_up": 20, | |
| // then decrease LR linearly for 30 epochs |
# Build file lists of R-related and Python-related sources under the dmc2016 tree.
# NOTE: with no explicit action, find applies an implicit -print to the WHOLE
# `-name A -o -name B` expression, so files matching either pattern are listed.
# NOTE(review): both commands also match '*.h' headers — confirm this is
# intentional (e.g. Rcpp/Cython headers) and not copy-paste residue; if only
# R/Python files are wanted, the `-name '*.h' -o` term should be dropped.
find ~/ISU-DMC/dmc2016 -name '*.h' -o -name '*.R' > RFILES
find ~/ISU-DMC/dmc2016 -name '*.h' -o -name '*.py' > PYFILES