Skip to content

Instantly share code, notes, and snippets.

import torch
import logging
import comfy.samplers
import comfy.k_diffusion.sampling
from functools import partial
from tqdm.auto import trange, tqdm
@torch.no_grad()
# Copyright 2025 xzuyn
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# Variables
wandb_name: bs8_8e-6_linear
output_dir: ./Outputs/bs8_8e-6_linear
micro_batch_size: 8
learning_rate: 8e-6
eval_steps: 10
# Weights and Biases logging config
wandb_project: bsz-test
wandb_entity:
def mask_regex_attention_tokenizer(tokenizer, text, compiled_regex_patterns):
tokenized_text = tokenizer(
text=text,
add_special_tokens=False,
truncation=False,
padding=False,
return_tensors=None,
return_offsets_mapping=True,
)
@xzuyn
xzuyn / regex_attention.py
Last active March 1, 2025 14:57
Allows you to easily mask tokens using regex: https://i.imgur.com/xPOU5Ei.png
import re
from typing import List, Tuple, Pattern, Dict, Union
def mask_regex_attention(
text: str,
input_ids: List[int],
attention_mask: List[int],
offset_mapping: List[Tuple[int, int]],
compiled_regex_patterns: List[Pattern[str]]
# Modified from this original script:
# https://github.com/huggingface/trl/blob/a2adfb836a90d1e37b1253ab43dace05f1241e04/examples/scripts/orpo.py
#
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import json
from tqdm import tqdm
import copy
import gc
def load_json_or_jsonl(file_path):
try:
# Modified from this original script:
# https://github.com/huggingface/trl/blob/a2adfb836a90d1e37b1253ab43dace05f1241e04/examples/scripts/orpo.py
#
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
ministrations
audible pop
rivulets of
admit it
the ball is in your court
the game is on
the choice is yours
I don't bite... unless you want me to
half-lidded eyes
she worries her bottom lip
import json
def load_json_or_jsonl(file_path):
try:
with open(file_path, "r") as file:
try:
# Try loading the entire file as JSON
data = json.load(file)
return data