This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import torch | |
import accelerate | |
import requests | |
import numpy as np | |
from PIL import Image as PILImage | |
from tqdm.notebook import tqdm | |
from datasets import load_dataset | |
from google.colab import userdata | |
from huggingface_hub import login, create_repo |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import argparse | |
import string | |
import re | |
from typing import List, Tuple, Optional | |
from itertools import islice | |
import numpy as np | |
from pdfminer.high_level import extract_pages | |
from pdfminer.layout import LTTextBox, LTPage |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<!DOCTYPE language SYSTEM "language.dtd"> | |
<!--Based on the language defintion shared on: | |
https://wiki.gnome.org/Projects/GtkSourceView/LanguageDefinitions?action=AttachFile&do=view&target=asm-intel.lang | |
--> | |
<language id="assembler8086" name="Assembler (8086)" version="2.0" section="Sources"> | |
<metadata> | |
<property name="mimetypes">text/x-asm;text/x-assembler</property> | |
<property name="globs">*.asm</property> | |
</metadata> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<!DOCTYPE language SYSTEM "language.dtd"> | |
<!--Based on the language defintion shared on: | |
https://wiki.gnome.org/Projects/GtkSourceView/LanguageDefinitions?action=AttachFile&do=view&target=asm-intel.lang | |
--> | |
<language id="assembler" name="Assembler (Intel)" version="2.0" section="Sources"> | |
<metadata> | |
<property name="mimetypes">text/x-asm;text/x-assembler</property> | |
<property name="globs">*.asm</property> | |
</metadata> |