Skip to content

Instantly share code, notes, and snippets.

@Norod
Norod / CoreML_distilGPT2.py
Created May 6, 2021 14:25
Various Huggingface GPT2 to CoreML converters. To be used with https://github.com/huggingface/swift-coreml-transformers.git
"""
Recreate the Core ML model from scratch using
coremltools' neural_network.NeuralNetworkBuilder
"""
import coremltools
import coremltools.models.datatypes as datatypes
from coremltools.models import neural_network as neural_network
from coremltools.models.utils import save_spec
import numpy as np
@Norod
Norod / gtranslate.py
Created May 22, 2021 19:14
Google Translate API - Basic example from Google, with an added option to take a text file name as input
#!/usr/bin/env python
# Copyright 2016 Google, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
@Norod
Norod / tika_parser_pdf2text.py
Created May 23, 2021 09:15
A PDF-to-text converter based on the Tika parser
from tika import parser # pip install tika
import os
FOLDER_WITH_PDF="./"
files = [f for f in os.listdir(FOLDER_WITH_PDF) if f.endswith('.pdf')]
for infile in files:
full_path = os.path.join(FOLDER_WITH_PDF, infile)
raw = parser.from_file(full_path)
@Norod
Norod / AppIconRibbonQA.txt
Created June 1, 2021 10:29
Add "QA" top and bottom bars to App Icons using ImageMagick
# Add "QA" top and bottom bars to App Icons
# Using ImageMagick
#brew install imagemagick
########
# Phone
########
convert "App Icon 1024x1024.png" \
-size 1024x128 -font "Times New Roman" -pointsize 96 -background 'rgb(106, 228, 222)' -fill red \
@Norod
Norod / onnx_rename_node_inp_inst.py
Created June 1, 2021 15:01
Rename a node in an ONNX model
import onnx
onnx_model = onnx.load('./input.onnx')
#Rename 'inp' to 'inst'
endpoint_names = ['inp', 'inst']
for i in range(len(onnx_model.graph.node)):
for j in range(len(onnx_model.graph.node[i].input)):
if onnx_model.graph.node[i].input[j] == endpoint_names[0]:
from transformers import AutoTokenizer, AutoModelForCausalLM
#pip install tokenizers==0.10.3 transformers==4.8.0
tokenizer = AutoTokenizer.from_pretrained("Norod78/distilgpt2-base-pretrained-he")
model = AutoModelForCausalLM.from_pretrained("Norod78/distilgpt2-base-pretrained-he", pad_token_id=tokenizer.eos_token_id)
prompt_text = "הנבחרת האולימפית של ישראל זכתה השנה"
max_len = 50
@Norod
Norod / ruDALLE_forAmazonSageMakerStudioLab.ipynb
Created December 9, 2021 10:50
ruDALLE for Amazon SageMaker Studio Lab
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@Norod
Norod / microsoft-vq-diffusion_not-user-friendly.ipynb
Last active January 11, 2022 14:52
microsoft/VQ-Diffusion_Not-User-Friendly.ipynb
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@Norod
Norod / output-token-scores-hebrew.ipynb
Last active January 4, 2022 20:08
Output token scores for Norod78/distilgpt2-base-pretrained-he (Hebrew)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@Norod
Norod / translate_csv_file.py
Created January 24, 2022 17:25
Translate a CSV file using Helsinki-NLP's Hugging Face models
# !pip install sentencepiece transformers tokenizers
from transformers import MarianTokenizer, MarianMTModel
from typing import List
import csv
src = "en" # source language
trg = "he" # target language