Last active
May 5, 2024 23:02
-
-
Save juliensimon/7ae1c8d12e8a27516e1392a3c73ac1cc to your computer and use it in GitHub Desktop.
Benchmark script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import numpy as np | |
import torch | |
from transformers import pipeline | |
def benchmark(pipeline, data, iterations=1000, *, warmup=100):
    """Time repeated calls to an inference callable on a fixed input.

    Parameters
    ----------
    pipeline : callable
        The callable to benchmark. (The name shadows the imported
        ``transformers.pipeline`` factory inside this function; it is
        kept for backward compatibility with existing callers.)
    data : object
        Input passed unchanged to ``pipeline`` on every call.
    iterations : int, optional
        Number of timed calls (default 1000).
    warmup : int, keyword-only, optional
        Number of untimed calls made first so lazy initialization,
        caches, and any JIT work do not pollute the measurement
        (default 100, matching the previous hard-coded value).

    Returns
    -------
    tuple[str, str]
        Mean latency and 99th-percentile latency in milliseconds,
        each formatted with two decimal places.
    """
    # Warmup phase: results are deliberately discarded.
    for _ in range(warmup):
        pipeline(data)
    times = []
    for _ in range(iterations):
        # perf_counter is monotonic and high-resolution; time.time() is
        # neither, and can go backwards under clock adjustments.
        tick = time.perf_counter()
        pipeline(data)
        tock = time.perf_counter()
        times.append(tock - tick)
    return (
        "{:.2f}".format(np.mean(times) * 1000),
        "{:.2f}".format(np.percentile(times, 99) * 1000),
    )
# Benchmark inputs: one short and one long product review, each also
# duplicated into a batch of 8 to exercise the pipelines' batched path.
sentence_short = "This is a really nice pair of shoes, I am completely satisfied with my purchase"
sentence_long = "These Adidas Lite Racer shoes hit a nice sweet spot for comfort shoes. Despite being a little snug in the toe box, these are very comfortable to wear and provide nice support while wearing. I would stop short of saying they are good running shoes or cross-trainers because they simply lack the ankle and arch support most would desire in those type of shoes and the treads wear fairly quickly, but they are definitely comfortable. I actually walked around Disney World all day in these without issue if that is any reference. Bottom line, I use these as the shoes they are best; versatile, inexpensive and comfortable, without expecting the performance of a high-end athletic sneaker or expecting the comfort of my favorite pair of slippers."
sentence_short_array = [sentence_short for _ in range(8)]
sentence_long_array = [sentence_long for _ in range(8)]

# Checkpoints to compare against each other.
models = ["distilbert-base-uncased", "bert-base-uncased", "roberta-base"]
# The four payloads every pipeline variant is measured against, paired
# with the label used in the printed report.
cases = [
    ("short sentence", sentence_short),
    ("long sentence", sentence_long),
    ("short sentence array", sentence_short_array),
    ("long sentence array", sentence_long_array),
]
for model in models:
    print(f"Benchmarking {model}")
    pipe = pipeline("sentiment-analysis", model=model)
    # Baseline: the stock transformers pipeline.
    for label, payload in cases:
        print(f"Transformers pipeline, {label}: {benchmark(pipe, payload)}")
    # Optimum Intel variant: same pipeline wrapped with bfloat16 + JIT
    # tracing; measured on the identical payloads for comparison.
    from optimum.intel import inference_mode
    with inference_mode(pipe, dtype=torch.bfloat16, jit=True) as opt_pipe:
        for label, payload in cases:
            print(f"Optimum pipeline, {label}: {benchmark(opt_pipe, payload)}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment