pronoob deepanshu-yadav

Running Gemma 4 Vision with llama.cpp

This guide provides instructions on how to download, set up, and run the Gemma 4 Vision (E2B) model using llama.cpp. It includes instructions for both text-only generation and multimodal (text + image) generation, along with testing examples for Windows and Linux.

1. Download Required Files

Models

You will need to download the following model files from HuggingFace:

Main Model (GGUF):

	# Class that handles the scaling operation.
	class Scaler:

	def __init__(self, scaler):
	# Keep a reference to the scaler supplied by the caller (the object is not copied).
	self.scaler = scaler
	# For backward compatibility with earlier versions of scikit-learn.
	# NOTE(review): self.scaler is the same object the caller passed in, so this
	# also overwrites `clip` on the caller's scaler — confirm that is intended.
	self.scaler.clip = False

	def transform_using_scaler(self, data_x):

	import glob
	from sklearn.preprocessing import MinMaxScaler

	def npy_header_offset(npy_path):
	"""Gives the no of header bytes inside a numpy file."""
	with open(str(npy_path), 'rb') as f:
	if f.read(6) != b'\x93NUMPY':
	raise ValueError('Invalid NPY file.')
	version_major, version_minor = f.read(2)
	if version_major == 1:

	# Build the training and validation batch generators with CustomGenerator.
	# Both generators receive the same batch size and the same scaler object
	# (min_max_scaler_train); only the data descriptor differs.
	train_gen = CustomGenerator(train_data_descriptor, BATCH_SIZE, min_max_scaler_train)
	valid_gen = CustomGenerator(validation_data_descriptor, BATCH_SIZE, min_max_scaler_train)

	# Fetch batch 0, take its first element and read the size of its second axis.
	# NOTE(review): presumably this is the number of input features per sample —
	# confirm against what CustomGenerator.getitem returns.
	input_dim = train_gen.getitem(0)[0].shape[1]

	# The following two functions actually extract data on demand by using yield.
	def gen_data_train():
	j = 0
	while j < NO_OF_EPOCHS:

	# Here is a class that makes it easy to load data in a batch.
	class CustomGenerator():

	def __init__(self, data_desc, batch_size, scaler):
	# Store the constructor arguments unchanged on the instance.
	self.data_desc = data_desc
	self.batch_size = batch_size
	self.scaler = scaler
	# The length is computed once here and cached; it is not refreshed if the
	# attributes above are changed later.
	# NOTE(review): presumably __len__ reads data_desc/batch_size, which is why
	# this assignment comes last — confirm against __len__.
	self.len = self.__len__() # an attribute for the length

	def __len__(self):

	import glob
	from sklearn.preprocessing import MinMaxScaler

	# List every path matched by the '*' pattern directly inside the training
	# and validation directories (no recursion into sub-directories).
	training_files = glob.glob(os.path.join(train_dir, '*'))
	validation_files = glob.glob(os.path.join(validation_dir, '*'))
	# Scaler created with default settings; nothing is fitted at this point.
	min_max_scaler_train = MinMaxScaler()

	# Constant declarations. Note that the constants are kept the same so that
	# the comparison is meaningful.
	# BATCH_SIZE is the batch size handed to CustomGenerator.
	BATCH_SIZE = 32
	# NO_OF_EPOCHS bounds the `while j < NO_OF_EPOCHS` loop in gen_data_train.
	NO_OF_EPOCHS = 3

	import tensorflow as tf
	from tensorflow.keras.layers import InputLayer, Dense, Input, Dropout,\
	BatchNormalization, Flatten
	from tensorflow.keras import regularizers
	from tensorflow.keras.models import Sequential, Model, load_model
	from tensorflow.keras.optimizers import Adam
	from functools import partial

	def create_ae_model(num_hidden_layers=3, hidden_layer_neurons=64,
	lr=0.001, kernel_init='lecun_normal', optimizer='adam',

	import os
	import numpy as np

	# Sizing constants.
	# NOTE(review): the names suggest the number of files, the number of feature
	# columns, and an upper bound on rows per file — confirm where they are used.
	NO_OF_FILES = 1000
	NO_OF_FEATURES = 1000
	MAX_ROW_LIMIT_IN_SINGLE_FILE = 370

	# Directory named 'train' under the current working directory; it is created
	# if missing, and exist_ok=True makes an already-existing directory a no-op.
	train_dir = os.path.join(os.getcwd(), 'train')
	os.makedirs(train_dir, exist_ok=True)