@Kishlay-notabot
Last active November 5, 2024 14:54
Set up IndicTrans2 for local inference and translation across 22 Indian languages

IndicTrans2 Setup Guide

This guide provides instructions for setting up IndicTrans2 specifically for English to Indic language translation using the distilled model for faster inference.

About IndicTrans2

IndicTrans2 is a state-of-the-art open-source multilingual Neural Machine Translation (NMT) model supporting all 22 scheduled Indian languages.

Requirements

  • Python 3 (this script uses 3.10; IndicTrans2 supports any version >= 3.7)
  • Linux environment (Ubuntu/Debian recommended)
  • The setup uses the distilled version of the En-Indic model

Installation

  1. Download and run the setup script:
git clone https://gist.github.com/Kishlay-notabot/a04e62a611b25bda413d284abbaaa254
chmod +x setup.sh
./setup.sh

The script will:

  • Install system dependencies (build-essential, python3.10-dev, parallel)
  • Set up a Python virtual environment
  • Clone IndicTrans2
  • Install Python dependencies including sentencepiece
  • Download and set up the distilled En-Indic model
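After the script finishes, a quick sanity check can confirm the expected layout. This is a minimal sketch, not part of IndicTrans2 itself; the directory names are inferred from the setup script below and may differ on your machine.

```python
from pathlib import Path

def check_setup(base="IndicTrans2"):
    """Report whether the directories the setup script creates are present.

    Paths are assumptions based on the setup script in this gist.
    """
    expected = [Path(base), Path(base) / "fairseq_model"]
    return {str(p): p.exists() for p in expected}

for path, exists in check_setup().items():
    print(f"{path}: {'found' if exists else 'missing'}")
```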

Usage Examples

Using Python Interface

from inference.engine import Model
import os
import nltk
nltk.download('punkt_tab')

# Get the full path to the model directory
current_dir = os.getcwd()
ckpt_dir = os.path.join(current_dir, "fairseq_model")

# Initialize the model
model = Model(ckpt_dir, model_type="fairseq")

# Sample sentences
sample_sentences = [
    "Welcome to my house.",
    "How are you doing today?",
    "The weather is beautiful."
]

# Translate batch of sentences
translations = model.batch_translate(
    sample_sentences,
    src_lang="eng_Latn",  # English in Latin script
    tgt_lang="hin_Deva"   # Hindi in Devanagari script
)

# Print the results
print("\nBatch Translation Results:")
for src, tgt in zip(sample_sentences, translations):
    print(f"English: {src}")
    print(f"Hindi  : {tgt}")
    print()

# Sample paragraph
sample_paragraph = """
Welcome to India. This is a beautiful country with rich culture and heritage.
The people here are very friendly and helpful. You will enjoy your stay here.
"""

# Translate paragraph
paragraph_translation = model.translate_paragraph(
    sample_paragraph,
    src_lang="eng_Latn",
    tgt_lang="hin_Deva"
)

print("\nParagraph Translation:")
print("English:", sample_paragraph)
print("Hindi  :", paragraph_translation)

CTranslate2 (CT2) inference is considerably faster than fairseq inference; the same interface works, pointed at the converted CT2 model:

from inference.engine import Model
import os
import nltk
nltk.download('punkt_tab')

# Get the full path to the model directory
current_dir = os.getcwd()
ckpt_dir = os.path.join(current_dir, "fairseq_model/ct2_fp16_model")

# Initialize the model with ctranslate2 type and specify device as CPU
model = Model(ckpt_dir, model_type="ctranslate2", device="cpu")

# Sample sentences
sample_sentences = [
    "Welcome to my house.",
    "How are you doing today?",
    "The weather is beautiful."
]

# Translate batch of sentences
translations = model.batch_translate(
    sample_sentences,
    src_lang="eng_Latn",
    tgt_lang="hin_Deva"
)

# Print the results
print("\nBatch Translation Results:")
for src, tgt in zip(sample_sentences, translations):
    print(f"English: {src}")
    print(f"Hindi  : {tgt}")
    print()

# Sample paragraph
sample_paragraph = """
Welcome to India. This is a beautiful country with rich culture and heritage.
The people here are very friendly and helpful. You will enjoy your stay here.
"""

# Translate paragraph
paragraph_translation = model.translate_paragraph(
    sample_paragraph,
    src_lang="eng_Latn",
    tgt_lang="hin_Deva"
)

print("\nParagraph Translation:")
print("English:", sample_paragraph)
print("Hindi  :", paragraph_translation)
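To see the speed difference for yourself, a small timing helper is enough. This is an illustrative sketch: `timed` is not part of IndicTrans2, and the commented lines assume both a fairseq and a CT2 `Model` have been loaded as in the snippets above (the variable names `fairseq_model` and `ct2_model` are hypothetical).

```python
import time

def timed(fn, *args, **kwargs):
    """Call fn once and return (result, elapsed seconds)."""
    start = time.perf_counter()
    result = fn(*args, **kwargs)
    return result, time.perf_counter() - start

# With both models loaded as in the snippets above (names are illustrative):
# _, fairseq_s = timed(fairseq_model.batch_translate, sample_sentences,
#                      src_lang="eng_Latn", tgt_lang="hin_Deva")
# _, ct2_s = timed(ct2_model.batch_translate, sample_sentences,
#                  src_lang="eng_Latn", tgt_lang="hin_Deva")
# print(f"fairseq: {fairseq_s:.2f}s, ct2: {ct2_s:.2f}s")
```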

Using Bash Interface

Create a sample input file (nano.txt):

hi this is sentence one
hi this is sentence two
hi this is a happy sentence!

Run translation:

bash -x joint_translate.sh nano.txt out.txt eng_Latn hin_Deva "$(pwd)/fairseq_model"

The translated output will be saved in out.txt:

नमस्ते यह एक वाक्य है
हाय यह वाक्य दो है
नमस्ते, यह एक सुखद वाक्य है!
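The bash interface can also be driven from Python via `subprocess`. The helper below is a sketch, not part of IndicTrans2: it only assembles the same command shown above, and the actual run (commented out) must happen from inside the cloned IndicTrans2 directory where `joint_translate.sh` lives.

```python
import subprocess
from pathlib import Path

def build_translate_cmd(infile, outfile, src_lang, tgt_lang, model_dir):
    # Mirrors the invocation shown above; joint_translate.sh ships with
    # IndicTrans2 and must be run from inside the cloned repository.
    return ["bash", "joint_translate.sh", infile, outfile,
            src_lang, tgt_lang, model_dir]

cmd = build_translate_cmd("nano.txt", "out.txt", "eng_Latn", "hin_Deva",
                          str(Path.cwd() / "fairseq_model"))
print(" ".join(cmd))
# subprocess.run(cmd, check=True)  # uncomment inside the IndicTrans2 directory
```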

Language Codes

IndicTrans2 uses FLORES-200 language codes for identifying languages and scripts. Each code consists of a language code and script code (e.g., hin_Deva for Hindi in Devanagari script).

Common examples:

  • English: eng_Latn (Latin script)
  • Hindi: hin_Deva (Devanagari script)
  • Bengali: ben_Beng (Bengali script)
  • Tamil: tam_Taml (Tamil script)
  • Urdu: urd_Arab (Perso-Arabic script)

For a complete list of supported languages and their codes, please refer to the official IndicTrans2 repository.
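Since every FLORES-200 tag follows the same `language_Script` shape, splitting one apart is straightforward. The helper below is hypothetical (not part of IndicTrans2), and the dictionary only restates the examples listed above.

```python
# Examples restated from the list above.
FLORES_EXAMPLES = {
    "eng_Latn": "English (Latin script)",
    "hin_Deva": "Hindi (Devanagari script)",
    "ben_Beng": "Bengali (Bengali script)",
    "tam_Taml": "Tamil (Tamil script)",
    "urd_Arab": "Urdu (Perso-Arabic script)",
}

def split_flores_tag(tag):
    """Split e.g. 'hin_Deva' into ('hin', 'Deva')."""
    lang, script = tag.split("_", 1)
    return lang, script

for tag, name in FLORES_EXAMPLES.items():
    lang, script = split_flores_tag(tag)
    print(f"{tag}: language={lang}, script={script}  # {name}")
```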

Important Notes

  1. This setup uses the distilled version of the En-Indic model for faster inference
  2. The script specifically uses Python 3.10
  3. Make sure you have sufficient disk space for the model (~1GB)
  4. The models are downloaded in the fairseq_model directory within the IndicTrans2 folder

Troubleshooting

If you encounter any issues:

  1. Ensure all dependencies are properly installed
  2. Check if the model files are properly downloaded in the fairseq_model directory
  3. Verify you're using Python 3.10
  4. Make sure you're in the correct directory when running the commands
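For point 3, a one-liner can verify the interpreter version before debugging anything else. A minimal sketch; the guide recommends 3.10, while IndicTrans2 itself supports 3.7 and up per the Requirements section.

```python
import sys

def python_ok(min_version=(3, 7)):
    """True if the running interpreter meets the minimum supported version."""
    return sys.version_info >= min_version

print(sys.version.split()[0], "OK" if python_ok() else "too old")
```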

Model Information

This setup uses the distilled version of the En-Indic model from IndicTrans2, which provides:

  • Faster inference compared to the full model
  • Optimized for English to Indic language translations
  • Smaller model size while maintaining good translation quality

For more information about IndicTrans2, including the full model capabilities, training details, and research papers, visit the official repository.

setup.sh

#!/bin/bash
# Setup script for IndicTrans2 (English-Indic Translation)
# This script uses Python 3.10
# Exit on error
set -e
echo "Setting up IndicTrans2..."
# Install system dependencies
echo "Installing system dependencies..."
sudo apt-get update
sudo apt-get install -y build-essential python3.10-dev python3.10-venv parallel wget unzip
# Create and activate virtual environment
echo "Creating Python virtual environment..."
python3.10 -m venv indictrans2_env
source indictrans2_env/bin/activate
# Clone repository
echo "Cloning IndicTrans2 repository..."
git clone https://github.com/AI4Bharat/IndicTrans2
cd IndicTrans2
# Install Python dependencies
echo "Installing Python requirements..."
pip install --upgrade pip
pip install sentencepiece
source install.sh
# Download and extract distilled model (En-indic)
echo "Downloading distilled model..."
mkdir -p fairseq_model
wget https://indictrans2-public.objectstore.e2enetworks.net/it2_distilled_ckpts/en-indic.zip
unzip en-indic.zip -d fairseq_model/
rm en-indic.zip
echo "
Setup completed successfully!
Usage examples:
1. For bash interface:
bash joint_translate.sh <input_file> <output_file> eng_Latn <target_lang> \"$(pwd)/fairseq_model\"
Example for English to Hindi:
bash joint_translate.sh input.txt output.txt eng_Latn hin_Deva \"$(pwd)/fairseq_model\"
2. The model is downloaded at: $(pwd)/fairseq_model
Note: This setup uses the distilled version of the En-indic model for faster inference.
"
Comment from the author (@Kishlay-notabot):
set -e exits my WSL instance, I don't know why.
