Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save iamhowardtheduck/811e0da772e52df8e265cc55baa9f758 to your computer and use it in GitHub Desktop.
Save iamhowardtheduck/811e0da772e52df8e265cc55baa9f758 to your computer and use it in GitHub Desktop.
ELCIA-V3-unsecured
# Start on a clean screen, then abort with an error on stderr when the
# effective user id is 0 (root).
clear
printf '\n\n\n\n\n\n\n\n'
if (( EUID == 0 )); then
  printf '%s\n' "This script must NOT be run as \"root\" OR as \"sudo $USER\"; please try again." >&2
  exit 1
fi
#
# WELCOME SCREEN & INITIAL UPDATING
#
# Prints the banner and a summary of the available choices, waits for a key
# press, then refreshes the apt index and installs dialog and git.
#
# Disabled menu entry, kept for reference:
#   "Configure an Elastic Cloud instance with ChatGPT using ELSER for title, body, description, and keyword fields for semantic search"
clear
printf '\n\n\n\n\n\n\n\n'
printf '%s\n' \
  " Welcome I am ELCIA, your..." \
  " Elastic & ChatGPT Integration Application"
printf '\n\n\n\n\n\n\n\n'
printf '%s\n' \
  "You can choose to either:" \
  "" \
  "Configure an Elastic Cloud instance with ChatGPT using sentence transformers for title vectorization" \
  "" \
  "Configure an Elastic Cloud instance for Image Similarity search. (No OpenAI API required!)" \
  "" \
  "Configure an Elastic Cloud instance for Image Similarity search for the PowerPoint Slide Finder use-case. (No OpenAI API required!)" \
  "" \
  "Download the guts and do the rest yourself."
printf '\n\n\n\n'
printf '%s\n' \
  "WARNING: CURRENTLY ELCIA ONLY SUPPORTS OPENAI FOR OPTION 1, NOT AZURE'S OPENAI. Please obtain your personal openai key here:" \
  "" \
  "https://platform.openai.com/"
printf '\n\n\n\n'
printf '%s\n' "But first we must run a few commands to get ready."
printf '\n\n\n\n'
# Single silent key press; the key itself is discarded.
read -n 1 -s -r -p "Press any key to continue"
printf '%s\n' "" "Enjoy! ☺"
sudo apt update -y
sudo apt install dialog git -y
clear
#
# Main menu, shown as a dialog radiolist. The redirection order captures
# dialog's stderr into $choices while its stdout (the UI) goes to /dev/tty.
menu_entries=(
  1 "Configure an Elastic Cloud instance with ChatGPT using sentence transformers for title vectorization" off   # any entry can be preselected with "on"
  2 "Configure an Elastic Cloud instance for Image Similarity search. No OpenAI API required!" off
  3 "Configure an Elastic Cloud instance for Image Similarity search for the PowerPoint Slide Finder use-case. No OpenAI API required!" off
  4 "Just download the guts for all the awesome search apps and build it yourself" off
  5 "Make like a tree, and leave." off
)
choices=$(dialog --radiolist "Which would you like to do?" 22 135 16 "${menu_entries[@]}" 2>&1 >/dev/tty)
clear
# Dispatch on the menu selection. $choices is deliberately left unquoted so the
# captured dialog output is word-split into one tag per iteration.
for choice in $choices
do
case "$choice" in
# Configure an Elastic Cloud instance with ChatGPT using sentence transformers for title vectorization
1) clear
echo ""
echo "You will need a version 8.8+ cluster with an ML node with at least 4GB of RAM, as well as login credentials and your cluster's ES endpoint."
echo ""
echo "So please make sure you have all of this prior to continuing."
echo ""
echo -e "\n\n\n"
echo "WARNING: CURRENTLY ELCIA ONLY SUPPORTS OPENAI FOR THIS OPTION, NOT AZURE'S OPENAI. Please obtain your personal openai key here:"
echo ""
echo "https://platform.openai.com/"
echo -e "\n\n\n"
echo ""
echo ""
read -n 1 -s -r -p "Press any key to continue"
# The prompt above leaves the cursor mid-line; start the installer output on a fresh line.
echo ""
# The package patterns are quoted so apt receives them verbatim; unquoted, the
# shell would expand them against any matching file names in the current directory.
sudo apt install 'docker.io*' 'python3-pip*' curl -y
sudo pip install streamlit openai Elasticsearch
clear
# Collect the cluster credentials and endpoint used by the rest of option 1.
# Sets: cloud_user, cloud_pass, cloud_id, es_client.
# read -r keeps backslashes in the typed/pasted values intact.
echo ""
echo "What is your username? Typically it is just 'elastic'."
echo ""
read -r cloud_user
echo ""
echo "What is your password? If you're using 'elastic' and forgot it, you can reset it from the cloud UI."
echo ""
# -s keeps the password off the screen; the extra echo replaces the newline
# that the terminal no longer prints when Enter is pressed.
read -r -s cloud_pass
echo ""
echo ""
echo "What is your Cloud ID? Please copy & paste it directly from the cloud UI with the trailing '=='"
echo ""
read -r cloud_id
echo ""
echo "Please copy and paste your Elasticsearch endpoint from the cloud UI below WITHOUT the 'https://' portion"
echo ""
echo "Example: my-cluster-is-awesome.es.us-east4.gcp.elastic-cloud.com"
echo ""
echo ""
read -r es_client
# Tolerate a pasted scheme or trailing slash; later commands build
# "https://${es_client}:<port>/..." themselves.
es_client=${es_client#https://}
es_client=${es_client%/}
echo ""
echo ""
echo "Next we'll load the sentence transformers model into your cluster using Docker!"
echo ""
# Build the eland image and import the model. Credentials are passed as
# separate, quoted arguments instead of being embedded in the URL, so passwords
# containing characters such as '@', '/' or '&' no longer break the command.
# An existing eland checkout is reused so a re-run does not stop at git clone.
cd "/home/$USER" \
&& { [[ -d eland ]] || git clone https://github.com/elastic/eland.git; } \
&& cd eland \
&& sudo docker build -t elastic/eland . \
&& sudo docker run -it --rm --network host elastic/eland \
eland_import_hub_model \
--url "https://${es_client}:9243/" \
-u "${cloud_user}" \
-p "${cloud_pass}" \
--hub-model-id sentence-transformers/all-distilroberta-v1 \
--start
echo ""
echo "Next let's create the pipeline so you can use it over and over again!"
echo ""
# The JSON body is fed through a quoted here-document so its single quotes
# ('inference', 'title-vector') reach Elasticsearch unchanged and nothing in it
# is expanded by the shell.
curl -X PUT "https://${es_client}:9243/_ingest/pipeline/ml-inference-title-vector?pretty" \
-u "${cloud_user}:${cloud_pass}" \
-H 'Content-Type: application/json' \
--data-binary @- <<'JSON'
{"processors":[{"remove": {"field": "ml.inference.title-vector", "ignore_missing": true}},{"remove": {"field": "title-vector", "ignore_missing": true}},{"inference": {"field_map": {"title": "text_field"}, "model_id": "sentence-transformers__all-distilroberta-v1","target_field": "ml.inference.title-vector","on_failure":[{"append":{"field":"_source._ingest.inference_errors","value":[{"message": "Processor 'inference' in pipeline ml-inference-title-vector failed with message {{ _ingest.on_failure_message }}","pipeline": "ml-inference-title-vector","timestamp":"{{{ _ingest.timestamp }}}"}]}}]}},{"append": {"field":"_source._ingest.processors","value": [{"model_version":"8.8.1","pipeline":"ml-inference-title-vector","processed_timestamp":"{{{ _ingest.timestamp }}}","types":["pytorch","text_embedding"]}]}},{"set":{"copy_from":"ml.inference.title-vector.predicted_value","description": "Copy the predicted_value to title-vector","field": "title-vector","if": "ctx?.ml?.inference != null && ctx.ml.inference['title-vector'] != null"}}]}
JSON
echo ""
echo "Now let's prepare the index so that when you go to create it in the GUI, you won't have to update the mappings in DevTools!"
echo ""
echo "This will create the 'elcia-script' index template which will be used for all 'search-*' indices"
echo ""
curl -X PUT "https://${es_client}:9243/_index_template/elcia-script?pretty" \
-u "${cloud_user}:${cloud_pass}" \
-H 'Content-Type: application/json' \
--data-binary @- <<'JSON'
{"index_patterns": ["search-*"],"template":{"settings": {"number_of_shards": 2,"auto_expand_replicas": "0-3","default_pipeline":"ml-inference-title-vector","similarity": {"default": {"type": "BM25"}}},"mappings": {"properties": {"title-vector": {"type": "dense_vector","dims": 768,"index": true,"similarity": "dot_product"},"created_at":{"type":"date","format":"EEE MMM dd HH:mm:ss Z yyyy"}}}}}
JSON
echo ""
echo "Now we'll create the search application"
echo ""
echo "But first two variables need to be passed."
echo ""
echo "What will your index be? Typically it's 'search-something'"
echo ""
read -r index
echo ""
echo "Next, what will we call your ChatGPT web UI? Typically it's the name of the website you plan on crawling."
echo ""
echo "So if you plan on crawling 'widgets.com' and want a Widgets GPT UI, just put 'Widgets' and I'll make the necessary changes for you."
echo ""
read -r engine
echo ""
# Reuse an existing checkout so a re-run does not fail at git clone.
cd "/home/$USER" && { [[ -d ElasticDocs_GPT ]] || git clone https://github.com/jeffvestal/ElasticDocs_GPT.git; }
# Escape '/' and '&' so the user-supplied values are taken literally as sed
# replacement text.
index_sed=$(printf '%s' "$index" | sed -e 's/[\/&]/\\&/g')
engine_sed=$(printf '%s' "$engine" | sed -e 's/[\/&]/\\&/g')
# Patch the app in place. The sed expressions were previously wrapped in \'...\',
# which handed literal quote characters to sed (and split the last expression on
# its spaces), so none of the substitutions could run. All edits are now applied
# in a single call, which also leaves the .bak file as the untouched original.
# NOTE(review): the substitutions are addressed by line number, so they assume
# the current layout of the upstream elasticdocs_gpt.py — confirm against the repo.
sed -i.bak \
-e '22s/0301/0613/' \
-e '26s/http/basic/' \
-e "69s/elastic-docs/${index_sed}/" \
-e "100s/ElasticDocs/${engine_sed}/" \
-e '108s/Elastic Docs/the Elastic Data-Set/' \
"/home/$USER/ElasticDocs_GPT/elasticdocs_gpt.py"
echo ""
echo ""
echo "Now let's configure streamlit (the web UI) to run as a service, and all you'll have to do is crawl!"
echo ""
echo ""
# Write the unit file in one piece, overwriting any previous copy. Appending
# line by line (tee -a) duplicated every section when the script was run twice.
sudo tee /etc/systemd/system/chatgpt4all.service >/dev/null <<'UNIT'
[Unit]
After=network.target

[Service]
ExecStart=/usr/local/bin/chatgpt.sh

[Install]
WantedBy=default.target
UNIT
echo ""
echo "What is your Open AI api key?"
echo ""
read -r openai_api
# Generate the launcher script. %q shell-quotes each value so credentials
# containing spaces or shell metacharacters survive being sourced by bash, and
# tee's output is discarded so the secrets are not echoed back to the terminal.
{
printf '%s\n' '#!/bin/bash'
printf 'export cloud_id=%q\n' "${cloud_id}"
printf 'export cloud_user=%q\n' "${cloud_user}"
printf 'export cloud_pass=%q\n' "${cloud_pass}"
printf 'export openai_api=%q\n' "${openai_api}"
printf 'streamlit run %q\n' "/home/$USER/ElasticDocs_GPT/elasticdocs_gpt.py"
} | sudo tee /usr/local/bin/chatgpt.sh >/dev/null
# The launcher holds the cluster password and the OpenAI key, so only its
# owner (root, since it is written through sudo) may read or execute it.
sudo chmod 700 /usr/local/bin/chatgpt.sh
sudo chmod 664 /etc/systemd/system/chatgpt4all.service
sudo systemctl daemon-reload
# --now starts the service in addition to enabling it at boot; plain "enable"
# left it stopped even though the message below says it is running.
sudo systemctl enable --now chatgpt4all.service
echo "The Chat GPT Service is now running, please go ahead and crawl!!!"
echo ""
echo "Feel free to press ctrl + c to exit out of the status message"
echo ""
sudo systemctl status chatgpt4all.service
;;
# Configure an Elastic Cloud instance for Image Similarity search
2) clear
echo "This is designed to be run on a minimal server install of Ubuntu 22.04 AFTER 'sudo apt update' has been run and the system was rebooted."
echo ""
echo "You will also need a cluster with an ML node with at least 4GB of RAM, as well as login credentials and your cluster's ES endpoint."
echo ""
echo "So please make sure you have all of this prior to continuing."
echo ""
echo ""
echo ""
read -n 1 -s -r -p "Press any key to continue"
# The prompt above leaves the cursor mid-line; start the installer output on a fresh line.
echo ""
# The package patterns are quoted so apt receives them verbatim rather than
# whatever file names they happen to match in the current directory.
sudo apt install 'docker.io*' 'python3-pip*' curl tar unzip -y
sudo pip install streamlit openai Elasticsearch python-dotenv
sudo apt install python3-flask cmdtest -y
# Reuse an existing checkout so a re-run does not fail at git clone.
cd "/home/$USER" && { [[ -d flask-elastic-image-search ]] || git clone https://github.com/radoondas/flask-elastic-image-search.git; }
cd "/home/$USER/flask-elastic-image-search" && pip3 install -r requirements.txt
clear
echo ""
echo "MANUAL INTERVENTION REQUIRED. DO NOT PROCEED UNTIL THIS IS COMPLETED. PLEASE READ EACH STEP."
echo ""
echo "Please load your images as a single zip file from the Elastic uploader service:"
echo "https://upload.elastic.co/login"
echo ""
echo "Choose: External"
echo ""
echo "Click the link provided to proceed to the zip file drop page"
echo ""
echo "Drag and drop your zip file that contains your image files there. Once loaded, note the Token and URL"
echo ""
echo "What is your Token"
read -r token
echo ""
echo "What is your URL?"
read -r url
echo ""
# Download and unpack the archive into the app's image directory.
# - "$url" is quoted so '&' or '?' in the link is not interpreted by the shell.
# - -f makes curl fail on an HTTP error instead of saving the error page as
#   elcia.zip, and unzip only runs when the download succeeded.
cd "/home/$USER/flask-elastic-image-search/app/static/images" \
&& curl -fL -H "Authorization: $token" -o elcia.zip "$url" \
&& unzip elcia.zip
# Collect the cluster credentials and endpoint, then append them to the app's .env.
# Sets: cloud_user, cloud_pass, cloud_id, es_client.
# read -r keeps backslashes in the typed/pasted values intact.
echo ""
echo "What is your username? Typically it is just 'elastic'."
echo ""
read -r cloud_user
echo ""
echo "What is your password? If you're using 'elastic' and forgot it, you can reset it from the cloud UI."
echo ""
# -s keeps the password off the screen; the extra echo replaces the newline
# that the terminal no longer prints when Enter is pressed.
read -r -s cloud_pass
echo ""
echo ""
echo "What is your Cloud ID? Please copy & paste it directly from the cloud UI with the trailing '=='"
echo ""
read -r cloud_id
echo ""
echo "Please copy and paste your Elasticsearch endpoint from the cloud UI below WITHOUT the 'https://' portion"
echo ""
echo "Example: my-cluster-is-awesome.es.us-east4.gcp.elastic-cloud.com"
echo ""
echo ""
read -r es_client
# Tolerate a pasted scheme or trailing slash; later commands add "https://" themselves.
es_client=${es_client#https://}
es_client=${es_client%/}
echo ""
# Values are wrapped in plain single quotes. The previous \' inside double
# quotes wrote a literal backslash before each quote into .env, unlike the
# PowerPoint option which writes ES_HOST='...'. tee's output is discarded so
# the password is not echoed to the terminal.
printf "ES_HOST='%s:443'\n" "${es_client}" | sudo tee -a "/home/$USER/flask-elastic-image-search/.env" >/dev/null
printf "ES_USER='%s'\n" "${cloud_user}" | sudo tee -a "/home/$USER/flask-elastic-image-search/.env" >/dev/null
printf "ES_PWD='%s'\n" "${cloud_pass}" | sudo tee -a "/home/$USER/flask-elastic-image-search/.env" >/dev/null
echo ""
echo "Next we'll load the sentence transformers model into your cluster using Docker!"
echo ""
# Build the eland image and import the CLIP text-embedding model. Credentials
# are passed as separate, quoted arguments instead of being embedded in the
# URL, so passwords containing '@', '/' or '&' no longer break the command.
# An existing eland checkout is reused so a re-run does not stop at git clone.
cd "/home/$USER" \
&& { [[ -d eland ]] || git clone https://github.com/elastic/eland.git; } \
&& cd eland \
&& sudo docker build -t elastic/eland . \
&& sudo docker run -it --rm --network host elastic/eland \
eland_import_hub_model \
--url "https://${es_client}:443/" \
-u "${cloud_user}" \
-p "${cloud_pass}" \
--hub-model-id sentence-transformers/clip-ViT-B-32-multilingual-v1 \
--task-type text_embedding \
--start
echo ""
echo "We will now load and embed your images into Elastic, this next process may take while (depending on the number of images you loaded)."
echo ""
cd "/home/$USER/flask-elastic-image-search/image_embeddings" \
&& python3 create-image-embeddings.py \
--es_host="https://${es_client}:443" \
--es_user="${cloud_user}" \
--es_password="${cloud_pass}" \
--ca_certs="../app/conf/ess-cloud.cer"
echo ""
echo "Now let's configure Image Search to run as a service!"
echo ""
echo ""
# Write the unit file and launcher in one piece each, overwriting any previous
# copy. Appending line by line (tee -a) duplicated their content on a re-run.
sudo tee /etc/systemd/system/imagesearch.service >/dev/null <<'UNIT'
[Unit]
After=network.target

[Service]
ExecStart=/usr/local/bin/imagesearch.sh

[Install]
WantedBy=default.target
UNIT
# $USER is expanded now, so the launcher contains the installing user's path.
printf '%s\n' \
'#!/bin/bash' \
"cd /home/$USER/flask-elastic-image-search && flask run --port=5001 --host=0.0.0.0" \
| sudo tee /usr/local/bin/imagesearch.sh >/dev/null
sudo chmod 744 /usr/local/bin/imagesearch.sh
sudo chmod 664 /etc/systemd/system/imagesearch.service
sudo systemctl daemon-reload
# --now starts the service in addition to enabling it at boot; plain "enable"
# left it stopped even though the message below says it is running.
sudo systemctl enable --now imagesearch.service
echo "The ImageSearch Service is now running, please go ahead and check out the UI!!!"
echo "ImageSearch is running on port 5001, so please view http://MY-IP-HERE:5001"
echo ""
echo "Feel free to press ctrl + c to exit out of the status message"
echo ""
sudo systemctl status imagesearch.service
;;
# Configure an Elastic Cloud instance for Image Similarity search (PowerPoint Slide Finder)
3) clear
echo "This is designed to be run on a minimal server install of Ubuntu 22.04 AFTER 'sudo apt update' has been run and the system was rebooted."
echo ""
echo "You will also need a cluster with an ML node with at least 4GB of RAM, as well as login credentials and your cluster's ES endpoint."
echo ""
echo "So please make sure you have all of this prior to continuing."
echo ""
echo ""
echo ""
read -n 1 -s -r -p "Press any key to continue"
# The prompt above leaves the cursor mid-line; start the installer output on a fresh line.
echo ""
sudo apt update -y
# The package patterns are quoted so apt receives them verbatim rather than
# whatever file names they happen to match in the current directory.
sudo apt install 'docker.io*' 'python3-pip*' curl tar unzip -y
sudo pip install streamlit openai Elasticsearch python-dotenv
sudo apt install python3-flask cmdtest -y
sudo apt install libreoffice imagemagick -y
# Relax the ImageMagick policy entry on line 97 from "none" to "read|write".
# The expression used to be wrapped in \'...\', which left the '|' unquoted (so
# the shell split the command into a pipeline) and passed literal quote
# characters to sed. The file lives under /etc and this script refuses to run
# as root, so sudo is required.
# NOTE(review): line 97 is assumed to be the policy entry that blocks PDF
# handling in the stock Ubuntu 22.04 policy.xml — confirm before relying on it.
sudo sed -i.bak '97s/none/read|write/' /etc/ImageMagick-6/policy.xml
# Reuse an existing checkout so a re-run does not fail at git clone.
cd "/home/$USER" && { [[ -d flask-elastic-image-search ]] || git clone https://github.com/radoondas/flask-elastic-image-search.git; }
cd "/home/$USER/flask-elastic-image-search" && pip3 install -r requirements.txt
clear
echo ""
echo "MANUAL INTERVENTION REQUIRED. DO NOT PROCEED UNTIL THIS IS COMPLETED. PLEASE READ EACH STEP."
echo ""
echo "Please load your PPTX formatted powerpoints as a single zip file from the Elastic uploader service:"
echo "https://upload.elastic.co/login"
echo ""
echo "Choose: External"
echo ""
echo "Click the link provided to proceed to the zip file drop page"
echo ""
echo "Drag and drop your zip file that contains your PPTX files there. Once loaded, note the Token and URL"
echo ""
echo "What is your Token"
read -r token
echo ""
echo "What is your URL?"
read -r url
echo ""
# Download and unpack the archive into the app's image directory.
# - "$url" is quoted so '&' or '?' in the link is not interpreted by the shell.
# - -f makes curl fail on an HTTP error instead of saving the error page as
#   elcia.zip, and unzip only runs when the download succeeded.
cd "/home/$USER/flask-elastic-image-search/app/static/images" \
&& curl -fL -H "Authorization: $token" -o elcia.zip "$url" \
&& unzip elcia.zip
# Collect the cluster credentials and endpoint, then append them to the app's .env.
# Sets: cloud_user, cloud_pass, cloud_id, es_client.
# read -r keeps backslashes in the typed/pasted values intact.
echo ""
echo "What is your username? Typically it is just 'elastic'."
echo ""
read -r cloud_user
echo ""
echo "What is your password? If you're using 'elastic' and forgot it, you can reset it from the cloud UI."
echo ""
# -s keeps the password off the screen; the extra echo replaces the newline
# that the terminal no longer prints when Enter is pressed.
read -r -s cloud_pass
echo ""
echo ""
echo "What is your Cloud ID? Please copy & paste it directly from the cloud UI with the trailing '=='"
echo ""
read -r cloud_id
echo ""
echo "Please copy and paste your Elasticsearch endpoint from the cloud UI below WITHOUT the 'https://' portion"
echo ""
echo "Example: my-cluster-is-awesome.es.us-east4.gcp.elastic-cloud.com"
echo ""
echo ""
read -r es_client
# Tolerate a pasted scheme or trailing slash; later commands add "https://" themselves.
es_client=${es_client#https://}
es_client=${es_client%/}
echo ""
# tee's output is discarded so the password is not echoed to the terminal.
printf "ES_HOST='%s:443'\n" "${es_client}" | sudo tee -a "/home/$USER/flask-elastic-image-search/.env" >/dev/null
printf "ES_USER='%s'\n" "${cloud_user}" | sudo tee -a "/home/$USER/flask-elastic-image-search/.env" >/dev/null
printf "ES_PWD='%s'\n" "${cloud_pass}" | sudo tee -a "/home/$USER/flask-elastic-image-search/.env" >/dev/null
echo ""
echo "Next we'll load the sentence transformers model into your cluster using Docker!"
echo ""
# Build the eland image and import the CLIP text-embedding model. Credentials
# are passed as separate, quoted arguments instead of being embedded in the
# URL, so passwords containing '@', '/' or '&' no longer break the command.
# An existing eland checkout is reused so a re-run does not stop at git clone.
cd "/home/$USER" \
&& { [[ -d eland ]] || git clone https://github.com/elastic/eland.git; } \
&& cd eland \
&& sudo docker build -t elastic/eland . \
&& sudo docker run -it --rm --network host elastic/eland \
eland_import_hub_model \
--url "https://${es_client}:443/" \
-u "${cloud_user}" \
-p "${cloud_pass}" \
--hub-model-id sentence-transformers/clip-ViT-B-32-multilingual-v1 \
--task-type text_embedding \
--start
echo ""
echo "Now we will convert your PPTX files into PDFS, then into JPGs for each slide. This may take a while depending on the number of slides."
images_dir="/home/$USER/flask-elastic-image-search/app/static/images"
# PPTX -> PDF with LibreOffice. The ./ prefix keeps file names that start with
# '-' from being parsed as options.
cd "$images_dir" && soffice --headless --convert-to pdf ./*.pptx
# PDF -> JPG with ImageMagick, then drop the intermediate PDFs. The -e guard
# skips the literal pattern when no PDF was produced.
cd "$images_dir" && for f in ./*.pdf; do
[[ -e "$f" ]] || continue
convert -density 150 "$f" "${f%pdf}jpg"
done && rm -f -- "$images_dir"/*.pdf
rm -f -- "$images_dir"/*.pptx
echo ""
echo "We will now load and embed your images into Elastic, this next process may take while (depending on the number of images you loaded)."
echo ""
cd "/home/$USER/flask-elastic-image-search/image_embeddings" \
&& python3 create-image-embeddings.py \
--es_host="https://${es_client}:443" \
--es_user="${cloud_user}" \
--es_password="${cloud_pass}" \
--ca_certs="../app/conf/ess-cloud.cer"
echo ""
echo "Now let's configure Slide Search to run as a service!"
echo ""
echo ""
# Write the unit file and launcher in one piece each, overwriting any previous
# copy. Appending line by line (tee -a) duplicated their content on a re-run.
sudo tee /etc/systemd/system/slidesearch.service >/dev/null <<'UNIT'
[Unit]
After=network.target

[Service]
ExecStart=/usr/local/bin/slidesearch.sh

[Install]
WantedBy=default.target
UNIT
# $USER is expanded now, so the launcher contains the installing user's path.
printf '%s\n' \
'#!/bin/bash' \
"cd /home/$USER/flask-elastic-image-search && flask run --port=5002 --host=0.0.0.0" \
| sudo tee /usr/local/bin/slidesearch.sh >/dev/null
sudo chmod 744 /usr/local/bin/slidesearch.sh
sudo chmod 664 /etc/systemd/system/slidesearch.service
sudo systemctl daemon-reload
# --now starts the service in addition to enabling it at boot; plain "enable"
# left it stopped even though the message below says it is running.
sudo systemctl enable --now slidesearch.service
echo "The SlideSearch Service is now running, please go ahead and check out the UI!!!"
echo "SlideSearch is running on port 5002, so please view http://MY-IP-HERE:5002"
echo ""
echo "Feel free to press ctrl + c to exit out of the status message"
echo ""
sudo systemctl status slidesearch.service
;;
# Just download everything and let the user wire it up.
4) clear
# Was "apt udpate", which apt rejects as an unknown operation.
sudo apt update -y
# The package patterns are quoted so apt receives them verbatim rather than
# whatever file names they happen to match in the current directory.
sudo apt install 'docker.io*' 'python3-pip*' curl libreoffice imagemagick python3-flask cmdtest -y
# pip install has no -y option; passing it made the whole command fail.
sudo pip install streamlit openai Elasticsearch
# Existing checkouts are reused so a re-run does not fail at git clone.
cd "/home/$USER" \
&& { [[ -d eland ]] || git clone https://github.com/elastic/eland.git; } \
&& cd eland \
&& sudo docker build -t elastic/eland .
cd "/home/$USER" && { [[ -d ElasticDocs_GPT ]] || git clone https://github.com/jeffvestal/ElasticDocs_GPT.git; }
cd "/home/$USER" && { [[ -d flask-elastic-image-search ]] || git clone https://github.com/radoondas/flask-elastic-image-search.git; }
cd "/home/$USER" && { [[ -d elasticsearch-labs ]] || git clone https://github.com/elastic/elasticsearch-labs.git; }
;;
# Leave without doing anything.
5) clear
exit
;;
esac
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment