Created
June 12, 2024 17:36
-
-
Save iamhowardtheduck/811e0da772e52df8e265cc55baa9f758 to your computer and use it in GitHub Desktop.
ELCIA-V3-unsecured
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
clear
echo -e "\n\n\n\n\n\n\n"
# Refuse to run with root privileges. Later steps invoke 'sudo' selectively,
# and paths such as /home/$USER would wrongly resolve under /root otherwise.
if [[ $EUID -eq 0 ]]; then
  echo "This script must NOT be run as \"root\" OR as \"sudo $USER\"; please try again." 1>&2
  exit 1
fi
#
# BEGIN WELCOME SCREEN & INITIAL UPDATING
#
clear
echo -e "\n\n\n\n\n\n\n"
echo "          Welcome I am ELCIA, your..."
echo "Elastic & ChatGPT Integration Application"
echo -e "\n\n\n\n\n\n\n"
echo "You can choose to either:"
echo ""
echo "Configure an Elastic Cloud instance with ChatGPT using sentence transformers for title vectorization"
echo ""
#echo "Configure an Elastic Cloud instance with ChatGPT using ELSER for title, body, description, and keyword fields for semantic search"
#echo ""
echo "Configure an Elastic Cloud instance for Image Similarity search. (No OpenAI API required!)"
echo ""
echo "Configure an Elastic Cloud instance for Image Similarity search for the PowerPoint Slide Finder use-case. (No OpenAI API required!)"
echo ""
echo "Download the guts and do the rest yourself."
echo -e "\n\n\n"
echo "WARNING: CURRENTLY ELCIA ONLY SUPPORTS OPENAI FOR OPTION 1, NOT AZURE'S OPENAI. Please obtain your personal openai key here:"
echo ""
echo "https://platform.openai.com/"
echo -e "\n\n\n"
echo "But first we must run a few commands to get ready."
echo -e "\n\n\n"
read -n 1 -s -r -p "Press any key to continue"
echo ""
echo "Enjoy! ☺"
# 'dialog' is required for the menu below; 'git' for all repository clones.
sudo apt update -y
sudo apt install dialog git -y
clear
#
# Main menu: a dialog radiolist. The chosen option number is captured on
# stdout while the dialog UI itself is redirected to the terminal.
cmd=(dialog --radiolist "Which would you like to do?" 22 135 16)
options=(1 "Configure an Elastic Cloud instance with ChatGPT using sentence transformers for title vectorization" off # any option can be set to default to "on"
         2 "Configure an Elastic Cloud instance for Image Similarity search. No OpenAI API required!" off
         3 "Configure an Elastic Cloud instance for Image Similarity search for the PowerPoint Slide Finder use-case. No OpenAI API required!" off
         4 "Just download the guts for all the awesome search apps and build it yourself" off
         5 "Make like a tree, and leave." off)
choices=$("${cmd[@]}" "${options[@]}" 2>&1 >/dev/tty)
clear
# Dispatch on the menu selection. $choices is the option number picked in the
# dialog radiolist above; $USER must be a non-root user with sudo rights.
for choice in $choices
do
case $choice in
# Option 1: Configure an Elastic Cloud instance with ChatGPT using sentence
# transformers for title vectorization.
1) clear
   echo ""
   echo "You will need a version 8.8+ cluster with an ML node with at least 4GB of RAM, as well as login credentials and your cluster's ES endpoint."
   echo ""
   echo "So please make sure you have all of this prior to continuing."
   echo ""
   echo -e "\n\n\n"
   echo "WARNING: CURRENTLY ELCIA ONLY SUPPORTS OPENAI FOR THIS OPTION, NOT AZURE'S OPENAI. Please obtain your personal openai key here:"
   echo ""
   echo "https://platform.openai.com/"
   echo -e "\n\n\n"
   echo ""
   echo ""
   read -n 1 -s -r -p "Press any key to continue"
   sudo apt install docker.io* python3-pip* curl -y
   sudo pip install streamlit openai Elasticsearch
   clear
   echo ""
   echo "What is your username? Typically it is just 'elastic'."
   echo ""
   read -r cloud_user
   echo ""
   echo "What is your password? If you're using 'elastic' and forgot it, you can reset it from the cloud UI."
   echo ""
   read -r cloud_pass
   echo ""
   echo "What is your Cloud ID? Please copy & paste it directly from the cloud UI with the trailing '=='"
   echo ""
   read -r cloud_id
   echo ""
   echo "Please copy and paste your Elasticsearch endpoint from the cloud UI below WITHOUT the 'https://' portion"
   echo ""
   echo "Example: my-cluster-is-awesome.es.us-east4.gcp.elastic-cloud.com"
   echo ""
   echo ""
   read -r es_client
   echo ""
   echo ""
   echo "Next we'll load the sentence transformers model into your cluster using Docker!"
   echo ""
   cd "/home/$USER" && git clone https://github.com/elastic/eland.git && cd eland && sudo docker build -t elastic/eland . && sudo docker run -it --rm --network host elastic/eland eland_import_hub_model --url https://${cloud_user}:${cloud_pass}@${es_client}:9243/ --hub-model-id sentence-transformers/all-distilroberta-v1 --start
   echo ""
   echo "Next let's create the pipeline so you can use it over and over again!"
   echo ""
   curl -X PUT "https://${cloud_user}:${cloud_pass}@${es_client}:9243/_ingest/pipeline/ml-inference-title-vector?pretty" -H 'Content-Type: application/json' -d' {"processors":[{"remove": {"field": "ml.inference.title-vector", "ignore_missing": true}},{"remove": {"field": "title-vector", "ignore_missing": true}},{"inference": {"field_map": {"title": "text_field"}, "model_id": "sentence-transformers__all-distilroberta-v1","target_field": "ml.inference.title-vector","on_failure":[{"append":{"field":"_source._ingest.inference_errors","value":[{"message": "Processor 'inference' in pipeline ml-inference-title-vector failed with message {{ _ingest.on_failure_message }}","pipeline": "ml-inference-title-vector","timestamp":"{{{ _ingest.timestamp }}}"}]}}]}},{"append": {"field":"_source._ingest.processors","value": [{"model_version":"8.8.1","pipeline":"ml-inference-title-vector","processed_timestamp":"{{{ _ingest.timestamp }}}","types":["pytorch","text_embedding"]}]}},{"set":{"copy_from":"ml.inference.title-vector.predicted_value","description": "Copy the predicted_value to title-vector","field": "title-vector","if": "ctx?.ml?.inference != null && ctx.ml.inference['\''title-vector'\''] != null"}}]} '
   echo ""
   echo "Now let's prepare the index so that when you go to create it in the GUI, you won't have to update the mappings in DevTools!"
   echo ""
   echo "This will create the 'elcia-script' index template which will be used for all 'search-*' indices"
   echo ""
   curl -X PUT "https://${cloud_user}:${cloud_pass}@${es_client}:9243/_index_template/elcia-script?pretty" -H 'Content-Type: application/json' -d '{"index_patterns": ["search-*"],"template":{"settings": {"number_of_shards": 2,"auto_expand_replicas": "0-3","default_pipeline":"ml-inference-title-vector","similarity": {"default": {"type": "BM25"}}},"mappings": {"properties": {"title-vector": {"type": "dense_vector","dims": 768,"index": true,"similarity": "dot_product"},"created_at":{"type":"date","format":"EEE MMM dd HH:mm:ss Z yyyy"}}}}}'
   echo ""
   echo "Now we'll create the search application"
   echo ""
   echo "But first two variables need to be passed."
   echo ""
   echo "What will your index be? Typically it's 'search-something'"
   echo ""
   read -r index
   echo ""
   echo "Next, what will we call your ChatGPT web UI? Typically it's the name of the website you plan on crawling."
   echo ""
   echo "So if you plan on crawling 'widgets.com' and want a Widgets GPT UI, just put 'Widgets' and I'll make the necessary changes for you."
   echo ""
   read -r engine
   echo ""
   cd "/home/$USER" && git clone https://github.com/jeffvestal/ElasticDocs_GPT.git
   # Patch the demo app in place; double quotes are required on the lines that
   # expand ${index}/${engine}. '|' is the sed delimiter so values containing
   # '/' do not break the expression.
   sed -i.bak '22s|0301|0613|' "/home/$USER/ElasticDocs_GPT/elasticdocs_gpt.py"
   sed -i.bak '26s|http|basic|' "/home/$USER/ElasticDocs_GPT/elasticdocs_gpt.py"
   sed -i.bak "69s|elastic-docs|${index}|" "/home/$USER/ElasticDocs_GPT/elasticdocs_gpt.py"
   sed -i.bak "100s|ElasticDocs|${engine}|" "/home/$USER/ElasticDocs_GPT/elasticdocs_gpt.py"
   sed -i.bak '108s|Elastic Docs|the Elastic Data-Set|' "/home/$USER/ElasticDocs_GPT/elasticdocs_gpt.py"
   echo ""
   echo ""
   echo "Now let's configure streamlit (the web UI) to run as a service, and all you'll have to do is crawl!"
   echo ""
   echo ""
   echo "What is your Open AI api key?"
   echo ""
   read -r openai_api
   # Write the unit and launcher in truncate mode (not append) so a re-run
   # does not duplicate their contents.
   sudo tee /etc/systemd/system/chatgpt4all.service >/dev/null <<EOF
[Unit]
Description=ELCIA ChatGPT web UI (streamlit)
After=network.target

[Service]
ExecStart=/usr/local/bin/chatgpt.sh

[Install]
WantedBy=default.target
EOF
   # NOTE(review): credentials end up in plain text in this launcher; keep it
   # readable by root only (mode 700).
   sudo tee /usr/local/bin/chatgpt.sh >/dev/null <<EOF
#!/bin/bash
export cloud_id=${cloud_id}
export cloud_user=${cloud_user}
export cloud_pass=${cloud_pass}
export openai_api=${openai_api}
streamlit run /home/$USER/ElasticDocs_GPT/elasticdocs_gpt.py
EOF
   sudo chmod 700 /usr/local/bin/chatgpt.sh
   sudo chmod 664 /etc/systemd/system/chatgpt4all.service
   sudo systemctl daemon-reload
   # --now both enables and starts the unit, matching the message below.
   sudo systemctl enable --now chatgpt4all.service
   echo "The Chat GPT Service is now running, please go ahead and crawl!!!"
   echo ""
   echo "Feel free to press ctrl + c to exit out of the status message"
   echo ""
   sudo systemctl status chatgpt4all.service
   ;;
# Option 2: Image Similarity search (no OpenAI API required).
2) clear
   echo "This is designed to be run on a minimal server install of Ubuntu 22.04 AFTER 'sudo apt update' has been run and the system was rebooted."
   echo ""
   echo "You will also need a cluster with an ML node with at least 4GB of RAM, as well as login credentials and your cluster's ES endpoint."
   echo ""
   echo "So please make sure you have all of this prior to continuing."
   echo ""
   echo ""
   echo ""
   read -n 1 -s -r -p "Press any key to continue"
   sudo apt install docker.io* python3-pip* curl tar unzip -y
   sudo pip install streamlit openai Elasticsearch python-dotenv
   sudo apt install python3-flask cmdtest -y
   cd "/home/$USER" && git clone https://github.com/radoondas/flask-elastic-image-search.git
   cd "/home/$USER/flask-elastic-image-search" && pip3 install -r requirements.txt
   clear
   echo ""
   echo "MANUAL INTERVENTION REQUIRED. DO NOT PROCEED UNTIL THIS IS COMPLETED. PLEASE READ EACH STEP."
   echo ""
   echo "Please load your images as a single zip file from the Elastic uploader service:"
   echo "https://upload.elastic.co/login"
   echo ""
   echo "Choose: External"
   echo ""
   echo "Click the link provided to proceed to the zip file drop page"
   echo ""
   echo "Drag and drop your zip file that contains your image files there. Once loaded, note the Token and URL"
   echo ""
   echo "What is your Token"
   read -r token
   echo ""
   echo "What is your URL?"
   read -r url
   echo ""
   cd "/home/$USER/flask-elastic-image-search/app/static/images" && curl -L -H "Authorization: $token" -o elcia.zip "$url"
   cd "/home/$USER/flask-elastic-image-search/app/static/images" && unzip elcia.zip
   echo ""
   echo "What is your username? Typically it is just 'elastic'."
   echo ""
   read -r cloud_user
   echo ""
   echo "What is your password? If you're using 'elastic' and forgot it, you can reset it from the cloud UI."
   echo ""
   read -r cloud_pass
   echo ""
   echo "What is your Cloud ID? Please copy & paste it directly from the cloud UI with the trailing '=='"
   echo ""
   read -r cloud_id
   echo ""
   echo "Please copy and paste your Elasticsearch endpoint from the cloud UI below WITHOUT the 'https://' portion"
   echo ""
   echo "Example: my-cluster-is-awesome.es.us-east4.gcp.elastic-cloud.com"
   echo ""
   echo ""
   read -r es_client
   echo ""
   # Single-quoted values, no stray backslashes; truncate so re-runs don't
   # append duplicate keys.
   sudo tee "/home/$USER/flask-elastic-image-search/.env" >/dev/null <<EOF
ES_HOST='${es_client}:443'
ES_USER='${cloud_user}'
ES_PWD='${cloud_pass}'
EOF
   echo ""
   echo "Next we'll load the sentence transformers model into your cluster using Docker!"
   echo ""
   cd "/home/$USER" && git clone https://github.com/elastic/eland.git && cd eland && sudo docker build -t elastic/eland . && sudo docker run -it --rm --network host elastic/eland eland_import_hub_model --url https://${cloud_user}:${cloud_pass}@${es_client}:443/ --hub-model-id sentence-transformers/clip-ViT-B-32-multilingual-v1 --task-type text_embedding --start
   echo ""
   echo "We will now load and embed your images into Elastic, this next process may take while (depending on the number of images you loaded)."
   echo ""
   cd "/home/$USER/flask-elastic-image-search/image_embeddings" && python3 create-image-embeddings.py --es_host="https://${es_client}:443" --es_user="${cloud_user}" --es_password="${cloud_pass}" --ca_certs="../app/conf/ess-cloud.cer"
   echo ""
   echo "Now let's configure Image Search to run as a service!"
   echo ""
   echo ""
   sudo tee /etc/systemd/system/imagesearch.service >/dev/null <<EOF
[Unit]
Description=ELCIA Image Similarity search (flask)
After=network.target

[Service]
ExecStart=/usr/local/bin/imagesearch.sh

[Install]
WantedBy=default.target
EOF
   sudo tee /usr/local/bin/imagesearch.sh >/dev/null <<EOF
#!/bin/bash
cd /home/$USER/flask-elastic-image-search && flask run --port=5001 --host=0.0.0.0
EOF
   sudo chmod 744 /usr/local/bin/imagesearch.sh
   sudo chmod 664 /etc/systemd/system/imagesearch.service
   sudo systemctl daemon-reload
   sudo systemctl enable --now imagesearch.service
   echo "The ImageSearch Service is now running, please go ahead and check out the UI!!!"
   echo "ImageSearch is running on port 5001, so please view http://MY-IP-HERE:5001"
   echo ""
   echo "Feel free to press ctrl + c to exit out of the status message"
   echo ""
   sudo systemctl status imagesearch.service
   ;;
# Option 3: Image Similarity search for the PowerPoint Slide Finder use-case.
3) clear
   echo "This is designed to be run on a minimal server install of Ubuntu 22.04 AFTER 'sudo apt update' has been run and the system was rebooted."
   echo ""
   echo "You will also need a cluster with an ML node with at least 4GB of RAM, as well as login credentials and your cluster's ES endpoint."
   echo ""
   echo "So please make sure you have all of this prior to continuing."
   echo ""
   echo ""
   echo ""
   read -n 1 -s -r -p "Press any key to continue"
   sudo apt update -y
   sudo apt install docker.io* python3-pip* curl tar unzip -y
   sudo pip install streamlit openai Elasticsearch python-dotenv
   sudo apt install python3-flask cmdtest -y
   sudo apt install libreoffice imagemagick -y
   # Allow ImageMagick to read/write PDFs (its default policy blocks them).
   # /etc requires root, hence sudo.
   sudo sed -i.bak '97s|none|read\|write|' /etc/ImageMagick-6/policy.xml
   cd "/home/$USER" && git clone https://github.com/radoondas/flask-elastic-image-search.git
   cd "/home/$USER/flask-elastic-image-search" && pip3 install -r requirements.txt
   clear
   echo ""
   echo "MANUAL INTERVENTION REQUIRED. DO NOT PROCEED UNTIL THIS IS COMPLETED. PLEASE READ EACH STEP."
   echo ""
   echo "Please load your PPTX formatted powerpoints as a single zip file from the Elastic uploader service:"
   echo "https://upload.elastic.co/login"
   echo ""
   echo "Choose: External"
   echo ""
   echo "Click the link provided to proceed to the zip file drop page"
   echo ""
   echo "Drag and drop your zip file that contains your PPTX files there. Once loaded, note the Token and URL"
   echo ""
   echo "What is your Token"
   read -r token
   echo ""
   echo "What is your URL?"
   read -r url
   echo ""
   cd "/home/$USER/flask-elastic-image-search/app/static/images" && curl -L -H "Authorization: $token" -o elcia.zip "$url"
   cd "/home/$USER/flask-elastic-image-search/app/static/images" && unzip elcia.zip
   echo ""
   echo "What is your username? Typically it is just 'elastic'."
   echo ""
   read -r cloud_user
   echo ""
   echo "What is your password? If you're using 'elastic' and forgot it, you can reset it from the cloud UI."
   echo ""
   read -r cloud_pass
   echo ""
   echo "What is your Cloud ID? Please copy & paste it directly from the cloud UI with the trailing '=='"
   echo ""
   read -r cloud_id
   echo ""
   echo "Please copy and paste your Elasticsearch endpoint from the cloud UI below WITHOUT the 'https://' portion"
   echo ""
   echo "Example: my-cluster-is-awesome.es.us-east4.gcp.elastic-cloud.com"
   echo ""
   echo ""
   read -r es_client
   echo ""
   sudo tee "/home/$USER/flask-elastic-image-search/.env" >/dev/null <<EOF
ES_HOST='${es_client}:443'
ES_USER='${cloud_user}'
ES_PWD='${cloud_pass}'
EOF
   echo ""
   echo "Next we'll load the sentence transformers model into your cluster using Docker!"
   echo ""
   cd "/home/$USER" && git clone https://github.com/elastic/eland.git && cd eland && sudo docker build -t elastic/eland . && sudo docker run -it --rm --network host elastic/eland eland_import_hub_model --url https://${cloud_user}:${cloud_pass}@${es_client}:443/ --hub-model-id sentence-transformers/clip-ViT-B-32-multilingual-v1 --task-type text_embedding --start
   echo ""
   echo "Now we will convert your PPTX files into PDFS, then into JPGs for each slide. This may take a while depending on the number of slides."
   cd "/home/$USER/flask-elastic-image-search/app/static/images" && soffice --headless --convert-to pdf *.pptx
   cd "/home/$USER/flask-elastic-image-search/app/static/images" && for f in *.pdf; do convert -density 150 "$f" "${f%pdf}jpg"; done && rm -rf "/home/$USER/flask-elastic-image-search/app/static/images/"*.pdf
   rm -rf "/home/$USER/flask-elastic-image-search/app/static/images/"*.pptx
   echo ""
   echo "We will now load and embed your images into Elastic, this next process may take while (depending on the number of images you loaded)."
   echo ""
   cd "/home/$USER/flask-elastic-image-search/image_embeddings" && python3 create-image-embeddings.py --es_host="https://${es_client}:443" --es_user="${cloud_user}" --es_password="${cloud_pass}" --ca_certs="../app/conf/ess-cloud.cer"
   echo ""
   echo "Now let's configure Slide Search to run as a service!"
   echo ""
   echo ""
   sudo tee /etc/systemd/system/slidesearch.service >/dev/null <<EOF
[Unit]
Description=ELCIA PowerPoint Slide Finder (flask)
After=network.target

[Service]
ExecStart=/usr/local/bin/slidesearch.sh

[Install]
WantedBy=default.target
EOF
   sudo tee /usr/local/bin/slidesearch.sh >/dev/null <<EOF
#!/bin/bash
cd /home/$USER/flask-elastic-image-search && flask run --port=5002 --host=0.0.0.0
EOF
   sudo chmod 744 /usr/local/bin/slidesearch.sh
   sudo chmod 664 /etc/systemd/system/slidesearch.service
   sudo systemctl daemon-reload
   sudo systemctl enable --now slidesearch.service
   echo "The SlideSearch Service is now running, please go ahead and check out the UI!!!"
   echo "SlideSearch is running on port 5002, so please view http://MY-IP-HERE:5002"
   echo ""
   echo "Feel free to press ctrl + c to exit out of the status message"
   echo ""
   sudo systemctl status slidesearch.service
   ;;
# Option 4: just download everything and let the user build it themselves.
4) clear
   # 'udpate' typo fixed; pip has no '-y' flag.
   sudo apt update -y
   sudo apt install docker.io* python3-pip* curl libreoffice imagemagick python3-flask cmdtest -y
   sudo pip install streamlit openai Elasticsearch
   cd "/home/$USER" && git clone https://github.com/elastic/eland.git && cd eland && sudo docker build -t elastic/eland .
   cd "/home/$USER" && git clone https://github.com/jeffvestal/ElasticDocs_GPT.git
   cd "/home/$USER" && git clone https://github.com/radoondas/flask-elastic-image-search.git
   cd "/home/$USER" && git clone https://github.com/elastic/elasticsearch-labs.git
   ;;
# Option 5: exit.
5) clear
   exit
   ;;
esac
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment