This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# text preprocessing modules | |
from string import punctuation | |
# text preprocessing modules | |
from nltk.tokenize import word_tokenize | |
import nltk | |
from nltk.corpus import stopwords | |
from nltk.stem import WordNetLemmatizer | |
import re # regular expression | |
import os | |
from os.path import dirname, join, realpath |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@app.get("/predict-review")
def predict_sentiment(review: str):
    """
    Receive a review's content and predict the sentiment of that content.

    :param review: raw review text to analyse
    :return: prediction, probabilities
    """
    # NOTE(review): this excerpt ends after the cleaning step; the code that
    # computes and returns the prediction is not visible here.
    # clean the review
    cleaned_review = text_cleaning(review)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def text_cleaning(text, remove_stop_words=True, lemmatize_words=True):
    """
    Clean the text, with the option to remove stop words and to lemmatize words.

    :param text: raw text to clean
    :param remove_stop_words: whether stop words should be removed
    :param lemmatize_words: whether words should be lemmatized

    NOTE(review): this excerpt ends before the stop-word / lemmatization
    steps and before any return statement, so neither flag is used in the
    visible code.
    """
    # Replace every character that is not an ASCII letter or digit with a space.
    text = re.sub(r"[^A-Za-z0-9]", " ", text)
    # NOTE(review): the substitution above has already removed apostrophes,
    # ':', '/' and '.', so the "'s" pattern can no longer match, URLs have
    # already been split apart, and the decimal part of the number pattern
    # never applies. The order is left untouched on purpose: presumably the
    # model was fitted on text cleaned exactly this way -- confirm before
    # reordering.
    text = re.sub(r"\'s", " ", text)
    text = re.sub(r"http\S+", " link ", text)
    text = re.sub(r"\b\d+(?:\.\d+)?\s+", "", text)  # remove numbers
    # Remove punctuation from text (only letters, digits and spaces remain at
    # this point, so this filter is effectively a safety net).
    text = "".join([c for c in text if c not in punctuation])
    # Optionally, remove stop words
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the sentiment model pipeline from the "models" directory located next
# to this source file.
# NOTE(security): joblib.load unpickles the file and can therefore execute
# arbitrary code; only load model files that come from a trusted source.
with open(
    join(dirname(realpath(__file__)), "models/sentiment_model_pipeline.pkl"), "rb"
) as f:
    model = joblib.load(f)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Application object carrying the API metadata (title, description, version).
app = FastAPI(
    title="Sentiment Model API",
    description="A simple API that use NLP model to predict the sentiment of the movie's reviews",
    version="0.1",
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# text preprocessing modules | |
from string import punctuation | |
# text preprocessing modules | |
from nltk.tokenize import word_tokenize | |
import nltk | |
from nltk.corpus import stopwords | |
from nltk.stem import WordNetLemmatizer | |
import re # regular expression | |
import os | |
from os.path import dirname, join, realpath |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demo: run an English sentence through detect_and_translate with the target
# language code "sw" and print the result.
sentence = "I hope that, when I've built up my savings, I'll be able to travel to Mexico"
print(detect_and_translate(sentence, target_lang='sw'))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from langdetect import detect | |
from google_trans_new import google_translator | |
# simple function to detect and translate text
def detect_and_translate(text, target_lang):
    """
    Detect the language of ``text`` and return it unchanged when it is
    already in ``target_lang``.

    :param text: text whose language is detected
    :param target_lang: language code compared against the detected language

    NOTE(review): this excerpt ends after the same-language branch; the
    translation branch for differing languages is not visible here.
    """
    result_lang = detect(text)
    if result_lang == target_lang:
        return text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from google_trans_new import google_translator | |
# Demo: create a translator client, translate a Swahili sentence into English
# (lang_tgt='en') and print the translation.
translator = google_translator()
sentence = "Tanzania ni nchi inayoongoza kwa utalii barani afrika"
translate_text = translator.translate(sentence, lang_tgt='en')
print(translate_text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from langdetect import DetectorFactory | |
# Pin langdetect's seed so that detecting the same text repeatedly yields the
# same language (the detector is otherwise non-deterministic).
DetectorFactory.seed = 0
NewerOlder