This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[DEFAULT]
product_ranking_start = 10
product_ranking_steps_down = 0.2
cat_ranking_start = 10
cat_ranking_steps_down = 0.2
product_recommendations_campaignArn=arn:aws:personalize:us-east-2:11123456:campaign/es-test03-hrnn
product_rankings_campaignArn=arn:aws:personalize:us-east-2:222789:campaign/es-test03-rank
property1_recommendations_campaignArn=arn:aws:personalize:us-east-2:3333456:campaign/es-test03-cat-hrnn
property1_rankings_campaignArn=arn:aws:personalize:us-east-2:444789:campaign/es-test03-cat-rank
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def query_es(text_search, category_boost_pairs, product_id_weight_pairs): | |
client = Elasticsearch() | |
the_body = { | |
"query": { | |
"function_score": { | |
"query": { | |
"bool": { | |
"should": arrange_json_array( | |
transform_category_boost(category_boost_pairs), { | |
"match": { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_product_ranking(config, user, input_list):
    """Re-rank the given product ids for *user* via the product-ranking campaign.

    Reads the campaign ARN and the start/steps_down score parameters from the
    [DEFAULT] section of *config* (see config.conf). Returns whatever
    _get_ranking produces — presumably (product_id, weight) pairs; confirm
    against _get_ranking.
    """
    _log_info('Retrieving product ranking')
    # Hoist the section lookup; the three keys all live under [DEFAULT].
    defaults = config['DEFAULT']
    return _get_ranking(defaults['product_rankings_campaignArn'], user, input_list,
                        float(defaults['product_ranking_start']),
                        float(defaults['product_ranking_steps_down']))
def _get_ranking(campaign, user, input_list, start, steps_down): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_product_recommendations(config, user):
    """Return Amazon Personalize product recommendations for *user*.

    The campaign ARN comes from the [DEFAULT] section of *config*
    (product_recommendations_campaignArn).
    """
    _log_info('Retrieving product recommendations')
    return _get_recommendations(user, config['DEFAULT']['product_recommendations_campaignArn'])
def _get_recommendations(user, campaign): | |
personalize = boto3.client('personalize-runtime', 'us-east-2') | |
response = personalize.get_recommendations(campaignArn=campaign, userId=user) | |
answer = [] | |
_log_info('Recommended items:') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def search_with_personalization(user, search):
    """Run an Elasticsearch query for *search*, personalized for *user*.

    Pipeline: read campaign settings from config.conf, fetch category and
    product recommendations from Amazon Personalize, re-rank each list for
    this user, then feed the ranked boosts into the Elasticsearch query.
    """
    config = configparser.ConfigParser()
    config.read('config.conf')
    # Category boosts: recommend, then re-rank for this user.
    categories = get_category_recommendations(config, user)
    ranked_categories = get_category_ranking(config, user, categories)
    # Product boosts: same recommend-then-rank pipeline.
    products = get_product_recommendations(config, user)
    ranked_products = get_product_ranking(config, user, products)
    query_es(search, ranked_categories, ranked_products)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyspark.sql.functions as f | |
import pyspark.sql.types as t | |
# ... | |
# Duplicate columnA into two new columns, columnB and columnC.
data_frame = data_frame.withColumn('columnB', data_frame['columnA'])
data_frame = data_frame.withColumn('columnC', data_frame['columnA'])
# Column names used downstream — TODO confirm what consumes `attrs`.
attrs = ['columnA', 'columnB', 'columnC']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyspark.sql.functions as f | |
import pyspark.sql.types as t | |
# ... | |
def udf_concat_vec(a, b):
    """Concatenate two Spark vectors into one flat Python list of floats.

    *a* and *b* are expected to expose ``toArray()`` (e.g. SparseVector /
    DenseVector); ``toArray()`` densifies them, so the result is a plain
    list suitable for an ArrayType(FloatType()) UDF return value.
    """
    return np.concatenate((a.toArray(), b.toArray())).tolist()
# Register the concatenation helper as a Spark UDF returning array<float>.
my_udf_concat_vec = f.UserDefinedFunction(udf_concat_vec, t.ArrayType(t.FloatType()))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Apply the pandas_nlp UDF to the description column; presumably produces a
# per-row vector from the text — confirm against pandas_nlp's definition.
dataframe = dataframe.withColumn('description_vec', pandas_nlp('description'))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from pyspark.sql.functions import pandas_udf, PandasUDFType | |
import spacy | |
#...
# Large model alternative (ships word vectors) — swap in if vector quality matters.
# nlp = spacy.load('en_core_web_lg')
nlp = spacy.load('en_core_web_sm')  # small model: faster to download and load
#...
# Use pandas_udf to define a Pandas UDF
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fill/replace nulls in ACOLUMN first, then one-hot encode the cleaned column.
dataframe = dataframe.withColumn('ACOLUMN_not_null', pandas_not_null('ACOLUMN'))
dataframe = one_hot_encode(dataframe, "ACOLUMN_not_null", "ACOLUMN_one_hot")
NewerOlder