-
array_append(array<T> arr, T elem)
- Append an element to the end of an array
-
array_avg(array<number>)
- Returns an array<double> in which each element is the mean of a set of numbers
-
array_concat(array<ANY> x1, array<ANY> x2, ..)
- Returns a concatenated array
select array_concat(array(1),array(2,3));
> [1,2,3]
-
array_flatten(array<array<ANY>>)
- Returns an array with the elements flattened.
-
array_intersect(array<ANY> x1, array<ANY> x2, ..)
- Returns an intersect of given arrays
select array_intersect(array(1,3,4),array(2,3,4),array(3,5));
> [3]
-
array_remove(array<int|text> original, int|text|array<int> target)
- Returns the original array with the target removed
select array_remove(array(1,null,3),array(null));
> [3]
select array_remove(array("aaa","bbb"),"bbb");
> ["aaa"]
-
array_slice(array<ANY> values, int offset [, int length])
- Slices the given array by the given offset and length parameters.
select array_slice(array(1,2,3,4,5,6), 2,4);
> [3,4]
-
array_sum(array<number>)
- Returns an array<double> in which each element is summed up
-
array_union(array1, array2, ...)
- Returns the union of a set of arrays
-
conditional_emit(array<boolean> conditions, array<primitive> features)
- Emit features of a row according to various conditions
-
element_at(array<T> list, int pos)
- Returns an element at the given position
-
first_element(x)
- Returns the first element in an array
-
float_array(nDims)
- Returns an array<float> of nDims elements
-
last_element(x)
- Return the last element in an array
-
select_k_best(array<number> array, const array<number> importance, const int k)
- Returns selected top-k elements as array<double>
-
sort_and_uniq_array(array<int>)
- Takes array<int> and returns a sorted array with duplicate elements eliminated
select sort_and_uniq_array(array(3,1,1,-2,10));
> [-2,1,3,10]
-
subarray_endwith(array<int|text> original, int|text key)
- Returns an array that ends with the specified key
select subarray_endwith(array(1,2,3,4), 3);
> [1,2,3]
-
subarray_startwith(array<int|text> original, int|text key)
- Returns an array that starts with the specified key
select subarray_startwith(array(1,2,3,4), 2);
> [2,3,4]
-
to_string_array(array<ANY>)
- Returns an array of strings
-
to_ordered_list(PRIMITIVE value [, PRIMITIVE key, const string options])
- Return list of values sorted by value itself or specific key
with t as (
select 5 as key, 'apple' as value
union all
select 3 as key, 'banana' as value
union all
select 4 as key, 'candy' as value
union all
select 2 as key, 'donut' as value
union all
select 3 as key, 'egg' as value
)
select -- expected output
to_ordered_list(value, key, '-reverse'), -- [apple, candy, (banana, egg | egg, banana), donut] (reverse order)
to_ordered_list(value, key, '-k 2'), -- [apple, candy] (top-k)
to_ordered_list(value, key, '-k 100'), -- [apple, candy, (banana, egg | egg, banana), donut]
to_ordered_list(value, key, '-k 2 -reverse'), -- [donut, (banana | egg)] (reverse top-k = tail-k)
to_ordered_list(value, key), -- [donut, (banana, egg | egg, banana), candy, apple] (natural order)
to_ordered_list(value, key, '-k -2'), -- [donut, (banana | egg)] (tail-k)
to_ordered_list(value, key, '-k -100'), -- [donut, (banana, egg | egg, banana), candy, apple]
to_ordered_list(value, key, '-k -2 -reverse'), -- [apple, candy] (reverse tail-k = top-k)
to_ordered_list(value, '-k 2'), -- [egg, donut] (alphabetically)
to_ordered_list(key, '-k -2 -reverse'), -- [5, 4] (top-2 keys)
to_ordered_list(key) -- [2, 3, 3, 4, 5] (natural ordered keys)
from
t
-
map_get_sum(map<int,float> src, array<int> keys)
- Returns sum of values that are retrieved by keys
-
map_tail_n(map SRC, int N)
- Returns the last N elements from a sorted array of SRC
-
to_map(key, value)
- Convert two aggregated columns into a key-value map
-
to_ordered_map(key, value [, const int k|const boolean reverseOrder=false])
- Convert two aggregated columns into an ordered key-value map
with t as (
select 10 as key, 'apple' as value
union all
select 3 as key, 'banana' as value
union all
select 4 as key, 'candy' as value
)
select
to_ordered_map(key, value, true), -- {10:"apple",4:"candy",3:"banana"} (reverse)
to_ordered_map(key, value, 1), -- {10:"apple"} (top-1)
to_ordered_map(key, value, 2), -- {10:"apple",4:"candy"} (top-2)
to_ordered_map(key, value, 3), -- {10:"apple",4:"candy",3:"banana"} (top-3)
to_ordered_map(key, value, 100), -- {10:"apple",4:"candy",3:"banana"} (top-100)
to_ordered_map(key, value), -- {3:"banana",4:"candy",10:"apple"} (natural)
to_ordered_map(key, value, -1), -- {3:"banana"} (tail-1)
to_ordered_map(key, value, -2), -- {3:"banana",4:"candy"} (tail-2)
to_ordered_map(key, value, -3), -- {3:"banana",4:"candy",10:"apple"} (tail-3)
to_ordered_map(key, value, -100) -- {3:"banana",4:"candy",10:"apple"} (tail-100)
from t
-
base91(BINARY bin)
- Convert the argument from binary to a BASE91 string
select base91(deflate('aaaaaaaaaaaaaaaabbbbccc'));
> AA+=kaIM|WTt!+wbGAA
-
is_stopword(string word)
- Returns whether English stopword or not
-
normalize_unicode(string str [, string form])
- Transforms str with the specified normalization form. The form takes one of NFC (default), NFD, NFKC, or NFKD
select normalize_unicode('ﾊﾝｶｸｶﾅ','NFKC');
> ハンカクカナ
select normalize_unicode('㈱㌧㌦Ⅲ','NFKC');
> (株)トンドルIII
-
singularize(string word)
- Returns singular form of a given English word
select singularize(lower("Apples"));
> "apple"
-
split_words(string query [, string regex])
- Returns an array<text> containing split strings
-
tokenize(string englishText [, boolean toLowerCase])
- Returns tokenized words in array<string>
-
unbase91(string)
- Convert a BASE91 string to a binary
select inflate(unbase91(base91(deflate('aaaaaaaaaaaaaaaabbbbccc'))));
> aaaaaaaaaaaaaaaabbbbccc
-
word_ngrams(array<string> words, int minSize, int maxSize)
- Returns list of n-grams for given words, where minSize <= n <= maxSize
select word_ngrams(tokenize('Machine learning is fun!', true), 1, 2);
> ["machine","machine learning","learning","learning is","is","is fun","fun"]
-
train_arow_regr(array<int|bigint|string> features, float target [, constant string options])
- Returns a relation consisting of <{int|bigint|string} feature, float weight, float covar>
-
train_arowe2_regr(array<int|bigint|string> features, float target [, constant string options])
- Returns a relation consisting of <{int|bigint|string} feature, float weight, float covar>
-
train_arowe_regr(array<int|bigint|string> features, float target [, constant string options])
- Returns a relation consisting of <{int|bigint|string} feature, float weight, float covar>
-
train_pa1_regr(array<int|bigint|string> features, float target [, constant string options])
- Returns a relation consisting of <{int|bigint|string} feature, float weight>
-
train_pa1a_regr(array<int|bigint|string> features, float target [, constant string options])
- Returns a relation consisting of <{int|bigint|string} feature, float weight>
-
train_pa2_regr(array<int|bigint|string> features, float target [, constant string options])
- Returns a relation consisting of <{int|bigint|string} feature, float weight>
-
train_pa2a_regr(array<int|bigint|string> features, float target [, constant string options])
- Returns a relation consisting of <{int|bigint|string} feature, float weight>
-
train_regressor(list<string|int|bigint> features, double label [, const string options])
- Returns a relation consisting of <string|int|bigint feature, float weight>
Build a prediction model by a generic regressor
-
kpa_predict(@Nonnull double xh, @Nonnull double xk, @Nullable float w0, @Nonnull float w1, @Nonnull float w2, @Nullable float w3)
- Returns a prediction value in Double
-
train_arow(list<string|int|bigint> features, int label [, const string options])
- Returns a relation consisting of <string|int|bigint feature, float weight, float covar>
Build a prediction model by Adaptive Regularization of Weight Vectors (AROW) binary classifier
-
train_arowh(list<string|int|bigint> features, int label [, const string options])
- Returns a relation consisting of <string|int|bigint feature, float weight, float covar>
Build a prediction model by AROW binary classifier using hinge loss
-
train_classifier(list<string|int|bigint> features, int label [, const string options])
- Returns a relation consisting of <string|int|bigint feature, float weight>
Build a prediction model by a generic classifier
-
train_cw(list<string|int|bigint> features, int label [, const string options])
- Returns a relation consisting of <string|int|bigint feature, float weight, float covar>
Build a prediction model by Confidence-Weighted (CW) binary classifier
-
train_kpa(array<string|int|bigint> features, int label [, const string options])
- Returns a relation <h int, hk int, float w0, float w1, float w2, float w3>
-
train_pa(list<string|int|bigint> features, int label [, const string options])
- Returns a relation consisting of <string|int|bigint feature, float weight>
Build a prediction model by Passive-Aggressive (PA) binary classifier
-
train_pa1(list<string|int|bigint> features, int label [, const string options])
- Returns a relation consisting of <string|int|bigint feature, float weight>
Build a prediction model by Passive-Aggressive 1 (PA-1) binary classifier
-
train_pa2(list<string|int|bigint> features, int label [, const string options])
- Returns a relation consisting of <string|int|bigint feature, float weight>
Build a prediction model by Passive-Aggressive 2 (PA-2) binary classifier
-
train_perceptron(list<string|int|bigint> features, int label [, const string options])
- Returns a relation consisting of <string|int|bigint feature, float weight>
Build a prediction model by Perceptron binary classifier
-
train_scw(list<string|int|bigint> features, int label [, const string options])
- Returns a relation consisting of <string|int|bigint feature, float weight, float covar>
Build a prediction model by Soft Confidence-Weighted (SCW-1) binary classifier
-
train_scw2(list<string|int|bigint> features, int label [, const string options])
- Returns a relation consisting of <string|int|bigint feature, float weight, float covar>
Build a prediction model by Soft Confidence-Weighted 2 (SCW-2) binary classifier
-
train_multiclass_arow(list<string|int|bigint> features, {int|string} label [, const string options])
- Returns a relation consisting of <{int|string} label, {string|int|bigint} feature, float weight, float covar>
Build a prediction model by Adaptive Regularization of Weight Vectors (AROW) multiclass classifier
-
train_multiclass_arowh(list<string|int|bigint> features, int|string label [, const string options])
- Returns a relation consisting of <int|string label, string|int|bigint feature, float weight, float covar>
Build a prediction model by Adaptive Regularization of Weight Vectors (AROW) multiclass classifier using hinge loss
-
train_multiclass_cw(list<string|int|bigint> features, {int|string} label [, const string options])
- Returns a relation consisting of <{int|string} label, {string|int|bigint} feature, float weight, float covar>
Build a prediction model by Confidence-Weighted (CW) multiclass classifier
-
train_multiclass_pa(list<string|int|bigint> features, {int|string} label [, const string options])
- Returns a relation consisting of <{int|string} label, {string|int|bigint} feature, float weight>
Build a prediction model by Passive-Aggressive (PA) multiclass classifier
-
train_multiclass_pa1(list<string|int|bigint> features, {int|string} label [, const string options])
- Returns a relation consisting of <{int|string} label, {string|int|bigint} feature, float weight>
Build a prediction model by Passive-Aggressive 1 (PA-1) multiclass classifier
-
train_multiclass_pa2(list<string|int|bigint> features, {int|string} label [, const string options])
- Returns a relation consisting of <{int|string} label, {string|int|bigint} feature, float weight>
Build a prediction model by Passive-Aggressive 2 (PA-2) multiclass classifier
-
train_multiclass_perceptron(list<string|int|bigint> features, {int|string} label [, const string options])
- Returns a relation consisting of <{int|string} label, {string|int|bigint} feature, float weight>
Build a prediction model by Perceptron multiclass classifier
-
train_multiclass_scw(list<string|int|bigint> features, {int|string} label [, const string options])
- Returns a relation consisting of <{int|string} label, {string|int|bigint} feature, float weight, float covar>
Build a prediction model by Soft Confidence-Weighted (SCW-1) multiclass classifier
-
train_multiclass_scw2(list<string|int|bigint> features, {int|string} label [, const string options])
- Returns a relation consisting of <{int|string} label, {string|int|bigint} feature, float weight, float covar>
Build a prediction model by Soft Confidence-Weighted 2 (SCW-2) multiclass classifier
-
add_field_indices(array<string> features)
- Returns an array of strings augmented with field indices (<field>:<feature>)*
-
binarize_label(int/long positive, int/long negative, ...)
- Returns positive/negative records that are represented as (..., int label) where label is 0 or 1
-
categorical_features(array<string> featureNames, feature1, feature2, .. [, const string options])
- Returns a feature vector array<string>
-
ffm_features(const array<string> featureNames, feature1, feature2, .. [, const string options])
- Takes categorical variables and returns a feature vector array<string> in a libffm format <field>:<index>:<value>
-
indexed_features(double v1, double v2, ...)
- Returns a list of features as array<string>: [1:v1, 2:v2, ..]
-
onehot_encoding(PRIMITIVE feature, ...)
- Compute onehot encoded label for each feature
-
quantified_features(boolean output, col1, col2, ...)
- Returns identified features in a dense array<double>
-
quantitative_features(array<string> featureNames, feature1, feature2, .. [, const string options])
- Returns a feature vector array<string>
-
vectorize_features(array<string> featureNames, feature1, feature2, .. [, const string options])
- Returns a feature vector array<string>
-
haversine_distance(double lat1, double lon1, double lat2, double lon2, [const boolean mile=false])
::double - Returns the distance between two locations in km [or miles] using the haversine formula
Usage: select haversine_distance(lat1, lon1, lat2, lon2) from ...
-
lat2tiley(double lat, int zoom)
::int - Returns the tile number of the given latitude and zoom level
-
lon2tilex(double lon, int zoom)
::int - Returns the tile number of the given longitude and zoom level
-
map_url(double lat, double lon, int zoom [, const string option])
- Returns a URL string
OpenStreetMap: http://tile.openstreetmap.org/${zoom}/${xtile}/${ytile}.png
Google Maps: https://www.google.com/maps/@${lat},${lon},${zoom}z
-
tile(double lat, double lon, int zoom)
::bigint - Returns a tile number 2^2n where n is zoom level. FUNC(lat,lon,zoom) = xtile(lon,zoom) + ytile(lat,zoom) * 2^zoom
Refer to http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames for details
-
tilex2lon(int x, int zoom)
::double - Returns longitude of the given tile x and zoom level
-
tiley2lat(int y, int zoom)
::double - Returns latitude of the given tile y and zoom level
-
auc(array rankItems | double score, array correctItems | int label [, const int recommendSize = rankItems.size ])
- Returns AUC
-
average_precision(array rankItems, array correctItems [, const int recommendSize = rankItems.size])
- Returns MAP
-
f1score(array[int], array[int])
- Returns an F1 score
-
fmeasure(array|int|boolean actual, array|int|boolean predicted [, const string options])
- Returns an F-measure (f1score is the special case with beta=1.0)
-
hitrate(array rankItems, array correctItems [, const int recommendSize = rankItems.size])
- Returns HitRate
-
logloss(double predicted, double actual)
- Returns a Logarithmic Loss
-
mae(double predicted, double actual)
- Return a Mean Absolute Error
-
mrr(array rankItems, array correctItems [, const int recommendSize = rankItems.size])
- Returns MRR
-
mse(double predicted, double actual)
- Return a Mean Squared Error
-
ndcg(array rankItems, array correctItems [, const int recommendSize = rankItems.size])
- Returns nDCG
-
precision_at(array rankItems, array correctItems [, const int recommendSize = rankItems.size])
- Returns Precision
-
r2(double predicted, double actual)
- Return R Squared (coefficient of determination)
-
recall_at(array rankItems, array correctItems [, const int recommendSize = rankItems.size])
- Returns Recall
-
rmse(double predicted, double actual)
- Return a Root Mean Squared Error
-
train_gradient_tree_boosting_classifier(array<double|string> features, int label [, string options])
- Returns a relation consisting of <int iteration, int model_type, array<string> pred_models, double intercept, double shrinkage, array<double> var_importance, float oob_error_rate>
-
train_randomforest_classifier(array<double|string> features, int label [, const string options, const array<double> classWeights])
- Returns a relation consisting of <string model_id, double model_weight, string model, array<double> var_importance, int oob_errors, int oob_tests>
-
train_randomforest_regression(array<double|string> features, double target [, string options])
- Returns a relation consisting of <int model_id, int model_type, string pred_model, array<double> var_importance, int oob_errors, int oob_tests>
-
guess_attribute_types(ANY, ...)
- Returns attribute types
select guess_attribute_types(*) from train limit 1;
> Q,Q,C,C,C,C,Q,C,C,C,Q,C,Q,Q,Q,Q,C,Q
-
rf_ensemble(int yhat [, array<double> proba [, double model_weight=1.0]])
- Returns ensembled prediction results in <int label, double probability, array<double> probabilities>
-
tree_export(string model, const string options, optional array<string> featureNames=null, optional array<string> classNames=null)
- Exports a Decision Tree model as javascript/dot
-
tree_predict(string modelId, string model, array<double|string> features [, const string options | const boolean classification=false])
- Returns a prediction result of a random forest in <int value, array<double> a posteriori> for classification and <double> for regression