-- Explodes the tokenize_ja output for the text column into one row per token
-- (exposed as the word column) and drops tokens matched by the noise patterns
-- below. The table and text-column names are template placeholders.
-- NOTE(review): the single-character patterns carry no explicit repeat-once
-- quantifier. It is redundant in a regex, and its literal braces could be
-- mistaken for a placeholder by a brace-based template renderer - confirm
-- against whatever renders this file.
-- NOTE(review): the star on a LATERAL VIEW query presumably already includes
-- the word column, so selecting word again likely yields it twice. Kept as-is
-- to preserve the output shape - confirm with downstream consumers.
SELECT
    *,
    word
FROM {table}
LATERAL VIEW explode(tokenize_ja({text_column})) t AS word
WHERE 1=1 -- always-true predicate, each filter below is then a plain AND clause
    AND word NOT RLIKE '^[a-zA-Z0-9]$' -- exclude a single ASCII letter or digit
    AND word NOT RLIKE "^[!-9@_]*$" -- exclude tokens made only of ASCII punctuation and digits (exclamation mark through 9), at sign or underscore - also matches an empty token
    AND word NOT RLIKE "^[〇一二三四五六七八九]*$" -- exclude tokens made only of kanji numerals (zero through nine) - also matches an empty token
    AND word NOT RLIKE "^[\u3041-\u3096\u30A1-\u30FA]$" -- exclude a single hiragana (U+3041 to U+3096) or katakana (U+30A1 to U+30FA) character
-- 参考 (References)