# compute average precision@k over all users
def precision_at_k(predictions, k):
    '''
    Return the average precision@k over all users.
    args:
        predictions: np.array of user-item predictions
        k: int, number of top-ranked items to evaluate
    returns:
        precision: float, average precision@k over all users
    '''
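
The stub above only fixes the signature. Below is a minimal sketch of one way to fill it in, assuming predictions is a (n_users, n_items) array of predicted scores and that a binary ground-truth matrix relevance of the same shape is available; the relevance argument is an assumption, not part of the original signature:

import numpy as np

def precision_at_k_sketch(predictions, relevance, k):
    # rank items per user by predicted score, highest first, and keep the top k
    top_k = np.argsort(-predictions, axis=1)[:, :k]
    # look up whether each recommended item is actually relevant (0/1)
    hits = np.take_along_axis(relevance, top_k, axis=1)
    # precision@k per user, then averaged over all users
    return hits.mean(axis=1).mean()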

import torch.nn as nn
from transformers import BertModel, BertPreTrainedModel

class CustomBERTModel(BertPreTrainedModel):
    def __init__(self, config, num_class):
        super(CustomBERTModel, self).__init__(config)
        self.bert = BertModel(config)
        # classification head on top of BERT's pooled output
        self.linear = nn.Linear(config.hidden_size, num_class)
        self.init_weights()

model = CustomBERTModel.from_pretrained('bert-base-uncased', num_class=10)
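
The constructor is where the original snippet stops. A possible forward pass for such a class, written as a self-contained sketch; the argument names and the use of outputs[1] (the pooled [CLS] representation returned by transformers' BertModel) are assumptions, not part of the original gist:

import torch.nn as nn
from transformers import BertModel, BertPreTrainedModel

class CustomBERTModelSketch(BertPreTrainedModel):
    def __init__(self, config, num_class):
        super().__init__(config)
        self.bert = BertModel(config)
        self.linear = nn.Linear(config.hidden_size, num_class)
        self.init_weights()

    def forward(self, input_ids, attention_mask=None):
        # run BERT; outputs[1] is the pooled [CLS] vector of shape (batch, hidden_size)
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        # map the pooled vector to num_class logits
        return self.linear(outputs[1])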

from transformers import BertModel
model = BertModel.from_pretrained('bert-base-uncased')
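
For context, a short usage sketch of what the loaded model is typically used for; the example sentence is reused from the tokenizer snippet further down, and the tensor handling is an illustration rather than part of the original gist:

import torch
from transformers import BertModel, BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

# encode a sentence and run a forward pass without tracking gradients
inputs = tokenizer('Learn Hugging Face Transformers & BERT with PyTorch in 5 Minutes',
                   return_tensors='pt')
with torch.no_grad():
    outputs = model(**inputs)

# outputs[0] is the last hidden state with shape (batch, seq_len, hidden_size)
print(outputs[0].shape)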

all_doc_tokens = ['[SEP]']
orig_to_tok_index = []
for (i, word) in enumerate(words):
    # record the index of the first sub-token produced for this word
    orig_to_tok_index.append(len(all_doc_tokens))
    sub_tokens = tokenizer.tokenize(word)
    all_doc_tokens.extend(sub_tokens)
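
To make the word-to-sub-token mapping concrete, a small hedged example of running the loop above on a short word list; the word list and the tokenizer setup are illustrative, not from the original snippet:

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
words = ['Transformers', 'are', 'powerful']

all_doc_tokens = ['[SEP]']
orig_to_tok_index = []
for (i, word) in enumerate(words):
    orig_to_tok_index.append(len(all_doc_tokens))
    sub_tokens = tokenizer.tokenize(word)
    all_doc_tokens.extend(sub_tokens)

print(orig_to_tok_index)   # one starting index per original word
print(all_doc_tokens)      # WordPiece may split a word into several sub-tokens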

# map tokens to their vocabulary ids
input_ids = tokenizer.convert_tokens_to_ids(tokens)

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokens = tokenizer.tokenize('Learn Hugging Face Transformers & BERT with PyTorch in 5 Minutes')
# add the special classification and separator tokens BERT expects
tokens = ['[CLS]'] + tokens + ['[SEP]']
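
As a follow-up, the tokenizer can also do the tokenization, special-token insertion, and id lookup in one call; a short hedged usage example with the same sentence as above:

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# encode() tokenizes, adds [CLS]/[SEP], and maps to vocabulary ids in one step
ids = tokenizer.encode('Learn Hugging Face Transformers & BERT with PyTorch in 5 Minutes',
                       add_special_tokens=True)
print(ids)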

# infer the topic distribution of the second document in the corpus
lda[common_corpus[1]]
'''
output (truncated):
[(0, 0.014287902),
 (1, 0.014287437),
 (2, 0.014287902),
 (3, 0.014285716),
 (4, 0.014285716),
 (5, 0.014285714),
 ...]
'''
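
To read off the dominant topic from such a distribution, one small sketch, assuming the lda model and common_corpus built in the training snippet below:

# lda[bow] returns a list of (topic_id, probability) pairs
topic_dist = lda[common_corpus[1]]
dominant_topic, prob = max(topic_dist, key=lambda pair: pair[1])
print(dominant_topic, prob)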

from gensim.test.utils import common_texts
from gensim.corpora.dictionary import Dictionary
from gensim.models import LdaModel

# Create a corpus from a list of texts
common_dictionary = Dictionary(common_texts)
common_corpus = [common_dictionary.doc2bow(text) for text in common_texts]

# Train the model on the corpus.
lda = LdaModel(common_corpus, num_topics=10)
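
A short follow-up for inspecting what the model learned; neither the id2word argument nor the print_topics call is in the original snippet, but passing the dictionary makes the printed topics show words instead of bare term ids:

# retrain with id2word so topics are printed with readable words
lda = LdaModel(common_corpus, id2word=common_dictionary, num_topics=10)

# show the top words for each learned topic
for topic_id, topic in lda.print_topics(num_topics=10, num_words=5):
    print(topic_id, topic)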