This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//https://api.flickr.com/services/rest/?method=flickr.interestingness.getList&api_key=aa621a9050ef8dfbd9621cc311da86aa&format=json
/**
 * Builds the thumbnail ("_t" size suffix) image URL for one photo entry.
 *
 * @param info Dictionary for a single photo; the values under the keys
 *             "farm", "server", "id" and "secret" are substituted into the URL.
 *             (Presumably an element of the interestingness.getList response
 *             referenced above; confirm against the caller.)
 * @return The URL as a string. A key missing from `info` is formatted as "(null)".
 */
-(NSString*) flickrInterestingGenerateUrlFromInfo:(NSDictionary*) info {
    // HTTPS is required: App Transport Security rejects cleartext HTTP loads by default.
    return [NSString stringWithFormat:@"https://farm%@.staticflickr.com/%@/%@_%@_t.jpg",
            info[@"farm"],
            info[@"server"],
            info[@"id"],
            info[@"secret"]];
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Directory holding git's contrib gnome-keyring credential helper sources.
helper_dir=/usr/share/doc/git/contrib/credential/gnome-keyring

# Install the development package, then build the helper in place.
sudo apt-get install libgnome-keyring-dev
sudo make -C "$helper_dir"

# Point git's global credential.helper at the freshly built binary.
git config --global credential.helper "$helper_dir/git-credential-gnome-keyring"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| final class DataCoordinator { | |
| //MARK: - singleton | |
/// Backing storage for the shared instance; `nil` until `sharedInstance()` is first called.
private static var coordinator: DataCoordinator?

/// Returns the shared `DataCoordinator`, creating and caching it on first use.
///
/// - Note: The nil-check and the assignment are not synchronized.
public class func sharedInstance() -> DataCoordinator {
    if let existing = coordinator {
        return existing
    }
    let created = DataCoordinator()
    coordinator = created
    return created
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the raw corpus file, one message per line, and preview the first few.
file_name = 'SMSSpamCollection'

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default text encoding.
with open(file_name, encoding='UTF-8') as f:
    corpus = f.readlines()
print("there are {} messages".format(len(corpus)))

# strip() removes leading and trailing whitespace, including the trailing \n.
corpus = [x.strip() for x in corpus]

# Show the first ten messages alongside their index.
for i, message in enumerate(corpus[:10]):
    print(i, message)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the tab-separated corpus into a two-column DataFrame (label, message).
# QUOTE_NONE keeps quote characters inside messages as literal text.
with open('SMSSpamCollection', encoding='UTF-8') as f:
    messages = pd.read_csv(f, sep='\t', quoting=csv.QUOTE_NONE, names=['label', 'message'])

# No per-row decode step is needed: the file is opened in text mode with an
# explicit encoding, so the 'message' column already holds decoded str values
# (calling .decode() on a str raises AttributeError on Python 3).
messages.head()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
print("removing punctuations: "+string.punctuation)
stemmer = PorterStemmer()

# Translation table that deletes every character in string.punctuation;
# built once so tokenize() does not rescan the punctuation set per character.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def tokenize(message):
    """Strip punctuation from `message`, split it into words, and lowercase them.

    Stemming is intentionally skipped for now (see the commented-out line
    below), so the returned tokens are lowercased words, not stems.

    Returns:
        list of lowercased tokens produced by word_tokenize.
    """
    msg = message.translate(_PUNCTUATION_TABLE)  # get rid of punctuations
    tokens = word_tokenize(msg)
    #stems = [stemmer.stem(x).lower() for x in tokens] #correct way to do
    stems = [x.lower() for x in tokens]  #iOS does not have porterstemmer, we are going to not use stem for now
    # Return the tokens; previously they were computed and then discarded.
    return stems
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Persist the fitted vectorizer's IDF weights as JSON: {"idf": [...]}.
# tolist() converts the array into plain Python values that json can serialize.
idf = {'idf': tfidf.idf_.tolist()}

# json.dump writes str, so the file must be opened in text mode ('w');
# binary mode ('wb') raises TypeError on Python 3.
with open('words_idf.json', 'w') as fp:
    json.dump(idf, fp)
print("IDF of corpus :", tfidf.idf_)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| %time spam_detector = LinearSVC().fit(messages_tfidf, messages.label) | |
| predictions = spam_detector.predict(messages_tfidf) | |
| print('accuracy', accuracy_score(messages['label'], predictions)) | |
| print('confusion matrix\n', confusion_matrix(messages['label'], predictions)) | |
| print('(row=expected, col=predicted)') | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Render the confusion matrix as a grayscale image: rows are expected labels,
# columns are predicted labels.
label_confusion = confusion_matrix(messages['label'], predictions)
plt.matshow(label_confusion, cmap=plt.cm.binary, interpolation='nearest')
plt.title('confusion matrix')
plt.colorbar()
plt.ylabel('expected label')
plt.xlabel('predicted label')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import coremltools

# Feature names used both for the conversion and for the descriptions below.
input_feature = "message"
output_feature = "spam_or_not"

# Convert the fitted scikit-learn classifier into a Core ML model.
coreml_model = coremltools.converters.sklearn.convert(spam_detector, input_feature, output_feature)

# Attach human-readable metadata to the model and its input/output features.
coreml_model.short_description = "Classify whether message is spam or not"
coreml_model.input_description[input_feature] = "TFIDF of message to be classified"
coreml_model.output_description[output_feature] = "Whether message is spam or not"

# Write the model to disk.
coreml_model.save("SpamMessageClassifier.mlmodel")