Skip to content

Instantly share code, notes, and snippets.

@darcwader
darcwader / flickr_interesting.m
Created January 28, 2017 13:54
Flickr Interesting Image building
//https://api.flickr.com/services/rest/?method=flickr.interestingness.getList&api_key=aa621a9050ef8dfbd9621cc311da86aa&format=json
/// Builds the image URL string (Flickr "_t" size suffix) for one photo record.
/// @param info Photo dictionary; the values stored under @"farm", @"server", @"id"
///             and @"secret" are substituted into the URL template in that order.
/// @return The formatted URL string. A key that is absent from `info` is rendered
///         by the %@ specifier as "(null)"; no validation is performed here.
-(NSString*) flickrInterestingGenerateUrlFromInfo:(NSDictionary*) info {
    id farm = info[@"farm"];
    id server = info[@"server"];
    id photoID = info[@"id"];
    id secret = info[@"secret"];

    return [NSString stringWithFormat:@"http://farm%@.static.flickr.com/%@/%@_%@_t.jpg",
            farm, server, photoID, secret];
}
@darcwader
darcwader / git_credential_ubuntu.sh
Created August 30, 2017 05:58
How to enable git to store passwords on Ubuntu
# Build git's bundled gnome-keyring credential helper and register it globally.
# The helper sources live under git's contrib directory; the same path is used
# for the build and for the credential.helper setting.
helper_dir=/usr/share/doc/git/contrib/credential/gnome-keyring

# Development package installed before the helper is built.
sudo apt-get install libgnome-keyring-dev

# -C is the short form of --directory: run make inside the helper directory.
sudo make -C "$helper_dir"

# Point git at the freshly built helper binary for all repositories of this user.
git config --global credential.helper "$helper_dir/git-credential-gnome-keyring"
@darcwader
darcwader / DataCoordinator.swift
Last active October 11, 2017 05:28
Core Data Coordinator for iOS 11 Apps
final class DataCoordinator {
//MARK: - singleton
private static var coordinator: DataCoordinator?
/// Returns the shared `DataCoordinator`, creating it on the first call.
///
/// The instance is cached in the private static `coordinator`; while that
/// property stays set, every call returns the same object.
/// NOTE(review): the nil check and the assignment are not synchronized.
public class func sharedInstance() -> DataCoordinator {
    if let existing = coordinator {
        return existing
    }
    let created = DataCoordinator()
    coordinator = created
    return created
}
@darcwader
darcwader / spam_001.py
Last active October 29, 2017 12:56
spam
# Load the SMS spam corpus (one message per line), report its size and
# preview the first ten messages.
file_name = 'SMSSpamCollection'

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default locale encoding (the companion snippet opens this file as UTF-8 too).
with open(file_name, encoding='utf-8') as f:
    # strip() removes the trailing newline (and surrounding whitespace) of each line.
    corpus = [line.strip() for line in f]

print("there are {} messages".format(len(corpus)))

for i, message in enumerate(corpus[:10]):
    print(i, message)
@darcwader
darcwader / spam_002.py
Created October 29, 2017 12:57
spam 2
# Read the tab-separated corpus into a two-column DataFrame: label, message.
# QUOTE_NONE makes the parser treat quote characters as ordinary text.
with open('SMSSpamCollection', encoding='UTF-8') as f:
    messages = pd.read_csv(f, sep='\t', quoting=csv.QUOTE_NONE, names=['label', 'message'])

# The handle above is opened in text mode, so on Python 3 the values are already
# `str`, which has no .decode() (the unconditional call raised AttributeError).
# Decode only values that are still raw bytes and pass everything else through.
messages['message'] = messages['message'].map(
    lambda text: text.decode(encoding='utf-8') if isinstance(text, bytes) else text)

# Notebook-style preview of the first rows.
messages.head()
# Show the exact set of characters that tokenize() below removes from a message.
print("removing punctuations: "+string.punctuation)
# Stemmer instance; the only line that uses it in this listing is commented out.
stemmer = PorterStemmer()
# NOTE(review): indentation was lost in this listing; the lines following `def`
# are presumably the function body.
def tokenize(message):
""" removes punctuation and tokenizes the words and stems each word.
"""
# NOTE: despite the docstring, stemming is currently disabled; the visible code
# only strips punctuation, tokenizes, and lowercases each token.
# Drop every character that appears in string.punctuation, keep the rest in order.
msg = "".join([ch for ch in message if ch not in string.punctuation]) # get rid of punctuations
# Split the cleaned text into word tokens.
tokens = word_tokenize(msg)
#stems = [stemmer.stem(x).lower() for x in tokens] #correct way to do
stems = [x.lower() for x in tokens] #iOS does not have porterstemmer, we are going to not use stem for now
# NOTE(review): no `return stems` is visible; the listing appears to be cut off
# here. Confirm against the full file.
# Export the fitted vectorizer's IDF weights as JSON: {"idf": [w0, w1, ...]}.
# tolist() converts the array into plain Python values that json can serialize.
idf = {'idf': tfidf.idf_.tolist()}

# json.dump() emits `str`, so the file must be opened in text mode; binary mode
# ('wb') raises TypeError on Python 3.
with open('words_idf.json', 'w') as fp:
    json.dump(idf, fp)

print("IDF of corpus :", tfidf.idf_)
%time spam_detector = LinearSVC().fit(messages_tfidf, messages.label)
predictions = spam_detector.predict(messages_tfidf)
print('accuracy', accuracy_score(messages['label'], predictions))
print('confusion matrix\n', confusion_matrix(messages['label'], predictions))
print('(row=expected, col=predicted)')
plt.matshow(confusion_matrix(messages['label'], predictions), cmap=plt.cm.binary, interpolation='nearest')
plt.title('confusion matrix')
plt.colorbar()
plt.ylabel('expected label')
plt.xlabel('predicted label')
import coremltools
# Convert the fitted classifier to a Core ML model and write it to disk.
# The feature names are bound once so the convert() call and the description
# keys below cannot drift apart.
input_feature = "message"
output_feature = "spam_or_not"

coreml_model = coremltools.converters.sklearn.convert(
    spam_detector, input_feature, output_feature)

# Human-readable metadata stored on the model.
coreml_model.short_description = "Classify whether message is spam or not"
coreml_model.input_description[input_feature] = "TFIDF of message to be classified"
coreml_model.output_description[output_feature] = "Whether message is spam or not"

# Persist the converted model.
coreml_model.save("SpamMessageClassifier.mlmodel")