darcwader’s gists

darcwader / flickr_interesting.m

Created January 28, 2017 13:54

Building the image URL for a Flickr "interesting" photo

	//https://api.flickr.com/services/rest/?method=flickr.interestingness.getList&api_key=aa621a9050ef8dfbd9621cc311da86aa&format=json


	/// Builds the URL string of the "_t"-suffixed JPEG for one Flickr photo record.
	///
	/// @param info Photo dictionary; its "farm", "server", "id" and "secret" entries
	///             are interpolated into the URL. A missing entry is formatted as "(null)".
	/// @return A string of the form
	///         http://farm{farm}.static.flickr.com/{server}/{id}_{secret}_t.jpg
	- (NSString *)flickrInterestingGenerateUrlFromInfo:(NSDictionary *)info {
	    // Objective-C objects are always handled through pointers, so both the
	    // return type and the parameter must be declared with '*'.
	    return [NSString stringWithFormat:@"http://farm%@.static.flickr.com/%@/%@_%@_t.jpg",
	                                      info[@"farm"],
	                                      info[@"server"],
	                                      info[@"id"],
	                                      info[@"secret"]];
	}

darcwader / git_credential_ubuntu.sh

Created August 30, 2017 05:58

How to enable Git to store passwords on Ubuntu

	# Install the keyring headers, build the credential helper found under git's
	# contrib directory, then register the built binary as the global credential helper.
	helper_dir=/usr/share/doc/git/contrib/credential/gnome-keyring
	sudo apt-get install libgnome-keyring-dev
	sudo make --directory="$helper_dir"
	git config --global credential.helper "$helper_dir/git-credential-gnome-keyring"

darcwader / DataCoordinator.swift

Last active October 11, 2017 05:28

Core Data Coordinator for iOS 11 Apps

	final class DataCoordinator {
	//MARK: - singleton
	private static var coordinator: DataCoordinator?
	/// Returns the shared `DataCoordinator`, creating and caching it on first use.
	public class func sharedInstance() -> DataCoordinator {
	    // Fast path: an instance has already been stored in `coordinator`.
	    if let existing = coordinator {
	        return existing
	    }
	    // First call: create the instance and store it for later calls.
	    let created = DataCoordinator()
	    coordinator = created
	    return created
	}

darcwader / spam_001.py

Last active October 29, 2017 12:56

spam

	# Load the raw corpus file, one message per line, and preview the first ten.
	file_name = 'SMSSpamCollection'
	# Pass the encoding explicitly instead of relying on the platform default,
	# which varies between systems and can fail on non-ASCII bytes.
	with open(file_name, encoding='UTF-8') as f:
	    corpus = f.readlines()

	print("there are {} messages".format(len(corpus)))

	# strip() removes the trailing newline (and any other surrounding whitespace).
	corpus = [x.strip() for x in corpus]

	for i, message in enumerate(corpus[:10]):
	    print(i, message)

darcwader / spam_002.py

Created October 29, 2017 12:57

spam 2

	# Load the tab-separated corpus into a DataFrame with 'label' and 'message' columns.
	# QUOTE_NONE makes the parser treat quote characters inside messages as plain text.
	with open('SMSSpamCollection', encoding='UTF-8') as f:
	    messages = pd.read_csv(f, sep='\t', quoting=csv.QUOTE_NONE, names=['label', 'message'])

	# The file is opened in text mode, so values are normally already str, and str
	# has no .decode() on Python 3. Decode only values that are still raw bytes.
	messages['message'] = messages['message'].map(
	    lambda text: text.decode(encoding='utf-8') if isinstance(text, bytes) else text)
	messages.head()

darcwader / spam_003.py

Created October 29, 2017 13:14

spam

	# Show which characters count as punctuation, then create the stemmer instance.
	punctuation_banner = "removing punctuations: " + string.punctuation
	print(punctuation_banner)
	stemmer = PorterStemmer()

	def tokenize(message):
	""" removes punctuation and tokenizes the words and stems each word.
	"""
	msg = "".join([ch for ch in message if ch not in string.punctuation]) # get rid of punctuations
	tokens = word_tokenize(msg)
	#stems = [stemmer.stem(x).lower() for x in tokens] #correct way to do
	stems = [x.lower() for x in tokens] #iOS does not have porterstemmer, we are going to not use stem for now

darcwader / spam_004.py

Created October 29, 2017 13:38

spam

	# Export the IDF weights (tfidf.idf_) as JSON of the form {"idf": [...]}.
	idf = {'idf': tfidf.idf_.tolist()}

	# Open in text mode: json.dump writes str, which a binary ('wb') handle
	# rejects with TypeError on Python 3.
	with open('words_idf.json', 'w') as fp:
	    json.dump(idf, fp)

	print("IDF of corpus :", tfidf.idf_)

darcwader / spam_005.py

Created October 29, 2017 13:40

spam

	%time spam_detector = LinearSVC().fit(messages_tfidf, messages.label)

	predictions = spam_detector.predict(messages_tfidf)

	print('accuracy', accuracy_score(messages['label'], predictions))
	print('confusion matrix\n', confusion_matrix(messages['label'], predictions))
	print('(row=expected, col=predicted)')

darcwader / spam_006.py

Created October 29, 2017 13:41

spam

	# Draw the expected-vs-predicted confusion matrix as a binary-colormap image.
	label_confusion = confusion_matrix(messages['label'], predictions)
	plt.matshow(label_confusion, cmap=plt.cm.binary, interpolation='nearest')
	plt.title('confusion matrix')
	plt.colorbar()
	# The first argument to confusion_matrix (the expected labels) is on the y axis.
	plt.ylabel('expected label')
	plt.xlabel('predicted label')

darcwader / spam_007.py

Created October 29, 2017 13:43

spam

	import coremltools

	# Feature names passed to the converter and reused as keys for the descriptions below.
	input_name = "message"
	output_name = "spam_or_not"
	coreml_model = coremltools.converters.sklearn.convert(spam_detector, input_name, output_name)

	# Attach human-readable metadata to the converted model.
	coreml_model.short_description = "Classify whether message is spam or not"
	coreml_model.input_description[input_name] = "TFIDF of message to be classified"
	coreml_model.output_description[output_name] = "Whether message is spam or not"

	# Write the model to disk.
	coreml_model.save("SpamMessageClassifier.mlmodel")