Skip to content

Instantly share code, notes, and snippets.

View WillKoehrsen's full-sized avatar
🌆
building

Will Koehrsen WillKoehrsen

🌆
building
View GitHub Profile
import json
from multiprocessing.dummy import Pool as Threadpool
from itertools import chain
def read_data(file_path):
"""Read in json data from `file_path`"""
data = []
# Open the file and load in json
def process_article(title, text, timestamp, template = 'Infobox book'):
"""Process a wikipedia article looking for template"""
# Create a parsing object
wikicode = mwparserfromhell.parse(text)
# Search through templates for the template
matches = wikicode.filter_templates(matches = template)
if len(matches) >= 1:
import xml.sax
class WikiXmlHandler(xml.sax.handler.ContentHandler):
    """Content handler for Wiki XML data using SAX.

    Only the constructor is visible in this snippet; it sets up the
    per-parse state that the SAX callbacks are expected to fill in.
    """

    def __init__(self):
        """Initialise the base handler and reset all parsing state."""
        super().__init__()
        # NOTE(review): the roles below are inferred from the attribute
        # names only -- confirm against the element callbacks.
        self._buffer = None       # presumably collects text of the current element
        self._values = {}         # presumably maps tag name -> collected text
        self._current_tag = None  # presumably the tag currently being read
        self._pages = []          # presumably the finished page records
C:\Users\willk\OneDrive\Documents\willkoehrsen.github.io\_posts>python medium_to_markdown.py
Enter post url: https://medium.com/@williamkoehrsen/five-minutes-to-your-own-website-fd0b43cbd886
Enter date (as 2018-10-05): 2018-09-16
Post saved as markdown to 2018-09-16-five-minutes-to-your-own-website.md
const mediumToMarkdown = require('medium-to-markdown');

// Convert the Medium post at the given URL and print the result.
// Replace the placeholder argument with the URL of the post to convert.
mediumToMarkdown
  .convertFromUrl('<medium post url>')
  .then((markdown) => {
    // `markdown` holds the Markdown content of the Medium post
    console.log(markdown);
  });
from sklearn.metrics import roc_auc_score
# Area under the ROC curve: true test labels vs. the scores in rf_probs
roc_value = roc_auc_score(y_true=test_labels, y_score=rf_probs)
# Predicted class labels for `test`
rf_predictions = model.predict(test)
# Keep only column index 1 of the predicted probabilities
# (presumably the positive class of a binary problem -- confirm via model.classes_)
rf_probs = model.predict_proba(test)[:, 1]
# Print the value returned by tree.score(X, y).
# NOTE(review): if X, y are the same data the tree was fit on, this is
# training accuracy and says nothing about generalisation -- confirm.
print(f'Model Accuracy: {tree.score(X, y)}')
Model Accuracy: 1.0
from sklearn.ensemble import RandomForestClassifier
# Random forest of 100 trees, with bootstrap sampling enabled and
# max_features set to 'sqrt'
model = RandomForestClassifier(
    n_estimators=100,
    bootstrap=True,
    max_features='sqrt',
)

# Train the forest on the training features and labels
model.fit(train, train_labels)
from sklearn.tree import DecisionTreeClassifier
# Build a decision tree classifier; random_state is set to RSEED (defined
# elsewhere), presumably so repeated runs give the same tree
tree = DecisionTreeClassifier(random_state=RSEED)
# Train the tree on features X and labels y
tree.fit(X, y)