This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from multiprocessing.dummy import Pool as Threadpool | |
from itertools import chain | |
def read_data(file_path): | |
"""Read in json data from `file_path`""" | |
data = [] | |
# Open the file and load in json |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def process_article(title, text, timestamp, template = 'Infobox book'): | |
"""Process a wikipedia article looking for template""" | |
# Create a parsing object | |
wikicode = mwparserfromhell.parse(text) | |
# Search through templates for the template | |
matches = wikicode.filter_templates(matches = template) | |
if len(matches) >= 1: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xml.sax | |
class WikiXmlHandler(xml.sax.handler.ContentHandler): | |
"""Content handler for Wiki XML data using SAX""" | |
def __init__(self): | |
xml.sax.handler.ContentHandler.__init__(self) | |
self._buffer = None | |
self._values = {} | |
self._current_tag = None | |
self._pages = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
C:\Users\willk\OneDrive\Documents\willkoehrsen.github.io\_posts>python medium_to_markdown.py | |
Enter post url: https://medium.com/@williamkoehrsen/five-minutes-to-your-own-website-fd0b43cbd886 | |
Enter date (as 2018-10-05): 2018-09-16 | |
Post saved as markdown to 2018-09-16-five-minutes-to-your-own-website.md |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Convert a Medium post to markdown and print the result to stdout.
const mediumToMarkdown = require('medium-to-markdown');

// Enter url here
mediumToMarkdown.convertFromUrl('<medium post url>')
  .then(function (markdown) {
    console.log(markdown); //=> Markdown content of medium post
  })
  .catch(function (err) {
    // Report the failure explicitly instead of leaving the rejection
    // unhandled, and keep a non-zero exit status for calling scripts.
    console.error('Failed to convert post:', err);
    process.exitCode = 1;
  });
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import roc_auc_score

# Calculate ROC AUC from the true test labels and the predicted scores.
# `test_labels` and `rf_probs` are expected to be defined earlier in the
# script (rf_probs presumably being the positive-class probabilities).
roc_value = roc_auc_score(test_labels, rf_probs)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Actual class predictions
rf_predictions = model.predict(test)

# predict_proba returns one column per class; keep only column 1, i.e. the
# probability of the class at index 1 (presumably the positive class --
# confirm against model.classes_).
rf_probs = model.predict_proba(test)[:, 1]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Report the mean accuracy of the fitted tree on (X, y).
# NOTE(review): X, y look like the same data the tree was trained on, in
# which case this is training accuracy -- confirm.
print(f'Model Accuracy: {tree.score(X, y)}')
# Output:
# Model Accuracy: 1.0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.ensemble import RandomForestClassifier

# Create the model with 100 trees. bootstrap=True draws a bootstrap sample
# of the training data for each tree; max_features='sqrt' limits the number
# of features considered at each split to sqrt(n_features).
model = RandomForestClassifier(n_estimators=100,
                               bootstrap=True,
                               max_features='sqrt')

# Fit on training data (`train` and `train_labels` are defined elsewhere)
model.fit(train, train_labels)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.tree import DecisionTreeClassifier

# Make a decision tree and train it on (X, y). The random state is pinned
# to RSEED (defined elsewhere) so repeated runs build the same tree.
tree = DecisionTreeClassifier(random_state=RSEED)
tree.fit(X, y)