Skip to content

Instantly share code, notes, and snippets.

@halfak
Created May 14, 2020 16:33
Show Gist options
  • Select an option

  • Save halfak/ff853ea44a42cb71235c8675685b1e9f to your computer and use it in GitHub Desktop.

Select an option

Save halfak/ff853ea44a42cb71235c8675685b1e9f to your computer and use it in GitHub Desktop.
"""
``$ articlequality weighted_sum -h``
::

    Extracts probabilities assigned to each class from the output of the
    `revscoring score` utility and outputs the weighted sum of the predicted
    article quality, where each class is represented by a weight stored
    in a yaml config file.

    Usage:
        weighted_sum <weights> [--scores=<path>] [--output=<path>]

    Options:
        -h --help        Show this documentation.
        <weights>        Path to a yaml file containing class weights
        --scores=<path>  Path to a file containing scores generated by
                         `revscoring score`. [default: <stdin>]
        --output=<path>  Path to a file to write new observations
                         (with "weighted_sum") out to.
                         [default: <stdout>]
"""
import contextlib
import json
import sys

import mwapi
import yamlconf
from docopt import docopt
def main(argv=None):
    """
    Command-line entry point: parse arguments, load weights and run.

    :Parameters:
        argv : `list` ( `str` ) | `None`
            Argument vector handed to docopt.  `None` lets docopt read
            the arguments from `sys.argv`.
    """
    args = docopt(__doc__, argv=argv)

    # Close the weights file as soon as the configuration is loaded.
    with open(args['<weights>']) as weights_file:
        weights = yamlconf.load(weights_file)

    # ExitStack closes only the files opened here; sys.stdin and sys.stdout
    # are never registered with it and therefore stay open.
    with contextlib.ExitStack() as stack:
        if args['--scores'] == '<stdin>':
            scores_file = sys.stdin
        else:
            scores_file = stack.enter_context(open(args['--scores']))

        if args['--output'] == '<stdout>':
            output = sys.stdout
        else:
            # The output must be opened for writing; the default mode of
            # open() is read-only, which makes every write fail.
            output = stack.enter_context(open(args['--output'], 'w'))

        # read_revision_scores() is lazy, so run() has to consume it while
        # the input file is still open, i.e. inside this `with` block.
        run(read_revision_scores(scores_file), weights, output)
def read_revision_scores(f):
    """
    Lazily parse tab-separated "<rev_id>\\t<json document>" lines.

    :Parameters:
        f : iterable of `str`
            Lines to parse, e.g. an open text file.

    :Returns:
        A generator of `(rev_id, doc)` pairs where `rev_id` is an `int`
        and `doc` is the decoded JSON document.

    :Raises:
        `ValueError` if a non-blank line has no tab separator, if the
        revision ID is not an integer or if the JSON cannot be decoded.
    """
    for line in f:
        # Tolerate blank lines (e.g. a trailing empty line) instead of
        # failing to unpack them.
        if not line.strip():
            continue
        # Split on the first tab only; everything after it is the document.
        rev_id, json_doc = line.split('\t', 1)
        yield int(rev_id), json.loads(json_doc)
def run(revision_scores, weights, output):
    """
    Add a "weighted_sum" field to each score and write it to `output`.

    NOTE(review): this assumes each score document holds a 'probability'
    mapping of class name --> probability (the shape the module docstring
    implies for `revscoring score` output) -- confirm against real input.

    :Parameters:
        revision_scores : iterable of ( `int`, `dict` )
            Pairs of revision ID and score document.
        weights : `dict`
            Mapping of class name --> numeric weight.
        output : file-like
            Anything with a `write()` method accepting `str`.

    :Raises:
        `KeyError` if a score has no 'probability' field or if a predicted
        class has no entry in `weights`.
    """
    for rev_id, score in revision_scores:
        # Apply the weights
        probabilities = score['probability']
        weighted_sum = sum(
            weights[cls] * proba for cls, proba in probabilities.items())

        # Put the weighted sum in the score doc
        score['weighted_sum'] = weighted_sum

        # Write the output, one record per line so that it can be read back
        # line by line.
        output.write("{0}\t{1}\n".format(rev_id, json.dumps(score)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment