Created
May 14, 2020 16:33
-
-
Save halfak/ff853ea44a42cb71235c8675685b1e9f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| ``$ articlequality weighted_sum -h`` | |
| :: | |
| Extracts probabilities assigned to each class from the output of | |
| revscoring score utility and outputs the weighted sum of the article | |
| quality predicted where each class is represented as a weight stored | |
| in a yaml config file. | |
| Usage: | |
| weighted_sum <weights> [--scores=<path>] [--output=<path>] | |
| Options: | |
| -h --help Show this documentation. | |
| <weights> Path to a yaml file containing class weights | |
| --scores=<path> Path to a file containing scores generated by | |
| `revscoring score`. [default: <stdin>] | |
| --output=<path> Path to a file to write new observations | |
| (with "weighted_sum") out to. | |
| [default: <stdout>] | |
| """ | |
import contextlib
import json
import sys

import mwapi
import yamlconf
from docopt import docopt
def main(argv=None):
    """Parse command-line arguments and run the weighted-sum annotation.

    Args:
        argv: Optional list of argument strings handed to docopt. When it is
            None, docopt falls back to the process arguments.
    """
    args = docopt(__doc__, argv=argv)

    # Close the weights file as soon as it has been parsed.
    with open(args['<weights>']) as weights_file:
        weights = yamlconf.load(weights_file)

    # ExitStack closes only the files opened here; sys.stdin and sys.stdout
    # are never registered, so they are left open for the caller.
    with contextlib.ExitStack() as stack:
        if args['--scores'] == '<stdin>':
            scores_file = sys.stdin
        else:
            scores_file = stack.enter_context(open(args['--scores']))

        if args['--output'] == '<stdout>':
            output = sys.stdout
        else:
            # The output must be opened for writing; the default read mode
            # makes every write() call fail.
            output = stack.enter_context(open(args['--output'], 'w'))

        # read_revision_scores is lazy, so the input file has to stay open
        # until run() has consumed every record.
        run(read_revision_scores(scores_file), weights, output)
def read_revision_scores(f):
    """Lazily parse tab-separated ``rev_id<TAB>json`` lines.

    Args:
        f: An iterable of text lines (an open file or ``sys.stdin``).

    Yields:
        ``(rev_id, score_doc)`` pairs, where ``rev_id`` is an int and
        ``score_doc`` is the decoded JSON document from the same line.

    Raises:
        ValueError: If a non-blank line has no tab separator, a non-integer
            revision ID, or invalid JSON.
    """
    for line in f:
        # A blank line (typically a trailing newline at the end of the input)
        # carries no record and would otherwise break the unpacking below.
        if not line.strip():
            continue
        # Split on the first tab only, so tabs inside the JSON stay intact.
        rev_id, json_doc = line.split('\t', 1)
        yield int(rev_id), json.loads(json_doc)
def run(revision_scores, weights, output):
    """Add a ``weighted_sum`` field to each score document and write it out.

    Each record is written to ``output`` as one ``rev_id<TAB>json`` line.

    Args:
        revision_scores: Iterable of ``(rev_id, score)`` pairs, where
            ``score`` is a dict. It is modified in place.
        weights: Mapping of class name to numeric weight.
        output: Writable text file-like object.

    Raises:
        KeyError: If a score has no ``'probability'`` entry, or if a class
            in it has no entry in ``weights``.
    """
    for rev_id, score in revision_scores:
        # Apply the weights
        # NOTE(review): assumes the score doc carries a 'probability' mapping
        # of class name -> probability, as emitted by `revscoring score`, and
        # that `weights` is a flat class -> weight mapping -- TODO confirm
        # against real input.
        probabilities = score['probability']
        weighted_sum = sum(
            weights[cls] * proba for cls, proba in probabilities.items())

        # Put the weighted sum in the score doc
        score['weighted_sum'] = weighted_sum

        # Write the output, one record per line so that consecutive
        # records do not run together.
        output.write("{0}\t{1}\n".format(rev_id, json.dumps(score)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment