-
-
Save adamwight/eb111516543072ca70c1c586b824f135 to your computer and use it in GitHub Desktop.
Diff for templating
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| amsa@C235:~/editquality$ python differ.py "Spanish Wikipedia" | |
| ############################# Spanish Wikipedia ################################ | |
| - | |
| datasets/eswiki.sampled_revisions.20k_2015.json: | |
| wget -qO- http://quarry.wmflabs.org/run/42221/output/0/json-lines?download=true > $@ | |
| - | |
| - datasets/eswiki.human_labeled_revisions.5k_2015.json: | |
| - ./utility fetch_labels \ | |
| - https://labels.wmflabs.org/campaigns/eswiki/12/ > $@ | |
| datasets/eswiki.autolabeled_revisions.20k_2015.json: \ | |
| datasets/eswiki.sampled_revisions.20k_2015.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://es.wikipedia.org \ | |
| --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| + datasets/eswiki.human_labeled_revisions.5k_2015.json: | |
| + ./utility fetch_labels \ | |
| + https://labels.wmflabs.org/campaigns/eswiki/12/ > $@ | |
| + | |
| datasets/eswiki.labeled_revisions.20k_2015.json: \ | |
| + datasets/eswiki.autolabeled_revisions.20k_2015.json \ | |
| - datasets/eswiki.human_labeled_revisions.5k_2015.json \ | |
| ? -- | |
| + datasets/eswiki.human_labeled_revisions.5k_2015.json | |
| - datasets/eswiki.autolabeled_revisions.20k_2015.json | |
| ./utility merge_labels $^ > $@ | |
| datasets/eswiki.autolabeled_revisions.w_cache.20k_2015.json: \ | |
| datasets/eswiki.autolabeled_revisions.20k_2015.json | |
| cat $< | \ | |
| revscoring extract \ | |
| editquality.feature_lists.eswiki.reverted \ | |
| - editquality.feature_lists.eswiki.damaging \ | |
| - editquality.feature_lists.eswiki.goodfaith \ | |
| --host https://es.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| datasets/eswiki.labeled_revisions.w_cache.20k_2015.json: \ | |
| datasets/eswiki.labeled_revisions.20k_2015.json | |
| cat $< | \ | |
| revscoring extract \ | |
| - editquality.feature_lists.eswiki.reverted \ | |
| editquality.feature_lists.eswiki.damaging \ | |
| editquality.feature_lists.eswiki.goodfaith \ | |
| --host https://es.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| tuning_reports/eswiki.damaging.md: \ | |
| datasets/eswiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.eswiki.damaging \ | |
| damaging \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| - --pop-rate "true=0.1273116307504203" \ | |
| - --pop-rate "false=0.8726883692495797" \ | |
| - --center --scale \ | |
| - --cv-timeout=60 \ | |
| - --debug > $@ | |
| - | |
| - tuning_reports/eswiki.reverted.md: \ | |
| - datasets/eswiki.autolabeled_revisions.w_cache.20k_2015.json | |
| - cat $< | \ | |
| - revscoring tune \ | |
| - config/classifiers.params.yaml \ | |
| - editquality.feature_lists.eswiki.reverted \ | |
| - reverted_for_damage \ | |
| - roc_auc.labels.true \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| --pop-rate "true=0.11036013315847877" \ | |
| --pop-rate "false=0.8896398668415212" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| - | |
| - models/eswiki.reverted.gradient_boosting.model: \ | |
| - datasets/eswiki.autolabeled_revisions.w_cache.20k_2015.json | |
| - cat $< | \ | |
| - revscoring cv_train \ | |
| - revscoring.scoring.models.GradientBoosting \ | |
| - editquality.feature_lists.eswiki.reverted \ | |
| - reverted_for_damage \ | |
| - --version=$(reverted_major_minor).0 \ | |
| - -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| - -p 'n_estimators=700' \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.11036013315847877" \ | |
| - --pop-rate "false=0.8896398668415212" \ | |
| - --center --scale > $@ | |
| models/eswiki.damaging.gradient_boosting.model: \ | |
| datasets/eswiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.eswiki.damaging \ | |
| damaging \ | |
| --version=$(damaging_major_minor).0 \ | |
| + -p 'learning_rate=0.1' \ | |
| -p 'max_depth=3' \ | |
| - -p 'learning_rate=0.1' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=300' \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.11036013315847877" \ | |
| --pop-rate "false=0.8896398668415212" \ | |
| - --center --scale > $@ | |
| ? - | |
| + --center --scale > $@ | |
| tuning_reports/eswiki.goodfaith.md: \ | |
| datasets/eswiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.eswiki.goodfaith \ | |
| goodfaith \ | |
| roc_auc.labels.true \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.11036013315847877" \ | |
| --pop-rate "false=0.8896398668415212" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| - --debug > $@ | |
| ? - | |
| + --debug > $@ | |
| models/eswiki.goodfaith.gradient_boosting.model: \ | |
| datasets/eswiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.eswiki.goodfaith \ | |
| goodfaith \ | |
| --version=$(goodfaith_major_minor).0 \ | |
| + -p 'learning_rate=0.1' \ | |
| -p 'max_depth=3' \ | |
| - -p 'learning_rate=0.1' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=300' \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.11036013315847877" \ | |
| --pop-rate "false=0.8896398668415212" \ | |
| - --center --scale > $@ | |
| ? - | |
| + --center --scale > $@ | |
| eswiki_models: \ | |
| - models/eswiki.damaging.gradient_boosting.model \ | |
| ? - | |
| + models/eswiki.damaging.gradient_boosting.model \ | |
| - models/eswiki.goodfaith.gradient_boosting.model | |
| ? - | |
| + models/eswiki.goodfaith.gradient_boosting.model | |
| eswiki_tuning_reports: \ | |
| - tuning_reports/eswiki.damaging.md \ | |
| ? - | |
| + tuning_reports/eswiki.damaging.md \ | |
| - tuning_reports/eswiki.goodfaith.md | |
| ? - | |
| + tuning_reports/eswiki.goodfaith.md | |
| ############################# Spanish Wikibooks ################################ | |
| - | |
| datasets/eswikibooks.sampled_revisions.20k_2015.json: | |
| wget -qO- https://quarry.wmflabs.org/run/113419/output/0/json-lines?download=true > $@ | |
| + | |
| + datasets/eswikibooks.autolabeled_revisions.20k_2015.json: \ | |
| + datasets/eswikibooks.sampled_revisions.20k_2015.json | |
| + cat $< | \ | |
| + ./utility autolabel --host=https://es.wikibooks.org \ | |
| + --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat \ | |
| + --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| + --verbose > $@ | |
| datasets/eswikibooks.human_labeled_revisions.5k_2015.json: | |
| ./utility fetch_labels \ | |
| https://labels.wmflabs.org/campaigns/eswikibooks/42/ > $@ | |
| - datasets/eswikibooks.autolabeled_revisions.20k_2015.json: \ | |
| - datasets/eswikibooks.sampled_revisions.20k_2015.json | |
| - cat $< | \ | |
| - ./utility autolabel --host=https://es.wikibooks.org \ | |
| - --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat,autopatrolled \ | |
| - --trusted-edits=1000 \ | |
| - --verbose > $@ | |
| - | |
| datasets/eswikibooks.labeled_revisions.20k_2015.json: \ | |
| + datasets/eswikibooks.autolabeled_revisions.20k_2015.json \ | |
| - datasets/eswikibooks.human_labeled_revisions.5k_2015.json \ | |
| ? -- | |
| + datasets/eswikibooks.human_labeled_revisions.5k_2015.json | |
| - datasets/eswikibooks.autolabeled_revisions.20k_2015.json | |
| ./utility merge_labels $^ > $@ | |
| datasets/eswikibooks.autolabeled_revisions.w_cache.20k_2015.json: \ | |
| datasets/eswikibooks.autolabeled_revisions.20k_2015.json | |
| cat $< | \ | |
| revscoring extract \ | |
| editquality.feature_lists.eswikibooks.reverted \ | |
| - editquality.feature_lists.eswikibooks.damaging \ | |
| - editquality.feature_lists.eswikibooks.goodfaith \ | |
| --host https://es.wikibooks.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| datasets/eswikibooks.labeled_revisions.w_cache.20k_2015.json: \ | |
| datasets/eswikibooks.labeled_revisions.20k_2015.json | |
| cat $< | \ | |
| revscoring extract \ | |
| - editquality.feature_lists.eswikibooks.reverted \ | |
| editquality.feature_lists.eswikibooks.damaging \ | |
| editquality.feature_lists.eswikibooks.goodfaith \ | |
| --host https://es.wikibooks.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| - | |
| - tuning_reports/eswikibooks.reverted.md: \ | |
| - datasets/eswikibooks.autolabeled_revisions.w_cache.20k_2015.json | |
| - cat $< | \ | |
| - revscoring tune \ | |
| - config/classifiers.params.yaml \ | |
| - editquality.feature_lists.eswikibooks.reverted \ | |
| - reverted_for_damage \ | |
| - roc_auc.labels.true \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.08983104208206527" \ | |
| - --pop-rate "false=0.9101689579179347" \ | |
| - --center --scale \ | |
| - --cv-timeout=60 \ | |
| - --debug > $@ | |
| - | |
| - models/eswikibooks.reverted.gradient_boosting.model: \ | |
| - datasets/eswikibooks.autolabeled_revisions.w_cache.20k_2015.json | |
| - cat $< | \ | |
| - revscoring cv_train \ | |
| - revscoring.scoring.models.GradientBoosting \ | |
| - editquality.feature_lists.eswikibooks.reverted \ | |
| - reverted_for_damage \ | |
| - --version=$(reverted_major_minor).0 \ | |
| - -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| - -p 'n_estimators=700' \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.08983104208206527" \ | |
| - --pop-rate "false=0.9101689579179347" \ | |
| - --center --scale > $@ | |
| tuning_reports/eswikibooks.damaging.md: \ | |
| datasets/eswikibooks.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.eswikibooks.damaging \ | |
| damaging \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.1126671580499105" \ | |
| --pop-rate "false=0.8873328419500895" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| - --debug > $@ | |
| ? - | |
| + --debug > $@ | |
| models/eswikibooks.damaging.gradient_boosting.model: \ | |
| datasets/eswikibooks.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.eswikibooks.damaging \ | |
| damaging \ | |
| --version=$(damaging_major_minor).0 \ | |
| + -p 'learning_rate=0.1' \ | |
| -p 'max_depth=3' \ | |
| - -p 'learning_rate=0.1' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=500' \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.1126671580499105" \ | |
| --pop-rate "false=0.8873328419500895" \ | |
| - --center --scale > $@ | |
| ? - | |
| + --center --scale > $@ | |
| tuning_reports/eswikibooks.goodfaith.md: \ | |
| datasets/eswikibooks.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.eswikibooks.goodfaith \ | |
| goodfaith \ | |
| roc_auc.labels.true \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9139393939393939" \ | |
| --pop-rate "false=0.08606060606060606" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| - --debug > $@ | |
| ? - | |
| + --debug > $@ | |
| models/eswikibooks.goodfaith.gradient_boosting.model: \ | |
| datasets/eswikibooks.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.eswikibooks.goodfaith \ | |
| goodfaith \ | |
| --version=$(goodfaith_major_minor).0 \ | |
| + -p 'learning_rate=0.5' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.5' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9139393939393939" \ | |
| --pop-rate "false=0.08606060606060606" \ | |
| - --center --scale > $@ | |
| ? - | |
| + --center --scale > $@ | |
| eswikibooks_models: \ | |
| - models/eswikibooks.damaging.gradient_boosting.model \ | |
| ? - | |
| + models/eswikibooks.damaging.gradient_boosting.model \ | |
| - models/eswikibooks.goodfaith.gradient_boosting.model | |
| ? - | |
| + models/eswikibooks.goodfaith.gradient_boosting.model | |
| eswikibooks_tuning_reports: \ | |
| - tuning_reports/eswikibooks.damaging.md | |
| ? - | |
| + tuning_reports/eswikibooks.damaging.md \ | |
| ? ++ | |
| - tuning_reports/eswikibooks.goodfaith.md | |
| ? - | |
| + tuning_reports/eswikibooks.goodfaith.md | |
| - ########################### Estonian Wikipedia ################################ | |
| + ############################# Estonian Wikipedia ################################ | |
| ? ++ | |
| - | |
| datasets/etwiki.sampled_revisions.20k_2015.json: | |
| wget -qO- http://quarry.wmflabs.org/run/50110/output/0/json-lines?download=true > $@ | |
| datasets/etwiki.autolabeled_revisions.20k_2015.json: \ | |
| datasets/etwiki.sampled_revisions.20k_2015.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://et.wikipedia.org \ | |
| --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat,flow-bot \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| datasets/etwiki.human_labeled_revisions.5k_2015.json: | |
| ./utility fetch_labels \ | |
| https://labels.wmflabs.org/campaigns/etwiki/17/ > $@ | |
| datasets/etwiki.labeled_revisions.20k_2015.json: \ | |
| + datasets/etwiki.autolabeled_revisions.20k_2015.json \ | |
| - datasets/etwiki.human_labeled_revisions.5k_2015.json \ | |
| ? -- | |
| + datasets/etwiki.human_labeled_revisions.5k_2015.json | |
| + ./utility merge_labels $^ > $@ | |
| + | |
| + datasets/etwiki.autolabeled_revisions.w_cache.20k_2015.json: \ | |
| datasets/etwiki.autolabeled_revisions.20k_2015.json | |
| - ./utility merge_labels $^ > $@ | |
| + cat $< | \ | |
| + revscoring extract \ | |
| + editquality.feature_lists.etwiki.reverted \ | |
| + --host https://et.wikipedia.org \ | |
| + --extractor $(max_extractors) \ | |
| + --verbose > $@ | |
| datasets/etwiki.labeled_revisions.w_cache.20k_2015.json: \ | |
| datasets/etwiki.labeled_revisions.20k_2015.json | |
| cat $< | \ | |
| revscoring extract \ | |
| - editquality.feature_lists.etwiki.reverted \ | |
| editquality.feature_lists.etwiki.damaging \ | |
| editquality.feature_lists.etwiki.goodfaith \ | |
| --host https://et.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| - | |
| - tuning_reports/etwiki.reverted.md: \ | |
| - datasets/etwiki.labeled_revisions.w_cache.20k_2015.json | |
| - cat $< | \ | |
| - revscoring tune \ | |
| - config/classifiers.params.yaml \ | |
| - editquality.feature_lists.etwiki.reverted \ | |
| - reverted_for_damage \ | |
| - roc_auc.labels.true \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.020021127823331153" \ | |
| - --pop-rate "false=0.9799788721766688" \ | |
| - --center --scale \ | |
| - --cv-timeout=60 \ | |
| - --debug > $@ | |
| - | |
| - models/etwiki.reverted.gradient_boosting.model: \ | |
| - datasets/etwiki.labeled_revisions.w_cache.20k_2015.json | |
| - cat $< | \ | |
| - revscoring cv_train \ | |
| - revscoring.scoring.models.GradientBoosting \ | |
| - editquality.feature_lists.etwiki.reverted \ | |
| - reverted_for_damage \ | |
| - --version=$(reverted_major_minor).0 \ | |
| - -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| - -p 'n_estimators=500' \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.020021127823331153" \ | |
| - --pop-rate "false=0.9799788721766688" \ | |
| - --center --scale > $@ | |
| tuning_reports/etwiki.damaging.md: \ | |
| datasets/etwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.etwiki.damaging \ | |
| damaging \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.026158257457618593" \ | |
| --pop-rate "false=0.9738417425423814" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| - --debug > $@ | |
| ? - | |
| + --debug > $@ | |
| models/etwiki.damaging.gradient_boosting.model: \ | |
| datasets/etwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.etwiki.damaging \ | |
| damaging \ | |
| --version=$(damaging_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=500' \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.026158257457618593" \ | |
| --pop-rate "false=0.9738417425423814" \ | |
| - --center --scale > $@ | |
| ? - | |
| + --center --scale > $@ | |
| tuning_reports/etwiki.goodfaith.md: \ | |
| datasets/etwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.etwiki.goodfaith \ | |
| goodfaith \ | |
| roc_auc.labels.true \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9841038281603702" \ | |
| - --pop-rate "false=0.01589617183962976" \ | |
| ? ^ | |
| + --pop-rate "false=0.01589617183962977" \ | |
| ? ^ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| - --debug > $@ | |
| ? - | |
| + --debug > $@ | |
| models/etwiki.goodfaith.gradient_boosting.model: \ | |
| datasets/etwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.etwiki.goodfaith \ | |
| goodfaith \ | |
| --version=$(goodfaith_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=500' \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9841038281603702" \ | |
| - --pop-rate "false=0.01589617183962976" \ | |
| ? ^ | |
| + --pop-rate "false=0.01589617183962977" \ | |
| ? ^ | |
| - --center --scale > $@ | |
| ? - | |
| + --center --scale > $@ | |
| etwiki_models: \ | |
| - models/etwiki.damaging.gradient_boosting.model \ | |
| ? - | |
| + models/etwiki.damaging.gradient_boosting.model \ | |
| - models/etwiki.goodfaith.gradient_boosting.model | |
| ? - | |
| + models/etwiki.goodfaith.gradient_boosting.model | |
| etwiki_tuning_reports: \ | |
| - tuning_reports/etwiki.damaging.md \ | |
| ? - | |
| + tuning_reports/etwiki.damaging.md \ | |
| - tuning_reports/etwiki.goodfaith.md | |
| ? - | |
| + tuning_reports/etwiki.goodfaith.md |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| amsa@C235:~/editquality$ python differ.py "Japanese Wikipedia" | |
| - ########################### Japanese Wikipedia ################################ | |
| + ############################# Japanese Wikipedia ################################ | |
| ? ++ | |
| - | |
| # From https://quarry.wmflabs.org/query/9927 | |
| datasets/jawiki.sampled_revisions.40k_2016.json: | |
| wget -qO- https://quarry.wmflabs.org/run/89016/output/0/json-lines?download=true > $@ | |
| datasets/jawiki.autolabeled_revisions.40k_2016.json: \ | |
| datasets/jawiki.sampled_revisions.40k_2016.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://ja.wikipedia.org \ | |
| --trusted-groups=abusefilter,bot,bureaucrat,checkuser,eliminator,interface-editor,oversight,rollbacker,sysop \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| + | |
| datasets/jawiki.autolabeled_revisions.w_cache.40k_2016.json: \ | |
| datasets/jawiki.autolabeled_revisions.40k_2016.json | |
| cat $< | \ | |
| revscoring extract \ | |
| editquality.feature_lists.jawiki.reverted \ | |
| --host https://ja.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| tuning_reports/jawiki.reverted.md: \ | |
| datasets/jawiki.autolabeled_revisions.w_cache.40k_2016.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.jawiki.reverted \ | |
| reverted_for_damage \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(reverted_weight)" \ | |
| --pop-rate "true=0.03256945140908635" \ | |
| --pop-rate "false=0.9674305485909136" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/jawiki.reverted.gradient_boosting.model: \ | |
| datasets/jawiki.autolabeled_revisions.w_cache.40k_2016.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.jawiki.reverted \ | |
| reverted_for_damage \ | |
| --version=$(reverted_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "true=$(reverted_weight)" \ | |
| --pop-rate "true=0.03256945140908635" \ | |
| --pop-rate "false=0.9674305485909136" \ | |
| --center --scale > $@ | |
| - | |
| jawiki_models: \ | |
| models/jawiki.reverted.gradient_boosting.model | |
| jawiki_tuning_reports: \ | |
| - tuning_reports/jawiki.reverted.md | |
| ? - | |
| + tuning_reports/jawiki.reverted.md- | |
| amsa@C235:~/editquality$ python differ.py "Korean Wikipedia" | |
| ############################# Korean Wikipedia ################################ | |
| - | |
| - # from https://quarry.wmflabs.org/query/17645 | |
| ? ^ | |
| + # From https://quarry.wmflabs.org/query/17645 | |
| ? ^ | |
| datasets/kowiki.sampled_revisions.20k_2016.json: | |
| wget -qO- https://quarry.wmflabs.org/run/165613/output/0/json-lines?download=true > $@ | |
| datasets/kowiki.autolabeled_revisions.20k_2016.json: \ | |
| datasets/kowiki.sampled_revisions.20k_2016.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://ko.wikipedia.org \ | |
| --trusted-groups=abusefilter,bot,bureaucrat,checkuser,eliminator,interface-editor,oversight,rollbacker,sysop \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| + | |
| datasets/kowiki.autolabeled_revisions.w_cache.20k_2016.json: \ | |
| datasets/kowiki.autolabeled_revisions.20k_2016.json | |
| cat $< | \ | |
| revscoring extract \ | |
| editquality.feature_lists.kowiki.reverted \ | |
| --host https://ko.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| tuning_reports/kowiki.reverted.md: \ | |
| datasets/kowiki.autolabeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.kowiki.reverted \ | |
| reverted_for_damage \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(reverted_weight)" \ | |
| --pop-rate "true=0.04717122705217348" \ | |
| --pop-rate "false=0.9528287729478265" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/kowiki.reverted.gradient_boosting.model: \ | |
| datasets/kowiki.autolabeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.kowiki.reverted \ | |
| reverted_for_damage \ | |
| --version=$(reverted_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "true=$(reverted_weight)" \ | |
| --pop-rate "true=0.04717122705217348" \ | |
| --pop-rate "false=0.9528287729478265" \ | |
| --center --scale > $@ | |
| kowiki_models: \ | |
| models/kowiki.reverted.gradient_boosting.model | |
| kowiki_tuning_reports: \ | |
| - tuning_reports/kowiki.reverted.md | |
| ? - | |
| + tuning_reports/kowiki.reverted.md- | |
| amsa@C235:~/editquality$ python differ.py "Tamil Wikipedia" | |
| - ############################## Tamil Wikipedia ################################ | |
| ? - | |
| + ############################# Tamil Wikipedia ################################ | |
| - | |
| # From https://quarry.wmflabs.org/query/20230 | |
| datasets/tawiki.sampled_revisions.20k_2017.json: | |
| wget -qO- https://quarry.wmflabs.org/run/190662/output/0/json-lines?download=true > $@ | |
| datasets/tawiki.autolabeled_revisions.20k_2017.json: \ | |
| datasets/tawiki.sampled_revisions.20k_2017.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://ta.wikipedia.org \ | |
| --trusted-groups=autopatrolled,bot,bureaucrat,patroller,rollbacker,sysop \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| datasets/tawiki.revisions_for_review.5k_2017.json: \ | |
| datasets/tawiki.autolabeled_revisions.20k_2017.json | |
| grep '"needs_review": true' $< | shuf > $@ | |
| datasets/tawiki.autolabeled_revisions.w_cache.20k_2017.json: \ | |
| datasets/tawiki.autolabeled_revisions.20k_2017.json | |
| cat $< | \ | |
| revscoring extract \ | |
| editquality.feature_lists.tawiki.reverted \ | |
| --host https://ta.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| tuning_reports/tawiki.reverted.md: \ | |
| datasets/tawiki.autolabeled_revisions.w_cache.20k_2017.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.tawiki.reverted \ | |
| reverted_for_damage \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(reverted_weight)" \ | |
| --pop-rate "true=0.015904172328753335" \ | |
| --pop-rate "false=0.9840958276712467" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/tawiki.reverted.gradient_boosting.model: \ | |
| datasets/tawiki.autolabeled_revisions.w_cache.20k_2017.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.tawiki.reverted \ | |
| reverted_for_damage \ | |
| --version=$(reverted_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=500' \ | |
| --label-weight "true=$(reverted_weight)" \ | |
| --pop-rate "true=0.015904172328753335" \ | |
| --pop-rate "false=0.9840958276712467" \ | |
| --center --scale > $@ | |
| tawiki_models: \ | |
| models/tawiki.reverted.gradient_boosting.model | |
| tawiki_tuning_reports: \ | |
| - tuning_reports/tawiki.reverted.md | |
| ? - | |
| + tuning_reports/tawiki.reverted.md- | |
| amsa@C235:~/editquality$ python differ.py "Ukranian Wikipedia" | |
| - ############################### Ukranian Wikipedia ############################ | |
| ? -- | |
| + ############################# Ukranian Wikipedia ################################ | |
| ? ++++ | |
| - | |
| datasets/ukwiki.sampled_revisions.20k_2015.json: | |
| wget -qO- http://quarry.wmflabs.org/run/48597/output/0/json-lines?download=true > $@ | |
| datasets/ukwiki.autolabeled_revisions.20k_2015.json: \ | |
| datasets/ukwiki.sampled_revisions.20k_2015.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://uk.wikipedia.org \ | |
| --trusted-groups=abusefilter,arbcom,bureaucrat,checkuser,rollbacker,sysop,bot \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| + | |
| + datasets/ukwiki.revisions_for_review.5k_2015.json: \ | |
| + datasets/ukwiki.autolabeled_revisions.20k_2015.json | |
| + grep '"needs_review": true' $< | shuf > $@ | |
| datasets/ukwiki.autolabeled_revisions.w_cache.20k_2015.json: \ | |
| datasets/ukwiki.autolabeled_revisions.20k_2015.json | |
| cat $< | \ | |
| revscoring extract \ | |
| editquality.feature_lists.ukwiki.reverted \ | |
| --host https://uk.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| tuning_reports/ukwiki.reverted.md: \ | |
| datasets/ukwiki.autolabeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.ukwiki.reverted \ | |
| reverted_for_damage \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(reverted_weight)" \ | |
| --pop-rate "true=0.021877665713282153" \ | |
| --pop-rate "false=0.9781223342867178" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/ukwiki.reverted.gradient_boosting.model: \ | |
| datasets/ukwiki.autolabeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.ukwiki.reverted \ | |
| reverted_for_damage \ | |
| --version=$(reverted_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "true=$(reverted_weight)" \ | |
| --pop-rate "true=0.021877665713282153" \ | |
| --pop-rate "false=0.9781223342867178" \ | |
| --center --scale > $@ | |
| ukwiki_models: \ | |
| - models/ukwiki.reverted.gradient_boosting.model | |
| ? - | |
| + models/ukwiki.reverted.gradient_boosting.model | |
| ukwiki_tuning_reports: \ | |
| - tuning_reports/ukwiki.reverted.md | |
| ? - - | |
| + tuning_reports/ukwiki.reverted.md |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| - ############################### Romanian Wikipedia ############################ | |
| ? -- | |
| + ############################# Romanian Wikipedia ################################ | |
| ? ++++ | |
| - | |
| datasets/rowiki.sampled_revisions.20k_2016.json: | |
| wget -qO- https://quarry.wmflabs.org/run/146926/output/0/json-lines?download=true > $@ | |
| datasets/rowiki.autolabeled_revisions.20k_2016.json: \ | |
| datasets/rowiki.sampled_revisions.20k_2016.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://ro.wikipedia.org \ | |
| --trusted-groups=abusefilter,arbcom,bureaucrat,checkuser,rollbacker,sysop,bot,templateeditor,patroller,autopatrolled \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| datasets/rowiki.human_labeled_revisions.5k_2016.json: | |
| ./utility fetch_labels \ | |
| https://labels.wmflabs.org/campaigns/rowiki/48/ > $@ | |
| datasets/rowiki.labeled_revisions.20k_2016.json: \ | |
| + datasets/rowiki.autolabeled_revisions.20k_2016.json \ | |
| - datasets/rowiki.human_labeled_revisions.5k_2016.json \ | |
| ? -- | |
| + datasets/rowiki.human_labeled_revisions.5k_2016.json | |
| - datasets/rowiki.autolabeled_revisions.20k_2016.json | |
| ./utility merge_labels $^ > $@ | |
| datasets/rowiki.autolabeled_revisions.w_cache.20k_2016.json: \ | |
| datasets/rowiki.autolabeled_revisions.20k_2016.json | |
| cat $< | \ | |
| revscoring extract \ | |
| editquality.feature_lists.rowiki.reverted \ | |
| --host https://ro.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| datasets/rowiki.labeled_revisions.w_cache.20k_2016.json: \ | |
| datasets/rowiki.labeled_revisions.20k_2016.json | |
| cat $< | \ | |
| revscoring extract \ | |
| - editquality.feature_lists.rowiki.reverted \ | |
| ? ------- | |
| + editquality.feature_lists.rowiki.damaging \ | |
| ? +++++++ | |
| editquality.feature_lists.rowiki.goodfaith \ | |
| - editquality.feature_lists.rowiki.damaging \ | |
| --host https://ro.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| - | |
| - tuning_reports/rowiki.reverted.md: \ | |
| - datasets/rowiki.autolabeled_revisions.w_cache.20k_2016.json | |
| - cat $< | \ | |
| - revscoring tune \ | |
| - config/classifiers.params.yaml \ | |
| - editquality.feature_lists.rowiki.reverted \ | |
| - reverted_for_damage \ | |
| - roc_auc.labels.true \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.03311324529811925" \ | |
| - --pop-rate "false=0.9668867547018808" \ | |
| - --center --scale \ | |
| - --cv-timeout=60 \ | |
| - --debug > $@ | |
| - | |
| - models/rowiki.reverted.gradient_boosting.model: \ | |
| - datasets/rowiki.autolabeled_revisions.w_cache.20k_2016.json | |
| - cat $< | \ | |
| - revscoring cv_train \ | |
| - revscoring.scoring.models.GradientBoosting \ | |
| - editquality.feature_lists.rowiki.reverted \ | |
| - reverted_for_damage \ | |
| - --version=$(reverted_major_minor).1 \ | |
| - -p 'max_features="log2"' \ | |
| - -p 'n_estimators=700' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_depth=7' \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.03311324529811925" \ | |
| - --pop-rate "false=0.9668867547018808" \ | |
| - --center --scale > $@ | |
| tuning_reports/rowiki.damaging.md: \ | |
| datasets/rowiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.rowiki.damaging \ | |
| damaging \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.04956982793117247" \ | |
| --pop-rate "false=0.9504301720688275" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/rowiki.damaging.gradient_boosting.model: \ | |
| datasets/rowiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.rowiki.damaging \ | |
| damaging \ | |
| --version=$(damaging_major_minor).1 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=5' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.04956982793117247" \ | |
| --pop-rate "false=0.9504301720688275" \ | |
| --center --scale > $@ | |
| tuning_reports/rowiki.goodfaith.md: \ | |
| datasets/rowiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.rowiki.goodfaith \ | |
| goodfaith \ | |
| roc_auc.labels.true \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9699379751900761" \ | |
| - --pop-rate "false=0.030062024809923968" \ | |
| ? - | |
| + --pop-rate "false=0.030062024809923926" \ | |
| ? + | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/rowiki.goodfaith.gradient_boosting.model: \ | |
| datasets/rowiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.rowiki.goodfaith \ | |
| goodfaith \ | |
| --version=$(goodfaith_major_minor).1 \ | |
| + -p 'learning_rate=0.1' \ | |
| -p 'max_depth=3' \ | |
| - -p 'learning_rate=0.1' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=300' \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9699379751900761" \ | |
| - --pop-rate "false=0.030062024809923968" \ | |
| ? - | |
| + --pop-rate "false=0.030062024809923926" \ | |
| ? + | |
| --center --scale > $@ | |
| rowiki_models: \ | |
| - models/rowiki.damaging.gradient_boosting.model \ | |
| ? - | |
| + models/rowiki.damaging.gradient_boosting.model \ | |
| - models/rowiki.goodfaith.gradient_boosting.model | |
| ? - | |
| + models/rowiki.goodfaith.gradient_boosting.model | |
| rowiki_tuning_reports: \ | |
| - tuning_reports/rowiki.damaging.md \ | |
| ? - | |
| + tuning_reports/rowiki.damaging.md \ | |
| - tuning_reports/rowiki.goodfaith.md | |
| ? - | |
| + tuning_reports/rowiki.goodfaith.md | |
| - ############################### Russian Wikipedia ############################ | |
| ? -- | |
| + ############################# Russian Wikipedia ################################ | |
| ? ++++ | |
| - | |
| datasets/ruwiki.sampled_revisions.20k_2015.json: | |
| wget -qO- https://quarry.wmflabs.org/run/48649/output/0/json-lines?download=true > $@ | |
| datasets/ruwiki.autolabeled_revisions.20k_2015.json: \ | |
| datasets/ruwiki.sampled_revisions.20k_2015.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://ru.wikipedia.org \ | |
| --trusted-groups=abusefilter,arbcom,bureaucrat,checkuser,rollbacker,sysop,bot \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| datasets/ruwiki.human_labeled_revisions.5k_2015.json: | |
| ./utility fetch_labels \ | |
| https://labels.wmflabs.org/campaigns/ruwiki/10/ > $@ | |
| datasets/ruwiki.labeled_revisions.20k_2015.json: \ | |
| + datasets/ruwiki.autolabeled_revisions.20k_2015.json \ | |
| - datasets/ruwiki.human_labeled_revisions.5k_2015.json \ | |
| ? -- | |
| + datasets/ruwiki.human_labeled_revisions.5k_2015.json | |
| + ./utility merge_labels $^ > $@ | |
| + | |
| + datasets/ruwiki.autolabeled_revisions.w_cache.20k_2015.json: \ | |
| datasets/ruwiki.autolabeled_revisions.20k_2015.json | |
| - ./utility merge_labels $^ > $@ | |
| + cat $< | \ | |
| + revscoring extract \ | |
| + editquality.feature_lists.ruwiki.reverted \ | |
| + --host https://ru.wikipedia.org \ | |
| + --extractor $(max_extractors) \ | |
| + --verbose > $@ | |
| datasets/ruwiki.labeled_revisions.w_cache.20k_2015.json: \ | |
| datasets/ruwiki.labeled_revisions.20k_2015.json | |
| cat $< | \ | |
| revscoring extract \ | |
| - editquality.feature_lists.ruwiki.reverted \ | |
| ? ------- | |
| + editquality.feature_lists.ruwiki.damaging \ | |
| ? +++++++ | |
| editquality.feature_lists.ruwiki.goodfaith \ | |
| - editquality.feature_lists.ruwiki.damaging \ | |
| --host https://ru.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| - | |
| - tuning_reports/ruwiki.reverted.md: \ | |
| - datasets/ruwiki.labeled_revisions.w_cache.20k_2015.json | |
| - cat $< | \ | |
| - revscoring tune \ | |
| - config/classifiers.params.yaml \ | |
| - editquality.feature_lists.ruwiki.reverted \ | |
| - reverted_for_damage \ | |
| - roc_auc.labels.true \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.05317532664843513" \ | |
| - --pop-rate "false=0.9468246733515648" \ | |
| - --center --scale \ | |
| - --cv-timeout=60 \ | |
| - --debug > $@ | |
| - | |
| - models/ruwiki.reverted.gradient_boosting.model: \ | |
| - datasets/ruwiki.labeled_revisions.w_cache.20k_2015.json | |
| - cat $< | \ | |
| - revscoring cv_train \ | |
| - revscoring.scoring.models.GradientBoosting \ | |
| - editquality.feature_lists.ruwiki.reverted \ | |
| - reverted_for_damage \ | |
| - --version=$(reverted_major_minor).0 \ | |
| - -p 'max_features="log2"' \ | |
| - -p 'n_estimators=700' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_depth=5' \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.05317532664843513" \ | |
| - --pop-rate "false=0.9468246733515648" \ | |
| - --center --scale > $@ | |
| tuning_reports/ruwiki.damaging.md: \ | |
| datasets/ruwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.ruwiki.damaging \ | |
| damaging \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.053479185657854755" \ | |
| - --pop-rate "false=0.9465208143421452" \ | |
| ? ^ | |
| + --pop-rate "false=0.9465208143421453" \ | |
| ? ^ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/ruwiki.damaging.gradient_boosting.model: \ | |
| datasets/ruwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.ruwiki.damaging \ | |
| damaging \ | |
| --version=$(damaging_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=5' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.053479185657854755" \ | |
| - --pop-rate "false=0.9465208143421452" \ | |
| ? ^ | |
| + --pop-rate "false=0.9465208143421453" \ | |
| ? ^ | |
| --center --scale > $@ | |
| tuning_reports/ruwiki.goodfaith.md: \ | |
| datasets/ruwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.ruwiki.goodfaith \ | |
| goodfaith \ | |
| roc_auc.labels.true \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9713866099463182" \ | |
| - --pop-rate "false=0.02861339005368176" \ | |
| ? ^ | |
| + --pop-rate "false=0.028613390053681798" \ | |
| ? ^^ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/ruwiki.goodfaith.gradient_boosting.model: \ | |
| datasets/ruwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.ruwiki.goodfaith \ | |
| goodfaith \ | |
| --version=$(goodfaith_major_minor).0 \ | |
| + -p 'learning_rate=0.1' \ | |
| -p 'max_depth=3' \ | |
| - -p 'learning_rate=0.1' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=300' \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9713866099463182" \ | |
| - --pop-rate "false=0.02861339005368176" \ | |
| ? ^ | |
| + --pop-rate "false=0.028613390053681798" \ | |
| ? ^^ | |
| --center --scale > $@ | |
| ruwiki_models: \ | |
| - models/ruwiki.damaging.gradient_boosting.model \ | |
| ? - | |
| + models/ruwiki.damaging.gradient_boosting.model \ | |
| - models/ruwiki.goodfaith.gradient_boosting.model | |
| ? - | |
| + models/ruwiki.goodfaith.gradient_boosting.model | |
| ruwiki_tuning_reports: \ | |
| - tuning_reports/ruwiki.damaging.md \ | |
| ? - | |
| + tuning_reports/ruwiki.damaging.md \ | |
| - tuning_reports/ruwiki.goodfaith.md | |
| ? - | |
| + tuning_reports/ruwiki.goodfaith.md | |
| - ################################# Albanian Wikipedia ########################### | |
| ? ---- | |
| + ############################# Albanian Wikipedia ################################ | |
| ? +++++ | |
| - | |
| # From https://quarry.wmflabs.org/query/17988 | |
| datasets/sqwiki.sampled_revisions.20k_2016.json: | |
| wget -qO- https://quarry.wmflabs.org/run/169099/output/0/json-lines?download=true > $@ | |
| datasets/sqwiki.autolabeled_revisions.20k_2016.json: \ | |
| datasets/sqwiki.sampled_revisions.20k_2016.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://sq.wikipedia.org \ | |
| --trusted-groups=sysop,oversight,trusted,bot,rollbacker,checkuser,abusefilter,bureaucrat \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| datasets/sqwiki.human_labeled_revisions.5k_2016.json: | |
| ./utility fetch_labels \ | |
| - https://labels.wmflabs.org/campaigns/sqwiki/57/ > \ | |
| ? ^ | |
| + https://labels.wmflabs.org/campaigns/sqwiki/57/ > $@ | |
| ? ^^ | |
| - datasets/sqwiki.human_labeled_revisions.5k_2016.json | |
| datasets/sqwiki.labeled_revisions.20k_2016.json: \ | |
| + datasets/sqwiki.autolabeled_revisions.20k_2016.json \ | |
| - datasets/sqwiki.human_labeled_revisions.5k_2016.json \ | |
| ? -- | |
| + datasets/sqwiki.human_labeled_revisions.5k_2016.json | |
| + ./utility merge_labels $^ > $@ | |
| + | |
| + datasets/sqwiki.autolabeled_revisions.w_cache.20k_2016.json: \ | |
| datasets/sqwiki.autolabeled_revisions.20k_2016.json | |
| - ./utility merge_labels $^ > $@ | |
| + cat $< | \ | |
| + revscoring extract \ | |
| + editquality.feature_lists.sqwiki.reverted \ | |
| + --host https://sq.wikipedia.org \ | |
| + --extractor $(max_extractors) \ | |
| + --verbose > $@ | |
| datasets/sqwiki.labeled_revisions.w_cache.20k_2016.json: \ | |
| datasets/sqwiki.labeled_revisions.20k_2016.json | |
| - cat datasets/sqwiki.labeled_revisions.20k_2016.json | \ | |
| + cat $< | \ | |
| revscoring extract \ | |
| - editquality.feature_lists.sqwiki.reverted \ | |
| editquality.feature_lists.sqwiki.damaging \ | |
| editquality.feature_lists.sqwiki.goodfaith \ | |
| --host https://sq.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| - --verbose > \ | |
| ? ^ | |
| + --verbose > $@ | |
| ? ^^ | |
| - datasets/sqwiki.labeled_revisions.w_cache.20k_2016.json | |
| - | |
| - tuning_reports/sqwiki.reverted.md: \ | |
| - datasets/sqwiki.labeled_revisions.w_cache.20k_2016.json | |
| - cat $< | \ | |
| - revscoring tune \ | |
| - config/classifiers.params.yaml \ | |
| - editquality.feature_lists.sqwiki.reverted \ | |
| - reverted_for_damage \ | |
| - roc_auc.labels.true \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.018051805180518053" \ | |
| - --pop-rate "false=0.9819481948194819" \ | |
| - --center --scale \ | |
| - --cv-timeout=60 \ | |
| - --debug > $@ | |
| - | |
| - models/sqwiki.reverted.gradient_boosting.model: \ | |
| - datasets/sqwiki.labeled_revisions.w_cache.20k_2016.json | |
| - cat $< | \ | |
| - revscoring cv_train \ | |
| - revscoring.scoring.models.GradientBoosting \ | |
| - editquality.feature_lists.sqwiki.reverted \ | |
| - reverted_for_damage \ | |
| - --version=$(reverted_major_minor).0 \ | |
| - -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| - -p 'n_estimators=500' \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.018051805180518053" \ | |
| - --pop-rate "false=0.9819481948194819" \ | |
| - --center --scale > $@ | |
| tuning_reports/sqwiki.damaging.md: \ | |
| datasets/sqwiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.sqwiki.damaging \ | |
| damaging \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.0287028702870287" \ | |
| --pop-rate "false=0.9712971297129713" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/sqwiki.damaging.gradient_boosting.model: \ | |
| datasets/sqwiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.sqwiki.damaging \ | |
| damaging \ | |
| --version=$(damaging_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=500' \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.0287028702870287" \ | |
| --pop-rate "false=0.9712971297129713" \ | |
| --center --scale > $@ | |
| tuning_reports/sqwiki.goodfaith.md: \ | |
| datasets/sqwiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.sqwiki.goodfaith \ | |
| goodfaith \ | |
| roc_auc.labels.true \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9763476347634763" \ | |
| - --pop-rate "false=0.023652365236523653" \ | |
| ? ^^ | |
| + --pop-rate "false=0.023652365236523698" \ | |
| ? ^^ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/sqwiki.goodfaith.gradient_boosting.model: \ | |
| datasets/sqwiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.sqwiki.goodfaith \ | |
| goodfaith \ | |
| --version=$(goodfaith_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=500' \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9763476347634763" \ | |
| - --pop-rate "false=0.023652365236523653" \ | |
| ? ^^ | |
| + --pop-rate "false=0.023652365236523698" \ | |
| ? ^^ | |
| --center --scale > $@ | |
| + | |
| + sqwiki_models: \ | |
| + models/sqwiki.damaging.gradient_boosting.model \ | |
| + models/sqwiki.goodfaith.gradient_boosting.model | |
| sqwiki_tuning_reports: \ | |
| tuning_reports/sqwiki.damaging.md \ | |
| tuning_reports/sqwiki.goodfaith.md | |
| - | |
| - sqwiki_models: \ | |
| - models/sqwiki.damaging.gradient_boosting.model \ | |
| - models/sqwiki.goodfaith.gradient_boosting.model | |
| - ############################# Turkish Wikipedia ############################ | |
| + ############################# Turkish Wikipedia ################################ | |
| ? ++++ | |
| + datasets/trwiki.sampled_revisions.20k_2015.json: | |
| + wget -qO- http://quarry.wmflabs.org/run/168286/output/0/json-lines?download=true > $@ | |
| + | |
| + datasets/trwiki.autolabeled_revisions.20k_2015.json: \ | |
| + datasets/trwiki.sampled_revisions.20k_2015.json | |
| + cat $< | \ | |
| + ./utility autolabel --host=https://tr.wikipedia.org \ | |
| + --trusted-groups=sysop,oversight,trusted,bot,rollbacker,checkuser,abusefilter,bureaucrat \ | |
| + --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| + --verbose > $@ | |
| + | |
| - datasets/trwiki.human_labeled_revisions.20k_2015.json: | |
| ? ^^ | |
| + datasets/trwiki.human_labeled_revisions.5k_2015.json: | |
| ? ^ | |
| ./utility fetch_labels \ | |
| https://labels.wmflabs.org/campaigns/trwiki/5/ > $@ | |
| datasets/trwiki.labeled_revisions.20k_2015.json: \ | |
| + datasets/trwiki.autolabeled_revisions.20k_2015.json \ | |
| - datasets/trwiki.human_labeled_revisions.20k_2015.json | |
| ? ^^ | |
| + datasets/trwiki.human_labeled_revisions.5k_2015.json | |
| ? ^ | |
| + ./utility merge_labels $^ > $@ | |
| - cat $< | \ | |
| - ./utility autolabel --host=https://tr.wikipedia.org \ | |
| - --trusted-groups=sysop,oversight,trusted,bot,rollbacker,checkuser,abusefilter,bureaucrat \ | |
| - --trusted-edits=1000 \ | |
| - --verbose > $@ | |
| - datasets/trwiki.labeled_revisions.w_cache.20k_2015.json: \ | |
| - datasets/trwiki.labeled_revisions.20k_2015.json | |
| - cat $< | \ | |
| - revscoring extract \ | |
| - editquality.feature_lists.trwiki.reverted \ | |
| - editquality.feature_lists.trwiki.damaging \ | |
| - editquality.feature_lists.trwiki.goodfaith \ | |
| - --host https://tr.wikipedia.org \ | |
| - --extractor $(max_extractors) \ | |
| - --verbose > $@ | |
| - | |
| - datasets/trwiki.sampled_revisions.20k_2016.json: | |
| - wget -qO- http://quarry.wmflabs.org/run/168286/output/0/json-lines?download=true > $@ | |
| - | |
| - datasets/trwiki.autolabeled_revisions.20k_2016.json: \ | |
| - datasets/trwiki.sampled_revisions.20k_2016.json | |
| - cat $< | \ | |
| - ./utility autolabel --host=https://tr.wikipedia.org \ | |
| - --trusted-groups=abusefilter,arbcom,bureaucrat,checkuser,rollbacker,sysop,bot \ | |
| - --trusted-edits=1000 \ | |
| - --verbose > $@ | |
| - | |
| - datasets/trwiki.revisions_to_review.20k_2016.json: \ | |
| - datasets/trwiki.autolabeled_revisions.20k_2016.json | |
| - cat $< | \ | |
| - grep '"needs_review": true' > $@ | |
| - | |
| - datasets/trwiki.autolabeled_revisions.w_cache.20k_2016.json: \ | |
| ? ^ | |
| + datasets/trwiki.autolabeled_revisions.w_cache.20k_2015.json: \ | |
| ? ^ | |
| - datasets/trwiki.autolabeled_revisions.20k_2016.json | |
| ? ^ | |
| + datasets/trwiki.autolabeled_revisions.20k_2015.json | |
| ? ^ | |
| cat $< | \ | |
| revscoring extract \ | |
| editquality.feature_lists.trwiki.reverted \ | |
| --host https://tr.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| - tuning_reports/trwiki.reverted.md: \ | |
| - datasets/trwiki.labeled_revisions.w_cache.20k_2015.json | |
| ? -- | |
| + datasets/trwiki.labeled_revisions.w_cache.20k_2015.json: \ | |
| ? +++ | |
| + datasets/trwiki.labeled_revisions.20k_2015.json | |
| cat $< | \ | |
| - revscoring tune \ | |
| ? ^^^ | |
| + revscoring extract \ | |
| ? ++ ^^^^ | |
| - config/classifiers.params.yaml \ | |
| - editquality.feature_lists.trwiki.reverted \ | |
| ? ------- | |
| + editquality.feature_lists.trwiki.damaging \ | |
| ? +++++++ | |
| - reverted_for_damage \ | |
| - roc_auc.labels.true \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.11489598623272763" \ | |
| - --pop-rate "false=0.8851040137672723" \ | |
| - --center --scale \ | |
| - --cv-timeout=60 \ | |
| - --debug > $@ | |
| - | |
| - models/trwiki.reverted.gradient_boosting.model: \ | |
| - datasets/trwiki.labeled_revisions.w_cache.20k_2015.json | |
| - cat $< | \ | |
| - revscoring cv_train \ | |
| - revscoring.scoring.models.GradientBoosting \ | |
| - editquality.feature_lists.trwiki.reverted \ | |
| ? ^^^^^ ^^ | |
| + editquality.feature_lists.trwiki.goodfaith \ | |
| ? ^^^^^^^ ^ | |
| + --host https://tr.wikipedia.org \ | |
| + --extractor $(max_extractors) \ | |
| + --verbose > $@ | |
| - reverted_for_damage \ | |
| - --version=$(reverted_major_minor).0 \ | |
| - -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| - -p 'n_estimators=700' \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.11489598623272763" \ | |
| - --pop-rate "false=0.8851040137672723" \ | |
| - --center --scale > $@ | |
| tuning_reports/trwiki.damaging.md: \ | |
| datasets/trwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.trwiki.damaging \ | |
| damaging \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.0495014425266994" \ | |
| --pop-rate "false=0.9504985574733006" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/trwiki.damaging.gradient_boosting.model: \ | |
| datasets/trwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.trwiki.damaging \ | |
| damaging \ | |
| --version=$(damaging_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.0495014425266994" \ | |
| --pop-rate "false=0.9504985574733006" \ | |
| --center --scale > $@ | |
| tuning_reports/trwiki.goodfaith.md: \ | |
| datasets/trwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.trwiki.goodfaith \ | |
| goodfaith \ | |
| roc_auc.labels.true \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9538897605911829" \ | |
| - --pop-rate "false=0.04611023940881713" \ | |
| ? ^^ | |
| + --pop-rate "false=0.04611023940881709" \ | |
| ? ^^ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/trwiki.goodfaith.gradient_boosting.model: \ | |
| datasets/trwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.trwiki.goodfaith \ | |
| goodfaith \ | |
| --version=$(goodfaith_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9538897605911829" \ | |
| - --pop-rate "false=0.04611023940881713" \ | |
| ? ^^ | |
| + --pop-rate "false=0.04611023940881709" \ | |
| ? ^^ | |
| --center --scale > $@ | |
| trwiki_models: \ | |
| - models/trwiki.damaging.gradient_boosting.model \ | |
| ? - | |
| + models/trwiki.damaging.gradient_boosting.model \ | |
| - models/trwiki.goodfaith.gradient_boosting.model | |
| ? - | |
| + models/trwiki.goodfaith.gradient_boosting.model | |
| trwiki_tuning_reports: \ | |
| - tuning_reports/trwiki.damaging.md \ | |
| ? - | |
| + tuning_reports/trwiki.damaging.md \ | |
| - tuning_reports/trwiki.goodfaith.md | |
| ? - | |
| + tuning_reports/trwiki.goodfaith.md | |
| amsa@C235:~/editquality$ python differ.py "Chinese Wikipedia" | |
| - ############################### Chinese Wikipedia ############################# | |
| ? -- | |
| + ############################# Chinese Wikipedia ################################ | |
| ? +++ | |
| - | |
| datasets/zhwiki.sampled_revisions.100k_2016.json: | |
| wget -qO- https://quarry.wmflabs.org/run/131979/output/0/json-lines?download=true > $@ | |
| datasets/zhwiki.autolabeled_revisions.100k_2016.json: \ | |
| datasets/zhwiki.sampled_revisions.100k_2016.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://zh.wikipedia.org \ | |
| --trusted-groups=checkuser,bureaucrat,sysop,eliminator,bot \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| - datasets/zhwiki.revisions_to_review.5k_2016.json: \ | |
| ? ^ | |
| + datasets/zhwiki.revisions_for_review.5k_2016.json: \ | |
| ? ^ + | |
| datasets/zhwiki.autolabeled_revisions.100k_2016.json | |
| + ( \ | |
| - (cat $< | \ | |
| ? ^ | |
| + cat $< | \ | |
| ? ^ | |
| grep '"needs_review": true' | \ | |
| shuf -n 2500; \ | |
| cat $< | \ | |
| grep '"needs_review": false' | \ | |
| shuf -n 2500 \ | |
| - ) | shuf > $@ | |
| + ) | shuf > $@ | |
| ? + | |
| + | |
| + datasets/zhwiki.autolabeled_revisions.w_cache.100k_2016.json: \ | |
| + datasets/zhwiki.autolabeled_revisions.100k_2016.json | |
| + cat $< | \ | |
| + revscoring extract \ | |
| + editquality.feature_lists.zhwiki.reverted \ | |
| + --host https://zh.wikipedia.org \ | |
| + --extractor $(max_extractors) \ | |
| + --verbose > $@ | |
| - ############################### Urdu Wikipedia ################################# | |
| ? -- - | |
| + ############################# Urdu Wikipedia ################################ | |
| - | |
| datasets/urwiki.sampled_revisions.500k_2015.json: | |
| wget -qO- http://quarry.wmflabs.org/run/64277/output/0/json-lines?download=true > $@ | |
| datasets/urwiki.autolabeled_revisions.500k_2015.json: \ | |
| datasets/urwiki.sampled_revisions.500k_2015.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://ur.wikipedia.org \ | |
| --trusted-groups=bot,bureaucrat,sysop,rollbackers \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| - | |
| datasets/urwiki.revisions_for_review.5k_2015.json: \ | |
| datasets/urwiki.autolabeled_revisions.500k_2015.json | |
| ( \ | |
| - cat $< | \ | |
| ? ^ | |
| + cat $< | \ | |
| ? ^ | |
| - grep '"needs_review": true' | \ | |
| ? ^ | |
| + grep '"needs_review": true' | \ | |
| ? ^ | |
| - shuf -n 2500; \ | |
| ? ^ | |
| + shuf -n 2500; \ | |
| ? ^ | |
| - cat $< | \ | |
| ? - | |
| + cat $< | \ | |
| - grep '"needs_review": false' | \ | |
| ? - | |
| + grep '"needs_review": false' | \ | |
| - shuf -n 2500 \ | |
| ? - | |
| + shuf -n 2500 \ | |
| ) | shuf > $@ | |
| + datasets/urwiki.autolabeled_revisions.w_cache.500k_2015.json: \ | |
| + datasets/urwiki.autolabeled_revisions.500k_2015.json | |
| + cat $< | \ | |
| + revscoring extract \ | |
| + editquality.feature_lists.urwiki.reverted \ | |
| + --host https://ur.wikipedia.org \ | |
| + --extractor $(max_extractors) \ | |
| + --verbose > $@ | |
| - #urwiki_models: \ | |
| - # models/urwiki.reverted.gradient_boosting.model | |
| - | |
| - #urwiki_tuning_reports: \ | |
| - # tuning_reports/urwiki.reverted.md | |
| - ############################## Serbian Wikipedia ################################ | |
| ? - | |
| + ############################# Serbian Wikipedia ################################ | |
| - # From https://quarry.wmflabs.org/query/22469 | |
| datasets/srwiki.sampled_revisions.120k_2017.json: | |
| wget -qO- https://quarry.wmflabs.org/run/211097/output/0/json-lines?download=true > $@ | |
| datasets/srwiki.autolabeled_revisions.120k_2017.json: \ | |
| datasets/srwiki.sampled_revisions.120k_2017.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://sr.wikipedia.org \ | |
| --trusted-groups=autopatrolled,bot,bureaucrat,patroller,rollbacker,sysop \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| datasets/srwiki.revisions_for_review.5k_2017.json: \ | |
| datasets/srwiki.autolabeled_revisions.120k_2017.json | |
| grep '"needs_review": true' $< | shuf > $@ | |
| + | |
| + datasets/srwiki.autolabeled_revisions.w_cache.120k_2017.json: \ | |
| + datasets/srwiki.autolabeled_revisions.120k_2017.json | |
| + cat $< | \ | |
| + revscoring extract \ | |
| + editquality.feature_lists.srwiki.reverted \ | |
| + --host https://sr.wikipedia.org \ | |
| + --extractor $(max_extractors) \ | |
| + --verbose > $@ | |
| - ################################# Latvian Wikipedia ############################ | |
| ? ---- | |
| + ############################# Latvian Wikipedia ################################ | |
| ? ++++ | |
| - | |
| # From https://quarry.wmflabs.org/query/17989 | |
| datasets/lvwiki.sampled_revisions.20k_2016.json: | |
| wget -qO- https://quarry.wmflabs.org/run/169100/output/0/json-lines?download=true > $@ | |
| datasets/lvwiki.autolabeled_revisions.20k_2016.json: \ | |
| datasets/lvwiki.sampled_revisions.20k_2016.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://lv.wikipedia.org \ | |
| --trusted-groups=sysop,bureaucrat,bot,oversight,checkuser,patroller,autopatrolled \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| + datasets/lvwiki.revisions_for_review.5k_2016.json: \ | |
| + datasets/lvwiki.autolabeled_revisions.20k_2016.json | |
| + grep '"needs_review": true' $< | shuf > $@ | |
| + | |
| + datasets/lvwiki.autolabeled_revisions.w_cache.20k_2016.json: \ | |
| + datasets/lvwiki.autolabeled_revisions.20k_2016.json | |
| + cat $< | \ | |
| + revscoring extract \ | |
| + editquality.feature_lists.lvwiki.reverted \ | |
| + --host https://lv.wikipedia.org \ | |
| + --extractor $(max_extractors) \ | |
| + --verbose > $@ | |
| ############################# Azeri Wikipedia ################################ | |
| - | |
| datasets/azwiki.sampled_revisions.20k_2016.json: | |
| wget -qO- https://quarry.wmflabs.org/run/99533/output/0/json-lines?download=true > $@ | |
| datasets/azwiki.autolabeled_revisions.20k_2016.json: \ | |
| datasets/azwiki.sampled_revisions.20k_2016.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://az.wikipedia.org \ | |
| --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| datasets/azwiki.revisions_for_review.5k_2016.json: \ | |
| datasets/azwiki.autolabeled_revisions.20k_2016.json | |
| ( \ | |
| - cat $< | \ | |
| ? - | |
| + cat $< | \ | |
| - grep '"needs_review": true' | \ | |
| ? - | |
| + grep '"needs_review": true' | \ | |
| - shuf -n 2500; \ | |
| ? - | |
| + shuf -n 2500; \ | |
| - cat $< | \ | |
| ? - | |
| + cat $< | \ | |
| - grep '"needs_review": false' | \ | |
| ? - | |
| + grep '"needs_review": false' | \ | |
| - shuf -n 2500 \ | |
| ? - | |
| + shuf -n 2500 \ | |
| ) | shuf > $@ | |
| + | |
| + datasets/azwiki.autolabeled_revisions.w_cache.20k_2016.json: \ | |
| + datasets/azwiki.autolabeled_revisions.20k_2016.json | |
| + cat $< | \ | |
| + revscoring extract \ | |
| + editquality.feature_lists.azwiki.reverted \ | |
| + --host https://az.wikipedia.org \ | |
| + --extractor $(max_extractors) \ | |
| + --verbose > $@ | |
| + |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| - ############################# English Wikipedia ############################### | |
| + ############################# English Wikipedia ################################ | |
| ? + | |
| + | |
| datasets/enwiki.human_labeled_revisions.20k_2015.json: | |
| ./utility fetch_labels \ | |
| https://labels.wmflabs.org/campaigns/enwiki/4/ > $@ | |
| datasets/enwiki.labeled_revisions.20k_2015.json: \ | |
| datasets/enwiki.human_labeled_revisions.20k_2015.json | |
| + ./utility merge_labels $^ > $@ | |
| - cat $< | \ | |
| - ./utility autolabel --host=https://en.wikipedia.org \ | |
| - --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat \ | |
| - --trusted-edits=1000 \ | |
| - --verbose > $@ | |
| datasets/enwiki.labeled_revisions.w_cache.20k_2015.json: \ | |
| datasets/enwiki.labeled_revisions.20k_2015.json | |
| cat $< | \ | |
| revscoring extract \ | |
| - editquality.feature_lists.enwiki.reverted \ | |
| ? ------- | |
| + editquality.feature_lists.enwiki.damaging \ | |
| ? +++++++ | |
| editquality.feature_lists.enwiki.goodfaith \ | |
| - editquality.feature_lists.enwiki.damaging \ | |
| --host https://en.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| - | |
| - tuning_reports/enwiki.reverted.md: \ | |
| - datasets/enwiki.labeled_revisions.w_cache.20k_2015.json | |
| - cat $< | \ | |
| - revscoring tune \ | |
| - config/classifiers.params.yaml \ | |
| - editquality.feature_lists.enwiki.reverted \ | |
| - reverted_for_damage \ | |
| - roc_auc.labels.true \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.0728788421214136" \ | |
| - --pop-rate "false=0.9271211578785864" \ | |
| - --center --scale \ | |
| - --cv-timeout=60 \ | |
| - --debug > $@ | |
| - | |
| - models/enwiki.reverted.gradient_boosting.model: \ | |
| - datasets/enwiki.labeled_revisions.w_cache.20k_2015.json | |
| - cat $< | \ | |
| - revscoring cv_train \ | |
| - revscoring.scoring.models.GradientBoosting \ | |
| - editquality.feature_lists.enwiki.reverted \ | |
| - reverted_for_damage \ | |
| - --version=$(reverted_major_minor).0 \ | |
| - -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| - -p 'n_estimators=700' \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.0728788421214136" \ | |
| - --pop-rate "false=0.9271211578785864" \ | |
| - --center --scale > $@ | |
| - | |
| tuning_reports/enwiki.damaging.md: \ | |
| datasets/enwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.enwiki.damaging \ | |
| damaging \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.034163555464634586" \ | |
| --pop-rate "false=0.9658364445353654" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/enwiki.damaging.gradient_boosting.model: \ | |
| datasets/enwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.enwiki.damaging \ | |
| damaging \ | |
| --version=$(damaging_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.034163555464634586" \ | |
| --pop-rate "false=0.9658364445353654" \ | |
| --center --scale > $@ | |
| tuning_reports/enwiki.goodfaith.md: \ | |
| datasets/enwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.enwiki.goodfaith \ | |
| goodfaith \ | |
| roc_auc.labels.true \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9671661637600368" \ | |
| - --pop-rate "false=0.03283383623996318" \ | |
| ? ^ | |
| + --pop-rate "false=0.032833836239963166" \ | |
| ? ^^ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/enwiki.goodfaith.gradient_boosting.model: \ | |
| datasets/enwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.enwiki.goodfaith \ | |
| goodfaith \ | |
| --version=$(goodfaith_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9671661637600368" \ | |
| - --pop-rate "false=0.03283383623996318" \ | |
| ? ^ | |
| + --pop-rate "false=0.032833836239963166" \ | |
| ? ^^ | |
| --center --scale > $@ | |
| enwiki_models: \ | |
| - models/enwiki.damaging.gradient_boosting.model \ | |
| ? - | |
| + models/enwiki.damaging.gradient_boosting.model \ | |
| - models/enwiki.goodfaith.gradient_boosting.model | |
| ? - | |
| + models/enwiki.goodfaith.gradient_boosting.model | |
| enwiki_tuning_reports: \ | |
| - tuning_reports/enwiki.damaging.md \ | |
| ? - | |
| + tuning_reports/enwiki.damaging.md \ | |
| - tuning_reports/enwiki.goodfaith.md | |
| ? - | |
| + tuning_reports/enwiki.goodfaith.md | |
| - | |
| ############################# Finnish Wikipedia ################################ | |
| datasets/fiwiki.sampled_revisions.20k_2016.json: | |
| wget -qO- https://quarry.wmflabs.org/run/161254/output/0/json-lines?download=true > $@ | |
| - | |
| - # From https://quarry.wmflabs.org/query/19212 | |
| - datasets/fiwiki.sampled_revisions.20k_2017.json: | |
| - wget -qO- https://quarry.wmflabs.org/run/181764/output/0/json-lines?download=true > $@ | |
| datasets/fiwiki.autolabeled_revisions.20k_2016.json: \ | |
| datasets/fiwiki.sampled_revisions.20k_2016.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://fi.wikipedia.org \ | |
| --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,autoreview,abusefilter,bureaucrat \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| + | |
| + # From https://quarry.wmflabs.org/query/19212 | |
| + datasets/fiwiki.sampled_revisions.20k_2017.json: | |
| + wget -qO- https://quarry.wmflabs.org/run/181764/output/0/json-lines?download=true > $@ | |
| datasets/fiwiki.autolabeled_revisions.20k_2017.json: \ | |
| datasets/fiwiki.sampled_revisions.20k_2017.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://fi.wikipedia.org \ | |
| --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,autoreview,abusefilter,bureaucrat \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| datasets/fiwiki.human_labeled_revisions.5k_2016.json: | |
| ./utility fetch_labels \ | |
| https://labels.wmflabs.org/campaigns/fiwiki/55/ > $@ | |
| datasets/fiwiki.labeled_revisions.20k_2016.json: \ | |
| + datasets/fiwiki.autolabeled_revisions.20k_2016.json \ | |
| - datasets/fiwiki.human_labeled_revisions.5k_2016.json \ | |
| ? -- | |
| + datasets/fiwiki.human_labeled_revisions.5k_2016.json | |
| - datasets/fiwiki.autolabeled_revisions.20k_2016.json | |
| ./utility merge_labels $^ > $@ | |
| datasets/fiwiki.labeled_revisions.w_cache.20k_2016.json: \ | |
| datasets/fiwiki.labeled_revisions.20k_2016.json | |
| cat $< | \ | |
| revscoring extract \ | |
| - editquality.feature_lists.fiwiki.reverted \ | |
| editquality.feature_lists.fiwiki.damaging \ | |
| editquality.feature_lists.fiwiki.goodfaith \ | |
| --host https://fi.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| - | |
| - tuning_reports/fiwiki.reverted.md: \ | |
| - datasets/fiwiki.labeled_revisions.w_cache.20k_2016.json | |
| - cat $< | \ | |
| - revscoring tune \ | |
| - config/classifiers.params.yaml \ | |
| - editquality.feature_lists.fiwiki.reverted \ | |
| - reverted_for_damage \ | |
| - roc_auc.labels.true \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.053624130858886496" \ | |
| - --pop-rate "false=0.9463758691411135" \ | |
| - --center --scale \ | |
| - --cv-timeout=60 \ | |
| - --debug > $@ | |
| - | |
| - models/fiwiki.reverted.gradient_boosting.model: \ | |
| - datasets/fiwiki.labeled_revisions.w_cache.20k_2016.json | |
| - cat $< | \ | |
| - revscoring cv_train \ | |
| - revscoring.scoring.models.GradientBoosting \ | |
| - editquality.feature_lists.fiwiki.reverted \ | |
| - reverted_for_damage \ | |
| - --version=$(reverted_major_minor).1 \ | |
| - -p 'max_depth=5' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| - -p 'n_estimators=700' \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.053624130858886496" \ | |
| - --pop-rate "false=0.9463758691411135" \ | |
| - --center --scale > $@ | |
| tuning_reports/fiwiki.damaging.md: \ | |
| datasets/fiwiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.fiwiki.damaging \ | |
| damaging \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.051323095392926815" \ | |
| --pop-rate "false=0.9486769046070732" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/fiwiki.damaging.gradient_boosting.model: \ | |
| datasets/fiwiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.fiwiki.damaging \ | |
| damaging \ | |
| --version=$(damaging_major_minor).1 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=5' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.051323095392926815" \ | |
| --pop-rate "false=0.9486769046070732" \ | |
| --center --scale > $@ | |
| tuning_reports/fiwiki.goodfaith.md: \ | |
| datasets/fiwiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.fiwiki.goodfaith \ | |
| goodfaith \ | |
| roc_auc.labels.true \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9658846480916412" \ | |
| - --pop-rate "false=0.03411535190835876" \ | |
| ? ^ | |
| + --pop-rate "false=0.03411535190835879" \ | |
| ? ^ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/fiwiki.goodfaith.gradient_boosting.model: \ | |
| datasets/fiwiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.fiwiki.goodfaith \ | |
| goodfaith \ | |
| --version=$(goodfaith_major_minor).1 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=5' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9658846480916412" \ | |
| - --pop-rate "false=0.03411535190835876" \ | |
| ? ^ | |
| + --pop-rate "false=0.03411535190835879" \ | |
| ? ^ | |
| --center --scale > $@ | |
| fiwiki_models: \ | |
| - models/fiwiki.damaging.gradient_boosting.model \ | |
| ? - | |
| + models/fiwiki.damaging.gradient_boosting.model \ | |
| - models/fiwiki.goodfaith.gradient_boosting.model | |
| ? - | |
| + models/fiwiki.goodfaith.gradient_boosting.model | |
| fiwiki_tuning_reports: \ | |
| - tuning_reports/fiwiki.damaging.md \ | |
| ? - | |
| + tuning_reports/fiwiki.damaging.md \ | |
| - tuning_reports/fiwiki.goodfaith.md | |
| ? - | |
| + tuning_reports/fiwiki.goodfaith.md | |
| ############################# French Wikipedia ################################ | |
| datasets/frwiki.sampled_revisions.20k_2015.json: | |
| wget -qO- https://quarry.wmflabs.org/run/48090/output/0/json-lines?download=true > $@ | |
| + | |
| + datasets/frwiki.autolabeled_revisions.20k_2015.json: \ | |
| + datasets/frwiki.sampled_revisions.20k_2015.json | |
| + cat $< | \ | |
| + ./utility autolabel --host=https://fr.wikipedia.org \ | |
| + --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat \ | |
| + --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| + --verbose > $@ | |
| datasets/frwiki.sampled_revisions.20k_2016.json: | |
| wget -qO- https://quarry.wmflabs.org/run/98251/output/0/json-lines?download=true > $@ | |
| datasets/frwiki.autolabeled_revisions.20k_2016.json: \ | |
| datasets/frwiki.sampled_revisions.20k_2016.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://fr.wikipedia.org \ | |
| --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| - | |
| - datasets/frwiki.revisions_for_review.5k_2016.json: \ | |
| - datasets/frwiki.autolabeled_revisions.20k_2016.json | |
| - ( \ | |
| - cat $< | \ | |
| - grep '"needs_review": true' | \ | |
| - shuf -n 2500; \ | |
| - cat $< | \ | |
| - grep '"needs_review": false' | \ | |
| - shuf -n 2500 \ | |
| - ) | shuf > $@ | |
| - | |
| - datasets/frwiki.autolabeled_revisions.w_cache.20k_2016.json: \ | |
| - datasets/frwiki.autolabeled_revisions.20k_2016.json | |
| - cat $< | \ | |
| - revscoring extract \ | |
| - editquality.feature_lists.frwiki.reverted \ | |
| - --host https://fr.wikipedia.org \ | |
| - --extractor $(max_extractors) \ | |
| - --verbose > $@ | |
| - | |
| - tuning_reports/frwiki.reverted.md: \ | |
| - datasets/frwiki.autolabeled_revisions.w_cache.20k_2016.json | |
| - cat $< | \ | |
| - revscoring tune \ | |
| - config/classifiers.params.yaml \ | |
| - editquality.feature_lists.frwiki.reverted \ | |
| - reverted_for_damage \ | |
| - roc_auc.labels.true \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.035263474253656585" \ | |
| - --pop-rate "false=0.9647365257463434" \ | |
| - --center --scale \ | |
| - --cv-timeout=60 \ | |
| - --debug > $@ | |
| - | |
| - models/frwiki.reverted.gradient_boosting.model: \ | |
| - datasets/frwiki.autolabeled_revisions.w_cache.20k_2016.json | |
| - cat $< | \ | |
| - revscoring cv_train \ | |
| - revscoring.scoring.models.GradientBoosting \ | |
| - editquality.feature_lists.frwiki.reverted \ | |
| - reverted_for_damage \ | |
| - --version=$(reverted_major_minor).0 \ | |
| - -p 'max_depth=5' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| - -p 'n_estimators=700' \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.035263474253656585" \ | |
| - --pop-rate "false=0.9647365257463434" \ | |
| - --center --scale > $@ | |
| datasets/frwiki.human_labeled_revisions.5k_2016.json: | |
| ./utility fetch_labels \ | |
| https://labels.wmflabs.org/campaigns/frwiki/38/ > $@ | |
| + datasets/frwiki.revisions_for_review.5k_2016.json: \ | |
| + datasets/frwiki.autolabeled_revisions.20k_2016.json | |
| + ( \ | |
| + cat $< | \ | |
| + grep '"needs_review": true' | \ | |
| + shuf -n 2500; \ | |
| + cat $< | \ | |
| + grep '"needs_review": false' | \ | |
| + shuf -n 2500 \ | |
| + ) | shuf > $@ | |
| + | |
| datasets/frwiki.labeled_revisions.20k_2016.json: \ | |
| + datasets/frwiki.autolabeled_revisions.20k_2016.json \ | |
| - datasets/frwiki.human_labeled_revisions.5k_2016.json \ | |
| ? -- | |
| + datasets/frwiki.human_labeled_revisions.5k_2016.json | |
| - datasets/frwiki.autolabeled_revisions.20k_2016.json | |
| ./utility merge_labels $^ > $@ | |
| datasets/frwiki.labeled_revisions.w_cache.20k_2016.json: \ | |
| datasets/frwiki.labeled_revisions.20k_2016.json | |
| cat $< | \ | |
| revscoring extract \ | |
| - editquality.feature_lists.frwiki.reverted \ | |
| editquality.feature_lists.frwiki.damaging \ | |
| editquality.feature_lists.frwiki.goodfaith \ | |
| --host https://fr.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| tuning_reports/frwiki.damaging.md: \ | |
| datasets/frwiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.frwiki.damaging \ | |
| damaging \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.028751753155680224" \ | |
| --pop-rate "false=0.9712482468443198" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/frwiki.damaging.gradient_boosting.model: \ | |
| datasets/frwiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.frwiki.damaging \ | |
| damaging \ | |
| --version=$(damaging_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=300' \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.028751753155680224" \ | |
| --pop-rate "false=0.9712482468443198" \ | |
| --center --scale > $@ | |
| tuning_reports/frwiki.goodfaith.md: \ | |
| datasets/frwiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.frwiki.goodfaith \ | |
| goodfaith \ | |
| roc_auc.labels.true \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9786115007012622" \ | |
| - --pop-rate "false=0.021388499298737727" \ | |
| ? - | |
| + --pop-rate "false=0.021388499298737762" \ | |
| ? + | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/frwiki.goodfaith.gradient_boosting.model: \ | |
| datasets/frwiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.frwiki.goodfaith \ | |
| goodfaith \ | |
| --version=$(goodfaith_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=5' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=500' \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9786115007012622" \ | |
| - --pop-rate "false=0.021388499298737727" \ | |
| ? - | |
| + --pop-rate "false=0.021388499298737762" \ | |
| ? + | |
| --center --scale > $@ | |
| frwiki_models: \ | |
| - models/frwiki.damaging.gradient_boosting.model \ | |
| ? - | |
| + models/frwiki.damaging.gradient_boosting.model \ | |
| - models/frwiki.goodfaith.gradient_boosting.model | |
| ? - | |
| + models/frwiki.goodfaith.gradient_boosting.model | |
| frwiki_tuning_reports: \ | |
| - tuning_reports/frwiki.damaging.md \ | |
| ? - | |
| + tuning_reports/frwiki.damaging.md \ | |
| - tuning_reports/frwiki.goodfaith.md | |
| ? - | |
| + tuning_reports/frwiki.goodfaith.md | |
| - ############################### Dutch Wikipedia ############################### | |
| ? -- | |
| + ############################# Dutch Wikipedia ################################ | |
| ? + | |
| datasets/nlwiki.sampled_revisions.20k_2016.json: | |
| wget -qO- http://quarry.wmflabs.org/run/42225/output/0/json-lines?download=true > $@ | |
| datasets/nlwiki.autolabeled_revisions.20k_2016.json: \ | |
| datasets/nlwiki.sampled_revisions.20k_2016.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://nl.wikipedia.org \ | |
| --trusted-groups=abusefilter,arbcom,bureaucrat,checkuser,rollbacker,sysop,bot \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| - | |
| - tuning_reports/nlwiki.reverted.md: \ | |
| - datasets/nlwiki.labeled_revisions.w_cache.20k_2016.json | |
| - cat $< | \ | |
| - revscoring tune \ | |
| - config/classifiers.params.yaml \ | |
| - editquality.feature_lists.nlwiki.reverted \ | |
| - reverted_for_damage \ | |
| - roc_auc.labels.true \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.06322309818777516" \ | |
| - --pop-rate "false=0.9367769018122248" \ | |
| - --center --scale \ | |
| - --cv-timeout=60 \ | |
| - --debug > $@ | |
| - | |
| - models/nlwiki.reverted.gradient_boosting.model: \ | |
| - datasets/nlwiki.labeled_revisions.w_cache.20k_2016.json | |
| - cat $< | \ | |
| - revscoring cv_train \ | |
| - revscoring.scoring.models.GradientBoosting \ | |
| - editquality.feature_lists.nlwiki.reverted \ | |
| - reverted_for_damage \ | |
| - --version=$(reverted_major_minor).0 \ | |
| - -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| - -p 'n_estimators=700' \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.06322309818777516" \ | |
| - --pop-rate "false=0.9367769018122248" \ | |
| - --center --scale > $@ | |
| datasets/nlwiki.human_labeled_revisions.5k_2016.json: | |
| ./utility fetch_labels \ | |
| https://labels.wmflabs.org/campaigns/nlwiki/14/ > $@ | |
| datasets/nlwiki.labeled_revisions.20k_2016.json: \ | |
| + datasets/nlwiki.autolabeled_revisions.20k_2016.json \ | |
| - datasets/nlwiki.human_labeled_revisions.5k_2016.json \ | |
| ? -- | |
| + datasets/nlwiki.human_labeled_revisions.5k_2016.json | |
| - datasets/nlwiki.autolabeled_revisions.20k_2016.json | |
| ./utility merge_labels $^ > $@ | |
| datasets/nlwiki.labeled_revisions.w_cache.20k_2016.json: \ | |
| datasets/nlwiki.labeled_revisions.20k_2016.json | |
| cat $< | \ | |
| revscoring extract \ | |
| - editquality.feature_lists.nlwiki.reverted \ | |
| editquality.feature_lists.nlwiki.damaging \ | |
| editquality.feature_lists.nlwiki.goodfaith \ | |
| --host https://nl.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| tuning_reports/nlwiki.damaging.md: \ | |
| datasets/nlwiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.nlwiki.damaging \ | |
| damaging \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.05068086413432989" \ | |
| --pop-rate "false=0.9493191358656701" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/nlwiki.damaging.gradient_boosting.model: \ | |
| datasets/nlwiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.nlwiki.damaging \ | |
| damaging \ | |
| --version=$(damaging_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=5' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.05068086413432989" \ | |
| --pop-rate "false=0.9493191358656701" \ | |
| --center --scale > $@ | |
| tuning_reports/nlwiki.goodfaith.md: \ | |
| datasets/nlwiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.nlwiki.goodfaith \ | |
| goodfaith \ | |
| roc_auc.labels.true \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9646257806900789" \ | |
| - --pop-rate "false=0.035374219309921164" \ | |
| ? ^^ | |
| + --pop-rate "false=0.03537421930992113" \ | |
| ? ^ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/nlwiki.goodfaith.gradient_boosting.model: \ | |
| datasets/nlwiki.labeled_revisions.w_cache.20k_2016.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.nlwiki.goodfaith \ | |
| goodfaith \ | |
| --version=$(goodfaith_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=5' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9646257806900789" \ | |
| - --pop-rate "false=0.035374219309921164" \ | |
| ? ^^ | |
| + --pop-rate "false=0.03537421930992113" \ | |
| ? ^ | |
| --center --scale > $@ | |
| nlwiki_models: \ | |
| - models/nlwiki.damaging.gradient_boosting.model \ | |
| ? - | |
| + models/nlwiki.damaging.gradient_boosting.model \ | |
| - models/nlwiki.goodfaith.gradient_boosting.model | |
| ? - | |
| + models/nlwiki.goodfaith.gradient_boosting.model | |
| nlwiki_tuning_reports: \ | |
| - tuning_reports/nlwiki.damaging.md \ | |
| ? - | |
| + tuning_reports/nlwiki.damaging.md \ | |
| - tuning_reports/nlwiki.goodfaith.md | |
| ? - | |
| + tuning_reports/nlwiki.goodfaith.md | |
| - ############################# Portugueses Wikipedia ############################ | |
| + ############################# Portugueses Wikipedia ################################ | |
| ? ++++ | |
| + | |
| datasets/ptwiki.human_labeled_revisions.20k_2015.json: | |
| ./utility fetch_labels \ | |
| https://labels.wmflabs.org/campaigns/ptwiki/7/ > $@ | |
| datasets/ptwiki.labeled_revisions.20k_2015.json: \ | |
| datasets/ptwiki.human_labeled_revisions.20k_2015.json | |
| + ./utility merge_labels $^ > $@ | |
| - cat $< | \ | |
| - ./utility autolabel --host=https://pt.wikipedia.org \ | |
| - --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat,flow-bot \ | |
| - --trusted-edits=1000 \ | |
| - --verbose > $@ | |
| datasets/ptwiki.labeled_revisions.w_cache.20k_2015.json: \ | |
| datasets/ptwiki.labeled_revisions.20k_2015.json | |
| cat $< | \ | |
| revscoring extract \ | |
| - editquality.feature_lists.ptwiki.reverted \ | |
| ? ------- | |
| + editquality.feature_lists.ptwiki.damaging \ | |
| ? +++++++ | |
| + editquality.feature_lists.ptwiki.goodfaith \ | |
| --host https://pt.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| - | |
| - tuning_reports/ptwiki.reverted.md: \ | |
| - datasets/ptwiki.labeled_revisions.w_cache.20k_2015.json | |
| - cat $< | \ | |
| - revscoring tune \ | |
| - config/classifiers.params.yaml \ | |
| - editquality.feature_lists.ptwiki.reverted \ | |
| - reverted_for_damage \ | |
| - roc_auc.labels.true \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.10170004540180598" \ | |
| - --pop-rate "false=0.898299954598194" \ | |
| - --center --scale \ | |
| - --cv-timeout=60 \ | |
| - --debug > $@ | |
| - | |
| - models/ptwiki.reverted.gradient_boosting.model: \ | |
| - datasets/ptwiki.labeled_revisions.w_cache.20k_2015.json | |
| - cat $< | \ | |
| - revscoring cv_train \ | |
| - revscoring.scoring.models.GradientBoosting \ | |
| - editquality.feature_lists.ptwiki.reverted \ | |
| - reverted_for_damage \ | |
| - --version=$(reverted_major_minor).0 \ | |
| - -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| - -p 'n_estimators=700' \ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| - --pop-rate "true=0.10170004540180598" \ | |
| - --pop-rate "false=0.898299954598194" \ | |
| - --center --scale > $@ | |
| tuning_reports/ptwiki.damaging.md: \ | |
| datasets/ptwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.ptwiki.damaging \ | |
| damaging \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.06896029864299047" \ | |
| --pop-rate "false=0.9310397013570095" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/ptwiki.damaging.gradient_boosting.model: \ | |
| datasets/ptwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.ptwiki.damaging \ | |
| damaging \ | |
| --version=$(damaging_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "true=$(damaging_weight)" \ | |
| --pop-rate "true=0.06896029864299047" \ | |
| --pop-rate "false=0.9310397013570095" \ | |
| --center --scale > $@ | |
| tuning_reports/ptwiki.goodfaith.md: \ | |
| datasets/ptwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.ptwiki.goodfaith \ | |
| goodfaith \ | |
| roc_auc.labels.true \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9397669373959542" \ | |
| - --pop-rate "false=0.0602330626040458" \ | |
| + --pop-rate "false=0.06023306260404582" \ | |
| ? + | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/ptwiki.goodfaith.gradient_boosting.model: \ | |
| datasets/ptwiki.labeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.ptwiki.goodfaith \ | |
| goodfaith \ | |
| --version=$(goodfaith_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "false=$(goodfaith_weight)" \ | |
| --pop-rate "true=0.9397669373959542" \ | |
| - --pop-rate "false=0.0602330626040458" \ | |
| + --pop-rate "false=0.06023306260404582" \ | |
| ? + | |
| --center --scale > $@ | |
| ptwiki_models: \ | |
| - models/ptwiki.damaging.gradient_boosting.model \ | |
| ? - | |
| + models/ptwiki.damaging.gradient_boosting.model \ | |
| - models/ptwiki.goodfaith.gradient_boosting.model | |
| ? - | |
| + models/ptwiki.goodfaith.gradient_boosting.model | |
| ptwiki_tuning_reports: \ | |
| - tuning_reports/ptwiki.damaging.md \ | |
| ? - | |
| + tuning_reports/ptwiki.damaging.md \ | |
| - tuning_reports/ptwiki.goodfaith.md | |
| ? - | |
| + tuning_reports/ptwiki.goodfaith.md |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| - ############################### Croatian Wikipedia ########################### | |
| ? -- | |
| + ############################# Croatian Wikipedia ################################ | |
| ? +++++ | |
| - | |
| # From https://quarry.wmflabs.org/query/21213 | |
| datasets/hrwiki.sampled_revisions.20k_2017.json: | |
| wget -qO- https://quarry.wmflabs.org/run/200064/output/0/json-lines?download=true > $@ | |
| datasets/hrwiki.autolabeled_revisions.20k_2017.json: \ | |
| datasets/hrwiki.sampled_revisions.20k_2017.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://hr.wikipedia.org \ | |
| --trusted-groups=autopatrolled,bot,bureaucrat,checkuser,reviewer,rollbacker,sysop \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| datasets/hrwiki.revisions_for_review.5k_2017.json: \ | |
| datasets/hrwiki.autolabeled_revisions.20k_2017.json | |
| grep '"needs_review": true' $< | shuf > $@ | |
| datasets/hrwiki.autolabeled_revisions.w_cache.20k_2017.json: \ | |
| datasets/hrwiki.autolabeled_revisions.20k_2017.json | |
| cat $< | \ | |
| revscoring extract \ | |
| editquality.feature_lists.hrwiki.reverted \ | |
| --host https://hr.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| tuning_reports/hrwiki.reverted.md: \ | |
| datasets/hrwiki.autolabeled_revisions.w_cache.20k_2017.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.hrwiki.reverted \ | |
| reverted_for_damage \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(reverted_weight)" \ | |
| --pop-rate "true=0.07927353670258512" \ | |
| --pop-rate "false=0.9207264632974149" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/hrwiki.reverted.gradient_boosting.model: \ | |
| datasets/hrwiki.autolabeled_revisions.w_cache.20k_2017.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.hrwiki.reverted \ | |
| reverted_for_damage \ | |
| --version=$(reverted_major_minor).0 \ | |
| + -p 'learning_rate=0.1' \ | |
| -p 'max_depth=3' \ | |
| - -p 'learning_rate=0.1' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=300' \ | |
| --label-weight "true=$(reverted_weight)" \ | |
| --pop-rate "true=0.07927353670258512" \ | |
| --pop-rate "false=0.9207264632974149" \ | |
| --center --scale > $@ | |
| hrwiki_models: \ | |
| models/hrwiki.reverted.gradient_boosting.model | |
| hrwiki_tuning_reports: \ | |
| - tuning_reports/hrwiki.reverted.md | |
| ? - | |
| + tuning_reports/hrwiki.reverted.md | |
| - ############################### Indonesian Wikipedia ########################## | |
| ? -- | |
| + ############################# Indonesian Wikipedia ################################ | |
| ? ++++++ | |
| - | |
| datasets/idwiki.sampled_revisions.100k_2016.json: | |
| wget -qO- http://quarry.wmflabs.org/run/135748/output/0/json-lines?download=true > $@ | |
| datasets/idwiki.autolabeled_revisions.100k_2016.json: \ | |
| datasets/idwiki.sampled_revisions.100k_2016.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://id.wikipedia.org \ | |
| --trusted-groups=autoreview,bot,bureaucrat,checkuser,editor,flow-bot,oversight,reviewer,rollbacker,sysop \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| + | |
| datasets/idwiki.autolabeled_revisions.w_cache.100k_2016.json: \ | |
| datasets/idwiki.autolabeled_revisions.100k_2016.json | |
| cat $< | \ | |
| revscoring extract \ | |
| editquality.feature_lists.idwiki.reverted \ | |
| --host https://id.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| tuning_reports/idwiki.reverted.md: \ | |
| datasets/idwiki.autolabeled_revisions.w_cache.100k_2016.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.idwiki.reverted \ | |
| reverted_for_damage \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(reverted_weight)" \ | |
| --pop-rate "true=0.02272613605673532" \ | |
| --pop-rate "false=0.9772738639432647" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/idwiki.reverted.gradient_boosting.model: \ | |
| datasets/idwiki.autolabeled_revisions.w_cache.100k_2016.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.idwiki.reverted \ | |
| reverted_for_damage \ | |
| --version=$(reverted_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=5' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "true=$(reverted_weight)" \ | |
| --pop-rate "true=0.02272613605673532" \ | |
| --pop-rate "false=0.9772738639432647" \ | |
| --center --scale > $@ | |
| idwiki_models: \ | |
| - models/idwiki.reverted.gradient_boosting.model | |
| ? - | |
| + models/idwiki.reverted.gradient_boosting.model | |
| idwiki_tuning_reports: \ | |
| - tuning_reports/idwiki.reverted.md | |
| ? - - | |
| + tuning_reports/idwiki.reverted.md | |
| - ############################# Icelandic Wikipedia ############################# | |
| + ############################# Icelandic Wikipedia ################################ | |
| ? +++ | |
| - | |
| - # From https://quarry.wmflabs.org/query/23305 | |
| datasets/iswiki.sampled_revisions.20k_2017.json: | |
| wget -qO- https://quarry.wmflabs.org/run/218976/output/0/json-lines?download=true > $@ | |
| datasets/iswiki.autolabeled_revisions.20k_2017.json: \ | |
| - datasets/iswiki.sampled_revisions.20k_2017.json | |
| ? ^^^^^^^^ | |
| + datasets/iswiki.sampled_revisions.20k_2017.json | |
| ? ^ | |
| cat $< | \ | |
| ./utility autolabel --host=https://is.wikipedia.org \ | |
| - --trusted-groups=autopatrolled,bot,bureaucrat,checkuser,reviewer,rollbacker,sysop \ | |
| ? ^^^^^^^^ | |
| + --trusted-groups=autopatrolled,bot,bureaucrat,checkuser,reviewer,rollbacker,sysop \ | |
| ? ^ | |
| - --trusted-edits=1000 \ | |
| ? ^^^^^^^^ | |
| + --trusted-edits=1000 \ | |
| ? ^ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| - --verbose > $@ | |
| ? ^^^^^^^^ | |
| + --verbose > $@ | |
| ? ^ | |
| datasets/iswiki.revisions_for_review.5k_2017.json: \ | |
| - datasets/iswiki.autolabeled_revisions.20k_2017.json | |
| ? ^^^^^^^^ | |
| + datasets/iswiki.autolabeled_revisions.20k_2017.json | |
| ? ^ | |
| grep '"needs_review": true' $< | shuf > $@ | |
| datasets/iswiki.autolabeled_revisions.w_cache.20k_2017.json: \ | |
| - datasets/iswiki.autolabeled_revisions.20k_2017.json | |
| ? ^^^^^^^^ | |
| + datasets/iswiki.autolabeled_revisions.20k_2017.json | |
| ? ^ | |
| cat $< | \ | |
| revscoring extract \ | |
| - editquality.feature_lists.iswiki.reverted \ | |
| ? ^^^^^^^^ | |
| + editquality.feature_lists.iswiki.reverted \ | |
| ? ^ | |
| - --host https://is.wikipedia.org \ | |
| ? ^^^^^^^^ | |
| + --host https://is.wikipedia.org \ | |
| ? ^ | |
| - --extractor $(max_extractors) \ | |
| ? ^^^^^^^^ | |
| + --extractor $(max_extractors) \ | |
| ? ^ | |
| - --verbose > $@ | |
| ? ^^^^^^^^ | |
| + --verbose > $@ | |
| ? ^ | |
| tuning_reports/iswiki.reverted.md: \ | |
| - datasets/iswiki.autolabeled_revisions.w_cache.20k_2017.json | |
| ? ^^^^^^^^ | |
| + datasets/iswiki.autolabeled_revisions.w_cache.20k_2017.json | |
| ? ^ | |
| cat $< | \ | |
| revscoring tune \ | |
| - config/classifiers.params.yaml \ | |
| ? ^^^^^^^^ | |
| + config/classifiers.params.yaml \ | |
| ? ^ | |
| - editquality.feature_lists.iswiki.reverted \ | |
| ? ^^^^^^^^ | |
| + editquality.feature_lists.iswiki.reverted \ | |
| ? ^ | |
| - reverted_for_damage \ | |
| ? ^^^^^^^^ | |
| + reverted_for_damage \ | |
| ? ^ | |
| - roc_auc.labels.true \ | |
| ? ^^^^^^^^ | |
| + roc_auc.labels.true \ | |
| ? ^ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| ? ^^^^^^^^ | |
| + --label-weight "true=$(reverted_weight)" \ | |
| ? ^ | |
| - --pop-rate "true=0.08115405770288514" \ | |
| ? ^^^^^^^^ | |
| + --pop-rate "true=0.08115405770288514" \ | |
| ? ^ | |
| - --pop-rate "false=0.9188459422971149" \ | |
| ? ^^^^^^^^ | |
| + --pop-rate "false=0.9188459422971149" \ | |
| ? ^ | |
| - --center --scale \ | |
| ? ^^^^^^^^ | |
| + --center --scale \ | |
| ? ^ | |
| - --cv-timeout=60 \ | |
| ? ^^^^^^^^ ^ | |
| + --cv-timeout 60 \ | |
| ? ^ ^ | |
| - --debug > $@ | |
| ? ^^^^^^^^ | |
| + --debug > $@ | |
| ? ^ | |
| models/iswiki.reverted.gradient_boosting.model: \ | |
| - datasets/iswiki.autolabeled_revisions.w_cache.20k_2017.json | |
| ? ^^^^^^^^ | |
| + datasets/iswiki.autolabeled_revisions.w_cache.20k_2017.json | |
| ? ^ | |
| cat $< | \ | |
| revscoring cv_train \ | |
| - revscoring.scoring.models.GradientBoosting \ | |
| ? ^^^^^^^^ | |
| + revscoring.scoring.models.GradientBoosting \ | |
| ? ^ | |
| - editquality.feature_lists.iswiki.reverted \ | |
| ? ^^^^^^^^ | |
| + editquality.feature_lists.iswiki.reverted \ | |
| ? ^ | |
| - reverted_for_damage \ | |
| ? ^^^^^^^^ | |
| + reverted_for_damage \ | |
| ? ^ | |
| - --version=$(reverted_major_minor).0 \ | |
| ? ^^^^^^^^ | |
| + --version=$(reverted_major_minor).0 \ | |
| ? ^ | |
| - -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.1' \ | |
| ? ^^^^^^^^ | |
| + -p 'learning_rate=0.1' \ | |
| ? ^ | |
| + -p 'max_depth=7' \ | |
| - -p 'max_features="log2"' \ | |
| ? ^^^^^^^^ - - | |
| + -p 'max_features=log2' \ | |
| ? ^ | |
| - -p 'n_estimators=300' \ | |
| ? ^^^^^^^^ | |
| + -p 'n_estimators=300' \ | |
| ? ^ | |
| - --label-weight "true=$(reverted_weight)" \ | |
| ? ^^^^^^^^ | |
| + --label-weight "true=$(reverted_weight)" \ | |
| ? ^ | |
| - --pop-rate "true=0.08115405770288514" \ | |
| ? ^^^^^^^^ | |
| + --pop-rate "true=0.08115405770288514" \ | |
| ? ^ | |
| - --pop-rate "false=0.9188459422971149" \ | |
| ? ^^^^^^^^ | |
| + --pop-rate "false=0.9188459422971149" \ | |
| ? ^ | |
| - --center --scale > $@ | |
| ? ^^^^^^^^ | |
| + --center --scale > $@ | |
| ? ^ | |
| iswiki_models: \ | |
| models/iswiki.reverted.gradient_boosting.model | |
| iswiki_tuning_reports: \ | |
| - tuning_reports/iswiki.reverted.md | |
| ? - | |
| + tuning_reports/iswiki.reverted.md | |
| - ############################# Italian Wikipedia ############################### | |
| + ############################# Italian Wikipedia ################################ | |
| ? + | |
| - | |
| datasets/itwiki.sampled_revisions.20k_2015.json: | |
| wget -qO- http://quarry.wmflabs.org/run/42224/output/0/json-lines?download=true > $@ | |
| datasets/itwiki.autolabeled_revisions.20k_2015.json: \ | |
| datasets/itwiki.sampled_revisions.20k_2015.json | |
| cat $< | \ | |
| ./utility autolabel --host=https://it.wikipedia.org \ | |
| --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat \ | |
| --trusted-edits=1000 \ | |
| + --revert-radius=3 \ | |
| + --revert-window=48 \ | |
| --verbose > $@ | |
| + | |
| datasets/itwiki.autolabeled_revisions.w_cache.20k_2015.json: \ | |
| datasets/itwiki.autolabeled_revisions.20k_2015.json | |
| cat $< | \ | |
| revscoring extract \ | |
| editquality.feature_lists.itwiki.reverted \ | |
| --host https://it.wikipedia.org \ | |
| --extractor $(max_extractors) \ | |
| --verbose > $@ | |
| tuning_reports/itwiki.reverted.md: \ | |
| datasets/itwiki.autolabeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring tune \ | |
| config/classifiers.params.yaml \ | |
| editquality.feature_lists.itwiki.reverted \ | |
| reverted_for_damage \ | |
| roc_auc.labels.true \ | |
| --label-weight "true=$(reverted_weight)" \ | |
| --pop-rate "true=0.04628882613957241" \ | |
| --pop-rate "false=0.9537111738604276" \ | |
| --center --scale \ | |
| - --cv-timeout=60 \ | |
| ? ^ | |
| + --cv-timeout 60 \ | |
| ? ^ | |
| --debug > $@ | |
| models/itwiki.reverted.gradient_boosting.model: \ | |
| datasets/itwiki.autolabeled_revisions.w_cache.20k_2015.json | |
| cat $< | \ | |
| revscoring cv_train \ | |
| revscoring.scoring.models.GradientBoosting \ | |
| editquality.feature_lists.itwiki.reverted \ | |
| reverted_for_damage \ | |
| --version=$(reverted_major_minor).0 \ | |
| + -p 'learning_rate=0.01' \ | |
| -p 'max_depth=7' \ | |
| - -p 'learning_rate=0.01' \ | |
| - -p 'max_features="log2"' \ | |
| ? - - | |
| + -p 'max_features=log2' \ | |
| -p 'n_estimators=700' \ | |
| --label-weight "true=$(reverted_weight)" \ | |
| --pop-rate "true=0.04628882613957241" \ | |
| --pop-rate "false=0.9537111738604276" \ | |
| --center --scale > $@ | |
| itwiki_models: \ | |
| - models/itwiki.reverted.gradient_boosting.model | |
| ? - | |
| + models/itwiki.reverted.gradient_boosting.model | |
| itwiki_tuning_reports: \ | |
| - tuning_reports/itwiki.reverted.md | |
| ? - - | |
| + tuning_reports/itwiki.reverted.md |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment