Last active
December 11, 2024 02:25
-
-
Save cdpath/fcf4c59e933275e5db2758920a9c1fd8 to your computer and use it in GitHub Desktop.
使用 Huginn 实现微信公众号全文 RSS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "schema_version": 1,
  "name": "WeChat",
  "description": "微信公众号全文 RSS",
  "source_url": false,
  "guid": "dd67102f09869c2228f8ed903a32d063",
  "tag_fg_color": "#333333",
  "tag_bg_color": "#66ff66",
  "icon": "leaf",
  "exported_at": "2019-01-12T10:56:41Z",
  "agents": [
    {
      "type": "Agents::WebsiteAgent",
      "name": "0 获取微信公众号文章",
      "disabled": false,
      "guid": "00a23c266080e989591e35697d91b21e",
      "options": {
        "expected_update_period_in_days": "2",
        "url": [
          "http://weixin.sogou.com/weixin?type=1&s_from=input&query=pongba_mindhacks",
          "http://weixin.sogou.com/weixin?type=1&s_from=input&query=ling-lunch",
          "http://weixin.sogou.com/weixin?type=1&s_from=input&query=noon-story",
          "http://weixin.sogou.com/weixin?type=1&s_from=input&query=mzmojo"
        ],
        "type": "html",
        "mode": "on_change",
        "extract": {
          "title": {
            "css": "#sogou_vr_11002301_box_0 > dl:last>dd>a",
            "value": ".//text()"
          },
          "url": {
            "css": "#sogou_vr_11002301_box_0 > dl:last>dd>a",
            "value": "@href"
          }
        },
        "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0"
      },
      "schedule": "every_12h",
      "keep_events_for": 172800,
      "propagate_immediately": true
    },
    {
      "type": "Agents::DataOutputAgent",
      "name": "4 输出RSS",
      "disabled": false,
      "guid": "1ea34bb7e56575e6c007bf2e2f48b990",
      "options": {
        "secrets": [
          "wechat202"
        ],
        "expected_receive_period_in_days": 2,
        "template": {
          "title": "微信公众号",
          "description": "微信公众号全文",
          "item": {
            "title": "{{author}} | {{title}}",
            "description": "{{fulltext}}",
            "link": "{{url}}"
          },
          "icon": "https://res.wx.qq.com/mmbizwap/zh_CN/htmledition/images/icon/common/favicon22c41b.ico"
        },
        "rss_content_type": "text/xml"
      },
      "propagate_immediately": true
    },
    {
      "type": "Agents::DeDuplicationAgent",
      "name": "1 标题去重",
      "disabled": false,
      "guid": "6406a562f112d0686bf2ae24afcac902",
      "options": {
        "property": "{{title}}",
        "lookback": "200",
        "expected_update_period_in_days": "6"
      },
      "keep_events_for": 345600,
      "propagate_immediately": true
    },
    {
      "type": "Agents::TriggerAgent",
      "name": "2 过滤广告",
      "disabled": false,
      "guid": "822097071177a8c9d57eb0aea8b7554f",
      "options": {
        "expected_receive_period_in_days": "6",
        "keep_event": "true",
        "rules": [
          {
            "type": "!regex",
            "value": "(市集)|(广告)|(推广)|(招人)|(限时)|(福利)",
            "path": "title"
          }
        ],
        "message": "没有看到广告,放行!"
      },
      "keep_events_for": 259200,
      "propagate_immediately": true
    },
    {
      "type": "Agents::WebsiteAgent",
      "name": "3 获取文章全文",
      "disabled": false,
      "guid": "c5b16d43997a5195533911e8d1824711",
      "options": {
        "expected_update_period_in_days": "2",
        "url_from_event": "{{url}}",
        "type": "html",
        "mode": "merge",
        "extract": {
          "fulltext": {
            "css": "#js_content",
            "value": "."
          },
          "title_": {
            "css": "#activity-name",
            "value": "normalize-space(.)"
          },
          "author": {
            "css": "#profileBt > a",
            "value": "normalize-space(.)"
          }
        },
        "template": {
          "fulltext": "{{ fulltext |strip_newlines|replace: \"<br>\",\"\" | regex_replace:'data-src','src'}}"
        },
        "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0"
      },
      "schedule": "every_12h",
      "keep_events_for": 604800,
      "propagate_immediately": true
    }
  ],
  "links": [
    {
      "source": 0,
      "receiver": 2
    },
    {
      "source": 2,
      "receiver": 3
    },
    {
      "source": 3,
      "receiver": 4
    },
    {
      "source": 4,
      "receiver": 1
    }
  ],
  "control_links": []
}
由于最新的反爬虫机制,该 Scenario 已不可用。我写了个新的:https://gist.github.com/dequn/674b0401c1f31f7919b112ad64640552
楼主这个现在还能使用吗?@cdpath
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@cdpath 还有这种图文也不行:https://mp.weixin.qq.com/s?timestamp=1527129225&src=3&ver=1&signature=J1vwPTXibs8iYk-lr9ahXu94S0JntHvzxKYeAMId*KeP-Wb7Dnw0cMZtJVOUjQ83DcbGmOKnOFgPNw2GTIuAjOAOYEJOoKYkoxX8cc2EwWWKfHTmzKpqcQ*H9UE0pzTWkI4gPkD2nc*N7dwjHZgxdMmLOB0DWWAj*4RzTQ0GJ-w=