Last active
December 11, 2024 02:25
-
-
Save cdpath/fcf4c59e933275e5db2758920a9c1fd8 to your computer and use it in GitHub Desktop.
使用 Huginn 实现微信公众号全文 RSS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "schema_version": 1,
  "name": "WeChat",
  "description": "微信公众号全文 RSS",
  "source_url": false,
  "guid": "dd67102f09869c2228f8ed903a32d063",
  "tag_fg_color": "#333333",
  "tag_bg_color": "#66ff66",
  "icon": "leaf",
  "exported_at": "2019-01-12T10:56:41Z",
  "agents": [
    {
      "type": "Agents::WebsiteAgent",
      "name": "0 获取微信公众号文章",
      "disabled": false,
      "guid": "00a23c266080e989591e35697d91b21e",
      "options": {
        "expected_update_period_in_days": "2",
        "url": [
          "http://weixin.sogou.com/weixin?type=1&s_from=input&query=pongba_mindhacks",
          "http://weixin.sogou.com/weixin?type=1&s_from=input&query=ling-lunch",
          "http://weixin.sogou.com/weixin?type=1&s_from=input&query=noon-story",
          "http://weixin.sogou.com/weixin?type=1&s_from=input&query=mzmojo"
        ],
        "type": "html",
        "mode": "on_change",
        "extract": {
          "title": {
            "css": "#sogou_vr_11002301_box_0 > dl:last>dd>a",
            "value": ".//text()"
          },
          "url": {
            "css": "#sogou_vr_11002301_box_0 > dl:last>dd>a",
            "value": "@href"
          }
        },
        "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0"
      },
      "schedule": "every_12h",
      "keep_events_for": 172800,
      "propagate_immediately": true
    },
    {
      "type": "Agents::DataOutputAgent",
      "name": "4 输出RSS",
      "disabled": false,
      "guid": "1ea34bb7e56575e6c007bf2e2f48b990",
      "options": {
        "secrets": [
          "wechat202"
        ],
        "expected_receive_period_in_days": 2,
        "template": {
          "title": "微信公众号",
          "description": "微信公众号全文",
          "item": {
            "title": "{{author}} | {{title}}",
            "description": "{{fulltext}}",
            "link": "{{url}}"
          },
          "icon": "https://res.wx.qq.com/mmbizwap/zh_CN/htmledition/images/icon/common/favicon22c41b.ico"
        },
        "rss_content_type": "text/xml"
      },
      "propagate_immediately": true
    },
    {
      "type": "Agents::DeDuplicationAgent",
      "name": "1 标题去重",
      "disabled": false,
      "guid": "6406a562f112d0686bf2ae24afcac902",
      "options": {
        "property": "{{title}}",
        "lookback": "200",
        "expected_update_period_in_days": "6"
      },
      "keep_events_for": 345600,
      "propagate_immediately": true
    },
    {
      "type": "Agents::TriggerAgent",
      "name": "2 过滤广告",
      "disabled": false,
      "guid": "822097071177a8c9d57eb0aea8b7554f",
      "options": {
        "expected_receive_period_in_days": "6",
        "keep_event": "true",
        "rules": [
          {
            "type": "!regex",
            "value": "(市集)|(广告)|(推广)|(招人)|(限时)|(福利)",
            "path": "title"
          }
        ],
        "message": "没有看到广告,放行!"
      },
      "keep_events_for": 259200,
      "propagate_immediately": true
    },
    {
      "type": "Agents::WebsiteAgent",
      "name": "3 获取文章全文",
      "disabled": false,
      "guid": "c5b16d43997a5195533911e8d1824711",
      "options": {
        "expected_update_period_in_days": "2",
        "url_from_event": "{{url}}",
        "type": "html",
        "mode": "merge",
        "extract": {
          "fulltext": {
            "css": "#js_content",
            "value": "."
          },
          "title_": {
            "css": "#activity-name",
            "value": "normalize-space(.)"
          },
          "author": {
            "css": "#profileBt > a",
            "value": "normalize-space(.)"
          }
        },
        "template": {
          "fulltext": "{{ fulltext |strip_newlines|replace: \"<br>\",\"\" | regex_replace:'data-src','src'}}"
        },
        "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0"
      },
      "schedule": "every_12h",
      "keep_events_for": 604800,
      "propagate_immediately": true
    }
  ],
  "links": [
    {
      "source": 0,
      "receiver": 2
    },
    {
      "source": 2,
      "receiver": 3
    },
    {
      "source": 3,
      "receiver": 4
    },
    {
      "source": 4,
      "receiver": 1
    }
  ],
  "control_links": []
}
@cdpath 楼主您好,测试过程中发现,假如微信文章是一篇图文,或者是一篇转载之类的文章,总结起来就是activity-name元素找不到的时候,会报如下错误:
Error when fetching url: Got an uneven number of matches for : {"fulltext"=>{"css"=>"#js_content", "value"=>"."}, "title"=>{"css"=>"#activity-name", "value"=>"normalize-space(.)"}}
知识不熟,找到问题但是没办法解决,楼主看看能不能帮忙解决一下,谢谢。
@longhaiqwe 出错的微信文章的链接发一下
由于最新的反爬虫机制,该 Scenario 已不可用,我写了个新的:https://gist.github.com/dequn/674b0401c1f31f7919b112ad64640552 。
楼主,这个现在还能使用吗? @cdpath
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
楼主您好,测试过程中发现,假如微信文章是一篇图文,或者是一篇转载之类的文章,总结起来就是activity-name元素找不到的时候,会报如下错误:
Error when fetching url: Got an uneven number of matches for : {"fulltext"=>{"css"=>"#js_content", "value"=>"."}, "title"=>{"css"=>"#activity-name", "value"=>"normalize-space(.)"}}
知识不熟,找到问题但是没办法解决,楼主看看能不能帮忙解决一下,谢谢。