Created
August 3, 2019 10:16
-
-
Save dequn/78f100cdd78e72aced9462be06892566 to your computer and use it in GitHub Desktop.
Huginn抓取微信公众号文章,只能抓取最近一篇,20190803更新
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"schema_version": 1, | |
"name": "微信公众号订阅【第二版只有最新的一篇文章】", | |
"description": "2019.08.03 更新,最新可用", | |
"source_url": false, | |
"guid": "a0be876a712baf52fa4270d6b174443a", | |
"tag_fg_color": "#ffffff", | |
"tag_bg_color": "#00b050", | |
"icon": "eye", | |
"exported_at": "2019-08-03T10:05:30Z", | |
"agents": [ | |
{ | |
"type": "Agents::WebsiteAgent", | |
"name": "微信公众号【第二版】 #4 Fetch 跳转URL", | |
"disabled": false, | |
"guid": "07771b5a19dc50901cdece327f56a0ba", | |
"options": { | |
"expected_update_period_in_days": "2", | |
"url_from_event": "https://weixin.sogou.com{{url}}", | |
"type": "text", | |
"user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36", | |
"headers": { | |
"Referer": "{{refer}}", | |
"Cookie": "{{cookie}}" | |
}, | |
"mode": "on_change", | |
"extract": { | |
"content": { | |
"index": "0", | |
"regexp": "\\A(?m:.)*\\z" | |
} | |
} | |
}, | |
"schedule": "never", | |
"keep_events_for": 0, | |
"propagate_immediately": false | |
}, | |
{ | |
"type": "Agents::JavaScriptAgent", | |
"name": "微信公众号【第二版】 #5 解析跳转URL", | |
"disabled": false, | |
"guid": "12a0dbce60dde12672ca32132bfaedca", | |
"options": { | |
"language": "JavaScript", | |
"code": "// For each incoming event, join every `+= '...';` string fragment found in\r\n// payload.content into a single URL and emit it as { url: ... }.\r\nAgent.receive = function() {\r\n  var events = this.incomingEvents();\r\n  for (var i = 0; i < events.length; i++) {\r\n    var s = events[i].payload['content'];\r\n    // Global flag so that repeated exec() calls walk through all fragments.\r\n    var reg = /\\+\\= '(.*)';/g;\r\n    var url = '';\r\n    // Declared locally so the match result does not leak as an implicit global.\r\n    var temp;\r\n    while ((temp = reg.exec(s)) !== null) {\r\n      url += temp[1];\r\n    }\r\n    // No fragment matched (presumably the fetched page was not the expected\r\n    // redirect page): report it instead of emitting an empty URL downstream.\r\n    if (url === '') {\r\n      this.error('No URL fragments found in payload.content; event skipped');\r\n      continue;\r\n    }\r\n    this.createEvent({ 'url': url });\r\n  }\r\n}", | |
"expected_receive_period_in_days": "2", | |
"expected_update_period_in_days": "2" | |
}, | |
"schedule": "never", | |
"keep_events_for": 3600, | |
"propagate_immediately": false | |
}, | |
{ | |
"type": "Agents::DeDuplicationAgent", | |
"name": "微信公众号【第二版】 #2 去除重复", | |
"disabled": false, | |
"guid": "3250efcc923ce4c10b30763c6e0365ba", | |
"options": { | |
"property": "{{title}}", | |
"lookback": "200", | |
"expected_update_period_in_days": "20" | |
}, | |
"keep_events_for": 15552000, | |
"propagate_immediately": true | |
}, | |
{ | |
"type": "Agents::WebsiteAgent", | |
"name": "微信公众号【第二版】 #1 搜索公众号", | |
"disabled": false, | |
"guid": "3ea551c97594a56ac09641eea130820a", | |
"options": { | |
"expected_update_period_in_days": "4", | |
"_comment": [ | |
"中国国家地理", | |
"柴知道" | |
], | |
"url": [ | |
"https://weixin.sogou.com/weixin?type=1&query=dili360&ie=utf8&s_from=input&_sug_=y&_sug_type_=", | |
"https://weixin.sogou.com/weixin?type=1&query=chaiknows&ie=utf8&s_from=input&_sug_=y&_sug_type_=" | |
], | |
"type": "html", | |
"mode": "on_change", | |
"template": { | |
"cookie": "{{ _response_.headers.Set-Cookie }}", | |
"refer": "{{ _url_ }}" | |
}, | |
"extract": { | |
"title": { | |
"css": "#sogou_vr_11002301_box_0 > dl > dd > a", | |
"value": "string(.)" | |
}, | |
"url": { | |
"css": "#sogou_vr_11002301_box_0 > dl > dd > a", | |
"value": "@href" | |
} | |
}, | |
"user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36" | |
}, | |
"schedule": "2am", | |
"keep_events_for": 172800, | |
"propagate_immediately": false | |
}, | |
{ | |
"type": "Agents::DataOutputAgent", | |
"name": "微信公众号【第二版】 #7 输出RSS", | |
"disabled": false, | |
"guid": "cbc2ced20c71d435a5bd375b3e58079a", | |
"options": { | |
"secrets": [ | |
"weixin_public" | |
], | |
"expected_receive_period_in_days": "20", | |
"template": { | |
"title": "微信公众号【第二版】", | |
"description": "微信公众号【第二版】", | |
"item": { | |
"title": "【{{author}}】{{title}}", | |
"description": "{{ desciption | replace: 'data-src', 'src' }}", | |
"link": "{{url}}" | |
}, | |
"link": "https://weixin.sogou.com/", | |
"icon": "https://res.wx.qq.com/mmbizwap/zh_CN/htmledition/images/icon/common/favicon22c41b.ico" | |
}, | |
"ns_media": "true", | |
"events_to_show": "300" | |
}, | |
"propagate_immediately": true | |
}, | |
{ | |
"type": "Agents::JavaScriptAgent", | |
"name": "微信公众号【第二版】 #3 按搜狗前端代码重新构造URL", | |
"disabled": false, | |
"guid": "e90d2135c2cf93c1dbc2b6e8b78bfeea", | |
"options": { | |
"language": "JavaScript", | |
"code": "// For each incoming event: append the `k` / `h` query parameters to payload.url,\r\n// reduce payload.cookie to its SNUID entry, then re-emit the payload.\r\nAgent.receive = function() {\r\n  var events = this.incomingEvents();\r\n  for (var i = 0; i < events.length; i++) {\r\n    var payload = events[i].payload;\r\n\r\n    // concat url\r\n    var url = payload['url'];\r\n    if (typeof url !== 'string') {\r\n      // Without a url there is nothing to rebuild; skip instead of throwing,\r\n      // so the remaining events in this batch are still processed.\r\n      this.error('Incoming event has no url; event skipped');\r\n      continue;\r\n    }\r\n    // b is a random integer in 1..100; h is the single character of the url\r\n    // located 21 + b positions after the end of \"url=\".\r\n    var b = Math.floor(100 * Math.random()) + 1;\r\n    var a = url.indexOf(\"url=\");\r\n    var c = url.indexOf(\"&k=\");\r\n    // Only sign urls that contain \"url=\" and have not been signed already.\r\n    if (a !== -1 && c === -1) {\r\n      var h = url.substr(a + 4 + parseInt(\"21\", 10) + b, 1);\r\n      url += \"&k=\" + b + \"&h=\" + h;\r\n    }\r\n    payload['url'] = url;\r\n\r\n    // extract SNUID cookie\r\n    var match = /SNUID=\\w*/.exec(payload['cookie']);\r\n    if (match === null) {\r\n      // exec() returns null when no SNUID is present; indexing it would throw\r\n      // and abort the whole batch.\r\n      this.error('SNUID cookie not found; event skipped');\r\n      continue;\r\n    }\r\n    payload['cookie'] = match[0];\r\n\r\n    // emit\r\n    this.createEvent(payload);\r\n  }\r\n}", | |
"expected_receive_period_in_days": "2", | |
"expected_update_period_in_days": "2" | |
}, | |
"schedule": "never", | |
"keep_events_for": 0, | |
"propagate_immediately": false | |
}, | |
{ | |
"type": "Agents::WebsiteAgent", | |
"name": "微信公众号【第二版】 #6 获取单篇文章全文", | |
"disabled": false, | |
"guid": "f7c52fcd85cea0d7426978202f4dc69a", | |
"options": { | |
"expected_update_period_in_days": "20", | |
"url_from_event": "{{url}}", | |
"type": "html", | |
"mode": "merge", | |
"extract": { | |
"desciption": { | |
"css": "#img-content", | |
"value": "./node()" | |
}, | |
"author": { | |
"css": "#js_name", | |
"value": "string(.)" | |
}, | |
"title": { | |
"css": "#activity-name", | |
"value": "string(.)" | |
} | |
} | |
}, | |
"schedule": "never", | |
"keep_events_for": 604800, | |
"propagate_immediately": true | |
} | |
], | |
"links": [ | |
{ | |
"source": 0, | |
"receiver": 1 | |
}, | |
{ | |
"source": 1, | |
"receiver": 6 | |
}, | |
{ | |
"source": 2, | |
"receiver": 5 | |
}, | |
{ | |
"source": 3, | |
"receiver": 2 | |
}, | |
{ | |
"source": 5, | |
"receiver": 0 | |
}, | |
{ | |
"source": 6, | |
"receiver": 4 | |
} | |
], | |
"control_links": [ | |
] | |
} |
大神,如果想添加一个公众号怎么操作?
@onewk 看楼上
好像现在搜狗的微信公众号也不更新了
好像现在搜狗的微信公众号也不更新了
越来越封闭了。以前是没能力连接信息,现在是商业竞争不允许连接信息,对用户来说,没什么进步。
你好 请问下 这个方法现在还能用吗
我也好久没用了,你试一试吧
…Sent from my iPhone
On Aug 20, 2020, at 12:10 PM, Yuan Huang ***@***.***> wrote:
***@***.*** commented on this gist.
你好 请问下 这个方法现在还能用吗
—
You are receiving this because you authored the thread.
Reply to this email directly, view it on GitHub, or unsubscribe.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
在 name 为 “微信公众号【第二版】 #1 搜索公众号” 的 Agent 中,url 列表即为抓取的目标。以 "https://weixin.sogou.com/weixin?type=1&query=dili360&ie=utf8&s_from=input&_sug_=y&_sug_type_=" 为例,其中 query=dili360 的 dili360 就是要抓取的公众号 id,可以在搜狗微信搜索中查到。如需添加公众号,在 url 列表中追加一条相同格式的链接,并把 query 的值换成目标公众号的 id 即可。