Last active
May 3, 2024 13:35
-
-
Save tvytlx/328966145a0e4eeaf29215aefa4592c1 to your computer and use it in GitHub Desktop.
小米便签导出, artoo.js 浏览器脚本
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// The note elements live inside an iframe, so artoo cannot reach them from the
// top-level document. Resolve the inner DOM element first, then pass it to artoo.
const iframe = 'iframe#js_note_mod_ctn.js_sandbox.business-mod-ctn.note-mod-ctn';

/**
 * Find an element inside the notes iframe.
 *
 * @param {string} str - CSS selector evaluated against the iframe's <body>.
 * @returns {Element|null} The first match inside the iframe, or null when the
 *   selector matches nothing.
 * @throws {Error} When the iframe itself cannot be found, or its document is
 *   not reachable from this page.
 */
let get_container = (str) => {
  // Use the DOM API directly instead of `$(iframe)[0]`: in a DevTools console
  // `$` is not guaranteed to be jQuery, and when it is not (or when the iframe
  // is absent) `$(iframe)[0]` is undefined and the lookup dies with
  // "Cannot read properties of undefined (reading 'contentDocument')".
  const frame = document.querySelector(iframe);
  if (!frame) {
    throw new Error(
      `Notes iframe not found (selector: ${iframe}). ` +
        'Open the notes page and wait for it to finish loading before running this script.'
    );
  }

  // NOTE(review): contentDocument is presumably null while the frame is still
  // loading or when it is cross-origin - confirm against the live page.
  const innerDocument = frame.contentDocument;
  if (!innerDocument || !innerDocument.body) {
    throw new Error('Notes iframe document is not accessible (not loaded yet, or cross-origin).');
  }

  return innerDocument.body.querySelector(str);
};
// Element inside the iframe whose direct <div> children are scraped further down.
let notes_container = get_container('.home-bd .briefs-ctn.js_home_briefs_ctn');
if (!notes_container) {
  // Fail here with a readable message rather than handing null to artoo.
  throw new Error(
    'Notes list container not found inside the iframe; the page markup may have changed.'
  );
}
// Scraping starts here.

/**
 * Tell whether a scraped entry is a folder (logged as a "box" elsewhere in
 * this script) rather than a single note.
 *
 * The check is done on individual class tokens, so it does not depend on the
 * order of the classes, on extra whitespace, or on additional classes such as
 * `js_lock` being present.
 *
 * @param this_ - jQuery-like wrapper exposing `attr('class')`.
 * @returns {boolean} true when the element carries all folder classes.
 */
let is_in_box = (this_) => {
  const requiredClasses = ['js_folder_brief', 'folder-brief', 'js_normal_folder'];
  // attr('class') is undefined when the element has no class attribute.
  const presentClasses = new Set(String(this_.attr('class') || '').split(/\s+/));
  return requiredClasses.every((name) => presentClasses.has(name));
};
/**
 * Scrape one note element into a record with `created_time` and `content`.
 *
 * @param item - Element (or jQuery selection) handed to `artoo.scrape` as the
 *   thing to iterate over.
 * @returns Whatever `artoo.scrape` returns for that input; callers in this
 *   script read element `[0]` of it.
 */
let parse_item = (item) => {
  console.log('Find item');
  return artoo.scrape(item, {
    // Plain `function` expressions on purpose: both retrievers read `this`,
    // which an arrow function would not receive from the caller.
    created_time: function ($) {
      // The value is taken from the `title` attribute of the header span.
      return $(this).find('.note-brief-hd span').attr('title');
    },
    content: function ($) {
      return $(this).find('.js_snippet.js_note_brief_bd.note-brief-bd').text();
    },
  });
};
/**
 * Per-entry scraper passed to `artoo.scrape`; must be invoked with `this`
 * bound to the entry's element, hence a plain `function` and not an arrow.
 *
 * @returns For a folder: `{ box_name, content }`, where `content` is the list
 *   of notes parsed from the folder body. For a single note: the first record
 *   returned by `parse_item`.
 */
let schema_func = function () {
  const entry = $(this);

  // Single note: nothing to descend into.
  if (!is_in_box(entry)) {
    return parse_item(entry)[0];
  }

  const result = {};
  // All whitespace is removed from the folder title, not just the ends.
  result.box_name = entry.find('.folder-brief-hd').text().replace(/\s/g, '');
  console.log(`Enter box: ${result.box_name}`);
  result.content = artoo.scrape(
    entry.find('.js_note_brief_ctn.folder-brief-bd > div'),
    function () {
      return parse_item($(this))[0];
    }
  );
  return result;
};
// Run schema_func over every direct <div> child of the notes container.
let data = artoo.scrape(notes_container, function () {
  return artoo.scrape($(this).find('> div'), schema_func);
});

// Only the first element of the outer result is saved, so make sure there is
// one instead of handing `undefined` to the JSON exporter.
if (data.length === 0) {
  throw new Error('Nothing was scraped: the notes container produced no results.');
}
artoo.savePrettyJson(data[0]);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
VM62:3 Uncaught TypeError: Cannot read properties of undefined (reading 'contentDocument')
at get_container (:3:48)
at :4:23
不知道怎么办了