Created
February 24, 2011 04:19
-
-
Save atuttle/841743 to your computer and use it in GitHub Desktop.
Parses the HTML export from Delicious.com and returns an array of structures representing the links from Delicious.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Parser for the HTML bookmark export produced by Delicious.com.
// parse() turns the export text into an array of structs, one per bookmark.
component {

	// Constructor. The component keeps no state; returns itself so calls can be chained.
	public function init() output="false" {
		return this;
	}

	// Parses the export text line by line.
	//   data: the full contents of the export file as a string
	// Returns an array of structs. Each struct may contain the keys url, name,
	// private, tags (an array built from the comma-delimited TAGS attribute) and
	// description; a key is only present when the matching markup was found.
	public function parse(data) output="false" {
		local.links = [];
		local.newLink = {};
		//split on LF only; the trim() below drops the CR that CRLF files leave behind
		local.lines = listToArray(data, chr(10));

		for (local.i = 1; local.i lte arrayLen(local.lines); local.i++){
			local.line = trim(local.lines[local.i]);
			//a line shorter than a 4-character tag cannot be one of the lines we care about
			if (len(local.line) lt 4){
				continue;
			}
			local.first4 = ucase(left(local.line, 4));

			if (local.first4 eq "<DD>"){
				//description line: belongs to the link started by the preceding <DT>
				local.newLink.description = stripLeadingTag(local.line);
				continue;
			}

			//ignore unimportant lines
			if (local.first4 neq "<DL>" and local.first4 neq "<DT>"){
				continue;
			}

			//a new entry starts here, so first commit the previous link (if any)
			if (!structIsEmpty(local.newLink)){
				arrayAppend(local.links, local.newLink);
				local.newLink = {};
			}

			local.linkHtml = stripLeadingTag(local.line);
			local.parts = {
				url = reFindNoSuck('href="([^"]+)"', local.linkHtml, 1),
				tags = reFindNoSuck('tags="([^"]+)"', local.linkHtml, 1),
				name = reFindNoSuck('>([^<]+)<', local.linkHtml, 1),
				private = reFindNoSuck('private="([^"]+)"', local.linkHtml, 1)
			};

			//index 1 is the whole match, index 2 is the captured value
			if (arrayLen(local.parts.url) gte 2){
				local.newLink.url = local.parts.url[2];
			}
			if (arrayLen(local.parts.tags) gte 2){
				local.newLink.tags = listToArray(local.parts.tags[2]);
			}
			if (arrayLen(local.parts.name) gte 2){
				local.newLink.name = local.parts.name[2];
			}
			if (arrayLen(local.parts.private) gte 2){
				local.newLink.private = local.parts.private[2];
			}
			//don't commit it yet because the next line *might* add a description
		}

		//finally, add the last link that we've parsed to the array
		if (!structIsEmpty(local.newLink)){
			arrayAppend(local.links, local.newLink);
		}
		return local.links;
	}

	//===============================

	// Returns the line without its leading 4-character tag.
	// Callers guarantee len(line) gte 4; a line that is exactly the tag yields ""
	// instead of calling right() with a count of 0.
	private function stripLeadingTag(line) output="false" {
		if (len(arguments.line) lte 4){
			return "";
		}
		return right(arguments.line, len(arguments.line) - 4);
	}

	// Case-insensitive regex search that returns the matched text rather than
	// the pos/len arrays that reFindNoCase() produces.
	//   pattern:  regular expression, may contain capture groups
	//   data:     string to search
	//   startPos: position in data at which to start searching (default 1)
	// Returns an empty array when there is no match. Otherwise element 1 is the
	// text matched by the whole pattern and element n+1 is the text of capture
	// group n ("" for a group that did not participate in the match), so group
	// positions stay stable.
	private function reFindNoSuck(pattern, data, startPos = 1) output="false" {
		local.matches = [];
		local.found = reFindNoCase(arguments.pattern, arguments.data, arguments.startPos, true);

		//handle no match at all: reFindNoCase reports pos 0 / len 0 for the whole pattern
		if (not isArray(local.found.pos) or arrayLen(local.found.pos) eq 0 or local.found.pos[1] lte 0){
			return local.matches;
		}

		for (local.i = 1; local.i lte arrayLen(local.found.pos); local.i++){
			if (local.found.pos[local.i] gt 0 and local.found.len[local.i] gt 0){
				arrayAppend(local.matches, mid(arguments.data, local.found.pos[local.i], local.found.len[local.i]));
			} else {
				//unmatched or empty group: keep a placeholder so later groups keep their index
				arrayAppend(local.matches, "");
			}
		}
		return local.matches;
	}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment