Skip to content

Instantly share code, notes, and snippets.

@pnathan
Created December 23, 2018 09:13
Show Gist options
  • Save pnathan/9876fcb38d285ae396de3f0ab44df442 to your computer and use it in GitHub Desktop.
Save pnathan/9876fcb38d285ae396de3f0ab44df442 to your computer and use it in GitHub Desktop.
facebook post archive -> md file generator
;; this code is (C) Paul Nathan (@[email protected]), licensed
;; under AGPL3, or CC0, or Public Domain. Whichever the reader
;; desires.
;;
;; essential instructions:
;;
;; download and unzip the archive file, IN JSON FORMAT, from the Zuckerland.
;; this file goes in the toplevel of that directory.
This part is important:
this is all your posts.
Yes, including private groups and marketplace posts.
take a real hard look at your generated files.
Delete, IN THIS FILE, this and the last 4 lines, and the following 2 lines
when you acknowledge this sensitive reality.
Exporting all of your data to another system could be VERY EMBARRASSING.
;;
;; common lisp needs quicklisp installed
;;
;; if you use SBCL, the current Best of Breed open source Common Lisp -
;; run this file with
;;
;; sbcl --load fb_to_blog.lisp
;;
;; A slew of markdown files will be generated. I advise combing
;; through them. You may have to edit some of the initial json to make
;; the generation work, due to weird encoding issues. You may find
;; that a post saying happy birthday without context isn't
;; useful. Etc.
(ql:quickload :alexandria)
(ql:quickload :local-time)
(ql:quickload :cl-json)
(ql:quickload :yason)
;; Have yason hand back JSON objects as hash tables; the accessors
;; further down in this file reach into them with GETHASH.
(setf yason:*parse-object-as* :hash-table)

(defparameter *text*
  (alexandria:read-file-into-string "./posts/your_posts.json")
  "Contents of ./posts/your_posts.json (relative to the current directory) as one string.")

(defparameter *source-json*
  (yason:parse *text*)
  "Result of parsing *TEXT* with yason.")

(defparameter *posts*
  (gethash "status_updates" *source-json*)
  "Value stored under the \"status_updates\" key of *SOURCE-JSON*.")

;; KEYS is a short local alias for ALEXANDRIA:HASH-TABLE-KEYS.
(setf (fdefinition 'keys) #'alexandria:hash-table-keys)
(defun geturl (p)
  "Return the \"url\" of the \"external_context\" found in the first
\"data\" entry of P's first attachment, or NIL when any link in that
chain is absent.

P is a hash table for one post.  Only the first attachment and the
first data entry are inspected."
  (let* ((attachment (first (gethash "attachments" p)))
         ;; An attachment whose \"data\" list is empty or missing yields
         ;; NIL here; guard every step so we never GETHASH into NIL.
         (datum (and attachment (first (gethash "data" attachment))))
         (context (and datum (gethash "external_context" datum))))
    (and context (gethash "url" context))))
(defun getpost (p)
  "Return the \"post\" text of P, or NIL.

Looks through the list under P's \"data\" key for entries whose key
list is exactly (\"post\") and returns the \"post\" value of the
first such entry.  Returns NIL when P is NIL or no entry qualifies."
  (flet ((only-post-key-p (entry)
           (equal (keys entry) '("post"))))
    (and p
         (let ((match (first (remove-if-not #'only-post-key-p
                                            (gethash "data" p)))))
           (and match
                (gethash "post" match))))))
(defun collect-posts ()
  "Return a list with one (TIMESTAMP TEXT URL) triple per entry of *POSTS*.

TIMESTAMP is the entry's \"timestamp\" converted with
LOCAL-TIME:UNIX-TO-TIMESTAMP, TEXT comes from GETPOST, and URL comes
from GETURL when the entry has attachments (NIL otherwise)."
  (flet ((summarize (entry)
           (let ((posted-at (local-time:unix-to-timestamp
                             (gethash "timestamp" entry)))
                 (link (and (gethash "attachments" entry)
                            (geturl entry)))
                 (body (getpost entry)))
             (list posted-at body link))))
    (loop for entry in *posts*
          collect (summarize entry))))
(defun mentions-link-p (entry)
  "True when the text (second element) or attached URL (third element)
of ENTRY contains an http:// or https:// link.

ENTRY is a (timestamp text url) list as built by COLLECT-POSTS; either
string may be NIL."
  (flet ((has-link-p (text)
           (and (stringp text)
                ;; Match both schemes; checking only \"http://\" would
                ;; file every https link under *NON-LINKS*.
                (or (search "http://" text)
                    (search "https://" text)))))
    (or (has-link-p (second entry))
        (has-link-p (third entry)))))

(defparameter *links*
  (remove-if-not #'mentions-link-p (collect-posts))
  "Collected posts whose text or attached URL contains a web link.")

(defparameter *non-links*
  (remove-if #'mentions-link-p (collect-posts))
  "Collected posts with no web link in their text or attached URL.")
;; A very clever regex could extract the specific HTTP links; then, if
;; there is an HTML document at the end of a link, curl it, search
;; for <title>$foo</title>, and entitle the post "Regarding $foo".
(defun left-pad (i &key (width 2))
  "Return I printed in decimal as a string, padded on the left with
zeros to at least WIDTH characters (default 2)."
  ;; ~v takes the column width from the argument list, so no format
  ;; string has to be assembled at run time.
  (format nil "~v,'0d" width i))
(defun cook-a-formatted-ts (ts)
  "Return TS as a \"YEAR-MM-DD\" string.

The month and day are zero-padded to two digits via LEFT-PAD; the
year is printed as-is."
  (let ((year (local-time:timestamp-year ts))
        (month (left-pad (local-time:timestamp-month ts)))
        (day (left-pad (local-time:timestamp-day ts))))
    (format nil "~d-~d-~d" year month day)))
(defun name-phrase (p)
  "Generate a file name for a post with text, derived from the date the
post was made, along with the text contained in the post.

P is a (timestamp text url) list.  The name is the formatted date,
immediately followed by up to the first 20 characters of the text
(made file-name safe), followed by \"-export.md\".  Returns NIL when
the post has no text, since this exporter skips such posts."
  (flet ((safe-char (c)
           (cond
             ;; Whitespace that would break a one-line file name.
             ((member c '(#\Newline #\Return #\Tab)) #\Space)
             ;; Path separators, plus characters that SBCL's namestring
             ;; parser treats as wildcards or escapes (* ? [ \) and a
             ;; few that are awkward in shells and on other file systems.
             ((find c "/:?\\*[]<>|\"") #\-)
             (t c))))
    (let* ((text (second p))
           (snippet (when (stringp text)
                      (map 'string #'safe-char
                           (subseq text 0 (min 20 (length text)))))))
      ;; if we don't have any text, this exporter doesn't do it.
      (when (and snippet (plusp (length snippet)))
        (concatenate 'string
                     (cook-a-formatted-ts (first p))
                     snippet
                     "-export.md")))))
(defun do-the-big-export ()
  "Write every collected post that has text to its own Markdown file.

For each entry from COLLECT-POSTS, NAME-PHRASE picks the file name
(entries for which it returns NIL are skipped), a progress line is
printed to standard output, and the post text is written to that
file.  Returns NIL."
  ;; NOTE(review): two posts from the same day that share their first
  ;; 20 characters map to the same file name; the later one replaces
  ;; the earlier one.
  (dolist (p (collect-posts))
    (let ((filename (name-phrase p)))
      (when filename
        (format t "writing ~a~&" filename)
        ;; :supersede replaces the whole file.  :overwrite would write
        ;; over the start of an existing file without truncating it,
        ;; leaving stale trailing text when the new post is shorter.
        (alexandria:write-string-into-file (second p) filename
                                           :if-exists :supersede
                                           :if-does-not-exist :create)))))
;; Run the export as soon as this file is loaded, then leave the Lisp.
(do-the-big-export)
;; QUIT is not part of standard Common Lisp, and SBCL deprecates its
;; own QUIT in favour of SB-EXT:EXIT; keep the bare call elsewhere.
#+sbcl (sb-ext:exit)
#-sbcl (quit)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment