Skip to content

Instantly share code, notes, and snippets.

@jgreco
Created August 27, 2019 04:00
Show Gist options
  • Save jgreco/d445f6c85697da42a4af8fa84a36e9fe to your computer and use it in GitHub Desktop.
Save jgreco/d445f6c85697da42a4af8fa84a36e9fe to your computer and use it in GitHub Desktop.
#lang racket
(require db
memo
"util.rkt"
racket/serialize)
(provide #%app
#%datum
#%top
;; SQL stuff
reader-jobs-queue
writer-jobs-queue
(struct-out sql-request)
(struct-out sql-response)
send-request/get-response
send-request/async
sql-read
sql-write
;; other stuff
add-file
get-file-id
get-file-name
remove-files
rename-file
number-of-files
;;number-of-tags ;; TODO
create-tag
get-tag-id
list-all-tags
create-tag
get-tag-size
get-tags-for-files
tag-files
untag-files
#%app
#%datum
#%top)
(define reader-jobs-queue (make-parameter null))
(define writer-jobs-queue (make-parameter null))
(struct sql-request (kind sql-stmt args return-channel) #:prefab)
(struct sql-response (status result) #:prefab)
;; Send a request to a SQLite worker and return the response.
;; Handles all serialization and deserialization of arguments.
(define (send-request/get-response target kind sql-stmt args)
(let*-values ([(send receive) (place-channel)]
[(request) (sql-request kind sql-stmt (serialize args) send)])
(place-channel-put target request)
(match (place-channel-get receive)
[(sql-response 'ok result) (deserialize result)]
[(sql-response 'error result) (deserialize result)])))
;; Send a request to a SQLite worker and return a sync event immediately.
;; The return place-channel is a sync event, but send-request/async wraps it
;; to preform deserialization.
(define (send-request/async target kind sql-stmt args)
(let*-values ([(send receive) (place-channel)]
[(request) (sql-request kind sql-stmt (serialize args) send)])
(place-channel-put target request)
(wrap-evt receive (lambda (e)
(match e
[(sql-response 'ok result) (deserialize result)]
;; TODO: raise an exception
[(sql-response 'error result) (deserialize result)])))))
(define (sql-read kind sql-stmt . args) (send-request/get-response (reader-jobs-queue) kind sql-stmt args))
(define (sql-write kind sql-stmt . args) (send-request/get-response (writer-jobs-queue) kind sql-stmt args))
;; File operations
;; ================================================================================================================
;; Add a file to the system, return the file_id
;;
;; last-modify and size are added opportunistically, but silently left null if
;; something goes wrong (e.g. the files are not accessible on this host for
;; whatever reason.)
;;
;; If the file already exists, the existing file-id is returned without hassling
;; the user.
(define (add-file filename)
(or (get-file-id filename)
(begin0 (sql-write 'exec-with-last-rowid "INSERT INTO Files (filename, last_modified, size) VALUES (?,?,?)"
filename
(file-or-directory-modify-seconds filename #f (thunk sql-null))
(with-handlers ([(thunk* #t) (thunk* sql-null)])
(file-size filename)))
(invalidate-memo/single number-of-files)
;; TODO: fire plugins with filename and file-id
)))
;; Get the file-id for a given filename
;; returns #f if the filename isn't in the db.
(define/memoize (get-file-id filename)
(sql-read 'query-maybe-value "SELECT file_id FROM Files WHERE filename=?" filename))
;; Get the filename for a given file-id
;; returns #f if file-id isn't in the db.
(define/memoize (get-file-name file-id)
(sql-read 'query-maybe-value "SELECT filename FROM Files where file_id=?" file-id))
;; For each file-id provided, untag it completely. Optionally, delete the
;; file as well.
;;
;; If this user requests file deletion but the file doesn't exist from the
;; server's perspective, the system silently proceeds. This is designed
;; to not hassle the user, but might change in the future because I'm not sure
;; this is actually good behavior; a file might simply be inaccessible to the
;; server but live elsewhere, and the user might want to know the file isn't
;; actually gone.
;;
;; TODO: give the above some more consideration.
(define (remove-files #:delete? [delete? #f] . file-ids)
;; TODO: make this not suck
(let ([files-with-tags (map cons file-ids (apply get-tags-for-files file-ids))])
(for ([f files-with-tags])
;; untag the file (this sucks)
(for ([t (cdr f)])
(untag-files t (car f)))
;; optionally delete the file, if present
(when delete?
(let ([filename (get-file-name (car f))])
(when (file-exists? filename) (delete-file filename))))
;; finally, remove it from the Files table.
(sql-write 'exec "DELETE FROM Files WHERE file_id=?" (car f)))
(invalidate-memo/single number-of-files)
;; TODO: fire plugins with list of removed file ids.
))
;; Rename a file in the db, optionally moving the file on disk as well.
;; The main reason somebody might want to rename a file in the DB but not
;; their filesystem is probably due to samba nonsense with mangled unicode.
;; Otherwise, this procedure will likely be invoked with #move-file? #t
(define (rename-file file-id new-filename #:move-file? [move-file? #t])
(define old-filename (get-file-name file-id))
(when move-file? (rename-file-or-directory old-filename new-filename #f))
(sql-write 'exec "UPDATE Files SET filename=? WHERE file_id=?" new-filename file-id)
;; invalidate caches concerning old file-id / filename association
(invalidate-memo/partial get-file-id old-filename)
(invalidate-memo/partial get-file-name file-id)
;; TODO: fire plugins with old and new filename
)
;; Total number of files registered in the system.
(define/memoize-zero number-of-files
(sql-read 'query-value "SELECT count(*) FROM Files"))
;; Tag operations
;; =============================================================================================================================
;; Get the id of a tag (passed as a symbol)
;; If the tag does not exist, return #f
;;
;; TODO: this behavior is inconsistent with get-file-id.
(define/memoize (get-tag-id tag)
(sql-read 'query-maybe-value "SELECT tag_id FROM Tags where tag_name=?" (symbol->string tag)))
;; Returns a list of all tags, from the largest to the smallest.
;;
;; Perhaps we should invalidate this cache every time the size of a tag changes
;; since the true ordering of the tags might have changed, but naive invalidation
;; like that would have really awful performance and the consequence of this
;; cache being slightly out of order should be relatively minor. In any
;; event, it gets invalidated each time a tag is created or deleted, which
;; in practice is probably often enough.
(define/memoize-zero list-all-tags
(map string->symbol (sql-read 'query-list "SELECT tag_name FROM Tags ORDER BY num_files DESC")))
;; Create a new tag, returning the tag-id.
;;
;; If the tag already exists, don't hassle the user; just return the tag-id.
;; If we are creating a new tag for real, take the opportunity to invalidate the
;; list-all-tags cache.
(define (create-tag tag)
(or (get-tag-id tag)
(begin0
(sql-write 'exec-with-last-rowid "INSERT INTO Tags (tag_name) VALUES (?)" (symbol->string tag))
(invalidate-memo/single list-all-tags)
(invalidate-memo/partial get-tag-id tag)
;; TODO: fire plugins with new tag
)))
;; Return the number of files associated with a given tag.
(define/memoize (get-tag-size tag)
(let ([tag-id (get-tag-id tag)])
(sql-read 'query-value "SELECT count(*) FROM FileTags WHERE tag_id=?" tag-id)))
;; Given a file-ids, return a list of the tags those file-ids are associated with.
;;
;; If multiple reader places exist, use all of them. This has NOT (yet) been
;; benchmarked. A more clever query running in a single reader place may very
;; well outperform this solution.
;;
;; TODO: benchmark it
(define (get-tags-for-files . file-ids)
(vector->list (sync/vector
(for/vector #:length (length file-ids)
([file-id file-ids])
(send-request/async (reader-jobs-queue) 'query-list
(string-append "SELECT tag_name FROM Tags"
" INNER JOIN"
"(SELECT tag_id FROM FileTags WHERE file_id=?) AS sub"
" ON Tags.tag_id = sub.tag_id"
)
`(,file-id) )))))
;; Apply a given tag to a set of file ids.
;; If the tag doesn't already exist, it is silently created without hassling the user.
;; If any of the file ids already have the given tag, they are silently skipped without
;; hassling the user.
(define (tag-files tag . file-ids)
(let* ([tag-id (create-tag tag)]
[files-with-tags (map cons file-ids (apply get-tags-for-files file-ids))]
[files-without-our-tag (filter (lambda (f) (not (member? tag (cdr f))))
files-with-tags)])
(for ([f files-without-our-tag])
(sql-write 'exec "INSERT INTO FileTags (file_id, tag_id, probability) VALUES (?,?,1.0)" (car f) tag-id)
(sql-write 'exec "UPDATE Files SET num_tags = num_tags + 1 WHERE file_id=?" (car f)))
(invalidate-memo/partial get-tag-size tag)
;; TODO: fire plugins with tag and `files-without-our-tag` (which now have it.)
))
;; Remove a given tag from a set of file ids.
;; If any of the file ids is not tagged with the given tag, they are silently skipped
;; without hassling the user. If a tag is empty after this operation, it is deleted.
;;
;; TODO: This procedure performs up to three database writes. If those writes
;; happened to be interleaved with other writes which are associated with
;; the same tag, screwy things might happen. Therefore these two/three
;; writes should be performed in a single transaction.
(define (untag-files tag . file-ids)
(let* ([tag-id (get-tag-id tag)]
[files-with-tags (map cons file-ids (apply get-tags-for-files file-ids))]
[files-with-our-tag (filter (lambda (f) (member? tag (cdr f))) files-with-tags)])
(for ([f files-with-our-tag])
(sql-write 'exec "DELETE FROM FileTags WHERE file_id=? AND tag_id=? AND probability=1" (car f) tag-id)
(sql-write 'exec "UPDATE Files SET num_tags = num_tags - 1 WHERE file_id=?" (car f)))
(invalidate-memo/partial get-tag-size tag)
(when (zero? (get-tag-size tag))
(sql-write 'exec "DELETE FROM Tags WHERE tag_id=?" tag-id))
;; TODO: fire plugins with tag and `files-with-our-tag` (which now do not have it.)
))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment