Created
August 27, 2019 04:00
-
-
Save jgreco/d445f6c85697da42a4af8fa84a36e9fe to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#lang racket | |
(require db | |
memo | |
"util.rkt" | |
racket/serialize) | |
(provide #%app | |
#%datum | |
#%top | |
;; SQL stuff | |
reader-jobs-queue | |
writer-jobs-queue | |
(struct-out sql-request) | |
(struct-out sql-response) | |
send-request/get-response | |
send-request/async | |
sql-read | |
sql-write | |
;; other stuff | |
add-file | |
get-file-id | |
get-file-name | |
remove-files | |
rename-file | |
number-of-files | |
;;number-of-tags ;; TODO | |
create-tag | |
get-tag-id | |
list-all-tags | |
create-tag | |
get-tag-size | |
get-tags-for-files | |
tag-files | |
untag-files | |
#%app | |
#%datum | |
#%top) | |
(define reader-jobs-queue (make-parameter null)) | |
(define writer-jobs-queue (make-parameter null)) | |
(struct sql-request (kind sql-stmt args return-channel) #:prefab) | |
(struct sql-response (status result) #:prefab) | |
;; Send a request to a SQLite worker and return the response. | |
;; Handles all serialization and deserialization of arguments. | |
(define (send-request/get-response target kind sql-stmt args) | |
(let*-values ([(send receive) (place-channel)] | |
[(request) (sql-request kind sql-stmt (serialize args) send)]) | |
(place-channel-put target request) | |
(match (place-channel-get receive) | |
[(sql-response 'ok result) (deserialize result)] | |
[(sql-response 'error result) (deserialize result)]))) | |
;; Send a request to a SQLite worker and return a sync event immediately. | |
;; The return place-channel is a sync event, but send-request/async wraps it | |
;; to preform deserialization. | |
(define (send-request/async target kind sql-stmt args) | |
(let*-values ([(send receive) (place-channel)] | |
[(request) (sql-request kind sql-stmt (serialize args) send)]) | |
(place-channel-put target request) | |
(wrap-evt receive (lambda (e) | |
(match e | |
[(sql-response 'ok result) (deserialize result)] | |
;; TODO: raise an exception | |
[(sql-response 'error result) (deserialize result)]))))) | |
(define (sql-read kind sql-stmt . args) (send-request/get-response (reader-jobs-queue) kind sql-stmt args)) | |
(define (sql-write kind sql-stmt . args) (send-request/get-response (writer-jobs-queue) kind sql-stmt args)) | |
;; File operations | |
;; ================================================================================================================ | |
;; Add a file to the system, return the file_id | |
;; | |
;; last-modify and size are added opportunistically, but silently left null if | |
;; something goes wrong (e.g. the files are not accessible on this host for | |
;; whatever reason.) | |
;; | |
;; If the file already exists, the existing file-id is returned without hassling | |
;; the user. | |
(define (add-file filename) | |
(or (get-file-id filename) | |
(begin0 (sql-write 'exec-with-last-rowid "INSERT INTO Files (filename, last_modified, size) VALUES (?,?,?)" | |
filename | |
(file-or-directory-modify-seconds filename #f (thunk sql-null)) | |
(with-handlers ([(thunk* #t) (thunk* sql-null)]) | |
(file-size filename))) | |
(invalidate-memo/single number-of-files) | |
;; TODO: fire plugins with filename and file-id | |
))) | |
;; Get the file-id for a given filename | |
;; returns #f if the filename isn't in the db. | |
(define/memoize (get-file-id filename) | |
(sql-read 'query-maybe-value "SELECT file_id FROM Files WHERE filename=?" filename)) | |
;; Get the filename for a given file-id | |
;; returns #f if file-id isn't in the db. | |
(define/memoize (get-file-name file-id) | |
(sql-read 'query-maybe-value "SELECT filename FROM Files where file_id=?" file-id)) | |
;; For each file-id provided, untag it completely. Optionally, delete the | |
;; file as well. | |
;; | |
;; If this user requests file deletion but the file doesn't exist from the | |
;; server's perspective, the system silently proceeds. This is designed | |
;; to not hassle the user, but might change in the future because I'm not sure | |
;; this is actually good behavior; a file might simply be inaccessible to the | |
;; server but live elsewhere, and the user might want to know the file isn't | |
;; actually gone. | |
;; | |
;; TODO: give the above some more consideration. | |
(define (remove-files #:delete? [delete? #f] . file-ids) | |
;; TODO: make this not suck | |
(let ([files-with-tags (map cons file-ids (apply get-tags-for-files file-ids))]) | |
(for ([f files-with-tags]) | |
;; untag the file (this sucks) | |
(for ([t (cdr f)]) | |
(untag-files t (car f))) | |
;; optionally delete the file, if present | |
(when delete? | |
(let ([filename (get-file-name (car f))]) | |
(when (file-exists? filename) (delete-file filename)))) | |
;; finally, remove it from the Files table. | |
(sql-write 'exec "DELETE FROM Files WHERE file_id=?" (car f))) | |
(invalidate-memo/single number-of-files) | |
;; TODO: fire plugins with list of removed file ids. | |
)) | |
;; Rename a file in the db, optionally moving the file on disk as well. | |
;; The main reason somebody might want to rename a file in the DB but not | |
;; their filesystem is probably due to samba nonsense with mangled unicode. | |
;; Otherwise, this procedure will likely be invoked with #move-file? #t | |
(define (rename-file file-id new-filename #:move-file? [move-file? #t]) | |
(define old-filename (get-file-name file-id)) | |
(when move-file? (rename-file-or-directory old-filename new-filename #f)) | |
(sql-write 'exec "UPDATE Files SET filename=? WHERE file_id=?" new-filename file-id) | |
;; invalidate caches concerning old file-id / filename association | |
(invalidate-memo/partial get-file-id old-filename) | |
(invalidate-memo/partial get-file-name file-id) | |
;; TODO: fire plugins with old and new filename | |
) | |
;; Total number of files registered in the system. | |
(define/memoize-zero number-of-files | |
(sql-read 'query-value "SELECT count(*) FROM Files")) | |
;; Tag operations | |
;; ============================================================================================================================= | |
;; Get the id of a tag (passed as a symbol) | |
;; If the tag does not exist, return #f | |
;; | |
;; TODO: this behavior is inconsistent with get-file-id. | |
(define/memoize (get-tag-id tag) | |
(sql-read 'query-maybe-value "SELECT tag_id FROM Tags where tag_name=?" (symbol->string tag))) | |
;; Returns a list of all tags, from the largest to the smallest. | |
;; | |
;; Perhaps we should invalidate this cache every time the size of a tag changes | |
;; since the true ordering of the tags might have changed, but naive invalidation | |
;; like that would have really awful performance and the consequence of this | |
;; cache being slightly out of order should be relatively minor. In any | |
;; event, it gets invalidated each time a tag is created or deleted, which | |
;; in practice is probably often enough. | |
(define/memoize-zero list-all-tags | |
(map string->symbol (sql-read 'query-list "SELECT tag_name FROM Tags ORDER BY num_files DESC"))) | |
;; Create a new tag, returning the tag-id. | |
;; | |
;; If the tag already exists, don't hassle the user; just return the tag-id. | |
;; If we are creating a new tag for real, take the opportunity to invalidate the | |
;; list-all-tags cache. | |
(define (create-tag tag) | |
(or (get-tag-id tag) | |
(begin0 | |
(sql-write 'exec-with-last-rowid "INSERT INTO Tags (tag_name) VALUES (?)" (symbol->string tag)) | |
(invalidate-memo/single list-all-tags) | |
(invalidate-memo/partial get-tag-id tag) | |
;; TODO: fire plugins with new tag | |
))) | |
;; Return the number of files associated with a given tag. | |
(define/memoize (get-tag-size tag) | |
(let ([tag-id (get-tag-id tag)]) | |
(sql-read 'query-value "SELECT count(*) FROM FileTags WHERE tag_id=?" tag-id))) | |
;; Given a file-ids, return a list of the tags those file-ids are associated with. | |
;; | |
;; If multiple reader places exist, use all of them. This has NOT (yet) been | |
;; benchmarked. A more clever query running in a single reader place may very | |
;; well outperform this solution. | |
;; | |
;; TODO: benchmark it | |
(define (get-tags-for-files . file-ids) | |
(vector->list (sync/vector | |
(for/vector #:length (length file-ids) | |
([file-id file-ids]) | |
(send-request/async (reader-jobs-queue) 'query-list | |
(string-append "SELECT tag_name FROM Tags" | |
" INNER JOIN" | |
"(SELECT tag_id FROM FileTags WHERE file_id=?) AS sub" | |
" ON Tags.tag_id = sub.tag_id" | |
) | |
`(,file-id) ))))) | |
;; Apply a given tag to a set of file ids. | |
;; If the tag doesn't already exist, it is silently created without hassling the user. | |
;; If any of the file ids already have the given tag, they are silently skipped without | |
;; hassling the user. | |
(define (tag-files tag . file-ids) | |
(let* ([tag-id (create-tag tag)] | |
[files-with-tags (map cons file-ids (apply get-tags-for-files file-ids))] | |
[files-without-our-tag (filter (lambda (f) (not (member? tag (cdr f)))) | |
files-with-tags)]) | |
(for ([f files-without-our-tag]) | |
(sql-write 'exec "INSERT INTO FileTags (file_id, tag_id, probability) VALUES (?,?,1.0)" (car f) tag-id) | |
(sql-write 'exec "UPDATE Files SET num_tags = num_tags + 1 WHERE file_id=?" (car f))) | |
(invalidate-memo/partial get-tag-size tag) | |
;; TODO: fire plugins with tag and `files-without-our-tag` (which now have it.) | |
)) | |
;; Remove a given tag from a set of file ids. | |
;; If any of the file ids is not tagged with the given tag, they are silently skipped | |
;; without hassling the user. If a tag is empty after this operation, it is deleted. | |
;; | |
;; TODO: This procedure performs up to three database writes. If those writes | |
;; happened to be interleaved with other writes which are associated with | |
;; the same tag, screwy things might happen. Therefore these two/three | |
;; writes should be performed in a single transaction. | |
(define (untag-files tag . file-ids) | |
(let* ([tag-id (get-tag-id tag)] | |
[files-with-tags (map cons file-ids (apply get-tags-for-files file-ids))] | |
[files-with-our-tag (filter (lambda (f) (member? tag (cdr f))) files-with-tags)]) | |
(for ([f files-with-our-tag]) | |
(sql-write 'exec "DELETE FROM FileTags WHERE file_id=? AND tag_id=? AND probability=1" (car f) tag-id) | |
(sql-write 'exec "UPDATE Files SET num_tags = num_tags - 1 WHERE file_id=?" (car f))) | |
(invalidate-memo/partial get-tag-size tag) | |
(when (zero? (get-tag-size tag)) | |
(sql-write 'exec "DELETE FROM Tags WHERE tag_id=?" tag-id)) | |
;; TODO: fire plugins with tag and `files-with-our-tag` (which now do not have it.) | |
)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment