Created
January 31, 2022 21:42
-
-
Save rcy/18f2aa8b867cd26e247da9e26addd43f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;; annotate.el --- mark up foreign language buffers according to anki deck | |
;; Copyright (C) 2009 Ryan Yeske | |
;; Author: Ryan Yeske <[email protected]> | |
;; Keywords: languages | |
;; Version: 2009-03-17 23:04:03 | |
;; This program is free software; you can redistribute it and/or modify | |
;; it under the terms of the GNU General Public License as published by | |
;; the Free Software Foundation, either version 3 of the License, or | |
;; (at your option) any later version. | |
;; This program is distributed in the hope that it will be useful, | |
;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
;; GNU General Public License for more details. | |
;; You should have received a copy of the GNU General Public License | |
;; along with this program. If not, see <http://www.gnu.org/licenses/>. | |
;;; Commentary: | |
;; The main assumption is that your cards have target language | |
;; sentences or words on the front, and anything on the back, which is | |
;; ignored. | |
;; Useful global bindings: | |
;; (global-set-key (kbd "C-c a") 'annotate-make-flashcard) | |
;; (global-set-key (kbd "C-c l") 'annotate-look-for-word) | |
;; (global-set-key (kbd "C-c k") 'annotate-save-text-as-known) | |
;;; TODO: | |
;; bugs: | |
;; - figure out face lossage when switching background colors (light to dark &vv) | |
;; dictionary: | |
;; - implement some kinda automated dictionary lookup | |
;; *report*: | |
;; - occur on RET on buffer | |
;; - dump sentences from words on keystroke | |
;; - count total buffer words | |
;; - keystroke to add words to ignore file (and implement ignore file) | |
;; *sentences*: | |
;; - end of buffer error | |
;; - hit RET to jump to the sentence in the buffer | |
;; - hit (keystroke?) to create a card from sentence | |
;;; Code: | |
(require 'cl) | |
(require 'thingatpt) | |
(defvar annotate-anki-file "~/spanish/srs/initial-spanish.anki") | |
(defvar annotate-import-file "~/import.txt") | |
(defvar annotate-looking-for-file "~/looking-for.txt" | |
"Words in this file will trigger notification when they are | |
seen in annotated buffers.") | |
(defvar annotate-unknown-coverage .02 | |
"How much of buffer to mark up with unknown words.") | |
(defface annotate-mature | |
'((t (:inherit default))) | |
"The face used to highlight mature words.") | |
(defface annotate-young | |
'((((background light)) (:background "#ccffcc")) | |
(((background dark)) (:background "#007700"))) | |
"The face used to highlight young words.") | |
(defface annotate-new | |
'((((background light)) (:background "#e50000")) | |
(((background dark)) (:background "#770000"))) | |
"The face used to highlight new words.") | |
(defface annotate-unknown | |
'((((background light)) (:background "#ffffaa")) | |
(((background dark)) (:background "#555500"))) | |
"The face used to highlight unknown words.") | |
(defface annotate-ignore | |
'((t (:bold t))) | |
"The face used to highlight ignored words.") | |
(defvar annotate-harvest-min-words 4) | |
(defvar annotate-harvest-max-words 12) | |
(defun annotate-make-table () | |
(make-hash-table :test 'equal)) | |
(defvar annotate-mature-table (annotate-make-table) | |
"Hash table of known mature words.") | |
(defvar annotate-known-table (annotate-make-table) | |
"Hash table of known words.") | |
(defvar annotate-young-table (annotate-make-table) | |
"Hash table of known young words.") | |
(defun annotate-buffer (percentage &optional buffer) | |
(interactive "p") | |
(with-current-buffer (or buffer (current-buffer)) | |
(annotate-region (point-min) (point-max) | |
(sort (annotate-hash-table-to-alist | |
(annotate-word-frequency annotate-known-table | |
annotate-known-list)) | |
;; sort by frequency | |
(lambda (a b) (cond ((= (cdr a) (cdr b)) | |
(zerop (random 2))) | |
(t (> (cdr a) (cdr b)))))) | |
(and percentage (/ percentage 100.0))))) | |
(defvar annotate-known-word-file "/home/rcy/known.txt") | |
(defun annotate-update-tables () | |
(interactive) | |
(annotate-word-table annotate-known-table | |
annotate-anki-file "interval > 0") | |
(annotate-word-table annotate-mature-table | |
annotate-anki-file "interval > 21") | |
(annotate-word-table annotate-young-table | |
annotate-anki-file "interval > 0 and interval <= 21" | |
annotate-mature-table) | |
(setq annotate-known-list (annotate-word-list-from-file | |
annotate-known-word-file)) | |
(message "%d %d %d" | |
(hash-table-count annotate-mature-table) | |
(hash-table-count annotate-young-table) | |
(hash-table-count annotate-known-table))) | |
(defun annotate-save-text-as-known (text) | |
(interactive (list | |
(read-from-minibuffer "Known phrase: " | |
(annotate-cleanup-sentence | |
(buffer-substring (region-beginning) | |
(region-end)))))) | |
(with-temp-buffer | |
(insert text) | |
(newline) | |
(append-to-file (point-min) (point-max) annotate-known-word-file))) | |
(defun annotate-word-list-from-file (file) | |
(let (word-list) | |
(with-temp-buffer | |
(insert-file-contents file) | |
(annotate-wordify-buffer) | |
(while (annotate-search-forward-word) | |
(push (match-string 0) word-list))) | |
word-list)) | |
(defun annotate-count-anki-sentences () | |
"Return the number of facts (unique sentences) in anki deck." | |
(interactive) | |
(with-temp-buffer | |
(call-process "sqlite3" nil (current-buffer) nil | |
(expand-file-name annotate-anki-file) | |
(concat "select id from facts")) | |
(count-lines (point-min) (point-max)))) | |
(defun annotate-stats () | |
"Report some stats for graphing" | |
(interactive) | |
(let ((mature-tab (annotate-make-table)) | |
(young-tab (annotate-make-table))) | |
(annotate-word-table mature-tab annotate-anki-file | |
"interval > 21") | |
(annotate-word-table young-tab annotate-anki-file | |
"interval > 0 and interval <= 21" mature-tab) | |
(let ((mature (hash-table-count mature-tab)) | |
(young (hash-table-count young-tab))) | |
(message (concat (format-time-string "%Y%m%d") | |
(format " %d %d %d" | |
(annotate-count-anki-sentences) | |
mature | |
(+ young mature))))))) | |
(defun annotate-display-known-words () | |
(interactive) | |
(annotate-dump-table annotate-known-table)) | |
(defun annotate-table-words (table) | |
(let (words) | |
(maphash (lambda (word freq) (push word words)) table) | |
words)) | |
(defun annotate-add-word (word table) | |
"Add WORD to TABLE or increment count if it exists." | |
(let ((val (gethash word table))) | |
(puthash word (if val (1+ val) 1) table))) | |
(defun annotate-markup (beg end face) | |
"Mark region between BEG and END with FACE." | |
(add-text-properties beg end `(face ,face rear-nonsticky t))) | |
(defun annotate-trim-to-coverage (alist coverage total) | |
"Return a subset of ALIST such that we get COVERAGE of TOTAL." | |
(let ((cell (car alist)) | |
(current 0.0) | |
acc) | |
(while (not (or (>= (/ current total) coverage) | |
(null cell))) | |
(let ((count (cdr cell))) | |
(push cell acc) | |
(incf current count) | |
(setq alist (cdr alist)) | |
(setq cell (car alist)))) | |
acc)) | |
(defun annotate-set-header-line (alist) | |
(setq header-line-format (mapconcat 'car alist " "))) | |
(defvar annotate-document-table nil) | |
(defvar annotate-common-alist nil | |
"Alist of common unknown word and frequency pairs.") | |
(defun annotate-region (start end word-frequency-alist &optional coverage) | |
(interactive "r") | |
;; prepare buffer | |
(let ((inhibit-read-only t) | |
(modified-p (buffer-modified-p))) | |
(remove-text-properties start end '(face)) | |
;; take the top words from word-frequency-alist | |
(set (make-local-variable 'annotate-common-alist) | |
(reverse | |
(annotate-trim-to-coverage word-frequency-alist | |
(or coverage annotate-unknown-coverage) | |
(annotate-count-words-region | |
(point-min) (point-max))))) | |
(annotate-set-header-line annotate-common-alist) | |
(save-excursion | |
(goto-char start) | |
(let ((all (annotate-make-table)) | |
(total-words 0)) | |
(while (annotate-search-forward-word end) | |
(incf total-words) | |
(let ((word (downcase (match-string 0))) | |
(mbeg (match-beginning 0)) | |
(mend (match-end 0))) | |
(annotate-add-word word all) | |
;; look up word for markup | |
(cond | |
;; its a common unknown word | |
((assoc word annotate-common-alist) | |
(annotate-markup mbeg mend 'annotate-new)) | |
;; its a word recently being studied | |
((gethash word annotate-young-table) | |
(annotate-markup mbeg mend 'annotate-young)) | |
;; do nothing | |
(t)))) | |
(annotate-convert-table all total-words) | |
(set (make-local-variable 'annotate-document-table) | |
all))) | |
(set-buffer-modified-p modified-p))) | |
(defun annotate-phrase-regexp (phrase) | |
"Return a regexp to match PHRASE with any case, whitespace or punctuation." | |
(mapconcat (lambda (word) | |
(concat "\\(\\<" word "\\>\\)")) | |
(annotate-wordify-sentence phrase) | |
"[^[:alpha:]]+")) | |
(defun annotate-count-words-region (beg end) | |
"Return the number of words between BEG and END." | |
(save-excursion | |
(goto-char beg) | |
(let ((count 0)) | |
(while (annotate-search-forward-word end) | |
(incf count)) | |
count))) | |
(defun annotate-setup-flashcard-line () | |
(interactive) | |
(let* ((marker (get-text-property (point) 'occur-target)) | |
(file (buffer-file-name (marker-buffer marker))) | |
(posn (marker-position marker))) | |
(annotate-setup-flashcard-region (line-beginning-position) | |
(line-end-position) | |
file posn))) | |
(defun annotate-setup-flashcard-region () | |
(interactive) | |
(save-excursion | |
(annotate-make-flashcard))) | |
(define-derived-mode annotate-sentence-mode text-mode "Sentence" | |
"Major mode for viewing harvested sentences. | |
\\{annotate-sentence-mode-map}" | |
) | |
;;; hack to create a list of common unknown words, from a word | |
;;; frequency list and from the list of words in anki | |
(defun annotate-display-unknown-words () | |
"Display a buffer containing words in current buffer that are unknown." | |
(interactive) | |
(let ((words (annotate-find-unknown-words | |
(buffer-substring (point-min) (point-max)) | |
annotate-known-table))) | |
(pop-to-buffer "*unknown*") | |
(erase-buffer) | |
(dolist (word words) | |
(insert word) | |
(newline)))) | |
(defun annotate-find-hunted (start end) | |
"Mark hunted phrases in current buffer between START and END." | |
(interactive "r") | |
(save-excursion | |
(goto-char start) | |
(let ((re (with-temp-buffer | |
(insert-file-contents-literally annotate-looking-for-file) | |
(mapconcat (lambda (phrase) | |
(annotate-phrase-regexp phrase)) | |
(split-string (buffer-substring (point-min) (point-max)) | |
"\n" t) | |
"\\|"))) | |
(start (point))) | |
(while (re-search-forward re end t) | |
(annotate-markup (match-beginning 0) (match-end 0) | |
'font-lock-warning-face))))) | |
(defun annotate-phrase-regexp (phrase) | |
"Return a regexp to match PHRASE with any case, whitespace or punctuation." | |
(mapconcat (lambda (word) | |
(concat "\\(\\<" word "\\>\\)")) | |
(annotate-wordify-sentence phrase) | |
"[^[:alpha:]]+")) | |
(defun annotate-search-forward-word (&optional bound) | |
"Search forward one word. Moves point to end of search. | |
Optional argument bounds the search; it is a buffer position. | |
Returns nil if there is no word to move forward over. | |
See also the functions `match-beginning', `match-end', `match-string', | |
and `replace-match'." | |
;; this is the point at which we could conditionalize the regexp or | |
;; search logic for chinese characters or other languages that arent | |
;; as trivial as moving across alpha characters | |
(re-search-forward "[[:alpha:]]+" bound t)) | |
(defun annotate-cleanup-sentence (string) | |
"Remove properties and extra whitespace from STRING." | |
(when string | |
(mapconcat 'substring-no-properties (split-string string) " "))) | |
(defun annotate-sentence-words (sentence) | |
"Return a list of downcased words in sentence. | |
Non words are removed, as are duplicate words. | |
Word order not preserved." | |
;;; FIXME: consider reusing wordify-buffer for this (also think | |
;;; about duplicate word issue) | |
(when sentence | |
(with-temp-buffer | |
(insert sentence) | |
(goto-char (point-min)) | |
(let ((start (point)) | |
wordlist) | |
(while (re-search-forward "[[:alpha:]]+" nil t) | |
(let ((word (downcase (match-string 0)))) | |
(push word wordlist))) | |
wordlist)))) | |
(defvar annotate-word-sentence-table nil) ;FIXME: make buffer | |
;local, for multiple | |
;concurrent sentence | |
;mining processes | |
(defun annotate-build-sentence-table (&optional buffer) | |
(with-current-buffer (or buffer (current-buffer)) | |
(setq annotate-word-sentence-table (annotate-make-table)) | |
(goto-char (point-min)) | |
(while (not (>= (point) (point-max))) | |
(forward-sentence) | |
(sit-for 0) | |
(let* ((sentence (annotate-cleanup-sentence (sentence-at-point))) | |
(words (annotate-wordify-sentence sentence)) | |
(slen (length words))) | |
(if (and (>= slen annotate-harvest-min-words) | |
(<= slen annotate-harvest-max-words)) | |
(dolist (word words) | |
(let ((s (propertize sentence | |
'occur-target | |
(set-marker (make-marker) (point)))) | |
(ss (gethash word annotate-word-sentence-table))) | |
(puthash word (if ss | |
(cons s ss) | |
(list s)) | |
annotate-word-sentence-table)))))))) | |
(defun annotate-mine-sentences (&optional buffer) | |
(interactive) | |
(annotate-build-sentence-table buffer) | |
(let ((freq-table)) | |
(maphash (lambda (w ss) | |
(setq freq-table (cons (cons w (length ss)) freq-table))) | |
annotate-word-sentence-table) | |
(setq freq-table | |
(sort freq-table | |
(lambda (a b) | |
(let ((ka (gethash (car a) annotate-known-table)) | |
(kb (gethash (car b) annotate-known-table))) | |
(if (or (and ka kb) | |
(and (not ka) (not kb))) | |
(> (cdr a) (cdr b)) | |
kb))))) | |
(with-current-buffer (get-buffer-create "*sentences*") | |
(erase-buffer) | |
(annotate-sentence-mode) | |
(dolist (cell freq-table) | |
(insert (format "\n* %s %d\n\n" (car cell) (cdr cell))) | |
(message (car cell)) (sit-for 0) | |
(let ((opoint (point))) | |
(dolist (sentence (gethash (car cell) annotate-word-sentence-table)) | |
(insert (format " %.3f. %s\n" | |
(annotate-score-words (annotate-wordify-sentence | |
sentence) | |
annotate-known-table) | |
sentence))) | |
(sort-lines t opoint (point)))) | |
(pop-to-buffer (current-buffer)) | |
(goto-char (point-min))))) | |
(defun annotate-harvest-sentences-buffer () | |
"Harvest sentences from buffer. | |
The region must already be annotated with `annotate-region'." | |
(interactive) | |
(annotate-harvest-sentences-region (point-min) (point-max))) | |
(defun annotate-phrase-at-point () | |
"Return string including word at point and 4 words on either side." | |
(save-excursion | |
(backward-word 5) | |
(let ((start (point))) | |
(forward-word 9) | |
(annotate-cleanup-sentence (buffer-substring start (point)))))) | |
(defun annotate-mine-phrases () | |
(interactive) | |
(let ((table (annotate-mine-phrases-1 (current-buffer)))) | |
(annotate-display-phrases table))) | |
(defun annotate-mine-phrases-1 (buffer &optional table) | |
(save-excursion | |
(with-current-buffer buffer | |
(goto-char (point-min)) | |
(while (re-search-forward "\\<[[:alpha:]]+\\>" nil t) | |
(let ((word (downcase (word-at-point)))) | |
(if (not (annotate-known-p word)) | |
;; find 4 words before and 4 after for a phrase | |
(let ((phrase (annotate-phrase-at-point))) | |
;; if there is more than 1 unknown word, its a candidate phrase | |
(unless (cdr (annotate-find-unknown-words phrase annotate-known-table)) | |
(let ((string (propertize (concat "..." phrase "...") | |
'occur-target | |
(set-marker (make-marker) (point))))) | |
(push (cons word string) table))))))) | |
table))) | |
(defvar annotate-reading-directory "~/spanish/reading") | |
(defun annotate-mine-phrases-files () | |
"Mine phrases from all files in `annotate-reading-directory'." | |
(interactive) | |
(let ((files (directory-files annotate-reading-directory | |
t "^[^.].*\\.txt$" t)) | |
table) | |
(dolist (file files) | |
(find-file file) | |
(setq table (annotate-mine-phrases-1 (current-buffer) table))) | |
(annotate-display-phrases table))) | |
(defun annotate-display-phrases (table) | |
(let ((buf (current-buffer))) | |
(with-current-buffer (get-buffer-create "*phrases*") | |
(erase-buffer) | |
(save-restriction | |
(dolist (cell (reverse table)) | |
(insert (cdr cell)) | |
(newline)) | |
(pop-to-buffer (current-buffer)) | |
(goto-char (point-min)) | |
(annotate-buffer) | |
(annotate-sentence-mode))))) | |
(defun annotate-score-words (words knowntab) | |
"Return the average frequencies of WORDS in KNOWNTAB." | |
(let ((total 0.0)) | |
(dolist (word words) | |
(incf total (or (gethash word knowntab) 0))) | |
(/ total (length words)))) | |
(defun annotate-sentence-mode-goto-sentence () | |
(interactive) | |
(save-excursion | |
(end-of-line) | |
(backward-char) | |
(occur-mode-goto-occurrence))) | |
(defvar annotate-sentence-mode-map | |
(let ((map (make-sparse-keymap))) | |
(define-key map (kbd "RET") 'annotate-sentence-mode-goto-sentence) | |
(define-key map "a" 'annotate-setup-flashcard-line) | |
map) | |
"Keymap for `annotate-sentence-mode'.") | |
(defvar annotate-known-list nil) | |
(defun annotate-known-p (word) | |
(or (gethash word annotate-known-table) ;the anki db | |
(member word annotate-known-list))) ;the known.txt file | |
(defun annotate-find-unknown-words (sentence known-table) | |
"Return a list of words from SENTENCE that are not in KNOWN-TABLE." | |
(let (unknowns) | |
(dolist (word (annotate-wordify-sentence sentence)) | |
(unless (annotate-known-p word) | |
(push word unknowns))) | |
unknowns)) | |
(define-derived-mode annotate-sentence-mode text-mode "Sentence" | |
"Major mode for viewing harvested sentences. | |
\\{annotate-sentence-mode-map}" | |
) | |
(defun annotate-wordify-sentence (string) | |
"Return a list of words, in order, downcased, from string." | |
(when string | |
(with-temp-buffer | |
(insert string) | |
(annotate-wordify-buffer) | |
(split-string (buffer-substring-no-properties | |
(point-min) (point-max)))))) | |
(defun annotate-wordify-buffer () | |
"Remove punctuation and extra whitespace, leaving just one word per line." | |
(goto-char (point-min)) | |
(while (re-search-forward "\\b[^[:alpha:]]+\\b" nil t) | |
(replace-match "\n" t t)) | |
(flush-lines "[[:digit:]]" (point-min) (point-max)) | |
(downcase-region (point-min) (point-max))) | |
(defun annotate-word-table (table file selection &rest override-tables) | |
"Update TABLE with word frequences from anki FILE given sql SELECTION. | |
Update OVERRIDE-TABLES if the word exists in one of them. | |
Return the total number of words processed." | |
(with-temp-buffer | |
(when (not | |
(zerop | |
(call-process "sqlite3" nil (current-buffer) nil | |
(expand-file-name file) | |
(concat "select question from cards where " selection ";")))) | |
(error "%s" (buffer-substring (point-min) (point-max)))) | |
(goto-char (point-min)) | |
;; remove <span> markup | |
(while (re-search-forward "<.*?>" nil t) (replace-match " ")) | |
;; build a hash table of word frequencies | |
(annotate-convert-table | |
table | |
(apply 'annotate-word-table-from-buffer | |
table | |
(current-buffer) (point-min) (point-max) | |
override-tables)))) | |
(defun annotate-word-table-from-buffer (table buffer start end &rest override-tables) | |
"Update hash TABLE of words and number of occurrances | |
from BUFFER between START and END. | |
Return total number of words in buffer." | |
(with-temp-buffer | |
(insert-buffer-substring-no-properties buffer start end) | |
;; one word per line, and no punctuation | |
(annotate-wordify-buffer) | |
(goto-char (point-min)) | |
(clrhash table) | |
(let ((word-count 0)) | |
(while (forward-word) | |
(incf word-count) | |
(let ((word (word-at-point)) | |
exists) | |
;; update in overriding tables, if it exists there | |
(dolist (otab override-tables) | |
(when (gethash word otab) | |
(annotate-add-word word otab) | |
(setq exists t))) | |
;; otherwise add to new table | |
(unless exists | |
(annotate-add-word word table)))) | |
word-count))) | |
(defun annotate-dump-table (table) | |
(with-current-buffer (get-buffer-create "*dump*") | |
(erase-buffer) | |
(maphash (lambda (w f) (insert (format "%05d: %s\n" f w))) | |
table) | |
(pop-to-buffer (current-buffer)) | |
(sort-regexp-fields t "^\\(.*\\):.*" "\\1" (point-min) (point-max)) | |
(annotate-buffer) | |
(goto-char (point-min)))) | |
(defun annotate-convert-table (tab words-total) | |
"Convert a table with integer values into ratio values." | |
(let ((total (float words-total))) | |
(maphash (lambda (w c) | |
(puthash w (/ c total) tab)) | |
tab))) | |
(defun annotate-word-frequency (&optional table-filter list-filter) | |
"Return an table of word/frequency pairs. | |
Ignore words in TABLE-FILTER and LIST-FILTER." | |
(save-excursion | |
(goto-char (point-min)) | |
(let ((tab (annotate-make-table)) | |
(words-total 0)) | |
(while (annotate-search-forward-word (point-max)) | |
(let ((word (downcase (match-string 0)))) | |
(unless (or (and table-filter (gethash word table-filter)) | |
(member word list-filter)) | |
(annotate-add-word word tab)))) | |
tab))) | |
(defun annotate-hash-table-to-alist (table) | |
(let (alist) | |
(maphash (lambda (word freq) | |
(setq alist (cons (cons word freq) alist))) | |
table) | |
alist)) | |
;;; flashcard generation | |
(defvar annotate-flashcard-mode-map | |
(let ((map (make-sparse-keymap))) | |
(define-key map (kbd "C-c C-c") 'annotate-flashcard-submit) | |
(define-key map (kbd "C-c C-i") 'annotate-flashcard-insert-unknown-words) | |
map) | |
"Keymap for `annotate-flashcard-mode'.") | |
(define-derived-mode annotate-flashcard-mode text-mode "Flashcard" | |
"Major mode for creating flashcards. | |
\\{annotate-flashcard-mode-map}" | |
(setq fill-paragraph-function 'annotate-flashcard-fill-paragraph) | |
(setq fill-column 1000)) | |
(defun annotate-flashcard-fill-paragraph (&optional justify region) | |
(save-restriction | |
(narrow-to-region (field-beginning) (field-end)) | |
(let (fill-paragraph-function) | |
(fill-paragraph justify region)))) | |
(defvar annotate-card-divider | |
"\n\n") | |
(defvar annotate-window-configuration nil) | |
(defun annotate-make-flashcard (&optional front) | |
(let ((sentence front)) | |
(setq annotate-window-configuration | |
(current-window-configuration)) | |
(let ((buf (generate-new-buffer-name "*card*"))) | |
(pop-to-buffer (get-buffer-create buf))) | |
(let ((inhibit-read-only t)) (erase-buffer)) | |
(annotate-flashcard-mode) | |
(if sentence | |
(insert sentence) | |
(newline)) | |
(while (re-search-forward "^[^ :]+: \\([ \t]+\\)" nil t) | |
(delete-region (match-beginning 1) (match-end 1))) | |
;; remove whitespace we dont need | |
(whitespace-cleanup-region (point-min) (point-max)) | |
;; leading space isnt handled by whitespace-cleanup | |
(goto-char (point-min)) | |
(save-excursion | |
(while (re-search-forward "^[[:space:]]+" nil t) | |
(delete-region (match-beginning 0) (match-end 0)))) | |
(goto-char (point-max)) | |
(insert (propertize annotate-card-divider | |
'face 'font-lock-preprocessor-face | |
'field t | |
'rear-nonsticky t)) | |
(unless sentence | |
(goto-char (point-min))) | |
(set-input-method 'spanish-postfix))) | |
(defun annotate-flashcard-submit () | |
"Add card to `annotate-import-file'." | |
;; replace divider with tab character, and newlines with <BR> | |
(interactive) | |
(let ((inhibit-read-only t)) | |
(goto-char (point-min)) | |
(re-search-forward annotate-card-divider) | |
(replace-match "\t") | |
;; remove final newline, if present. | |
(goto-char (point-max)) | |
(when (looking-at "^$") | |
(delete-backward-char 1)) | |
;; convert remaining newlines | |
(goto-char (point-min)) | |
(while (re-search-forward "\n" nil t) | |
(replace-match "<BR>")) | |
(goto-char (point-max)) | |
(newline) | |
;; add to import file | |
(append-to-file (point-min) (point-max) annotate-import-file) | |
(kill-buffer) | |
(set-window-configuration annotate-window-configuration))) | |
(defun annotate-flashcard-insert-unknown-words () | |
(interactive) | |
(let (words) | |
(save-excursion | |
(goto-char (point-min)) | |
(re-search-forward annotate-card-divider) | |
(goto-char (match-beginning 0)) | |
(setq words (annotate-find-unknown-words | |
(buffer-substring (point-min) (point)) | |
annotate-known-table))) | |
(save-excursion | |
(dolist (word (reverse words)) | |
(insert (concat word ": ")) | |
(newline 2))) | |
(end-of-line))) | |
(defun annotate-add-sentence (sentence) | |
(when sentence | |
(save-excursion | |
(with-current-buffer (get-buffer-create "*sentence pile*") | |
(goto-char (point-max)) | |
(insert (format "%s\n\n" sentence)))))) | |
(defun annotate-collect-sentences (&optional buffer) | |
(with-current-buffer (or buffer (current-buffer)) | |
(setq annotate-word-sentence-table (annotate-make-table)) | |
(goto-char (point-min)) | |
(while (not (>= (point) (point-max))) | |
(forward-sentence) | |
(sit-for 0) | |
(let* ((sentence (annotate-cleanup-sentence (sentence-at-point)))) | |
(annotate-add-sentence sentence))))) | |
(defun annotate-dump-words () | |
(interactive) | |
(annotate-update-tables) | |
(with-temp-buffer | |
(maphash (lambda (word freq) | |
(insert (format "%s\n" word))) | |
annotate-mature-table) | |
(sort-lines nil (point-min) (point-max)) | |
(write-region (point-min) (point-max) | |
(concat "/home/rcy/spanish/words/" | |
(format-time-string "%Y%m%d") | |
".txt")))) | |
(provide 'annotate) | |
;;; annotate.el ends here |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment