rcy · January 31, 2022 21:42
diff --git a/annotate.el b/annotate.el
 ;;; annotate.el --- mark up foreign language buffers according to anki deck

 ;; Copyright (C) 2009  Ryan Yeske

 ;; Author: Ryan Yeske <[email protected]>
 ;; Keywords: languages
 ;; Version: 2009-03-17 23:04:03

 ;; This program is free software; you can redistribute it and/or modify
 ;; it under the terms of the GNU General Public License as published by
 ;; the Free Software Foundation, either version 3 of the License, or
 ;; (at your option) any later version.

 ;; This program is distributed in the hope that it will be useful,
 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ;; GNU General Public License for more details.

 ;; You should have received a copy of the GNU General Public License
 ;; along with this program.  If not, see <http://www.gnu.org/licenses/>.

 ;;; Commentary:

 ;; The main assumption is that your cards have target language
 ;; sentences or words on the front, and anything on the back, which is
 ;; ignored.

 ;; Useful global bindings:

 ;; (global-set-key (kbd "C-c a") 'annotate-make-flashcard)
 ;; (global-set-key (kbd "C-c l") 'annotate-look-for-word)
 ;; (global-set-key (kbd "C-c k") 'annotate-save-text-as-known)

 ;;; TODO:
 ;; bugs:
 ;;   - figure out face lossage when switching background colors (light to dark &vv)
 ;; dictionary:
 ;;   - implement some kinda automated dictionary lookup
 ;; *report*:
 ;;   - occur on RET on buffer
 ;;   - dump sentences from words on keystroke
 ;;   - count total buffer words
 ;;   - keystroke to add words to ignore file (and implement ignore file)
 ;; *sentences*:
 ;;   - end of buffer error
 ;;   - hit RET to jump to the sentence in the buffer
 ;;   - hit (keystroke?) to create a card from sentence

 ;;; Code:

 (require 'cl)
 (require 'thingatpt)

 (defvar annotate-anki-file "~/spanish/srs/initial-spanish.anki")

 (defvar annotate-import-file "~/import.txt")

 (defvar annotate-looking-for-file "~/looking-for.txt"
  "Words in this file will trigger notification when they are
 seen in annotated buffers.")

 (defvar annotate-unknown-coverage .02
  "How much of buffer to mark up with unknown words.")

 (defface annotate-mature
  '((t (:inherit default)))
  "The face used to highlight mature words.")

 (defface annotate-young
  '((((background light)) (:background "#ccffcc"))
    (((background dark)) (:background "#007700")))
  "The face used to highlight young words.")

 (defface annotate-new
  '((((background light)) (:background "#e50000"))
    (((background dark)) (:background "#770000")))
  "The face used to highlight new words.")

 (defface annotate-unknown
  '((((background light)) (:background "#ffffaa"))
    (((background dark)) (:background "#555500")))
  "The face used to highlight unknown words.")

 (defface annotate-ignore
  '((t (:bold t)))
  "The face used to highlight ignored words.")

 (defvar annotate-harvest-min-words 4)
 (defvar annotate-harvest-max-words 12)

 (defun annotate-make-table ()
  (make-hash-table :test 'equal))

 (defvar annotate-mature-table (annotate-make-table)
  "Hash table of known mature words.")

 (defvar annotate-known-table (annotate-make-table)
  "Hash table of known words.")

 (defvar annotate-young-table (annotate-make-table)
  "Hash table of known young words.")

 (defun annotate-buffer (percentage &optional buffer)
  (interactive "p")
  (with-current-buffer (or buffer (current-buffer))
    (annotate-region (point-min) (point-max)
 		     (sort (annotate-hash-table-to-alist
 			    (annotate-word-frequency annotate-known-table
 						     annotate-known-list))
 			   ;; sort by frequency
 			   (lambda (a b) (cond ((= (cdr a) (cdr b))
                                                (zerop (random 2)))
                                               (t (> (cdr a) (cdr b))))))
 		     (and percentage (/ percentage 100.0)))))

 (defvar annotate-known-word-file "/home/rcy/known.txt")

 (defun annotate-update-tables ()
  (interactive)
  (annotate-word-table annotate-known-table
 		       annotate-anki-file "interval > 0")
  (annotate-word-table annotate-mature-table
 		       annotate-anki-file "interval > 21")
  (annotate-word-table annotate-young-table
 		       annotate-anki-file "interval > 0 and interval <= 21"
 		       annotate-mature-table)
  (setq annotate-known-list (annotate-word-list-from-file 
 			     annotate-known-word-file))
  (message "%d %d %d" 
 	   (hash-table-count annotate-mature-table)
 	   (hash-table-count annotate-young-table)
 	   (hash-table-count annotate-known-table)))

 (defun annotate-save-text-as-known (text)
  (interactive (list
 		(read-from-minibuffer "Known phrase: "
 				      (annotate-cleanup-sentence
 				       (buffer-substring (region-beginning)
 							 (region-end))))))
  (with-temp-buffer
    (insert text)
    (newline)
    (append-to-file (point-min) (point-max) annotate-known-word-file)))

 (defun annotate-word-list-from-file (file)
  (let (word-list)
    (with-temp-buffer
      (insert-file-contents file)
      (annotate-wordify-buffer)
      (while (annotate-search-forward-word)
 	(push (match-string 0) word-list)))
    word-list))

 (defun annotate-count-anki-sentences ()
  "Return the number of facts (unique sentences) in anki deck."
  (interactive)
  (with-temp-buffer
    (call-process "sqlite3" nil (current-buffer) nil
 		  (expand-file-name annotate-anki-file)
 		  (concat "select id from facts"))
    (count-lines (point-min) (point-max))))

 (defun annotate-stats ()
  "Report some stats for graphing"
  (interactive)
  (let ((mature-tab (annotate-make-table))
 	(young-tab (annotate-make-table)))
    (annotate-word-table mature-tab annotate-anki-file
 			 "interval > 21")
    (annotate-word-table young-tab annotate-anki-file
 			 "interval > 0 and interval <= 21" mature-tab)
    (let ((mature (hash-table-count mature-tab))
 	  (young (hash-table-count young-tab)))
      (message (concat (format-time-string "%Y%m%d")
 		       (format " %d %d %d"
 			       (annotate-count-anki-sentences)
 			       mature
 			       (+ young mature)))))))

 (defun annotate-display-known-words ()
  (interactive)
  (annotate-dump-table annotate-known-table))

 (defun annotate-table-words (table)
  (let (words)
    (maphash (lambda (word freq) (push word words)) table)
    words))

 (defun annotate-add-word (word table)
  "Add WORD to TABLE or increment count if it exists."
  (let ((val (gethash word table)))
    (puthash word (if val (1+ val) 1) table)))

 (defun annotate-markup (beg end face)
  "Mark region between BEG and END with FACE."
  (add-text-properties beg end `(face ,face rear-nonsticky t)))

 (defun annotate-trim-to-coverage (alist coverage total)
  "Return a subset of ALIST such that we get COVERAGE of TOTAL."
  (let ((cell (car alist))
 	(current 0.0)
 	acc)
    (while (not (or (>= (/ current total) coverage)
 		    (null cell)))
      (let ((count (cdr cell)))
 	(push cell acc)
 	(incf current count)
 	(setq alist (cdr alist))
 	(setq cell (car alist))))
    acc))

 (defun annotate-set-header-line (alist)
  (setq header-line-format (mapconcat 'car alist " ")))

 (defvar annotate-document-table nil)
 (defvar annotate-common-alist nil
  "Alist of common unknown word and frequency pairs.")
 (defun annotate-region (start end word-frequency-alist &optional coverage)
  (interactive "r")

  ;; prepare buffer
  (let ((inhibit-read-only t)
 	(modified-p (buffer-modified-p)))
    (remove-text-properties start end '(face))

    ;; take the top words from word-frequency-alist
    (set (make-local-variable 'annotate-common-alist)
 	 (reverse
 	  (annotate-trim-to-coverage word-frequency-alist
 				     (or coverage annotate-unknown-coverage)
 				     (annotate-count-words-region 
 				      (point-min) (point-max)))))
    (annotate-set-header-line annotate-common-alist)

    (save-excursion
      (goto-char start)
      (let ((all (annotate-make-table))
 	    (total-words 0))
 	(while (annotate-search-forward-word end)
 	  (incf total-words)
 	  (let ((word (downcase (match-string 0)))
 		(mbeg (match-beginning 0))
 		(mend (match-end 0)))
 	    (annotate-add-word word all)
 	    
 	    ;; look up word for markup
 	    (cond 
 	     ;; its a common unknown word
 	     ((assoc word annotate-common-alist)
 	      (annotate-markup mbeg mend 'annotate-new))

 	     ;; its a word recently being studied
 	     ((gethash word annotate-young-table)
 	      (annotate-markup mbeg mend 'annotate-young))

 	     ;; do nothing
 	     (t))))
 	      
 	(annotate-convert-table all total-words)
 	(set (make-local-variable 'annotate-document-table)
 	     all)))
    (set-buffer-modified-p modified-p)))

 (defun annotate-phrase-regexp (phrase)
  "Return a regexp to match PHRASE with any case, whitespace or punctuation."
  (mapconcat (lambda (word)
 	       (concat "\\(\\<" word "\\>\\)"))
 	     (annotate-wordify-sentence phrase)
 	     "[^[:alpha:]]+"))

 (defun annotate-count-words-region (beg end)
  "Return the number of words between BEG and END."
  (save-excursion
    (goto-char beg)
    (let ((count 0))
      (while (annotate-search-forward-word end)
 	(incf count))
      count)))

 (defun annotate-setup-flashcard-line ()
  (interactive)
  (let* ((marker (get-text-property (point) 'occur-target))
 	 (file (buffer-file-name (marker-buffer marker)))
 	 (posn (marker-position marker)))
    (annotate-setup-flashcard-region (line-beginning-position)
 				     (line-end-position)
 				     file posn)))

 (defun annotate-setup-flashcard-region ()
  (interactive)
  (save-excursion
    (annotate-make-flashcard)))

 (define-derived-mode annotate-sentence-mode text-mode "Sentence"
  "Major mode for viewing harvested sentences.
 \\{annotate-sentence-mode-map}"
  )


 ;;; hack to create a list of common unknown words, from a word
 ;;; frequency list and from the list of words in anki
 (defun annotate-display-unknown-words ()
  "Display a buffer containing words in current buffer that are unknown."
  (interactive)
  (let ((words (annotate-find-unknown-words
 		(buffer-substring (point-min) (point-max))
 		annotate-known-table)))
    (pop-to-buffer "*unknown*")
    (erase-buffer)
    (dolist (word words)
      (insert word)
      (newline))))

 (defun annotate-find-hunted (start end)
  "Mark hunted phrases in current buffer between START and END."
  (interactive "r")
  (save-excursion
    (goto-char start)
    (let ((re (with-temp-buffer
 		(insert-file-contents-literally annotate-looking-for-file)
 		(mapconcat (lambda (phrase)
 			     (annotate-phrase-regexp phrase))
 			   (split-string (buffer-substring (point-min) (point-max))
 					 "\n" t)
 			   "\\|")))
 	  (start (point)))
    (while (re-search-forward re end t)
      (annotate-markup (match-beginning 0) (match-end 0)
 		       'font-lock-warning-face)))))

 (defun annotate-phrase-regexp (phrase)
  "Return a regexp to match PHRASE with any case, whitespace or punctuation."
  (mapconcat (lambda (word)
 	       (concat "\\(\\<" word "\\>\\)"))
 	     (annotate-wordify-sentence phrase)
 	     "[^[:alpha:]]+"))

 (defun annotate-search-forward-word (&optional bound)
  "Search forward one word.  Moves point to end of search.

 Optional argument bounds the search; it is a buffer position.
 Returns nil if there is no word to move forward over.

 See also the functions `match-beginning', `match-end', `match-string',
 and `replace-match'."
  ;; this is the point at which we could conditionalize the regexp or
  ;; search logic for chinese characters or other languages that arent
  ;; as trivial as moving across alpha characters
  (re-search-forward "[[:alpha:]]+" bound t))

 (defun annotate-cleanup-sentence (string)
  "Remove properties and extra whitespace from STRING."
  (when string
    (mapconcat 'substring-no-properties (split-string string) " ")))

 (defun annotate-sentence-words (sentence)
  "Return a list of downcased words in sentence.
 Non words are removed, as are duplicate words.
 Word order not preserved."
  ;;; FIXME: consider reusing wordify-buffer for this (also think
  ;;; about duplicate word issue)
  (when sentence
    (with-temp-buffer
      (insert sentence)
      (goto-char (point-min))
      (let ((start (point))
 	    wordlist)
 	(while (re-search-forward "[[:alpha:]]+" nil t)
 	  (let ((word (downcase (match-string 0))))
 	    (push word wordlist)))
 	wordlist))))

 (defvar annotate-word-sentence-table nil)	;FIXME: make buffer
 						;local, for multiple
 						;concurrent sentence
 						;mining processes

 (defun annotate-build-sentence-table (&optional buffer)
  (with-current-buffer (or buffer (current-buffer))
    (setq annotate-word-sentence-table (annotate-make-table))
    (goto-char (point-min))
    (while (not (>= (point) (point-max)))
      (forward-sentence)
      (sit-for 0)
      (let* ((sentence (annotate-cleanup-sentence (sentence-at-point)))
 	     (words (annotate-wordify-sentence sentence))
 	     (slen (length words)))
 	(if (and (>= slen annotate-harvest-min-words)
 		 (<= slen annotate-harvest-max-words))
 	    (dolist (word words)
 	      (let ((s (propertize sentence
 				   'occur-target
 				   (set-marker (make-marker) (point))))
 		    (ss (gethash word annotate-word-sentence-table)))
 		(puthash word (if ss
 				  (cons s ss)
 				(list s))
 			 annotate-word-sentence-table))))))))

 (defun annotate-mine-sentences (&optional buffer)
  (interactive)
  (annotate-build-sentence-table buffer)
  (let ((freq-table))
    (maphash (lambda (w ss)
 	       (setq freq-table (cons (cons w (length ss)) freq-table)))
 	     annotate-word-sentence-table)
    (setq freq-table
 	  (sort freq-table
 		(lambda (a b) 
 		  (let ((ka (gethash (car a) annotate-known-table))
 			(kb (gethash (car b) annotate-known-table)))
 		    (if (or (and ka kb)
 			    (and (not ka) (not kb)))
 			(> (cdr a) (cdr b))
 		      kb)))))
    (with-current-buffer (get-buffer-create "*sentences*")
      (erase-buffer)
      (annotate-sentence-mode)
      (dolist (cell freq-table)
 	(insert (format "\n* %s %d\n\n" (car cell) (cdr cell)))
 	(message (car cell)) (sit-for 0)
 	(let ((opoint (point)))
 	  (dolist (sentence (gethash (car cell) annotate-word-sentence-table))
 	    (insert (format "  %.3f. %s\n"
 			    (annotate-score-words (annotate-wordify-sentence
 						   sentence)
 						  annotate-known-table)
 			    sentence)))
 	  (sort-lines t opoint (point))))
      (pop-to-buffer (current-buffer))
      (goto-char (point-min)))))

 (defun annotate-harvest-sentences-buffer ()
  "Harvest sentences from buffer.
 The region must already be annotated with `annotate-region'."
  (interactive)
  (annotate-harvest-sentences-region (point-min) (point-max)))

 (defun annotate-phrase-at-point ()
  "Return string including word at point and 4 words on either side."
  (save-excursion
    (backward-word 5)
    (let ((start (point)))
      (forward-word 9)
      (annotate-cleanup-sentence (buffer-substring start (point))))))

 (defun annotate-mine-phrases ()
  (interactive)
  (let ((table (annotate-mine-phrases-1 (current-buffer))))
    (annotate-display-phrases table)))

 (defun annotate-mine-phrases-1 (buffer &optional table)
  (save-excursion
    (with-current-buffer buffer
      (goto-char (point-min))
      (while (re-search-forward "\\<[[:alpha:]]+\\>" nil t)
 	(let ((word (downcase (word-at-point))))
 	  (if (not (annotate-known-p word))
 	      ;; find 4 words before and 4 after for a phrase
 	      (let ((phrase (annotate-phrase-at-point)))
 		;; if there is more than 1 unknown word, its a candidate phrase
 		(unless (cdr (annotate-find-unknown-words phrase annotate-known-table))
 		  (let ((string (propertize (concat "..." phrase "...")
 					    'occur-target
 					    (set-marker (make-marker) (point)))))
 		    (push (cons word string) table)))))))
      table)))

 (defvar annotate-reading-directory "~/spanish/reading")

 (defun annotate-mine-phrases-files ()
  "Mine phrases from all files in `annotate-reading-directory'."
  (interactive)
  (let ((files (directory-files annotate-reading-directory
 				t "^[^.].*\\.txt$" t))
 	table)
    (dolist (file files)
      (find-file file)
      (setq table (annotate-mine-phrases-1 (current-buffer) table)))
    (annotate-display-phrases table)))

 (defun annotate-display-phrases (table)
  (let ((buf (current-buffer)))
    (with-current-buffer (get-buffer-create "*phrases*")
      (erase-buffer)
      (save-restriction
 	(dolist (cell (reverse table))
 	  (insert (cdr cell))
 	  (newline))
 	(pop-to-buffer (current-buffer))
 	(goto-char (point-min))
 	(annotate-buffer)
 	(annotate-sentence-mode)))))

 (defun annotate-score-words (words knowntab)
  "Return the average frequencies of WORDS in KNOWNTAB."
  (let ((total 0.0))
    (dolist (word words)
      (incf total (or (gethash word knowntab) 0)))
    (/ total (length words))))

 (defun annotate-sentence-mode-goto-sentence ()
  (interactive)
  (save-excursion
    (end-of-line)
    (backward-char)
    (occur-mode-goto-occurrence)))

 (defvar annotate-sentence-mode-map
  (let ((map (make-sparse-keymap)))
    (define-key map (kbd "RET") 'annotate-sentence-mode-goto-sentence)
    (define-key map "a" 'annotate-setup-flashcard-line)
    map)
  "Keymap for `annotate-sentence-mode'.")

 (defvar annotate-known-list nil)

 (defun annotate-known-p (word)
  (or (gethash word annotate-known-table) ;the anki db
      (member word annotate-known-list))) ;the known.txt file

 (defun annotate-find-unknown-words (sentence known-table)
  "Return a list of words from SENTENCE that are not in KNOWN-TABLE."
  (let (unknowns)
    (dolist (word (annotate-wordify-sentence sentence))
      (unless (annotate-known-p word)
 	(push word unknowns)))
    unknowns))

 (define-derived-mode annotate-sentence-mode text-mode "Sentence"
  "Major mode for viewing harvested sentences.
 \\{annotate-sentence-mode-map}"
  )

 (defun annotate-wordify-sentence (string)
  "Return a list of words, in order, downcased, from string."
  (when string
    (with-temp-buffer
      (insert string)
      (annotate-wordify-buffer)
      (split-string (buffer-substring-no-properties 
 		     (point-min) (point-max))))))

 (defun annotate-wordify-buffer ()
  "Remove punctuation and extra whitespace, leaving just one word per line."
  (goto-char (point-min))
  (while (re-search-forward "\\b[^[:alpha:]]+\\b" nil t)
    (replace-match "\n" t t))
  (flush-lines "[[:digit:]]" (point-min) (point-max))
  (downcase-region (point-min) (point-max)))

 (defun annotate-word-table (table file selection &rest override-tables)
  "Update TABLE with word frequences from anki FILE given sql SELECTION.

 Update OVERRIDE-TABLES if the word exists in one of them.
 Return the total number of words processed."
  (with-temp-buffer
    (when (not 
 	   (zerop
 	    (call-process "sqlite3" nil (current-buffer) nil
 			  (expand-file-name file)
 			  (concat "select question from cards where " selection ";"))))
      (error "%s" (buffer-substring (point-min) (point-max))))
    (goto-char (point-min))
    ;; remove <span> markup
    (while (re-search-forward "<.*?>" nil t) (replace-match " "))
    ;; build a hash table of word frequencies
    (annotate-convert-table
     table     
     (apply 'annotate-word-table-from-buffer
 	    table
 	    (current-buffer) (point-min) (point-max)
 	    override-tables))))

 (defun annotate-word-table-from-buffer (table buffer start end &rest override-tables)
  "Update hash TABLE of words and number of occurrances
 from BUFFER between START and END.

 Return total number of words in buffer."
  (with-temp-buffer
    (insert-buffer-substring-no-properties buffer start end)
    ;; one word per line, and no punctuation
    (annotate-wordify-buffer)
    (goto-char (point-min))
    (clrhash table)
    (let ((word-count 0))
      (while (forward-word)
 	(incf word-count)
 	(let ((word (word-at-point))
 	      exists)
 	  ;; update in overriding tables, if it exists there
 	  (dolist (otab override-tables)
 	    (when (gethash word otab)
 	      (annotate-add-word word otab)
 	      (setq exists t)))
 	  ;; otherwise add to new table
 	  (unless exists
 	    (annotate-add-word word table))))
      word-count)))

 (defun annotate-dump-table (table)
  (with-current-buffer (get-buffer-create "*dump*")
    (erase-buffer)
    (maphash (lambda (w f) (insert (format "%05d: %s\n" f w)))
 	     table)
    (pop-to-buffer (current-buffer))
    (sort-regexp-fields t "^\\(.*\\):.*" "\\1" (point-min) (point-max))
    (annotate-buffer)
    (goto-char (point-min))))

 (defun annotate-convert-table (tab words-total)
  "Convert a table with integer values into ratio values."
  (let ((total (float words-total)))
    (maphash (lambda (w c)
 	       (puthash w (/ c total) tab))
 	     tab)))

 (defun annotate-word-frequency (&optional table-filter list-filter)
  "Return an table of word/frequency pairs.  
 Ignore words in TABLE-FILTER and LIST-FILTER."
  (save-excursion
    (goto-char (point-min))
    (let ((tab (annotate-make-table))
 	  (words-total 0))
      (while (annotate-search-forward-word (point-max))
 	(let ((word (downcase (match-string 0))))
 	  (unless (or (and table-filter (gethash word table-filter))
 		      (member word list-filter))
 	    (annotate-add-word word tab))))
      tab)))

 (defun annotate-hash-table-to-alist (table)
  (let (alist)
    (maphash (lambda (word freq) 
 	       (setq alist (cons (cons word freq) alist)))
 	     table)
    alist))

 ;;; flashcard generation
 (defvar annotate-flashcard-mode-map
  (let ((map (make-sparse-keymap)))
    (define-key map (kbd "C-c C-c") 'annotate-flashcard-submit)
    (define-key map (kbd "C-c C-i") 'annotate-flashcard-insert-unknown-words)
         map)
  "Keymap for `annotate-flashcard-mode'.")

 (define-derived-mode annotate-flashcard-mode text-mode "Flashcard"
  "Major mode for creating flashcards.
 \\{annotate-flashcard-mode-map}"
  (setq fill-paragraph-function 'annotate-flashcard-fill-paragraph)
  (setq fill-column 1000))

 (defun annotate-flashcard-fill-paragraph (&optional justify region)
  (save-restriction 
    (narrow-to-region (field-beginning) (field-end))
    (let (fill-paragraph-function)
      (fill-paragraph justify region))))

 (defvar annotate-card-divider 
  "\n\n")

 (defvar annotate-window-configuration nil)

 (defun annotate-make-flashcard (&optional front)
  (let ((sentence front))
    (setq annotate-window-configuration
 	  (current-window-configuration))
    (let ((buf (generate-new-buffer-name "*card*")))
      (pop-to-buffer (get-buffer-create buf)))
    (let ((inhibit-read-only t)) (erase-buffer))
    (annotate-flashcard-mode)
    (if sentence
 	(insert sentence)
      (newline))
    (while (re-search-forward "^[^ :]+: \\([ \t]+\\)" nil t)
      (delete-region (match-beginning 1) (match-end 1)))

    ;; remove whitespace we dont need
    (whitespace-cleanup-region (point-min) (point-max))
    ;; leading space isnt handled by whitespace-cleanup
    (goto-char (point-min))
    (save-excursion
      (while (re-search-forward "^[[:space:]]+" nil t)
 	(delete-region (match-beginning 0) (match-end 0))))
    
    (goto-char (point-max))
    (insert (propertize annotate-card-divider
 			'face 'font-lock-preprocessor-face
 			'field t
 			'rear-nonsticky t))
    (unless sentence
      (goto-char (point-min)))
    (set-input-method 'spanish-postfix)))

 (defun annotate-flashcard-submit ()
  "Add card to `annotate-import-file'."
  ;; replace divider with tab character, and newlines with <BR>
  (interactive)
  (let ((inhibit-read-only t))
    (goto-char (point-min))
    (re-search-forward annotate-card-divider)
    (replace-match "\t")
    ;; remove final newline, if present.
    (goto-char (point-max))
    (when (looking-at "^$")
      (delete-backward-char 1))
    ;; convert remaining newlines
    (goto-char (point-min))
    (while (re-search-forward "\n" nil t)
      (replace-match "<BR>"))
    (goto-char (point-max))
    (newline)
    ;; add to import file
    (append-to-file (point-min) (point-max) annotate-import-file)
    (kill-buffer)
    (set-window-configuration annotate-window-configuration)))

 (defun annotate-flashcard-insert-unknown-words ()
  (interactive)
  (let (words)
    (save-excursion
      (goto-char (point-min))
      (re-search-forward annotate-card-divider)
      (goto-char (match-beginning 0))
      (setq words (annotate-find-unknown-words
 		   (buffer-substring (point-min) (point))
 		   annotate-known-table)))
    (save-excursion
      (dolist (word (reverse words))
 	(insert (concat word ": "))
 	(newline 2)))
    (end-of-line)))

 (defun annotate-add-sentence (sentence)
  (when sentence
    (save-excursion
      (with-current-buffer (get-buffer-create "*sentence pile*")
 	(goto-char (point-max))
 	(insert (format "%s\n\n" sentence))))))

 (defun annotate-collect-sentences (&optional buffer)
  (with-current-buffer (or buffer (current-buffer))
    (setq annotate-word-sentence-table (annotate-make-table))
    (goto-char (point-min))
    (while (not (>= (point) (point-max)))
      (forward-sentence)
      (sit-for 0)
      (let* ((sentence (annotate-cleanup-sentence (sentence-at-point))))
 	(annotate-add-sentence sentence)))))


 (defun annotate-dump-words ()
  (interactive)
  (annotate-update-tables)
  (with-temp-buffer
    (maphash (lambda (word freq)
 	       (insert (format "%s\n" word)))
 	     annotate-mature-table)
    (sort-lines nil (point-min) (point-max))
    (write-region (point-min) (point-max) 
 		  (concat "/home/rcy/spanish/words/"
 			  (format-time-string "%Y%m%d")
 			  ".txt"))))


 (provide 'annotate)
 ;;; annotate.el ends here