;;; mew-bq.el --- Base64 and Quoted-Printable encoding for Mew

;; Author:  Kazu Yamamoto <Kazu@Mew.org>
;; Created: Aug 20, 1997
;; Revised: Aug 24, 1998

;;; Code:

(defconst mew-bq-version "mew-bq.el version 0.03")

(require 'mew)

(defvar mew-header-encode-switch
  (list
   (cons "B" 'mew-header-encode-base64)
   (cons "Q" 'mew-header-encode-qp)
   ))

(defvar mew-header-decode-switch
  (list
   (cons "B" 'mew-header-decode-base64)
   (cons "Q" 'mew-header-decode-qp)
   ))

(defconst mew-base64-boundary1 26)
(defconst mew-base64-boundary2 52)
(defconst mew-base64-boundary3 62)
(defconst mew-base64-boundary4 63)

(defconst mew-header-decode-regex 
  "=\\?\\([^?]+\\)\\?\\(.\\)\\?\\([^?]+\\)\\?=")

;;;
;;;
;;;

(defun mew-header-sanity-check-string (str)
  (if (null str)
      str
    (while (string-match "[\000-\010\012-\037\177]+" str)
      (setq str (concat (substring str 0 (match-beginning 0))
			(substring str (match-end 0)))))
    str))

(defun mew-header-sanity-check-region (beg end)
  (save-restriction
    (narrow-to-region beg end)
    (goto-char (point-min))
    (while (re-search-forward "[\000-\010\013-\037\177]+" nil t) ;; allow \n
      (replace-match ""))))

(defun mew-header-encode (str)
  ;; sanity check should be done
  (let* ((charset (mew-charset-guess-string str))
	 (data (mew-charset-to-data charset))
	 (b-or-q (nth 5 data))
	 (cs (nth 4 data))
	 (fun (cdr (mew-assoc-case-equal b-or-q mew-header-encode-switch 0)))
	 (estr (mew-cs-encode-string str cs)))
    (concat "=?" charset "?" b-or-q "?" (funcall fun estr) "?=")))

(defun mew-header-decode (charset b-or-q estr)
  (let* ((fun (cdr (mew-assoc-case-equal b-or-q mew-header-decode-switch 0)))
	 (cs (mew-charset-to-cs charset))
	 ret)
    (cond
     ((and (null cs) (not (mew-case-equal charset "us-ascii")))
      mew-error-unknown-charset)
     (fun ;; cs may be nil
      (setq ret (mew-cs-decode-string (funcall fun estr) cs))
      (mew-header-sanity-check-string ret))
     (t
      estr)
     )))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Base64 encoding
;;;

(defun mew-base64-char256 (ch64)
  (cond
   ((null ch64) 0)
   ((and (<= ?A ch64) (<= ch64 ?Z)) (- ch64 ?A))
   ((and (<= ?a ch64) (<= ch64 ?z)) (+ (- ch64 ?a) mew-base64-boundary1))
   ((and (<= ?0 ch64) (<= ch64 ?9)) (+ (- ch64 ?0) mew-base64-boundary2))
   ((char-equal ch64 ?+) mew-base64-boundary3)
   ((char-equal ch64 ?/) mew-base64-boundary4)
   ((char-equal ch64 ?=) 0)
   )
  )

(defun mew-header-encode-base64 (str256)
  (let* ((rest (% (length str256) 3))
	 (count 0)
	 zpad epad len end
	 (ret ""))
    (cond 
     ((= rest 0) (setq zpad 0 epad 0 end nil))
     ((= rest 1) (setq zpad 2 epad 2 end -2))
     ((= rest 2) (setq zpad 1 epad 1 end -1))
     )
    (setq str256 (concat str256 (make-string zpad 0)))
    (setq len (length str256))
    (while (< count len)
      (let ((char0 (aref str256 count))
	    (char1 (aref str256 (1+ count)))
	    (char2 (aref str256 (+ 2 count))))
	(setq ret
	      (concat
	       ret
	       (char-to-string (mew-base64-char64 (lsh char0 -2)))
	       (char-to-string
		(mew-base64-char64 (logior (lsh (logand char0 3) 4)
					   (lsh char1 -4))))
	       (char-to-string
		(mew-base64-char64 (logior (lsh (logand char1 15) 2)
					   (lsh char2 -6))))
	       (char-to-string (mew-base64-char64 (logand char2 63)))))
	(setq count (+ count 3))))
    (concat (substring ret 0 end) (make-string epad ?=))))
	      
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Base64 decoding
;;;

(defun mew-base64-char64 (ch256)
  (aref "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" 
	ch256))

(defun mew-header-decode-base64 (str64)
  (let* ((len (length str64))
	 (pad 0)
	 (count 0)
	 (ret ""))
    (if (string-match "=+$" str64)
	(setq pad (- (match-end 0) (match-beginning 0))))
    (if (or (string-match "[^a-zA-Z0-9+/=]" str64)
	    (not (equal (* (/ len 4) 4) len))
	    (< pad 0)
	    (> pad 3))
	mew-error-illegal-base64 ;; return value
      (while (< count len)
	(let ((char0 (mew-base64-char256 (aref str64 count)))
	      (char1 (mew-base64-char256 (aref str64 (1+ count))))
	      (char2 (mew-base64-char256 (aref str64 (+ 2 count))))
	      (char3 (mew-base64-char256 (aref str64 (+ 3 count)))))
	  (setq ret
		(concat
		 ret
		 (char-to-string
		  (logand (logior (lsh char0 2) (lsh char1 -4)) 255))
		 (char-to-string 
		  (logand (logior (lsh char1 4) (lsh char2 -2)) 255))
		 (char-to-string
		  (logand (logior (lsh char2 6) char3) 255))))
	  (setq count (+ count 4))))
      (if (equal pad 0)
	  ret
	(substring ret 0 (- pad))))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Quoted-printable encoding
;;;

(defun mew-char-to-qhex (char)
  (concat "="
	  (char-to-string (aref "0123456789ABCDEF" (lsh char -4)))
	  (char-to-string (aref "0123456789ABCDEF" (logand char 15)))))

(defun mew-header-encode-qp (str)
  (let ((len (length str))
	(count 0)
	(ch nil)
	(ret))
    (while (< count len)
      (setq ch (aref str count))
      (cond
       ((and (> ch 32)
	     (< ch 126)
	     (not (char-equal ch ?=))
	     (not (char-equal ch ??))
	     (not (char-equal ch ?_))) ;; space
	(setq ret (concat ret (char-to-string ch))))
       ((char-equal ch 32)
	(setq ret (concat ret "_")))
       (t 
	(setq ret (concat ret (mew-char-to-qhex ch))))
       )
      (setq count (1+ count)))
    ret))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Quoted-printable decoding
;;;

(defun mew-hexstring-to-int (hex)
  (let ((len (length hex))
	(count 0)
	(ch nil)
	(ret 0))
    (while (< count len)
      (setq ch (aref hex count))
      (cond
       ((and (<= ?0 ch) (<= ch ?9))
	(setq ret (+ (* ret 16) (- ch ?0))))
       ((and (<= ?A ch) (<= ch ?F))
	(setq ret (+ (* ret 16) (+ (- ch ?A) 10))))
       ((and (<= ?a ch) (<= ch ?f))
	(setq ret (+ (* ret 16) (+ (- ch ?a) 10))))
       )
      (setq count (1+ count)))
    ret))

(defun mew-header-decode-qp (qpstr)
  (let ((start -1) (end))
    (while (string-match "_" qpstr)
      (aset qpstr (match-beginning 0) 32)) ;; 32 = space
    (while (string-match "=[0-9A-Z][0-9A-Z]" qpstr (1+ start))
      (setq start (match-beginning 0))
      (setq end (match-end 0))
      (setq qpstr
	    (concat (substring qpstr 0 start)
		    (mew-string-as-multibyte
		     (char-to-string
		      (mew-hexstring-to-int
		       (substring qpstr (1+ start) end))))
		    (substring qpstr end nil))))
    qpstr))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; RFC 2047 encoding
;;

(defvar mew-encode-word-max-length 65)
(defvar mew-field-max-length 70)

(defmacro mew-column ()
  (` (- (point) (save-excursion (beginning-of-line) (point)))))

(defun mew-header-encode-string (str)
  (let ((encoded-word (mew-header-encode str)))
    (if (> (length encoded-word) mew-encode-word-max-length)
        (let ((med (/ (length str) 2))
              (i 0))
          (while (< i med)
            (setq i (+ i (mew-charlen (mew-aref str i)))))
          (append
           (mew-header-encode-string (substring str 0 i))
           (mew-header-encode-string (substring str i nil))))
      (list encoded-word))))

(defun mew-header-encode-split-string (str)
  "Split STR to need-to-encode string and non-encode-string."
  (let ((start 0) beg end ret)
    (while (string-match "\\(^\\|[ \t]+\\)[\t -~]+\\($\\|[ \t]+\\)" str start)
      (setq beg (match-beginning 0))
      (setq end (match-end 0))
      (if (equal start beg)
	  (setq ret (cons (substring str beg end) ret))
	(setq ret (cons (substring str beg end)
			(cons (substring str start beg) ret))))
      (setq start end))
    (if (/= start (length str))
	(setq ret (cons (substring str start nil) ret)))
    (nreverse ret)))

(defun mew-header-encode-comma-text (str)
  (let ((str-list (mapcar (function mew-chop) (mew-split str ?,))))
    (mew-header-encode-text (car str-list))
    (setq str-list (cdr str-list))
    (while str-list
      (insert ", ") ;; must be fold here
      (mew-header-encode-text (car str-list))
      (setq str-list (cdr str-list)))))

(defmacro mew-header-encode-cond (c)
  (` (cond
      ((> (, c) 127) ;; non-ascii
       (if (equal status 'space)
	   (progn
	     (insert (substring str bound i))
	     (setq bound i)))
       (setq status 'non-ascii))
      ;; end of non-ascii
      (t ;; ascii
       (cond
	((equal status 'space)
	 (insert (substring str bound i)) ;; spaces
	 (setq bound i)
	 (setq status 'ascii))
	((equal status 'ascii)
	 (setq status 'ascii))
	((equal status 'non-ascii)
	 (setq status 'non-ascii))
	((equal status 'non-ascii-space)
	 (mew-header-encode-text (substring str bound (1- i)))
	 ;; non-ascii
	 (insert (substring str (1- i) i)) ;; a space
	 (setq bound i)
	 (setq status 'ascii))))
      ;; end of ascii
      )))

(defun mew-header-encode-addr (str)
  (let* ((len (length str))
	 (i 0) (bound 0) (status 'space)
	 qstatus c I)
    ;; status space, ascii, non-ascii, non-ascii-space
    ;; assumptions:
    ;;  <> doesn't contain non-ascii characters.
    ;;  () doesn't recurse.
    (while (< i len)
      (setq c (mew-aref str i))
      (cond
       ;; quote
       ((char-equal c ?\")
	(setq I (1+ i))
	(setq qstatus 'ascii)
	(catch 'quote
	  (while (< I len)
	    (setq c (mew-aref str I))
	    (cond
	     ((char-equal c ?\")
	      (throw 'quote nil))
	     ((> c 127)
	      (setq qstatus 'non-ascii)))
	    (setq I (+ I (mew-charlen c)))))
	(mew-header-encode-cond (if (equal qstatus 'ascii) ?a 128))
	(setq i I))
       ;; end of quote
       ;; comment
       ((char-equal c ?\()
	(if (or (equal status 'ascii) (equal status 'space))
	    (insert (substring str bound i))
	  (mew-header-encode-text (substring str bound i)))
	(insert "(")
	(setq i (1+ i))
	(setq bound i)
	(setq status 'ascii)
	(catch 'comment
	  (while (< i len)
	    (setq c (mew-aref str i))
	    (cond
	     ((char-equal c ?\))
	      (throw 'comment nil))
	     ((> c 127)
	      (setq status 'non-ascii)))
	    (setq i (+ i (mew-charlen c)))))
	(if (equal status 'ascii)
	    (insert (substring str bound i))
	  (mew-header-encode-text (substring str bound i) 'comment))
	(if (equal i len)
	    ()
	  (insert ")")
	  (setq bound (1+ i)))
	(setq status 'space))
       ;; end of ()
       ;; space
       ((or (char-equal c 32) (char-equal c ?\t))
	(cond
	 ((or (equal status 'ascii) (equal status 'space))
	  (insert (substring str bound i)) ;; 'ascii
	  (setq bound i)
	  (setq status 'space))
	 ((equal status 'non-ascii)
	  (setq status 'non-ascii-space))
	 ))
       ;; end of white space
       ;; comma
       ((char-equal c ?,)
	(cond
	 ((or (equal status 'ascii) (equal status 'space))
	  (insert (substring str bound i)))
	 (t (mew-header-encode-text (substring str bound i))))
	(insert ",\n ")
	(setq i (1+ i))
	(catch 'comma
	  (while (< i len)
	    (setq c (mew-aref str i))
	    (if (or (char-equal c 32) (char-equal c ?\t) (char-equal c ?\n))
		() ;; loop
	      (throw 'comma nil))
	    (setq i (1+ i))))
	(setq bound i)
	(setq c 32)
	(setq i (1- i))
	(setq status 'space))
       ;; end of comma
       ;; the others
       (t (mew-header-encode-cond c)))
      ;; end of outside cond
      (setq i (+ i (mew-charlen c))))
    (cond
     ((or (equal status 'ascii) (equal status 'space))
      (insert (substring str bound i)))
     (t (mew-header-encode-text (substring str bound i))))))

(defun mew-header-encode-text (str &optional comment)
  (let ((str-list (mew-header-encode-split-string str))
	e-list)
    (if (string-match "^[\t -~]+$" (car str-list))
	(progn
	  ;; ascii
	  (insert (car str-list))
	  (setq str-list (cdr str-list))))
    (while str-list
      ;; encoded-words
      (setq e-list (mew-header-encode-string (car str-list)))
      (if (> (+ (mew-column) (length (car e-list))) mew-field-max-length)
	  (progn
	    (forward-char (if comment -2 -1))
	    (cond
	     ((looking-at "[ \t]")
	      (insert "\n")
	      (forward-char (if comment 2 1)))
	     (t
	      (if comment (forward-char))
	      (insert "\n ") ;; xxx miserable...
	      (forward-char)))))
      (insert (car e-list))
      (setq e-list (cdr e-list))
      (while e-list
	(insert "\n " (car e-list))
	(setq e-list (cdr e-list)))
      ;; ascii
      (setq str-list (cdr str-list))
      (if (car str-list)
	  (if (> (+ (mew-column) (length (car str-list))) mew-field-max-length)
	      (insert "\n" (car str-list))
	    (insert (car str-list))))
      (setq str-list (cdr str-list)))))

(defun mew-header-encode-region (beg end)
  (save-restriction
    (narrow-to-region beg end)
    (mew-header-sanity-check-region (point-min) (point-max))
    (mew-charset-sanity-check (point-min) (point-max))
    (goto-char (point-min))
    (let (key med type str)
      (while (not (eobp))
	(if (not (looking-at mew-keyval))
	    (forward-line)
	  (setq key (mew-match 1))
	  (setq med (match-end 0))
	  (forward-line)
	  (mew-header-goto-next)
	  (if (or (equal med (1- (point)))
		  (equal (list mew-lc-ascii)
			 (mew-find-cs-region med (1- (point)))))
	      () ;; let it be
	    (setq type (mew-field-type key))
	    (setq str (buffer-substring med (1- (point)))) ;; excluding \n
	    (delete-region med (point))
	    (cond
	     ((equal type 'mailbox)
	      (mew-header-encode-addr str))
	     ((equal type 'mime)
	      (mew-header-encode-addr str))
	     ((equal type 'comma-text)
	      (mew-header-encode-comma-text str))
	     ((equal type 'text)
	      (mew-header-encode-text str)))
	    (insert "\n") ;; previously deleted, so insert here
	    ))))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; RFC 2047 decoding
;;

(defun mew-header-decode-region (structp rbeg rend &optional unfold)
  "RFC 2047 decoding. This is liberal on the one point from RFC 2047.
That is, each line may be more than 75. "
  (save-restriction
    (narrow-to-region rbeg rend)
    (goto-char (point-min))
    (if (and (not unfold) 
	     (not (re-search-forward mew-header-decode-regex nil t)))
	() ;; let it be
      (let ((regex (concat "\"\\|" mew-header-decode-regex))
	    beg end cs-str)
	(goto-char (point-min))
	(cond
	 (structp
	  ;; If each line doesn't end with ",", unfold it.
	  ;; In Page 5 of RFC822 says, "Unfolding is accomplished by
	  ;; regarding CRLF immediately followed by a LWSP-char as
	  ;; equivalent to the LWSP-char". However, it also says, 
	  ;; "In structured field bodies, multiple linear space ASCII 
	  ;; characters (namely HTABs and SPACEs) are treated as single 
	  ;; spaces and may freely surround any symbol." So, remove
	  ;; continuous white spaces.
	  (while (re-search-forward ",[ \t]+\n" nil t)
	    (replace-match ",\n" nil t))
	  (goto-char (point-min))
	  (while (re-search-forward "\\([^,]\\)[ \t]*\n[ \t]+" nil t)
	    (replace-match (concat (mew-match 1) " ") nil t)))
	 (t ;; text
	  ;; In Page 5 of RFC822 says, "Unfolding is accomplished by
	  ;; regarding CRLF immediately followed by a LWSP-char as
	  ;; equivalent to the LWSP-char".
	  (while (re-search-forward "\n\\([ \t]\\)" nil t)
	    (replace-match (mew-match 1) nil t))
	  )
	 )
	;; In Page 10 of RFC 2047 says, "When displaying a particular 
	;; header field that contains multiple 'encoded-word's, any 
	;; 'linear-white-space' that separates a pair of adjacent 
	;; 'encoded-word's is ignored".
	(goto-char (point-min))
	(while (re-search-forward "\\?=[ \t]+=\\?" nil t)
	  (replace-match "?==?" nil t))
	(goto-char (point-min))
	(while (re-search-forward regex nil t)
	  (if (eq (char-after (match-beginning 0)) ?\")
	      (if (re-search-forward "[^\\]\"" nil t)
		  (goto-char (match-end 0)))
	    (setq beg (match-beginning 0)
		  end (match-end 0)
		  cs-str (mew-header-decode (mew-match 1)
					    (mew-match 2)
					    (mew-match 3)))
	    (delete-region beg end)
	    (insert cs-str)))))
    (goto-char (point-max))))

(provide 'mew-bq)

;;; Copyright Notice:

;; Copyright (C) 1997, 1998 Mew developing team.
;; All rights reserved.

;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions
;; are met:
;; 
;; 1. Redistributions of source code must retain the above copyright
;;    notice, this list of conditions and the following disclaimer.
;; 2. Redistributions in binary form must reproduce the above copyright
;;    notice, this list of conditions and the following disclaimer in the
;;    documentation and/or other materials provided with the distribution.
;; 3. Neither the name of the team nor the names of its contributors
;;    may be used to endorse or promote products derived from this software
;;    without specific prior written permission.
;; 
;; THIS SOFTWARE IS PROVIDED BY THE TEAM AND CONTRIBUTORS ``AS IS'' AND
;; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
;; PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE TEAM OR CONTRIBUTORS BE
;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
;; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
;; BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
;; OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
;; IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

;;; mew-bq.el ends here
