gdritter repos when-computer / master generator / telml.scm
master

Tree @master (Download .tar.gz)

telml.scm @masterraw · history · blame

(use-modules (ice-9 match))
(use-modules (ice-9 rdelim))
(use-modules (ice-9 regex))
(use-modules (srfi srfi-9))

(define-record-type <telml-doc>
  (make-telml-doc meta content)
  telml-doc?
  (meta telml-meta)
  (content telml-content))

(define-record-type <meta>
  (make-meta-record slug title tags date)
  meta?
  (slug  meta-slug)
  (title meta-title)
  (tags  meta-tags)
  (date  meta-date))

(define (make-meta slug title tags . args)
  (if (null? args)
      (make-meta-record slug title tags 0)
      (make-meta-record slug title tags (car args))))

;; This builds a stream abstraction, where a stream is a stream of
;; characters. This makes writing a parser a _lot_ easier.
(define (mk-stream s)
  (let ((lst (string->list s)))
    (lambda (msg)
      (cond ((eq? 'next msg)
             (if (null? lst)
                 #f
                 (let ((x (car lst)))
                   (set! lst (cdr lst))
                   x)))
            ((eq? 'peek msg)
             (if (null? lst) #f (car lst)))
            ((eq? 'peek2 msg)
             (if (or (null? lst) (null? (cdr lst))) #f
                 (cadr lst)))
            ((eq? 'done msg) (null? lst))))))

;; Special characters in TeLML are in the set [\{}|], and this also
;; considers EOF to be a special character.
(define (special-char? c)
  (or (eq? c #\\)
      (eq? c #\{)
      (eq? c #\})
      (eq? c #\|)
      (not c)))

;; This turns a stream back into a proper list, mostly for debugging
;; purposes.
(define (stream->list s)
  (if (s 'peek) (cons (s 'next) (stream->list s)) '()))

;; Identifiers consist of the characters [A-Za-z0-9_-], which are
;; matched here by number.
(define (ident-char? c)
  (let ((i (char->integer c)))
    (or (= i 45)
        (= i 95)
        (and (>= i 48) (<= i 57))
        (and (>= i 64) (<= i 90))
        (and (>= i 97) (<= i 122)))))

;; Whitespace is in [\t\r\n ]
(define (whitespace? c)
  (let ((i (char->integer c)))
    (or (= i 9) (= i 10) (= i 13) (= i 32))))

;; Our parsing function consists of a number of mutually recursive
;; functions that operate over a stream of characters.
(define (parse str)
  (let ((s (mk-stream str)))

    (define (match-char c)
      (if (eq? (s 'next) c) #t (throw 'unexpected-char c)))

    (define (parse-tag)
      (let* ((_    (match-char #\\))
             (name (parse-tag-name))
             (_    (skip-whitespace))
             (_    (match-char #\{))
             (args (parse-args))
             (_    (match-char #\})))
        `(,(string->symbol name) ,@args)))

    (define (parse-text)
      (define (go)
        (cond ((and (eq? (s 'peek) #\\) (special-char? (s 'peek2)))
               (begin (s 'next) (cons (s 'next) (go))))
              ((special-char? (s 'peek)) '())
              (else
               (let ((c (s 'next))) (cons c (go))))))
      (list->string (go)))

    (define (skip-whitespace)
      (if (whitespace? (s 'peek))
          (begin (s 'next) (skip-whitespace))))

    (define (parse-tag-name)
      (define (go)
        (if (ident-char? (s 'peek))
            (cons (s 'next) (go))
            '()))
        (list->string (go)))

    (define (parse-args)
      (if (eq? (s 'peek) #\})
          '()
          (let ((f (parse-fragment-list)))
            (cond ((eq? (s 'peek) #\|) (begin (s 'next) (cons f (parse-args))))
                  ((eq? (s 'peek) #\}) (list f))
                  (else (raise 'unreachable))))))

    (define (parse-fragment)
      (let ((c (s 'peek)))
        (cond ((not c) '())
              ((eq? c #\\) (parse-tag))
              (else (parse-text)))))

    (define (parse-fragment-list)
      (cond ((or (not (s 'peek))
                 (eq? (s 'peek) #\})
                 (eq? (s 'peek) #\|)) '())
            (else (let* ((f  (parse-fragment))
                         (fs (parse-fragment-list)))
                         (cons f fs)))))

    (parse-fragment-list)))

(define basic-tag-list
  (let ((simple-tag (lambda (n) (cons n (lambda (arg) (list n arg)))))
        (list-tag   (lambda (n) (cons n (lambda args  (cons n args))))))
    (list (simple-tag 'em)
          (simple-tag 'strong)
          (simple-tag 'li)
          (simple-tag 'h1)
          (simple-tag 'h2)
          (simple-tag 'table)
          (simple-tag 'th)
          (simple-tag 'tr)
          (simple-tag 'td)
          (cons 'header (lambda args `(th ,@(map (lambda (x) (list 'td x)) args))))
          (cons 'row (lambda args `(tr ,@(map (lambda (x) (list 'td x)) args))))
          (list-tag 'sub)
          (cons 'p (lambda (n) `(div (@ (class para)) ,@n)))
          (list-tag   'ul)
          (list-tag   'ol)
          (simple-tag 'blockquote)
          (cons 'ttcom (lambda  (n) `(span (@ (class comment)) ,n)))
          (cons 'ttkw  (lambda  (n) `(span (@ (class keyword)) ,n)))
          (cons 'ttcn  (lambda  (n) `(span (@ (class constr)) ,n)))
          (cons 'ttstr (lambda  (n) `(span (@ (class string)) ,n)))
          (cons 'red (lambda  (n) `(span (@ (style "color: red;")) ,n)))
          (cons 'tt (lambda (n) (list 'code n)))
          (cons 'br (lambda _ `(br)))
          (cons 'hr (lambda _ `(hr)))
          (cons 'code (lambda (n) `(pre (code ,n))))
          (cons 'comment (lambda _ ""))
          (cons 'link (lambda (url name)
                        `(a (@ (href ,(apply string-append url))) ,name)))
          (cons 'img (lambda (src)
                       `(img (@ (src ,(apply string-append src))))))
          (cons 'svgimg (lambda (src)
                            `(img (@ (class "svgimage")
                                   (src ,(apply string-append src))))))
          (cons 'audio (lambda (src)
                         `(div (@ (align "center"))
                               (audio
                           (@ (controls "controls"))
                           (source (@ (src ,(apply string-append src))
                                      (type "audio/mp3")))))))
          (cons 'youtube
                (lambda (src width height)
                  `(div (@ (align "center"))
                        (iframe (@ (width ,(apply string-append width))
                                   (height ,(apply string-append height))
                                   (src ,(apply string-append (cons "https://www.youtube.com/embed/" src)))
                                   (frameborder "0")
                                   (allowfullscreen))
                                ""))))
          (cons 'center (lambda (arg)
                          `(div (@ (align "center")) ,arg))))))

(define standard-tag-list
  (append basic-tag-list
          (list (cons 'wd (lambda (wd mn)
                            `(span (@ (class "word"))
                                   ,wd
                                   (span (@ (class "meaning")) ,mn))))
                (cons 'sidenote (lambda (arg)
                                  `(span (@ (class sidenote)) ,arg)))
                (cons 'ref (lambda (name)
                       `(label (@ (for ,(car name)) (class "sidenote-number")) ""))))))

(define feed-tag-list
  (append basic-tag-list
          (list (cons 'wd (lambda (wd _) wd))
                (cons 'sidenote (lambda _ ""))
                (cons 'ref (lambda _ "")))))

(define (partition doc lst)
  (cond ((null? doc) (reverse lst))
        ((not (car doc)) (partition (cdr doc) (cons '() lst)))
        (else (partition (cdr doc)
                         (cons (append (car lst) (list (car doc)))
                               (cdr lst))))))

(define (string-take str i)
  (substring str 0 i))

(define (string-drop str i)
  (let ((l (string-length str)))
    (substring str i l)))

(define (gather-para document)
  (define (split-string text)
    (let ((m (string-match "\n\n" text)))
      (if (not m) (list text)
          (let ((i (match:start m)))
            (cons (string-take text i)
                  (cons #f (split-string (string-drop text (+ i 2)))))))))
  (define (go doc)
    (cond ((null? doc) '())
          ((not (string? (car doc)))
           (cons (car doc) (go (cdr doc))))
          (else
           (append (split-string (car doc)) (go (cdr doc))))))
  (partition (go document) '(())))

(define (string->sexp str)
  (catch 'read-error
         (lambda () (call-with-input-string str read))
         (lambda _ #f)))

(define (escape-chars str)
  (define (go c rest)
    (cond
     ((eq? c #\<)
      (cons #\; (cons #\t (cons #\l (cons #\& rest)))))
     ((eq? c #\>)
      (cons #\; (cons #\t (cons #\g (cons #\& rest)))))
     ((eq? c #\&)
      (cons #\; (cons #\p (cons #\m (cons #\a (cons #\& rest))))))
     (else (cons c rest))))
  (reverse-list->string (string-fold go '() str)))

(define (telml->sxml telml tags)
  (define (rec arg)
    (cond ((string? arg) (escape-chars arg))
          ((null? arg) '())
          ((symbol? (car arg))
           (let ((args (map rec (cdr arg)))
                 (func (assoc (car arg) tags)))
             (if func
                 (apply (cdr func) args)
                 (raise "Unknown tag"))))
          (else
           (map rec arg))))
  (let ((meta (match telml
                     ((('meta (xs)) . _) xs)
                     (_                #f)))
        (body (match telml
                     ((('meta _) . xs) xs)
                     (xs               xs))))
    (make-telml-doc
     (if meta (apply make-meta (string->sexp meta)) '())
     (map (lambda (x)
            `(div (@ (class para)) ,(rec x)))
               (gather-para body)))))

(define (translate-file filename)
  (let ((body (with-input-from-file filename read-string #:encoding "UTF-8")))
    (telml->sxml (parse body) standard-tag-list)))

(define (translate-feed-file filename)
  (let ((body (with-input-from-file filename read-string #:encoding "UTF-8")))
    (telml->sxml (parse body) feed-tag-list)))