How to eval Lisp code inside a reader macro? - lisp

I'm writing my own x86-64 assembler in Common Lisp and it produces correct binary code for a subset of x86-64. I use a custom reader macro to convert assembly code to a syntax tree, and it works as expected.
What I am attempting to accomplish is to allow using Lisp code inside assembly code, that way I could use Lisp as a macro language for my assembler. I use #a as the macro dispatch character and #e to signal end for the reader. Inside reader #l changes to Lisp mode and #a back to assembly mode, #e (to signal end for the reader macro) should work in both modes.
What I don't understand is how to output the results of the evaluated code back to the input stream (to be processed before the rest of the code), or otherwise how to get the Lisp code output be read again, so that the output of Lisp code (it would be assembly code) could be processed appropriately (the same way as the rest of the assembly code). How can I reach that goal?
A sidenote: this is my first reader macro, so there may be design flaws. I think my approach to read Lisp code into a string is not necessarily the best way, if there is some shorter and more idiomatic way to do it.
Here's a simplified version of my reader macro:
(eval-when (:compile-toplevel :load-toplevel :execute)
(defun get-last-character-string (my-string)
"This function returns a string consisting of the last character of the input string."
(subseq my-string (1- (length my-string))))
(defun get-string-without-last-character (my-string)
"This function returns a string without the last character of the input string."
(subseq my-string 0 (1- (length my-string))))
(defun get-string-without-invalid-last-character (my-string invalid-last-characters)
"If the last character of the string is invalid, the string is returned without it, otherwise completely."
(loop for invalid-last-character in invalid-last-characters
do (if (equal (get-last-character-string my-string) invalid-last-character)
(setf my-string (get-string-without-last-character my-string))))
my-string)
(defun transform-code-to-string (stream sub-char numarg)
"This function converts assembly code into a string.
#l marks change to Lisp code. #a marks return to asm. #e marks end.
Partially based on: http://weitz.de/macros.lisp"
(declare (ignore sub-char numarg))
(let*
((invalid-last-characters (list "'" " " "(" ")"))
(current-mode "asm")
(is-there-code-on-this-line nil)
(current-phase "beginning-of-line")
(my-string "(list ")
(lisp-code-string ""))
;; loop through stream.
(loop for my-char = (coerce (list (read-char stream t nil t)) 'string)
do (cond
((equal current-mode "asm")
(cond
((equal current-phase "hash-sign-read")
;; is character e ?
;; if yes, we're done, fix closing parentheses and return.
(cond
((equal my-char "e")
(return-from transform-code-to-string
(concatenate 'string (get-string-without-invalid-last-character
(get-string-without-invalid-last-character
my-string invalid-last-characters)
invalid-last-characters) "))")))
;; is character l ?
;; if yes, change to Lisp mode.
((equal my-char "l")
;; could Lisp code could be read and evaluated here
;; without reading it into a string?
(progn
(setf current-mode "Lisp")
(setf is-there-code-on-this-line nil)
(setf lisp-code-string "")
(setf current-phase "beginning-of-line")))
;; otherwise, print error.
(t (error "in asm mode undefined control character after #"))))
;; is character # ?
;; if yes, mark hash sign read.
((equal my-char "#")
(setf current-phase "hash-sign-read"))
;; is character newline?
((equal my-char (coerce (list #\Newline) 'string))
(progn
(cond
;; is there _no_ code on this line?
;; if true, do not output anything.
((not is-there-code-on-this-line)
(setf current-phase "beginning-of-line"))
;; are we inside instruction or inside a parameter?
;; if true, output ")
((or (equal current-phase "inside-instruction")
(equal current-phase "inside-parameters"))
(progn
(setf current-phase "beginning-of-line")
(setf is-there-code-on-this-line nil)
(setf my-string (concatenate 'string my-string "\")"))))
;; otherwise output )
(t (progn
(setf current-phase "beginning-of-line")
(setf is-there-code-on-this-line nil)
(setf my-string (concatenate 'string my-string ")")))))))
;; are we inside a comment?
;; if yes, don't output anything.
((equal current-phase "inside-comment")
nil)
;; are we in the beginning of the line?
((equal current-phase "beginning-of-line")
(cond
;; is this a space in the beginning of the line?
;; if yes, do not output anything.
((equal my-char " ")
nil)
;; is this the first character of instruction and not ( or ) ?
;; if yes, mark there is code on this line, mark first character as printed, output " and current character.
((and
(not (equal my-char "("))
(not (equal my-char ")")))
(progn
(setf current-phase "inside-instruction")
(setf is-there-code-on-this-line t)
(setf my-string (concatenate 'string my-string "'(\"" my-char))))
(t nil)))
;; is character ; ?
;; if yes, don't output anything, begin comment.
((equal my-char ";")
(setf current-phase "inside-comment"))
;; is character space or comma?
((or (equal my-char " ")
(equal my-char ","))
(cond
;; is character space or comma, and last character was _not_ space, comma or opening parenthesis?
;; if yes, output " and space.
((and
(not (equal (get-last-character-string my-string) " "))
(not (equal (get-last-character-string my-string) ","))
(not (equal (get-last-character-string my-string) "(")))
(progn
(setf current-phase "in-space")
(setf my-string (concatenate 'string my-string "\" "))))
(t nil)))
;; is instruction printed and this is the 1st character of a parameter?
((and
(not (equal current-phase "inside-instruction"))
(or (equal (get-last-character-string my-string) " ")
(equal (get-last-character-string my-string) ",")))
(cond
;; mark we're inside parameters, output " and current character.
(t (progn
(setf current-phase "inside-parameters")
(setf my-string (concatenate 'string my-string "\"" my-char))))))
;; otherwise output the character.
(t (setf my-string (concatenate 'string my-string my-char)))))
((equal current-mode "Lisp")
;; in Lisp mode, read text until #e or #a is reached and eval it.
(cond
((equal current-phase "hash-sign-read")
(cond
;; is character e ?
;; if yes, we're done, fix closing parentheses and return.
((equal my-char "e")
(progn
(concatenate 'string "#a" (eval lisp-code-string) "#e") ; this should be something different.
(return-from transform-code-to-string
(concatenate 'string (get-string-without-invalid-last-character
(get-string-without-invalid-last-character
my-string invalid-last-characters)
invalid-last-characters) "))"))))
;; is character a ?
;; if yes, change to asm mode.
((equal my-char "a")
(progn
(setf current-mode "asm")
(setf is-there-code-on-this-line nil)
(setf current-phase "beginning-of-line")
(concatenate 'string "#a" (eval lisp-code-string) "#e") ; this should be something different.
;; otherwise, add # and the character to the Lisp code to be evaluated.
(t (progn
(setf current-phase "")
(setf my-string (concatenate 'string lisp-code-string "#" my-char))))))
;; is character # ?
;; if yes, mark hash sign read.
((equal my-char "#")
(setf current-phase "hash-sign-read"))
;; otherwise add the character to the Lisp code to be evaluated.
(t (setf my-string (concatenate 'string lisp-code-string my-char)))))
(t (error "invalid current mode"))))))
;;; #a is the input which starts the custom reader.
(set-dispatch-macro-character #\# #\a #'transform-code-to-string))
Here's some example assembly code without Lisp code inside, works:
(defparameter *example-code-x64*
#a
inc r10 ; increment register r10.
mov r11,r12 ; store value of r12 into r11.
#e)
And here's some assembly code with Lisp code inside, fails (see compiling error further below). In this one the Lisp code is after assembly code, but assembly and Lisp code should be allowed to be mixed freely using #a and #l as separators.
(defparameter *example-code-x64-with-lisp-fails*
#a
inc r10 ; increment register r10.
mov r11,r12 ; store value of r12 into r11.
#l
(loop for current-instruction in (list "inc" "dec")
do (loop for current-arg in (list "r13" "r14" "r15")
do (princ (concatenate 'string
current-instruction
" "
current-arg
(coerce (list #\Newline) 'string)))))
#e)
The Lisp part of the above code should be evaluated in the custom reader, so that it should produce identical results as the code below:
(defparameter *example-code-x64-with-lisp-fails*
#a
inc r10 ; increment register r10.
mov r11,r12 ; store value of r12 into r11.
inc r13
inc r14
inc r15
dec r13
dec r14
dec r15
#e)
But instead the compiling fails:
CL-USER> ; compiling file "/home/user/code/lisp/lisp-asm-reader-for-stackoverflow.lisp" (written 28 MAR 2014 10:11:29 PM):
;
; caught ERROR:
; READ error during COMPILE-FILE:
;
; The value -1 is not of type (MOD 4611686018427387901).
;
; (in form starting at line: 1, column: 0, file-position: 0)
;
; compilation unit aborted
; caught 1 fatal ERROR condition
; caught 1 ERROR condition
; compilation aborted after 0:00:00.004
1 compiler notes:
/home/user/code/lisp/lisp-asm-reader-for-stackoverflow.lisp:10487
read-error: READ error during COMPILE-FILE:
The value -1 is not of type (MOD 4611686018427387901).
(in form starting at line: 1, column: 0, file-position: 0)
CL-USER>

The idiomatic way to read lisp code from within a reader macro is to call cl:read. In your example, calling read after consuming #L will return the list whose car is loop, and that list can be passed to eval.
To collect the output created during the eval, you can bind *standard-output*. So an option is to use something akin to the following within your reader macro:
(let ((lisp-printed-string
(with-output-to-string (*standard-output*)
(eval (read stream t t t)))))
;; concatenate the lisp printed string onto your
;; hand parsed string here
)
An alternative is to have the user input a lisp form which returns a string {e.g. (concatenate "bar" "baz")}, and collect eval's return value instead of its printed output.

Related

Destructuring bind for regex matches

In elisp, how can I get a destructuring bind for regex matches?
For example,
;; what is the equivalent of this with destructuring?
(with-temp-buffer
(save-excursion (insert "a b"))
(re-search-forward "\\(a\\) \\(b\\)")
(cons (match-string 1)
(match-string 2)))
;; trying to do something like the following
(with-temp-buffer
(save-excursion (insert "a b"))
(cl-destructuring-bind (a b) (re-search-forward "\\(a\\) \\(b\\)")
(cons a b)))
I was thinking I would have to write a macro to expand matches if there isn't another way.
Here is one way: you first extend pcase to accept a new re-match pattern, with a definition such as:
(pcase-defmacro re-match (re)
"Matches a string if that string matches RE.
RE should be a regular expression (a string).
It can use the special syntax \\(?VAR: to bind a sub-match
to variable VAR. All other subgroups will be treated as shy.
Multiple uses of this macro in a single `pcase' are not optimized
together, so don't expect lex-like performance. But in order for
such optimization to be possible in some distant future, back-references
are not supported."
(let ((start 0)
(last 0)
(new-re '())
(vars '())
(gn 0))
(while (string-match "\\\\(\\(?:\\?\\([-[:alnum:]]*\\):\\)?" re start)
(setq start (match-end 0))
(let ((beg (match-beginning 0))
(name (match-string 1 re)))
;; Skip false positives, either backslash-escaped or within [...].
(when (subregexp-context-p re start last)
(cond
((null name)
(push (concat (substring re last beg) "\\(?:") new-re))
((string-match "\\`[0-9]" name)
(error "Variable can't start with a digit: %S" name))
(t
(let* ((var (intern name))
(id (cdr (assq var vars))))
(unless id
(setq gn (1+ gn))
(setq id gn)
(push (cons var gn) vars))
(push (concat (substring re last beg) (format "\\(?%d:" id))
new-re))))
(setq last start))))
(push (substring re last) new-re)
(setq new-re (mapconcat #'identity (nreverse new-re) ""))
`(and (pred stringp)
(app (lambda (s)
(save-match-data
(when (string-match ,new-re s)
(vector ,#(mapcar (lambda (x) `(match-string ,(cdr x) s))
vars)))))
(,'\` [,#(mapcar (lambda (x) (list '\, (car x))) vars)])))))
and once that is done, you can use it as follows:
(pcase X
((re-match "\\(?var:[[:alpha:]]*\\)=\\(?val:.*\\)")
(cons var val)))
or
(pcase-let
(((re-match "\\(?var:[[:alpha:]]*\\)=\\(?val:.*\\)") X))
(cons var val))
This has not been heavily tested, and as mentioned in the docstring it doesn't work as efficiently as it (c|sh)ould when matching a string against various regexps at the same time. Also you only get the matched substrings, not their position. And finally, it applies the regexp search to a string, whereas in manny/most cases regexps searches are used in a buffer. But you may still find it useful.

Functions inside a loop behaves differently

So I have a loop to just repeat the little text game I have made about dota, but when the function 'play' is called within a loop it doesn't return the result of my cond function, it just takes an input and then moves on to the next loop.
;;;;learn the invoker combo's
(defparameter *invoker-combo* '((cold-snap (3 0 0) 'QQQ);all of the possible invoker combo's
(ghost-walk (2 1 0) 'QQW)
(Ice-Wall (2 0 1) 'QQE)
(EMP (0 3 0) 'WWW)
(Tornado (1 2 0) 'QWW)
(Alacrity (0 2 1) 'WWE)
(Sun-Strike (0 0 3) 'EEE)
(Forge-Spirit (1 0 2) 'QEE)
(Chaos-Meteor (0 1 2) 'WEE)
(Deafening-Blast (1 1 1) 'QWE)))
(defun rand-combo (invoker-combo);returns a random combo
(nth (random (length invoker-combo))invoker-combo))
(defun count-letters (input);converts the keyboard strokes into numbers to be compared as it doesn't matter what order they are in, just that there is the correct quantity of them e.g QQE could also be written QEQ.
(append
(list (count #\Q input)
(count #\W input)
(count #\E input))))
(defun try-for-combo (rand-combo);takes i-p and compares it with the value for the random combo
(print(car rand-combo))
(let* ((i-p (string-upcase(read-line)))
(try (count-letters i-p)))
(cond ((equal try (cadr rand-combo))'Good-job)
((equal i-p "END")(list 'Thanks 'for 'playing))
(t (list i-p 'was 'wrong 'correct 'is (caddr(assoc (car rand-combo)*invoker-combo*)))))))
(defun play ()
(try-for-combo (rand-combo *invoker-combo*)))
(defun loop-play (x)
(loop for i from 0 to x
:do (play)))
If I call the function 'play' I get the following o/p:
FORGE-SPIRIT asdf
("ASDF" WAS WRONG CORRECT IS 'QEE)
or
ALACRITY wwe
GOOD-JOB
But if I call the function 'loop-play' I get the following o/p:
Break 3 [7]> (loop-play 2)
SUN-STRIKE eee
ALACRITY wwe
TORNADO qww
NIL
Can someone explain to me why this is happening?
EDIT: feel free to change the title, I didn't really know what to put.
The indentation and formatting of the code is poor. Please make it easier for you and for us to read the code.
(defun try-for-combo (rand-combo);takes i-p and compares it with the value for the random combo
(print(car rand-combo))
(let* ((i-p (string-upcase(read-line)))
(try (count-letters i-p)))
(cond ((equal try (cadr rand-combo))'Good-job) ; wrong indent level
((equal i-p "END")(list 'Thanks 'for 'playing))
(t (list i-p 'was 'wrong 'correct 'is (caddr(assoc (car rand-combo)*invoker-combo*)))))))
lacks spaces between s-expressions
wrong indentation levels
structure of the code unclear
does not use built-in documentation features
some lines are too long
Better:
(defun try-for-combo (rand-combo)
"takes i-p and compares it with the value for the random combo" ; built in doc
(print (car rand-combo))
(let* ((i-p (string-upcase (read-line)))
(try (count-letters i-p)))
(cond ((equal try (cadr rand-combo)) ; indentation
'Good-job)
((equal i-p "END")
(list 'Thanks 'for 'playing))
(t
(list i-p 'was 'wrong 'correct 'is ; several lines
(caddr (assoc (car rand-combo)
*invoker-combo*)))))))
I would propose to use an editor which actually understands some Lisp formatting. like GNU Emacs / SLIME, Clozure CL's Hemlock, LispWorks' editor...
If you are unsure about formatting, you can also ask Lisp to do it. Clisp is not that good at formatting, but something like SBCL or CCL would do:
* (let ((*print-case* :downcase))
(pprint '(defun try-for-combo (rand-combo)
(print (car rand-combo))
(let* ((i-p (string-upcase (read-line)))
(try (count-letters i-p)))
(cond ((equal try (cadr rand-combo))
'Good-job) ((equal i-p "END")
(list 'Thanks 'for 'playing))
(t (list i-p 'was 'wrong 'correct 'is
(caddr (assoc (car rand-combo)
*invoker-combo*)))))))))
And you get nicely formatted code:
(defun try-for-combo (rand-combo)
(print (car rand-combo))
(let* ((i-p (string-upcase (read-line))) (try (count-letters i-p)))
(cond ((equal try (cadr rand-combo)) 'good-job)
((equal i-p "END") (list 'thanks 'for 'playing))
(t
(list i-p 'was 'wrong 'correct 'is
(caddr (assoc (car rand-combo) *invoker-combo*)))))))
Automatic indenting of Lisp code by the editor saves you a lot of work.
There are hints for manual indentation.
Your try-for-combo function doesn't actually output anything. Rather, it returns values.
In the REPL, if you evaluate a form, like (+ 1 2), it will always print the evaluation of that form at the end (in this case, 3). However, consider instead (+ 1 (print 2)). The print function actually outputs the argument to standard output, then returns the value itself. So this will show (on the repl)
2
3
The 2 is outputted first, because (print 2) itself prints 2. Then, the form (+ 1 (print 2)) is evaluates to the same things as (+ 1 2), or 3.
In your case, your try-for-combo function should look like:
(defun try-for-combo (rand-combo)
(print (car rand-combo))
(let* ((i-p (string-upcase(read-line)))
(try (count-letters i-p)))
(print
(cond
((equal try (cadr rand-combo)) 'Good-job)
((equal i-p "END") (list 'Thanks 'for 'playing))
(t (list i-p 'was 'wrong 'correct 'is (caddr(assoc (car rand-combo) *invoker-combo*))))))
nil))
This will print the result of that cond form, and return 'nil'.
That's just the difference between the output your program does and the output the Lisp system does for each evaluation:
print prints something (a newline and then its argument) and returns a value. The value is printed by the REPL. Thus we see output twice:
[3]> (print "3")
"3"
"3"
Next we do several call to print in a progn. The value of the progn form is printed by the REPL. The first three strings are printed by the code and the last string is printed because of the Lisp REPL printing the value:
[4]> (progn (print "1") (print "2") (print "3"))
"1"
"2"
"3"
"3"

tweak-text: Lisp nesting exceeds `max-lisp-eval-depth

The program should reformat the string like below.
Example: (game-print '(THIS IS A SENTENCE。 WHAT ABOUT THIS? PROBABLY.))
This is a sentence. What about ths? Probably.
But something is wrong( Lisp nesting exceeds `max-lisp-eval-depth) and i don't know why. This piece of code is actually from the book "Land of lisp" in page 97. The original code is written in common lisp. I want to rewrite it in elisp. The last two argument in tweak-text means captain and literal.
(defun tweak-text (lst caps lit)
(when lst
(let ((item (car lst))
(rest (cdr lst)))
(cond ((eql item ?\ ) (cons item (tweak-text rest caps lit)))
((member item '(?\! ?\? ?\.)) (cons item (tweak-text rest t lit)))
((eql item ?\") (tweak-text rest caps (not lit)))
(lit (cons item (tweak-text rest nil lit)))
(caps (cons (upcase item) (tweak-text rest nil lit)))
(t (cons (downcase item) (tweak-text rest nil nil)))))))
(defun game-print (lst)
(print (coerce (tweak-text (coerce (prin1-to-string lst) 'list) t nil) 'string)))
(game-print '(not only does this sentence have a "comma," it also mentions the "iPad."))
The orignal code written in common lisp.
(defun tweak-text (lst caps lit)
(when lst
(let ((item (car lst))
(rest (cdr lst)))
(cond ((eql item #\space) (cons item (tweak-text rest caps lit)))
((member item '(#\! #\? #\.)) (cons item (tweak-text rest t lit)))
((eql item #\") (tweak-text rest caps (not lit)))
(lit (cons item (tweak-text rest nil lit)))
(caps (cons (char-upcase item) (tweak-text rest nil lit)))
(t (cons (char-downcase item) (tweak-text rest nil nil)))))))
(defun game-print (lst)
(princ (coerce (tweak-text (coerce (string-trim "() " (prin1-to-string lst)) 'list) t nil) 'string))
(fresh-line))
In both cases, you have non-terminal recursions, so you're using
O(length(lst)) stack space. Obviously, systems may limit the stack
space you can use, and you do indeed reach this limit in emacs. (Now
then in emacs, you can increase the limit by changing
max-lisp-eval-depth, but this won't solve the fundamental problem).
The solution is to use iteration instead of recursion.
But first, write in emacs:
(defun character (x)
"common-lisp: return the character designated by X."
(etypecase x
(integer x)
(string (aref x 0))
(symbol (aref (symbol-name x) 0))))
(defun string-trim (character-bag string-designator)
"common-lisp: returns a substring of string, with all characters in \
character-bag stripped off the beginning and end."
(unless (sequencep character-bag)
(signal 'type-error "expected a sequence for `character-bag'."))
(let* ((string (string* string-designator))
(margin (format "[%s]*" (regexp-quote
(if (stringp character-bag)
character-bag
(map 'string 'identity character-bag)))))
(trimer (format "\\`%s\\(\\(.\\|\n\\)*?\\)%s\\'" margin margin)))
(replace-regexp-in-string trimer "\\1" string)))
(require 'cl)
so that you can write a single function for both CL and elisp:
(defun tweak-text (list caps lit)
(let ((result '()))
(dolist (item list (nreverse result))
(cond ((find item " !?.") (push item result))
((eql item (character "\"")) (setf lit (not lit)))
(lit (push item result)
(setf caps nil))
(caps (push (char-upcase item) result)
(setf caps nil))
(t (push (char-downcase item) result)
(setf caps nil
lit nil))))))
(defun game-print (list)
(princ (coerce (tweak-text (coerce (string-trim "() " (prin1-to-string list)) 'list)
t nil)
'string))
(terpri))
Then:
(game-print '(not only does this sentence have a "comma," it also mentions the "iPad."))
in emacs:
prints: Not only does this sentence have a comma, it also mentions the iPad.
returns: t
in Common Lisp:
prints: Not only does this sentence have a comma, it also mentions the iPad.
returns: nil
Now, in general there's little point of using lists to process strings, both emacs lisp and Common Lisp have powerful primitives to deal with sequences and strings directly.
Note that elisp (sadly) does not optimise for tail-recursion, so that turns out to be a very inefficient way to write this function.
You are indeed hitting the 'max-lisp-eval-depth' limit when recursing in tweak-text. I don't see anything wrong with the way the code is(I didn't check if its doing what you intend it to do).
You can configure/raise the 'max-lisp-eval-depth' limit. The documentation for that variable states that you can raise it as long as you are confident that you are not going to trip into running out of stack space. The limit is conservatively set to 541 on my machine. Raising it to 600 got the function definition above to work on the input you gave as example.
As Pascal Bourguignon already mentioned it, using strings w/o coercing them to lists and back would be a better approach, below is my take at it. Note that it is slightly different in that literal strings are verified for punctuation, and if they appear to have punctuation such as would cause it otherwise to have the next letter upper-cased, then it would be upper cased too. I'm not sure this is a disadvantage, this is why I didn't take care of this difference.
(defun tweak-text (source)
(let ((i 0) (separator "") (cap t) current)
(with-output-to-string
(dolist (i source)
(setq current
(concat separator
(etypecase i
(string i)
(symbol (downcase (symbol-name i)))))
separator " ")
(let (current-char)
(dotimes (j (length current))
(setq current-char (aref current j))
(cond
((position current-char " \t\n\r"))
(cap (setq cap nil
current-char (upcase current-char)))
((position current-char ".?!")
(setq cap t)))
(princ (char-to-string current-char))))))))
(tweak-text '(not only does this sentence have a "comma," it also mentions the "iPad."))
"Not only does this sentence have a comma, it also mentions the iPad."
I think you should write something like this:
(defun tweak-text-wrapper (&rest args)
(let ((max-lisp-eval-depth 9001)) ; as much as you want
(apply tweak-text args)))

In elisp, how to evaluate a string of "var=value\n..." into lisp variables of the same name?

An mplayer tool (midentify) outputs "shell-ready" lines intended to be evaluated by a bash/sh/whatever interpreter.
How can I assign these var-names to their corresponding values as elisp var-names in emacs?
The data is in a string (via shell-command-to-string)
Here is the data
ID_AUDIO_ID=0
ID_FILENAME=/home/axiom/abc.wav
ID_DEMUXER=audio
ID_AUDIO_FORMAT=1
ID_AUDIO_BITRATE=512000
ID_AUDIO_RATE=0
ID_AUDIO_NCH=1
ID_LENGTH=3207.00
ID_SEEKABLE=1
ID_CHAPTERS=0
ID_AUDIO_BITRATE=512000
ID_AUDIO_RATE=32000
ID_AUDIO_NCH=1
ID_AUDIO_CODEC=pcm
ID_EXIT=EOF
Here's a routine that takes a string containing midentify output, and returns an association list of the key-value pairs (which is safer than setting Emacs variables willy-nilly). It also has the advantage that it parses numeric values into actual numbers:
(require 'cl) ; for "loop"
(defun midentify-output-to-alist (str)
(setq str (replace-regexp-in-string "\n+" "\n" str))
(setq str (replace-regexp-in-string "\n+\\'" "" str))
(loop for index = 0 then (match-end 0)
while (string-match "^\\(?:\\([A-Z_]+\\)=\\(?:\\([0-9]+\\(?:\\.[0-9]+\\)?\\)\\|\\(.*\\)\\)\\|\\(.*\\)\\)\n?" str index)
if (match-string 4 str)
do (error "Invalid line: %s" (match-string 4 str))
collect (cons (match-string 1 str)
(if (match-string 2 str)
(string-to-number (match-string 2 str))
(match-string 3 str)))))
You'd use this function like so:
(setq alist (midentify-output-to-alist my-output))
(if (assoc "ID_LENGTH" alist)
(setq id-length (cdr (assoc "ID_LENGTH" alist)))
(error "Didn't find an ID_LENGTH!"))
EDIT: Modified function to handle blank lines and trailing newlines correctly.
The regexp is indeed a beast; Emacs regexps are not known for their easiness on the eyes. To break it down a bit:
The outermost pattern is ^(?:valid-line)|(.*). It tries to match a valid line, or else matches the entire line (the .*) in match-group 4. If (match-group 4 str) is not nil, that indicates that an invalid line was encountered, and an error is raised.
valid-line is (word)=(?:(number)|(.*)). If this matches, then the name part of the name-value pair is in match-string 1, and if the rest of the line matches a number, then the number is in match-string 2, otherwise the entire rest of the line is in match-string 3.
There's probably a better way but this should do it:
(require 'cl)
(let ((s "ID_AUDIO_ID=0
ID_FILENAME=/home/axiom/abc.wav
ID_DEMUXER=audio
ID_AUDIO_FORMAT=1
ID_AUDIO_BITRATE=512000
ID_AUDIO_RATE=0
ID_AUDIO_NCH=1
ID_LENGTH=3207.00
ID_SEEKABLE=1
ID_CHAPTERS=0
ID_AUDIO_BITRATE=512000
ID_AUDIO_RATE=32000
ID_AUDIO_NCH=1
ID_AUDIO_CODEC=pcm
ID_EXIT=EOF"))
(loop for p in (split-string s "\n")
do
(let* ((elements (split-string p "="))
(key (elt elements 0))
(value (elt elements 1)))
(set (intern key) value))))
Here's a function you can run on the output buffer:
(defun set-variables-from-shell-assignments ()
(goto-char (point-min))
(while (< (point) (point-max))
(and (looking-at "\\([A-Z_]+\\)=\\(.*\\)$")
(set (intern (match-string 1)) (match-string 2)))
(forward-line 1)))
I don't think regexp is what really need. You need to split your string by \n and =, so you just say exactly the same to interpreter.
I think you can also use intern to get symbol from string(and set variables). I use it for the first time, so comment here if i am wrong. Anyways, if list is what you want, just remove top-level mapcar.
(defun set=(str)
(mapcar (lambda(arg)
(set
(intern (car arg))
(cadr arg)))
(mapcar (lambda(arg)
(split-string arg "=" t))
(split-string
str
"\n" t))))
(set=
"ID_AUDIO_ID=0
ID_FILENAME=/home/axiom/abc.wav
ID_DEMUXER=audio
ID_AUDIO_FORMAT=1
ID_AUDIO_BITRATE=512000
ID_AUDIO_RATE=0
ID_AUDIO_NCH=1
ID_LENGTH=3207.00
ID_SEEKABLE=1
ID_CHAPTERS=0
ID_AUDIO_BITRATE=512000
ID_AUDIO_RATE=32000
ID_AUDIO_NCH=1
ID_AUDIO_CODEC=pcm
ID_EXIT=EOF")

Using ispell/aspell to spell check camelcased words

I need to spell check a large document containing many camelcased words. I want ispell or aspell to check if the individual words are spelled correctly.
So, in case of this word:
ScientificProgrezGoesBoink
I would love to have it suggest this instead:
ScientificProgressGoesBoink
Is there any way to do this? (And I mean, while running it on an Emacs buffer.) Note that I don't necessarily want it to suggest the complete alternative. However, if it understands that Progrez is not recognized, I would love to be able to replace that part at least, or add that word to my private dictionary, rather than including every camel-cased word into the dictionary.
I took #phils suggestions and dug around a little deeper. It turns out that if you get camelCase-mode and reconfigure some of ispell like this:
(defun ispell-get-word (following)
(when following
(camelCase-forward-word 1))
(let* ((start (progn (camelCase-backward-word 1)
(point)))
(end (progn (camelCase-forward-word 1)
(point))))
(list (buffer-substring-no-properties start end)
start end)))
then, in that case, individual camel cased words suchAsThisOne will actually be spell-checked correctly. (Unless you're at the beginning of a document -- I just found out.)
So this clearly isn't the fullblown solution, but at least it's something.
There is "--run-together" option in aspell. Hunspell can't check camelcased word.
If you read the code of aspell, you will find its algorithm actually does not split camelcase word into a list of sub-words. Maybe this algorithm is faster, but it will wrongly report word containing two character sub-word as typo. Don't waste time to tweak other aspell options. I tried and they didn't work.
So we got two problems:
aspell reports SOME camelcased words as typos
hunspell reports ALL camelcased words as typos
Solution to solve BOTH problems is to write our own predicate in Emacs Lisp.
Here is a sample predicate written for javascript:
(defun split-camel-case (word)
"Split camel case WORD into a list of strings.
Ported from 'https://github.com/fatih/camelcase/blob/master/camelcase.go'."
(let* ((case-fold-search nil)
(len (length word))
;; ten sub-words is enough
(runes [nil nil nil nil nil nil nil nil nil nil])
(runes-length 0)
(i 0)
ch
(last-class 0)
(class 0)
rlt)
;; split into fields based on class of character
(while (< i len)
(setq ch (elt word i))
(cond
;; lower case
((and (>= ch ?a) (<= ch ?z))
(setq class 1))
;; upper case
((and (>= ch ?A) (<= ch ?Z))
(setq class 2))
((and (>= ch ?0) (<= ch ?9))
(setq class 3))
(t
(setq class 4)))
(cond
((= class last-class)
(aset runes
(1- runes-length)
(concat (aref runes (1- runes-length)) (char-to-string ch))))
(t
(aset runes runes-length (char-to-string ch))
(setq runes-length (1+ runes-length))))
(setq last-class class)
;; end of while
(setq i (1+ i)))
;; handle upper case -> lower case sequences, e.g.
;; "PDFL", "oader" -> "PDF", "Loader"
(setq i 0)
(while (< i (1- runes-length))
(let* ((ch-first (aref (aref runes i) 0))
(ch-second (aref (aref runes (1+ i)) 0)))
(when (and (and (>= ch-first ?A) (<= ch-first ?Z))
(and (>= ch-second ?a) (<= ch-second ?z)))
(aset runes (1+ i) (concat (substring (aref runes i) -1) (aref runes (1+ i))))
(aset runes i (substring (aref runes i) 0 -1))))
(setq i (1+ i)))
;; construct final result
(setq i 0)
(while (< i runes-length)
(when (> (length (aref runes i)) 0)
(setq rlt (add-to-list 'rlt (aref runes i) t)))
(setq i (1+ i)))
rlt))
(defun flyspell-detect-ispell-args (&optional run-together)
"If RUN-TOGETHER is true, spell check the CamelCase words.
Please note RUN-TOGETHER will make aspell less capable. So it should only be used in prog-mode-hook."
;; force the English dictionary, support Camel Case spelling check (tested with aspell 0.6)
(let* ((args (list "--sug-mode=ultra" "--lang=en_US"))args)
(if run-together
(setq args (append args '("--run-together" "--run-together-limit=16"))))
args))
;; {{ for aspell only, hunspell does not need setup `ispell-extra-args'
(setq ispell-program-name "aspell")
(setq-default ispell-extra-args (flyspell-detect-ispell-args t))
;; }}
;; ;; {{ hunspell setup, please note we use dictionary "en_US" here
;; (setq ispell-program-name "hunspell")
;; (setq ispell-local-dictionary "en_US")
;; (setq ispell-local-dictionary-alist
;; '(("en_US" "[[:alpha:]]" "[^[:alpha:]]" "[']" nil ("-d" "en_US") nil utf-8)))
;; ;; }}
(defvar extra-flyspell-predicate '(lambda (word) t)
"A callback to check WORD. Return t if WORD is typo.")
(defun my-flyspell-predicate (word)
"Use aspell to check WORD. If it's typo return t."
(let* ((cmd (cond
;; aspell: `echo "helle world" | aspell pipe`
((string-match-p "aspell$" ispell-program-name)
(format "echo \"%s\" | %s pipe"
word
ispell-program-name))
;; hunspell: `echo "helle world" | hunspell -a -d en_US`
(t
(format "echo \"%s\" | %s -a -d en_US"
word
ispell-program-name))))
(cmd-output (shell-command-to-string cmd))
rlt)
;; (message "word=%s cmd=%s" word cmd)
;; (message "cmd-output=%s" cmd-output)
(cond
((string-match-p "^&" cmd-output)
;; it's a typo because at least one sub-word is typo
(setq rlt t))
(t
;; not a typo
(setq rlt nil)))
rlt))
(defun js-flyspell-verify ()
(let* ((case-fold-search nil)
(font-matched (memq (get-text-property (- (point) 1) 'face)
'(js2-function-call
js2-function-param
js2-object-property
js2-object-property-access
font-lock-variable-name-face
font-lock-string-face
font-lock-function-name-face
font-lock-builtin-face
rjsx-text
rjsx-tag
rjsx-attr)))
subwords
word
(rlt t))
(cond
((not font-matched)
(setq rlt nil))
;; ignore two character word
((< (length (setq word (thing-at-point 'word))) 2)
(setq rlt nil))
;; handle camel case word
((and (setq subwords (split-camel-case word)) (> (length subwords) 1))
(let* ((s (mapconcat (lambda (w)
(cond
;; sub-word wholse length is less than three
((< (length w) 3)
"")
;; special characters
((not (string-match-p "^[a-zA-Z]*$" w))
"")
(t
w))) subwords " ")))
(setq rlt (my-flyspell-predicate s))))
(t
(setq rlt (funcall extra-flyspell-predicate word))))
rlt))
(put 'js2-mode 'flyspell-mode-predicate 'js-flyspell-verify)
Or just use my new pacakge https://github.com/redguardtoo/wucuo
You should parse the camel cased words and split them, then check the individual spelling for each one and assemble a suggestion taking into account the single suggestion for each misspelled token. Considering that each misspelled token can have multiple suggestions this sounds a bit inefficient to me.