Compute offset in file given line and column numbers - emacs

Is there a built-in function in elisp that given a filename, line number and column number will return the character offset into the file?

Built-in? Not that I know of. Is this what you want?
(defun foo (file line column &optional msgp)
"..."
(interactive (list (read-file-name "File: ")
(read-number "Line: ")
(read-number "Columnn: ")
t))
(with-current-buffer (find-file-noselect file)
(goto-line line)
(forward-char column)
(when msgp (message "Char in file: %d" (point)))
(point)))
This doesn't bother to handle whether there are enough lines or columns on the chosen line, but you can take care of that.

Related

Split lines of current paragraph in Emacs

I want to add a function (para2lines) to Emacs by which I can split the current paragraph into its sentences and print them line by line in a separate buffer. Following is code in Racket/Scheme:
(define (p2l paraString)
(define lst (string-split paraString ". "))
(for ((i lst))
(displayln i)))
Testing:
(p2l "This is a test. For checking only. Only three lines.")
Output:
This is a test.
For checking only.
Only three lines.
In Emacs Lisp, I could manage following code:
(defun pl (ss)
(interactive)
(let ((lst (split-string (ss))))
(while lst
(print (pop lst)))))
But I do not know how to get the text from the paragraph with current position. How can I correct this function?
Edit: basically, I want to read it as separate lines but want to save it as paragraph.
Here's an example that might help you on your way. It will do your conversion to the current paragraph (i.e. where the cursor is positioned), rather than to a new buffer. You could modify this to pass a string to your function if that's what you require.
(defun p2l ()
"Format current paragraph into single lines."
(interactive "*")
(save-excursion
(forward-paragraph)
(let ((foo (point)))
(backward-paragraph)
(replace-regexp "\n" " " nil (1+ (point)) foo)
(backward-paragraph)
(replace-regexp "\\. ?" ".\n" nil (point) foo))))
I would just run Emacs commands or write a macro to convert a paragraph to single-sentence lines, but maybe you are really just wanting to read wrapped paragraphs as lines, thus the need to have an Emacs command.
Here's something that will grab the current paragraph, insert a new buffer *Lines*, and then convert sentences to lines.
(defun para-lines ()
"Split sentences of paragraph to lines in new buffer."
(interactive)
;; Move the paragraph to a new buffer.
(let ((b (generate-new-buffer "*Lines*")))
(with-output-to-temp-buffer b
(let ((beg (save-excursion (forward-paragraph -1) (point)))
(end (save-excursion (forward-paragraph +1) (point))))
(princ (buffer-substring-no-properties beg end))))
;; Switch to new buffer
(with-current-buffer b
;; Since the name starts with "*", shut off Help Mode
(fundamental-mode)
;; Make sure buffer is writable
(setq buffer-read-only nil)
;; From the start of the buffer
(goto-char (point-min))
;; While not at the end of the buffer
(while (< (point) (point-max))
(forward-sentence 1)
;; Delete spaces between sentences before making new new line
(delete-horizontal-space)
;; Don't add a new line, if already at the end of the line
(unless (= (line-end-position) (point))
(newline))))))
To avoid using forward-sentence, and just use a regular expression, use re-search-forward. For instance, to match semi-colons as well as periods.
(defun para-lines ()
"Split sentences of paragraph to lines in new buffer."
(interactive)
;; Move the paragraph to a new buffer.
(let ((b (generate-new-buffer "*Lines*")))
(with-output-to-temp-buffer b
(let ((beg (save-excursion (forward-paragraph -1) (point)))
(end (save-excursion (forward-paragraph +1) (point))))
(princ (buffer-substring-no-properties beg end))))
;; Switch to new buffer
(with-current-buffer b
;; Since the name starts with "*", shut off Help Mode
(fundamental-mode)
;; Make sure buffer is writable
(setq buffer-read-only nil)
;; From the start of the buffer
(goto-char (point-min))
;; While not at the end of the buffer
(while (< (point) (point-max))
(re-search-forward "[.;]\\s-+" nil t)
;; Delete spaces between sentences before making new new line
(delete-horizontal-space)
;; Don't add a new line, if already at the end of the line
(unless (= (line-end-position) (point))
(newline))))))

elisp: read file into list of lists

I need to read a file contents into a two-dimensional list, split by newlines and spaces. For example,
a b
c d
needs to become
(list (list "a" "b") (list "c" "d"))
Currently I only know how to read the contents into a simple list determined by newlines. Whenever I need to use an element from that list, I have to split it by spaces everytime, but preferably this should be done only once beforehand.
While abo-abo's answer above is fine, it creates a temporary string with the full contents of the file, which is inefficient. If the file is very large, it is better to walk a buffer collecting data line-by-line:
(defun file-to-matrix (filename)
(with-temp-buffer
(insert-file-contents filename)
(let ((list '()))
(while (not (eobp))
(let ((beg (point)))
(move-end-of-line nil)
(push (split-string (buffer-substring beg (point)) " ") list)
(forward-char)))
(nreverse list))))
Note the use of with-temp-buffer, which avoids leaving a buffer lying around, and the use of insert-file-contents, which avoids interfering with any other buffer that might be visiting the same file.
Like this:
(with-current-buffer (find-file-noselect "~/foo")
(mapcar (lambda (x) (split-string x " " t))
(split-string
(buffer-substring-no-properties (point-min) (point-max))
"\n")))
With dash, s and f third-party libraries:
(--map (s-split " " it) (s-lines (s-chomp (f-read "FILE.TXT"))))
or:
(->> "FILE.TXT" f-read s-chomp s-lines (--map (s-split " " it)))
which are the same thing.

Duplicate lines in region "in-place"

I am trying to code an elisp function which takes all (non-empty) lines in the current region and duplicates these lines in-place. I'll give you an example:
This input:
Line1
Line2
Line3
Becomes
Line1
Line1
Line2
Line2
Line3
Line3
If there were empty lines, they would remain in place and should not be duplicated. If have coded the following function:
(defun duplicate-lines-in-region (beg end)
"Duplicates the lines in the current region \"in-place\"."
(interactive "r")
(if (use-region-p)
(let* ((text (buffer-substring-no-properties (region-beginning) (region-end)))
(lines (split-string text "\n" t))
(num-lines (length lines))
(current-line 0)
(end-pos 0))
(save-excursion
(goto-char (region-beginning))
(while (< current-line num-lines)
(end-of-line)
(insert "\n")
(insert (nth current-line lines))
(next-line)
(setq current-line (+ current-line 1))
(setq end-pos (point))))
(goto-char end-pos))
(error "No active region!")))
However, this function has some (at least two) bugs:
Empty lines are not ignored but rather completely destroy the output (lines are inserted in the wrong places).
The first line that is inserted is always inserted with the wrong indentation (at column zero), all other lines are inserted at the right indentation.
I'm kinda stuck with advancing the function to a more useful state. Also, I highly doubt that my approach is particularly efficient/well-written... maybe some elisp-guru knows a more straightforward approach that can be used to work on each line in a region individually...
This might be cheating, but you could just use a regex replace to match non-blank lines and replace them with the captured line and a duplicate. Use M-x replace-regexp with a region highlighted, and the following arguments:
Replace regexp: \(.+\)$
With: \&^J\&
Note, in the above, ^J represents a newline/quoted enter key, C-q C-j.
To translate this into elisp, you just need to make sure to escape the backslashes and parentheses:
(defun duplicate-lines-in-region (beg end)
(interactive "*r")
(replace-regexp "\\(.+\\)$" "\\&\n\\&" nil beg end))

How to finish this job in emacs?

The original string is like this:
# chrom,name,strand,txStart
And the result should looks like this:
# $1: chrom
# $2: name
# $3: strand
# $4: txStart
Does anyone have idea of a quick way to do that?
Lots of ways.
You could use a search and replace making use of the \# counter in the replacement. That's zero-based, so you'd either need to add a dummy replacement at the front to use up the zero, or else use the elisp replacement expression \,(1+ \#) instead.
You could use a keyboard macro, and insert a counter with C-xC-kTAB or <f3>. You can seed that counter by supplying a prefix argument when you start recording.
On Emacs 24 you can number the lines of a marked region using a custom format string with C-uC-xrN, so your format string could be # $%1d:
Evaluate following code and execute foo function on input line.
(require 'cl)
(defun foo ()
(interactive)
(let* ((str (buffer-substring-no-properties
(line-beginning-position) (line-end-position)))
(words-str (and (string-match "# \\(.+\\)$" str)
(match-string 1 str)))
(buf (get-buffer-create "*tmp*")))
(unless words-str
(error "Line should be '# word1,word2,...'"))
(with-current-buffer buf
(erase-buffer)
(loop with index = 1
for word in (split-string words-str ",")
do
(progn
(insert (format "# $%d: %s\n" index word))
(incf index)))
(pop-to-buffer buf))))

Emacs: capture paragraphs and act on each

Quite often I need to capture some paragraphs in a region with regexp - and then act on each paragraph.
For example consider a problem of recovering a numberd list:
1. Some text with a blank
line. I want not to have that line break
2. Some more text. Also - with
a line break.
3. I want to have a defun which
will capture each numbered entry
and then join it
I want to write a defun which will make the previous text like that:
1. Some text with a blank line. I want not to have that line break
2. Some more text. Also - with a line break.
3. I want to have a defun which will capture each numbered entry and then join it
Here's my best try for now:
(defun joining-lines (start end)
(interactive "r")
(save-restriction
(narrow-to-region start end)
(goto-char (point-min))
(while (search-forward-regexp "\\([[:digit:]]\\. \\)\\(\\[^[:digit:]\\].*?\\)" nil t)
(replace-match "\\1\\,(replace-regexp-in-string " ;; here's a line break
" " " (match-string 2))" t nil))
)
)
It neither work - nor give an error.
Actually it would be better to have a separate defun to act on a string. This way it will be easy to expand the code to have multiple substitutions on the replace-match.
There are two issues with your code:
A period in a regexp matches "anything except newline," so your .*? will never include a newline character.
The \,(...) regexp replacement construct is only available interactively. If issue #1 were resolved, you'd get an error (error "Invalid use of '\\' in replacement text"). Programmatically, you have to write the code yourself, eg: (replace-match (concat (match-string 1) (replace-regexp-in-string "\n" " " (match-string 2)))).
I think you'd be better off not relying on regexps to do the heavy lifting here. This works for me:
(defun massage-list (start end)
(interactive "r")
(save-excursion
(save-restriction
(narrow-to-region start end)
(goto-char start)
(while (progn (forward-line) (= (point) (line-beginning-position)))
(when (not (looking-at "^[[:digit:]]+\\."))
(delete-indentation)
(beginning-of-line))))))
Try something like this code. It's not the shortest possible but rather something straigthforward.
(defun joining-lines(start end)
(interactive "r")
(let ((newline-string "~~~"))
(save-restriction
(narrow-to-region start end)
(mark-whole-buffer)
(replace-string "\n" newline-string)
(goto-char start)
(while (re-search-forward (concat newline-string "\\([[:digit:]]+. \\)") nil t)
(replace-match "\n\\1" nil nil))
(mark-whole-buffer)
(replace-string newline-string " "))))
Here's a solution using an external defun:
(defun capturing-paragraphs (start end)
(interactive "r")
(save-restriction
(narrow-to-region start end)
(goto-char (point-min))
(while (search-forward-regexp "^\\(^[[:digit:]]+\\.[^[:digit:]]+$\\)" nil t) (replace-match (multiple-find-replace-in-match) t nil))))
(defun multiple-find-replace-in-match ()
"Returns a string based on current regex match."
(let (matchedText newText)
(setq matchedText
(buffer-substring-no-properties
(match-beginning 1) (match-end 1)))
(setq newText
(replace-regexp-in-string "\n" "" matchedText) )
newText))
it works only if there's no figures in the text. But this solution is straighforward to expand - to add new replacements on a matched string.