01a0958e6f
This really was a lark to see if I could change ALL the files using woccur and a regular expression. Quite pleased with how simple that was.
251 lines
10 KiB
Org Mode
251 lines
10 KiB
Org Mode
#+TITLE: Editing Data Files
|
||
#+AUTHOR: Howard X. Abrams
|
||
#+DATE: 2022-10-14
|
||
#+FILETAGS: :emacs:
|
||
|
||
A literate programming file for configuring Emacs to edit files of data.
|
||
|
||
#+begin_src emacs-lisp :exports none
|
||
;;; ha-data --- edit data files. -*- lexical-binding: t; -*-
|
||
;;
|
||
;; © 2022-2023 Howard X. Abrams
|
||
;; Licensed under a Creative Commons Attribution 4.0 International License.
|
||
;; See http://creativecommons.org/licenses/by/4.0/
|
||
;;
|
||
;; Author: Howard X. Abrams <http://gitlab.com/howardabrams>
|
||
;; Maintainer: Howard X. Abrams
|
||
;; Created: October 14, 2022
|
||
;;
|
||
;; While obvious, GNU Emacs does not include this file or project.
|
||
;;
|
||
;; *NB:* Do not edit this file. Instead, edit the original literate file at:
|
||
;; /Users/howard.abrams/other/hamacs/ha-data.org
|
||
;; And tangle the file to recreate this one.
|
||
;;
|
||
;;; Code:
|
||
#+end_src
|
||
* Introduction
|
||
Once upon a time, I [[https://www.youtube.com/watch?v=HKJMDJ4i-XI][gave a talk]] at EmacsConf 2019, about [[http://howardism.org/Technical/Emacs/piper-presentation-transcript.html][an interesting idea]] I called [[https://gitlab.com/howardabrams/emacs-piper][emacs-piper]]. I still like the idea of sometimes operating on the entire contents of an Emacs buffer, as if it were a data file. This file contains what I feel are the best functions for that… oh, and a leader to call them (instead of the original Hydra).
|
||
|
||
#+begin_src emacs-lisp
|
||
;; Leader-key entries for the commands that treat the buffer as data.
;; NOTE(review): `ha-leader' and `ha-yank-buffer-contents' are not defined
;; in this file -- presumably they come from elsewhere in the configuration.
(ha-leader
  ;; Prefix entry: carries only a `:which-key' label, no command.
  "d"   '(:ignore t :which-key "data")
  "d |" '("pipe to shell" . replace-buffer-with-shell-command)
  "d r" '("replace buffer" . ha-vr-replace-all)
  "d y" '("copy to clipboard" . ha-yank-buffer-contents))
|
||
#+end_src
|
||
** Global Replacements
|
||
The string replacement functions operate at the current point, which means I need to jump to the beginning before calling something like [[help:vr/replace][vr/replace]].
|
||
|
||
#+begin_src emacs-lisp
|
||
(defun ha-vr-replace-all (regexp replace start end)
  "Replace matches of REGEXP with REPLACE over the whole accessible buffer.
Interactively, the arguments come from `vr--interactive-get-args',
so the prompt gives live visual feedback.  START and END fill the
remaining slots of that argument list but are ignored: the
replacement always runs from `point-min' to `point-max'."
  (interactive
   (vr--interactive-get-args 'vr--mode-regexp-replace 'vr--calling-func-replace))
  ;; Deliberately unused; naming them here keeps the byte-compiler quiet.
  (ignore start end)
  (let ((from (point-min))
        (to (point-max)))
    (vr/replace regexp replace from to)))
|
||
#+end_src
|
||
* Line-Oriented Functions
|
||
These functions focus on the data in the buffer as a series of lines:
|
||
#+begin_src emacs-lisp
|
||
;; Leader-key entries for the line-oriented commands.
(ha-leader
  ;; Prefix entry: carries only a `:which-key' label, no command.
  "d l"   '(:ignore t :which-key "on lines")
  "d l f" '("flush lines" . flush-lines)
  "d l k" '("keep lines" . keep-lines)
  "d l s" '("sort lines" . ha-sort-lines)
  "d l u" '("unique lines" . delete-duplicate-lines)
  "d l b" '("flush blanks" . flush-blank-lines))
|
||
#+end_src
|
||
|
||
One issue I have is that [[help:keep-lines][keep-lines]] (like [[help:flush-lines][flush-lines]]) operates on the lines /starting with the point/, not on the entire buffer. Let’s fix that:
|
||
#+begin_src emacs-lisp
|
||
(defun call-function-at-buffer-beginning (orig-fun &rest args)
  "Apply ORIG-FUN to ARGS with point moved to `point-min'.
Return whatever ORIG-FUN returns.  `save-excursion' puts point
back afterwards, which makes this usable as `:around' advice."
  (let ((top (point-min)))
    (save-excursion
      (goto-char top)
      (apply orig-fun args))))
|
||
|
||
;; Wrap both line filters so they start from the top of the buffer
;; instead of from point.
(dolist (line-filter '(keep-lines flush-lines))
  (advice-add line-filter :around #'call-function-at-buffer-beginning))
|
||
#+end_src
|
||
|
||
The [[help:sort-lines][sort-lines]] is useful, but insists on an /active/ region. Let’s make a data-focused version:
|
||
#+begin_src emacs-lisp
|
||
(defun ha-sort-lines (prefix)
  "Sort the lines of the active region, or of the buffer when there is none.
A non-nil PREFIX (interactively, the prefix argument) sorts in
reverse order."
  (interactive "P")
  (save-excursion
    (let* ((use-region (region-active-p))
           (beg (if use-region (region-beginning) (point-min)))
           (end (if use-region (region-end) (point-max))))
      (sort-lines prefix beg end))))
|
||
#+end_src
|
||
|
||
Getting rid of blank lines seems somewhat useful:
|
||
#+begin_src emacs-lisp
|
||
(defun flush-blank-lines ()
  "Delete every line that is empty or holds only whitespace.
Start from the beginning of the accessible buffer and restore
point afterwards.  See `flush-lines'."
  (interactive)
  (let ((blank-line (rx bol (* space) eol)))
    (save-excursion
      (goto-char (point-min))
      (flush-lines blank-line))))
|
||
#+end_src
|
||
* Table-Oriented Functions
|
||
These functions focus on the data in the buffer as a table consisting of columns and rows of some sort.
|
||
#+begin_src emacs-lisp
|
||
;; Leader-key entries for the column (table) commands.
(ha-leader
  ;; Prefix entry: carries only a `:which-key' label, no command.
  "d t"   '(:ignore t :which-key "on tables")
  "d t k" '("keep columns" . keep-columns)
  "d t f" '("flush columns" . flush-columns))
|
||
#+end_src
|
||
|
||
Each of the /table/ functions requires a /table separator/ (for instance the =|= character) and often the columns to operate on.
|
||
The =keep-columns= function removes all text, except for the /indexed/ text between the separators:
|
||
#+begin_src emacs-lisp
|
||
(defun keep-columns (separator columns)
  "Keep tabular text columns, deleting the rest in buffer or region.
Columns are the text between occurrences of SEPARATOR, not
character positions.  The text _before_ the first separator is
column 0.  COLUMNS is a string naming the columns to keep, for
instance `0, 2'; it is handed unchanged to `operate-columns'.

For instance, given the following table:

    apple : avocado : apricot
    banana : blueberry : bramble
    cantaloupe : cherry : courgette : cucumber
    data : durian

Calling this function with a `:' character, as columns: `0, 2'
results in the buffer text:

    apple : apricot
    banana : bramble
    cantaloupe : courgette
    data

The last line has no column 2, so only its column 0 survives."
  (interactive "sSeparator: \nsColumns to Keep: ")
  (operate-columns separator columns t))
|
||
#+end_src
|
||
|
||
The =flush-columns= function is similar, except that it deletes the given columns.
|
||
#+begin_src emacs-lisp
|
||
(defun flush-columns (separator columns)
  "Delete tabular text columns in buffer or region.
Columns are the text between occurrences of SEPARATOR, not
character positions.  The text _before_ the first separator is
column 0.  COLUMNS is a string naming the columns to delete, for
instance `1'; it is handed unchanged to `operate-columns'.

For instance, given the following table:

    apple : avocado : apricot
    banana : blueberry : bramble
    cantaloupe : cherry : courgette : cucumber
    data : durian

Calling this function with a `:' character, as columns: `1'
\(remember the columns are 0-indexed), results in the buffer text:

    apple : apricot
    banana : bramble
    cantaloupe : courgette : cucumber
    data"
  (interactive "sSeparator: \nsColumns to Delete: ")
  (operate-columns separator columns nil))
|
||
#+end_src
|
||
|
||
Both functions are similar, and their behavior comes from =operate-columns=, which walks through the buffer, line-by-line:
|
||
#+begin_src emacs-lisp
|
||
(defun operate-columns (separator columns-str keep?)
  "Call `operate-columns-on-line' for each line containing SEPARATOR.
Work on the active region if there is one, otherwise on the whole
accessible buffer.  COLUMNS-STR is a string such as `1, 4-7 9';
`numbers-to-number-list' converts it to a list of column numbers.
KEEP? is passed through: non-nil keeps the listed columns, nil
deletes them.  Lines without SEPARATOR are left untouched.

Point and any narrowing in effect are restored afterwards.
Signal a `user-error' if SEPARATOR is the empty string."
  ;; An empty separator matches everywhere and never advances the search.
  (when (string= separator "")
    (user-error "Column separator must not be empty"))
  (let ((columns (numbers-to-number-list columns-str)))
    (save-excursion
      ;; Undo the region narrowing below when we are done.
      (save-restriction
        (when (region-active-p)
          (narrow-to-region (region-beginning) (region-end)))
        (goto-char (point-min))
        (while (search-forward separator nil t)
          (operate-columns-on-line separator columns keep?)
          ;; Go to the *start* of the next line.  `next-line' keeps the
          ;; current column, which can land past every separator of a
          ;; shorter line, and it signals on the last line of the buffer.
          (forward-line 1))))))
|
||
#+end_src
|
||
|
||
For each line, =operate-columns= calls this function:
|
||
#+begin_src emacs-lisp
|
||
(defun operate-columns-on-line (separator columns keep?)
  "Replace the current line after keeping or deleting COLUMNS.
Split the line on the literal string SEPARATOR; the text before
the first SEPARATOR is column 0.  COLUMNS is a list of column
numbers.  Keep only those columns if KEEP? is non-nil, delete
them and keep the rest otherwise.  The surviving columns are
joined with SEPARATOR again, and point ends up after the
inserted text."
  (let* ((start (line-beginning-position))
         (end (line-end-position))
         ;; `split-string' takes a regexp, so quote SEPARATOR; otherwise
         ;; a separator such as "." or "*" would not be matched literally.
         (fields (split-string (buffer-substring start end)
                               (regexp-quote separator)))
         (index -1)
         (kept (delq nil
                     (mapcar (lambda (field)
                               (setq index (1+ index))
                               (let ((listed (memql index columns)))
                                 (when (if keep? listed (not listed))
                                   field)))
                             fields))))
    (delete-region start end)
    (insert (mapconcat #'identity kept separator))))
|
||
#+end_src
|
||
|
||
I like the idea of the shell command, =cut=, where you can have an arbitrary character as a separator, and then either delete or keep the data between them, as columns. But I need a function that can convert a string of “columns”, for instance ="1, 4-7 9"= to a list of numbers, like ='(1 4 5 6 7 9)=:
|
||
#+begin_src emacs-lisp
|
||
(defun numbers-to-number-list (input)
  "Convert the string, INPUT, to a list of numbers.
Entries are separated by commas and/or whitespace.  An entry of
the form N-M stands for every integer from N through M, and
whitespace around the dash is allowed.

For instance: `1, 4-7 9' returns `(1 4 5 6 7 9)', and so does
`1, 4 - 7, 9'.  An empty INPUT returns nil."
  (let* ((dashed (rx (* space) "-" (* space)))
         (separator (rx (* space) (or "," space) (* space)))
         (ranged (rx (group (+ digit)) "-" (group (+ digit))))
         ;; Close up `4 - 7' to `4-7' first; splitting on whitespace
         ;; would otherwise tear the range apart before it is recognised.
         (compact (replace-regexp-in-string dashed "-" input))
         (entries (split-string compact separator t)))
    ;; Each entry yields a fresh list, so `mapcan' may splice them safely.
    (mapcan (lambda (entry)
              (if (string-match ranged entry)
                  (number-sequence
                   (string-to-number (match-string 1 entry))
                   (string-to-number (match-string 2 entry)))
                (list (string-to-number entry))))
            entries)))
|
||
#+end_src
|
||
Does this work?
|
||
#+begin_src emacs-lisp :tangle no
|
||
(ert-deftest numbers-to-number-list-test ()
  "Check a single number, mixed separators, and a dashed range."
  (pcase-dolist (`(,input . ,expected)
                 '(("2" . (2))
                   ("1, 2 3" . (1 2 3))
                   ("1, 4-7 9" . (1 4 5 6 7 9))))
    (should (equal (numbers-to-number-list input) expected))))
|
||
#+end_src
|
||
* Buffer-Oriented Functions
|
||
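If there is no specific function, but you can think of a shell command that will work, then this function pipes the buffer (or the selected region) through that command and replaces the text with its output: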
|
||
#+begin_src emacs-lisp
|
||
(defun replace-buffer-with-shell-command (command)
  "Replace the buffer, or the active region, with the output of COMMAND.
COMMAND is the command line of an external executable; it is run
on the text in question and its output is put in that text's place.

This is a wrapper around `shell-command-on-region'."
  (interactive "sCommand: ")
  (save-excursion
    (save-restriction
      ;; With an active region, narrow first so that the buffer bounds
      ;; used below are the bounds of the region.
      (if (region-active-p)
          (narrow-to-region (region-beginning) (region-end)))
      (let ((output-buffer nil)
            (replace t))
        (shell-command-on-region (point-min) (point-max)
                                 command output-buffer replace)))))
|
||
#+end_src
|
||
|
||
* Technical Artifacts :noexport:
|
||
Let's =provide= a name so we can =require= this file:
|
||
#+begin_src emacs-lisp :exports none
|
||
;; Announce the `ha-data' feature so that `(require 'ha-data)' finds this file.
(provide 'ha-data)
|
||
;;; ha-data.el ends here
|
||
#+end_src
|
||
|
||
#+DESCRIPTION: configuring Emacs to edit files of data.
|
||
|
||
#+PROPERTY: header-args:sh :tangle no
|
||
#+PROPERTY: header-args:emacs-lisp :tangle yes
|
||
#+PROPERTY: header-args :results none :eval no-export :comments no mkdirp yes
|
||
|
||
#+OPTIONS: num:nil toc:nil todo:nil tasks:nil tags:nil date:nil
|
||
#+OPTIONS: skip:nil author:nil email:nil creator:nil timestamp:nil
|
||
#+INFOJS_OPT: view:nil toc:nil ltoc:t mouse:underline buttons:0 path:http://orgmode.org/org-info.js
|