2

I want to check whether the string variable s is a number (integer or float) formatted as a string. I thought it could be done using string-to-number like this

(defun string-number-base-p (s)
  "Return t if S is \"0\" or `string-to-number' does not yield 0 for S.
Otherwise return nil.  This is only a rough check: `string-to-number'
parses a leading number and ignores what follows, so a string with
trailing characters such as \"123zxyz\" is accepted as well."
  (cond ((equal s "0") t)
        ;; A result of 0 for anything but \"0\" is treated as a failed parse.
        ((equal 0 (string-to-number s)) nil)
        (t t)))

but (as NickD pointed out) that also returns t for string-numbers with trailing characters like "123zxyz".

Note that the built-in function string-to-number returns 0 instead of nil when it fails to convert a string, but also returns 0 for the string "0", which contains a valid number. Also, string-to-number treats , as a non-number character rather than as a decimal or thousands separator, and hence ignores everything from the , onward when parsing. So (string-to-number "2,3") returns 2.

Examples of valid number-strings include "0", "1", "-1", "1.3", and "4.3e10".

Comparison of answers

Based on the answers given, I've run some output and speed comparisons: see the table and code below. Notable is that the pure regex version is not slower than the other versions while being much more flexible: To parse the strings in the header of the table 1 million times, the regex-based method took 13.6s, the read-based method 13.6s, and the imperfect base method 11.2s.

string-number "0" "1" "-0" "-1" "+0" "+1" "01" "-01" "+01" "2.3" "-2.3" "1.00" "0.00" "-0.00" "00.00" "-00.00" "02.3" ".0" ".1" "-.1" "0." "1." "10." "2,3" ",0" "0e0" "2e5" "2.3e5" "-2.3e5" "2.3e-5" "2.3e03" "2.3e0" "0.01e4" ".1e5" "-.1e5" "-.0e0" "-0.0e10" "2,3e5" " " "." "-" "-." "b1" "1b" "2-3" "3.4.5" "4.,6" "1,." "e" "-e" "-.e" ".e4" "1e2-2" "e2.3" "e2.3.4" "-e10" "1e.-" "10e." "1\n" "1\t" "1 1" "\n" "\t"
-regex-p t t t t t t t t t t t t t t t t t t t t t t t t t t t t t t t t t t t t t t nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil t nil nil nil nil
-read-p t t t t t t t t t t t t t t t t t t t t t t t t nil t t t t t t t t t t t t t nil nil nil nil nil nil nil nil t t nil nil nil nil nil nil nil nil nil nil t t t nil nil
-read2-p t t t t t t t t t t t t t t t t t t t t t t t t nil t t t t t t t t t t t t t nil nil nil nil nil nil nil nil t t nil nil nil nil nil nil nil nil nil nil t t t nil nil
-read3-p t t t t t t t t t t t t t t t t t t t t t t t nil nil t t t t t t t t t t t t nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil t t nil nil nil
-base-p t t nil t nil t t t t t t t t t t t t t t t nil t t t nil t t t t t t t t t t t t t nil nil nil nil nil t t t t t nil nil nil nil t nil nil nil t t t t t nil nil
;; Test inputs shared by the table-building and benchmarking forms below.
;; Every string in this list is passed to each string-number-*-p predicate.
;; The order matters: it is the column order of the comparison table.
;; Note that "1\n", "1\t", "\n" and "\t" contain real newline/tab characters.
(setq l (list "0" "1" "-0" "-1" "+0" "+1" "01" "-01" "+01" "2.3" "-2.3" "1.00"
              "0.00" "-0.00" "00.00" "-00.00" "02.3" ".0" ".1" "-.1" "0." "1."
              "10." "2,3" ",0" "0e0" "2e5" "2.3e5" "-2.3e5" "2.3e-5" "2.3e03"
              "2.3e0" "0.01e4" ".1e5" "-.1e5" "-.0e0" "-0.0e10" "2,3e5" " " "."
              "-" "-." "b1" "1b" "2-3" "3.4.5" "4.,6" "1,." "e" "-e" "-.e"
              ".e4" "1e2-2" "e2.3" "e2.3.4" "-e10" "1e.-" "10e." "1\n" "1\t"
              "1 1" "\n" "\t"))

;; The pattern is anchored with \` and \' (start and end of the whole
;; string) instead of ^ and $ (start and end of any line).  With the line
;; anchors, strings such as "1\n" or "abc\n123" were accepted because one
;; of their lines is a number; with the string anchors they are rejected.
(defconst string-number-regex
  (concat "\\`[+-]?\\(?:[0-9]+\\(?:[.,][0-9]*\\)?\\(?:e[+-]?[0-9]+\\)?"
          "\\|[.,][0-9]+\\(?:e[+-]?[0-9]+\\)?\\)\\'")
  "Matches a whole string holding an integer or a float with optional exponent.
This allows for leading and trailing decimal point, leading zeros in base,
leading zeros in exponent, + signs, and , as alternative decimal separator.
The match must span the entire string, so embedded or trailing newlines
make the match fail.")

(defun string-number-regex-p (s)
  "Return t if `string-number-regex' matches the string S, else nil.
The match data is not modified."
  ;; `string-match-p' returns a match position; normalize it to t.
  (if (string-match-p string-number-regex s) t nil))

(defun string-number-read-p (s)
  "Return t if the first Lisp object `read' from the string S is a number.
Return nil if that object is not a number, or if reading S signals an
error.  Only the first object in S is looked at."
  (ignore-errors
    (numberp (read s))))

(defun string-number-read2-p (s)
  "Return t if the first Lisp object `read' from the string S is a number.
S is only read when it contains at least one character other than
space, period, comma, tab, newline and carriage return; otherwise the
result is nil without reading.  Errors signaled by `read' are not
caught here."
  (if (string-match-p "[^ .,\t\n\r]" s)
      (numberp (read s))
    nil))

(cl-defun string-number-read3-p (str &key (test #'numberp))
  "Test whether STR is a string that contains one sexp of a certain type.
The type is identified by the TEST, a predicate called with the object
read from STR.  The default TEST is `numberp'.
Return t on success.  Return nil if STR is not a string, contains only
whitespace, cannot be read, or has non-whitespace text after the first
sexp.  The global match data is left untouched."
  (and (stringp str)
       (string-match-p "\\S-" str) ;; not a whitespace string
       (condition-case nil
           ;; Read from a buffer instead of from the string so that point
           ;; marks where the first sexp ended and the rest can be checked.
           (with-temp-buffer
             (insert str)
             (goto-char (point-min))
             (and (funcall test (read (current-buffer)))
                  (looking-at-p "\\s-*\\'"))) ;; only whitespace up to eob
         (error nil))))

(defun string-number-base-p (s)
  "Return t if S is \"0\" or `string-to-number' does not yield 0 for S.
Otherwise return nil.  This is the imperfect baseline check: since
`string-to-number' parses a leading number and ignores what follows,
strings with trailing characters are accepted as well."
  (if (equal s "0")
      t
    ;; Any result other than the integer 0 counts as a successful parse.
    (not (equal 0 (string-to-number s)))))

;; Build the comparison table as markdown text: a header row with the
;; quoted test strings from `l', a separator row, and one row per
;; predicate holding "t" or "nil" for each test string.
;; The rows are driven by one list so that every predicate, including
;; `string-number-read3-p', gets a row in the same format.
(let ((predicates '(("-regex-p" . string-number-regex-p)
                    ("-read-p" . string-number-read-p)
                    ("-read2-p" . string-number-read2-p)
                    ("-read3-p" . string-number-read3-p)
                    ("-base-p" . string-number-base-p))))
  (concat
   "| string-number | "
   (mapconcat (lambda (s) (concat "\"" s "\"")) l " | ")
   " |\n|---|"
   (mapconcat (lambda (_s) "---") l "|")
   "|\n"
   (mapconcat
    (lambda (row)
      ;; (car row) is the row label, (cdr row) the predicate symbol.
      (concat "| " (car row) " | "
              (mapconcat (lambda (s) (symbol-name (funcall (cdr row) s)))
                         l " | ")
              " |"))
    predicates
    "\n")))
;; => table

;; Time four of the predicates over the whole test list `l'.
;; `benchmark-run-compiled' byte-compiles each form and runs it REPS times.
;; Each entry of the resulting list is
;; (NAME ELAPSED-SECONDS GC-COUNT GC-SECONDS).
;; `string-number-read3-p' is not part of this run.
(let ((reps 1000000))
  (list 
   (cons 'regex (benchmark-run-compiled reps (mapc #'string-number-regex-p l)))
   (cons 'read (benchmark-run-compiled reps (mapc #'string-number-read-p l)))
   (cons 'read2 (benchmark-run-compiled reps (mapc #'string-number-read2-p l)))
   (cons 'base (benchmark-run-compiled reps (mapc #'string-number-base-p l)))))
;; =>
;; ((regex 13.573386  0 0.0)
;;  (read  13.607431 81 4.161939)
;;  (read2 17.170425 14 0.739117)
;;  (base  11.242897 12 0.591818))
;; note: read3 is around 40 times slower, 
;; taking 58s for 100000 repetitions where the others take 1.4s

;; Same timing run, but with `benchmark-run', which does not byte-compile
;; the benchmarked form first.  Each entry of the resulting list is
;; (NAME ELAPSED-SECONDS GC-COUNT GC-SECONDS).
(let ((reps 1000000))
  (list 
   (cons 'regex (benchmark-run reps (mapc #'string-number-regex-p l)))
   (cons 'read (benchmark-run reps (mapc #'string-number-read-p l)))
   (cons 'read2 (benchmark-run reps (mapc #'string-number-read2-p l)))
   (cons 'base (benchmark-run reps (mapc #'string-number-base-p l)))))
;; =>
;; ((regex 13.58847  0 0.0)
;;  (read  15.86764 81 6.213117)
;;  (read2 17.26436 14 0.870470)
;;  (base  11.18225 12 0.565733))
orgtre
  • 1,012
  • 4
  • 15

3 Answers

2

string-match-p can be used to check for numbers in a string STRING using the syntax (string-match-p REGEXP STRING) where REGEXP is a string holding a regular expression. Note that string-match-p does not change match data, whereas string-match does.

Matching numbers (integers and floats) using regexes is quite involved in general, see this Stackoverflow question (or this one) for a great overview. A few examples are given below. To test them in the browser click the test link. To test them in Emacs use M-x re-builder.

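In most of the regexes below it is assumed that explicit plus signs + are not allowed. To allow them, change the parts of the regexes matching a - (i.e. every - except those that are part of a character range such as 0-9) to [+-]. Moreover, it is assumed that the decimal separator is .; to allow , too, simply add it directly after every occurrence of . inside the brackets. See (regex12) for an example. It is also assumed that signed zeros are allowed. Note that ^ and $ match at the beginning and end of every line, not only of the whole string, so a string such as "1\n" still matches; if the string may contain newlines, use \\` and \\' in their place.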

Match only integers

positive integers (test):

"^\\(?:0\\|[1-9][0-9]*\\)$" (regex1)

positive integers, allow leading zeros (test):

"^[0-9]+$" (regex2)

(positive or negative) integers (test):

"^-?\\(?:0\\|[1-9][0-9]*\\)$" (regex3)

(positive or negative) integers, allow leading zeros (test):

"^-?[0-9]+$" (regex4)

Match integers and floats without exponent

integers and floats without exponents, allowing for leading decimal point (test):

"^-?\\(?:\\(?:\\(?:0\\|[1-9][0-9]*\\)?[.][0-9]+\\)\\|\\(?:0\\|[1-9][0-9]*\\)\\)$" (regex5)

integers and floats without exponents, allowing for leading decimal point, and allowing leading zeros (test):

"^-?\\(?:[0-9]*[.]\\)?[0-9]+$" (regex6)

integers and floats without exponents, allowing for leading and trailing decimal point, and allowing leading zeros (test):

"^-?\\(?:[0-9]+\\(?:[.][0-9]*\\)?\\|[.][0-9]+\\)$" (regex7)

Match integers and floats with exponent (scientific notation)

integers and floats with exponent, allowing for leading decimal point (test):

"^-?\\(?:\\(?:\\(?:0\\|[1-9][0-9]*\\)?[.][0-9]+\\)\\|\\(?:0\\|[1-9][0-9]*\\)\\)\\(?:e-?\\(?:0\\|[1-9][0-9]*\\)?\\)?$" (regex8)

integers and floats with exponent, allowing for leading decimal point, leading zeros in base, and leading zeros in exponent (test):

"^-?\\(?:[0-9]*[.]\\)?[0-9]+\\(?:e-?[0-9]+\\)?$" (regex9)

integers and floats with exponent, allowing for leading decimal point, leading zeros in base, and overly permissive exponent (this is eshell-number-regexp surrounded with ^ and $ and using shy groups) (test):

"^-?\\(?:[0-9]*[.]\\)?[0-9]+\\(?:e[-0-9.]+\\)?$" (regex10)

integers and floats with exponent, allowing for leading and trailing decimal point, leading zeros in base, and leading zeros in exponent (test):

"^[-]?\\(?:[0-9]+\\(?:[.][0-9]*\\)?\\(?:e[-]?[0-9]+\\)?\\|[.][0-9]+\\(?:e[-]?[0-9]+\\)?\\)$" (regex11)

integers and floats with exponent, allowing for leading and trailing decimal point, leading zeros in base, leading zeros in exponent, + signs, and , as alternative decimal separator (test):

"^[+-]?\\(?:[0-9]+\\(?:[.,][0-9]*\\)?\\(?:e[+-]?[0-9]+\\)?\\|[.,][0-9]+\\(?:e[+-]?[0-9]+\\)?\\)$" (regex12)
orgtre
  • 1,012
  • 4
  • 15
lawlist
  • 18,826
  • 5
  • 37
  • 118
  • Yes, some kind of regex would do it. I added some examples of what are valid number-strings, and your answer works for only some. – orgtre Jan 14 '23 at 18:32
  • The question has been modified from the initial example of an integer, to now an integer / float / scientific, .... A moving target becomes somewhat difficult to answer. – lawlist Jan 14 '23 at 18:37
  • Sorry, but I thought a "number" is a general term and not necessarily an integer. – orgtre Jan 14 '23 at 18:39
  • I will keep the answer posted for the time being, pending something better by another forum participant, but I am changing `string-match` to `string-match-p` (which does not change the match data). – lawlist Jan 14 '23 at 18:47
  • For easy reference, I expanded the post with regexes covering many cases, as regexes found elsewhere on stackexchange often include lookarounds and other non-Emacs syntax, plus they have to be quoted. – orgtre Jan 17 '23 at 14:29
  • Beware of using regexes allowing `,` as a decimal separator if you want not only to check for a number, but also to parse it using `string-to-number`. `string-to-number` doesn't consider `,` a decimal separator and will instead truncate the number at `,` when parsing. This applies to regex12. Regexes 1-11 seem to match only numbers which are correctly parsed by `string-to-number`. – orgtre Jan 18 '23 at 12:58
2

the read function can be applied to a string and then the numberp function tests whether the read object represents a number or not, integer, or float.

;; For each string, read its first Lisp object and test it with `numberp'.
(mapcar (lambda (str)
          (numberp (read str)))
        '("123" "1.23" "sqrt(123)" "12e-3" "12ab3"))

=>

 (t t nil t nil)

You catch any error with this function

(defun string-numberp (s)
  "Return t if the first Lisp object `read' from the string S is a number.
Return nil if that object is not a number, or if reading S signals an
error (as it does for strings like \".\" or \",\")."
  ;; The first argument of `condition-case' is a variable to bind the
  ;; error object to, not the name of the condition to catch.  The error
  ;; object is not needed here, so nil is passed; the `error' handler
  ;; below is what catches the read failures.
  (condition-case nil
      (numberp (read s))
    (error nil)))

Test :

;; Apply `string-numberp' to valid numbers and to strings `read' rejects.
(mapcar #'string-numberp
        '("12345" "12.345" "#12345" "." "," "\n"))

=>

(t t nil nil nil nil)
gigiair
  • 2,124
  • 1
  • 8
  • 14
  • Nice, this works well, but will throw an error for strings like `"."`, `","`, or `"\n"`. – orgtre Jan 16 '23 at 10:42
  • 1
    I added a function that is supposed to trap any invalid read syntax error. I haven't tested it extensively. – gigiair Jan 16 '23 at 12:58
  • Thanks, I tested it a bit. It suppresses wanted errors when passed things like `'10` and even does very bad things when passed things like `'kill-line`... In my comparison above I also tried a variant `string-number-read2-p` which seems to work well, but I don't know if it catches all input problems. – orgtre Jan 16 '23 at 15:05
1

Use read. It just does the right job with regard to Elisp.
But, do not use it on strings because read only reads the first sexp.
You must be able to test what follows by checking the text from the next read position. That is only possible by reading from a buffer, e.g. a temporary one.

You have also to protect against expressions that cannot be read, e.g., by condition-case.

The following example code contains some tests as comments at the end.

(require 'cl-lib)

(cl-defun string-test-p (str &key (test #'numberp))
  "Test whether STR is a string that contains one sexp of a certain type.
The type is identified by the TEST, a predicate called with the object
read from STR.  The default TEST is `numberp'.
Return t on success.  Return nil if STR is not a string, contains only
whitespace, cannot be read, or has non-whitespace text after the first
sexp.  STR is only read, never evaluated.  The global match data is
left untouched."
  (and (stringp str)
       (string-match-p "\\S-" str) ;; not a whitespace string
       (condition-case nil
           ;; Read from a buffer instead of from the string so that point
           ;; marks where the first sexp ended and the rest can be checked.
           (with-temp-buffer
             (insert str)
             (goto-char (point-min))
             (and (funcall test (read (current-buffer)))
                  (looking-at-p "\\s-*\\'"))) ;; only whitespace up to eob
         (error nil))))
;; Test:
;; (string-test-p " 1.23 ") ;; Returns t.
;; (string-test-p "'kill-line") ;; Returns nil. This is no problem because the string is read but not evaluated.
;; (string-test-p "(kill-line)") ;; Returns nil. This is no problem because the string is read but not evaluated.
;; (string-test-p " \"string\" " :test #'stringp) ;; Returns t.
;; (string-test-p " kill-line " :test #'functionp) ;; Returns t.
;; (string-test-p " (unfinished list ") ;; Returns nil.
Tobias
  • 32,569
  • 1
  • 34
  • 75
  • Thanks, I added this to the comparison table as `string-number-read3-p`, it has good parsing properties, but offers no flexibility as to what counts as a number and is around 40 times slower than the other answers. – orgtre Jan 18 '23 at 11:02
  • @orgtre Yes, it is tailored to read numbers from strings that are appropriate for the Elisp reader. – Tobias Jan 18 '23 at 11:44
  • Is that the same as "numbers in strings that are correctly parsed by `string-to-number`"? That would be useful, since many times when checking for a number, you actually want to parse it too. With the regex approach one risks matching numbers which won't be parsed correctly. – orgtre Jan 18 '23 at 12:51
  • 1
    @orgtre `string-to-number` is essentially what is used by the Elisp reader ([`string_to_number` in `lread.c`](https://github.com/emacs-mirror/emacs/blob/281f48f19ecad706a639d57cb937afb0b97eded7/src/lread.c#L4480)). So `read` reads the same as `string-to-number` if the string can be read as number. – Tobias Jan 18 '23 at 13:13
  • @orgtre Note the call to `string_to_number` at the end of the [definition of `string-to-number`](https://github.com/emacs-mirror/emacs/blob/281f48f19ecad706a639d57cb937afb0b97eded7/src/data.c#L3059)`. – Tobias Jan 18 '23 at 13:16