Richard W.M. Jones
2017-Sep-20 14:50 UTC
[Libguestfs] [PATCH 0/4] Replace some uses of the Str module with PCRE.
Str is a pretty ugly regexp module. Let's try to replace it with PCRE. This series of commits goes some small way towards that eventual goal. - - - I wonder if there was a deep reason why we had this? let unix2dos s String.concat "\r\n" (Str.split_delim (Str.regexp_string "\n") s) I replaced it with what I think should be (nearly) equivalent: let unix2dos s String.concat "\r\n" (String.nsplit "\n" s) Rich.
Richard W.M. Jones
2017-Sep-20 14:50 UTC
[Libguestfs] [PATCH 1/4] common/mlpcre: Raise Invalid_argument if PCRE.sub n parameter is negative.
---
common/mlpcre/pcre-c.c | 7 +++++--
common/mlpcre/pcre_tests.ml | 7 +++++++
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/common/mlpcre/pcre-c.c b/common/mlpcre/pcre-c.c
index 6fae0e6f4..da9b50d34 100644
--- a/common/mlpcre/pcre-c.c
+++ b/common/mlpcre/pcre-c.c
@@ -201,6 +201,7 @@ value
guestfs_int_pcre_sub (value nv)
{
CAMLparam1 (nv);
+ int n = Int_val (nv);
CAMLlocal1 (strv);
int len;
CLEANUP_FREE char *str = NULL;
@@ -209,8 +210,10 @@ guestfs_int_pcre_sub (value nv)
if (m == NULL)
raise_pcre_error ("PCRE.sub called without calling PCRE.matches",
0);
- len = pcre_get_substring (m->subject, m->vec, m->r, Int_val (nv),
- (const char **) &str);
+ if (n < 0)
+ caml_invalid_argument ("PCRE.sub: n must be >= 0");
+
+ len = pcre_get_substring (m->subject, m->vec, m->r, n, (const char
**) &str);
if (len == PCRE_ERROR_NOSUBSTRING)
caml_raise_not_found ();
diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml
index e5214eab8..2b18f462f 100644
--- a/common/mlpcre/pcre_tests.ml
+++ b/common/mlpcre/pcre_tests.ml
@@ -69,6 +69,13 @@ let () | PCRE.Error (msg, code) ->
failwith (sprintf "PCRE error: %s (PCRE error code %d)" msg
code)
+(* Run some out of range [sub] calls to ensure an exception is thrown. *)
+let () + let re2 = compile "(a+)(b*)" in
+ ignore (matches re2 "ccac");
+ (try ignore (sub 3) with Not_found -> ());
+ (try ignore (sub (-1)) with Invalid_argument _ -> ())
+
(* Compile some bad regexps and check that an exception is thrown.
* It would be nice to check the error message is right but
* that involves dealing with language and future changes of
--
2.13.2
Richard W.M. Jones
2017-Sep-20 14:50 UTC
[Libguestfs] [PATCH 2/4] common/mlpcre: Add PCRE.subi to return indexes instead of the substring.
---
common/mlpcre/PCRE.ml | 3 +--
common/mlpcre/PCRE.mli | 14 ++++++++++++++
common/mlpcre/pcre-c.c | 27 +++++++++++++++++++++++++++
common/mlpcre/pcre_tests.ml | 19 ++++++++++++++++---
4 files changed, 58 insertions(+), 5 deletions(-)
diff --git a/common/mlpcre/PCRE.ml b/common/mlpcre/PCRE.ml
index 94eea4b34..5269d41f8 100644
--- a/common/mlpcre/PCRE.ml
+++ b/common/mlpcre/PCRE.ml
@@ -23,10 +23,9 @@ exception Error of string * int
type regexp
external compile : string -> regexp = "guestfs_int_pcre_compile"
-
external matches : regexp -> string -> bool =
"guestfs_int_pcre_matches"
-
external sub : int -> string = "guestfs_int_pcre_sub"
+external subi : int -> int * int = "guestfs_int_pcre_subi"
let () Callback.register_exception "PCRE.Error" (Error
("", 0))
diff --git a/common/mlpcre/PCRE.mli b/common/mlpcre/PCRE.mli
index 331a50a9a..02f16d19d 100644
--- a/common/mlpcre/PCRE.mli
+++ b/common/mlpcre/PCRE.mli
@@ -77,3 +77,17 @@ val sub : int -> string
If there was no nth substring then this raises [Not_found].
This can also raise {!Error} for other PCRE-related errors. *)
+
+val subi : int -> int * int
+(** Return the nth substring (capture) matched by the previous call
+ to {!matches} in the current thread.
+
+ This is the same as {!sub} but instead of copying the
+ substring out, it returns the indexes into the original string
+ of the first character of the substring and the first
+ character after the substring.
+
+ (See pcreapi(3) section "How pcre_exec() returns captured
substrings"
+ for exact details).
+
+ If there was no nth substring then this raises [Not_found]. *)
diff --git a/common/mlpcre/pcre-c.c b/common/mlpcre/pcre-c.c
index da9b50d34..15775dad0 100644
--- a/common/mlpcre/pcre-c.c
+++ b/common/mlpcre/pcre-c.c
@@ -225,3 +225,30 @@ guestfs_int_pcre_sub (value nv)
memcpy (String_val (strv), str, len);
CAMLreturn (strv);
}
+
+value
+guestfs_int_pcre_subi (value nv)
+{
+ CAMLparam1 (nv);
+ int n = Int_val (nv);
+ CAMLlocal1 (rv);
+ struct last_match *m = gl_tls_get (last_match);
+
+ if (m == NULL)
+ raise_pcre_error ("PCRE.subi called without calling
PCRE.matches", 0);
+
+ if (n < 0)
+ caml_invalid_argument ("PCRE.subi: n must be >= 0");
+
+ /* eg if there are 2 captures, m->r == 3, and valid values of n are
+ * 0, 1 or 2.
+ */
+ if (n >= m->r)
+ caml_raise_not_found ();
+
+ rv = caml_alloc (2, 0);
+ Store_field (rv, 0, Val_int (m->vec[n*2]));
+ Store_field (rv, 1, Val_int (m->vec[n*2+1]));
+
+ CAMLreturn (rv);
+}
diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml
index 2b18f462f..316a4348e 100644
--- a/common/mlpcre/pcre_tests.ml
+++ b/common/mlpcre/pcre_tests.ml
@@ -34,6 +34,12 @@ let sub i eprintf " %s\n%!" r;
r
+let subi i + eprintf "PCRE.subi %d ->%!" i;
+ let i1, i2 = PCRE.subi i in
+ eprintf " (%d, %d)\n%!" i1 i2;
+ (i1, i2)
+
let () try
let re0 = compile "a+b" in
@@ -62,19 +68,26 @@ let () assert (matches re2 "ccac" = true);
assert (sub 1 = "a");
assert (sub 2 = "");
- assert (sub 0 = "a")
+ assert (sub 0 = "a");
+ assert (subi 0 = (2, 3));
+ assert (subi 1 = (2, 3));
+ assert (subi 2 = (3, 3))
with
| Not_found ->
failwith "one of the PCRE.sub functions unexpectedly raised
Not_found"
| PCRE.Error (msg, code) ->
failwith (sprintf "PCRE error: %s (PCRE error code %d)" msg
code)
-(* Run some out of range [sub] calls to ensure an exception is thrown. *)
+(* Run some out of range [sub] and [subi] calls to ensure an exception
+ * is thrown.
+ *)
let () let re2 = compile "(a+)(b*)" in
ignore (matches re2 "ccac");
(try ignore (sub 3) with Not_found -> ());
- (try ignore (sub (-1)) with Invalid_argument _ -> ())
+ (try ignore (sub (-1)) with Invalid_argument _ -> ());
+ (try ignore (subi 3) with Not_found -> ());
+ (try ignore (subi (-1)) with Invalid_argument _ -> ())
(* Compile some bad regexps and check that an exception is thrown.
* It would be nice to check the error message is right but
--
2.13.2
Richard W.M. Jones
2017-Sep-20 14:50 UTC
[Libguestfs] [PATCH 3/4] common/mlpcre: Add PCRE.replace function.
Similar to Perl s/// but lacks backreferences.
---
common/mlpcre/PCRE.ml | 25 +++++++++++++++++++++++++
common/mlpcre/PCRE.mli | 13 +++++++++++++
common/mlpcre/pcre_tests.ml | 23 ++++++++++++++++++++++-
3 files changed, 60 insertions(+), 1 deletion(-)
diff --git a/common/mlpcre/PCRE.ml b/common/mlpcre/PCRE.ml
index 5269d41f8..d80021f8c 100644
--- a/common/mlpcre/PCRE.ml
+++ b/common/mlpcre/PCRE.ml
@@ -27,5 +27,30 @@ external matches : regexp -> string -> bool =
"guestfs_int_pcre_matches"
external sub : int -> string = "guestfs_int_pcre_sub"
external subi : int -> int * int = "guestfs_int_pcre_subi"
+let rec replace ?(global = false) patt subst subj + if not (matches patt subj)
then
+ (* return original string unchanged if patt doesn't match *)
+ subj
+ else (
+ (* If patt matches "yyyy" in the original string then we have
+ * the following situation, where "xxxx" is the part of the
+ * original string before the match, and "zzzz..." is the
+ * part after the match:
+ * "xxxxyyyyzzzzzzzzzzzzz"
+ * ^ ^
+ * i1 i2
+ *)
+ let i1, i2 = subi 0 in
+ let xs = String.sub subj 0 i1 (* "xxxx", part before the match *)
in
+ let zs = String.sub subj i2 (String.length subj - i2) (* rest *) in
+
+ (* If the global flag was set, we want to continue substitutions
+ * in the rest of the string.
+ *)
+ let zs = if global then replace ~global patt subst zs else zs in
+
+ xs ^ subst ^ zs
+ )
+
let () Callback.register_exception "PCRE.Error" (Error
("", 0))
diff --git a/common/mlpcre/PCRE.mli b/common/mlpcre/PCRE.mli
index 02f16d19d..634cc600c 100644
--- a/common/mlpcre/PCRE.mli
+++ b/common/mlpcre/PCRE.mli
@@ -91,3 +91,16 @@ val subi : int -> int * int
for exact details).
If there was no nth substring then this raises [Not_found]. *)
+
+val replace : ?global:bool -> regexp -> string -> string -> string
+(** [replace ?global patt subst subj] performs a search and replace
+ on the subject string ([subj]). Where [patt] matches the
+ string, [subst] is substituted. This works similarly to the
+ Perl function [s///].
+
+ The [?global] flag defaults to false, so only the first
+ instance of [patt] in the string is replaced. If set to true
+ then every instance of [patt] in the string is replaced.
+
+ Note that this function does not allow backreferences.
+ Any captures in [patt] are ignored. *)
diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml
index 316a4348e..b5f712d20 100644
--- a/common/mlpcre/pcre_tests.ml
+++ b/common/mlpcre/pcre_tests.ml
@@ -28,6 +28,12 @@ let matches re str eprintf " %b\n%!" r;
r
+let replace ?(global = false) patt subst subj + eprintf "PCRE.replace
global:%b <patt> %s %s ->%!" global subst subj;
+ let r = PCRE.replace ~global patt subst subj in
+ eprintf " %s\n%!" r;
+ r
+
let sub i eprintf "PCRE.sub %d ->%!" i;
let r = PCRE.sub i in
@@ -45,6 +51,7 @@ let () let re0 = compile "a+b" in
let re1 = compile "(a+)b" in
let re2 = compile "(a+)(b*)" in
+ let re3 = compile "[^A-Za-z0-9_]" in
assert (matches re0 "ccaaabbbb" = true);
assert (sub 0 = "aaab");
@@ -71,7 +78,21 @@ let () assert (sub 0 = "a");
assert (subi 0 = (2, 3));
assert (subi 1 = (2, 3));
- assert (subi 2 = (3, 3))
+ assert (subi 2 = (3, 3));
+
+ assert (replace re0 "dd" "abcabcaabccca" =
"ddcabcaabccca");
+ assert (replace ~global:true re0 "dd" "abcabcaabccca" =
"ddcddcddccca");
+
+ (* This example copies a usage from customize/firstboot.ml
+ * "\xc2\xa3" is utf-8 for the GBP sign. Ideally PCRE would
+ * recognize that this is a single character, however doing that
+ * would involve passing the PCRE_UTF8 flag when compiling
+ * patterns, and that could be problematic if PCRE was built
+ * without Unicode support (XXX).
+ *)
+ assert (replace ~global:true re3 "-" "this is
a\xc2\xa3funny.name?"
+ (* = "this-is-a-funny-name-" if UTF-8 worked *)
+ = "this-is-a--funny-name-");
with
| Not_found ->
failwith "one of the PCRE.sub functions unexpectedly raised
Not_found"
--
2.13.2
Richard W.M. Jones
2017-Sep-20 14:50 UTC
[Libguestfs] [PATCH 4/4] customize: Remove use of Str module from virt-customize code.
---
customize/Makefile.am | 3 +++
customize/firstboot.ml | 6 +++---
customize/ssh_key.ml | 7 +++----
sysprep/Makefile.am | 3 +++
v2v/Makefile.am | 4 ++++
5 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/customize/Makefile.am b/customize/Makefile.am
index 18f4dce44..ff2c2e2d0 100644
--- a/customize/Makefile.am
+++ b/customize/Makefile.am
@@ -126,6 +126,7 @@ OCAMLPACKAGES = \
-I $(top_builddir)/ocaml \
-I $(top_builddir)/common/mlstdutils \
-I $(top_builddir)/common/mlutils \
+ -I $(top_builddir)/common/mlpcre \
-I $(top_builddir)/mllib \
-I $(builddir)
if HAVE_OCAML_PKG_GETTEXT
@@ -155,6 +156,7 @@ endif
OCAMLLINKFLAGS = \
mlstdutils.$(MLARCHIVE) \
mlguestfs.$(MLARCHIVE) \
+ mlpcre.$(MLARCHIVE) \
mlcutils.$(MLARCHIVE) \
mllib.$(MLARCHIVE) \
customize.$(MLARCHIVE) \
@@ -176,6 +178,7 @@ virt_customize_DEPENDENCIES = \
$(CUSTOMIZE_THEOBJECTS) \
$(CUSTOMIZE_CMA) \
../common/mlutils/mlcutils.$(MLARCHIVE) \
+ ../common/mlpcre/mlpcre.$(MLARCHIVE) \
../mllib/mllib.$(MLARCHIVE)
virt_customize_LINK = \
$(top_srcdir)/ocaml-link.sh -cclib '$(OCAMLCLIBS)' -- \
diff --git a/customize/firstboot.ml b/customize/firstboot.ml
index b07bda001..3ac669e5b 100644
--- a/customize/firstboot.ml
+++ b/customize/firstboot.ml
@@ -25,12 +25,12 @@ open Common_gettext.Gettext
open Regedit
let unix2dos s - String.concat "\r\n" (Str.split_delim
(Str.regexp_string "\n") s)
+ String.concat "\r\n" (String.nsplit "\n" s)
let sanitize_name - let rex = Str.regexp "[^A-Za-z0-9_]" in
+ let rex = PCRE.compile "[^A-Za-z0-9_]" in
fun n ->
- let n = Str.global_replace rex "-" n in
+ let n = PCRE.replace ~global:true rex "-" n in
let len = String.length n and max = 60 in
if len >= max then String.sub n 0 max else n
diff --git a/customize/ssh_key.ml b/customize/ssh_key.ml
index 185536d1d..da0e7d90c 100644
--- a/customize/ssh_key.ml
+++ b/customize/ssh_key.ml
@@ -47,8 +47,8 @@ and parse_selector_list orig_arg = function
(* Find the local [on the host] user's SSH public key. See
* ssh-copy-id(1) default_ID_file for rationale.
*)
-let pubkey_re = Str.regexp "^id.*\\.pub$"
-let pubkey_ignore_re = Str.regexp ".*-cert\\.pub$"
+let pubkey_re = PCRE.compile "^id.*\\.pub$"
+let pubkey_ignore_re = PCRE.compile ".*-cert\\.pub$"
let local_user_ssh_pubkey () let home_dir @@ -60,8 +60,7 @@ let
local_user_ssh_pubkey () let files = Array.to_list files in
let files = List.filter (
fun file ->
- Str.string_match pubkey_re file 0 &&
- not (Str.string_match pubkey_ignore_re file 0)
+ PCRE.matches pubkey_re file && not (PCRE.matches pubkey_ignore_re
file)
) files in
if files = [] then
error (f_"ssh-inject: no public key file found in %s") ssh_dir;
diff --git a/sysprep/Makefile.am b/sysprep/Makefile.am
index 0acd1ddbf..5c20a437b 100644
--- a/sysprep/Makefile.am
+++ b/sysprep/Makefile.am
@@ -117,6 +117,7 @@ OCAMLPACKAGES = \
-I $(top_builddir)/common/visit/.libs \
-I $(top_builddir)/common/mlstdutils \
-I $(top_builddir)/common/mlutils \
+ -I $(top_builddir)/common/mlpcre \
-I $(top_builddir)/common/mlvisit \
-I $(top_builddir)/mllib \
-I $(top_builddir)/customize
@@ -145,6 +146,7 @@ endif
OCAMLLINKFLAGS = \
mlstdutils.$(MLARCHIVE) \
mlguestfs.$(MLARCHIVE) \
+ mlpcre.$(MLARCHIVE) \
mlcutils.$(MLARCHIVE) \
mllib.$(MLARCHIVE) \
mlvisit.$(MLARCHIVE) \
@@ -155,6 +157,7 @@ virt_sysprep_DEPENDENCIES = \
$(OBJECTS) \
../common/mlstdutils/mlstdutils.$(MLARCHIVE) \
../common/mlutils/mlcutils.$(MLARCHIVE) \
+ ../common/mlpcre/mlpcre.$(MLARCHIVE) \
../mllib/mllib.$(MLARCHIVE) \
../customize/customize.$(MLARCHIVE) \
$(top_srcdir)/ocaml-link.sh
diff --git a/v2v/Makefile.am b/v2v/Makefile.am
index 0aafc5725..3a38b3a98 100644
--- a/v2v/Makefile.am
+++ b/v2v/Makefile.am
@@ -151,6 +151,7 @@ OCAMLPACKAGES = \
-I $(top_builddir)/ocaml \
-I $(top_builddir)/common/mlstdutils \
-I $(top_builddir)/common/mlutils \
+ -I $(top_builddir)/common/mlpcre \
-I $(top_builddir)/common/mlxml \
-I $(top_builddir)/mllib \
-I $(top_builddir)/customize
@@ -177,6 +178,7 @@ endif
OCAMLLINKFLAGS = \
mlstdutils.$(MLARCHIVE) \
mlguestfs.$(MLARCHIVE) \
+ mlpcre.$(MLARCHIVE) \
mlxml.$(MLARCHIVE) \
mlcutils.$(MLARCHIVE) \
mllib.$(MLARCHIVE) \
@@ -218,6 +220,7 @@ virt_v2v_copy_to_local_DEPENDENCIES = \
$(COPY_TO_LOCAL_OBJECTS) \
../common/mlstdutils/mlstdutils.$(MLARCHIVE) \
../common/mlxml/mlxml.$(MLARCHIVE) \
+ ../common/mlpcre/mlpcre.$(MLARCHIVE) \
../common/mlutils/mlcutils.$(MLARCHIVE) \
../mllib/mllib.$(MLARCHIVE) \
$(top_srcdir)/ocaml-link.sh
@@ -508,6 +511,7 @@ v2v_unit_tests_DEPENDENCIES = \
$(v2v_unit_tests_THEOBJECTS) \
../common/mlstdutils/mlstdutils.$(MLARCHIVE) \
../common/mlxml/mlxml.$(MLARCHIVE) \
+ ../common/mlpcre/mlpcre.$(MLARCHIVE) \
../common/mlutils/mlcutils.$(MLARCHIVE) \
../mllib/mllib.$(MLARCHIVE) \
$(top_srcdir)/ocaml-link.sh
--
2.13.2
Reasonably Related Threads
- [PATCH v2 00/18] Replace many more uses of the Str module with PCRE.
- [PATCH v3 00/22] Replace almost all uses of the Str module with PCRE.
- [PATCH v2 0/3] v2v: add -o json output mode
- [PATCH 0/2] Add lightweight bindings for PCRE.
- [PATCH v2 0/3] common: Add a lightweight OCaml binding for PCRE.