Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 00/22] Replace almost all uses of the Str module with PCRE.
v1: https://www.redhat.com/archives/libguestfs/2017-September/msg00135.html v2: https://www.redhat.com/archives/libguestfs/2017-September/msg00158.html v3 is almost identical to v2, but I have added 4 extra commits to almost finish the job of replacing Str everywhere possible (note it's not possible to replace Str in common/mlstdutils or the generator because those are pure OCaml). As before there is an outstanding question:> I wonder if there was a deep reason why we had this? > > let unix2dos s > String.concat "\r\n" (Str.split_delim (Str.regexp_string "\n") s) > > I replaced it with what I think should be (nearly) equivalent: > > let unix2dos s > String.concat "\r\n" (String.nsplit "\n" s)Rich.
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 01/22] common/mlpcre: Raise Invalid_argument if PCRE.sub n parameter is negative.
---
common/mlpcre/pcre-c.c | 7 +++++--
common/mlpcre/pcre_tests.ml | 7 +++++++
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/common/mlpcre/pcre-c.c b/common/mlpcre/pcre-c.c
index 6fae0e6f4..da9b50d34 100644
--- a/common/mlpcre/pcre-c.c
+++ b/common/mlpcre/pcre-c.c
@@ -201,6 +201,7 @@ value
guestfs_int_pcre_sub (value nv)
{
CAMLparam1 (nv);
+ int n = Int_val (nv);
CAMLlocal1 (strv);
int len;
CLEANUP_FREE char *str = NULL;
@@ -209,8 +210,10 @@ guestfs_int_pcre_sub (value nv)
if (m == NULL)
raise_pcre_error ("PCRE.sub called without calling PCRE.matches",
0);
- len = pcre_get_substring (m->subject, m->vec, m->r, Int_val (nv),
- (const char **) &str);
+ if (n < 0)
+ caml_invalid_argument ("PCRE.sub: n must be >= 0");
+
+ len = pcre_get_substring (m->subject, m->vec, m->r, n, (const char
**) &str);
if (len == PCRE_ERROR_NOSUBSTRING)
caml_raise_not_found ();
diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml
index e5214eab8..2b18f462f 100644
--- a/common/mlpcre/pcre_tests.ml
+++ b/common/mlpcre/pcre_tests.ml
@@ -69,6 +69,13 @@ let () | PCRE.Error (msg, code) ->
failwith (sprintf "PCRE error: %s (PCRE error code %d)" msg
code)
+(* Run some out of range [sub] calls to ensure an exception is thrown. *)
+let () + let re2 = compile "(a+)(b*)" in
+ ignore (matches re2 "ccac");
+ (try ignore (sub 3) with Not_found -> ());
+ (try ignore (sub (-1)) with Invalid_argument _ -> ())
+
(* Compile some bad regexps and check that an exception is thrown.
* It would be nice to check the error message is right but
* that involves dealing with language and future changes of
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 02/22] common/mlpcre: Add PCRE.subi to return indexes instead of the substring.
---
common/mlpcre/PCRE.ml | 3 +--
common/mlpcre/PCRE.mli | 14 ++++++++++++++
common/mlpcre/pcre-c.c | 27 +++++++++++++++++++++++++++
common/mlpcre/pcre_tests.ml | 19 ++++++++++++++++---
4 files changed, 58 insertions(+), 5 deletions(-)
diff --git a/common/mlpcre/PCRE.ml b/common/mlpcre/PCRE.ml
index 94eea4b34..5269d41f8 100644
--- a/common/mlpcre/PCRE.ml
+++ b/common/mlpcre/PCRE.ml
@@ -23,10 +23,9 @@ exception Error of string * int
type regexp
external compile : string -> regexp = "guestfs_int_pcre_compile"
-
external matches : regexp -> string -> bool =
"guestfs_int_pcre_matches"
-
external sub : int -> string = "guestfs_int_pcre_sub"
+external subi : int -> int * int = "guestfs_int_pcre_subi"
let () Callback.register_exception "PCRE.Error" (Error
("", 0))
diff --git a/common/mlpcre/PCRE.mli b/common/mlpcre/PCRE.mli
index 331a50a9a..02f16d19d 100644
--- a/common/mlpcre/PCRE.mli
+++ b/common/mlpcre/PCRE.mli
@@ -77,3 +77,17 @@ val sub : int -> string
If there was no nth substring then this raises [Not_found].
This can also raise {!Error} for other PCRE-related errors. *)
+
+val subi : int -> int * int
+(** Return the nth substring (capture) matched by the previous call
+ to {!matches} in the current thread.
+
+ This is the same as {!sub} but instead of copying the
+ substring out, it returns the indexes into the original string
+ of the first character of the substring and the first
+ character after the substring.
+
+ (See pcreapi(3) section "How pcre_exec() returns captured
substrings"
+ for exact details).
+
+ If there was no nth substring then this raises [Not_found]. *)
diff --git a/common/mlpcre/pcre-c.c b/common/mlpcre/pcre-c.c
index da9b50d34..15775dad0 100644
--- a/common/mlpcre/pcre-c.c
+++ b/common/mlpcre/pcre-c.c
@@ -225,3 +225,30 @@ guestfs_int_pcre_sub (value nv)
memcpy (String_val (strv), str, len);
CAMLreturn (strv);
}
+
+value
+guestfs_int_pcre_subi (value nv)
+{
+ CAMLparam1 (nv);
+ int n = Int_val (nv);
+ CAMLlocal1 (rv);
+ struct last_match *m = gl_tls_get (last_match);
+
+ if (m == NULL)
+ raise_pcre_error ("PCRE.subi called without calling
PCRE.matches", 0);
+
+ if (n < 0)
+ caml_invalid_argument ("PCRE.subi: n must be >= 0");
+
+ /* eg if there are 2 captures, m->r == 3, and valid values of n are
+ * 0, 1 or 2.
+ */
+ if (n >= m->r)
+ caml_raise_not_found ();
+
+ rv = caml_alloc (2, 0);
+ Store_field (rv, 0, Val_int (m->vec[n*2]));
+ Store_field (rv, 1, Val_int (m->vec[n*2+1]));
+
+ CAMLreturn (rv);
+}
diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml
index 2b18f462f..316a4348e 100644
--- a/common/mlpcre/pcre_tests.ml
+++ b/common/mlpcre/pcre_tests.ml
@@ -34,6 +34,12 @@ let sub i eprintf " %s\n%!" r;
r
+let subi i + eprintf "PCRE.subi %d ->%!" i;
+ let i1, i2 = PCRE.subi i in
+ eprintf " (%d, %d)\n%!" i1 i2;
+ (i1, i2)
+
let () try
let re0 = compile "a+b" in
@@ -62,19 +68,26 @@ let () assert (matches re2 "ccac" = true);
assert (sub 1 = "a");
assert (sub 2 = "");
- assert (sub 0 = "a")
+ assert (sub 0 = "a");
+ assert (subi 0 = (2, 3));
+ assert (subi 1 = (2, 3));
+ assert (subi 2 = (3, 3))
with
| Not_found ->
failwith "one of the PCRE.sub functions unexpectedly raised
Not_found"
| PCRE.Error (msg, code) ->
failwith (sprintf "PCRE error: %s (PCRE error code %d)" msg
code)
-(* Run some out of range [sub] calls to ensure an exception is thrown. *)
+(* Run some out of range [sub] and [subi] calls to ensure an exception
+ * is thrown.
+ *)
let () let re2 = compile "(a+)(b*)" in
ignore (matches re2 "ccac");
(try ignore (sub 3) with Not_found -> ());
- (try ignore (sub (-1)) with Invalid_argument _ -> ())
+ (try ignore (sub (-1)) with Invalid_argument _ -> ());
+ (try ignore (subi 3) with Not_found -> ());
+ (try ignore (subi (-1)) with Invalid_argument _ -> ())
(* Compile some bad regexps and check that an exception is thrown.
* It would be nice to check the error message is right but
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 03/22] common/mlpcre: Add PCRE.replace function.
Similar to Perl s/// but lacks backreferences.
---
common/mlpcre/PCRE.ml | 25 +++++++++++++++++++++++++
common/mlpcre/PCRE.mli | 13 +++++++++++++
common/mlpcre/pcre_tests.ml | 23 ++++++++++++++++++++++-
3 files changed, 60 insertions(+), 1 deletion(-)
diff --git a/common/mlpcre/PCRE.ml b/common/mlpcre/PCRE.ml
index 5269d41f8..0eb7eb2ec 100644
--- a/common/mlpcre/PCRE.ml
+++ b/common/mlpcre/PCRE.ml
@@ -27,5 +27,30 @@ external matches : regexp -> string -> bool =
"guestfs_int_pcre_matches"
external sub : int -> string = "guestfs_int_pcre_sub"
external subi : int -> int * int = "guestfs_int_pcre_subi"
+let rec replace ?(global = false) patt subst subj + if not (matches patt subj)
then
+ (* Return original string unchanged if patt doesn't match. *)
+ subj
+ else (
+ (* If patt matches "yyyy" in the original string then we have
+ * the following situation, where "xxxx" is the part of the
+ * original string before the match, and "zzzz..." is the
+ * part after the match:
+ * "xxxxyyyyzzzzzzzzzzzzz"
+ * ^ ^
+ * i1 i2
+ *)
+ let i1, i2 = subi 0 in
+ let xs = String.sub subj 0 i1 (* "xxxx", part before the match *)
in
+ let zs = String.sub subj i2 (String.length subj - i2) (* after *) in
+
+ (* If the global flag was set, we want to continue substitutions
+ * in the rest of the string.
+ *)
+ let zs = if global then replace ~global patt subst zs else zs in
+
+ xs ^ subst ^ zs
+ )
+
let () Callback.register_exception "PCRE.Error" (Error
("", 0))
diff --git a/common/mlpcre/PCRE.mli b/common/mlpcre/PCRE.mli
index 02f16d19d..634cc600c 100644
--- a/common/mlpcre/PCRE.mli
+++ b/common/mlpcre/PCRE.mli
@@ -91,3 +91,16 @@ val subi : int -> int * int
for exact details).
If there was no nth substring then this raises [Not_found]. *)
+
+val replace : ?global:bool -> regexp -> string -> string -> string
+(** [replace ?global patt subst subj] performs a search and replace
+ on the subject string ([subj]). Where [patt] matches the
+ string, [subst] is substituted. This works similarly to the
+ Perl function [s///].
+
+ The [?global] flag defaults to false, so only the first
+ instance of [patt] in the string is replaced. If set to true
+ then every instance of [patt] in the string is replaced.
+
+ Note that this function does not allow backreferences.
+ Any captures in [patt] are ignored. *)
diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml
index 316a4348e..b5f712d20 100644
--- a/common/mlpcre/pcre_tests.ml
+++ b/common/mlpcre/pcre_tests.ml
@@ -28,6 +28,12 @@ let matches re str eprintf " %b\n%!" r;
r
+let replace ?(global = false) patt subst subj + eprintf "PCRE.replace
global:%b <patt> %s %s ->%!" global subst subj;
+ let r = PCRE.replace ~global patt subst subj in
+ eprintf " %s\n%!" r;
+ r
+
let sub i eprintf "PCRE.sub %d ->%!" i;
let r = PCRE.sub i in
@@ -45,6 +51,7 @@ let () let re0 = compile "a+b" in
let re1 = compile "(a+)b" in
let re2 = compile "(a+)(b*)" in
+ let re3 = compile "[^A-Za-z0-9_]" in
assert (matches re0 "ccaaabbbb" = true);
assert (sub 0 = "aaab");
@@ -71,7 +78,21 @@ let () assert (sub 0 = "a");
assert (subi 0 = (2, 3));
assert (subi 1 = (2, 3));
- assert (subi 2 = (3, 3))
+ assert (subi 2 = (3, 3));
+
+ assert (replace re0 "dd" "abcabcaabccca" =
"ddcabcaabccca");
+ assert (replace ~global:true re0 "dd" "abcabcaabccca" =
"ddcddcddccca");
+
+ (* This example copies a usage from customize/firstboot.ml
+ * "\xc2\xa3" is utf-8 for the GBP sign. Ideally PCRE would
+ * recognize that this is a single character, however doing that
+ * would involve passing the PCRE_UTF8 flag when compiling
+ * patterns, and that could be problematic if PCRE was built
+ * without Unicode support (XXX).
+ *)
+ assert (replace ~global:true re3 "-" "this is
a\xc2\xa3funny.name?"
+ (* = "this-is-a-funny-name-" if UTF-8 worked *)
+ = "this-is-a--funny-name-");
with
| Not_found ->
failwith "one of the PCRE.sub functions unexpectedly raised
Not_found"
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 04/22] common/mlpcre: Allow some PCRE_* flags to be passed to pcre_compile.
Only five simple flags are allowed so far, and not all of them are
actually used in any code. They are:
~anchored / PCRE_ANCHORED - implicit ^...$ around regexp
~caseless / PCRE_CASELESS - fold upper and lower case
~dotall / PCRE_DOTALL - ‘.’ matches anything
~extended / PCRE_EXTENDED - extended regular expressions
~multiline / PCRE_MULTILINE - ^ and $ match lines within the subject string
---
common/mlpcre/PCRE.ml | 2 +-
common/mlpcre/PCRE.mli | 10 ++++++++--
common/mlpcre/pcre-c.c | 41 ++++++++++++++++++++++++++++++++++++++---
common/mlpcre/pcre_tests.ml | 22 +++++++++++++++-------
4 files changed, 62 insertions(+), 13 deletions(-)
diff --git a/common/mlpcre/PCRE.ml b/common/mlpcre/PCRE.ml
index 0eb7eb2ec..753e247e4 100644
--- a/common/mlpcre/PCRE.ml
+++ b/common/mlpcre/PCRE.ml
@@ -22,7 +22,7 @@ exception Error of string * int
type regexp
-external compile : string -> regexp = "guestfs_int_pcre_compile"
+external compile : ?anchored:bool -> ?caseless:bool -> ?dotall:bool ->
?extended:bool -> ?multiline:bool -> string -> regexp =
"guestfs_int_pcre_compile_byte" "guestfs_int_pcre_compile"
external matches : regexp -> string -> bool =
"guestfs_int_pcre_matches"
external sub : int -> string = "guestfs_int_pcre_sub"
external subi : int -> int * int = "guestfs_int_pcre_subi"
diff --git a/common/mlpcre/PCRE.mli b/common/mlpcre/PCRE.mli
index 634cc600c..fcf6fd25e 100644
--- a/common/mlpcre/PCRE.mli
+++ b/common/mlpcre/PCRE.mli
@@ -53,8 +53,14 @@ exception Error of string * int
type regexp
(** The type of a compiled regular expression. *)
-val compile : string -> regexp
-(** Compile a regular expression. This can raise {!Error}. *)
+val compile : ?anchored:bool -> ?caseless:bool -> ?dotall:bool ->
?extended:bool -> ?multiline:bool -> string -> regexp
+(** Compile a regular expression. This can raise {!Error}.
+
+ The flags [?anchored], [?caseless], [?dotall], [?extended],
+ [?multiline]
+ correspond to the [pcre_compile] flags [PCRE_ANCHORED] etc.
+ See pcreapi(3) for details of what they do.
+ All flags default to false. *)
val matches : regexp -> string -> bool
(** Test whether the regular expression matches the string. This
diff --git a/common/mlpcre/pcre-c.c b/common/mlpcre/pcre-c.c
index 15775dad0..6dc30087d 100644
--- a/common/mlpcre/pcre-c.c
+++ b/common/mlpcre/pcre-c.c
@@ -112,22 +112,57 @@ Val_regexp (pcre *re)
CAMLreturn (rv);
}
+static int
+is_Some_true (value v)
+{
+ return
+ v != Val_int (0) /* !None */ &&
+ Bool_val (Field (v, 0)) /* Some true */;
+}
+
value
-guestfs_int_pcre_compile (value pattv)
+guestfs_int_pcre_compile (value anchoredv, value caselessv, value dotallv,
+ value extendedv, value multilinev,
+ value pattv)
{
- CAMLparam1 (pattv);
+ CAMLparam5 (anchoredv, caselessv, dotallv, extendedv, multilinev);
+ CAMLxparam1 (pattv);
+ int options = 0;
pcre *re;
int errcode = 0;
const char *err;
int offset;
- re = pcre_compile2 (String_val (pattv), 0, &errcode, &err,
&offset, NULL);
+ /* Flag parameters are all ‘bool option’, defaulting to false. */
+ if (is_Some_true (anchoredv))
+ options |= PCRE_ANCHORED;
+ if (is_Some_true (caselessv))
+ options |= PCRE_CASELESS;
+ if (is_Some_true (dotallv))
+ options |= PCRE_DOTALL;
+ if (is_Some_true (extendedv))
+ options |= PCRE_EXTENDED;
+ if (is_Some_true (multilinev))
+ options |= PCRE_MULTILINE;
+
+ re = pcre_compile2 (String_val (pattv), options,
+ &errcode, &err, &offset, NULL);
if (re == NULL)
raise_pcre_error (err, errcode);
CAMLreturn (Val_regexp (re));
}
+/* OCaml calls C functions from bytecode a bit differently when they
+ * have more than 5 parameters.
+ */
+value
+guestfs_int_pcre_compile_byte (value *argv, int argn)
+{
+ return guestfs_int_pcre_compile (argv[0], argv[1], argv[2], argv[3], argv[4],
+ argv[5]);
+}
+
value
guestfs_int_pcre_matches (value rev, value strv)
{
diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml
index b5f712d20..9d42914b9 100644
--- a/common/mlpcre/pcre_tests.ml
+++ b/common/mlpcre/pcre_tests.ml
@@ -18,9 +18,17 @@
open Printf
-let compile patt - eprintf "PCRE.compile %s\n%!" patt;
- PCRE.compile patt
+let compile ?(anchored = false) ?(caseless = false)
+ ?(dotall = false) ?(extended = false) ?(multiline = false)
+ patt + eprintf "PCRE.compile%s%s%s%s%s %s\n%!"
+ (if anchored then " ~anchored:true" else "")
+ (if caseless then " ~caseless:true" else "")
+ (if dotall then " ~dotall:true" else "")
+ (if extended then " ~extended:true" else "")
+ (if multiline then " ~multiline:true" else "")
+ patt;
+ PCRE.compile ~anchored ~caseless ~dotall ~extended ~multiline patt
let matches re str eprintf "PCRE.matches %s ->%!" str;
@@ -51,7 +59,7 @@ let () let re0 = compile "a+b" in
let re1 = compile "(a+)b" in
let re2 = compile "(a+)(b*)" in
- let re3 = compile "[^A-Za-z0-9_]" in
+ let re3 = compile ~caseless:true "[^a-z0-9_]" in
assert (matches re0 "ccaaabbbb" = true);
assert (sub 0 = "aaab");
@@ -90,9 +98,9 @@ let () * patterns, and that could be problematic if PCRE
was built
* without Unicode support (XXX).
*)
- assert (replace ~global:true re3 "-" "this is
a\xc2\xa3funny.name?"
- (* = "this-is-a-funny-name-" if UTF-8 worked *)
- = "this-is-a--funny-name-");
+ assert (replace ~global:true re3 "-" "this is
a\xc2\xa3FUNNY.name?"
+ (* = "this-is-a-FUNNY-name-" if UTF-8 worked *)
+ = "this-is-a--FUNNY-name-");
with
| Not_found ->
failwith "one of the PCRE.sub functions unexpectedly raised
Not_found"
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 05/22] common/mlstdutils: Add String.nsplit ?max parameter, and tests.
This idea was previously proposed by Tomáš Golembiovský in
https://www.redhat.com/archives/libguestfs/2017-January/msg00138.html
---
common/mlstdutils/std_utils.ml | 28 ++++++++++++++++------------
common/mlstdutils/std_utils.mli | 11 ++++++++---
common/mlstdutils/std_utils_tests.ml | 29 +++++++++++++++++++++++++++++
3 files changed, 53 insertions(+), 15 deletions(-)
diff --git a/common/mlstdutils/std_utils.ml b/common/mlstdutils/std_utils.ml
index b731b8fd5..37eef0348 100644
--- a/common/mlstdutils/std_utils.ml
+++ b/common/mlstdutils/std_utils.ml
@@ -147,18 +147,7 @@ module String = struct
done;
if not !r then s else Bytes.to_string b2
- let rec nsplit sep str - let len = length str in
- let seplen = length sep in
- let i = find str sep in
- if i = -1 then [str]
- else (
- let s' = sub str 0 i in
- let s'' = sub str (i+seplen) (len-i-seplen) in
- s' :: nsplit sep s''
- )
-
- let split sep str + let rec split sep str let len = length sep in
let seplen = length str in
let i = find str sep in
@@ -167,6 +156,21 @@ module String = struct
sub str 0 i, sub str (i + len) (seplen - i - len)
)
+ and nsplit ?(max = 0) sep str + if max < 0 then
+ invalid_arg "String.nsplit: max parameter should not be
negative";
+
+ (* If we reached the limit, OR if the pattern does not match the string
+ * at all, return the rest of the string as a single element list.
+ *)
+ if max = 1 || find str sep = -1 then
+ [str]
+ else (
+ let s1, s2 = split sep str in
+ let max = if max = 0 then 0 else max - 1 in
+ s1 :: nsplit ~max sep s2
+ )
+
let rec lines_split str let buf = Buffer.create 16 in
let len = length str in
diff --git a/common/mlstdutils/std_utils.mli b/common/mlstdutils/std_utils.mli
index d217e48d4..c08e51360 100644
--- a/common/mlstdutils/std_utils.mli
+++ b/common/mlstdutils/std_utils.mli
@@ -88,14 +88,19 @@ module String : sig
[str] with [s2]. *)
val replace_char : string -> char -> char -> string
(** Replace character in string. *)
- val nsplit : string -> string -> string list
- (** [nsplit sep str] splits [str] into multiple strings at each
- separator [sep]. *)
val split : string -> string -> string * string
(** [split sep str] splits [str] at the first occurrence of the
separator [sep], returning the part before and the part after.
If separator is not found, return the whole string and an
empty string. *)
+ val nsplit : ?max:int -> string -> string -> string list
+ (** [nsplit ?max sep str] splits [str] into multiple strings at each
+ separator [sep].
+
+ As with the Perl split function, you can give an optional
+ [?max] parameter to limit the number of strings returned. The
+ final element of the list will contain the remainder of the
+ input string. *)
val lines_split : string -> string list
(** [lines_split str] splits [str] into lines, keeping continuation
characters (i.e. [\] at the end of lines) into account. *)
diff --git a/common/mlstdutils/std_utils_tests.ml
b/common/mlstdutils/std_utils_tests.ml
index ce49c7606..dcd237dab 100644
--- a/common/mlstdutils/std_utils_tests.ml
+++ b/common/mlstdutils/std_utils_tests.ml
@@ -18,6 +18,8 @@
(* This file tests the Std_utils module. *)
+open Printf
+
open OUnit2
open Std_utils
@@ -26,6 +28,7 @@ let assert_equal_string = assert_equal ~printer:(fun x ->
x)
let assert_equal_int = assert_equal ~printer:(fun x -> string_of_int x)
let assert_equal_int64 = assert_equal ~printer:(fun x -> Int64.to_string x)
let assert_equal_stringlist = assert_equal ~printer:(fun x -> "("
^ (String.escaped (String.concat "," x)) ^ ")")
+let assert_equal_stringpair = assert_equal ~printer:(fun (x, y) -> sprintf
"%S, %S" x y)
let test_subdirectory ctx assert_equal_string "" (subdirectory
"/foo" "/foo");
@@ -83,6 +86,30 @@ let test_string_find ctx assert_equal_int (-1)
(String.find "" "baz");
assert_equal_int (-1) (String.find "foobar" "baz")
+(* Test Std_utils.String.split. *)
+let test_string_split ctx + assert_equal_stringpair ("a",
"b") (String.split " " "a b");
+ assert_equal_stringpair ("", "ab") (String.split "
" " ab");
+ assert_equal_stringpair ("", "abc") (String.split
"" "abc");
+ assert_equal_stringpair ("abc", "") (String.split "
" "abc");
+ assert_equal_stringpair ("", "") (String.split "
" "")
+
+(* Test Std_utils.String.nsplit. *)
+let test_string_nsplit ctx + (* XXX Not clear if the next test case indicates
an error in
+ * String.nsplit. However this is how it has historically worked.
+ *)
+ assert_equal_stringlist [""] (String.nsplit " "
"");
+ assert_equal_stringlist ["abc"] (String.nsplit " "
"abc");
+ assert_equal_stringlist ["a"; "b"; "c"]
(String.nsplit " " "a b c");
+ assert_equal_stringlist ["a"; "b"; "c";
""] (String.nsplit " " "a b c ");
+ assert_equal_stringlist [""; "a"; "b";
"c"] (String.nsplit " " " a b c");
+ assert_equal_stringlist [""; "a"; "b";
"c"; ""] (String.nsplit " " " a b c ");
+ assert_equal_stringlist ["a b c d"] (String.nsplit ~max:1 "
" "a b c d");
+ assert_equal_stringlist ["a"; "b c d"] (String.nsplit
~max:2 " " "a b c d");
+ assert_equal_stringlist ["a"; "b"; "c d"]
(String.nsplit ~max:3 " " "a b c d");
+ assert_equal_stringlist ["a"; "b"; "c";
"d"] (String.nsplit ~max:10 " " "a b c d")
+
(* Test Std_utils.String.lines_split. *)
let test_string_lines_split ctx assert_equal_stringlist [""]
(String.lines_split "");
@@ -129,6 +156,8 @@ let suite "strings.is_prefix" >::
test_string_is_prefix;
"strings.is_suffix" >:: test_string_is_suffix;
"strings.find" >:: test_string_find;
+ "strings.split" >:: test_string_split;
+ "strings.nsplit" >:: test_string_nsplit;
"strings.lines_split" >:: test_string_lines_split;
"strings.span" >:: test_string_span;
"strings.chomp" >:: test_string_chomp;
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 06/22] common/mlpcre: Add split and nsplit functions.
These work like our String.split and String.nsplit functions.
---
common/mlpcre/PCRE.ml | 33 +++++++++++++++++++++++++++++++++
common/mlpcre/PCRE.mli | 19 +++++++++++++++++++
common/mlpcre/pcre_tests.ml | 29 +++++++++++++++++++++++++++++
3 files changed, 81 insertions(+)
diff --git a/common/mlpcre/PCRE.ml b/common/mlpcre/PCRE.ml
index 753e247e4..b054928f9 100644
--- a/common/mlpcre/PCRE.ml
+++ b/common/mlpcre/PCRE.ml
@@ -52,5 +52,38 @@ let rec replace ?(global = false) patt subst subj xs ^
subst ^ zs
)
+let rec split patt subj + if not (matches patt subj) then
+ subj, ""
+ else (
+ (* If patt matches "yyyy" in the original string then we have
+ * the following situation, where "xxxx" is the part of the
+ * original string before the match, and "zzzz..." is the
+ * part after the match:
+ * "xxxxyyyyzzzzzzzzzzzzz"
+ * ^ ^
+ * i1 i2
+ *)
+ let i1, i2 = subi 0 in
+ let xs = String.sub subj 0 i1 (* "xxxx", part before the match *)
in
+ let zs = String.sub subj i2 (String.length subj - i2) (* after *) in
+ xs, zs
+ )
+
+and nsplit ?(max = 0) patt subj + if max < 0 then
+ invalid_arg "PCRE.nsplit: max parameter should not be negative";
+
+ (* If we reached the limit, OR if the pattern does not match the string
+ * at all, return the rest of the string as a single element list.
+ *)
+ if max = 1 || not (matches patt subj) then
+ [subj]
+ else (
+ let s1, s2 = split patt subj in
+ let max = if max = 0 then 0 else max - 1 in
+ s1 :: nsplit ~max patt s2
+ )
+
let () Callback.register_exception "PCRE.Error" (Error
("", 0))
diff --git a/common/mlpcre/PCRE.mli b/common/mlpcre/PCRE.mli
index fcf6fd25e..eacb6fd90 100644
--- a/common/mlpcre/PCRE.mli
+++ b/common/mlpcre/PCRE.mli
@@ -110,3 +110,22 @@ val replace : ?global:bool -> regexp -> string ->
string -> string
Note that this function does not allow backreferences.
Any captures in [patt] are ignored. *)
+
+val split : regexp -> string -> string * string
+val nsplit : ?max:int -> regexp -> string -> string list
+(** [split patt subj] splits the string at the first occurrence
+ of the regular expression [patt], returning the parts of the
+ string before and after the match (the matching part is not
+ returned). If the pattern does not match then the whole
+ input is returned in the first string, and the second string
+ is empty.
+
+ [nsplit patt subj] is the same but the string is split
+ on every occurrence of [patt]. Note that if the pattern
+ matches at the beginning or end of the string, then an
+ empty string element will be returned at the beginning or
+ end of the list.
+
+ [nsplit] has an optional [?max] parameter which controls
+ the maximum length of the returned list. The final element
+ contains the remainder of the string. *)
diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml
index 9d42914b9..346019c40 100644
--- a/common/mlpcre/pcre_tests.ml
+++ b/common/mlpcre/pcre_tests.ml
@@ -42,6 +42,20 @@ let replace ?(global = false) patt subst subj eprintf
" %s\n%!" r;
r
+let split patt subj + eprintf "PCRE.split <patt> %s ->%!"
subj;
+ let s1, s2 = PCRE.split patt subj in
+ eprintf " (%s, %s)\n%!" s1 s2;
+ (s1, s2)
+
+let nsplit ?(max = 0) patt subj + eprintf "PCRE.nsplit%s <patt> %s
->%!"
+ (if max = 0 then "" else sprintf " ~max:%d" max)
+ subj;
+ let ss = PCRE.nsplit ~max patt subj in
+ eprintf " [%s]\n%!" (String.concat "; " ss);
+ ss
+
let sub i eprintf "PCRE.sub %d ->%!" i;
let r = PCRE.sub i in
@@ -60,6 +74,7 @@ let () let re1 = compile "(a+)b" in
let re2 = compile "(a+)(b*)" in
let re3 = compile ~caseless:true "[^a-z0-9_]" in
+ let ws = compile "\\s+" in
assert (matches re0 "ccaaabbbb" = true);
assert (sub 0 = "aaab");
@@ -101,6 +116,20 @@ let () assert (replace ~global:true re3 "-"
"this is a\xc2\xa3FUNNY.name?"
(* = "this-is-a-FUNNY-name-" if UTF-8 worked *)
= "this-is-a--FUNNY-name-");
+
+ (* This also tests PCRE.split since that is used by nsplit. *)
+ assert (nsplit ~max:1 ws "a b c" = [ "a b c" ]);
+ assert (nsplit ~max:2 ws "a b c" = [ "a"; "b
c" ]);
+ assert (nsplit ~max:3 ws "a b c" = [ "a";
"b"; "c" ]);
+ assert (nsplit ~max:10 ws "a b c" = [ "a";
"b"; "c" ]);
+ assert (nsplit ws "the cat sat on \t\t the mat." +
[ "the"; "cat"; "sat"; "on";
"the"; "mat." ]);
+ assert (nsplit ~max:5 ws "the cat sat on \t\t the mat." +
[ "the"; "cat"; "sat"; "on"; "the
mat." ]);
+ assert (nsplit ws " the " = [ ""; "the";
"" ]);
+ assert (nsplit ws "the " = [ "the"; "" ]);
+ assert (nsplit ws " the" = [ ""; "the" ]);
+ assert (nsplit ws " \t the" = [ ""; "the"
]);
with
| Not_found ->
failwith "one of the PCRE.sub functions unexpectedly raised
Not_found"
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 07/22] valgrind: Ignore PCRE.compile regexps stored in a global variable.
---
valgrind-suppressions | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/valgrind-suppressions b/valgrind-suppressions
index 1c4b35355..2bc70659d 100644
--- a/valgrind-suppressions
+++ b/valgrind-suppressions
@@ -171,6 +171,17 @@
fun:caml_build_primitive_table
}
+# The OCaml PCRE.compile function calls pcre_compile2 which allocates
+# memory. If these regexps are stored in a global variable then they
+# can never be freed. This is not really a bug, so ignore it.
+{
+ ocaml_pcre_globals
+ Memcheck:Leak
+ fun:malloc
+ fun:pcre_compile2
+ ...
+}
+
# glibc
{
glibc_cond
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 08/22] builder: Simplify PCRE regular expression by using case-insensitive matching.
Updates commit e5182b87cf0c96933b39045bea3c5a94e29dcd95. --- builder/languages.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builder/languages.ml b/builder/languages.ml index 89d173999..155322014 100644 --- a/builder/languages.ml +++ b/builder/languages.ml @@ -20,7 +20,7 @@ open Std_utils open Common_utils let re_locale - PCRE.compile "^([A-Za-z]+)(_([A-Za-z]+))?(\\.([A-Za-z0-9-]+))?(@([A-Za-z]+))?$" + PCRE.compile ~caseless:true "^([a-z]+)(_([a-z]+))?(\\.([a-z0-9-]+))?(@([a-z]+))?$" let split_locale loc let l = ref [] in -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 09/22] customize: Remove use of Str module from virt-customize code.
---
customize/Makefile.am | 3 +++
customize/firstboot.ml | 6 +++---
customize/ssh_key.ml | 7 +++----
sysprep/Makefile.am | 3 +++
v2v/Makefile.am | 4 ++++
5 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/customize/Makefile.am b/customize/Makefile.am
index 18f4dce44..ff2c2e2d0 100644
--- a/customize/Makefile.am
+++ b/customize/Makefile.am
@@ -126,6 +126,7 @@ OCAMLPACKAGES = \
-I $(top_builddir)/ocaml \
-I $(top_builddir)/common/mlstdutils \
-I $(top_builddir)/common/mlutils \
+ -I $(top_builddir)/common/mlpcre \
-I $(top_builddir)/mllib \
-I $(builddir)
if HAVE_OCAML_PKG_GETTEXT
@@ -155,6 +156,7 @@ endif
OCAMLLINKFLAGS = \
mlstdutils.$(MLARCHIVE) \
mlguestfs.$(MLARCHIVE) \
+ mlpcre.$(MLARCHIVE) \
mlcutils.$(MLARCHIVE) \
mllib.$(MLARCHIVE) \
customize.$(MLARCHIVE) \
@@ -176,6 +178,7 @@ virt_customize_DEPENDENCIES = \
$(CUSTOMIZE_THEOBJECTS) \
$(CUSTOMIZE_CMA) \
../common/mlutils/mlcutils.$(MLARCHIVE) \
+ ../common/mlpcre/mlpcre.$(MLARCHIVE) \
../mllib/mllib.$(MLARCHIVE)
virt_customize_LINK = \
$(top_srcdir)/ocaml-link.sh -cclib '$(OCAMLCLIBS)' -- \
diff --git a/customize/firstboot.ml b/customize/firstboot.ml
index b07bda001..25203cf91 100644
--- a/customize/firstboot.ml
+++ b/customize/firstboot.ml
@@ -25,12 +25,12 @@ open Common_gettext.Gettext
open Regedit
let unix2dos s - String.concat "\r\n" (Str.split_delim
(Str.regexp_string "\n") s)
+ String.concat "\r\n" (String.nsplit "\n" s)
let sanitize_name - let rex = Str.regexp "[^A-Za-z0-9_]" in
+ let rex = PCRE.compile ~caseless:true "[^a-z0-9_]" in
fun n ->
- let n = Str.global_replace rex "-" n in
+ let n = PCRE.replace ~global:true rex "-" n in
let len = String.length n and max = 60 in
if len >= max then String.sub n 0 max else n
diff --git a/customize/ssh_key.ml b/customize/ssh_key.ml
index 185536d1d..da0e7d90c 100644
--- a/customize/ssh_key.ml
+++ b/customize/ssh_key.ml
@@ -47,8 +47,8 @@ and parse_selector_list orig_arg = function
(* Find the local [on the host] user's SSH public key. See
* ssh-copy-id(1) default_ID_file for rationale.
*)
-let pubkey_re = Str.regexp "^id.*\\.pub$"
-let pubkey_ignore_re = Str.regexp ".*-cert\\.pub$"
+let pubkey_re = PCRE.compile "^id.*\\.pub$"
+let pubkey_ignore_re = PCRE.compile ".*-cert\\.pub$"
let local_user_ssh_pubkey () let home_dir @@ -60,8 +60,7 @@ let
local_user_ssh_pubkey () let files = Array.to_list files in
let files = List.filter (
fun file ->
- Str.string_match pubkey_re file 0 &&
- not (Str.string_match pubkey_ignore_re file 0)
+ PCRE.matches pubkey_re file && not (PCRE.matches pubkey_ignore_re
file)
) files in
if files = [] then
error (f_"ssh-inject: no public key file found in %s") ssh_dir;
diff --git a/sysprep/Makefile.am b/sysprep/Makefile.am
index 400b3c1a3..c1aca2966 100644
--- a/sysprep/Makefile.am
+++ b/sysprep/Makefile.am
@@ -117,6 +117,7 @@ OCAMLPACKAGES = \
-I $(top_builddir)/common/visit/.libs \
-I $(top_builddir)/common/mlstdutils \
-I $(top_builddir)/common/mlutils \
+ -I $(top_builddir)/common/mlpcre \
-I $(top_builddir)/common/mlvisit \
-I $(top_builddir)/mllib \
-I $(top_builddir)/customize
@@ -145,6 +146,7 @@ endif
OCAMLLINKFLAGS = \
mlstdutils.$(MLARCHIVE) \
mlguestfs.$(MLARCHIVE) \
+ mlpcre.$(MLARCHIVE) \
mlcutils.$(MLARCHIVE) \
mllib.$(MLARCHIVE) \
mlvisit.$(MLARCHIVE) \
@@ -155,6 +157,7 @@ virt_sysprep_DEPENDENCIES = \
$(OBJECTS) \
../common/mlstdutils/mlstdutils.$(MLARCHIVE) \
../common/mlutils/mlcutils.$(MLARCHIVE) \
+ ../common/mlpcre/mlpcre.$(MLARCHIVE) \
../mllib/mllib.$(MLARCHIVE) \
../customize/customize.$(MLARCHIVE) \
$(top_srcdir)/ocaml-link.sh
diff --git a/v2v/Makefile.am b/v2v/Makefile.am
index 0aafc5725..3a38b3a98 100644
--- a/v2v/Makefile.am
+++ b/v2v/Makefile.am
@@ -151,6 +151,7 @@ OCAMLPACKAGES = \
-I $(top_builddir)/ocaml \
-I $(top_builddir)/common/mlstdutils \
-I $(top_builddir)/common/mlutils \
+ -I $(top_builddir)/common/mlpcre \
-I $(top_builddir)/common/mlxml \
-I $(top_builddir)/mllib \
-I $(top_builddir)/customize
@@ -177,6 +178,7 @@ endif
OCAMLLINKFLAGS = \
mlstdutils.$(MLARCHIVE) \
mlguestfs.$(MLARCHIVE) \
+ mlpcre.$(MLARCHIVE) \
mlxml.$(MLARCHIVE) \
mlcutils.$(MLARCHIVE) \
mllib.$(MLARCHIVE) \
@@ -218,6 +220,7 @@ virt_v2v_copy_to_local_DEPENDENCIES = \
$(COPY_TO_LOCAL_OBJECTS) \
../common/mlstdutils/mlstdutils.$(MLARCHIVE) \
../common/mlxml/mlxml.$(MLARCHIVE) \
+ ../common/mlpcre/mlpcre.$(MLARCHIVE) \
../common/mlutils/mlcutils.$(MLARCHIVE) \
../mllib/mllib.$(MLARCHIVE) \
$(top_srcdir)/ocaml-link.sh
@@ -508,6 +511,7 @@ v2v_unit_tests_DEPENDENCIES = \
$(v2v_unit_tests_THEOBJECTS) \
../common/mlstdutils/mlstdutils.$(MLARCHIVE) \
../common/mlxml/mlxml.$(MLARCHIVE) \
+ ../common/mlpcre/mlpcre.$(MLARCHIVE) \
../common/mlutils/mlcutils.$(MLARCHIVE) \
../mllib/mllib.$(MLARCHIVE) \
$(top_srcdir)/ocaml-link.sh
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 10/22] mllib: Convert Common_utils.parse_size and parse_resize to use PCRE.
This also involved enhancing/fixing the test so that parse_size has
test coverage.
---
dib/Makefile.am | 3 ++
get-kernel/Makefile.am | 3 ++
mllib/Makefile.am | 6 +++
mllib/common_utils.ml | 107 +++++++++++++++++++-------------------------
mllib/common_utils_tests.ml | 10 ++++-
resize/Makefile.am | 3 ++
sparsify/Makefile.am | 3 ++
7 files changed, 72 insertions(+), 63 deletions(-)
diff --git a/dib/Makefile.am b/dib/Makefile.am
index d5fc4e48d..fda074b45 100644
--- a/dib/Makefile.am
+++ b/dib/Makefile.am
@@ -85,6 +85,7 @@ OCAMLPACKAGES = \
-I $(top_builddir)/ocaml \
-I $(top_builddir)/common/mlstdutils \
-I $(top_builddir)/common/mlutils \
+ -I $(top_builddir)/common/mlpcre \
-I $(top_builddir)/mllib
if HAVE_OCAML_PKG_GETTEXT
OCAMLPACKAGES += -package gettext-stub
@@ -109,6 +110,7 @@ OCAMLLINKFLAGS = \
mlstdutils.$(MLARCHIVE) \
mlguestfs.$(MLARCHIVE) \
mlcutils.$(MLARCHIVE) \
+ mlpcre.$(MLARCHIVE) \
mllib.$(MLARCHIVE) \
$(LINK_CUSTOM_OCAMLC_ONLY)
@@ -116,6 +118,7 @@ virt_dib_DEPENDENCIES = \
$(OBJECTS) \
../common/mlstdutils/mlstdutils.$(MLARCHIVE) \
../common/mlutils/mlcutils.$(MLARCHIVE) \
+ ../common/mlpcre/mlpcre.$(MLARCHIVE) \
../mllib/mllib.$(MLARCHIVE) \
$(top_srcdir)/ocaml-link.sh
virt_dib_LINK = \
diff --git a/get-kernel/Makefile.am b/get-kernel/Makefile.am
index 8c194df36..f833f35f4 100644
--- a/get-kernel/Makefile.am
+++ b/get-kernel/Makefile.am
@@ -68,6 +68,7 @@ OCAMLPACKAGES = \
-I $(top_builddir)/ocaml \
-I $(top_builddir)/common/mlstdutils \
-I $(top_builddir)/common/mlutils \
+ -I $(top_builddir)/common/mlpcre \
-I $(top_builddir)/mllib
if HAVE_OCAML_PKG_GETTEXT
OCAMLPACKAGES += -package gettext-stub
@@ -91,6 +92,7 @@ endif
OCAMLLINKFLAGS = \
mlstdutils.$(MLARCHIVE) \
mlguestfs.$(MLARCHIVE) \
+ mlpcre.$(MLARCHIVE) \
mlcutils.$(MLARCHIVE) \
mllib.$(MLARCHIVE) \
$(LINK_CUSTOM_OCAMLC_ONLY)
@@ -99,6 +101,7 @@ virt_get_kernel_DEPENDENCIES = \
$(OBJECTS) \
../common/mlstdutils/mlstdutils.$(MLARCHIVE) \
../common/mlutils/mlcutils.$(MLARCHIVE) \
+ ../common/mlpcre/mlpcre.$(MLARCHIVE) \
../mllib/mllib.$(MLARCHIVE) \
$(top_srcdir)/ocaml-link.sh
virt_get_kernel_LINK = \
diff --git a/mllib/Makefile.am b/mllib/Makefile.am
index 2f6d2043a..3f2af3c61 100644
--- a/mllib/Makefile.am
+++ b/mllib/Makefile.am
@@ -86,6 +86,7 @@ libmllib_a_CPPFLAGS = \
-I$(top_srcdir)/common/utils \
-I$(top_srcdir)/lib \
-I$(top_srcdir)/common/options \
+ -I$(top_srcdir)/common/mlpcre \
-I$(top_srcdir)/common/mlxml \
-I$(top_srcdir)/common/mlstdutils \
-I$(top_srcdir)/common/mlutils
@@ -106,6 +107,7 @@ OCAMLPACKAGES = \
-I $(top_builddir)/lib/.libs \
-I $(top_builddir)/gnulib/lib/.libs \
-I $(top_builddir)/ocaml \
+ -I $(top_builddir)/common/mlpcre \
-I $(top_builddir)/common/mlxml \
-I $(top_builddir)/common/mlstdutils \
-I $(top_builddir)/common/mlutils \
@@ -189,12 +191,14 @@ endif
OCAMLLINKFLAGS = \
mlstdutils.$(MLARCHIVE) \
mlcutils.$(MLARCHIVE) \
+ mlpcre.$(MLARCHIVE) \
mlguestfs.$(MLARCHIVE) \
$(LINK_CUSTOM_OCAMLC_ONLY)
common_utils_tests_DEPENDENCIES = \
$(common_utils_tests_THEOBJECTS) \
../common/mlstdutils/mlstdutils.$(MLARCHIVE) \
+ ../common/mlpcre/mlpcre.$(MLARCHIVE) \
$(MLLIB_CMA) \
$(top_srcdir)/ocaml-link.sh
common_utils_tests_LINK = \
@@ -206,6 +210,7 @@ common_utils_tests_LINK = \
getopt_tests_DEPENDENCIES = \
$(getopt_tests_THEOBJECTS) \
../common/mlstdutils/mlstdutils.$(MLARCHIVE) \
+ ../common/mlpcre/mlpcre.$(MLARCHIVE) \
$(MLLIB_CMA) \
$(top_srcdir)/ocaml-link.sh
getopt_tests_LINK = \
@@ -217,6 +222,7 @@ getopt_tests_LINK = \
JSON_tests_DEPENDENCIES = \
$(JSON_tests_THEOBJECTS) \
../common/mlstdutils/mlstdutils.$(MLARCHIVE) \
+ ../common/mlpcre/mlpcre.$(MLARCHIVE) \
$(MLLIB_CMA) \
$(top_srcdir)/ocaml-link.sh
JSON_tests_LINK = \
diff --git a/mllib/common_utils.ml b/mllib/common_utils.ml
index 597128967..3355365c6 100644
--- a/mllib/common_utils.ml
+++ b/mllib/common_utils.ml
@@ -155,73 +155,56 @@ let virt_tools_data_dir ) in
fun () -> Lazy.force dir
+(* Used by parse_size and parse_resize below. *)
+let const_re = PCRE.compile "^([.0-9]+)([bKMG])$"
+let plus_const_re = PCRE.compile "^\\+([.0-9]+)([bKMG])$"
+let minus_const_re = PCRE.compile "^-([.0-9]+)([bKMG])$"
+let percent_re = PCRE.compile "^([.0-9]+)%$"
+let plus_percent_re = PCRE.compile "^\\+([.0-9]+)%$"
+let minus_percent_re = PCRE.compile "^-([.0-9]+)%$"
+let size_scaled f = function
+ | "b" -> Int64.of_float f
+ | "K" -> Int64.of_float (f *. 1024.)
+ | "M" -> Int64.of_float (f *. 1024. *. 1024.)
+ | "G" -> Int64.of_float (f *. 1024. *. 1024. *. 1024.)
+ | _ -> assert false
+
(* Parse a size field, eg. "10G". *)
-let parse_size - let const_re = Str.regexp
"^\\([.0-9]+\\)\\([bKMG]\\)$" in
- fun field ->
- let matches rex = Str.string_match rex field 0 in
- let sub i = Str.matched_group i field in
- let size_scaled f = function
- | "b" -> Int64.of_float f
- | "K" -> Int64.of_float (f *. 1024.)
- | "M" -> Int64.of_float (f *. 1024. *. 1024.)
- | "G" -> Int64.of_float (f *. 1024. *. 1024. *. 1024.)
- | _ -> assert false
- in
-
- if matches const_re then (
- size_scaled (float_of_string (sub 1)) (sub 2)
- )
- else
- error "%s: cannot parse size field" field
+let parse_size field + if PCRE.matches const_re field then
+ size_scaled (float_of_string (PCRE.sub 1)) (PCRE.sub 2)
+ else
+ error "%s: cannot parse size field" field
(* Parse a size field, eg. "10G", "+20%" etc. Used
particularly by
* virt-resize --resize and --resize-force options.
*)
-let parse_resize - let const_re = Str.regexp
"^\\([.0-9]+\\)\\([bKMG]\\)$"
- and plus_const_re = Str.regexp "^\\+\\([.0-9]+\\)\\([bKMG]\\)$"
- and minus_const_re = Str.regexp "^-\\([.0-9]+\\)\\([bKMG]\\)$"
- and percent_re = Str.regexp "^\\([.0-9]+\\)%$"
- and plus_percent_re = Str.regexp "^\\+\\([.0-9]+\\)%$"
- and minus_percent_re = Str.regexp "^-\\([.0-9]+\\)%$"
- in
- fun oldsize field ->
- let matches rex = Str.string_match rex field 0 in
- let sub i = Str.matched_group i field in
- let size_scaled f = function
- | "b" -> Int64.of_float f
- | "K" -> Int64.of_float (f *. 1024.)
- | "M" -> Int64.of_float (f *. 1024. *. 1024.)
- | "G" -> Int64.of_float (f *. 1024. *. 1024. *. 1024.)
- | _ -> assert false
- in
-
- if matches const_re then (
- size_scaled (float_of_string (sub 1)) (sub 2)
- )
- else if matches plus_const_re then (
- let incr = size_scaled (float_of_string (sub 1)) (sub 2) in
- oldsize +^ incr
- )
- else if matches minus_const_re then (
- let incr = size_scaled (float_of_string (sub 1)) (sub 2) in
- oldsize -^ incr
- )
- else if matches percent_re then (
- let percent = Int64.of_float (10. *. float_of_string (sub 1)) in
- oldsize *^ percent /^ 1000L
- )
- else if matches plus_percent_re then (
- let percent = Int64.of_float (10. *. float_of_string (sub 1)) in
- oldsize +^ oldsize *^ percent /^ 1000L
- )
- else if matches minus_percent_re then (
- let percent = Int64.of_float (10. *. float_of_string (sub 1)) in
- oldsize -^ oldsize *^ percent /^ 1000L
- )
- else
- error "%s: cannot parse resize field" field
+let parse_resize oldsize field + if PCRE.matches const_re field then (
+ size_scaled (float_of_string (PCRE.sub 1)) (PCRE.sub 2)
+ )
+ else if PCRE.matches plus_const_re field then (
+ let incr = size_scaled (float_of_string (PCRE.sub 1)) (PCRE.sub 2) in
+ oldsize +^ incr
+ )
+ else if PCRE.matches minus_const_re field then (
+ let incr = size_scaled (float_of_string (PCRE.sub 1)) (PCRE.sub 2) in
+ oldsize -^ incr
+ )
+ else if PCRE.matches percent_re field then (
+ let percent = Int64.of_float (10. *. float_of_string (PCRE.sub 1)) in
+ oldsize *^ percent /^ 1000L
+ )
+ else if PCRE.matches plus_percent_re field then (
+ let percent = Int64.of_float (10. *. float_of_string (PCRE.sub 1)) in
+ oldsize +^ oldsize *^ percent /^ 1000L
+ )
+ else if PCRE.matches minus_percent_re field then (
+ let percent = Int64.of_float (10. *. float_of_string (PCRE.sub 1)) in
+ oldsize -^ oldsize *^ percent /^ 1000L
+ )
+ else
+ error "%s: cannot parse resize field" field
let human_size i let sign, i = if i < 0L then "-", Int64.neg i
else "", i in
diff --git a/mllib/common_utils_tests.ml b/mllib/common_utils_tests.ml
index ee8a60463..f7a4eafd1 100644
--- a/mllib/common_utils_tests.ml
+++ b/mllib/common_utils_tests.ml
@@ -29,8 +29,16 @@ let assert_equal_int = assert_equal ~printer:(fun x ->
string_of_int x)
let assert_equal_int64 = assert_equal ~printer:(fun x -> Int64.to_string x)
let assert_equal_intlist = assert_equal ~printer:(fun x -> "(" ^
(String.concat ";" (List.map string_of_int x)) ^ ")")
-(* Test Common_utils.parse_size. *)
+(* Test Common_utils.parse_size and Common_utils.parse_resize. *)
let test_parse_resize ctx + assert_equal_int64 1_L (parse_size
"1b");
+ assert_equal_int64 10_L (parse_size "10b");
+ assert_equal_int64 1024_L (parse_size "1K");
+ assert_equal_int64 102400_L (parse_size "100K");
+ (* Fractions are always rounded down. *)
+ assert_equal_int64 1153433_L (parse_size "1.1M");
+ assert_equal_int64 1202590842_L (parse_size "1.12G");
+
(* For absolute sizes, oldsize is ignored. *)
assert_equal_int64 100_L (parse_resize 100_L "100b");
assert_equal_int64 100_L (parse_resize 1000_L "100b");
diff --git a/resize/Makefile.am b/resize/Makefile.am
index df73af4a4..035bfb9fe 100644
--- a/resize/Makefile.am
+++ b/resize/Makefile.am
@@ -65,6 +65,7 @@ OCAMLPACKAGES = \
-I $(top_builddir)/common/mlstdutils \
-I $(top_builddir)/common/mlprogress \
-I $(top_builddir)/common/mlutils \
+ -I $(top_builddir)/common/mlpcre \
-I $(top_builddir)/mllib
if HAVE_OCAML_PKG_GETTEXT
OCAMLPACKAGES += -package gettext-stub
@@ -90,6 +91,7 @@ OCAMLLINKFLAGS = \
mlstdutils.$(MLARCHIVE) \
mlguestfs.$(MLARCHIVE) \
mlprogress.$(MLARCHIVE) \
+ mlpcre.$(MLARCHIVE) \
mlcutils.$(MLARCHIVE) \
mllib.$(MLARCHIVE) \
$(LINK_CUSTOM_OCAMLC_ONLY)
@@ -98,6 +100,7 @@ virt_resize_DEPENDENCIES = \
$(OBJECTS) \
../common/mlstdutils/mlstdutils.$(MLARCHIVE) \
../common/mlutils/mlcutils.$(MLARCHIVE) \
+ ../common/mlpcre/mlpcre.$(MLARCHIVE) \
../mllib/mllib.$(MLARCHIVE) \
$(top_srcdir)/ocaml-link.sh
virt_resize_LINK = \
diff --git a/sparsify/Makefile.am b/sparsify/Makefile.am
index 3b4ea3420..427d35670 100644
--- a/sparsify/Makefile.am
+++ b/sparsify/Makefile.am
@@ -73,6 +73,7 @@ OCAMLPACKAGES = \
-I $(top_builddir)/common/mlstdutils \
-I $(top_builddir)/common/mlprogress \
-I $(top_builddir)/common/mlutils \
+ -I $(top_builddir)/common/mlpcre \
-I $(top_builddir)/mllib
if HAVE_OCAML_PKG_GETTEXT
OCAMLPACKAGES += -package gettext-stub
@@ -98,6 +99,7 @@ OCAMLLINKFLAGS = \
mlstdutils.$(MLARCHIVE) \
mlguestfs.$(MLARCHIVE) \
mlprogress.$(MLARCHIVE) \
+ mlpcre.$(MLARCHIVE) \
mlcutils.$(MLARCHIVE) \
mllib.$(MLARCHIVE) \
$(LINK_CUSTOM_OCAMLC_ONLY)
@@ -106,6 +108,7 @@ virt_sparsify_DEPENDENCIES = \
$(OBJECTS) \
../common/mlstdutils/mlstdutils.$(MLARCHIVE) \
../common/mlutils/mlcutils.$(MLARCHIVE) \
+ ../common/mlpcre/mlpcre.$(MLARCHIVE) \
../mllib/mllib.$(MLARCHIVE) \
$(top_srcdir)/ocaml-link.sh
virt_sparsify_LINK = \
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 11/22] v2v: linux: Fix uninstallation of kmod-xenpv from /etc/rc.local script.
In the original conversion of virt-v2v from Perl
(commit 0131d6f666d93dd353c4e4092fd945861646d319), the Perl regular
expression was incorrectly transscribed. ‘\b’ was not converted to
‘\\b’ so the new regexp was looking for an ASCII BEL character, not a
word boundary.
To fix this problem I converted the code to use PCRE, and went back to
the original virt-v2v code (virt-v2v.git:
lib/Sys/VirtConvert/Converter/Linux.pm)to find out what the Perl
regular expression should have been.
Note I have also removed ‘.*’ at the beginning and end of the regexp
because PCRE regexps are not anchored.
---
v2v/convert_linux.ml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/v2v/convert_linux.ml b/v2v/convert_linux.ml
index 3e7652716..f78d18d39 100644
--- a/v2v/convert_linux.ml
+++ b/v2v/convert_linux.ml
@@ -115,10 +115,10 @@ let rec convert (g : G.guestfs) inspect source output
rcaps (try
let lines = g#read_lines "/etc/rc.local" in
let lines = Array.to_list lines in
- let rex = Str.regexp
".*\\b\\(insmod|modprobe\\)\b.*\\bxen-vbd.*" in
+ let rex = PCRE.compile "\\b(insmod|modprobe)\\b.*\\bxen-vbd"
in
let lines = List.map (
fun s ->
- if Str.string_match rex s 0 then
+ if PCRE.matches rex s then
"#" ^ s
else
s
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 12/22] v2v: linux: Fix rewriting of grub2 GRUB_CMDLINE=...resume=<device>...
Commit dbe0b69f24421f258da2fb420b58fc1530a9cd03 transscribed the Perl
regexp incorrectly so that it only matched the impossible case of
‘resume=/dev/X’ for a single non-whitespace character X.
This fixes the regular expression, referencing back to the original
Perl code in virt-v2v.
---
v2v/convert_linux.ml | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/v2v/convert_linux.ml b/v2v/convert_linux.ml
index f78d18d39..93a6f1732 100644
--- a/v2v/convert_linux.ml
+++ b/v2v/convert_linux.ml
@@ -917,7 +917,7 @@ let rec convert (g : G.guestfs) inspect source output rcaps
List.flatten (List.map Array.to_list (List.map g#aug_match paths)) in
(* Map device names for each entry. *)
- let rex_resume = Str.regexp "^\\(.*resume=\\)\\(/dev/[^
]\\)\\(.*\\)$"
+ let rex_resume = PCRE.compile "^(.*resume=)(/dev/\\S+)(.*)$"
and rex_device_cciss_p Str.regexp
"^/dev/\\(cciss/c[0-9]+d[0-9]+\\)p\\([0-9]+\\)$"
and rex_device_cciss @@ -943,10 +943,10 @@ let rec convert (g : G.guestfs)
inspect source output rcaps
if String.find path "GRUB_CMDLINE" >= 0 then (
(* Handle grub2 resume=<dev> specially. *)
- if Str.string_match rex_resume value 0 then (
- let start = Str.matched_group 1 value
- and device = Str.matched_group 2 value
- and end_ = Str.matched_group 3 value in
+ if PCRE.matches rex_resume value then (
+ let start = PCRE.sub 1
+ and device = PCRE.sub 2
+ and end_ = PCRE.sub 3 in
let device = replace_if_device path device in
start ^ device ^ end_
)
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 13/22] v2v: linux: Properly ignore rpm/dpkg-move-aside kernels.
The old virt-v2v code ignored boot kernels with names like
"/boot/vmlinuz-*.rpmsave". The transscribed code did not because the
Str module requires ‘|’ to be escaped as ‘\|’.
This changes the code to use PCRE and fixes it.
---
v2v/linux_bootloaders.ml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/v2v/linux_bootloaders.ml b/v2v/linux_bootloaders.ml
index 210cce762..00cb5cd19 100644
--- a/v2v/linux_bootloaders.ml
+++ b/v2v/linux_bootloaders.ml
@@ -319,9 +319,9 @@ object (self)
Array.to_list (g#glob_expand "/boot/kernel-*") @
Array.to_list (g#glob_expand "/boot/vmlinuz-*") @
Array.to_list (g#glob_expand "/vmlinuz-*") in
- let rex = Str.regexp ".*\\.\\(dpkg-.*|rpmsave|rpmnew\\)$" in
+ let rex = PCRE.compile "\\.(?:dpkg-.*|rpmsave|rpmnew)$" in
let vmlinuzes = List.filter (
- fun file -> not (Str.string_match rex file 0)
+ fun file -> not (PCRE.matches rex file)
) vmlinuzes in
vmlinuzes
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 14/22] v2v: linux: Convert the Linux-related conversion modules from Str to PCRE.
For each regular expression I went back to the original Perl code to
ensure that the regexp is correct.
---
v2v/convert_linux.ml | 61 +++++++++++++++++-------------------------------
v2v/linux_bootloaders.ml | 32 ++++++++++++-------------
v2v/linux_kernels.ml | 17 +++++++-------
3 files changed, 46 insertions(+), 64 deletions(-)
diff --git a/v2v/convert_linux.ml b/v2v/convert_linux.ml
index 93a6f1732..df7d4dd14 100644
--- a/v2v/convert_linux.ml
+++ b/v2v/convert_linux.ml
@@ -176,11 +176,11 @@ let rec convert (g : G.guestfs) inspect source output
rcaps if g#is_file ~followsymlinks:true vboxconfig then (
let lines = g#read_lines vboxconfig in
let lines = Array.to_list lines in
- let rex = Str.regexp "^INSTALL_DIR=\\(.*\\)$" in
+ let rex = PCRE.compile "^INSTALL_DIR=(.*)$" in
let lines = filter_map (
fun line ->
- if Str.string_match rex line 0 then (
- let path = Str.matched_group 1 line in
+ if PCRE.matches rex line then (
+ let path = PCRE.sub 1 in
let path = shell_unquote path in
if String.length path >= 1 && path.[0] = '/'
then (
let vboxuninstall = path ^ "/uninstall.sh" in
@@ -260,8 +260,7 @@ let rec convert (g : G.guestfs) inspect source output rcaps
*)
if provides <> [] then (
(* Trim whitespace. *)
- let rex = Str.regexp "^[ \t]*\\([^ \t]+\\)[ \t]*$" in
- let provides = List.map (Str.replace_first rex "\\1")
provides in
+ let provides = List.map String.trim provides in
(* Install the dependencies with yum. Use yum explicitly
* because we don't have package names and local install is
@@ -327,7 +326,7 @@ let rec convert (g : G.guestfs) inspect source output rcaps
* ttys in /etc/inittab if the system uses it. We need to put
* them back.
*)
- let rex = Str.regexp
"^\\([1-6]\\):\\([2-5]+\\):respawn:\\(.*\\)" in
+ let rex = PCRE.compile "^([1-6]):([2-5]+):respawn:(.*)" in
let updated = ref false in
let rec loop () let comments = g#aug_match
"/files/etc/inittab/#comment" in
@@ -336,10 +335,10 @@ let rec convert (g : G.guestfs) inspect source output
rcaps | [] -> ()
| commentp :: _ ->
let comment = g#aug_get commentp in
- if Str.string_match rex comment 0 then (
- let name = Str.matched_group 1 comment in
- let runlevels = Str.matched_group 2 comment in
- let process = Str.matched_group 3 comment in
+ if PCRE.matches rex comment then (
+ let name = PCRE.sub 1
+ and runlevels = PCRE.sub 2
+ and process = PCRE.sub 3 in
if String.find process "getty" >= 0 then (
updated := true;
@@ -628,14 +627,12 @@ let rec convert (g : G.guestfs) inspect source output
rcaps *)
let paths = g#aug_match "/files/etc/inittab/*/process" in
let paths = Array.to_list paths in
- let rex = Str.regexp "\\(.*\\)\\b\\([xh]vc0\\)\\b\\(.*\\)" in
+ let rex = PCRE.compile "\\b([xh]vc0)\\b" in
List.iter (
fun path ->
let proc = g#aug_get path in
- if Str.string_match rex proc 0 then (
- let proc = Str.global_replace rex "\\1ttyS0\\3" proc in
- g#aug_set path proc
- );
+ let proc' = PCRE.replace ~global:true rex "ttyS0" proc in
+ if proc <> proc' then g#aug_set path proc'
) paths;
let paths = g#aug_match "/files/etc/securetty/*" in
@@ -658,11 +655,11 @@ let rec convert (g : G.guestfs) inspect source output
rcaps *)
let paths = g#aug_match "/files/etc/inittab/*/process" in
let paths = Array.to_list paths in
- let rex = Str.regexp ".*\\b\\([xh]vc0|ttyS0\\)\\b.*" in
+ let rex = PCRE.compile "\\b([xh]vc0|ttyS0)\\b" in
List.iter (
fun path ->
let proc = g#aug_get path in
- if Str.string_match rex proc 0 then
+ if PCRE.matches rex proc then
ignore (g#aug_rm (path ^ "/.."))
) paths;
@@ -918,14 +915,8 @@ let rec convert (g : G.guestfs) inspect source output rcaps
(* Map device names for each entry. *)
let rex_resume = PCRE.compile "^(.*resume=)(/dev/\\S+)(.*)$"
- and rex_device_cciss_p - Str.regexp
"^/dev/\\(cciss/c[0-9]+d[0-9]+\\)p\\([0-9]+\\)$"
- and rex_device_cciss - Str.regexp
"^/dev/\\(cciss/c[0-9]+d[0-9]+\\)$"
- and rex_device_p - Str.regexp
"^/dev/\\([a-z]+\\)\\([0-9]+\\)$"
- and rex_device - Str.regexp "^/dev/\\([a-z]+\\)$" in
+ and rex_device_cciss = PCRE.compile
"^/dev/(cciss/c\\d+d\\d+)(?:p(\\d+))?$"
+ and rex_device = PCRE.compile "^/dev/([a-z]+)(\\d*)?$" in
let rec replace_if_device path value let replace device @@ -952,24
+943,16 @@ let rec convert (g : G.guestfs) inspect source output rcaps
)
else value
)
- else if Str.string_match rex_device_cciss_p value 0 then (
- let device = Str.matched_group 1 value
- and part = Str.matched_group 2 value in
+ else if PCRE.matches rex_device_cciss value then (
+ let device = PCRE.sub 1
+ and part = try PCRE.sub 2 with Not_found -> "" in
"/dev/" ^ replace device ^ part
)
- else if Str.string_match rex_device_cciss value 0 then (
- let device = Str.matched_group 1 value in
- "/dev/" ^ replace device
- )
- else if Str.string_match rex_device_p value 0 then (
- let device = Str.matched_group 1 value
- and part = Str.matched_group 2 value in
+ else if PCRE.matches rex_device value then (
+ let device = PCRE.sub 1
+ and part = try PCRE.sub 2 with Not_found -> "" in
"/dev/" ^ replace device ^ part
)
- else if Str.string_match rex_device value 0 then (
- let device = Str.matched_group 1 value in
- "/dev/" ^ replace device
- )
else (* doesn't look like a known device name *)
value
in
diff --git a/v2v/linux_bootloaders.ml b/v2v/linux_bootloaders.ml
index 00cb5cd19..59af38a86 100644
--- a/v2v/linux_bootloaders.ml
+++ b/v2v/linux_bootloaders.ml
@@ -44,9 +44,9 @@ type bootloader_type | Grub2
(* Helper function for SUSE: remove (hdX,X) prefix from a path. *)
-let remove_hd_prefix path - let rex = Str.regexp "^(hd.*)\\(.*\\)"
in
- Str.replace_first rex "\\1" path
+let remove_hd_prefix + let rex = PCRE.compile "^\\(hd.*\\)" in
+ PCRE.replace rex ""
(* Grub1 (AKA grub-legacy) representation. *)
class bootloader_grub1 (g : G.guestfs) inspect grub_config @@ -132,11 +132,11
@@ object
if paths = [] then
error (f_"didn't find grub entry for kernel %s") vmlinuz;
let path = List.hd paths in
- let rex = Str.regexp ".*/title\\[\\([1-9][0-9]*\\)\\]/kernel" in
- if not (Str.string_match rex path 0) then
+ let rex = PCRE.compile "/title(?:\\[(\\d+)\\])?/kernel" in
+ if not (PCRE.matches rex path) then
error (f_"internal error: regular expression did not match
‘%s’")
path;
- let index = int_of_string (Str.matched_group 1 path) - 1 in
+ let index = try int_of_string (PCRE.sub 1) - 1 with Not_found -> 0 in
g#aug_set (sprintf "/files%s/default" grub_config) (string_of_int
index);
g#aug_save ()
@@ -151,7 +151,7 @@ object
) else false
method configure_console () - let rex = Str.regexp
"\\(.*\\)\\b\\([xh]vc0\\)\\b\\(.*\\)" in
+ let rex = PCRE.compile "\\b([xh]vc0)\\b" in
let expr = sprintf "/files%s/title/kernel/console" grub_config in
let paths = g#aug_match expr in
@@ -159,23 +159,21 @@ object
List.iter (
fun path ->
let console = g#aug_get path in
- if Str.string_match rex console 0 then (
- let console = Str.global_replace rex "\\1ttyS0\\3" console
in
- g#aug_set path console
- )
+ let console' = PCRE.replace ~global:true rex "ttyS0"
console in
+ if console <> console' then g#aug_set path console'
) paths;
g#aug_save ()
method remove_console () - let rex = Str.regexp
"\\(.*\\)\\b\\([xh]vc0\\)\\b\\(.*\\)" in
+ let rex = PCRE.compile "\\b([xh]vc0)\\b" in
let expr = sprintf "/files%s/title/kernel/console" grub_config in
let rec loop = function
| [] -> ()
| path :: paths ->
let console = g#aug_get path in
- if Str.string_match rex console 0 then (
+ if PCRE.matches rex console then (
ignore (g#aug_rm path);
(* All the paths are invalid, restart the loop. *)
let paths = g#aug_match expr in
@@ -231,7 +229,7 @@ object (self)
inherit bootloader
method private grub2_update_console ~remove () - let rex = Str.regexp
"\\(.*\\)\\bconsole=[xh]vc0\\b\\(.*\\)" in
+ let rex = PCRE.compile "\\bconsole=[xh]vc0\\b" in
let paths = [
"/files/etc/sysconfig/grub/GRUB_CMDLINE_LINUX";
@@ -249,12 +247,12 @@ object (self)
warning (f_"could not remove grub2 serial console (ignored)")
| path :: _ ->
let grub_cmdline = g#aug_get path in
- if Str.string_match rex grub_cmdline 0 then (
+ if PCRE.matches rex grub_cmdline then (
let new_grub_cmdline if not remove then
- Str.global_replace rex "\\1console=ttyS0\\2" grub_cmdline
+ PCRE.replace ~global:true rex "console=ttyS0"
grub_cmdline
else
- Str.global_replace rex "\\1\\2" grub_cmdline in
+ PCRE.replace ~global:true rex "" grub_cmdline in
g#aug_set path new_grub_cmdline;
g#aug_save ();
diff --git a/v2v/linux_kernels.ml b/v2v/linux_kernels.ml
index 6e1ca4bf1..9853b0029 100644
--- a/v2v/linux_kernels.ml
+++ b/v2v/linux_kernels.ml
@@ -63,10 +63,12 @@ let print_kernel_info chan prefix ki fpf "pvpanic=%b
xen=%b debug=%b\n"
ki.ki_supports_isa_pvpanic ki.ki_is_xen_pv_only_kernel ki.ki_is_debug
+let rex_ko = PCRE.compile "\\.k?o(?:\\.xz)?$"
+let rex_ko_extract = PCRE.compile "/([^/]+)\\.k?o(?:\\.xz)?$"
+
let detect_kernels (g : G.guestfs) inspect family bootloader (* What
kernel/kernel-like packages are installed on the current guest? *)
let installed_kernels : kernel_info list - let rex_ko = Str.regexp
".*\\.k?o\\(\\.xz\\)?$" in
let check_config feature = function
| None -> false
| Some config ->
@@ -82,12 +84,11 @@ let detect_kernels (g : G.guestfs) inspect family bootloader
| _ -> false
)
in
- let rex_ko_extract = Str.regexp
".*/\\([^/]+\\)\\.k?o\\(\\.xz\\)?$" in
let rex_initrd if family = `Debian_family then
- Str.regexp "^initrd.img-.*$"
+ PCRE.compile "^initrd.img-.*$"
else
- Str.regexp "^initr\\(d\\|amfs\\)-.*\\(\\.img\\)?$" in
+ PCRE.compile "^initr(?:d|amfs)-.*(?:\\.img)?$" in
filter_map (
function
| { G.app2_name = name } as app
@@ -133,7 +134,7 @@ let detect_kernels (g : G.guestfs) inspect family bootloader
let files = g#ls "/boot" in
let files = Array.to_list files in
let files - List.filter (fun n ->
Str.string_match rex_initrd n 0) files in
+ List.filter (fun n -> PCRE.matches rex_initrd n) files in
let files List.filter (
fun n ->
@@ -165,7 +166,7 @@ let detect_kernels (g : G.guestfs) inspect family bootloader
let modules = g#find modpath in
let modules = Array.to_list modules in
let modules - List.filter (fun m ->
Str.string_match rex_ko m 0) modules in
+ List.filter (fun m -> PCRE.matches rex_ko m) modules in
assert (List.length modules > 0);
(* Determine the kernel architecture by looking at the
@@ -178,8 +179,8 @@ let detect_kernels (g : G.guestfs) inspect family bootloader
(* Just return the module names, without path or extension. *)
let modules = filter_map (
fun m ->
- if Str.string_match rex_ko_extract m 0 then
- Some (Str.matched_group 1 m)
+ if PCRE.matches rex_ko_extract m then
+ Some (PCRE.sub 1)
else
None
) modules in
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 15/22] v2v: windows: Convert the Windows-related conversion modules from Str to PCRE.
This is all new code since the virt-v2v conversion from Perl so there
was no need to check back with the original code.
See also commit 9d920732a35d9ef1b6a33509dabbbd44123e7eda.
---
v2v/convert_windows.ml | 4 ++--
v2v/windows.ml | 15 +++++++--------
2 files changed, 9 insertions(+), 10 deletions(-)
diff --git a/v2v/convert_windows.ml b/v2v/convert_windows.ml
index ac2b8180b..fa87ed84a 100644
--- a/v2v/convert_windows.ml
+++ b/v2v/convert_windows.ml
@@ -125,6 +125,7 @@ let convert (g : G.guestfs) inspect source output rcaps
) in
(* Locate and retrieve all uninstallation commands for Parallels Tools *)
+ let prltools_rex = PCRE.compile "(Parallels|Virtuozzo) Tools" in
let prltools_uninsts let uninsts = ref [] in
@@ -146,8 +147,7 @@ let convert (g : G.guestfs) inspect source output rcaps
raise Not_found;
let dispname = g#hivex_value_string valueh in
- if not (Str.string_match (Str.regexp
".*\\(Parallels\\|Virtuozzo\\) Tools.*")
- dispname 0) then
+ if not (PCRE.matches prltools_rex dispname) then
raise Not_found;
let uninstval = "UninstallString" in
diff --git a/v2v/windows.ml b/v2v/windows.ml
index 6c6ed01c7..fb68c86c9 100644
--- a/v2v/windows.ml
+++ b/v2v/windows.ml
@@ -24,12 +24,12 @@ open Common_utils
open Utils
(* Detect anti-virus (AV) software installed in Windows guests. *)
-let rex_virus = Str.regexp_case_fold "virus" (* generic *)
-let rex_kaspersky = Str.regexp_case_fold "kaspersky"
-let rex_mcafee = Str.regexp_case_fold "mcafee"
-let rex_norton = Str.regexp_case_fold "norton"
-let rex_sophos = Str.regexp_case_fold "sophos"
-let rex_avg_tech = Str.regexp_case_fold "avg technologies" (*
RHBZ#1261436 *)
+let rex_virus = PCRE.compile ~caseless:true "virus" (* generic *)
+let rex_kaspersky = PCRE.compile ~caseless:true "kaspersky"
+let rex_mcafee = PCRE.compile ~caseless:true "mcafee"
+let rex_norton = PCRE.compile ~caseless:true "norton"
+let rex_sophos = PCRE.compile ~caseless:true "sophos"
+let rex_avg_tech = PCRE.compile ~caseless:true "avg technologies" (*
RHBZ#1261436 *)
let rec detect_antivirus { Types.i_type = t; i_apps = apps } assert (t =
"windows");
@@ -44,5 +44,4 @@ and check_app { Guestfs.app2_name = name;
name =~ rex_sophos ||
publisher =~ rex_avg_tech
-and (=~) str rex - try ignore (Str.search_forward rex str 0); true with
Not_found -> false
+and (=~) str rex = PCRE.matches rex str
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 16/22] v2v: -i ova: Use PCRE to match lines in manifest files.
---
v2v/input_ova.ml | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml
index 9ec991ba7..d521a9fc8 100644
--- a/v2v/input_ova.ml
+++ b/v2v/input_ova.ml
@@ -209,7 +209,7 @@ object
(* Read any .mf (manifest) files and verify sha1. *)
let mf = find_files exploded ".mf" in
- let rex = Str.regexp "\\(SHA1\\|SHA256\\)(\\(.*\\))=
\\([0-9a-fA-F]+\\)\r?" in
+ let rex = PCRE.compile "^(SHA1|SHA256)\\((.*)\\)=
([0-9a-fA-F]+)\r?$" in
List.iter (
fun mf ->
debug "processing manifest %s" mf;
@@ -218,10 +218,10 @@ object
let chan = open_in mf in
let rec loop () let line = input_line chan in
- if Str.string_match rex line 0 then (
- let mode = Str.matched_group 1 line in
- let disk = Str.matched_group 2 line in
- let expected = Str.matched_group 3 line in
+ if PCRE.matches rex line then (
+ let mode = PCRE.sub 1
+ and disk = PCRE.sub 2
+ and expected = PCRE.sub 3 in
let csum = Checksums.of_string mode expected in
try
if partial then
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 17/22] v2v: -o libvirt: Use PCRE to verify arch is sane.
--- v2v/output_libvirt.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/v2v/output_libvirt.ml b/v2v/output_libvirt.ml index b5df8245f..bc7f41ff9 100644 --- a/v2v/output_libvirt.ml +++ b/v2v/output_libvirt.ml @@ -28,8 +28,8 @@ open Xpath_helpers open Create_libvirt_xml let arch_is_sane_or_die - let rex = Str.regexp "^[-_A-Za-z0-9]+$" in - fun arch -> assert (Str.string_match rex arch 0) + let rex = PCRE.compile ~caseless:true "^[-_a-z0-9]+$" in + fun arch -> assert (PCRE.matches rex arch) let target_features_of_capabilities_doc doc arch let xpathctx = Xml.xpath_new_context doc in -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 18/22] v2v: vCenter: Replace Str with PCRE.
---
v2v/vCenter.ml | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/v2v/vCenter.ml b/v2v/vCenter.ml
index d5e7c0378..434c93395 100644
--- a/v2v/vCenter.ml
+++ b/v2v/vCenter.ml
@@ -113,7 +113,7 @@ let get_session_cookie password scheme uri sslverify url
Some !session_cookie
)
-let multiple_slash = Str.regexp "/+"
+let multiple_slash = PCRE.compile "/+"
let default_dc = "ha-datacenter"
let guess_dcPath uri = function
@@ -136,7 +136,7 @@ let guess_dcPath uri = function
* remove the Cluster name (which still works in libvirt).
*)
(* Collapse multiple slashes to single slash. *)
- let path = Str.global_replace multiple_slash "/" path in
+ let path = PCRE.replace ~global:true multiple_slash "/" path
in
(* Chop off the first and trailing '/' (if found). *)
let path let len = String.length path in
@@ -158,14 +158,13 @@ let guess_dcPath uri = function
| _ -> (* Don't know, so guess. *)
default_dc
-let source_re = Str.regexp "^\\[\\(.*\\)\\] \\(.*\\)\\.vmdk$"
+let source_re = PCRE.compile "^\\[(.*)\\] (.*)\\.vmdk$"
let map_source_to_https dcPath uri server path - if not (Str.string_match
source_re path 0) then
+ if not (PCRE.matches source_re path) then
(path, true)
else (
- let datastore = Str.matched_group 1 path
- and path = Str.matched_group 2 path in
+ let datastore = PCRE.sub 1 and path = PCRE.sub 2 in
let port match uri.uri_port with
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 19/22] v2v: parse OVF: Replace Str with PCRE.
---
v2v/parse_ovf_from_ova.ml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/v2v/parse_ovf_from_ova.ml b/v2v/parse_ovf_from_ova.ml
index 6dc032407..613e8d075 100644
--- a/v2v/parse_ovf_from_ova.ml
+++ b/v2v/parse_ovf_from_ova.ml
@@ -142,10 +142,10 @@ let parse_ovf_from_ova ovf_filename
Xml.xpathctx_set_current_context xpathctx n;
let file_id = xpath_string_default "rasd:HostResource/text()"
"" in
- let rex = Str.regexp "^\\(ovf:\\)?/disk/\\(.*\\)" in
- if Str.string_match rex file_id 0 then (
+ let rex = PCRE.compile "^(?:ovf:)?/disk/(.*)" in
+ if PCRE.matches rex file_id then (
(* Chase the references through to the actual file name. *)
- let file_id = Str.matched_group 2 file_id in
+ let file_id = PCRE.sub 1 in
let expr = sprintf
"/ovf:Envelope/ovf:DiskSection/ovf:Disk[@ovf:diskId='%s']/@ovf:fileRef"
file_id in
let file_ref match xpath_string expr with
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 20/22] v2v: parse VMX: Replace Str with PCRE.
---
v2v/parse_vmx.ml | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/v2v/parse_vmx.ml b/v2v/parse_vmx.ml
index 770dc29d3..271066eb9 100644
--- a/v2v/parse_vmx.ml
+++ b/v2v/parse_vmx.ml
@@ -226,7 +226,7 @@ and vmx_bool_of_string t else failwith
"bool_of_string"
(* Regular expression used to match key = "value" in VMX file. *)
-let rex = Str.regexp "^\\([^ \t=]+\\)[ \t]*=[
\t]*\"\\(.*\\)\"$"
+let rex = PCRE.compile "^([^ \t=]+)\\s*=\\s*\"(.*)\"$"
(* Remove the weird escapes used in value strings. See description above. *)
let remove_vmx_escapes str @@ -292,10 +292,9 @@ and parse_string str (*
Parse the lines into key = "value". *)
let lines = filter_map (
fun line ->
- if Str.string_match rex line 0 then (
- let key = Str.matched_group 1 line in
+ if PCRE.matches rex line then (
+ let key = PCRE.sub 1 and value = PCRE.sub 2 in
let key = String.lowercase_ascii key in
- let value = Str.matched_group 2 line in
let value = remove_vmx_escapes value in
Some (key, value)
)
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 21/22] generator: Replace use of Str.split with String.nsplit.
Faster and equivalent.
---
generator/actions.ml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/generator/actions.ml b/generator/actions.ml
index 914c123b1..e052284d0 100644
--- a/generator/actions.ml
+++ b/generator/actions.ml
@@ -98,7 +98,7 @@ let non_daemon_functions, daemon_functions List.fold_left
(
fun a b ->
a ^ String.uppercase_ascii (Str.first_chars b 1) ^ Str.string_after b 1
- ) "" (Str.split (Str.regexp "_") name)
+ ) "" (String.nsplit "_" name)
in
let make_camel_case_if_not_set f if f.camel_name = "" then
--
2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 22/22] v2v: utils: Replace Str.bounded_split with PCRE.nsplit.
Updates commit 8f91d3a9b0356a701c1904e0a2efa5d272d08ac2 similar to
Tomáš's original intended code.
---
v2v/utils.ml | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/v2v/utils.ml b/v2v/utils.ml
index 008452d69..12ebe23f4 100644
--- a/v2v/utils.ml
+++ b/v2v/utils.ml
@@ -138,6 +138,8 @@ let backend_is_libvirt () let backend = fst (String.split
":" backend) in
backend = "libvirt"
+let ws = PCRE.compile "\\s+"
+
let find_file_in_tar tar filename let lines = external_command (sprintf
"tar tRvf %s" (Filename.quote tar)) in
let rec loop lines @@ -147,7 +149,7 @@ let find_file_in_tar tar filename
(* Lines have the form:
* block <offset>: <perms> <owner>/<group>
<size> <mdate> <mtime> <file>
*)
- let elems = Str.bounded_split (Str.regexp " +") line 8 in
+ let elems = PCRE.nsplit ~max:8 ws line in
if List.length elems = 8 && List.hd elems = "block"
then (
let elems = Array.of_list elems in
let offset = elems.(1) in
--
2.13.2
Pino Toscano
2017-Sep-27 12:37 UTC
Re: [Libguestfs] [PATCH v3 01/22] common/mlpcre: Raise Invalid_argument if PCRE.sub n parameter is negative.
On Friday, 22 September 2017 09:36:02 CEST Richard W.M. Jones wrote:> --- a/common/mlpcre/pcre-c.c > +++ b/common/mlpcre/pcre-c.c > @@ -201,6 +201,7 @@ value > guestfs_int_pcre_sub (value nv) > { > CAMLparam1 (nv); > + int n = Int_val (nv);This variable can be const, to avoid accidental changes. -- Pino Toscano
Pino Toscano
2017-Sep-27 12:37 UTC
Re: [Libguestfs] [PATCH v3 02/22] common/mlpcre: Add PCRE.subi to return indexes instead of the substring.
On Friday, 22 September 2017 09:36:03 CEST Richard W.M. Jones wrote:> diff --git a/common/mlpcre/pcre-c.c b/common/mlpcre/pcre-c.c > index da9b50d34..15775dad0 100644 > --- a/common/mlpcre/pcre-c.c > +++ b/common/mlpcre/pcre-c.c > @@ -225,3 +225,30 @@ guestfs_int_pcre_sub (value nv) > memcpy (String_val (strv), str, len); > CAMLreturn (strv); > } > + > +value > +guestfs_int_pcre_subi (value nv) > +{ > + CAMLparam1 (nv); > + int n = Int_val (nv); > + CAMLlocal1 (rv); > + struct last_match *m = gl_tls_get (last_match);Both 'n' and 'm' can be const. -- Pino Toscano
Pino Toscano
2017-Sep-27 12:38 UTC
Re: [Libguestfs] [PATCH v3 13/22] v2v: linux: Properly ignore rpm/dpkg-move-aside kernels.
On Friday, 22 September 2017 09:36:14 CEST Richard W.M. Jones wrote:> diff --git a/v2v/linux_bootloaders.ml b/v2v/linux_bootloaders.ml > index 210cce762..00cb5cd19 100644 > --- a/v2v/linux_bootloaders.ml > +++ b/v2v/linux_bootloaders.ml > @@ -319,9 +319,9 @@ object (self) > Array.to_list (g#glob_expand "/boot/kernel-*") @ > Array.to_list (g#glob_expand "/boot/vmlinuz-*") @ > Array.to_list (g#glob_expand "/vmlinuz-*") in > - let rex = Str.regexp ".*\\.\\(dpkg-.*|rpmsave|rpmnew\\)$" in > + let rex = PCRE.compile "\\.(?:dpkg-.*|rpmsave|rpmnew)$" in > let vmlinuzes = List.filter ( > - fun file -> not (Str.string_match rex file 0) > + fun file -> not (PCRE.matches rex file) > ) vmlinuzes inTBH here I'd just drop the regexp usage, and use a static list of suffixes (there are not that many, anyway) -- something like: let suffixes = [ ".dpkg-old"; ".dpkg-new"; ".rpmsave"; ".rpmnew"; ] in let vmlinuzes = List.filter ( fun file -> not (List.exists (Filename.check_suffix file)) ) vmlinuxes in Even better, this can be moved to a separate function in the Linux module, which is where (most of) the interaction with package managers happen: let is_package_manager_file file (* Recognized suffixes of package managers. *) let suffixes = [ ".dpkg-old"; ".dpkg-new"; ".rpmsave"; ".rpmnew"; ] in List.exists (Filename.check_suffix file) ... let vmlinuzes = List.filter ( fun file -> not (Linux.is_package_manager_file file) ) vmlinuxes in (Of course with a better naming for the function.) -- Pino Toscano
Pino Toscano
2017-Sep-27 12:38 UTC
Re: [Libguestfs] [PATCH v3 15/22] v2v: windows: Convert the Windows-related conversion modules from Str to PCRE.
On Friday, 22 September 2017 09:36:16 CEST Richard W.M. Jones wrote:> diff --git a/v2v/convert_windows.ml b/v2v/convert_windows.ml > index ac2b8180b..fa87ed84a 100644 > --- a/v2v/convert_windows.ml > +++ b/v2v/convert_windows.ml > @@ -125,6 +125,7 @@ let convert (g : G.guestfs) inspect source output rcaps > ) in > > (* Locate and retrieve all uninstallation commands for Parallels Tools *) > + let prltools_rex = PCRE.compile "(Parallels|Virtuozzo) Tools" in > let prltools_uninsts > let uninsts = ref [] in > > @@ -146,8 +147,7 @@ let convert (g : G.guestfs) inspect source output rcaps > raise Not_found; > > let dispname = g#hivex_value_string valueh in > - if not (Str.string_match (Str.regexp ".*\\(Parallels\\|Virtuozzo\\) Tools.*") > - dispname 0) then > + if not (PCRE.matches prltools_rex dispname) thenTBH I'd simplify here using String.find: if (String.find dispname "Parallels Tools." = -1) && (String.find dispname "Virtuozzo Tools." = -1) then It should be simpler and faster. -- Pino Toscano
Pino Toscano
2017-Sep-27 12:39 UTC
Re: [Libguestfs] [PATCH v3 18/22] v2v: vCenter: Replace Str with PCRE.
On Friday, 22 September 2017 09:36:19 CEST Richard W.M. Jones wrote:> --- > v2v/vCenter.ml | 11 +++++------ > 1 file changed, 5 insertions(+), 6 deletions(-) > > diff --git a/v2v/vCenter.ml b/v2v/vCenter.ml > index d5e7c0378..434c93395 100644 > --- a/v2v/vCenter.ml > +++ b/v2v/vCenter.ml > @@ -113,7 +113,7 @@ let get_session_cookie password scheme uri sslverify url > Some !session_cookie > ) > > -let multiple_slash = Str.regexp "/+" > +let multiple_slash = PCRE.compile "/+"This can be optimized as "/{2,}", so single slashes will not be replaced by themselves. -- Pino Toscano
Apparently Analagous Threads
- [PATCH v2 1/3] common/mlpcre: add offset flag for PCRE.matches
- [PATCH 1/3] common/mlpcre: add offset flag for PCRE.matches
- [PATCH v3 01/22] common/mlpcre: Raise Invalid_argument if PCRE.sub n parameter is negative.
- [PATCH 0/4] Replace some uses of the Str module with PCRE.
- [PATCH v2 00/18] Replace many more uses of the Str module with PCRE.