Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 00/22] Replace almost all uses of the Str module with PCRE.
v1: https://www.redhat.com/archives/libguestfs/2017-September/msg00135.html v2: https://www.redhat.com/archives/libguestfs/2017-September/msg00158.html v3 is almost identical to v2, but I have added 4 extra commits to almost finish the job of replacing Str everywhere possible (note it's not possible to replace Str in common/mlstdutils or the generator because those are pure OCaml). As before there is an outstanding question:> I wonder if there was a deep reason why we had this? > > let unix2dos s > String.concat "\r\n" (Str.split_delim (Str.regexp_string "\n") s) > > I replaced it with what I think should be (nearly) equivalent: > > let unix2dos s > String.concat "\r\n" (String.nsplit "\n" s)Rich.
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 01/22] common/mlpcre: Raise Invalid_argument if PCRE.sub n parameter is negative.
--- common/mlpcre/pcre-c.c | 7 +++++-- common/mlpcre/pcre_tests.ml | 7 +++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/common/mlpcre/pcre-c.c b/common/mlpcre/pcre-c.c index 6fae0e6f4..da9b50d34 100644 --- a/common/mlpcre/pcre-c.c +++ b/common/mlpcre/pcre-c.c @@ -201,6 +201,7 @@ value guestfs_int_pcre_sub (value nv) { CAMLparam1 (nv); + int n = Int_val (nv); CAMLlocal1 (strv); int len; CLEANUP_FREE char *str = NULL; @@ -209,8 +210,10 @@ guestfs_int_pcre_sub (value nv) if (m == NULL) raise_pcre_error ("PCRE.sub called without calling PCRE.matches", 0); - len = pcre_get_substring (m->subject, m->vec, m->r, Int_val (nv), - (const char **) &str); + if (n < 0) + caml_invalid_argument ("PCRE.sub: n must be >= 0"); + + len = pcre_get_substring (m->subject, m->vec, m->r, n, (const char **) &str); if (len == PCRE_ERROR_NOSUBSTRING) caml_raise_not_found (); diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml index e5214eab8..2b18f462f 100644 --- a/common/mlpcre/pcre_tests.ml +++ b/common/mlpcre/pcre_tests.ml @@ -69,6 +69,13 @@ let () | PCRE.Error (msg, code) -> failwith (sprintf "PCRE error: %s (PCRE error code %d)" msg code) +(* Run some out of range [sub] calls to ensure an exception is thrown. *) +let () + let re2 = compile "(a+)(b*)" in + ignore (matches re2 "ccac"); + (try ignore (sub 3) with Not_found -> ()); + (try ignore (sub (-1)) with Invalid_argument _ -> ()) + (* Compile some bad regexps and check that an exception is thrown. * It would be nice to check the error message is right but * that involves dealing with language and future changes of -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 02/22] common/mlpcre: Add PCRE.subi to return indexes instead of the substring.
--- common/mlpcre/PCRE.ml | 3 +-- common/mlpcre/PCRE.mli | 14 ++++++++++++++ common/mlpcre/pcre-c.c | 27 +++++++++++++++++++++++++++ common/mlpcre/pcre_tests.ml | 19 ++++++++++++++++--- 4 files changed, 58 insertions(+), 5 deletions(-) diff --git a/common/mlpcre/PCRE.ml b/common/mlpcre/PCRE.ml index 94eea4b34..5269d41f8 100644 --- a/common/mlpcre/PCRE.ml +++ b/common/mlpcre/PCRE.ml @@ -23,10 +23,9 @@ exception Error of string * int type regexp external compile : string -> regexp = "guestfs_int_pcre_compile" - external matches : regexp -> string -> bool = "guestfs_int_pcre_matches" - external sub : int -> string = "guestfs_int_pcre_sub" +external subi : int -> int * int = "guestfs_int_pcre_subi" let () Callback.register_exception "PCRE.Error" (Error ("", 0)) diff --git a/common/mlpcre/PCRE.mli b/common/mlpcre/PCRE.mli index 331a50a9a..02f16d19d 100644 --- a/common/mlpcre/PCRE.mli +++ b/common/mlpcre/PCRE.mli @@ -77,3 +77,17 @@ val sub : int -> string If there was no nth substring then this raises [Not_found]. This can also raise {!Error} for other PCRE-related errors. *) + +val subi : int -> int * int +(** Return the nth substring (capture) matched by the previous call + to {!matches} in the current thread. + + This is the same as {!sub} but instead of copying the + substring out, it returns the indexes into the original string + of the first character of the substring and the first + character after the substring. + + (See pcreapi(3) section "How pcre_exec() returns captured substrings" + for exact details). + + If there was no nth substring then this raises [Not_found]. *) diff --git a/common/mlpcre/pcre-c.c b/common/mlpcre/pcre-c.c index da9b50d34..15775dad0 100644 --- a/common/mlpcre/pcre-c.c +++ b/common/mlpcre/pcre-c.c @@ -225,3 +225,30 @@ guestfs_int_pcre_sub (value nv) memcpy (String_val (strv), str, len); CAMLreturn (strv); } + +value +guestfs_int_pcre_subi (value nv) +{ + CAMLparam1 (nv); + int n = Int_val (nv); + CAMLlocal1 (rv); + struct last_match *m = gl_tls_get (last_match); + + if (m == NULL) + raise_pcre_error ("PCRE.subi called without calling PCRE.matches", 0); + + if (n < 0) + caml_invalid_argument ("PCRE.subi: n must be >= 0"); + + /* eg if there are 2 captures, m->r == 3, and valid values of n are + * 0, 1 or 2. + */ + if (n >= m->r) + caml_raise_not_found (); + + rv = caml_alloc (2, 0); + Store_field (rv, 0, Val_int (m->vec[n*2])); + Store_field (rv, 1, Val_int (m->vec[n*2+1])); + + CAMLreturn (rv); +} diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml index 2b18f462f..316a4348e 100644 --- a/common/mlpcre/pcre_tests.ml +++ b/common/mlpcre/pcre_tests.ml @@ -34,6 +34,12 @@ let sub i eprintf " %s\n%!" r; r +let subi i + eprintf "PCRE.subi %d ->%!" i; + let i1, i2 = PCRE.subi i in + eprintf " (%d, %d)\n%!" i1 i2; + (i1, i2) + let () try let re0 = compile "a+b" in @@ -62,19 +68,26 @@ let () assert (matches re2 "ccac" = true); assert (sub 1 = "a"); assert (sub 2 = ""); - assert (sub 0 = "a") + assert (sub 0 = "a"); + assert (subi 0 = (2, 3)); + assert (subi 1 = (2, 3)); + assert (subi 2 = (3, 3)) with | Not_found -> failwith "one of the PCRE.sub functions unexpectedly raised Not_found" | PCRE.Error (msg, code) -> failwith (sprintf "PCRE error: %s (PCRE error code %d)" msg code) -(* Run some out of range [sub] calls to ensure an exception is thrown. *) +(* Run some out of range [sub] and [subi] calls to ensure an exception + * is thrown. + *) let () let re2 = compile "(a+)(b*)" in ignore (matches re2 "ccac"); (try ignore (sub 3) with Not_found -> ()); - (try ignore (sub (-1)) with Invalid_argument _ -> ()) + (try ignore (sub (-1)) with Invalid_argument _ -> ()); + (try ignore (subi 3) with Not_found -> ()); + (try ignore (subi (-1)) with Invalid_argument _ -> ()) (* Compile some bad regexps and check that an exception is thrown. * It would be nice to check the error message is right but -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 03/22] common/mlpcre: Add PCRE.replace function.
Similar to Perl s/// but lacks backreferences. --- common/mlpcre/PCRE.ml | 25 +++++++++++++++++++++++++ common/mlpcre/PCRE.mli | 13 +++++++++++++ common/mlpcre/pcre_tests.ml | 23 ++++++++++++++++++++++- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/common/mlpcre/PCRE.ml b/common/mlpcre/PCRE.ml index 5269d41f8..0eb7eb2ec 100644 --- a/common/mlpcre/PCRE.ml +++ b/common/mlpcre/PCRE.ml @@ -27,5 +27,30 @@ external matches : regexp -> string -> bool = "guestfs_int_pcre_matches" external sub : int -> string = "guestfs_int_pcre_sub" external subi : int -> int * int = "guestfs_int_pcre_subi" +let rec replace ?(global = false) patt subst subj + if not (matches patt subj) then + (* Return original string unchanged if patt doesn't match. *) + subj + else ( + (* If patt matches "yyyy" in the original string then we have + * the following situation, where "xxxx" is the part of the + * original string before the match, and "zzzz..." is the + * part after the match: + * "xxxxyyyyzzzzzzzzzzzzz" + * ^ ^ + * i1 i2 + *) + let i1, i2 = subi 0 in + let xs = String.sub subj 0 i1 (* "xxxx", part before the match *) in + let zs = String.sub subj i2 (String.length subj - i2) (* after *) in + + (* If the global flag was set, we want to continue substitutions + * in the rest of the string. + *) + let zs = if global then replace ~global patt subst zs else zs in + + xs ^ subst ^ zs + ) + let () Callback.register_exception "PCRE.Error" (Error ("", 0)) diff --git a/common/mlpcre/PCRE.mli b/common/mlpcre/PCRE.mli index 02f16d19d..634cc600c 100644 --- a/common/mlpcre/PCRE.mli +++ b/common/mlpcre/PCRE.mli @@ -91,3 +91,16 @@ val subi : int -> int * int for exact details). If there was no nth substring then this raises [Not_found]. *) + +val replace : ?global:bool -> regexp -> string -> string -> string +(** [replace ?global patt subst subj] performs a search and replace + on the subject string ([subj]). Where [patt] matches the + string, [subst] is substituted. This works similarly to the + Perl function [s///]. + + The [?global] flag defaults to false, so only the first + instance of [patt] in the string is replaced. If set to true + then every instance of [patt] in the string is replaced. + + Note that this function does not allow backreferences. + Any captures in [patt] are ignored. *) diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml index 316a4348e..b5f712d20 100644 --- a/common/mlpcre/pcre_tests.ml +++ b/common/mlpcre/pcre_tests.ml @@ -28,6 +28,12 @@ let matches re str eprintf " %b\n%!" r; r +let replace ?(global = false) patt subst subj + eprintf "PCRE.replace global:%b <patt> %s %s ->%!" global subst subj; + let r = PCRE.replace ~global patt subst subj in + eprintf " %s\n%!" r; + r + let sub i eprintf "PCRE.sub %d ->%!" i; let r = PCRE.sub i in @@ -45,6 +51,7 @@ let () let re0 = compile "a+b" in let re1 = compile "(a+)b" in let re2 = compile "(a+)(b*)" in + let re3 = compile "[^A-Za-z0-9_]" in assert (matches re0 "ccaaabbbb" = true); assert (sub 0 = "aaab"); @@ -71,7 +78,21 @@ let () assert (sub 0 = "a"); assert (subi 0 = (2, 3)); assert (subi 1 = (2, 3)); - assert (subi 2 = (3, 3)) + assert (subi 2 = (3, 3)); + + assert (replace re0 "dd" "abcabcaabccca" = "ddcabcaabccca"); + assert (replace ~global:true re0 "dd" "abcabcaabccca" = "ddcddcddccca"); + + (* This example copies a usage from customize/firstboot.ml + * "\xc2\xa3" is utf-8 for the GBP sign. Ideally PCRE would + * recognize that this is a single character, however doing that + * would involve passing the PCRE_UTF8 flag when compiling + * patterns, and that could be problematic if PCRE was built + * without Unicode support (XXX). + *) + assert (replace ~global:true re3 "-" "this is a\xc2\xa3funny.name?" + (* = "this-is-a-funny-name-" if UTF-8 worked *) + = "this-is-a--funny-name-"); with | Not_found -> failwith "one of the PCRE.sub functions unexpectedly raised Not_found" -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 04/22] common/mlpcre: Allow some PCRE_* flags to be passed to pcre_compile.
Only five simple flags are allowed so far, and not all of them are actually used in any code. They are: ~anchored / PCRE_ANCHORED - implicit ^...$ around regexp ~caseless / PCRE_CASELESS - fold upper and lower case ~dotall / PCRE_DOTALL - ‘.’ matches anything ~extended / PCRE_EXTENDED - extended regular expressions ~multiline / PCRE_MULTILINE - ^ and $ match lines within the subject string --- common/mlpcre/PCRE.ml | 2 +- common/mlpcre/PCRE.mli | 10 ++++++++-- common/mlpcre/pcre-c.c | 41 ++++++++++++++++++++++++++++++++++++++--- common/mlpcre/pcre_tests.ml | 22 +++++++++++++++------- 4 files changed, 62 insertions(+), 13 deletions(-) diff --git a/common/mlpcre/PCRE.ml b/common/mlpcre/PCRE.ml index 0eb7eb2ec..753e247e4 100644 --- a/common/mlpcre/PCRE.ml +++ b/common/mlpcre/PCRE.ml @@ -22,7 +22,7 @@ exception Error of string * int type regexp -external compile : string -> regexp = "guestfs_int_pcre_compile" +external compile : ?anchored:bool -> ?caseless:bool -> ?dotall:bool -> ?extended:bool -> ?multiline:bool -> string -> regexp = "guestfs_int_pcre_compile_byte" "guestfs_int_pcre_compile" external matches : regexp -> string -> bool = "guestfs_int_pcre_matches" external sub : int -> string = "guestfs_int_pcre_sub" external subi : int -> int * int = "guestfs_int_pcre_subi" diff --git a/common/mlpcre/PCRE.mli b/common/mlpcre/PCRE.mli index 634cc600c..fcf6fd25e 100644 --- a/common/mlpcre/PCRE.mli +++ b/common/mlpcre/PCRE.mli @@ -53,8 +53,14 @@ exception Error of string * int type regexp (** The type of a compiled regular expression. *) -val compile : string -> regexp -(** Compile a regular expression. This can raise {!Error}. *) +val compile : ?anchored:bool -> ?caseless:bool -> ?dotall:bool -> ?extended:bool -> ?multiline:bool -> string -> regexp +(** Compile a regular expression. This can raise {!Error}. + + The flags [?anchored], [?caseless], [?dotall], [?extended], + [?multiline] + correspond to the [pcre_compile] flags [PCRE_ANCHORED] etc. + See pcreapi(3) for details of what they do. + All flags default to false. *) val matches : regexp -> string -> bool (** Test whether the regular expression matches the string. This diff --git a/common/mlpcre/pcre-c.c b/common/mlpcre/pcre-c.c index 15775dad0..6dc30087d 100644 --- a/common/mlpcre/pcre-c.c +++ b/common/mlpcre/pcre-c.c @@ -112,22 +112,57 @@ Val_regexp (pcre *re) CAMLreturn (rv); } +static int +is_Some_true (value v) +{ + return + v != Val_int (0) /* !None */ && + Bool_val (Field (v, 0)) /* Some true */; +} + value -guestfs_int_pcre_compile (value pattv) +guestfs_int_pcre_compile (value anchoredv, value caselessv, value dotallv, + value extendedv, value multilinev, + value pattv) { - CAMLparam1 (pattv); + CAMLparam5 (anchoredv, caselessv, dotallv, extendedv, multilinev); + CAMLxparam1 (pattv); + int options = 0; pcre *re; int errcode = 0; const char *err; int offset; - re = pcre_compile2 (String_val (pattv), 0, &errcode, &err, &offset, NULL); + /* Flag parameters are all ‘bool option’, defaulting to false. */ + if (is_Some_true (anchoredv)) + options |= PCRE_ANCHORED; + if (is_Some_true (caselessv)) + options |= PCRE_CASELESS; + if (is_Some_true (dotallv)) + options |= PCRE_DOTALL; + if (is_Some_true (extendedv)) + options |= PCRE_EXTENDED; + if (is_Some_true (multilinev)) + options |= PCRE_MULTILINE; + + re = pcre_compile2 (String_val (pattv), options, + &errcode, &err, &offset, NULL); if (re == NULL) raise_pcre_error (err, errcode); CAMLreturn (Val_regexp (re)); } +/* OCaml calls C functions from bytecode a bit differently when they + * have more than 5 parameters. + */ +value +guestfs_int_pcre_compile_byte (value *argv, int argn) +{ + return guestfs_int_pcre_compile (argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5]); +} + value guestfs_int_pcre_matches (value rev, value strv) { diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml index b5f712d20..9d42914b9 100644 --- a/common/mlpcre/pcre_tests.ml +++ b/common/mlpcre/pcre_tests.ml @@ -18,9 +18,17 @@ open Printf -let compile patt - eprintf "PCRE.compile %s\n%!" patt; - PCRE.compile patt +let compile ?(anchored = false) ?(caseless = false) + ?(dotall = false) ?(extended = false) ?(multiline = false) + patt + eprintf "PCRE.compile%s%s%s%s%s %s\n%!" + (if anchored then " ~anchored:true" else "") + (if caseless then " ~caseless:true" else "") + (if dotall then " ~dotall:true" else "") + (if extended then " ~extended:true" else "") + (if multiline then " ~multiline:true" else "") + patt; + PCRE.compile ~anchored ~caseless ~dotall ~extended ~multiline patt let matches re str eprintf "PCRE.matches %s ->%!" str; @@ -51,7 +59,7 @@ let () let re0 = compile "a+b" in let re1 = compile "(a+)b" in let re2 = compile "(a+)(b*)" in - let re3 = compile "[^A-Za-z0-9_]" in + let re3 = compile ~caseless:true "[^a-z0-9_]" in assert (matches re0 "ccaaabbbb" = true); assert (sub 0 = "aaab"); @@ -90,9 +98,9 @@ let () * patterns, and that could be problematic if PCRE was built * without Unicode support (XXX). *) - assert (replace ~global:true re3 "-" "this is a\xc2\xa3funny.name?" - (* = "this-is-a-funny-name-" if UTF-8 worked *) - = "this-is-a--funny-name-"); + assert (replace ~global:true re3 "-" "this is a\xc2\xa3FUNNY.name?" + (* = "this-is-a-FUNNY-name-" if UTF-8 worked *) + = "this-is-a--FUNNY-name-"); with | Not_found -> failwith "one of the PCRE.sub functions unexpectedly raised Not_found" -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 05/22] common/mlstdutils: Add String.nsplit ?max parameter, and tests.
This idea was previously proposed by Tomáš Golembiovský in https://www.redhat.com/archives/libguestfs/2017-January/msg00138.html --- common/mlstdutils/std_utils.ml | 28 ++++++++++++++++------------ common/mlstdutils/std_utils.mli | 11 ++++++++--- common/mlstdutils/std_utils_tests.ml | 29 +++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 15 deletions(-) diff --git a/common/mlstdutils/std_utils.ml b/common/mlstdutils/std_utils.ml index b731b8fd5..37eef0348 100644 --- a/common/mlstdutils/std_utils.ml +++ b/common/mlstdutils/std_utils.ml @@ -147,18 +147,7 @@ module String = struct done; if not !r then s else Bytes.to_string b2 - let rec nsplit sep str - let len = length str in - let seplen = length sep in - let i = find str sep in - if i = -1 then [str] - else ( - let s' = sub str 0 i in - let s'' = sub str (i+seplen) (len-i-seplen) in - s' :: nsplit sep s'' - ) - - let split sep str + let rec split sep str let len = length sep in let seplen = length str in let i = find str sep in @@ -167,6 +156,21 @@ module String = struct sub str 0 i, sub str (i + len) (seplen - i - len) ) + and nsplit ?(max = 0) sep str + if max < 0 then + invalid_arg "String.nsplit: max parameter should not be negative"; + + (* If we reached the limit, OR if the pattern does not match the string + * at all, return the rest of the string as a single element list. + *) + if max = 1 || find str sep = -1 then + [str] + else ( + let s1, s2 = split sep str in + let max = if max = 0 then 0 else max - 1 in + s1 :: nsplit ~max sep s2 + ) + let rec lines_split str let buf = Buffer.create 16 in let len = length str in diff --git a/common/mlstdutils/std_utils.mli b/common/mlstdutils/std_utils.mli index d217e48d4..c08e51360 100644 --- a/common/mlstdutils/std_utils.mli +++ b/common/mlstdutils/std_utils.mli @@ -88,14 +88,19 @@ module String : sig [str] with [s2]. *) val replace_char : string -> char -> char -> string (** Replace character in string. *) - val nsplit : string -> string -> string list - (** [nsplit sep str] splits [str] into multiple strings at each - separator [sep]. *) val split : string -> string -> string * string (** [split sep str] splits [str] at the first occurrence of the separator [sep], returning the part before and the part after. If separator is not found, return the whole string and an empty string. *) + val nsplit : ?max:int -> string -> string -> string list + (** [nsplit ?max sep str] splits [str] into multiple strings at each + separator [sep]. + + As with the Perl split function, you can give an optional + [?max] parameter to limit the number of strings returned. The + final element of the list will contain the remainder of the + input string. *) val lines_split : string -> string list (** [lines_split str] splits [str] into lines, keeping continuation characters (i.e. [\] at the end of lines) into account. *) diff --git a/common/mlstdutils/std_utils_tests.ml b/common/mlstdutils/std_utils_tests.ml index ce49c7606..dcd237dab 100644 --- a/common/mlstdutils/std_utils_tests.ml +++ b/common/mlstdutils/std_utils_tests.ml @@ -18,6 +18,8 @@ (* This file tests the Std_utils module. *) +open Printf + open OUnit2 open Std_utils @@ -26,6 +28,7 @@ let assert_equal_string = assert_equal ~printer:(fun x -> x) let assert_equal_int = assert_equal ~printer:(fun x -> string_of_int x) let assert_equal_int64 = assert_equal ~printer:(fun x -> Int64.to_string x) let assert_equal_stringlist = assert_equal ~printer:(fun x -> "(" ^ (String.escaped (String.concat "," x)) ^ ")") +let assert_equal_stringpair = assert_equal ~printer:(fun (x, y) -> sprintf "%S, %S" x y) let test_subdirectory ctx assert_equal_string "" (subdirectory "/foo" "/foo"); @@ -83,6 +86,30 @@ let test_string_find ctx assert_equal_int (-1) (String.find "" "baz"); assert_equal_int (-1) (String.find "foobar" "baz") +(* Test Std_utils.String.split. *) +let test_string_split ctx + assert_equal_stringpair ("a", "b") (String.split " " "a b"); + assert_equal_stringpair ("", "ab") (String.split " " " ab"); + assert_equal_stringpair ("", "abc") (String.split "" "abc"); + assert_equal_stringpair ("abc", "") (String.split " " "abc"); + assert_equal_stringpair ("", "") (String.split " " "") + +(* Test Std_utils.String.nsplit. *) +let test_string_nsplit ctx + (* XXX Not clear if the next test case indicates an error in + * String.nsplit. However this is how it has historically worked. + *) + assert_equal_stringlist [""] (String.nsplit " " ""); + assert_equal_stringlist ["abc"] (String.nsplit " " "abc"); + assert_equal_stringlist ["a"; "b"; "c"] (String.nsplit " " "a b c"); + assert_equal_stringlist ["a"; "b"; "c"; ""] (String.nsplit " " "a b c "); + assert_equal_stringlist [""; "a"; "b"; "c"] (String.nsplit " " " a b c"); + assert_equal_stringlist [""; "a"; "b"; "c"; ""] (String.nsplit " " " a b c "); + assert_equal_stringlist ["a b c d"] (String.nsplit ~max:1 " " "a b c d"); + assert_equal_stringlist ["a"; "b c d"] (String.nsplit ~max:2 " " "a b c d"); + assert_equal_stringlist ["a"; "b"; "c d"] (String.nsplit ~max:3 " " "a b c d"); + assert_equal_stringlist ["a"; "b"; "c"; "d"] (String.nsplit ~max:10 " " "a b c d") + (* Test Std_utils.String.lines_split. *) let test_string_lines_split ctx assert_equal_stringlist [""] (String.lines_split ""); @@ -129,6 +156,8 @@ let suite "strings.is_prefix" >:: test_string_is_prefix; "strings.is_suffix" >:: test_string_is_suffix; "strings.find" >:: test_string_find; + "strings.split" >:: test_string_split; + "strings.nsplit" >:: test_string_nsplit; "strings.lines_split" >:: test_string_lines_split; "strings.span" >:: test_string_span; "strings.chomp" >:: test_string_chomp; -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 06/22] common/mlpcre: Add split and nsplit functions.
These work like our String.split and String.nsplit functions. --- common/mlpcre/PCRE.ml | 33 +++++++++++++++++++++++++++++++++ common/mlpcre/PCRE.mli | 19 +++++++++++++++++++ common/mlpcre/pcre_tests.ml | 29 +++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) diff --git a/common/mlpcre/PCRE.ml b/common/mlpcre/PCRE.ml index 753e247e4..b054928f9 100644 --- a/common/mlpcre/PCRE.ml +++ b/common/mlpcre/PCRE.ml @@ -52,5 +52,38 @@ let rec replace ?(global = false) patt subst subj xs ^ subst ^ zs ) +let rec split patt subj + if not (matches patt subj) then + subj, "" + else ( + (* If patt matches "yyyy" in the original string then we have + * the following situation, where "xxxx" is the part of the + * original string before the match, and "zzzz..." is the + * part after the match: + * "xxxxyyyyzzzzzzzzzzzzz" + * ^ ^ + * i1 i2 + *) + let i1, i2 = subi 0 in + let xs = String.sub subj 0 i1 (* "xxxx", part before the match *) in + let zs = String.sub subj i2 (String.length subj - i2) (* after *) in + xs, zs + ) + +and nsplit ?(max = 0) patt subj + if max < 0 then + invalid_arg "PCRE.nsplit: max parameter should not be negative"; + + (* If we reached the limit, OR if the pattern does not match the string + * at all, return the rest of the string as a single element list. + *) + if max = 1 || not (matches patt subj) then + [subj] + else ( + let s1, s2 = split patt subj in + let max = if max = 0 then 0 else max - 1 in + s1 :: nsplit ~max patt s2 + ) + let () Callback.register_exception "PCRE.Error" (Error ("", 0)) diff --git a/common/mlpcre/PCRE.mli b/common/mlpcre/PCRE.mli index fcf6fd25e..eacb6fd90 100644 --- a/common/mlpcre/PCRE.mli +++ b/common/mlpcre/PCRE.mli @@ -110,3 +110,22 @@ val replace : ?global:bool -> regexp -> string -> string -> string Note that this function does not allow backreferences. Any captures in [patt] are ignored. *) + +val split : regexp -> string -> string * string +val nsplit : ?max:int -> regexp -> string -> string list +(** [split patt subj] splits the string at the first occurrence + of the regular expression [patt], returning the parts of the + string before and after the match (the matching part is not + returned). If the pattern does not match then the whole + input is returned in the first string, and the second string + is empty. + + [nsplit patt subj] is the same but the string is split + on every occurrence of [patt]. Note that if the pattern + matches at the beginning or end of the string, then an + empty string element will be returned at the beginning or + end of the list. + + [nsplit] has an optional [?max] parameter which controls + the maximum length of the returned list. The final element + contains the remainder of the string. *) diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml index 9d42914b9..346019c40 100644 --- a/common/mlpcre/pcre_tests.ml +++ b/common/mlpcre/pcre_tests.ml @@ -42,6 +42,20 @@ let replace ?(global = false) patt subst subj eprintf " %s\n%!" r; r +let split patt subj + eprintf "PCRE.split <patt> %s ->%!" subj; + let s1, s2 = PCRE.split patt subj in + eprintf " (%s, %s)\n%!" s1 s2; + (s1, s2) + +let nsplit ?(max = 0) patt subj + eprintf "PCRE.nsplit%s <patt> %s ->%!" + (if max = 0 then "" else sprintf " ~max:%d" max) + subj; + let ss = PCRE.nsplit ~max patt subj in + eprintf " [%s]\n%!" (String.concat "; " ss); + ss + let sub i eprintf "PCRE.sub %d ->%!" i; let r = PCRE.sub i in @@ -60,6 +74,7 @@ let () let re1 = compile "(a+)b" in let re2 = compile "(a+)(b*)" in let re3 = compile ~caseless:true "[^a-z0-9_]" in + let ws = compile "\\s+" in assert (matches re0 "ccaaabbbb" = true); assert (sub 0 = "aaab"); @@ -101,6 +116,20 @@ let () assert (replace ~global:true re3 "-" "this is a\xc2\xa3FUNNY.name?" (* = "this-is-a-FUNNY-name-" if UTF-8 worked *) = "this-is-a--FUNNY-name-"); + + (* This also tests PCRE.split since that is used by nsplit. *) + assert (nsplit ~max:1 ws "a b c" = [ "a b c" ]); + assert (nsplit ~max:2 ws "a b c" = [ "a"; "b c" ]); + assert (nsplit ~max:3 ws "a b c" = [ "a"; "b"; "c" ]); + assert (nsplit ~max:10 ws "a b c" = [ "a"; "b"; "c" ]); + assert (nsplit ws "the cat sat on \t\t the mat." + [ "the"; "cat"; "sat"; "on"; "the"; "mat." ]); + assert (nsplit ~max:5 ws "the cat sat on \t\t the mat." + [ "the"; "cat"; "sat"; "on"; "the mat." ]); + assert (nsplit ws " the " = [ ""; "the"; "" ]); + assert (nsplit ws "the " = [ "the"; "" ]); + assert (nsplit ws " the" = [ ""; "the" ]); + assert (nsplit ws " \t the" = [ ""; "the" ]); with | Not_found -> failwith "one of the PCRE.sub functions unexpectedly raised Not_found" -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 07/22] valgrind: Ignore PCRE.compile regexps stored in a global variable.
--- valgrind-suppressions | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/valgrind-suppressions b/valgrind-suppressions index 1c4b35355..2bc70659d 100644 --- a/valgrind-suppressions +++ b/valgrind-suppressions @@ -171,6 +171,17 @@ fun:caml_build_primitive_table } +# The OCaml PCRE.compile function calls pcre_compile2 which allocates +# memory. If these regexps are stored in a global variable then they +# can never be freed. This is not really a bug, so ignore it. +{ + ocaml_pcre_globals + Memcheck:Leak + fun:malloc + fun:pcre_compile2 + ... +} + # glibc { glibc_cond -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 08/22] builder: Simplify PCRE regular expression by using case-insensitive matching.
Updates commit e5182b87cf0c96933b39045bea3c5a94e29dcd95. --- builder/languages.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builder/languages.ml b/builder/languages.ml index 89d173999..155322014 100644 --- a/builder/languages.ml +++ b/builder/languages.ml @@ -20,7 +20,7 @@ open Std_utils open Common_utils let re_locale - PCRE.compile "^([A-Za-z]+)(_([A-Za-z]+))?(\\.([A-Za-z0-9-]+))?(@([A-Za-z]+))?$" + PCRE.compile ~caseless:true "^([a-z]+)(_([a-z]+))?(\\.([a-z0-9-]+))?(@([a-z]+))?$" let split_locale loc let l = ref [] in -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 09/22] customize: Remove use of Str module from virt-customize code.
--- customize/Makefile.am | 3 +++ customize/firstboot.ml | 6 +++--- customize/ssh_key.ml | 7 +++---- sysprep/Makefile.am | 3 +++ v2v/Makefile.am | 4 ++++ 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/customize/Makefile.am b/customize/Makefile.am index 18f4dce44..ff2c2e2d0 100644 --- a/customize/Makefile.am +++ b/customize/Makefile.am @@ -126,6 +126,7 @@ OCAMLPACKAGES = \ -I $(top_builddir)/ocaml \ -I $(top_builddir)/common/mlstdutils \ -I $(top_builddir)/common/mlutils \ + -I $(top_builddir)/common/mlpcre \ -I $(top_builddir)/mllib \ -I $(builddir) if HAVE_OCAML_PKG_GETTEXT @@ -155,6 +156,7 @@ endif OCAMLLINKFLAGS = \ mlstdutils.$(MLARCHIVE) \ mlguestfs.$(MLARCHIVE) \ + mlpcre.$(MLARCHIVE) \ mlcutils.$(MLARCHIVE) \ mllib.$(MLARCHIVE) \ customize.$(MLARCHIVE) \ @@ -176,6 +178,7 @@ virt_customize_DEPENDENCIES = \ $(CUSTOMIZE_THEOBJECTS) \ $(CUSTOMIZE_CMA) \ ../common/mlutils/mlcutils.$(MLARCHIVE) \ + ../common/mlpcre/mlpcre.$(MLARCHIVE) \ ../mllib/mllib.$(MLARCHIVE) virt_customize_LINK = \ $(top_srcdir)/ocaml-link.sh -cclib '$(OCAMLCLIBS)' -- \ diff --git a/customize/firstboot.ml b/customize/firstboot.ml index b07bda001..25203cf91 100644 --- a/customize/firstboot.ml +++ b/customize/firstboot.ml @@ -25,12 +25,12 @@ open Common_gettext.Gettext open Regedit let unix2dos s - String.concat "\r\n" (Str.split_delim (Str.regexp_string "\n") s) + String.concat "\r\n" (String.nsplit "\n" s) let sanitize_name - let rex = Str.regexp "[^A-Za-z0-9_]" in + let rex = PCRE.compile ~caseless:true "[^a-z0-9_]" in fun n -> - let n = Str.global_replace rex "-" n in + let n = PCRE.replace ~global:true rex "-" n in let len = String.length n and max = 60 in if len >= max then String.sub n 0 max else n diff --git a/customize/ssh_key.ml b/customize/ssh_key.ml index 185536d1d..da0e7d90c 100644 --- a/customize/ssh_key.ml +++ b/customize/ssh_key.ml @@ -47,8 +47,8 @@ and parse_selector_list orig_arg = function (* Find the local [on the host] user's SSH public key. See * ssh-copy-id(1) default_ID_file for rationale. *) -let pubkey_re = Str.regexp "^id.*\\.pub$" -let pubkey_ignore_re = Str.regexp ".*-cert\\.pub$" +let pubkey_re = PCRE.compile "^id.*\\.pub$" +let pubkey_ignore_re = PCRE.compile ".*-cert\\.pub$" let local_user_ssh_pubkey () let home_dir @@ -60,8 +60,7 @@ let local_user_ssh_pubkey () let files = Array.to_list files in let files = List.filter ( fun file -> - Str.string_match pubkey_re file 0 && - not (Str.string_match pubkey_ignore_re file 0) + PCRE.matches pubkey_re file && not (PCRE.matches pubkey_ignore_re file) ) files in if files = [] then error (f_"ssh-inject: no public key file found in %s") ssh_dir; diff --git a/sysprep/Makefile.am b/sysprep/Makefile.am index 400b3c1a3..c1aca2966 100644 --- a/sysprep/Makefile.am +++ b/sysprep/Makefile.am @@ -117,6 +117,7 @@ OCAMLPACKAGES = \ -I $(top_builddir)/common/visit/.libs \ -I $(top_builddir)/common/mlstdutils \ -I $(top_builddir)/common/mlutils \ + -I $(top_builddir)/common/mlpcre \ -I $(top_builddir)/common/mlvisit \ -I $(top_builddir)/mllib \ -I $(top_builddir)/customize @@ -145,6 +146,7 @@ endif OCAMLLINKFLAGS = \ mlstdutils.$(MLARCHIVE) \ mlguestfs.$(MLARCHIVE) \ + mlpcre.$(MLARCHIVE) \ mlcutils.$(MLARCHIVE) \ mllib.$(MLARCHIVE) \ mlvisit.$(MLARCHIVE) \ @@ -155,6 +157,7 @@ virt_sysprep_DEPENDENCIES = \ $(OBJECTS) \ ../common/mlstdutils/mlstdutils.$(MLARCHIVE) \ ../common/mlutils/mlcutils.$(MLARCHIVE) \ + ../common/mlpcre/mlpcre.$(MLARCHIVE) \ ../mllib/mllib.$(MLARCHIVE) \ ../customize/customize.$(MLARCHIVE) \ $(top_srcdir)/ocaml-link.sh diff --git a/v2v/Makefile.am b/v2v/Makefile.am index 0aafc5725..3a38b3a98 100644 --- a/v2v/Makefile.am +++ b/v2v/Makefile.am @@ -151,6 +151,7 @@ OCAMLPACKAGES = \ -I $(top_builddir)/ocaml \ -I $(top_builddir)/common/mlstdutils \ -I $(top_builddir)/common/mlutils \ + -I $(top_builddir)/common/mlpcre \ -I $(top_builddir)/common/mlxml \ -I $(top_builddir)/mllib \ -I $(top_builddir)/customize @@ -177,6 +178,7 @@ endif OCAMLLINKFLAGS = \ mlstdutils.$(MLARCHIVE) \ mlguestfs.$(MLARCHIVE) \ + mlpcre.$(MLARCHIVE) \ mlxml.$(MLARCHIVE) \ mlcutils.$(MLARCHIVE) \ mllib.$(MLARCHIVE) \ @@ -218,6 +220,7 @@ virt_v2v_copy_to_local_DEPENDENCIES = \ $(COPY_TO_LOCAL_OBJECTS) \ ../common/mlstdutils/mlstdutils.$(MLARCHIVE) \ ../common/mlxml/mlxml.$(MLARCHIVE) \ + ../common/mlpcre/mlpcre.$(MLARCHIVE) \ ../common/mlutils/mlcutils.$(MLARCHIVE) \ ../mllib/mllib.$(MLARCHIVE) \ $(top_srcdir)/ocaml-link.sh @@ -508,6 +511,7 @@ v2v_unit_tests_DEPENDENCIES = \ $(v2v_unit_tests_THEOBJECTS) \ ../common/mlstdutils/mlstdutils.$(MLARCHIVE) \ ../common/mlxml/mlxml.$(MLARCHIVE) \ + ../common/mlpcre/mlpcre.$(MLARCHIVE) \ ../common/mlutils/mlcutils.$(MLARCHIVE) \ ../mllib/mllib.$(MLARCHIVE) \ $(top_srcdir)/ocaml-link.sh -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 10/22] mllib: Convert Common_utils.parse_size and parse_resize to use PCRE.
This also involved enhancing/fixing the test so that parse_size has test coverage. --- dib/Makefile.am | 3 ++ get-kernel/Makefile.am | 3 ++ mllib/Makefile.am | 6 +++ mllib/common_utils.ml | 107 +++++++++++++++++++------------------------- mllib/common_utils_tests.ml | 10 ++++- resize/Makefile.am | 3 ++ sparsify/Makefile.am | 3 ++ 7 files changed, 72 insertions(+), 63 deletions(-) diff --git a/dib/Makefile.am b/dib/Makefile.am index d5fc4e48d..fda074b45 100644 --- a/dib/Makefile.am +++ b/dib/Makefile.am @@ -85,6 +85,7 @@ OCAMLPACKAGES = \ -I $(top_builddir)/ocaml \ -I $(top_builddir)/common/mlstdutils \ -I $(top_builddir)/common/mlutils \ + -I $(top_builddir)/common/mlpcre \ -I $(top_builddir)/mllib if HAVE_OCAML_PKG_GETTEXT OCAMLPACKAGES += -package gettext-stub @@ -109,6 +110,7 @@ OCAMLLINKFLAGS = \ mlstdutils.$(MLARCHIVE) \ mlguestfs.$(MLARCHIVE) \ mlcutils.$(MLARCHIVE) \ + mlpcre.$(MLARCHIVE) \ mllib.$(MLARCHIVE) \ $(LINK_CUSTOM_OCAMLC_ONLY) @@ -116,6 +118,7 @@ virt_dib_DEPENDENCIES = \ $(OBJECTS) \ ../common/mlstdutils/mlstdutils.$(MLARCHIVE) \ ../common/mlutils/mlcutils.$(MLARCHIVE) \ + ../common/mlpcre/mlpcre.$(MLARCHIVE) \ ../mllib/mllib.$(MLARCHIVE) \ $(top_srcdir)/ocaml-link.sh virt_dib_LINK = \ diff --git a/get-kernel/Makefile.am b/get-kernel/Makefile.am index 8c194df36..f833f35f4 100644 --- a/get-kernel/Makefile.am +++ b/get-kernel/Makefile.am @@ -68,6 +68,7 @@ OCAMLPACKAGES = \ -I $(top_builddir)/ocaml \ -I $(top_builddir)/common/mlstdutils \ -I $(top_builddir)/common/mlutils \ + -I $(top_builddir)/common/mlpcre \ -I $(top_builddir)/mllib if HAVE_OCAML_PKG_GETTEXT OCAMLPACKAGES += -package gettext-stub @@ -91,6 +92,7 @@ endif OCAMLLINKFLAGS = \ mlstdutils.$(MLARCHIVE) \ mlguestfs.$(MLARCHIVE) \ + mlpcre.$(MLARCHIVE) \ mlcutils.$(MLARCHIVE) \ mllib.$(MLARCHIVE) \ $(LINK_CUSTOM_OCAMLC_ONLY) @@ -99,6 +101,7 @@ virt_get_kernel_DEPENDENCIES = \ $(OBJECTS) \ ../common/mlstdutils/mlstdutils.$(MLARCHIVE) \ ../common/mlutils/mlcutils.$(MLARCHIVE) \ + ../common/mlpcre/mlpcre.$(MLARCHIVE) \ ../mllib/mllib.$(MLARCHIVE) \ $(top_srcdir)/ocaml-link.sh virt_get_kernel_LINK = \ diff --git a/mllib/Makefile.am b/mllib/Makefile.am index 2f6d2043a..3f2af3c61 100644 --- a/mllib/Makefile.am +++ b/mllib/Makefile.am @@ -86,6 +86,7 @@ libmllib_a_CPPFLAGS = \ -I$(top_srcdir)/common/utils \ -I$(top_srcdir)/lib \ -I$(top_srcdir)/common/options \ + -I$(top_srcdir)/common/mlpcre \ -I$(top_srcdir)/common/mlxml \ -I$(top_srcdir)/common/mlstdutils \ -I$(top_srcdir)/common/mlutils @@ -106,6 +107,7 @@ OCAMLPACKAGES = \ -I $(top_builddir)/lib/.libs \ -I $(top_builddir)/gnulib/lib/.libs \ -I $(top_builddir)/ocaml \ + -I $(top_builddir)/common/mlpcre \ -I $(top_builddir)/common/mlxml \ -I $(top_builddir)/common/mlstdutils \ -I $(top_builddir)/common/mlutils \ @@ -189,12 +191,14 @@ endif OCAMLLINKFLAGS = \ mlstdutils.$(MLARCHIVE) \ mlcutils.$(MLARCHIVE) \ + mlpcre.$(MLARCHIVE) \ mlguestfs.$(MLARCHIVE) \ $(LINK_CUSTOM_OCAMLC_ONLY) common_utils_tests_DEPENDENCIES = \ $(common_utils_tests_THEOBJECTS) \ ../common/mlstdutils/mlstdutils.$(MLARCHIVE) \ + ../common/mlpcre/mlpcre.$(MLARCHIVE) \ $(MLLIB_CMA) \ $(top_srcdir)/ocaml-link.sh common_utils_tests_LINK = \ @@ -206,6 +210,7 @@ common_utils_tests_LINK = \ getopt_tests_DEPENDENCIES = \ $(getopt_tests_THEOBJECTS) \ ../common/mlstdutils/mlstdutils.$(MLARCHIVE) \ + ../common/mlpcre/mlpcre.$(MLARCHIVE) \ $(MLLIB_CMA) \ $(top_srcdir)/ocaml-link.sh getopt_tests_LINK = \ @@ -217,6 +222,7 @@ getopt_tests_LINK = \ JSON_tests_DEPENDENCIES = \ $(JSON_tests_THEOBJECTS) \ ../common/mlstdutils/mlstdutils.$(MLARCHIVE) \ + ../common/mlpcre/mlpcre.$(MLARCHIVE) \ $(MLLIB_CMA) \ $(top_srcdir)/ocaml-link.sh JSON_tests_LINK = \ diff --git a/mllib/common_utils.ml b/mllib/common_utils.ml index 597128967..3355365c6 100644 --- a/mllib/common_utils.ml +++ b/mllib/common_utils.ml @@ -155,73 +155,56 @@ let virt_tools_data_dir ) in fun () -> Lazy.force dir +(* Used by parse_size and parse_resize below. *) +let const_re = PCRE.compile "^([.0-9]+)([bKMG])$" +let plus_const_re = PCRE.compile "^\\+([.0-9]+)([bKMG])$" +let minus_const_re = PCRE.compile "^-([.0-9]+)([bKMG])$" +let percent_re = PCRE.compile "^([.0-9]+)%$" +let plus_percent_re = PCRE.compile "^\\+([.0-9]+)%$" +let minus_percent_re = PCRE.compile "^-([.0-9]+)%$" +let size_scaled f = function + | "b" -> Int64.of_float f + | "K" -> Int64.of_float (f *. 1024.) + | "M" -> Int64.of_float (f *. 1024. *. 1024.) + | "G" -> Int64.of_float (f *. 1024. *. 1024. *. 1024.) + | _ -> assert false + (* Parse a size field, eg. "10G". *) -let parse_size - let const_re = Str.regexp "^\\([.0-9]+\\)\\([bKMG]\\)$" in - fun field -> - let matches rex = Str.string_match rex field 0 in - let sub i = Str.matched_group i field in - let size_scaled f = function - | "b" -> Int64.of_float f - | "K" -> Int64.of_float (f *. 1024.) - | "M" -> Int64.of_float (f *. 1024. *. 1024.) - | "G" -> Int64.of_float (f *. 1024. *. 1024. *. 1024.) - | _ -> assert false - in - - if matches const_re then ( - size_scaled (float_of_string (sub 1)) (sub 2) - ) - else - error "%s: cannot parse size field" field +let parse_size field + if PCRE.matches const_re field then + size_scaled (float_of_string (PCRE.sub 1)) (PCRE.sub 2) + else + error "%s: cannot parse size field" field (* Parse a size field, eg. "10G", "+20%" etc. Used particularly by * virt-resize --resize and --resize-force options. *) -let parse_resize - let const_re = Str.regexp "^\\([.0-9]+\\)\\([bKMG]\\)$" - and plus_const_re = Str.regexp "^\\+\\([.0-9]+\\)\\([bKMG]\\)$" - and minus_const_re = Str.regexp "^-\\([.0-9]+\\)\\([bKMG]\\)$" - and percent_re = Str.regexp "^\\([.0-9]+\\)%$" - and plus_percent_re = Str.regexp "^\\+\\([.0-9]+\\)%$" - and minus_percent_re = Str.regexp "^-\\([.0-9]+\\)%$" - in - fun oldsize field -> - let matches rex = Str.string_match rex field 0 in - let sub i = Str.matched_group i field in - let size_scaled f = function - | "b" -> Int64.of_float f - | "K" -> Int64.of_float (f *. 1024.) - | "M" -> Int64.of_float (f *. 1024. *. 1024.) - | "G" -> Int64.of_float (f *. 1024. *. 1024. *. 1024.) - | _ -> assert false - in - - if matches const_re then ( - size_scaled (float_of_string (sub 1)) (sub 2) - ) - else if matches plus_const_re then ( - let incr = size_scaled (float_of_string (sub 1)) (sub 2) in - oldsize +^ incr - ) - else if matches minus_const_re then ( - let incr = size_scaled (float_of_string (sub 1)) (sub 2) in - oldsize -^ incr - ) - else if matches percent_re then ( - let percent = Int64.of_float (10. *. float_of_string (sub 1)) in - oldsize *^ percent /^ 1000L - ) - else if matches plus_percent_re then ( - let percent = Int64.of_float (10. *. float_of_string (sub 1)) in - oldsize +^ oldsize *^ percent /^ 1000L - ) - else if matches minus_percent_re then ( - let percent = Int64.of_float (10. *. float_of_string (sub 1)) in - oldsize -^ oldsize *^ percent /^ 1000L - ) - else - error "%s: cannot parse resize field" field +let parse_resize oldsize field + if PCRE.matches const_re field then ( + size_scaled (float_of_string (PCRE.sub 1)) (PCRE.sub 2) + ) + else if PCRE.matches plus_const_re field then ( + let incr = size_scaled (float_of_string (PCRE.sub 1)) (PCRE.sub 2) in + oldsize +^ incr + ) + else if PCRE.matches minus_const_re field then ( + let incr = size_scaled (float_of_string (PCRE.sub 1)) (PCRE.sub 2) in + oldsize -^ incr + ) + else if PCRE.matches percent_re field then ( + let percent = Int64.of_float (10. *. float_of_string (PCRE.sub 1)) in + oldsize *^ percent /^ 1000L + ) + else if PCRE.matches plus_percent_re field then ( + let percent = Int64.of_float (10. *. float_of_string (PCRE.sub 1)) in + oldsize +^ oldsize *^ percent /^ 1000L + ) + else if PCRE.matches minus_percent_re field then ( + let percent = Int64.of_float (10. *. float_of_string (PCRE.sub 1)) in + oldsize -^ oldsize *^ percent /^ 1000L + ) + else + error "%s: cannot parse resize field" field let human_size i let sign, i = if i < 0L then "-", Int64.neg i else "", i in diff --git a/mllib/common_utils_tests.ml b/mllib/common_utils_tests.ml index ee8a60463..f7a4eafd1 100644 --- a/mllib/common_utils_tests.ml +++ b/mllib/common_utils_tests.ml @@ -29,8 +29,16 @@ let assert_equal_int = assert_equal ~printer:(fun x -> string_of_int x) let assert_equal_int64 = assert_equal ~printer:(fun x -> Int64.to_string x) let assert_equal_intlist = assert_equal ~printer:(fun x -> "(" ^ (String.concat ";" (List.map string_of_int x)) ^ ")") -(* Test Common_utils.parse_size. *) +(* Test Common_utils.parse_size and Common_utils.parse_resize. *) let test_parse_resize ctx + assert_equal_int64 1_L (parse_size "1b"); + assert_equal_int64 10_L (parse_size "10b"); + assert_equal_int64 1024_L (parse_size "1K"); + assert_equal_int64 102400_L (parse_size "100K"); + (* Fractions are always rounded down. *) + assert_equal_int64 1153433_L (parse_size "1.1M"); + assert_equal_int64 1202590842_L (parse_size "1.12G"); + (* For absolute sizes, oldsize is ignored. *) assert_equal_int64 100_L (parse_resize 100_L "100b"); assert_equal_int64 100_L (parse_resize 1000_L "100b"); diff --git a/resize/Makefile.am b/resize/Makefile.am index df73af4a4..035bfb9fe 100644 --- a/resize/Makefile.am +++ b/resize/Makefile.am @@ -65,6 +65,7 @@ OCAMLPACKAGES = \ -I $(top_builddir)/common/mlstdutils \ -I $(top_builddir)/common/mlprogress \ -I $(top_builddir)/common/mlutils \ + -I $(top_builddir)/common/mlpcre \ -I $(top_builddir)/mllib if HAVE_OCAML_PKG_GETTEXT OCAMLPACKAGES += -package gettext-stub @@ -90,6 +91,7 @@ OCAMLLINKFLAGS = \ mlstdutils.$(MLARCHIVE) \ mlguestfs.$(MLARCHIVE) \ mlprogress.$(MLARCHIVE) \ + mlpcre.$(MLARCHIVE) \ mlcutils.$(MLARCHIVE) \ mllib.$(MLARCHIVE) \ $(LINK_CUSTOM_OCAMLC_ONLY) @@ -98,6 +100,7 @@ virt_resize_DEPENDENCIES = \ $(OBJECTS) \ ../common/mlstdutils/mlstdutils.$(MLARCHIVE) \ ../common/mlutils/mlcutils.$(MLARCHIVE) \ + ../common/mlpcre/mlpcre.$(MLARCHIVE) \ ../mllib/mllib.$(MLARCHIVE) \ $(top_srcdir)/ocaml-link.sh virt_resize_LINK = \ diff --git a/sparsify/Makefile.am b/sparsify/Makefile.am index 3b4ea3420..427d35670 100644 --- a/sparsify/Makefile.am +++ b/sparsify/Makefile.am @@ -73,6 +73,7 @@ OCAMLPACKAGES = \ -I $(top_builddir)/common/mlstdutils \ -I $(top_builddir)/common/mlprogress \ -I $(top_builddir)/common/mlutils \ + -I $(top_builddir)/common/mlpcre \ -I $(top_builddir)/mllib if HAVE_OCAML_PKG_GETTEXT OCAMLPACKAGES += -package gettext-stub @@ -98,6 +99,7 @@ OCAMLLINKFLAGS = \ mlstdutils.$(MLARCHIVE) \ mlguestfs.$(MLARCHIVE) \ mlprogress.$(MLARCHIVE) \ + mlpcre.$(MLARCHIVE) \ mlcutils.$(MLARCHIVE) \ mllib.$(MLARCHIVE) \ $(LINK_CUSTOM_OCAMLC_ONLY) @@ -106,6 +108,7 @@ virt_sparsify_DEPENDENCIES = \ $(OBJECTS) \ ../common/mlstdutils/mlstdutils.$(MLARCHIVE) \ ../common/mlutils/mlcutils.$(MLARCHIVE) \ + ../common/mlpcre/mlpcre.$(MLARCHIVE) \ ../mllib/mllib.$(MLARCHIVE) \ $(top_srcdir)/ocaml-link.sh virt_sparsify_LINK = \ -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 11/22] v2v: linux: Fix uninstallation of kmod-xenpv from /etc/rc.local script.
In the original conversion of virt-v2v from Perl (commit 0131d6f666d93dd353c4e4092fd945861646d319), the Perl regular expression was incorrectly transscribed. ‘\b’ was not converted to ‘\\b’ so the new regexp was looking for an ASCII BEL character, not a word boundary. To fix this problem I converted the code to use PCRE, and went back to the original virt-v2v code (virt-v2v.git: lib/Sys/VirtConvert/Converter/Linux.pm)to find out what the Perl regular expression should have been. Note I have also removed ‘.*’ at the beginning and end of the regexp because PCRE regexps are not anchored. --- v2v/convert_linux.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/v2v/convert_linux.ml b/v2v/convert_linux.ml index 3e7652716..f78d18d39 100644 --- a/v2v/convert_linux.ml +++ b/v2v/convert_linux.ml @@ -115,10 +115,10 @@ let rec convert (g : G.guestfs) inspect source output rcaps (try let lines = g#read_lines "/etc/rc.local" in let lines = Array.to_list lines in - let rex = Str.regexp ".*\\b\\(insmod|modprobe\\)\b.*\\bxen-vbd.*" in + let rex = PCRE.compile "\\b(insmod|modprobe)\\b.*\\bxen-vbd" in let lines = List.map ( fun s -> - if Str.string_match rex s 0 then + if PCRE.matches rex s then "#" ^ s else s -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 12/22] v2v: linux: Fix rewriting of grub2 GRUB_CMDLINE=...resume=<device>...
Commit dbe0b69f24421f258da2fb420b58fc1530a9cd03 transscribed the Perl regexp incorrectly so that it only matched the impossible case of ‘resume=/dev/X’ for a single non-whitespace character X. This fixes the regular expression, referencing back to the original Perl code in virt-v2v. --- v2v/convert_linux.ml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/v2v/convert_linux.ml b/v2v/convert_linux.ml index f78d18d39..93a6f1732 100644 --- a/v2v/convert_linux.ml +++ b/v2v/convert_linux.ml @@ -917,7 +917,7 @@ let rec convert (g : G.guestfs) inspect source output rcaps List.flatten (List.map Array.to_list (List.map g#aug_match paths)) in (* Map device names for each entry. *) - let rex_resume = Str.regexp "^\\(.*resume=\\)\\(/dev/[^ ]\\)\\(.*\\)$" + let rex_resume = PCRE.compile "^(.*resume=)(/dev/\\S+)(.*)$" and rex_device_cciss_p Str.regexp "^/dev/\\(cciss/c[0-9]+d[0-9]+\\)p\\([0-9]+\\)$" and rex_device_cciss @@ -943,10 +943,10 @@ let rec convert (g : G.guestfs) inspect source output rcaps if String.find path "GRUB_CMDLINE" >= 0 then ( (* Handle grub2 resume=<dev> specially. *) - if Str.string_match rex_resume value 0 then ( - let start = Str.matched_group 1 value - and device = Str.matched_group 2 value - and end_ = Str.matched_group 3 value in + if PCRE.matches rex_resume value then ( + let start = PCRE.sub 1 + and device = PCRE.sub 2 + and end_ = PCRE.sub 3 in let device = replace_if_device path device in start ^ device ^ end_ ) -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 13/22] v2v: linux: Properly ignore rpm/dpkg-move-aside kernels.
The old virt-v2v code ignored boot kernels with names like "/boot/vmlinuz-*.rpmsave". The transscribed code did not because the Str module requires ‘|’ to be escaped as ‘\|’. This changes the code to use PCRE and fixes it. --- v2v/linux_bootloaders.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/v2v/linux_bootloaders.ml b/v2v/linux_bootloaders.ml index 210cce762..00cb5cd19 100644 --- a/v2v/linux_bootloaders.ml +++ b/v2v/linux_bootloaders.ml @@ -319,9 +319,9 @@ object (self) Array.to_list (g#glob_expand "/boot/kernel-*") @ Array.to_list (g#glob_expand "/boot/vmlinuz-*") @ Array.to_list (g#glob_expand "/vmlinuz-*") in - let rex = Str.regexp ".*\\.\\(dpkg-.*|rpmsave|rpmnew\\)$" in + let rex = PCRE.compile "\\.(?:dpkg-.*|rpmsave|rpmnew)$" in let vmlinuzes = List.filter ( - fun file -> not (Str.string_match rex file 0) + fun file -> not (PCRE.matches rex file) ) vmlinuzes in vmlinuzes -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 14/22] v2v: linux: Convert the Linux-related conversion modules from Str to PCRE.
For each regular expression I went back to the original Perl code to ensure that the regexp is correct. --- v2v/convert_linux.ml | 61 +++++++++++++++++------------------------------- v2v/linux_bootloaders.ml | 32 ++++++++++++------------- v2v/linux_kernels.ml | 17 +++++++------- 3 files changed, 46 insertions(+), 64 deletions(-) diff --git a/v2v/convert_linux.ml b/v2v/convert_linux.ml index 93a6f1732..df7d4dd14 100644 --- a/v2v/convert_linux.ml +++ b/v2v/convert_linux.ml @@ -176,11 +176,11 @@ let rec convert (g : G.guestfs) inspect source output rcaps if g#is_file ~followsymlinks:true vboxconfig then ( let lines = g#read_lines vboxconfig in let lines = Array.to_list lines in - let rex = Str.regexp "^INSTALL_DIR=\\(.*\\)$" in + let rex = PCRE.compile "^INSTALL_DIR=(.*)$" in let lines = filter_map ( fun line -> - if Str.string_match rex line 0 then ( - let path = Str.matched_group 1 line in + if PCRE.matches rex line then ( + let path = PCRE.sub 1 in let path = shell_unquote path in if String.length path >= 1 && path.[0] = '/' then ( let vboxuninstall = path ^ "/uninstall.sh" in @@ -260,8 +260,7 @@ let rec convert (g : G.guestfs) inspect source output rcaps *) if provides <> [] then ( (* Trim whitespace. *) - let rex = Str.regexp "^[ \t]*\\([^ \t]+\\)[ \t]*$" in - let provides = List.map (Str.replace_first rex "\\1") provides in + let provides = List.map String.trim provides in (* Install the dependencies with yum. Use yum explicitly * because we don't have package names and local install is @@ -327,7 +326,7 @@ let rec convert (g : G.guestfs) inspect source output rcaps * ttys in /etc/inittab if the system uses it. We need to put * them back. *) - let rex = Str.regexp "^\\([1-6]\\):\\([2-5]+\\):respawn:\\(.*\\)" in + let rex = PCRE.compile "^([1-6]):([2-5]+):respawn:(.*)" in let updated = ref false in let rec loop () let comments = g#aug_match "/files/etc/inittab/#comment" in @@ -336,10 +335,10 @@ let rec convert (g : G.guestfs) inspect source output rcaps | [] -> () | commentp :: _ -> let comment = g#aug_get commentp in - if Str.string_match rex comment 0 then ( - let name = Str.matched_group 1 comment in - let runlevels = Str.matched_group 2 comment in - let process = Str.matched_group 3 comment in + if PCRE.matches rex comment then ( + let name = PCRE.sub 1 + and runlevels = PCRE.sub 2 + and process = PCRE.sub 3 in if String.find process "getty" >= 0 then ( updated := true; @@ -628,14 +627,12 @@ let rec convert (g : G.guestfs) inspect source output rcaps *) let paths = g#aug_match "/files/etc/inittab/*/process" in let paths = Array.to_list paths in - let rex = Str.regexp "\\(.*\\)\\b\\([xh]vc0\\)\\b\\(.*\\)" in + let rex = PCRE.compile "\\b([xh]vc0)\\b" in List.iter ( fun path -> let proc = g#aug_get path in - if Str.string_match rex proc 0 then ( - let proc = Str.global_replace rex "\\1ttyS0\\3" proc in - g#aug_set path proc - ); + let proc' = PCRE.replace ~global:true rex "ttyS0" proc in + if proc <> proc' then g#aug_set path proc' ) paths; let paths = g#aug_match "/files/etc/securetty/*" in @@ -658,11 +655,11 @@ let rec convert (g : G.guestfs) inspect source output rcaps *) let paths = g#aug_match "/files/etc/inittab/*/process" in let paths = Array.to_list paths in - let rex = Str.regexp ".*\\b\\([xh]vc0|ttyS0\\)\\b.*" in + let rex = PCRE.compile "\\b([xh]vc0|ttyS0)\\b" in List.iter ( fun path -> let proc = g#aug_get path in - if Str.string_match rex proc 0 then + if PCRE.matches rex proc then ignore (g#aug_rm (path ^ "/..")) ) paths; @@ -918,14 +915,8 @@ let rec convert (g : G.guestfs) inspect source output rcaps (* Map device names for each entry. *) let rex_resume = PCRE.compile "^(.*resume=)(/dev/\\S+)(.*)$" - and rex_device_cciss_p - Str.regexp "^/dev/\\(cciss/c[0-9]+d[0-9]+\\)p\\([0-9]+\\)$" - and rex_device_cciss - Str.regexp "^/dev/\\(cciss/c[0-9]+d[0-9]+\\)$" - and rex_device_p - Str.regexp "^/dev/\\([a-z]+\\)\\([0-9]+\\)$" - and rex_device - Str.regexp "^/dev/\\([a-z]+\\)$" in + and rex_device_cciss = PCRE.compile "^/dev/(cciss/c\\d+d\\d+)(?:p(\\d+))?$" + and rex_device = PCRE.compile "^/dev/([a-z]+)(\\d*)?$" in let rec replace_if_device path value let replace device @@ -952,24 +943,16 @@ let rec convert (g : G.guestfs) inspect source output rcaps ) else value ) - else if Str.string_match rex_device_cciss_p value 0 then ( - let device = Str.matched_group 1 value - and part = Str.matched_group 2 value in + else if PCRE.matches rex_device_cciss value then ( + let device = PCRE.sub 1 + and part = try PCRE.sub 2 with Not_found -> "" in "/dev/" ^ replace device ^ part ) - else if Str.string_match rex_device_cciss value 0 then ( - let device = Str.matched_group 1 value in - "/dev/" ^ replace device - ) - else if Str.string_match rex_device_p value 0 then ( - let device = Str.matched_group 1 value - and part = Str.matched_group 2 value in + else if PCRE.matches rex_device value then ( + let device = PCRE.sub 1 + and part = try PCRE.sub 2 with Not_found -> "" in "/dev/" ^ replace device ^ part ) - else if Str.string_match rex_device value 0 then ( - let device = Str.matched_group 1 value in - "/dev/" ^ replace device - ) else (* doesn't look like a known device name *) value in diff --git a/v2v/linux_bootloaders.ml b/v2v/linux_bootloaders.ml index 00cb5cd19..59af38a86 100644 --- a/v2v/linux_bootloaders.ml +++ b/v2v/linux_bootloaders.ml @@ -44,9 +44,9 @@ type bootloader_type | Grub2 (* Helper function for SUSE: remove (hdX,X) prefix from a path. *) -let remove_hd_prefix path - let rex = Str.regexp "^(hd.*)\\(.*\\)" in - Str.replace_first rex "\\1" path +let remove_hd_prefix + let rex = PCRE.compile "^\\(hd.*\\)" in + PCRE.replace rex "" (* Grub1 (AKA grub-legacy) representation. *) class bootloader_grub1 (g : G.guestfs) inspect grub_config @@ -132,11 +132,11 @@ object if paths = [] then error (f_"didn't find grub entry for kernel %s") vmlinuz; let path = List.hd paths in - let rex = Str.regexp ".*/title\\[\\([1-9][0-9]*\\)\\]/kernel" in - if not (Str.string_match rex path 0) then + let rex = PCRE.compile "/title(?:\\[(\\d+)\\])?/kernel" in + if not (PCRE.matches rex path) then error (f_"internal error: regular expression did not match ‘%s’") path; - let index = int_of_string (Str.matched_group 1 path) - 1 in + let index = try int_of_string (PCRE.sub 1) - 1 with Not_found -> 0 in g#aug_set (sprintf "/files%s/default" grub_config) (string_of_int index); g#aug_save () @@ -151,7 +151,7 @@ object ) else false method configure_console () - let rex = Str.regexp "\\(.*\\)\\b\\([xh]vc0\\)\\b\\(.*\\)" in + let rex = PCRE.compile "\\b([xh]vc0)\\b" in let expr = sprintf "/files%s/title/kernel/console" grub_config in let paths = g#aug_match expr in @@ -159,23 +159,21 @@ object List.iter ( fun path -> let console = g#aug_get path in - if Str.string_match rex console 0 then ( - let console = Str.global_replace rex "\\1ttyS0\\3" console in - g#aug_set path console - ) + let console' = PCRE.replace ~global:true rex "ttyS0" console in + if console <> console' then g#aug_set path console' ) paths; g#aug_save () method remove_console () - let rex = Str.regexp "\\(.*\\)\\b\\([xh]vc0\\)\\b\\(.*\\)" in + let rex = PCRE.compile "\\b([xh]vc0)\\b" in let expr = sprintf "/files%s/title/kernel/console" grub_config in let rec loop = function | [] -> () | path :: paths -> let console = g#aug_get path in - if Str.string_match rex console 0 then ( + if PCRE.matches rex console then ( ignore (g#aug_rm path); (* All the paths are invalid, restart the loop. *) let paths = g#aug_match expr in @@ -231,7 +229,7 @@ object (self) inherit bootloader method private grub2_update_console ~remove () - let rex = Str.regexp "\\(.*\\)\\bconsole=[xh]vc0\\b\\(.*\\)" in + let rex = PCRE.compile "\\bconsole=[xh]vc0\\b" in let paths = [ "/files/etc/sysconfig/grub/GRUB_CMDLINE_LINUX"; @@ -249,12 +247,12 @@ object (self) warning (f_"could not remove grub2 serial console (ignored)") | path :: _ -> let grub_cmdline = g#aug_get path in - if Str.string_match rex grub_cmdline 0 then ( + if PCRE.matches rex grub_cmdline then ( let new_grub_cmdline if not remove then - Str.global_replace rex "\\1console=ttyS0\\2" grub_cmdline + PCRE.replace ~global:true rex "console=ttyS0" grub_cmdline else - Str.global_replace rex "\\1\\2" grub_cmdline in + PCRE.replace ~global:true rex "" grub_cmdline in g#aug_set path new_grub_cmdline; g#aug_save (); diff --git a/v2v/linux_kernels.ml b/v2v/linux_kernels.ml index 6e1ca4bf1..9853b0029 100644 --- a/v2v/linux_kernels.ml +++ b/v2v/linux_kernels.ml @@ -63,10 +63,12 @@ let print_kernel_info chan prefix ki fpf "pvpanic=%b xen=%b debug=%b\n" ki.ki_supports_isa_pvpanic ki.ki_is_xen_pv_only_kernel ki.ki_is_debug +let rex_ko = PCRE.compile "\\.k?o(?:\\.xz)?$" +let rex_ko_extract = PCRE.compile "/([^/]+)\\.k?o(?:\\.xz)?$" + let detect_kernels (g : G.guestfs) inspect family bootloader (* What kernel/kernel-like packages are installed on the current guest? *) let installed_kernels : kernel_info list - let rex_ko = Str.regexp ".*\\.k?o\\(\\.xz\\)?$" in let check_config feature = function | None -> false | Some config -> @@ -82,12 +84,11 @@ let detect_kernels (g : G.guestfs) inspect family bootloader | _ -> false ) in - let rex_ko_extract = Str.regexp ".*/\\([^/]+\\)\\.k?o\\(\\.xz\\)?$" in let rex_initrd if family = `Debian_family then - Str.regexp "^initrd.img-.*$" + PCRE.compile "^initrd.img-.*$" else - Str.regexp "^initr\\(d\\|amfs\\)-.*\\(\\.img\\)?$" in + PCRE.compile "^initr(?:d|amfs)-.*(?:\\.img)?$" in filter_map ( function | { G.app2_name = name } as app @@ -133,7 +134,7 @@ let detect_kernels (g : G.guestfs) inspect family bootloader let files = g#ls "/boot" in let files = Array.to_list files in let files - List.filter (fun n -> Str.string_match rex_initrd n 0) files in + List.filter (fun n -> PCRE.matches rex_initrd n) files in let files List.filter ( fun n -> @@ -165,7 +166,7 @@ let detect_kernels (g : G.guestfs) inspect family bootloader let modules = g#find modpath in let modules = Array.to_list modules in let modules - List.filter (fun m -> Str.string_match rex_ko m 0) modules in + List.filter (fun m -> PCRE.matches rex_ko m) modules in assert (List.length modules > 0); (* Determine the kernel architecture by looking at the @@ -178,8 +179,8 @@ let detect_kernels (g : G.guestfs) inspect family bootloader (* Just return the module names, without path or extension. *) let modules = filter_map ( fun m -> - if Str.string_match rex_ko_extract m 0 then - Some (Str.matched_group 1 m) + if PCRE.matches rex_ko_extract m then + Some (PCRE.sub 1) else None ) modules in -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 15/22] v2v: windows: Convert the Windows-related conversion modules from Str to PCRE.
This is all new code since the virt-v2v conversion from Perl so there was no need to check back with the original code. See also commit 9d920732a35d9ef1b6a33509dabbbd44123e7eda. --- v2v/convert_windows.ml | 4 ++-- v2v/windows.ml | 15 +++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/v2v/convert_windows.ml b/v2v/convert_windows.ml index ac2b8180b..fa87ed84a 100644 --- a/v2v/convert_windows.ml +++ b/v2v/convert_windows.ml @@ -125,6 +125,7 @@ let convert (g : G.guestfs) inspect source output rcaps ) in (* Locate and retrieve all uninstallation commands for Parallels Tools *) + let prltools_rex = PCRE.compile "(Parallels|Virtuozzo) Tools" in let prltools_uninsts let uninsts = ref [] in @@ -146,8 +147,7 @@ let convert (g : G.guestfs) inspect source output rcaps raise Not_found; let dispname = g#hivex_value_string valueh in - if not (Str.string_match (Str.regexp ".*\\(Parallels\\|Virtuozzo\\) Tools.*") - dispname 0) then + if not (PCRE.matches prltools_rex dispname) then raise Not_found; let uninstval = "UninstallString" in diff --git a/v2v/windows.ml b/v2v/windows.ml index 6c6ed01c7..fb68c86c9 100644 --- a/v2v/windows.ml +++ b/v2v/windows.ml @@ -24,12 +24,12 @@ open Common_utils open Utils (* Detect anti-virus (AV) software installed in Windows guests. *) -let rex_virus = Str.regexp_case_fold "virus" (* generic *) -let rex_kaspersky = Str.regexp_case_fold "kaspersky" -let rex_mcafee = Str.regexp_case_fold "mcafee" -let rex_norton = Str.regexp_case_fold "norton" -let rex_sophos = Str.regexp_case_fold "sophos" -let rex_avg_tech = Str.regexp_case_fold "avg technologies" (* RHBZ#1261436 *) +let rex_virus = PCRE.compile ~caseless:true "virus" (* generic *) +let rex_kaspersky = PCRE.compile ~caseless:true "kaspersky" +let rex_mcafee = PCRE.compile ~caseless:true "mcafee" +let rex_norton = PCRE.compile ~caseless:true "norton" +let rex_sophos = PCRE.compile ~caseless:true "sophos" +let rex_avg_tech = PCRE.compile ~caseless:true "avg technologies" (* RHBZ#1261436 *) let rec detect_antivirus { Types.i_type = t; i_apps = apps } assert (t = "windows"); @@ -44,5 +44,4 @@ and check_app { Guestfs.app2_name = name; name =~ rex_sophos || publisher =~ rex_avg_tech -and (=~) str rex - try ignore (Str.search_forward rex str 0); true with Not_found -> false +and (=~) str rex = PCRE.matches rex str -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 16/22] v2v: -i ova: Use PCRE to match lines in manifest files.
--- v2v/input_ova.ml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml index 9ec991ba7..d521a9fc8 100644 --- a/v2v/input_ova.ml +++ b/v2v/input_ova.ml @@ -209,7 +209,7 @@ object (* Read any .mf (manifest) files and verify sha1. *) let mf = find_files exploded ".mf" in - let rex = Str.regexp "\\(SHA1\\|SHA256\\)(\\(.*\\))= \\([0-9a-fA-F]+\\)\r?" in + let rex = PCRE.compile "^(SHA1|SHA256)\\((.*)\\)= ([0-9a-fA-F]+)\r?$" in List.iter ( fun mf -> debug "processing manifest %s" mf; @@ -218,10 +218,10 @@ object let chan = open_in mf in let rec loop () let line = input_line chan in - if Str.string_match rex line 0 then ( - let mode = Str.matched_group 1 line in - let disk = Str.matched_group 2 line in - let expected = Str.matched_group 3 line in + if PCRE.matches rex line then ( + let mode = PCRE.sub 1 + and disk = PCRE.sub 2 + and expected = PCRE.sub 3 in let csum = Checksums.of_string mode expected in try if partial then -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 17/22] v2v: -o libvirt: Use PCRE to verify arch is sane.
--- v2v/output_libvirt.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/v2v/output_libvirt.ml b/v2v/output_libvirt.ml index b5df8245f..bc7f41ff9 100644 --- a/v2v/output_libvirt.ml +++ b/v2v/output_libvirt.ml @@ -28,8 +28,8 @@ open Xpath_helpers open Create_libvirt_xml let arch_is_sane_or_die - let rex = Str.regexp "^[-_A-Za-z0-9]+$" in - fun arch -> assert (Str.string_match rex arch 0) + let rex = PCRE.compile ~caseless:true "^[-_a-z0-9]+$" in + fun arch -> assert (PCRE.matches rex arch) let target_features_of_capabilities_doc doc arch let xpathctx = Xml.xpath_new_context doc in -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 18/22] v2v: vCenter: Replace Str with PCRE.
--- v2v/vCenter.ml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/v2v/vCenter.ml b/v2v/vCenter.ml index d5e7c0378..434c93395 100644 --- a/v2v/vCenter.ml +++ b/v2v/vCenter.ml @@ -113,7 +113,7 @@ let get_session_cookie password scheme uri sslverify url Some !session_cookie ) -let multiple_slash = Str.regexp "/+" +let multiple_slash = PCRE.compile "/+" let default_dc = "ha-datacenter" let guess_dcPath uri = function @@ -136,7 +136,7 @@ let guess_dcPath uri = function * remove the Cluster name (which still works in libvirt). *) (* Collapse multiple slashes to single slash. *) - let path = Str.global_replace multiple_slash "/" path in + let path = PCRE.replace ~global:true multiple_slash "/" path in (* Chop off the first and trailing '/' (if found). *) let path let len = String.length path in @@ -158,14 +158,13 @@ let guess_dcPath uri = function | _ -> (* Don't know, so guess. *) default_dc -let source_re = Str.regexp "^\\[\\(.*\\)\\] \\(.*\\)\\.vmdk$" +let source_re = PCRE.compile "^\\[(.*)\\] (.*)\\.vmdk$" let map_source_to_https dcPath uri server path - if not (Str.string_match source_re path 0) then + if not (PCRE.matches source_re path) then (path, true) else ( - let datastore = Str.matched_group 1 path - and path = Str.matched_group 2 path in + let datastore = PCRE.sub 1 and path = PCRE.sub 2 in let port match uri.uri_port with -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 19/22] v2v: parse OVF: Replace Str with PCRE.
--- v2v/parse_ovf_from_ova.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/v2v/parse_ovf_from_ova.ml b/v2v/parse_ovf_from_ova.ml index 6dc032407..613e8d075 100644 --- a/v2v/parse_ovf_from_ova.ml +++ b/v2v/parse_ovf_from_ova.ml @@ -142,10 +142,10 @@ let parse_ovf_from_ova ovf_filename Xml.xpathctx_set_current_context xpathctx n; let file_id = xpath_string_default "rasd:HostResource/text()" "" in - let rex = Str.regexp "^\\(ovf:\\)?/disk/\\(.*\\)" in - if Str.string_match rex file_id 0 then ( + let rex = PCRE.compile "^(?:ovf:)?/disk/(.*)" in + if PCRE.matches rex file_id then ( (* Chase the references through to the actual file name. *) - let file_id = Str.matched_group 2 file_id in + let file_id = PCRE.sub 1 in let expr = sprintf "/ovf:Envelope/ovf:DiskSection/ovf:Disk[@ovf:diskId='%s']/@ovf:fileRef" file_id in let file_ref match xpath_string expr with -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 20/22] v2v: parse VMX: Replace Str with PCRE.
--- v2v/parse_vmx.ml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/v2v/parse_vmx.ml b/v2v/parse_vmx.ml index 770dc29d3..271066eb9 100644 --- a/v2v/parse_vmx.ml +++ b/v2v/parse_vmx.ml @@ -226,7 +226,7 @@ and vmx_bool_of_string t else failwith "bool_of_string" (* Regular expression used to match key = "value" in VMX file. *) -let rex = Str.regexp "^\\([^ \t=]+\\)[ \t]*=[ \t]*\"\\(.*\\)\"$" +let rex = PCRE.compile "^([^ \t=]+)\\s*=\\s*\"(.*)\"$" (* Remove the weird escapes used in value strings. See description above. *) let remove_vmx_escapes str @@ -292,10 +292,9 @@ and parse_string str (* Parse the lines into key = "value". *) let lines = filter_map ( fun line -> - if Str.string_match rex line 0 then ( - let key = Str.matched_group 1 line in + if PCRE.matches rex line then ( + let key = PCRE.sub 1 and value = PCRE.sub 2 in let key = String.lowercase_ascii key in - let value = Str.matched_group 2 line in let value = remove_vmx_escapes value in Some (key, value) ) -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 21/22] generator: Replace use of Str.split with String.nsplit.
Faster and equivalent. --- generator/actions.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generator/actions.ml b/generator/actions.ml index 914c123b1..e052284d0 100644 --- a/generator/actions.ml +++ b/generator/actions.ml @@ -98,7 +98,7 @@ let non_daemon_functions, daemon_functions List.fold_left ( fun a b -> a ^ String.uppercase_ascii (Str.first_chars b 1) ^ Str.string_after b 1 - ) "" (Str.split (Str.regexp "_") name) + ) "" (String.nsplit "_" name) in let make_camel_case_if_not_set f if f.camel_name = "" then -- 2.13.2
Richard W.M. Jones
2017-Sep-22 07:36 UTC
[Libguestfs] [PATCH v3 22/22] v2v: utils: Replace Str.bounded_split with PCRE.nsplit.
Updates commit 8f91d3a9b0356a701c1904e0a2efa5d272d08ac2 similar to Tomáš's original intended code. --- v2v/utils.ml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/v2v/utils.ml b/v2v/utils.ml index 008452d69..12ebe23f4 100644 --- a/v2v/utils.ml +++ b/v2v/utils.ml @@ -138,6 +138,8 @@ let backend_is_libvirt () let backend = fst (String.split ":" backend) in backend = "libvirt" +let ws = PCRE.compile "\\s+" + let find_file_in_tar tar filename let lines = external_command (sprintf "tar tRvf %s" (Filename.quote tar)) in let rec loop lines @@ -147,7 +149,7 @@ let find_file_in_tar tar filename (* Lines have the form: * block <offset>: <perms> <owner>/<group> <size> <mdate> <mtime> <file> *) - let elems = Str.bounded_split (Str.regexp " +") line 8 in + let elems = PCRE.nsplit ~max:8 ws line in if List.length elems = 8 && List.hd elems = "block" then ( let elems = Array.of_list elems in let offset = elems.(1) in -- 2.13.2
Pino Toscano
2017-Sep-27 12:37 UTC
Re: [Libguestfs] [PATCH v3 01/22] common/mlpcre: Raise Invalid_argument if PCRE.sub n parameter is negative.
On Friday, 22 September 2017 09:36:02 CEST Richard W.M. Jones wrote:> --- a/common/mlpcre/pcre-c.c > +++ b/common/mlpcre/pcre-c.c > @@ -201,6 +201,7 @@ value > guestfs_int_pcre_sub (value nv) > { > CAMLparam1 (nv); > + int n = Int_val (nv);This variable can be const, to avoid accidental changes. -- Pino Toscano
Pino Toscano
2017-Sep-27 12:37 UTC
Re: [Libguestfs] [PATCH v3 02/22] common/mlpcre: Add PCRE.subi to return indexes instead of the substring.
On Friday, 22 September 2017 09:36:03 CEST Richard W.M. Jones wrote:> diff --git a/common/mlpcre/pcre-c.c b/common/mlpcre/pcre-c.c > index da9b50d34..15775dad0 100644 > --- a/common/mlpcre/pcre-c.c > +++ b/common/mlpcre/pcre-c.c > @@ -225,3 +225,30 @@ guestfs_int_pcre_sub (value nv) > memcpy (String_val (strv), str, len); > CAMLreturn (strv); > } > + > +value > +guestfs_int_pcre_subi (value nv) > +{ > + CAMLparam1 (nv); > + int n = Int_val (nv); > + CAMLlocal1 (rv); > + struct last_match *m = gl_tls_get (last_match);Both 'n' and 'm' can be const. -- Pino Toscano
Pino Toscano
2017-Sep-27 12:38 UTC
Re: [Libguestfs] [PATCH v3 13/22] v2v: linux: Properly ignore rpm/dpkg-move-aside kernels.
On Friday, 22 September 2017 09:36:14 CEST Richard W.M. Jones wrote:> diff --git a/v2v/linux_bootloaders.ml b/v2v/linux_bootloaders.ml > index 210cce762..00cb5cd19 100644 > --- a/v2v/linux_bootloaders.ml > +++ b/v2v/linux_bootloaders.ml > @@ -319,9 +319,9 @@ object (self) > Array.to_list (g#glob_expand "/boot/kernel-*") @ > Array.to_list (g#glob_expand "/boot/vmlinuz-*") @ > Array.to_list (g#glob_expand "/vmlinuz-*") in > - let rex = Str.regexp ".*\\.\\(dpkg-.*|rpmsave|rpmnew\\)$" in > + let rex = PCRE.compile "\\.(?:dpkg-.*|rpmsave|rpmnew)$" in > let vmlinuzes = List.filter ( > - fun file -> not (Str.string_match rex file 0) > + fun file -> not (PCRE.matches rex file) > ) vmlinuzes inTBH here I'd just drop the regexp usage, and use a static list of suffixes (there are not that many, anyway) -- something like: let suffixes = [ ".dpkg-old"; ".dpkg-new"; ".rpmsave"; ".rpmnew"; ] in let vmlinuzes = List.filter ( fun file -> not (List.exists (Filename.check_suffix file)) ) vmlinuxes in Even better, this can be moved to a separate function in the Linux module, which is where (most of) the interaction with package managers happen: let is_package_manager_file file (* Recognized suffixes of package managers. *) let suffixes = [ ".dpkg-old"; ".dpkg-new"; ".rpmsave"; ".rpmnew"; ] in List.exists (Filename.check_suffix file) ... let vmlinuzes = List.filter ( fun file -> not (Linux.is_package_manager_file file) ) vmlinuxes in (Of course with a better naming for the function.) -- Pino Toscano
Pino Toscano
2017-Sep-27 12:38 UTC
Re: [Libguestfs] [PATCH v3 15/22] v2v: windows: Convert the Windows-related conversion modules from Str to PCRE.
On Friday, 22 September 2017 09:36:16 CEST Richard W.M. Jones wrote:> diff --git a/v2v/convert_windows.ml b/v2v/convert_windows.ml > index ac2b8180b..fa87ed84a 100644 > --- a/v2v/convert_windows.ml > +++ b/v2v/convert_windows.ml > @@ -125,6 +125,7 @@ let convert (g : G.guestfs) inspect source output rcaps > ) in > > (* Locate and retrieve all uninstallation commands for Parallels Tools *) > + let prltools_rex = PCRE.compile "(Parallels|Virtuozzo) Tools" in > let prltools_uninsts > let uninsts = ref [] in > > @@ -146,8 +147,7 @@ let convert (g : G.guestfs) inspect source output rcaps > raise Not_found; > > let dispname = g#hivex_value_string valueh in > - if not (Str.string_match (Str.regexp ".*\\(Parallels\\|Virtuozzo\\) Tools.*") > - dispname 0) then > + if not (PCRE.matches prltools_rex dispname) thenTBH I'd simplify here using String.find: if (String.find dispname "Parallels Tools." = -1) && (String.find dispname "Virtuozzo Tools." = -1) then It should be simpler and faster. -- Pino Toscano
Pino Toscano
2017-Sep-27 12:39 UTC
Re: [Libguestfs] [PATCH v3 18/22] v2v: vCenter: Replace Str with PCRE.
On Friday, 22 September 2017 09:36:19 CEST Richard W.M. Jones wrote:> --- > v2v/vCenter.ml | 11 +++++------ > 1 file changed, 5 insertions(+), 6 deletions(-) > > diff --git a/v2v/vCenter.ml b/v2v/vCenter.ml > index d5e7c0378..434c93395 100644 > --- a/v2v/vCenter.ml > +++ b/v2v/vCenter.ml > @@ -113,7 +113,7 @@ let get_session_cookie password scheme uri sslverify url > Some !session_cookie > ) > > -let multiple_slash = Str.regexp "/+" > +let multiple_slash = PCRE.compile "/+"This can be optimized as "/{2,}", so single slashes will not be replaced by themselves. -- Pino Toscano
Maybe Matching Threads
- [PATCH v2 00/18] Replace many more uses of the Str module with PCRE.
- [PATCH 0/4] Replace some uses of the Str module with PCRE.
- [PATCH v2 0/3] v2v: add -o json output mode
- [PATCH 0/2] Add lightweight bindings for PCRE.
- [PATCH 0/3] RFC: v2v: add -o json output mode