Pino Toscano
2019-Aug-13 13:04 UTC
[Libguestfs] [PATCH 0/3] generator: pod2text-related improvements
- refactor memoization code - pass pod as stdin rather than files Pino Toscano (3): generator: isolate memoized cache in own module generator: adjust variable names generator: improve pod2text invocation generator/Makefile.am | 3 ++ generator/memoized_cache.ml | 62 +++++++++++++++++++++ generator/memoized_cache.mli | 29 ++++++++++ generator/utils.ml | 101 +++++++++++++++-------------------- 4 files changed, 137 insertions(+), 58 deletions(-) create mode 100644 generator/memoized_cache.ml create mode 100644 generator/memoized_cache.mli -- 2.21.0
Pino Toscano
2019-Aug-13 13:04 UTC
[Libguestfs] [PATCH 1/3] generator: isolate memoized cache in own module
Isolate the logic for the memoized disk cache in a small module, so it
can be reused for other tools.
Other than refactoring, there should be no behaviour changes.
---
generator/Makefile.am | 3 ++
generator/memoized_cache.ml | 62 ++++++++++++++++++++++++
generator/memoized_cache.mli | 29 ++++++++++++
generator/utils.ml | 92 ++++++++++++++++--------------------
4 files changed, 134 insertions(+), 52 deletions(-)
create mode 100644 generator/memoized_cache.ml
create mode 100644 generator/memoized_cache.mli
diff --git a/generator/Makefile.am b/generator/Makefile.am
index 283cf3769..fd854ad03 100644
--- a/generator/Makefile.am
+++ b/generator/Makefile.am
@@ -85,6 +85,8 @@ sources = \
lua.mli \
main.ml \
main.mli \
+ memoized_cache.ml \
+ memoized_cache.mli \
OCaml.ml \
OCaml.mli \
optgroups.ml \
@@ -121,6 +123,7 @@ sources = \
# In build dependency order.
objects = \
types.cmo \
+ memoized_cache.cmo \
utils.cmo \
proc_nr.cmo \
actions_augeas.cmo \
diff --git a/generator/memoized_cache.ml b/generator/memoized_cache.ml
new file mode 100644
index 000000000..91493942e
--- /dev/null
+++ b/generator/memoized_cache.ml
@@ -0,0 +1,62 @@
+(* libguestfs
+ * Copyright (C) 2009-2019 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *)
+
+(* Please read generator/README first. *)
+
+open Std_utils
+
+open Printf
+
+type ('a, 'b) t = {
+ memo : ('a, 'b) Hashtbl.t;
+ filename : string;
+ lookup_fn : 'a -> 'b;
+ batch_size : int;
+ mutable unsaved_count : int;
+}
+
+let memo_save t + with_open_out t.filename
+ (fun chan -> output_value chan t.memo);
+ t.unsaved_count <- 0
+
+let memo_updated t + t.unsaved_count <- t.unsaved_count + 1;
+ if t.unsaved_count >= t.batch_size then
+ memo_save t
+
+let create ?(version = 1) ?(batch_size = 100) name lookup_fn + let filename =
sprintf "generator/.%s.data.version.%d" name version in
+ let memo + try with_open_in filename input_value
+ with _ -> Hashtbl.create 13 in
+ {
+ memo; filename; lookup_fn; batch_size; unsaved_count = 0;
+ }
+
+let save t + if t.unsaved_count > 0 then
+ memo_save t
+
+let find t key + try Hashtbl.find t.memo key
+ with Not_found ->
+ let res = t.lookup_fn key in
+ Hashtbl.add t.memo key res;
+ memo_updated t;
+ res
diff --git a/generator/memoized_cache.mli b/generator/memoized_cache.mli
new file mode 100644
index 000000000..7ad6c7319
--- /dev/null
+++ b/generator/memoized_cache.mli
@@ -0,0 +1,29 @@
+(* libguestfs
+ * Copyright (C) 2009-2019 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *)
+
+(* Please read generator/README first. *)
+
+(** A simple memoized cache. *)
+
+type ('a, 'b) t
+
+val create : ?version:int -> ?batch_size:int -> string -> ('a
-> 'b) -> ('a, 'b) t
+
+val find : ('a, 'b) t -> 'a -> 'b
+
+val save : ('a, 'b) t -> unit
diff --git a/generator/utils.ml b/generator/utils.ml
index 460b61384..dea352afd 100644
--- a/generator/utils.ml
+++ b/generator/utils.ml
@@ -176,26 +176,44 @@ let html_escape text type memo_key = int option * bool *
bool * string * string
(* width, trim, discard, name, longdesc *)
type memo_value = string list (* list of lines of POD file *)
+let run_pod2text (width, trim, discard, name, longdesc) + let filename, chan =
Filename.open_temp_file "gen" ".tmp" in
+ fprintf chan "=encoding utf8\n\n";
+ fprintf chan "=head1 %s\n\n%s\n" name longdesc;
+ close_out chan;
+ let cmd + match width with
+ | Some width ->
+ sprintf "pod2text -w %d %s" width (Filename.quote filename)
+ | None ->
+ sprintf "pod2text %s" (Filename.quote filename) in
+ let chan = open_process_in cmd in
+ let lines = ref [] in
+ let rec loop i + let line = input_line chan in
+ if i = 1 && discard then (* discard the first line of output *)
+ loop (i+1)
+ else (
+ let line = if trim then String.triml line else line in
+ lines := line :: !lines;
+ loop (i+1)
+ ) in
+ let lines : memo_value = try loop 1 with End_of_file -> List.rev !lines in
+ unlink filename;
+ (match close_process_in chan with
+ | WEXITED 0 -> ()
+ | WEXITED i ->
+ failwithf "pod2text: process exited with non-zero status (%d)"
i
+ | WSIGNALED i | WSTOPPED i ->
+ failwithf "pod2text: process signalled or stopped by signal
%d" i
+ );
+ lines
+let pod2text_memo : (memo_key, memo_value) Memoized_cache.t +
Memoized_cache.create ~version:2 "pod2text" run_pod2text
-let pod2text_memo_filename = "generator/.pod2text.data.version.2"
-let pod2text_memo : (memo_key, memo_value) Hashtbl.t - try with_open_in
pod2text_memo_filename input_value
- with _ -> Hashtbl.create 13
-let pod2text_memo_unsaved_count = ref 0
let pod2text_memo_atexit = ref false
let pod2text_memo_save () - with_open_out pod2text_memo_filename
- (fun chan -> output_value chan pod2text_memo)
-let pod2text_memo_updated () - if not (!pod2text_memo_atexit) then (
- at_exit pod2text_memo_save;
- pod2text_memo_atexit := true;
- );
- pod2text_memo_unsaved_count := !pod2text_memo_unsaved_count + 1;
- if !pod2text_memo_unsaved_count >= 100 then (
- pod2text_memo_save ();
- pod2text_memo_unsaved_count := 0;
- )
+ Memoized_cache.save pod2text_memo
(* Useful if you need the longdesc POD text as plain text. Returns a
* list of lines.
@@ -205,41 +223,11 @@ let pod2text_memo_updated () *)
let pod2text ?width ?(trim = true) ?(discard = true) name longdesc let key :
memo_key = width, trim, discard, name, longdesc in
- try Hashtbl.find pod2text_memo key
- with Not_found ->
- let filename, chan = Filename.open_temp_file "gen"
".tmp" in
- fprintf chan "=encoding utf8\n\n";
- fprintf chan "=head1 %s\n\n%s\n" name longdesc;
- close_out chan;
- let cmd - match width with
- | Some width ->
- sprintf "pod2text -w %d %s" width (Filename.quote filename)
- | None ->
- sprintf "pod2text %s" (Filename.quote filename) in
- let chan = open_process_in cmd in
- let lines = ref [] in
- let rec loop i - let line = input_line chan in
- if i = 1 && discard then (* discard the first line of output *)
- loop (i+1)
- else (
- let line = if trim then String.triml line else line in
- lines := line :: !lines;
- loop (i+1)
- ) in
- let lines : memo_value = try loop 1 with End_of_file -> List.rev !lines
in
- unlink filename;
- (match close_process_in chan with
- | WEXITED 0 -> ()
- | WEXITED i ->
- failwithf "pod2text: process exited with non-zero status
(%d)" i
- | WSIGNALED i | WSTOPPED i ->
- failwithf "pod2text: process signalled or stopped by signal
%d" i
- );
- Hashtbl.add pod2text_memo key lines;
- pod2text_memo_updated ();
- lines
+ if not (!pod2text_memo_atexit) then (
+ at_exit pod2text_memo_save;
+ pod2text_memo_atexit := true;
+ );
+ Memoized_cache.find pod2text_memo key
(* Compare two actions (for sorting). *)
let action_compare { name = n1 } { name = n2 } = compare n1 n2
--
2.21.0
Pino Toscano
2019-Aug-13 13:04 UTC
[Libguestfs] [PATCH 2/3] generator: adjust variable names
Rename some pod2text-related variables to better-fitting names.
This is just a small refactoring.
---
generator/utils.ml | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/generator/utils.ml b/generator/utils.ml
index dea352afd..bcbac8d2c 100644
--- a/generator/utils.ml
+++ b/generator/utils.ml
@@ -173,9 +173,9 @@ let html_escape text text
(* Used to memoize the result of pod2text. *)
-type memo_key = int option * bool * bool * string * string
- (* width, trim, discard, name, longdesc *)
-type memo_value = string list (* list of lines of POD file *)
+type pod2text_memo_key = int option * bool * bool * string * string
+ (* width, trim, discard, name, longdesc *)
+type pod2text_memo_value = string list (* list of lines of POD file *)
let run_pod2text (width, trim, discard, name, longdesc) let filename, chan =
Filename.open_temp_file "gen" ".tmp" in
fprintf chan "=encoding utf8\n\n";
@@ -198,7 +198,7 @@ let run_pod2text (width, trim, discard, name, longdesc)
lines := line :: !lines;
loop (i+1)
) in
- let lines : memo_value = try loop 1 with End_of_file -> List.rev !lines in
+ let lines : pod2text_memo_value = try loop 1 with End_of_file -> List.rev
!lines in
unlink filename;
(match close_process_in chan with
| WEXITED 0 -> ()
@@ -208,11 +208,11 @@ let run_pod2text (width, trim, discard, name, longdesc)
failwithf "pod2text: process signalled or stopped by signal %d" i
);
lines
-let pod2text_memo : (memo_key, memo_value) Memoized_cache.t +let pod2text_memo
: (pod2text_memo_key, pod2text_memo_value) Memoized_cache.t
Memoized_cache.create ~version:2 "pod2text" run_pod2text
-let pod2text_memo_atexit = ref false
-let pod2text_memo_save () +let memos_atexit = ref false
+let memos_save () Memoized_cache.save pod2text_memo
(* Useful if you need the longdesc POD text as plain text. Returns a
@@ -222,10 +222,10 @@ let pod2text_memo_save () * we memoize the results.
*)
let pod2text ?width ?(trim = true) ?(discard = true) name longdesc - let key :
memo_key = width, trim, discard, name, longdesc in
- if not (!pod2text_memo_atexit) then (
- at_exit pod2text_memo_save;
- pod2text_memo_atexit := true;
+ let key : pod2text_memo_key = width, trim, discard, name, longdesc in
+ if not (!memos_atexit) then (
+ at_exit memos_save;
+ memos_atexit := true;
);
Memoized_cache.find pod2text_memo key
--
2.21.0
Pino Toscano
2019-Aug-13 13:04 UTC
[Libguestfs] [PATCH 3/3] generator: improve pod2text invocation
- feed the content directly to stdin, avoid the need of read (and write)
a temporary file
- read all the output at once, without a tail-recurive function
- apply trimming and first line discarding after closing the process
---
generator/utils.ml | 35 ++++++++++++++++-------------------
1 file changed, 16 insertions(+), 19 deletions(-)
diff --git a/generator/utils.ml b/generator/utils.ml
index bcbac8d2c..f8837ed3c 100644
--- a/generator/utils.ml
+++ b/generator/utils.ml
@@ -177,36 +177,33 @@ type pod2text_memo_key = int option * bool * bool * string
* string
(* width, trim, discard, name, longdesc *)
type pod2text_memo_value = string list (* list of lines of POD file *)
let run_pod2text (width, trim, discard, name, longdesc) - let filename, chan =
Filename.open_temp_file "gen" ".tmp" in
- fprintf chan "=encoding utf8\n\n";
- fprintf chan "=head1 %s\n\n%s\n" name longdesc;
- close_out chan;
let cmd match width with
| Some width ->
- sprintf "pod2text -w %d %s" width (Filename.quote filename)
+ sprintf "pod2text -w %d" width
| None ->
- sprintf "pod2text %s" (Filename.quote filename) in
- let chan = open_process_in cmd in
+ "pod2text" in
+ let chan_out, chan_in = open_process cmd in
+ output_string chan_in "=encoding utf8\n\n";
+ output_string chan_in (sprintf "=head1 %s\n\n%s\n" name longdesc);
+ close_out chan_in;
let lines = ref [] in
- let rec loop i - let line = input_line chan in
- if i = 1 && discard then (* discard the first line of output *)
- loop (i+1)
- else (
- let line = if trim then String.triml line else line in
- lines := line :: !lines;
- loop (i+1)
- ) in
- let lines : pod2text_memo_value = try loop 1 with End_of_file -> List.rev
!lines in
- unlink filename;
- (match close_process_in chan with
+ (try while true do lines := input_line chan_out :: !lines done
+ with End_of_file -> ());
+ let lines = List.rev !lines in
+ (match close_process (chan_out, chan_in) with
| WEXITED 0 -> ()
| WEXITED i ->
failwithf "pod2text: process exited with non-zero status (%d)"
i
| WSIGNALED i | WSTOPPED i ->
failwithf "pod2text: process signalled or stopped by signal
%d" i
);
+ let lines + if discard then (* discard the first line of output *) List.tl
lines
+ else lines in
+ let lines + if trim then List.map String.triml lines
+ else lines in
lines
let pod2text_memo : (pod2text_memo_key, pod2text_memo_value) Memoized_cache.t
Memoized_cache.create ~version:2 "pod2text" run_pod2text
--
2.21.0
Eric Blake
2019-Aug-13 13:32 UTC
Re: [Libguestfs] [PATCH 3/3] generator: improve pod2text invocation
On 8/13/19 8:04 AM, Pino Toscano wrote:> - feed the content directly to stdin, avoid the need of read (and write) > a temporary file > - read all the output at once, without a tail-recurive functionrecursive> - apply trimming and first line discarding after closing the process > --- > generator/utils.ml | 35 ++++++++++++++++------------------- > 1 file changed, 16 insertions(+), 19 deletions(-) >-- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Richard W.M. Jones
2019-Aug-13 13:47 UTC
Re: [Libguestfs] [PATCH 3/3] generator: improve pod2text invocation
ACK series (apart from the spelling mistake :-) Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com virt-top is 'top' for virtual machines. Tiny program with many powerful monitoring features, net stats, disk stats, logging, etc. http://people.redhat.com/~rjones/virt-top
Seemingly Similar Threads
- [PATCH 0/3] generator: pod2text-related improvements
- [PATCH 1/2] v2v: -i ova: Hoist utility functions to the top of the file.
- [PATCH 0/2] v2v: -i ova: A couple of cleanup patches.
- [PATCH 0/3] v2v: -i ova: Prefer pigz or pxz for uncompressing OVA
- [PATCH 2/3] Convert source so it can be compiled with OCaml '-safe-string' option.