Richard W.M. Jones
2018-Apr-25  13:35 UTC
[Libguestfs] [PATCH v2 0/9] v2v: -i ova: Handle OVAs containing snapshots.
https://bugzilla.redhat.com/show_bug.cgi?id=1570407 This turned into quite an in-depth refactoring of how we handle OVAs. It also fixes a potential security issue. Rich.
Richard W.M. Jones
2018-Apr-25  13:35 UTC
[Libguestfs] [PATCH v2 1/9] mltools: Checksums.verify_checksum{, s} returns an error type instead of throwing exception.
Simple code refactoring, making it both more difficult to ignore the
error case and easier to add other error cases in future.
---
 builder/builder.ml           |  9 +++++----
 common/mltools/checksums.ml  | 17 +++++++++++++----
 common/mltools/checksums.mli | 16 +++++++++++-----
 v2v/input_ova.ml             | 10 ++++++----
 4 files changed, 35 insertions(+), 17 deletions(-)
diff --git a/builder/builder.ml b/builder/builder.ml
index 478b41bba..83c7aefed 100644
--- a/builder/builder.ml
+++ b/builder/builder.ml
@@ -313,10 +313,11 @@ let main ()      match entry with
     (* New-style: Using a checksum. *)
     | { Index.checksums = Some csums } ->
-      (try Checksums.verify_checksums csums template
-      with Checksums.Mismatched_checksum (csum, csum_actual) ->
-        error (f_"%s checksum of template did not match the expected
checksum!\n  found checksum: %s\n  expected checksum: %s\nTry:\n - Use the ‘-v’
option and look for earlier error messages.\n - Delete the cache: virt-builder
--delete-cache\n - Check no one has tampered with the website or your
network!")
-          (Checksums.string_of_csum_t csum) csum_actual
(Checksums.string_of_csum csum)
+      (match Checksums.verify_checksums csums template with
+       | Checksums.Good_checksum -> ()
+       | Checksums.Mismatched_checksum (csum, csum_actual) ->
+          error (f_"%s checksum of template did not match the expected
checksum!\n  found checksum: %s\n  expected checksum: %s\nTry:\n - Use the ‘-v’
option and look for earlier error messages.\n - Delete the cache: virt-builder
--delete-cache\n - Check no one has tampered with the website or your
network!")
+                (Checksums.string_of_csum_t csum) csum_actual
(Checksums.string_of_csum csum)
       )
 
     | { Index.checksums = None } ->
diff --git a/common/mltools/checksums.ml b/common/mltools/checksums.ml
index a40edca76..4dd69e734 100644
--- a/common/mltools/checksums.ml
+++ b/common/mltools/checksums.ml
@@ -27,7 +27,9 @@ type csum_t  | SHA256 of string
 | SHA512 of string
 
-exception Mismatched_checksum of (csum_t * string)
+type csum_result +  | Good_checksum
+  | Mismatched_checksum of csum_t * string
 
 let string_of_csum_t = function
   | SHA1 _ -> "sha1"
@@ -73,8 +75,15 @@ let compute_checksum csum_type ?tar filename  let
verify_checksum csum ?tar filename    let csum_type = string_of_csum_t csum in
   let csum_actual = compute_checksum csum_type ?tar filename in
-  if csum <> csum_actual then
-    raise (Mismatched_checksum (csum, (string_of_csum csum_actual)))
+  if csum = csum_actual then
+    Good_checksum
+  else
+    Mismatched_checksum (csum, string_of_csum csum_actual)
 
 let verify_checksums checksums filename -  List.iter (fun c ->
verify_checksum c filename) checksums
+  List.fold_left (
+    fun acc c ->
+      match acc with
+      | Good_checksum -> verify_checksum c filename
+      | Mismatched_checksum _ as acc -> acc
+  ) Good_checksum checksums
diff --git a/common/mltools/checksums.mli b/common/mltools/checksums.mli
index 92336a18b..d45b29dfd 100644
--- a/common/mltools/checksums.mli
+++ b/common/mltools/checksums.mli
@@ -21,7 +21,10 @@ type csum_t  | SHA256 of string
 | SHA512 of string
 
-exception Mismatched_checksum of (csum_t * string) (* expected checksum, got *)
+type csum_result +  | Good_checksum
+  (* expected checksum, actual checksum. *)
+  | Mismatched_checksum of csum_t * string
 
 val of_string : string -> string -> csum_t
 (** [of_string type value] returns the [csum_t] for the specified
@@ -29,14 +32,17 @@ val of_string : string -> string -> csum_t
 
     Raise [Invalid_argument] if the checksum type is not known. *)
 
-val verify_checksum : csum_t -> ?tar:string -> string -> unit
-(** [verify_checksum type filename] Verify the checksum of the file.
+val verify_checksum : csum_t -> ?tar:string -> string -> csum_result
+(** [verify_checksum type filename] verifies the checksum of the file.
 
     When optional [tar] is used it is path to uncompressed tar archive
     and the [filename] is a path in the tar archive. *)
 
-val verify_checksums : csum_t list -> string -> unit
-(** Verify all the checksums of the file. *)
+val verify_checksums : csum_t list -> string -> csum_result
+(** Verify all the checksums of the file.
+
+    If any checksum fails, the first failure (only) is returned in
+    {!csum_result}. *)
 
 val string_of_csum_t : csum_t -> string
 (** Return a string representation of the checksum type. *)
diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml
index a909b92ed..59dbe6f5f 100644
--- a/v2v/input_ova.ml
+++ b/v2v/input_ova.ml
@@ -224,15 +224,17 @@ object
                 and disk = PCRE.sub 2
                 and expected = PCRE.sub 3 in
                 let csum = Checksums.of_string mode expected in
-                try
+                match
                   if partial then
                     Checksums.verify_checksum csum
                                               ~tar:ova (mf_subfolder // disk)
                   else
                     Checksums.verify_checksum csum (mf_folder // disk)
-                with Checksums.Mismatched_checksum (_, actual) ->
-                  error (f_"checksum of disk %s does not match manifest %s
(actual %s(%s) = %s, expected %s(%s) = %s)")
-                        disk mf mode disk actual mode disk expected;
+                with
+                | Checksums.Good_checksum -> ()
+                | Checksums.Mismatched_checksum (_, actual) ->
+                   error (f_"checksum of disk %s does not match manifest
%s (actual %s(%s) = %s, expected %s(%s) = %s)")
+                         disk mf mode disk actual mode disk expected;
               )
               else
                 warning (f_"unable to parse line from manifest file:
%S") line;
-- 
2.16.2
Richard W.M. Jones
2018-Apr-25  13:35 UTC
[Libguestfs] [PATCH v2 2/9] v2v: -i ova: Ignore non-existent files mentioned in *.mf.
Some OVA files generated by VMware have a *.mf file which contains
checksums for files which don't exist in the OVA.  Ignore these
checksums.
Thanks: Nisim Simsolo.
---
 builder/builder.ml           |  3 +++
 common/mltools/checksums.ml  | 28 +++++++++++++++++++++-------
 common/mltools/checksums.mli |  2 ++
 v2v/input_ova.ml             |  9 ++++++++-
 4 files changed, 34 insertions(+), 8 deletions(-)
diff --git a/builder/builder.ml b/builder/builder.ml
index 83c7aefed..2ed7cb97a 100644
--- a/builder/builder.ml
+++ b/builder/builder.ml
@@ -318,6 +318,9 @@ let main ()         | Checksums.Mismatched_checksum (csum,
csum_actual) ->
           error (f_"%s checksum of template did not match the expected
checksum!\n  found checksum: %s\n  expected checksum: %s\nTry:\n - Use the ‘-v’
option and look for earlier error messages.\n - Delete the cache: virt-builder
--delete-cache\n - Check no one has tampered with the website or your
network!")
                 (Checksums.string_of_csum_t csum) csum_actual
(Checksums.string_of_csum csum)
+       | Checksums.Missing_file ->
+          error (f_"%s: template not downloaded or deleted.  You may have
run ‘virt-builder --delete-cache’ in parallel.")
+                template
       )
 
     | { Index.checksums = None } ->
diff --git a/common/mltools/checksums.ml b/common/mltools/checksums.ml
index 4dd69e734..fdeae1dff 100644
--- a/common/mltools/checksums.ml
+++ b/common/mltools/checksums.ml
@@ -30,6 +30,7 @@ type csum_t  type csum_result    | Good_checksum
   | Mismatched_checksum of csum_t * string
+  | Missing_file
 
 let string_of_csum_t = function
   | SHA1 _ -> "sha1"
@@ -72,18 +73,31 @@ let compute_checksum csum_type ?tar filename      let
csum_str = fst (String.split " " line) in
     of_string csum_type csum_str
 
+(* Check if the direct file exists or if it exists in the tarball. *)
+let file_exists ?tar filename +  match tar with
+  | None -> Sys.file_exists filename
+  | Some tar ->
+     let cmd +       sprintf "tar tf %s %s >/dev/null 2>&1"
(quote tar) (quote filename) in
+     Sys.command cmd = 0
+
 let verify_checksum csum ?tar filename -  let csum_type = string_of_csum_t csum
in
-  let csum_actual = compute_checksum csum_type ?tar filename in
-  if csum = csum_actual then
-    Good_checksum
-  else
-    Mismatched_checksum (csum, string_of_csum csum_actual)
+  if not (file_exists ?tar filename) then
+    Missing_file
+  else (
+    let csum_type = string_of_csum_t csum in
+    let csum_actual = compute_checksum csum_type ?tar filename in
+    if csum = csum_actual then
+      Good_checksum
+    else
+      Mismatched_checksum (csum, string_of_csum csum_actual)
+  )
 
 let verify_checksums checksums filename    List.fold_left (
     fun acc c ->
       match acc with
       | Good_checksum -> verify_checksum c filename
-      | Mismatched_checksum _ as acc -> acc
+      | (Mismatched_checksum _|Missing_file) as acc -> acc
   ) Good_checksum checksums
diff --git a/common/mltools/checksums.mli b/common/mltools/checksums.mli
index d45b29dfd..533e399bf 100644
--- a/common/mltools/checksums.mli
+++ b/common/mltools/checksums.mli
@@ -25,6 +25,8 @@ type csum_result    | Good_checksum
   (* expected checksum, actual checksum. *)
   | Mismatched_checksum of csum_t * string
+  (* referenced file does not exist *)
+  | Missing_file
 
 val of_string : string -> string -> csum_t
 (** [of_string type value] returns the [csum_t] for the specified
diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml
index 59dbe6f5f..f23a1f2a9 100644
--- a/v2v/input_ova.ml
+++ b/v2v/input_ova.ml
@@ -234,7 +234,14 @@ object
                 | Checksums.Good_checksum -> ()
                 | Checksums.Mismatched_checksum (_, actual) ->
                    error (f_"checksum of disk %s does not match manifest
%s (actual %s(%s) = %s, expected %s(%s) = %s)")
-                         disk mf mode disk actual mode disk expected;
+                         disk mf mode disk actual mode disk expected
+                | Checksums.Missing_file ->
+                   (* RHBZ#1570407: Some OVA files generated by VMware
+                    * reference non-existent components in the *.mf file.
+                    * Generate a warning and ignore it.
+                    *)
+                   warning (f_"%s has a checksum for non-existent file %s
(ignored)")
+                           mf disk
               )
               else
                 warning (f_"unable to parse line from manifest file:
%S") line;
-- 
2.16.2
Richard W.M. Jones
2018-Apr-25  13:35 UTC
[Libguestfs] [PATCH v2 3/9] v2v: Ignore miscellaneous tar messages when parsing tar for file locations.
We use ‘tar tRvf’ to parse the locations of files within the tarball.
However examination of tar.git:src/list.c shows various other messages
which can appear in the output:
  block <offset>: ** Block of NULs **
  block <offset>: ** End of File **
Indeed it was easy to produce the first message just by using modern
tar to create a tarball:
  $ tar tRvf '/var/tmp/bz1570407-reproducer.ova'
  block 0: -rw-r--r-- rjones/rjones   100 2018-04-22 17:06
RHEL7_3_042218_extra-disk1.vmdk.000000000
  block 2: -rw-r--r-- rjones/rjones   243 2018-04-22 17:07
RHEL7_3_042218_extra.mf
  block 4: -rw-r--r-- rjones/rjones 13066 2018-04-22 15:08
RHEL7_3_042218_extra.ovf
  block 31: ** Block of NULs **
Ignore these messages.
---
 v2v/utils.ml | 71 ++++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 40 insertions(+), 31 deletions(-)
diff --git a/v2v/utils.ml b/v2v/utils.ml
index 372ad8aaa..d73011f9f 100644
--- a/v2v/utils.ml
+++ b/v2v/utils.ml
@@ -147,6 +147,7 @@ let error_if_no_ssh_agent ()      error (f_"ssh-agent
authentication has not been set up ($SSH_AUTH_SOCK is not set).  This is
required by qemu to do passwordless ssh access.  See the virt-v2v(1) man page
for more information.")
 
 let ws = PCRE.compile "\\s+"
+let re_tar_message = PCRE.compile "\\*\\* [^*]+ \\*\\*$"
 
 let find_file_in_tar tar filename    let lines = external_command (sprintf
"tar tRvf %s" (Filename.quote tar)) in
@@ -156,42 +157,50 @@ let find_file_in_tar tar filename      | line :: lines
-> (
       (* Lines have the form:
        * block <offset>: <perms> <owner>/<group>
<size> <mdate> <mtime> <file>
+       * or:
+       * block <offset>: ** Block of NULs **
+       * block <offset>: ** End of File **
        *)
-      let elems = PCRE.nsplit ~max:8 ws line in
-      if List.length elems = 8 && List.hd elems = "block"
then (
-        let elems = Array.of_list elems in
-        let offset = elems.(1) in
-        let size = elems.(4) in
-        let fname = elems.(7) in
+      if PCRE.matches re_tar_message line then
+        loop lines (* ignore "** Block of NULs **" etc. *)
+      else (
+        let elems = PCRE.nsplit ~max:8 ws line in
+        if List.length elems = 8 && List.hd elems = "block"
then (
+          let elems = Array.of_list elems in
+          let offset = elems.(1) in
+          let size = elems.(4) in
+          let fname = elems.(7) in
 
-        if fname <> filename then
-          loop lines
-        else (
-          let offset -            try
-              (* There should be a colon at the end *)
-              let i = String.rindex offset ':' in
-              if i == (String.length offset)-1 then
-                Int64.of_string (String.sub offset 0 i)
-              else
-                failwith "colon at wrong position"
-            with Failure _ | Not_found ->
-              failwithf (f_"invalid offset returned by tar: %S")
offset in
+          if fname <> filename then
+            loop lines
+          else (
+            let offset +              try
+                (* There should be a colon at the end *)
+                let i = String.rindex offset ':' in
+                if i == (String.length offset)-1 then
+                  Int64.of_string (String.sub offset 0 i)
+                else
+                  failwith "colon at wrong position"
+              with Failure _ | Not_found ->
+                failwithf (f_"invalid offset returned by tar: %S")
offset in
 
-          let size -            try Int64.of_string size
-            with Failure _ ->
-              failwithf (f_"invalid size returned by tar: %S") size
in
+            let size +              try Int64.of_string size
+              with Failure _ ->
+                failwithf (f_"invalid size returned by tar: %S") size
in
 
-          (* Note: Offset is actualy block number and there is a single
-           * block with tar header at the beginning of the file. So skip
-           * the header and convert the block number to bytes before
-           * returning.
-           *)
-          (offset +^ 1L) *^ 512L, size
+            (* Note: Offset is actualy block number and there is a single
+             * block with tar header at the beginning of the file. So skip
+             * the header and convert the block number to bytes before
+             * returning.
+             *)
+            (offset +^ 1L) *^ 512L, size
+          )
         )
-      ) else
-        failwithf (f_"failed to parse line returned by tar: %S") line
+        else
+          failwithf (f_"failed to parse line returned by tar: %S")
line
+      )
     )
   in
   loop lines
-- 
2.16.2
Richard W.M. Jones
2018-Apr-25  13:35 UTC
[Libguestfs] [PATCH v2 4/9] v2v: parse OVF: Export useful parse_disks function.
So you can parse out just the list of disks.
---
 v2v/parse_ovf_from_ova.ml  | 402 +++++++++++++++++++++++----------------------
 v2v/parse_ovf_from_ova.mli |   3 +
 2 files changed, 211 insertions(+), 194 deletions(-)
diff --git a/v2v/parse_ovf_from_ova.ml b/v2v/parse_ovf_from_ova.ml
index 2ffaf7ae4..7d4f2f543 100644
--- a/v2v/parse_ovf_from_ova.ml
+++ b/v2v/parse_ovf_from_ova.ml
@@ -35,7 +35,7 @@ type ovf_disk = {
   compressed : bool;            (* If the file is gzip compressed. *)
 }
 
-let parse_ovf_from_ova ovf_filename +let xpathctx_of_ovf ovf_filename    let
xml = read_whole_file ovf_filename in
   let doc = Xml.parse_memory xml in
 
@@ -50,207 +50,221 @@ let parse_ovf_from_ova ovf_filename   
Xml.xpath_register_ns xpathctx
                         "vssd"
"http://schemas.dmtf.org/wbem/wscim/1/cim-schema/2/CIM_VirtualSystemSettingData";
 
+  xpathctx
+
+let rec parse_ovf_from_ova ovf_filename +  let xpathctx = xpathctx_of_ovf
ovf_filename in
+
   let xpath_string = xpath_string xpathctx
   and xpath_int = xpath_int xpathctx
   and xpath_int64 = xpath_int64 xpathctx in
 
-  let rec parse_top () -    (* Search for vm name. *)
-    let name -      match xpath_string
"/ovf:Envelope/ovf:VirtualSystem/ovf:Name/text()" with
-      | None | Some "" -> None
-      | Some _ as name -> name in
+  (* Search for vm name. *)
+  let name +    match xpath_string
"/ovf:Envelope/ovf:VirtualSystem/ovf:Name/text()" with
+    | None | Some "" -> None
+    | Some _ as name -> name in
 
-    (* Search for memory. *)
-    let memory = Option.default (1024L *^ 1024L) (xpath_int64
"/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=4]/rasd:VirtualQuantity/text()")
in
-    let memory = memory *^ 1024L *^ 1024L in
+  (* Search for memory. *)
+  let memory = Option.default (1024L *^ 1024L) (xpath_int64
"/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=4]/rasd:VirtualQuantity/text()")
in
+  let memory = memory *^ 1024L *^ 1024L in
 
-    (* Search for number of vCPUs. *)
-    let vcpu = Option.default 1 (xpath_int
"/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=3]/rasd:VirtualQuantity/text()")
in
+  (* Search for number of vCPUs. *)
+  let vcpu = Option.default 1 (xpath_int
"/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=3]/rasd:VirtualQuantity/text()")
in
 
-    (* CPU topology.  coresPerSocket is a VMware proprietary extension.
-     * I couldn't find out how hyperthreads is specified in the OVF.
-     *)
-    let cores_per_socket = xpath_int
"/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=3]/vmw:CoresPerSocket/text()"
in
-    let cpu_topology -      match cores_per_socket with
-      | None -> None
-      | Some cores_per_socket when cores_per_socket <= 0 ->
-         warning (f_"invalid vmw:CoresPerSocket (%d) ignored")
-                 cores_per_socket;
-         None
-      | Some cores_per_socket ->
-         let sockets = vcpu / cores_per_socket in
-         if sockets <= 0 then (
-           warning (f_"invalid vmw:CoresPerSocket < number of
cores");
-           None
-         )
-         else
-           Some { s_cpu_sockets = sockets; s_cpu_cores = cores_per_socket;
-                  s_cpu_threads = 1 } in
-
-    (* BIOS or EFI firmware? *)
-    let firmware = Option.default "bios" (xpath_string
"/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/vmw:Config[@vmw:key=\"firmware\"]/@vmw:value")
in
-    let firmware -      match firmware with
-      | "bios" -> BIOS
-      | "efi" -> UEFI
-      | s ->
-         error (f_"unknown Config:firmware value %s (expected
\"bios\" or \"efi\")") s in
-
-    name, memory, vcpu, cpu_topology, firmware,
-    parse_disks (), parse_removables (), parse_nics ()
-
-  (* Helper function to return the parent controller of a disk. *)
-  and parent_controller id -    let expr = sprintf
"/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:InstanceID/text()=%d]/rasd:ResourceType/text()"
id in
-    let controller = xpath_int expr in
-
-    (* 5: IDE, 6: SCSI controller, 20: SATA *)
-    match controller with
-    | Some 5 -> Some Source_IDE
-    | Some 6 -> Some Source_SCSI
-    | Some 20 -> Some Source_SATA
-    | None ->
-       warning (f_"ova disk has no parent controller, please report this
as a bug supplying the *.ovf file extracted from the ova");
-       None
-    | Some controller ->
-       warning (f_"ova disk has an unknown VMware controller type (%d),
please report this as a bug supplying the *.ovf file extracted from the
ova")
-               controller;
-       None
-
-  (* Hard disks (ResourceType = 17). *)
-  and parse_disks () -    let disks = ref [] in
-    let expr =
"/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=17]"
in
-    let obj = Xml.xpath_eval_expression xpathctx expr in
-    let nr_nodes = Xml.xpathobj_nr_nodes obj in
-    for i = 0 to nr_nodes-1 do
-      let n = Xml.xpathobj_node obj i in
-      Xml.xpathctx_set_current_context xpathctx n;
-
-      (* XXX We assume the OVF lists these in order.
-         let address = xpath_int "rasd:AddressOnParent/text()" in
-       *)
-
-      (* Find the parent controller. *)
-      let parent_id = xpath_int "rasd:Parent/text()" in
-      let controller -        match parent_id with
-        | None -> None
-        | Some id -> parent_controller id in
-
-      Xml.xpathctx_set_current_context xpathctx n;
-      let file_id -        Option.default "" (xpath_string
"rasd:HostResource/text()") in
-      let rex = PCRE.compile "^(?:ovf:)?/disk/(.*)" in
-      if PCRE.matches rex file_id then (
-        (* Chase the references through to the actual file name. *)
-        let file_id = PCRE.sub 1 in
-        let expr = sprintf
"/ovf:Envelope/ovf:DiskSection/ovf:Disk[@ovf:diskId='%s']/@ovf:fileRef"
file_id in
-        let file_ref -          match xpath_string expr with
-          | None -> error (f_"error parsing disk fileRef")
-          | Some s -> s in
-        let expr = sprintf
"/ovf:Envelope/ovf:References/ovf:File[@ovf:id='%s']/@ovf:href"
file_ref in
-        let href -          match xpath_string expr with
-          | None -> error (f_"no href in ovf:File (id=%s)")
file_ref
-          | Some s -> s in
-
-        let expr = sprintf
"/ovf:Envelope/ovf:References/ovf:File[@ovf:id='%s']/@ovf:compression"
file_ref in
-        let compressed -          match xpath_string expr with
-          | None | Some "identity" -> false
-          | Some "gzip" -> true
-          | Some s -> error (f_"unsupported compression in OVF:
%s") s in
-
-        let disk = {
-          source_disk = {
-            s_disk_id = i;
-            s_qemu_uri = "";
-            s_format = Some "vmdk";
-            s_controller = controller;
-          };
-          href = href;
-          compressed = compressed
-        } in
-        List.push_front disk disks;
-      ) else
-        error (f_"could not parse disk rasd:HostResource from OVF
document")
-    done;
-    List.rev !disks
-
-  (* Floppies (ResourceType = 14), CDs (ResourceType = 15) and
-   * CDROMs (ResourceType = 16).  (What is the difference?)  Try hard
-   * to preserve the original ordering from the OVF.
+  (* CPU topology.  coresPerSocket is a VMware proprietary extension.
+   * I couldn't find out how hyperthreads is specified in the OVF.
    *)
-  and parse_removables () -    let removables = ref [] in
-    let expr =
"/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=14
or rasd:ResourceType/text()=15 or rasd:ResourceType/text()=16]" in
-    let obj = Xml.xpath_eval_expression xpathctx expr in
-    let nr_nodes = Xml.xpathobj_nr_nodes obj in
-    for i = 0 to nr_nodes-1 do
-      let n = Xml.xpathobj_node obj i in
-      Xml.xpathctx_set_current_context xpathctx n;
-      let id -        match xpath_int "rasd:ResourceType/text()" with
-        | None -> assert false
-        | Some (14|15|16 as i) -> i
-        | Some _ -> assert false in
-
-      let slot = xpath_int "rasd:AddressOnParent/text()" in
-
-      (* Find the parent controller. *)
-      let parent_id = xpath_int "rasd:Parent/text()" in
-      let controller -        match parent_id with
-        | None -> None
-        | Some id -> parent_controller id in
-
-      let typ -        match id with
-        | 14 -> Floppy
-        | 15 | 16 -> CDROM
-        | _ -> assert false in
+  let cores_per_socket = xpath_int
"/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=3]/vmw:CoresPerSocket/text()"
in
+  let cpu_topology +    match cores_per_socket with
+    | None -> None
+    | Some cores_per_socket when cores_per_socket <= 0 ->
+       warning (f_"invalid vmw:CoresPerSocket (%d) ignored")
+               cores_per_socket;
+       None
+    | Some cores_per_socket ->
+       let sockets = vcpu / cores_per_socket in
+       if sockets <= 0 then (
+         warning (f_"invalid vmw:CoresPerSocket < number of
cores");
+         None
+       )
+       else
+         Some { s_cpu_sockets = sockets; s_cpu_cores = cores_per_socket;
+                s_cpu_threads = 1 } in
+
+  (* BIOS or EFI firmware? *)
+  let firmware = Option.default "bios" (xpath_string
"/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/vmw:Config[@vmw:key=\"firmware\"]/@vmw:value")
in
+  let firmware +    match firmware with
+    | "bios" -> BIOS
+    | "efi" -> UEFI
+    | s ->
+       error (f_"unknown Config:firmware value %s (expected
\"bios\" or \"efi\")") s in
+
+  name, memory, vcpu, cpu_topology, firmware,
+  parse_disks xpathctx, parse_removables xpathctx, parse_nics xpathctx
+
+(* Hard disks (ResourceType = 17). *)
+and parse_disks xpathctx +  let xpath_string = xpath_string xpathctx
+  and xpath_int = xpath_int xpathctx in
+
+  let disks = ref [] in
+  let expr =
"/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=17]"
in
+  let obj = Xml.xpath_eval_expression xpathctx expr in
+  let nr_nodes = Xml.xpathobj_nr_nodes obj in
+  for i = 0 to nr_nodes-1 do
+    let n = Xml.xpathobj_node obj i in
+    Xml.xpathctx_set_current_context xpathctx n;
+
+    (* XXX We assume the OVF lists these in order.
+       let address = xpath_int "rasd:AddressOnParent/text()" in
+     *)
+
+    (* Find the parent controller. *)
+    let parent_id = xpath_int "rasd:Parent/text()" in
+    let controller +      match parent_id with
+      | None -> None
+      | Some id -> parent_controller xpathctx id in
+
+    Xml.xpathctx_set_current_context xpathctx n;
+    let file_id +      Option.default "" (xpath_string
"rasd:HostResource/text()") in
+    let rex = PCRE.compile "^(?:ovf:)?/disk/(.*)" in
+    if PCRE.matches rex file_id then (
+      (* Chase the references through to the actual file name. *)
+      let file_id = PCRE.sub 1 in
+      let expr = sprintf
"/ovf:Envelope/ovf:DiskSection/ovf:Disk[@ovf:diskId='%s']/@ovf:fileRef"
file_id in
+      let file_ref +        match xpath_string expr with
+        | None -> error (f_"error parsing disk fileRef")
+        | Some s -> s in
+      let expr = sprintf
"/ovf:Envelope/ovf:References/ovf:File[@ovf:id='%s']/@ovf:href"
file_ref in
+      let href +        match xpath_string expr with
+        | None -> error (f_"no href in ovf:File (id=%s)") file_ref
+        | Some s -> s in
+
+      let expr = sprintf
"/ovf:Envelope/ovf:References/ovf:File[@ovf:id='%s']/@ovf:compression"
file_ref in
+      let compressed +        match xpath_string expr with
+        | None | Some "identity" -> false
+        | Some "gzip" -> true
+        | Some s -> error (f_"unsupported compression in OVF: %s")
s in
+
       let disk = {
-        s_removable_type = typ;
-        s_removable_controller = controller;
-        s_removable_slot = slot;
+        source_disk = {
+          s_disk_id = i;
+          s_qemu_uri = "";
+          s_format = Some "vmdk";
+          s_controller = controller;
+        };
+        href = href;
+        compressed = compressed
       } in
-      List.push_front disk removables;
-    done;
-    List.rev !removables
-
-  (* Search for networks ResourceType: 10 *)
-  and parse_nics () -    let nics = ref [] in
-    let expr =
"/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=10]"
in
-    let obj = Xml.xpath_eval_expression xpathctx expr in
-    let nr_nodes = Xml.xpathobj_nr_nodes obj in
-    for i = 0 to nr_nodes-1 do
-      let n = Xml.xpathobj_node obj i in
-      Xml.xpathctx_set_current_context xpathctx n;
-      let vnet, vnet_type -        match xpath_string
"rasd:Connection/text()" with
-        | Some connection -> connection, Bridge
-        | None -> sprintf "eth%d" i, Network in
-      let mac = xpath_string "rasd:Address/text()" in
-      let nic_model -        match xpath_string
"rasd:ResourceSubType/text()" with
-        | Some "E1000" -> Some Source_e1000
-        | Some model -> Some (Source_other_nic (String.lowercase_ascii
model))
-        | None -> None in
-      let nic = {
-        s_mac = mac;
-        s_nic_model = nic_model;
-        s_vnet = vnet;
-        s_vnet_orig = vnet;
-        s_vnet_type = vnet_type;
-      } in
-      List.push_front nic nics
-    done;
-    List.rev !nics
-  in
+      List.push_front disk disks;
+    ) else
+      error (f_"could not parse disk rasd:HostResource from OVF
document")
+  done;
+  List.rev !disks
+
+(* Floppies (ResourceType = 14), CDs (ResourceType = 15) and
+ * CDROMs (ResourceType = 16).  (What is the difference?)  Try hard
+ * to preserve the original ordering from the OVF.
+ *)
+and parse_removables xpathctx +  let xpath_int = xpath_int xpathctx in
+
+  let removables = ref [] in
+  let expr =
"/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=14
or rasd:ResourceType/text()=15 or rasd:ResourceType/text()=16]" in
+  let obj = Xml.xpath_eval_expression xpathctx expr in
+  let nr_nodes = Xml.xpathobj_nr_nodes obj in
+  for i = 0 to nr_nodes-1 do
+    let n = Xml.xpathobj_node obj i in
+    Xml.xpathctx_set_current_context xpathctx n;
+    let id +      match xpath_int "rasd:ResourceType/text()" with
+      | None -> assert false
+      | Some (14|15|16 as i) -> i
+      | Some _ -> assert false in
+
+    let slot = xpath_int "rasd:AddressOnParent/text()" in
+
+    (* Find the parent controller. *)
+    let parent_id = xpath_int "rasd:Parent/text()" in
+    let controller +      match parent_id with
+      | None -> None
+      | Some id -> parent_controller xpathctx id in
+
+    let typ +      match id with
+      | 14 -> Floppy
+      | 15 | 16 -> CDROM
+      | _ -> assert false in
+    let disk = {
+      s_removable_type = typ;
+      s_removable_controller = controller;
+      s_removable_slot = slot;
+    } in
+    List.push_front disk removables;
+  done;
+  List.rev !removables
+
+(* Search for networks ResourceType: 10 *)
+and parse_nics xpathctx +  let xpath_string = xpath_string xpathctx in
+
+  let nics = ref [] in
+  let expr =
"/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=10]"
in
+  let obj = Xml.xpath_eval_expression xpathctx expr in
+  let nr_nodes = Xml.xpathobj_nr_nodes obj in
+  for i = 0 to nr_nodes-1 do
+    let n = Xml.xpathobj_node obj i in
+    Xml.xpathctx_set_current_context xpathctx n;
+    let vnet, vnet_type +      match xpath_string
"rasd:Connection/text()" with
+      | Some connection -> connection, Bridge
+      | None -> sprintf "eth%d" i, Network in
+    let mac = xpath_string "rasd:Address/text()" in
+    let nic_model +      match xpath_string
"rasd:ResourceSubType/text()" with
+      | Some "E1000" -> Some Source_e1000
+      | Some model -> Some (Source_other_nic (String.lowercase_ascii model))
+      | None -> None in
+    let nic = {
+      s_mac = mac;
+      s_nic_model = nic_model;
+      s_vnet = vnet;
+      s_vnet_orig = vnet;
+      s_vnet_type = vnet_type;
+    } in
+    List.push_front nic nics
+  done;
+  List.rev !nics
+
+(* Helper function to return the parent controller of a disk. *)
+and parent_controller xpathctx id +  let xpath_int = xpath_int xpathctx in
+
+  let expr = sprintf
"/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:InstanceID/text()=%d]/rasd:ResourceType/text()"
id in
+  let controller = xpath_int expr in
+
+  (* 5: IDE, 6: SCSI controller, 20: SATA *)
+  match controller with
+  | Some 5 -> Some Source_IDE
+  | Some 6 -> Some Source_SCSI
+  | Some 20 -> Some Source_SATA
+  | None ->
+     warning (f_"ova disk has no parent controller, please report this as
a bug supplying the *.ovf file extracted from the ova");
+     None
+  | Some controller ->
+     warning (f_"ova disk has an unknown VMware controller type (%d),
please report this as a bug supplying the *.ovf file extracted from the
ova")
+             controller;
+     None
 
-  parse_top ()
+let parse_disks ovf_filename +  let xpathctx = xpathctx_of_ovf ovf_filename in
+  parse_disks xpathctx
diff --git a/v2v/parse_ovf_from_ova.mli b/v2v/parse_ovf_from_ova.mli
index 39bc83d2d..f217e6e41 100644
--- a/v2v/parse_ovf_from_ova.mli
+++ b/v2v/parse_ovf_from_ova.mli
@@ -35,3 +35,6 @@ val parse_ovf_from_ova : string -> string option * int64 *
int * Types.source_cp
     The returned tuple is
     [name, memory, vcpu, cpu_topology, firmware,
     disks, removables, nics] *)
+
+val parse_disks : string -> ovf_disk list
+(** As above, but returns only the disks. *)
-- 
2.16.2
Richard W.M. Jones
2018-Apr-25  13:35 UTC
[Libguestfs] [PATCH v2 5/9] v2v: -i ova: Factor out code for dealing with OVA files.
Factor out the complex code that handles dealing with multiple
different OVA file formats into a separate Parse_ova module.
This is largely straightforward code refactoring -- there should be no
significant functional change.
However:
 - Parse_ova now checks up front if the OVA contains any compressed
   disks and avoids the tar optimization in that case.  This is a
   regression for the case of an OVA containing a mix of both
   compressed and uncompressed disks (we expect this to be rare).
   The change is nevertheless good because it reduces the coupling
   between two parts of the code.
 - I had to simplify an error message.
---
 v2v/Makefile.am   |   2 +
 v2v/input_ova.ml  | 375 +++++++++++++-----------------------------------------
 v2v/parse_ova.ml  | 360 +++++++++++++++++++++++++++++++++++++++++++++++++++
 v2v/parse_ova.mli |  73 +++++++++++
 v2v/utils.ml      |  59 ---------
 v2v/utils.mli     |   7 -
 6 files changed, 523 insertions(+), 353 deletions(-)
diff --git a/v2v/Makefile.am b/v2v/Makefile.am
index d832f75c0..c9ed1fc88 100644
--- a/v2v/Makefile.am
+++ b/v2v/Makefile.am
@@ -76,6 +76,7 @@ SOURCES_MLI = \
 	output_rhv_upload_plugin_source.mli \
 	output_rhv_upload_precheck_source.mli \
 	output_vdsm.mli \
+	parse_ova.mli \
 	parse_ovf_from_ova.mli \
 	parse_libvirt_xml.mli \
 	parse_vmx.mli \
@@ -99,6 +100,7 @@ SOURCES_ML = \
 	DOM.ml \
 	changeuid.ml \
 	parse_ovf_from_ova.ml \
+	parse_ova.ml \
 	create_ovf.ml \
 	linux.ml \
 	windows.ml \
diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml
index f23a1f2a9..fc8fde4bc 100644
--- a/v2v/input_ova.ml
+++ b/v2v/input_ova.ml
@@ -20,242 +20,53 @@ open Printf
 
 open Std_utils
 open Tools_utils
-open Unix_utils
 open Common_gettext.Gettext
 
 open Types
-open Utils
+open Parse_ova
 open Parse_ovf_from_ova
 open Name_from_disk
 
-(* Return true if [libvirt] supports ["json:"] pseudo-URLs and
accepts the
- * ["raw"] driver. Function also returns true if [libvirt] backend is
not
- * used.  This didn't work in libvirt < 3.1.0.
- *)
-let libvirt_supports_json_raw_driver () -  if backend_is_libvirt () then (
-    let sup = Libvirt_utils.libvirt_get_version () >= (3, 1, 0) in
-    debug "libvirt supports  \"raw\" driver in json URL:
%B" sup;
-    sup
-  )
-  else
-    true
-
-let pigz_available -  let test = lazy (shell_command "pigz --help
>/dev/null 2>&1" = 0) in
-  fun () -> Lazy.force test
-
-let pxz_available -  let test = lazy (shell_command "pxz --help
>/dev/null 2>&1" = 0) in
-  fun () -> Lazy.force test
-
-let zcat_command_of_format = function
-  | `GZip ->
-     if pigz_available () then "pigz -c -d" else "gzip -c
-d"
-  | `XZ ->
-     if pxz_available () then "pxz -c -d" else "xz -c -d"
-
-(* Untar part or all files from tar archive. If [paths] is specified it is
- * a list of paths in the tar archive.
- *)
-let untar ?format ?(paths = []) file outdir -  let paths = String.concat "
" (List.map quote paths) in
-  let cmd -    match format with
-    | None ->
-       sprintf "tar -xf %s -C %s %s"
-               (quote file) (quote outdir) paths
-    | Some ((`GZip|`XZ) as format) ->
-       sprintf "%s %s | tar -xf - -C %s %s"
-               (zcat_command_of_format format) (quote file)
-               (quote outdir) paths in
-  if shell_command cmd <> 0 then
-    error (f_"error unpacking %s, see earlier error messages") file
-
-(* Untar only ovf and manifest from the archive *)
-let untar_metadata file outdir -  let files = external_command (sprintf
"tar -tf %s" (Filename.quote file)) in
-  let files -    List.filter_map (
-      fun f ->
-        if Filename.check_suffix f ".ovf" ||
-           Filename.check_suffix f ".mf" then Some f
-        else None
-    ) files in
-  untar ~paths:files file outdir
-
-(* Uncompress the first few bytes of [file] and return it as
- * [(bytes, len)].
- *)
-let uncompress_head format file -  let cmd = sprintf "%s %s"
(zcat_command_of_format format) (quote file) in
-  let chan_out, chan_in, chan_err = Unix.open_process_full cmd [||] in
-  let b = Bytes.create 512 in
-  let len = input chan_out b 0 (Bytes.length b) in
-  (* We're expecting the subprocess to fail because we close
-   * the pipe early, so:
-   *)
-  ignore (Unix.close_process_full (chan_out, chan_in, chan_err));
-  b, len
-
-(* Run [detect_file_type] on a compressed file, returning the
- * type of the uncompressed content (if known).
- *)
-let uncompressed_type format file -  let head, headlen = uncompress_head format
file in
-  let tmpfile, chan -    Filename.open_temp_file "ova.file."
"" in
-  output chan head 0 headlen;
-  close_out chan;
-  let ret = detect_file_type tmpfile in
-  Sys.remove tmpfile;
-  ret
-
-(* Find files in [dir] ending with [ext]. *)
-let find_files dir ext -  let rec loop = function
-    | [] -> []
-    | dir :: rest ->
-       let files = Array.to_list (Sys.readdir dir) in
-       let files = List.map (Filename.concat dir) files in
-       let dirs, files = List.partition Sys.is_directory files in
-       let files -         List.filter (fun x -> Filename.check_suffix x
ext) files in
-       files @ loop (rest @ dirs)
-  in
-  loop [dir]
-
-class input_ova ova -  let tmpdir -    let base_dir = (open_guestfs
())#get_cachedir () in
-    let t = Mkdtemp.temp_dir ~base_dir "ova." in
-    rmdir_on_exit t;
-    t in
-object
+class input_ova ova = object
   inherit input
 
   method as_options = "-i ova " ^ ova
 
   method source ()      (* Extract ova file. *)
-    let exploded, partial -      (* The spec allows a directory to be specified
as an ova.  This
-       * is also pretty convenient.
-       *)
-      if is_directory ova then ova, false
-      else (
-        match detect_file_type ova with
-        | `Tar ->
-          (* Normal ovas are tar file (not compressed). *)
-          if qemu_img_supports_offset_and_size () &&
-              libvirt_supports_json_raw_driver () then (
-            (* In newer QEMU we don't have to extract everything.
-             * We can access disks inside the tar archive directly.
-             *)
-            untar_metadata ova tmpdir;
-            tmpdir, true
-          ) else (
-            untar ova tmpdir;
-            tmpdir, false
-          )
+    let ova_t = parse_ova ova in
 
-        | `Zip ->
-          (* However, although not permitted by the spec, people ship
-           * zip files as ova too.
-           *)
-          let cmd = [ "unzip" ] @
-            (if verbose () then [] else [ "-q" ]) @
-            [ "-j"; "-d"; tmpdir; ova ] in
-          if run_command cmd <> 0 then
-            error (f_"error unpacking %s, see earlier error
messages") ova;
-          tmpdir, false
+    (* Extract ovf file from ova. *)
+    let ovf = get_ovf_file ova_t in
 
-        | (`GZip|`XZ) as format ->
-          (match uncompressed_type format ova with
-          | `Tar ->
-             untar ~format ova tmpdir;
-             tmpdir, false
-          | `Zip | `GZip | `XZ | `Unknown ->
-            error (f_"%s: unsupported file format\n\nFormats which we
currently understand for '-i ova' are: tar (uncompressed, compress with
gzip or xz), zip") ova
-          )
+    (* Extract the manifest from *.mf files in the ova. *)
+    let manifest = get_manifest ova_t in
 
-        | `Unknown ->
-          error (f_"%s: unsupported file format\n\nFormats which we
currently understand for '-i ova' are: tar (uncompressed, compress with
gzip or xz), zip") ova
-      ) in
-
-    (* Exploded path must be absolute (RHBZ#1155121). *)
-    let exploded = absolute_path exploded in
-
-    (* If virt-v2v is running as root, and the backend is libvirt, then
-     * we have to chmod the directory to 0755 and files to 0644
-     * so it is readable by qemu.qemu.  This is libvirt bug RHBZ#890291.
-     *)
-    if Unix.geteuid () = 0 && backend_is_libvirt () then (
-      warning (f_"making OVA directory public readable to work around
libvirt bug https://bugzilla.redhat.com/1045069");
-      let cmd = [ "chmod"; "-R"; "go=u,go-w";
exploded ] @
-                if partial then [ ova ] else [] in
-      ignore (run_command cmd)
-    );
-
-    (* Search for the ovf file. *)
-    let ovf = find_files exploded ".ovf" in
-    let ovf -      match ovf with
-      | [] ->
-        error (f_"no .ovf file was found in %s") ova
-      | [x] -> x
-      | _ :: _ ->
-        error (f_"more than one .ovf file was found in %s") ova in
-
-    (* Read any .mf (manifest) files and verify sha1. *)
-    let mf = find_files exploded ".mf" in
-    let rex = PCRE.compile "^(SHA1|SHA256)\\((.*)\\)=
([0-9a-fA-F]+)\r?$" in
+    (* Verify checksums of files listed in the manifest. *)
     List.iter (
-      fun mf ->
-        debug "processing manifest %s" mf;
-        let mf_folder = Filename.dirname mf in
-        let mf_subfolder = subdirectory exploded mf_folder in
-        with_open_in mf (
-          fun chan ->
-            let rec loop () -              let line = input_line chan in
-              if PCRE.matches rex line then (
-                let mode = PCRE.sub 1
-                and disk = PCRE.sub 2
-                and expected = PCRE.sub 3 in
-                let csum = Checksums.of_string mode expected in
-                match
-                  if partial then
-                    Checksums.verify_checksum csum
-                                              ~tar:ova (mf_subfolder // disk)
-                  else
-                    Checksums.verify_checksum csum (mf_folder // disk)
-                with
-                | Checksums.Good_checksum -> ()
-                | Checksums.Mismatched_checksum (_, actual) ->
-                   error (f_"checksum of disk %s does not match manifest
%s (actual %s(%s) = %s, expected %s(%s) = %s)")
-                         disk mf mode disk actual mode disk expected
-                | Checksums.Missing_file ->
-                   (* RHBZ#1570407: Some OVA files generated by VMware
-                    * reference non-existent components in the *.mf file.
-                    * Generate a warning and ignore it.
-                    *)
-                   warning (f_"%s has a checksum for non-existent file %s
(ignored)")
-                           mf disk
-              )
-              else
-                warning (f_"unable to parse line from manifest file:
%S") line;
-              loop ()
-            in
-            (try loop () with End_of_file -> ())
-        )
-    ) mf;
-
-    let ovf_folder = Filename.dirname ovf in
+      fun (file_ref, csum) ->
+        let filename, r +          match file_ref with
+          | LocalFile filename ->
+             filename, Checksums.verify_checksum csum filename
+          | TarFile (tar, filename) ->
+             filename, Checksums.verify_checksum csum ~tar filename in
+        match r with
+        | Checksums.Good_checksum -> ()
+        | Checksums.Mismatched_checksum (_, actual) ->
+           error (f_"checksum of disk %s does not match manifest (actual =
%s, expected = %s)")
+                 filename actual (Checksums.string_of_csum_t csum)
+        | Checksums.Missing_file ->
+           (* RHBZ#1570407: Some OVA files generated by VMware
+            * reference non-existent components in the *.mf file.
+            * Generate a warning and ignore it.
+            *)
+           warning (f_"manifest has a checksum for non-existent file %s
(ignored)")
+                   filename
+    ) manifest;
 
     (* Parse the ovf file. *)
-    let name, memory, vcpu, cpu_topology, firmware,
-        disks, removables, nics +    let name, memory, vcpu, cpu_topology,
firmware, disks, removables, nics        parse_ovf_from_ova ovf in
 
     let name @@ -265,81 +76,71 @@ object
          name_from_disk ova
       | Some name -> name in
 
-    let disks = List.map (
-      fun ({ href; compressed } as disk) ->
-        let partial -          if compressed && partial then (
-            (* We cannot access compressed disk inside the tar;
-             * we have to extract it.
-             *)
-            untar ~paths:[(subdirectory exploded ovf_folder) // href]
-                  ova tmpdir;
-            false
-          )
-          else
-            partial in
+    (* Convert the disk hrefs into qemu URIs. *)
+    let qemu_uris = List.map (
+      fun { href; compressed } ->
+        let file_ref = get_file_ref ova_t href in
 
-        let filename -          if partial then
-            (subdirectory exploded ovf_folder) // href
-          else (
-            (* Does the file exist and is it readable? *)
-            Unix.access (ovf_folder // href) [Unix.R_OK];
-            ovf_folder // href
-          ) in
+        match compressed, file_ref with
+        | false, LocalFile filename ->
+           filename
 
-        (* The spec allows the file to be gzip-compressed, in which case
-         * we must uncompress it into the tmpdir.
-         *)
-        let filename -          if compressed then (
-            let new_filename = tmpdir // String.random8 () ^ ".vmdk"
in
-            let cmd -              sprintf "zcat %s > %s" (quote
filename) (quote new_filename) in
-            if shell_command cmd <> 0 then
-              error (f_"error uncompressing %s, see earlier error
messages")
-                    filename;
-            new_filename
-          )
-          else filename in
+        | true, LocalFile filename ->
+           (* The spec allows the file to be gzip-compressed, in
+            * which case we must uncompress it into a temporary.
+            *)
+           let temp_dir = (open_guestfs ())#get_cachedir () in
+           let new_filename = Filename.temp_file ~temp_dir "ova"
".vmdk" in
+           unlink_on_exit new_filename;
+           let cmd +             sprintf "zcat %s > %s" (quote
filename) (quote new_filename) in
+           if shell_command cmd <> 0 then
+             error (f_"error uncompressing %s, see earlier error
messages")
+                   filename;
+           new_filename
 
-        let qemu_uri -          if not partial then (
-            filename
-          )
-          else (
-            let offset, size -              try find_file_in_tar ova filename
-              with
-              | Not_found ->
-                 error (f_"file ‘%s’ not found in the ova") filename
-              | Failure msg -> error (f_"%s") msg in
-            (* QEMU requires size aligned to 512 bytes. This is safe because
-             * tar also works with 512 byte blocks.
-             *)
-            let size = roundup64 size 512L in
+        | false, TarFile (tar, filename) ->
+           (* This is the tar optimization. *)
+           let offset, size +             try Parse_ova.get_tar_offet_and_size
tar filename
+             with
+             | Not_found ->
+                error (f_"file ‘%s’ not found in the ova") filename
+             | Failure msg -> error (f_"%s") msg in
+           (* QEMU requires size aligned to 512 bytes. This is safe because
+            * tar also works with 512 byte blocks.
+            *)
+           let size = roundup64 size 512L in
 
-            (* Workaround for libvirt bug RHBZ#1431652. *)
-            let ova_path = absolute_path ova in
+           (* Workaround for libvirt bug RHBZ#1431652. *)
+           let tar_path = absolute_path tar in
 
-            let doc = [
-                "file", JSON.Dict [
-                            "driver", JSON.String "raw";
-                            "offset", JSON.Int64 offset;
-                            "size", JSON.Int64 size;
-                            "file", JSON.Dict [
-                                        "driver", JSON.String
"file";
-                                        "filename", JSON.String
ova_path]
-                          ]
-              ] in
-            let uri -              sprintf "json:%s"
(JSON.string_of_doc ~fmt:JSON.Compact doc) in
-            debug "json: %s" uri;
-            uri
-          ) in
+           let doc = [
+               "file", JSON.Dict [
+                           "driver", JSON.String "raw";
+                           "offset", JSON.Int64 offset;
+                           "size", JSON.Int64 size;
+                           "file", JSON.Dict [
+                                       "driver", JSON.String
"file";
+                                       "filename", JSON.String
tar_path]
+                         ]
+             ] in
+           let uri +             sprintf "json:%s"
(JSON.string_of_doc ~fmt:JSON.Compact doc) in
+           uri
 
-        { disk.source_disk with s_qemu_uri = qemu_uri }
-     ) disks in
+        | true, TarFile _ ->
+           (* This should not happen since {!Parse_ova} knows that
+            * qemu cannot handle compressed files here.
+            *)
+           assert false
+      ) disks in
+
+    (* Get a final list of source disks. *)
+    let disks +      List.map (fun ({ source_disk }, qemu_uri) ->
+          { source_disk with s_qemu_uri = qemu_uri })
+               (List.combine disks qemu_uris) in
 
     let source = {
       s_hypervisor = VMware;
diff --git a/v2v/parse_ova.ml b/v2v/parse_ova.ml
new file mode 100644
index 000000000..431cbe8d0
--- /dev/null
+++ b/v2v/parse_ova.ml
@@ -0,0 +1,360 @@
+(* virt-v2v
+ * Copyright (C) 2009-2018 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *)
+
+open Printf
+
+open Std_utils
+open Tools_utils
+open Unix_utils
+open Common_gettext.Gettext
+
+open Utils
+open Parse_ovf_from_ova
+
+type t = {
+  (* Save the original OVA name, for error messages. *)
+  orig_ova : string;
+
+  (* Top directory of OVA.  If the OVA was already a directory then
+   * this is just that directory.  However in normal cases this is
+   * a temporary directory that we create, unpacking either just the
+   * OVF and MF files, or those plus the disks.  This temporary
+   * directory will be cleaned up on exit.
+   *)
+  top_dir : string;
+
+  ova_type : ova_type;
+}
+
+and ova_type +  (* The original OVA was a directory.  Or the OVA was fully
unpacked
+   * into a temporary directory.
+   *
+   * In either case everything is available in [top_dir].
+   *)
+  | Directory
+
+  (* The original OVA was an uncompressed tar file and we are able
+   * to optimize access to the disks by keeping them in the tarball.
+   *
+   * The OVF and MF files only have been unpacked in [top_dir].
+   *)
+  | TarOptimized of string (* tarball *)
+
+type file_ref +  | LocalFile of string
+  | TarFile of string * string
+
+type mf_record = file_ref * Checksums.csum_t
+
+let rec parse_ova ova +  (* The spec allows a directory to be specified as an
ova.  This
+   * is also pretty convenient.
+   *)
+  let top_dir, ova_type +    if is_directory ova then ova, Directory
+    else (
+      let tmpdir +        let base_dir = (open_guestfs ())#get_cachedir () in
+        let t = Mkdtemp.temp_dir ~base_dir "ova." in
+        rmdir_on_exit t;
+        t in
+
+      match detect_file_type ova with
+      | `Tar ->
+         (* Normal ovas are tar file (not compressed). *)
+
+         (* In newer QEMU we don't have to extract everything.
+          * We can access disks inside the tar archive directly.
+          *)
+         if qemu_img_supports_offset_and_size () &&
+            libvirt_supports_json_raw_driver () &&
+            (untar_metadata ova tmpdir;
+             no_disks_are_compressed ova tmpdir) then
+           tmpdir, TarOptimized ova
+         else (
+           (* If qemu/libvirt is too old or any disk is compressed
+            * then we must fall back on the slow path.
+            *)
+           untar ova tmpdir;
+           tmpdir, Directory
+         )
+
+      | `Zip ->
+         (* However, although not permitted by the spec, people ship
+          * zip files as ova too.
+          *)
+         let cmd +           [ "unzip" ] @ (if verbose () then []
else [ "-q" ]) @
+           [ "-j"; "-d"; tmpdir; ova ] in
+         if run_command cmd <> 0 then
+           error (f_"error unpacking %s, see earlier error messages")
ova;
+         tmpdir, Directory
+
+      | (`GZip|`XZ) as format ->
+         (match uncompressed_type format ova with
+          | `Tar ->
+             untar ~format ova tmpdir;
+             tmpdir, Directory
+          | `Zip | `GZip | `XZ | `Unknown ->
+             error (f_"%s: unsupported file format\n\nFormats which we
currently understand for '-i ova' are: tar (uncompressed, compress with
gzip or xz), zip") ova
+         )
+
+      | `Unknown ->
+         error (f_"%s: unsupported file format\n\nFormats which we
currently understand for '-i ova' are: tar (uncompressed, compress with
gzip or xz), zip") ova
+    ) in
+
+  (* Exploded path must be absolute (RHBZ#1155121). *)
+  let top_dir = absolute_path top_dir in
+
+  (* If virt-v2v is running as root, and the backend is libvirt, then
+   * we have to chmod the directory to 0755 and files to 0644
+   * so it is readable by qemu.qemu.  This is libvirt bug RHBZ#890291.
+   *)
+  if Unix.geteuid () = 0 && backend_is_libvirt () then (
+    warning (f_"making OVA directory public readable to work around
libvirt bug https://bugzilla.redhat.com/1045069");
+    let what +      match ova_type with
+      | Directory -> [ top_dir ]
+      | TarOptimized ova -> [ top_dir; ova ] in
+    let cmd = [ "chmod"; "-R"; "go=u,go-w" ] @
what in
+    ignore (run_command cmd)
+  );
+
+  { orig_ova = ova; top_dir; ova_type }
+
+(* Return true if [libvirt] supports ["json:"] pseudo-URLs and
accepts the
+ * ["raw"] driver. Function also returns true if [libvirt] backend is
not
+ * used.  This didn't work in libvirt < 3.1.0.
+ *)
+and libvirt_supports_json_raw_driver () +  if backend_is_libvirt () then (
+    let sup = Libvirt_utils.libvirt_get_version () >= (3, 1, 0) in
+    debug "libvirt supports  \"raw\" driver in json URL:
%B" sup;
+    sup
+  )
+  else
+    true
+
+(* No disks compressed?  We need to check the OVF file. *)
+and no_disks_are_compressed ova tmpdir +  let t = { orig_ova = ova; top_dir =
tmpdir; ova_type = Directory } in
+  let ovf = get_ovf_file t in
+  let disks = parse_disks ovf in
+  not (List.exists (fun { compressed } -> compressed) disks)
+
+and pigz_available +  let test = lazy (shell_command "pigz --help
>/dev/null 2>&1" = 0) in
+  fun () -> Lazy.force test
+
+and pxz_available +  let test = lazy (shell_command "pxz --help
>/dev/null 2>&1" = 0) in
+  fun () -> Lazy.force test
+
+and zcat_command_of_format = function
+  | `GZip ->
+     if pigz_available () then "pigz -c -d" else "gzip -c
-d"
+  | `XZ ->
+     if pxz_available () then "pxz -c -d" else "xz -c -d"
+
+(* Untar part or all files from tar archive. If [paths] is specified it is
+ * a list of paths in the tar archive.
+ *)
+and untar ?format ?(paths = []) file outdir +  let paths = String.concat "
" (List.map quote paths) in
+  let cmd +    match format with
+    | None ->
+       sprintf "tar -xf %s -C %s %s"
+               (quote file) (quote outdir) paths
+    | Some ((`GZip|`XZ) as format) ->
+       sprintf "%s %s | tar -xf - -C %s %s"
+               (zcat_command_of_format format) (quote file)
+               (quote outdir) paths in
+  if shell_command cmd <> 0 then
+    error (f_"error unpacking %s, see earlier error messages") file
+
+(* Untar only ovf and manifest from the archive *)
+and untar_metadata file outdir +  let files = external_command (sprintf
"tar -tf %s" (Filename.quote file)) in
+  let files +    List.filter_map (
+      fun f ->
+        if Filename.check_suffix f ".ovf" ||
+           Filename.check_suffix f ".mf" then Some f
+        else None
+    ) files in
+  untar ~paths:files file outdir
+
+(* Uncompress the first few bytes of [file] and return it as
+ * [(bytes, len)].
+ *)
+and uncompress_head format file +  let cmd = sprintf "%s %s"
(zcat_command_of_format format) (quote file) in
+  let chan_out, chan_in, chan_err = Unix.open_process_full cmd [||] in
+  let b = Bytes.create 512 in
+  let len = input chan_out b 0 (Bytes.length b) in
+  (* We're expecting the subprocess to fail because we close
+   * the pipe early, so:
+   *)
+  ignore (Unix.close_process_full (chan_out, chan_in, chan_err));
+  b, len
+
+(* Run [detect_file_type] on a compressed file, returning the
+ * type of the uncompressed content (if known).
+ *)
+and uncompressed_type format file +  let head, headlen = uncompress_head format
file in
+  let tmpfile, chan +    Filename.open_temp_file "ova.file."
"" in
+  output chan head 0 headlen;
+  close_out chan;
+  let ret = detect_file_type tmpfile in
+  Sys.remove tmpfile;
+  ret
+
+(* Find files in [dir] ending with [ext]. *)
+and find_files dir ext +  let rec loop = function
+    | [] -> []
+    | dir :: rest ->
+       let files = Array.to_list (Sys.readdir dir) in
+       let files = List.map (Filename.concat dir) files in
+       let dirs, files = List.partition Sys.is_directory files in
+       let files +         List.filter (fun x -> Filename.check_suffix x
ext) files in
+       files @ loop (rest @ dirs)
+  in
+  loop [dir]
+
+and get_ovf_file { orig_ova; top_dir } +  let ovf = find_files top_dir
".ovf" in
+  match ovf with
+  | [] ->
+     error (f_"no .ovf file was found in %s") orig_ova
+  | [x] -> x
+  | _ :: _ ->
+     error (f_"more than one .ovf file was found in %s") orig_ova
+
+let rex = PCRE.compile "^(SHA1|SHA256)\\((.*)\\)= ([0-9a-fA-F]+)\r?$"
+
+let get_manifest { top_dir; ova_type } +  let mf_files = find_files top_dir
".mf" in
+  let manifest +    List.map (
+      fun mf ->
+        debug "ova: processing manifest file %s" mf;
+        let mf_folder = Filename.dirname mf in
+        let mf_subfolder = subdirectory top_dir mf_folder in
+        with_open_in mf (
+          fun chan ->
+            let ret = ref [] in
+            let rec loop () +              let line = input_line chan in
+              if PCRE.matches rex line then (
+                let csum_type = PCRE.sub 1
+                and filename = PCRE.sub 2
+                and expected = PCRE.sub 3 in
+                let csum = Checksums.of_string csum_type expected in
+                let file_ref +                  match ova_type with
+                  | Directory ->
+                     LocalFile (mf_folder // filename)
+                  | TarOptimized tar ->
+                     TarFile (tar, mf_subfolder // filename) in
+                List.push_front (file_ref, csum) ret
+              )
+              else
+                warning (f_"unable to parse line from manifest file:
%S") line;
+              loop ()
+            in
+            (try loop () with End_of_file -> ());
+            !ret
+        )
+    ) mf_files in
+
+  List.flatten manifest
+
+let get_file_ref ({ top_dir; ova_type } as t) href +  let ovf = get_ovf_file t
in
+  let ovf_folder = Filename.dirname ovf in
+
+  match ova_type with
+  | Directory -> LocalFile (ovf_folder // href)
+  | TarOptimized tar ->
+     let filename = subdirectory top_dir ovf_folder // href in
+     TarFile (tar, filename)
+
+let ws = PCRE.compile "\\s+"
+let re_tar_message = PCRE.compile "\\*\\* [^*]+ \\*\\*$"
+
+let get_tar_offet_and_size tar filename +  let lines = external_command
(sprintf "tar tRvf %s" (Filename.quote tar)) in
+  let rec loop lines +    match lines with
+    | [] -> raise Not_found
+    | line :: lines -> (
+      (* Lines have the form:
+       * block <offset>: <perms> <owner>/<group>
<size> <mdate> <mtime> <file>
+       * or:
+       * block <offset>: ** Block of NULs **
+       * block <offset>: ** End of File **
+       *)
+      if PCRE.matches re_tar_message line then
+        loop lines (* ignore "** Block of NULs **" etc. *)
+      else (
+        let elems = PCRE.nsplit ~max:8 ws line in
+        if List.length elems = 8 && List.hd elems = "block"
then (
+          let elems = Array.of_list elems in
+          let offset = elems.(1) in
+          let size = elems.(4) in
+          let fname = elems.(7) in
+
+          if fname <> filename then
+            loop lines
+          else (
+            let offset +              try
+                (* There should be a colon at the end *)
+                let i = String.rindex offset ':' in
+                if i == (String.length offset)-1 then
+                  Int64.of_string (String.sub offset 0 i)
+                else
+                  failwith "colon at wrong position"
+              with Failure _ | Not_found ->
+                failwithf (f_"invalid offset returned by tar: %S")
offset in
+
+            let size +              try Int64.of_string size
+              with Failure _ ->
+                failwithf (f_"invalid size returned by tar: %S") size
in
+
+            (* Note: Offset is actualy block number and there is a single
+             * block with tar header at the beginning of the file. So skip
+             * the header and convert the block number to bytes before
+             * returning.
+             *)
+            (offset +^ 1L) *^ 512L, size
+          )
+        )
+        else
+          failwithf (f_"failed to parse line returned by tar: %S")
line
+      )
+    )
+  in
+  loop lines
diff --git a/v2v/parse_ova.mli b/v2v/parse_ova.mli
new file mode 100644
index 000000000..54df752ad
--- /dev/null
+++ b/v2v/parse_ova.mli
@@ -0,0 +1,73 @@
+(* virt-v2v
+ * Copyright (C) 2009-2018 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *)
+
+(** Helper functions for dealing with the OVA pseudo-format. *)
+
+type t
+
+val parse_ova : string -> t
+(** The parameter references either an OVA file or a directory
+    containing an unpacked OVA.
+
+    The OVA is "opened".  If necessary, parts of the OVA are
+    unpacked into a temporary directory.  This can consume a lot
+    of space, although we are able to optimize some common cases.
+
+    This does {b not} parse or verify the OVF, MF or disks. *)
+
+val get_ovf_file : t -> string
+(** Return the filename of the OVF file from the OVA.  This will
+    be a local file (might be a temporary file) valid for the
+    lifetime of the handle.
+
+    The filename can be passed directly to
+    {!Parse_ovf_from_ova.parse_ovf_from_ova}. *)
+
+type file_ref +  | LocalFile of string         (** A local filename. *)
+  | TarFile of string * string  (** Tar file containing file. *)
+(** A file reference, pointing usually to a disk.  If the OVA
+    is unpacked during parsing then this points to a local file.
+    It might be a temporary file, but it is valid for the lifetime
+    of the handle.  If we are optimizing access to the OVA then
+    it might also be a reference to a file within a tarball. *)
+
+type mf_record = file_ref * Checksums.csum_t
+(** A manifest record: (file reference, checksum of file). *)
+
+val get_manifest : t -> mf_record list
+(** Find and parse all manifest ([*.mf]) files in the OVA.
+    Parse out the filenames and checksums from these files
+    and return the full manifest as a single list.
+
+    Note the checksums are returned, but this function does not
+    verify them.  Also VMware-generated OVAs can return
+    non-existent files in this list. *)
+
+val get_file_ref : t -> string -> file_ref
+(** Convert an OVF [href] into an actual file reference.
+
+    Note this does not check that the file really exists. *)
+
+val get_tar_offet_and_size : string -> string -> int64 * int64
+(** [get_tar_offet_and_size tar filename] looks up file in the [tar]
+    archive and returns a tuple containing at which byte it starts
+    and how long the file is.
+
+    Function raises [Not_found] if there is no such file inside [tar] and
+    [Failure] if there is any error parsing the tar output. *)
diff --git a/v2v/utils.ml b/v2v/utils.ml
index d73011f9f..67e2028f3 100644
--- a/v2v/utils.ml
+++ b/v2v/utils.ml
@@ -146,65 +146,6 @@ let error_if_no_ssh_agent ()    with Not_found ->
     error (f_"ssh-agent authentication has not been set up ($SSH_AUTH_SOCK
is not set).  This is required by qemu to do passwordless ssh access.  See the
virt-v2v(1) man page for more information.")
 
-let ws = PCRE.compile "\\s+"
-let re_tar_message = PCRE.compile "\\*\\* [^*]+ \\*\\*$"
-
-let find_file_in_tar tar filename -  let lines = external_command (sprintf
"tar tRvf %s" (Filename.quote tar)) in
-  let rec loop lines -    match lines with
-    | [] -> raise Not_found
-    | line :: lines -> (
-      (* Lines have the form:
-       * block <offset>: <perms> <owner>/<group>
<size> <mdate> <mtime> <file>
-       * or:
-       * block <offset>: ** Block of NULs **
-       * block <offset>: ** End of File **
-       *)
-      if PCRE.matches re_tar_message line then
-        loop lines (* ignore "** Block of NULs **" etc. *)
-      else (
-        let elems = PCRE.nsplit ~max:8 ws line in
-        if List.length elems = 8 && List.hd elems = "block"
then (
-          let elems = Array.of_list elems in
-          let offset = elems.(1) in
-          let size = elems.(4) in
-          let fname = elems.(7) in
-
-          if fname <> filename then
-            loop lines
-          else (
-            let offset -              try
-                (* There should be a colon at the end *)
-                let i = String.rindex offset ':' in
-                if i == (String.length offset)-1 then
-                  Int64.of_string (String.sub offset 0 i)
-                else
-                  failwith "colon at wrong position"
-              with Failure _ | Not_found ->
-                failwithf (f_"invalid offset returned by tar: %S")
offset in
-
-            let size -              try Int64.of_string size
-              with Failure _ ->
-                failwithf (f_"invalid size returned by tar: %S") size
in
-
-            (* Note: Offset is actualy block number and there is a single
-             * block with tar header at the beginning of the file. So skip
-             * the header and convert the block number to bytes before
-             * returning.
-             *)
-            (offset +^ 1L) *^ 512L, size
-          )
-        )
-        else
-          failwithf (f_"failed to parse line returned by tar: %S")
line
-      )
-    )
-  in
-  loop lines
-
 (* Wait for a file to appear until a timeout. *)
 let rec wait_for_file filename timeout    if Sys.file_exists filename then true
diff --git a/v2v/utils.mli b/v2v/utils.mli
index 4a444aaa0..fd91387a7 100644
--- a/v2v/utils.mli
+++ b/v2v/utils.mli
@@ -55,13 +55,6 @@ val backend_is_libvirt : unit -> bool
 
 val error_if_no_ssh_agent : unit -> unit
 
-val find_file_in_tar : string -> string -> int64 * int64
-(** [find_file_in_tar tar filename] looks up file in [tar] archive and returns
-    a tuple containing at which byte it starts and how long the file is.
-
-    Function raises [Not_found] if there is no such file inside [tar] and
-    [Failure] if there is any error parsing the tar output. *)
-
 val wait_for_file : string -> int -> bool
 (** [wait_for_file filename timeout] waits up to [timeout] seconds for
     [filename] to appear.  It returns [true] if the file appeared. *)
-- 
2.16.2
Richard W.M. Jones
2018-Apr-25  13:35 UTC
[Libguestfs] [PATCH v2 6/9] common: mlstdutils: Inline unsafe ‘subdirectory’ function.
This function is ill-defined and unsafe.  As a preparation for
removing it completely, inline it in the places where it is used.
---
 builder/repository_main.ml           | 13 +++++++++++--
 common/mlstdutils/std_utils.ml       |  9 ---------
 common/mlstdutils/std_utils.mli      | 10 ----------
 common/mlstdutils/std_utils_tests.ml |  7 -------
 v2v/parse_ova.ml                     | 13 +++++++++++--
 5 files changed, 22 insertions(+), 30 deletions(-)
diff --git a/builder/repository_main.ml b/builder/repository_main.ml
index c020a6413..5dc4d57cd 100644
--- a/builder/repository_main.ml
+++ b/builder/repository_main.ml
@@ -398,6 +398,15 @@ let process_image acc_entries filename repo tmprepo index
interactive
   | None ->
     extract_entry_data ~entry:file_entry ()
 
+let unsafe_remove_directory_prefix parent path +  if path = parent then
+    ""
+  else if String.is_prefix path (parent // "") then (
+    let len = String.length parent in
+    String.sub path (len+1) (String.length path - len-1)
+  ) else
+    invalid_arg (sprintf "%S is not a path prefix of %S" parent path)
+
 let main ()    let cmdline = parse_cmdline () in
 
@@ -512,8 +521,8 @@ let main ()          fun (id, entry) ->
           let { Index.file_uri } = entry in
           let rel_path -            try
-              subdirectory cmdline.repo file_uri
+            try (* XXX wrong *)
+              unsafe_remove_directory_prefix cmdline.repo file_uri
             with
             | Invalid_argument _ ->
               file_uri in
diff --git a/common/mlstdutils/std_utils.ml b/common/mlstdutils/std_utils.ml
index 3fba96b5b..df443058f 100644
--- a/common/mlstdutils/std_utils.ml
+++ b/common/mlstdutils/std_utils.ml
@@ -376,15 +376,6 @@ end
 let (//) = Filename.concat
 let quote = Filename.quote
 
-let subdirectory parent path -  if path = parent then
-    ""
-  else if String.is_prefix path (parent // "") then (
-    let len = String.length parent in
-    String.sub path (len+1) (String.length path - len-1)
-  ) else
-    invalid_arg (sprintf "%S is not a path prefix of %S" parent path)
-
 let ( +^ ) = Int64.add
 let ( -^ ) = Int64.sub
 let ( *^ ) = Int64.mul
diff --git a/common/mlstdutils/std_utils.mli b/common/mlstdutils/std_utils.mli
index 195269a71..c887249a5 100644
--- a/common/mlstdutils/std_utils.mli
+++ b/common/mlstdutils/std_utils.mli
@@ -274,16 +274,6 @@ val ( // ) : string -> string -> string
 val quote : string -> string
 (** Shell-safe quoting of a string (alias for {!Filename.quote}). *)
 
-val subdirectory : string -> string -> string
-(** [subdirectory parent path] returns subdirectory part of [path] relative
-    to the [parent]. If [path] and [parent] point to the same directory empty
-    string is returned.
-
-    Note: path normalization on arguments is {b not} performed!
-
-    If [parent] is not a path prefix of [path] the function raises
-    [Invalid_argument]. *)
-
 val ( +^ ) : int64 -> int64 -> int64
 val ( -^ ) : int64 -> int64 -> int64
 val ( *^ ) : int64 -> int64 -> int64
diff --git a/common/mlstdutils/std_utils_tests.ml
b/common/mlstdutils/std_utils_tests.ml
index 5c25650c2..aa48f5f39 100644
--- a/common/mlstdutils/std_utils_tests.ml
+++ b/common/mlstdutils/std_utils_tests.ml
@@ -30,12 +30,6 @@ let assert_equal_int64 = assert_equal ~printer:(fun x ->
Int64.to_string x)
 let assert_equal_stringlist = assert_equal ~printer:(fun x -> "("
^ (String.escaped (String.concat "," x)) ^ ")")
 let assert_equal_stringpair = assert_equal ~printer:(fun (x, y) -> sprintf
"%S, %S" x y)
 
-let test_subdirectory ctx -  assert_equal_string "" (subdirectory
"/foo" "/foo");
-  assert_equal_string "" (subdirectory "/foo"
"/foo/");
-  assert_equal_string "bar" (subdirectory "/foo"
"/foo/bar");
-  assert_equal_string "bar/baz" (subdirectory "/foo"
"/foo/bar/baz")
-
 (* Test Std_utils.int_of_X and Std_utils.X_of_int byte swapping
  * functions.
  *)
@@ -150,7 +144,6 @@ let test_string_chomp ctx  let suite    "mllib
Std_utils" >:::
     [
-      "subdirectory" >:: test_subdirectory;
       "numeric.byteswap" >:: test_byteswap;
       "char.mem" >:: test_char_mem;
       "strings.is_prefix" >:: test_string_is_prefix;
diff --git a/v2v/parse_ova.ml b/v2v/parse_ova.ml
index 431cbe8d0..c11502667 100644
--- a/v2v/parse_ova.ml
+++ b/v2v/parse_ova.ml
@@ -251,6 +251,15 @@ and get_ovf_file { orig_ova; top_dir }    | _ :: _ ->
      error (f_"more than one .ovf file was found in %s") orig_ova
 
+let unsafe_remove_directory_prefix parent path +  if path = parent then
+    ""
+  else if String.is_prefix path (parent // "") then (
+    let len = String.length parent in
+    String.sub path (len+1) (String.length path - len-1)
+  ) else
+    invalid_arg (sprintf "%S is not a path prefix of %S" parent path)
+
 let rex = PCRE.compile "^(SHA1|SHA256)\\((.*)\\)= ([0-9a-fA-F]+)\r?$"
 
 let get_manifest { top_dir; ova_type } @@ -260,7 +269,7 @@ let get_manifest {
top_dir; ova_type }        fun mf ->
         debug "ova: processing manifest file %s" mf;
         let mf_folder = Filename.dirname mf in
-        let mf_subfolder = subdirectory top_dir mf_folder in
+        let mf_subfolder = unsafe_remove_directory_prefix top_dir mf_folder in
         with_open_in mf (
           fun chan ->
             let ret = ref [] in
@@ -297,7 +306,7 @@ let get_file_ref ({ top_dir; ova_type } as t) href    match
ova_type with
   | Directory -> LocalFile (ovf_folder // href)
   | TarOptimized tar ->
-     let filename = subdirectory top_dir ovf_folder // href in
+     let filename = unsafe_remove_directory_prefix top_dir ovf_folder // href
in
      TarFile (tar, filename)
 
 let ws = PCRE.compile "\\s+"
-- 
2.16.2
Richard W.M. Jones
2018-Apr-25  13:35 UTC
[Libguestfs] [PATCH v2 7/9] v2v: -i ova: Replace subdirectory function with clearer inline code.
---
 v2v/parse_ova.ml | 36 +++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)
diff --git a/v2v/parse_ova.ml b/v2v/parse_ova.ml
index c11502667..6c4af4464 100644
--- a/v2v/parse_ova.ml
+++ b/v2v/parse_ova.ml
@@ -251,15 +251,6 @@ and get_ovf_file { orig_ova; top_dir }    | _ :: _ ->
      error (f_"more than one .ovf file was found in %s") orig_ova
 
-let unsafe_remove_directory_prefix parent path -  if path = parent then
-    ""
-  else if String.is_prefix path (parent // "") then (
-    let len = String.length parent in
-    String.sub path (len+1) (String.length path - len-1)
-  ) else
-    invalid_arg (sprintf "%S is not a path prefix of %S" parent path)
-
 let rex = PCRE.compile "^(SHA1|SHA256)\\((.*)\\)= ([0-9a-fA-F]+)\r?$"
 
 let get_manifest { top_dir; ova_type } @@ -268,8 +259,19 @@ let get_manifest {
top_dir; ova_type }      List.map (
       fun mf ->
         debug "ova: processing manifest file %s" mf;
+        (*               (1)                 (2)
+         * mf:           <top_dir>/bar.mf    <top_dir>/foo/bar.mf
+         * mf_folder:    <top_dir>           <top_dir>/foo
+         * mf_subfolder: ""                  foo
+         *)
         let mf_folder = Filename.dirname mf in
-        let mf_subfolder = unsafe_remove_directory_prefix top_dir mf_folder in
+        let mf_subfolder +          if String.is_prefix mf_folder (top_dir //
"") then ( (* 2 *)
+            let len = String.length top_dir + 1 in
+            String.sub mf_folder len (String.length mf_folder - len)
+          )
+          else if top_dir = mf_folder then "" (* 1 *)
+          else assert false in
         with_open_in mf (
           fun chan ->
             let ret = ref [] in
@@ -306,7 +308,19 @@ let get_file_ref ({ top_dir; ova_type } as t) href    match
ova_type with
   | Directory -> LocalFile (ovf_folder // href)
   | TarOptimized tar ->
-     let filename = unsafe_remove_directory_prefix top_dir ovf_folder // href
in
+     (*             (1)                 (2)
+      * ovf:        <top_dir>/bar.ovf   <top_dir>/foo/bar.ovf
+      * ovf_folder: <top_dir>           <top_dir>/foo
+      * subdir:     ""                  foo
+      * filename:   href                foo/href
+      *)
+     let filename +       if String.is_prefix ovf_folder (top_dir //
"") then ( (* 2 *)
+         let len = String.length top_dir + 1 in
+         String.sub ovf_folder len (String.length ovf_folder - len) // href
+       )
+       else if top_dir = ovf_folder then href (* 1 *)
+       else assert false in
      TarFile (tar, filename)
 
 let ws = PCRE.compile "\\s+"
-- 
2.16.2
Richard W.M. Jones
2018-Apr-25  13:35 UTC
[Libguestfs] [PATCH v2 8/9] v2v: -i ova: Sanity check hrefs to ensure they stay inside the tarball.
Check for hrefs like "../../../../dev/sda", or dubious symlinks in the
tarball, which could escape from the tarball into the host system.
We use realpath(3) to do this.  Since this function does not work on
non-existent files, we must change the signature of get_file_ref so it
can return whether the file exists or not.
---
 v2v/input_ova.ml  |  6 +++++-
 v2v/parse_ova.ml  | 41 +++++++++++++++++++++++++++++++++++++++--
 v2v/parse_ova.mli |  7 +++----
 3 files changed, 47 insertions(+), 7 deletions(-)
diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml
index fc8fde4bc..9f7bc64ff 100644
--- a/v2v/input_ova.ml
+++ b/v2v/input_ova.ml
@@ -79,7 +79,11 @@ class input_ova ova = object
     (* Convert the disk hrefs into qemu URIs. *)
     let qemu_uris = List.map (
       fun { href; compressed } ->
-        let file_ref = get_file_ref ova_t href in
+        let file_ref +          match get_file_ref ova_t href with
+          | Some f -> f
+          | None ->
+             error (f_"-i ova: OVF references file ‘%s’ which was not
found in the OVA archive") href in
 
         match compressed, file_ref with
         | false, LocalFile filename ->
diff --git a/v2v/parse_ova.ml b/v2v/parse_ova.ml
index 6c4af4464..76fa8ce13 100644
--- a/v2v/parse_ova.ml
+++ b/v2v/parse_ova.ml
@@ -17,6 +17,7 @@
  *)
 
 open Printf
+open Unix
 
 open Std_utils
 open Tools_utils
@@ -305,9 +306,33 @@ let get_file_ref ({ top_dir; ova_type } as t) href    let
ovf = get_ovf_file t in
   let ovf_folder = Filename.dirname ovf in
 
+  (* Since [href] comes from an untrusted source, we must ensure
+   * that it doesn't reference a path outside [top_dir].  An
+   * additional complication is that [href] is relative to
+   * the directory containing the OVF ([ovf_folder]).  A further
+   * complication is that the file might not exist at all.
+   *)
   match ova_type with
-  | Directory -> LocalFile (ovf_folder // href)
+  | Directory ->
+     let filename = ovf_folder // href in
+     let real_top_dir = Realpath.realpath top_dir in
+     (try
+        let filename = Realpath.realpath filename in
+        if not (String.is_prefix filename real_top_dir) then
+          error (f_"-i ova: invalid OVA file: path ‘%s’ references a file
outside the archive") href;
+        Some (LocalFile filename)
+      with
+        Unix_error (ENOENT, "realpath", _) -> None
+     )
+
   | TarOptimized tar ->
+     (* Security: Since the only thing we will do with the computed
+      * filename is to call get_tar_offet_and_size, it doesn't
+      * matter if the filename is bogus or references some file
+      * on the filesystem outside the tarball.  Therefore we don't
+      * need to do any sanity checking here.
+      *)
+
      (*             (1)                 (2)
       * ovf:        <top_dir>/bar.ovf   <top_dir>/foo/bar.ovf
       * ovf_folder: <top_dir>           <top_dir>/foo
@@ -321,7 +346,19 @@ let get_file_ref ({ top_dir; ova_type } as t) href        
)
        else if top_dir = ovf_folder then href (* 1 *)
        else assert false in
-     TarFile (tar, filename)
+
+     (* Does the file exist in the tarball? *)
+     let cmd = sprintf "tar tf %s %s >/dev/null 2>&1"
+                       (quote tar) (quote filename) in
+     debug "ova: testing if %s exists in %s" filename tar;
+     if Sys.command cmd = 0 then (
+       debug "ova: file exists";
+       Some (TarFile (tar, filename))
+     )
+     else (
+       debug "ova: file does not exist";
+       None
+     )
 
 let ws = PCRE.compile "\\s+"
 let re_tar_message = PCRE.compile "\\*\\* [^*]+ \\*\\*$"
diff --git a/v2v/parse_ova.mli b/v2v/parse_ova.mli
index 54df752ad..1ebf9022a 100644
--- a/v2v/parse_ova.mli
+++ b/v2v/parse_ova.mli
@@ -59,10 +59,9 @@ val get_manifest : t -> mf_record list
     verify them.  Also VMware-generated OVAs can return
     non-existent files in this list. *)
 
-val get_file_ref : t -> string -> file_ref
-(** Convert an OVF [href] into an actual file reference.
-
-    Note this does not check that the file really exists. *)
+val get_file_ref : t -> string -> file_ref option
+(** Convert an OVF [href] into an actual file reference.  Returns [None]
+    if the file does not exist. *)
 
 val get_tar_offet_and_size : string -> string -> int64 * int64
 (** [get_tar_offet_and_size tar filename] looks up file in the [tar]
-- 
2.16.2
Richard W.M. Jones
2018-Apr-25  13:35 UTC
[Libguestfs] [PATCH v2 9/9] v2v: -i ova: Handle OVAs containing snapshots (RHBZ#1570407).
Also adds a regression test.
---
 v2v/Makefile.am                        |   5 ++
 v2v/input_ova.ml                       |  49 ++++++++++--
 v2v/test-v2v-i-ova-snapshots.expected  |  21 +++++
 v2v/test-v2v-i-ova-snapshots.expected2 |  21 +++++
 v2v/test-v2v-i-ova-snapshots.ovf       | 138 +++++++++++++++++++++++++++++++++
 v2v/test-v2v-i-ova-snapshots.sh        |  78 +++++++++++++++++++
 6 files changed, 307 insertions(+), 5 deletions(-)
diff --git a/v2v/Makefile.am b/v2v/Makefile.am
index c9ed1fc88..9d9ab8d50 100644
--- a/v2v/Makefile.am
+++ b/v2v/Makefile.am
@@ -329,6 +329,7 @@ TESTS = \
 	test-v2v-i-ova-gz.sh \
 	test-v2v-i-ova-invalid-manifest1.sh \
 	test-v2v-i-ova-invalid-manifest2.sh \
+	test-v2v-i-ova-snapshots.sh \
 	test-v2v-i-ova-subfolders.sh \
 	test-v2v-i-ova-tar.sh \
 	test-v2v-i-ova-two-disks.sh \
@@ -474,6 +475,10 @@ EXTRA_DIST += \
 	test-v2v-i-ova-gz.sh \
 	test-v2v-i-ova-invalid-manifest1.sh \
 	test-v2v-i-ova-invalid-manifest2.sh \
+	test-v2v-i-ova-snapshots.expected \
+	test-v2v-i-ova-snapshots.expected2 \
+	test-v2v-i-ova-snapshots.ovf \
+	test-v2v-i-ova-snapshots.sh \
 	test-v2v-i-ova-subfolders.expected \
 	test-v2v-i-ova-subfolders.expected2 \
 	test-v2v-i-ova-subfolders.ovf \
diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml
index 9f7bc64ff..5953f3401 100644
--- a/v2v/input_ova.ml
+++ b/v2v/input_ova.ml
@@ -27,6 +27,49 @@ open Parse_ova
 open Parse_ovf_from_ova
 open Name_from_disk
 
+(* RHBZ#1570407: VMware-generated OVA files found in the wild can
+ * contain hrefs referencing snapshots.  The href will be something
+ * like: <File href="disk1.vmdk"/> but the actual disk will be
a
+ * snapshot called something like "disk1.vmdk.000000000".
+ *)
+let re_snapshot = PCRE.compile "\\.(\\d+)$"
+
+let rec find_file_or_snapshot ova_t href manifest +  match get_file_ref ova_t
href with
+  | Some f -> f
+  | None ->
+     (* Find all files in the manifest called [<href>.\d+] *)
+     let manifest = List.map fst manifest (* just the file_ref's *) in
+     let snapshots +       List.filter_map (
+         function
+         | LocalFile filename -> get_snapshot_if_matches href filename
+         | TarFile (_, filename) -> get_snapshot_if_matches href filename
+       ) manifest in
+     (* Pick highest. *)
+     let snapshots = List.sort (fun a b -> compare b a) snapshots in
+     match snapshots with
+     | [] -> error_missing_href href
+     | snapshot::_ ->
+        let href = sprintf "%s.%s" href snapshot in
+        match get_file_ref ova_t href with
+        | None -> error_missing_href href
+        | Some f -> f
+
+(* If [filename] matches [<href>.\d+] then return [Some snapshot]. *)
+and get_snapshot_if_matches href filename +  if PCRE.matches re_snapshot
filename then (
+    let snapshot = PCRE.sub 1 in
+    if String.is_suffix filename (sprintf "%s.%s" href snapshot) then
+      Some snapshot
+    else
+      None
+  )
+  else None
+
+and error_missing_href href +  error (f_"-i ova: OVF references file ‘%s’
which was not found in the OVA archive") href
+
 class input_ova ova = object
   inherit input
 
@@ -79,11 +122,7 @@ class input_ova ova = object
     (* Convert the disk hrefs into qemu URIs. *)
     let qemu_uris = List.map (
       fun { href; compressed } ->
-        let file_ref -          match get_file_ref ova_t href with
-          | Some f -> f
-          | None ->
-             error (f_"-i ova: OVF references file ‘%s’ which was not
found in the OVA archive") href in
+        let file_ref = find_file_or_snapshot ova_t href manifest in
 
         match compressed, file_ref with
         | false, LocalFile filename ->
diff --git a/v2v/test-v2v-i-ova-snapshots.expected
b/v2v/test-v2v-i-ova-snapshots.expected
new file mode 100644
index 000000000..97bce58ad
--- /dev/null
+++ b/v2v/test-v2v-i-ova-snapshots.expected
@@ -0,0 +1,21 @@
+Source guest information (--print-source option):
+
+    source name: 2K8R2EESP1_2_Medium
+hypervisor type: vmware
+         memory: 1073741824 (bytes)
+       nr vCPUs: 1
+     CPU vendor: 
+      CPU model: 
+   CPU topology: 
+   CPU features: 
+       firmware: uefi
+        display: 
+          video: 
+          sound: 
+disks:
+	disk1.vmdk (vmdk) [scsi]
+removable media:
+	CD-ROM [ide] in slot 0
+NICs:
+	Bridge "PG-VLAN60" [e1000]
+
diff --git a/v2v/test-v2v-i-ova-snapshots.expected2
b/v2v/test-v2v-i-ova-snapshots.expected2
new file mode 100644
index 000000000..45be3cc46
--- /dev/null
+++ b/v2v/test-v2v-i-ova-snapshots.expected2
@@ -0,0 +1,21 @@
+Source guest information (--print-source option):
+
+    source name: 2K8R2EESP1_2_Medium
+hypervisor type: vmware
+         memory: 1073741824 (bytes)
+       nr vCPUs: 1
+     CPU vendor: 
+      CPU model: 
+   CPU topology: 
+   CPU features: 
+       firmware: uefi
+        display: 
+          video: 
+          sound: 
+disks:
+	json:{ "file": { "driver": "raw",
"offset": x, "size": 12288, "file": {
"driver": "file", "filename":
"test-snapshots.ova" } } } (vmdk) [scsi]
+removable media:
+	CD-ROM [ide] in slot 0
+NICs:
+	Bridge "PG-VLAN60" [e1000]
+
diff --git a/v2v/test-v2v-i-ova-snapshots.ovf b/v2v/test-v2v-i-ova-snapshots.ovf
new file mode 100644
index 000000000..5e7c0d054
--- /dev/null
+++ b/v2v/test-v2v-i-ova-snapshots.ovf
@@ -0,0 +1,138 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Envelope vmw:buildId="build-1750787"
xmlns="http://schemas.dmtf.org/ovf/envelope/1"
xmlns:cim="http://schemas.dmtf.org/wbem/wscim/1/common"
xmlns:ovf="http://schemas.dmtf.org/ovf/envelope/1"
xmlns:rasd="http://schemas.dmtf.org/wbem/wscim/1/cim-schema/2/CIM_ResourceAllocationSettingData"
xmlns:vmw="http://www.vmware.com/schema/ovf"
xmlns:vssd="http://schemas.dmtf.org/wbem/wscim/1/cim-schema/2/CIM_VirtualSystemSettingData"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  <References>
+    <File ovf:href="disk1.vmdk" ovf:id="file1"
ovf:size="12288"/>
+  </References>
+  <DiskSection>
+    <Info>Virtual disk information</Info>
+    <Disk ovf:capacity="50" ovf:capacityAllocationUnits="byte
* 2^30" ovf:diskId="vmdisk1" ovf:fileRef="file1"
ovf:format="http://www.vmware.com/interfaces/specifications/vmdk.html#streamOptimized"
ovf:populatedSize="18975752192"/>
+  </DiskSection>
+  <NetworkSection>
+    <Info>The list of logical networks</Info>
+    <Network ovf:name="PG-VLAN60">
+      <Description>The PG-VLAN60 network</Description>
+    </Network>
+  </NetworkSection>
+  <VirtualSystem ovf:id="2K8R2EESP1_2_Medium">
+    <Info>A virtual machine</Info>
+    <Name>2K8R2EESP1_2_Medium</Name>
+    <OperatingSystemSection ovf:id="103"
vmw:osType="windows7Server64Guest">
+      <Info>The kind of installed guest operating system</Info>
+      <Description>Microsoft Windows Server 2008 R2
(64-bit)</Description>
+    </OperatingSystemSection>
+    <VirtualHardwareSection>
+      <Info>Virtual hardware requirements</Info>
+      <System>
+        <vssd:ElementName>Virtual Hardware
Family</vssd:ElementName>
+        <vssd:InstanceID>0</vssd:InstanceID>
+       
<vssd:VirtualSystemIdentifier>2K8R2EESP1_2_Medium</vssd:VirtualSystemIdentifier>
+        <vssd:VirtualSystemType>vmx-10</vssd:VirtualSystemType>
+      </System>
+      <Item>
+        <rasd:AllocationUnits>hertz * 10^6</rasd:AllocationUnits>
+        <rasd:Description>Number of Virtual CPUs</rasd:Description>
+        <rasd:ElementName>1 virtual CPU(s)</rasd:ElementName>
+        <rasd:InstanceID>1</rasd:InstanceID>
+        <rasd:ResourceType>3</rasd:ResourceType>
+        <rasd:VirtualQuantity>1</rasd:VirtualQuantity>
+      </Item>
+      <Item>
+        <rasd:AllocationUnits>byte * 2^20</rasd:AllocationUnits>
+        <rasd:Description>Memory Size</rasd:Description>
+        <rasd:ElementName>1024MB of memory</rasd:ElementName>
+        <rasd:InstanceID>2</rasd:InstanceID>
+        <rasd:ResourceType>4</rasd:ResourceType>
+        <rasd:VirtualQuantity>1024</rasd:VirtualQuantity>
+      </Item>
+      <Item>
+        <rasd:Address>0</rasd:Address>
+        <rasd:Description>SCSI Controller</rasd:Description>
+        <rasd:ElementName>SCSI controller 0</rasd:ElementName>
+        <rasd:InstanceID>3</rasd:InstanceID>
+        <rasd:ResourceSubType>lsilogicsas</rasd:ResourceSubType>
+        <rasd:ResourceType>6</rasd:ResourceType>
+        <vmw:Config ovf:required="false"
vmw:key="slotInfo.pciSlotNumber" vmw:value="160"/>
+      </Item>
+      <Item>
+        <rasd:Address>1</rasd:Address>
+        <rasd:Description>IDE Controller</rasd:Description>
+        <rasd:ElementName>IDE 1</rasd:ElementName>
+        <rasd:InstanceID>4</rasd:InstanceID>
+        <rasd:ResourceType>5</rasd:ResourceType>
+      </Item>
+      <Item>
+        <rasd:Address>0</rasd:Address>
+        <rasd:Description>IDE Controller</rasd:Description>
+        <rasd:ElementName>IDE 0</rasd:ElementName>
+        <rasd:InstanceID>5</rasd:InstanceID>
+        <rasd:ResourceType>5</rasd:ResourceType>
+      </Item>
+      <Item ovf:required="false">
+        <rasd:AutomaticAllocation>false</rasd:AutomaticAllocation>
+        <rasd:ElementName>Video card</rasd:ElementName>
+        <rasd:InstanceID>6</rasd:InstanceID>
+        <rasd:ResourceType>24</rasd:ResourceType>
+        <vmw:Config ovf:required="false"
vmw:key="enable3DSupport" vmw:value="false"/>
+        <vmw:Config ovf:required="false"
vmw:key="use3dRenderer" vmw:value="automatic"/>
+        <vmw:Config ovf:required="false"
vmw:key="useAutoDetect" vmw:value="true"/>
+        <vmw:Config ovf:required="false"
vmw:key="videoRamSizeInKB" vmw:value="4096"/>
+      </Item>
+      <Item ovf:required="false">
+        <rasd:AutomaticAllocation>false</rasd:AutomaticAllocation>
+        <rasd:ElementName>VMCI device</rasd:ElementName>
+        <rasd:InstanceID>7</rasd:InstanceID>
+        <rasd:ResourceSubType>vmware.vmci</rasd:ResourceSubType>
+        <rasd:ResourceType>1</rasd:ResourceType>
+        <vmw:Config ovf:required="false"
vmw:key="allowUnrestrictedCommunication"
vmw:value="false"/>
+        <vmw:Config ovf:required="false"
vmw:key="slotInfo.pciSlotNumber" vmw:value="32"/>
+      </Item>
+      <Item ovf:required="false">
+        <rasd:AddressOnParent>0</rasd:AddressOnParent>
+        <rasd:AutomaticAllocation>false</rasd:AutomaticAllocation>
+        <rasd:ElementName>CD/DVD drive 1</rasd:ElementName>
+        <rasd:InstanceID>8</rasd:InstanceID>
+        <rasd:Parent>4</rasd:Parent>
+       
<rasd:ResourceSubType>vmware.cdrom.atapi</rasd:ResourceSubType>
+        <rasd:ResourceType>15</rasd:ResourceType>
+      </Item>
+      <Item>
+        <rasd:AddressOnParent>0</rasd:AddressOnParent>
+        <rasd:ElementName>Hard disk 1</rasd:ElementName>
+        <rasd:HostResource>ovf:/disk/vmdisk1</rasd:HostResource>
+        <rasd:InstanceID>9</rasd:InstanceID>
+        <rasd:Parent>3</rasd:Parent>
+        <rasd:ResourceType>17</rasd:ResourceType>
+        <vmw:Config ovf:required="false"
vmw:key="backing.writeThrough" vmw:value="false"/>
+      </Item>
+      <Item>
+        <rasd:AddressOnParent>7</rasd:AddressOnParent>
+        <rasd:AutomaticAllocation>true</rasd:AutomaticAllocation>
+        <rasd:Connection>PG-VLAN60</rasd:Connection>
+        <rasd:Description>E1000 ethernet adapter on
"PG-VLAN60"</rasd:Description>
+        <rasd:ElementName>Network adapter 1</rasd:ElementName>
+        <rasd:InstanceID>11</rasd:InstanceID>
+        <rasd:ResourceSubType>E1000</rasd:ResourceSubType>
+        <rasd:ResourceType>10</rasd:ResourceType>
+        <vmw:Config ovf:required="false"
vmw:key="slotInfo.pciSlotNumber" vmw:value="33"/>
+        <vmw:Config ovf:required="false"
vmw:key="wakeOnLanEnabled" vmw:value="true"/>
+      </Item>
+      <vmw:Config ovf:required="false"
vmw:key="cpuHotAddEnabled" vmw:value="false"/>
+      <vmw:Config ovf:required="false"
vmw:key="cpuHotRemoveEnabled" vmw:value="false"/>
+      <vmw:Config ovf:required="false"
vmw:key="firmware" vmw:value="efi"/>
+      <vmw:Config ovf:required="false"
vmw:key="virtualICH7MPresent" vmw:value="false"/>
+      <vmw:Config ovf:required="false"
vmw:key="virtualSMCPresent" vmw:value="false"/>
+      <vmw:Config ovf:required="false"
vmw:key="memoryHotAddEnabled" vmw:value="false"/>
+      <vmw:Config ovf:required="false"
vmw:key="nestedHVEnabled" vmw:value="false"/>
+      <vmw:Config ovf:required="false"
vmw:key="powerOpInfo.powerOffType" vmw:value="soft"/>
+      <vmw:Config ovf:required="false"
vmw:key="powerOpInfo.resetType" vmw:value="soft"/>
+      <vmw:Config ovf:required="false"
vmw:key="powerOpInfo.standbyAction"
vmw:value="checkpoint"/>
+      <vmw:Config ovf:required="false"
vmw:key="powerOpInfo.suspendType" vmw:value="hard"/>
+      <vmw:Config ovf:required="false"
vmw:key="tools.afterPowerOn" vmw:value="true"/>
+      <vmw:Config ovf:required="false"
vmw:key="tools.afterResume" vmw:value="true"/>
+      <vmw:Config ovf:required="false"
vmw:key="tools.beforeGuestShutdown" vmw:value="true"/>
+      <vmw:Config ovf:required="false"
vmw:key="tools.beforeGuestStandby" vmw:value="true"/>
+      <vmw:Config ovf:required="false"
vmw:key="tools.syncTimeWithHost" vmw:value="false"/>
+      <vmw:Config ovf:required="false"
vmw:key="tools.toolsUpgradePolicy"
vmw:value="upgradeAtPowerCycle"/>
+    </VirtualHardwareSection>
+  </VirtualSystem>
+</Envelope>                                 
diff --git a/v2v/test-v2v-i-ova-snapshots.sh b/v2v/test-v2v-i-ova-snapshots.sh
new file mode 100755
index 000000000..7649d22f9
--- /dev/null
+++ b/v2v/test-v2v-i-ova-snapshots.sh
@@ -0,0 +1,78 @@
+#!/bin/bash -
+# libguestfs virt-v2v test script
+# Copyright (C) 2014-2018 Red Hat Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+# Test -i ova option with OVA file containing snapshots.
+# https://bugzilla.redhat.com/show_bug.cgi?id=1570407
+
+unset CDPATH
+export LANG=C
+set -e
+
+$TEST_FUNCTIONS
+skip_if_skipped
+skip_if_backend uml
+
+export VIRT_TOOLS_DATA_DIR="$top_srcdir/test-data/fake-virt-tools"
+
+d=test-v2v-i-ova-snapshots.d
+rm -rf $d
+mkdir $d
+
+pushd $d
+
+# Create a phony OVA.  This is only a test of source parsing, not
+# conversion, so the contents of the disks doesn't matter.
+# In these weird OVAs, disk1.vmdk does not exist, but both the
+# href and manifest reference it.  virt-v2v should use the
+# highest numbered snapshot instead.
+guestfish disk-create disk1.vmdk.000000000 raw 10k
+guestfish disk-create disk1.vmdk.000000001 raw 11k
+guestfish disk-create disk1.vmdk.000000002 raw 12k
+sha=`do_sha1 disk1.vmdk.000000002`
+echo -e "SHA1(disk1.vmdk)= $sha\r" > disk1.mf
+sha=`do_sha1 disk1.vmdk.000000000`
+echo -e "SHA1(disk1.vmdk.000000000)= $sha\r" > disk1.mf
+sha=`do_sha1 disk1.vmdk.000000001`
+echo -e "SHA1(disk1.vmdk.000000001)= $sha\r" > disk1.mf
+sha=`do_sha1 disk1.vmdk.000000002`
+echo -e "SHA1(disk1.vmdk.000000002)= $sha\r" > disk1.mf
+cp ../test-v2v-i-ova-snapshots.ovf .
+tar -cf test-snapshots.ova test-v2v-i-ova-snapshots.ovf disk1.vmdk.00000000?
disk1.mf
+
+popd
+
+# Run virt-v2v but only as far as the --print-source stage
+$VG virt-v2v --debug-gc --quiet \
+    -i ova $d/test-snapshots.ova \
+    --print-source > $d/source
+
+# Check the parsed source is what we expect.
+if grep -sq json: $d/source ; then
+    # Normalize the output.
+    # Remove directory prefix.
+    # Exact offset will vary because of tar.
+    sed -i -e "s,\"[^\"]*/$d/,\"," \
+           -e "s|\"offset\": [0-9]*,|\"offset\":
x,|" $d/source
+    diff -u test-v2v-i-ova-snapshots.expected2 $d/source
+else
+    # normalize the output
+    sed -i -e 's,[^ \t]*\(disk.*.vmdk\),\1,' $d/source
+    diff -u test-v2v-i-ova-snapshots.expected $d/source
+fi
+
+rm -rf $d
-- 
2.16.2
Seemingly Similar Threads
- [PATCH 0/2] v2v: -i ova: A couple of cleanup patches.
- GIT: [PATCH 0/5] v2v: Multiple fixes for handling semi-standard OVA files (RHBZ#1152998).
- v2v: -i libvirtxml: Map empty network or bridge name to a default (RHBZ#1257895).
- [PATCH v5 0/3] Import directly from OVA tar archive if possible
- [PATCH v3 0/6] Import directly from OVA tar archive if possible