Richard W.M. Jones
2018-Apr-25 13:35 UTC
[Libguestfs] [PATCH v2 0/9] v2v: -i ova: Handle OVAs containing snapshots.
https://bugzilla.redhat.com/show_bug.cgi?id=1570407 This turned into quite an in-depth refactoring of how we handle OVAs. It also fixes a potential security issue. Rich.
Richard W.M. Jones
2018-Apr-25 13:35 UTC
[Libguestfs] [PATCH v2 1/9] mltools: Checksums.verify_checksum{, s} returns an error type instead of throwing exception.
Simple code refactoring, making it both more difficult to ignore the error case and easier to add other error cases in future. --- builder/builder.ml | 9 +++++---- common/mltools/checksums.ml | 17 +++++++++++++---- common/mltools/checksums.mli | 16 +++++++++++----- v2v/input_ova.ml | 10 ++++++---- 4 files changed, 35 insertions(+), 17 deletions(-) diff --git a/builder/builder.ml b/builder/builder.ml index 478b41bba..83c7aefed 100644 --- a/builder/builder.ml +++ b/builder/builder.ml @@ -313,10 +313,11 @@ let main () match entry with (* New-style: Using a checksum. *) | { Index.checksums = Some csums } -> - (try Checksums.verify_checksums csums template - with Checksums.Mismatched_checksum (csum, csum_actual) -> - error (f_"%s checksum of template did not match the expected checksum!\n found checksum: %s\n expected checksum: %s\nTry:\n - Use the ‘-v’ option and look for earlier error messages.\n - Delete the cache: virt-builder --delete-cache\n - Check no one has tampered with the website or your network!") - (Checksums.string_of_csum_t csum) csum_actual (Checksums.string_of_csum csum) + (match Checksums.verify_checksums csums template with + | Checksums.Good_checksum -> () + | Checksums.Mismatched_checksum (csum, csum_actual) -> + error (f_"%s checksum of template did not match the expected checksum!\n found checksum: %s\n expected checksum: %s\nTry:\n - Use the ‘-v’ option and look for earlier error messages.\n - Delete the cache: virt-builder --delete-cache\n - Check no one has tampered with the website or your network!") + (Checksums.string_of_csum_t csum) csum_actual (Checksums.string_of_csum csum) ) | { Index.checksums = None } -> diff --git a/common/mltools/checksums.ml b/common/mltools/checksums.ml index a40edca76..4dd69e734 100644 --- a/common/mltools/checksums.ml +++ b/common/mltools/checksums.ml @@ -27,7 +27,9 @@ type csum_t | SHA256 of string | SHA512 of string -exception Mismatched_checksum of (csum_t * string) +type csum_result + | Good_checksum + | Mismatched_checksum of csum_t * string let string_of_csum_t = function | SHA1 _ -> "sha1" @@ -73,8 +75,15 @@ let compute_checksum csum_type ?tar filename let verify_checksum csum ?tar filename let csum_type = string_of_csum_t csum in let csum_actual = compute_checksum csum_type ?tar filename in - if csum <> csum_actual then - raise (Mismatched_checksum (csum, (string_of_csum csum_actual))) + if csum = csum_actual then + Good_checksum + else + Mismatched_checksum (csum, string_of_csum csum_actual) let verify_checksums checksums filename - List.iter (fun c -> verify_checksum c filename) checksums + List.fold_left ( + fun acc c -> + match acc with + | Good_checksum -> verify_checksum c filename + | Mismatched_checksum _ as acc -> acc + ) Good_checksum checksums diff --git a/common/mltools/checksums.mli b/common/mltools/checksums.mli index 92336a18b..d45b29dfd 100644 --- a/common/mltools/checksums.mli +++ b/common/mltools/checksums.mli @@ -21,7 +21,10 @@ type csum_t | SHA256 of string | SHA512 of string -exception Mismatched_checksum of (csum_t * string) (* expected checksum, got *) +type csum_result + | Good_checksum + (* expected checksum, actual checksum. *) + | Mismatched_checksum of csum_t * string val of_string : string -> string -> csum_t (** [of_string type value] returns the [csum_t] for the specified @@ -29,14 +32,17 @@ val of_string : string -> string -> csum_t Raise [Invalid_argument] if the checksum type is not known. *) -val verify_checksum : csum_t -> ?tar:string -> string -> unit -(** [verify_checksum type filename] Verify the checksum of the file. +val verify_checksum : csum_t -> ?tar:string -> string -> csum_result +(** [verify_checksum type filename] verifies the checksum of the file. When optional [tar] is used it is path to uncompressed tar archive and the [filename] is a path in the tar archive. *) -val verify_checksums : csum_t list -> string -> unit -(** Verify all the checksums of the file. *) +val verify_checksums : csum_t list -> string -> csum_result +(** Verify all the checksums of the file. + + If any checksum fails, the first failure (only) is returned in + {!csum_result}. *) val string_of_csum_t : csum_t -> string (** Return a string representation of the checksum type. *) diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml index a909b92ed..59dbe6f5f 100644 --- a/v2v/input_ova.ml +++ b/v2v/input_ova.ml @@ -224,15 +224,17 @@ object and disk = PCRE.sub 2 and expected = PCRE.sub 3 in let csum = Checksums.of_string mode expected in - try + match if partial then Checksums.verify_checksum csum ~tar:ova (mf_subfolder // disk) else Checksums.verify_checksum csum (mf_folder // disk) - with Checksums.Mismatched_checksum (_, actual) -> - error (f_"checksum of disk %s does not match manifest %s (actual %s(%s) = %s, expected %s(%s) = %s)") - disk mf mode disk actual mode disk expected; + with + | Checksums.Good_checksum -> () + | Checksums.Mismatched_checksum (_, actual) -> + error (f_"checksum of disk %s does not match manifest %s (actual %s(%s) = %s, expected %s(%s) = %s)") + disk mf mode disk actual mode disk expected; ) else warning (f_"unable to parse line from manifest file: %S") line; -- 2.16.2
Richard W.M. Jones
2018-Apr-25 13:35 UTC
[Libguestfs] [PATCH v2 2/9] v2v: -i ova: Ignore non-existent files mentioned in *.mf.
Some OVA files generated by VMware have a *.mf file which contains checksums for files which don't exist in the OVA. Ignore these checksums. Thanks: Nisim Simsolo. --- builder/builder.ml | 3 +++ common/mltools/checksums.ml | 28 +++++++++++++++++++++------- common/mltools/checksums.mli | 2 ++ v2v/input_ova.ml | 9 ++++++++- 4 files changed, 34 insertions(+), 8 deletions(-) diff --git a/builder/builder.ml b/builder/builder.ml index 83c7aefed..2ed7cb97a 100644 --- a/builder/builder.ml +++ b/builder/builder.ml @@ -318,6 +318,9 @@ let main () | Checksums.Mismatched_checksum (csum, csum_actual) -> error (f_"%s checksum of template did not match the expected checksum!\n found checksum: %s\n expected checksum: %s\nTry:\n - Use the ‘-v’ option and look for earlier error messages.\n - Delete the cache: virt-builder --delete-cache\n - Check no one has tampered with the website or your network!") (Checksums.string_of_csum_t csum) csum_actual (Checksums.string_of_csum csum) + | Checksums.Missing_file -> + error (f_"%s: template not downloaded or deleted. You may have run ‘virt-builder --delete-cache’ in parallel.") + template ) | { Index.checksums = None } -> diff --git a/common/mltools/checksums.ml b/common/mltools/checksums.ml index 4dd69e734..fdeae1dff 100644 --- a/common/mltools/checksums.ml +++ b/common/mltools/checksums.ml @@ -30,6 +30,7 @@ type csum_t type csum_result | Good_checksum | Mismatched_checksum of csum_t * string + | Missing_file let string_of_csum_t = function | SHA1 _ -> "sha1" @@ -72,18 +73,31 @@ let compute_checksum csum_type ?tar filename let csum_str = fst (String.split " " line) in of_string csum_type csum_str +(* Check if the direct file exists or if it exists in the tarball. *) +let file_exists ?tar filename + match tar with + | None -> Sys.file_exists filename + | Some tar -> + let cmd + sprintf "tar tf %s %s >/dev/null 2>&1" (quote tar) (quote filename) in + Sys.command cmd = 0 + let verify_checksum csum ?tar filename - let csum_type = string_of_csum_t csum in - let csum_actual = compute_checksum csum_type ?tar filename in - if csum = csum_actual then - Good_checksum - else - Mismatched_checksum (csum, string_of_csum csum_actual) + if not (file_exists ?tar filename) then + Missing_file + else ( + let csum_type = string_of_csum_t csum in + let csum_actual = compute_checksum csum_type ?tar filename in + if csum = csum_actual then + Good_checksum + else + Mismatched_checksum (csum, string_of_csum csum_actual) + ) let verify_checksums checksums filename List.fold_left ( fun acc c -> match acc with | Good_checksum -> verify_checksum c filename - | Mismatched_checksum _ as acc -> acc + | (Mismatched_checksum _|Missing_file) as acc -> acc ) Good_checksum checksums diff --git a/common/mltools/checksums.mli b/common/mltools/checksums.mli index d45b29dfd..533e399bf 100644 --- a/common/mltools/checksums.mli +++ b/common/mltools/checksums.mli @@ -25,6 +25,8 @@ type csum_result | Good_checksum (* expected checksum, actual checksum. *) | Mismatched_checksum of csum_t * string + (* referenced file does not exist *) + | Missing_file val of_string : string -> string -> csum_t (** [of_string type value] returns the [csum_t] for the specified diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml index 59dbe6f5f..f23a1f2a9 100644 --- a/v2v/input_ova.ml +++ b/v2v/input_ova.ml @@ -234,7 +234,14 @@ object | Checksums.Good_checksum -> () | Checksums.Mismatched_checksum (_, actual) -> error (f_"checksum of disk %s does not match manifest %s (actual %s(%s) = %s, expected %s(%s) = %s)") - disk mf mode disk actual mode disk expected; + disk mf mode disk actual mode disk expected + | Checksums.Missing_file -> + (* RHBZ#1570407: Some OVA files generated by VMware + * reference non-existent components in the *.mf file. + * Generate a warning and ignore it. + *) + warning (f_"%s has a checksum for non-existent file %s (ignored)") + mf disk ) else warning (f_"unable to parse line from manifest file: %S") line; -- 2.16.2
Richard W.M. Jones
2018-Apr-25 13:35 UTC
[Libguestfs] [PATCH v2 3/9] v2v: Ignore miscellaneous tar messages when parsing tar for file locations.
We use ‘tar tRvf’ to parse the locations of files within the tarball. However examination of tar.git:src/list.c shows various other messages which can appear in the output: block <offset>: ** Block of NULs ** block <offset>: ** End of File ** Indeed it was easy to produce the first message just by using modern tar to create a tarball: $ tar tRvf '/var/tmp/bz1570407-reproducer.ova' block 0: -rw-r--r-- rjones/rjones 100 2018-04-22 17:06 RHEL7_3_042218_extra-disk1.vmdk.000000000 block 2: -rw-r--r-- rjones/rjones 243 2018-04-22 17:07 RHEL7_3_042218_extra.mf block 4: -rw-r--r-- rjones/rjones 13066 2018-04-22 15:08 RHEL7_3_042218_extra.ovf block 31: ** Block of NULs ** Ignore these messages. --- v2v/utils.ml | 71 ++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/v2v/utils.ml b/v2v/utils.ml index 372ad8aaa..d73011f9f 100644 --- a/v2v/utils.ml +++ b/v2v/utils.ml @@ -147,6 +147,7 @@ let error_if_no_ssh_agent () error (f_"ssh-agent authentication has not been set up ($SSH_AUTH_SOCK is not set). This is required by qemu to do passwordless ssh access. See the virt-v2v(1) man page for more information.") let ws = PCRE.compile "\\s+" +let re_tar_message = PCRE.compile "\\*\\* [^*]+ \\*\\*$" let find_file_in_tar tar filename let lines = external_command (sprintf "tar tRvf %s" (Filename.quote tar)) in @@ -156,42 +157,50 @@ let find_file_in_tar tar filename | line :: lines -> ( (* Lines have the form: * block <offset>: <perms> <owner>/<group> <size> <mdate> <mtime> <file> + * or: + * block <offset>: ** Block of NULs ** + * block <offset>: ** End of File ** *) - let elems = PCRE.nsplit ~max:8 ws line in - if List.length elems = 8 && List.hd elems = "block" then ( - let elems = Array.of_list elems in - let offset = elems.(1) in - let size = elems.(4) in - let fname = elems.(7) in + if PCRE.matches re_tar_message line then + loop lines (* ignore "** Block of NULs **" etc. *) + else ( + let elems = PCRE.nsplit ~max:8 ws line in + if List.length elems = 8 && List.hd elems = "block" then ( + let elems = Array.of_list elems in + let offset = elems.(1) in + let size = elems.(4) in + let fname = elems.(7) in - if fname <> filename then - loop lines - else ( - let offset - try - (* There should be a colon at the end *) - let i = String.rindex offset ':' in - if i == (String.length offset)-1 then - Int64.of_string (String.sub offset 0 i) - else - failwith "colon at wrong position" - with Failure _ | Not_found -> - failwithf (f_"invalid offset returned by tar: %S") offset in + if fname <> filename then + loop lines + else ( + let offset + try + (* There should be a colon at the end *) + let i = String.rindex offset ':' in + if i == (String.length offset)-1 then + Int64.of_string (String.sub offset 0 i) + else + failwith "colon at wrong position" + with Failure _ | Not_found -> + failwithf (f_"invalid offset returned by tar: %S") offset in - let size - try Int64.of_string size - with Failure _ -> - failwithf (f_"invalid size returned by tar: %S") size in + let size + try Int64.of_string size + with Failure _ -> + failwithf (f_"invalid size returned by tar: %S") size in - (* Note: Offset is actualy block number and there is a single - * block with tar header at the beginning of the file. So skip - * the header and convert the block number to bytes before - * returning. - *) - (offset +^ 1L) *^ 512L, size + (* Note: Offset is actualy block number and there is a single + * block with tar header at the beginning of the file. So skip + * the header and convert the block number to bytes before + * returning. + *) + (offset +^ 1L) *^ 512L, size + ) ) - ) else - failwithf (f_"failed to parse line returned by tar: %S") line + else + failwithf (f_"failed to parse line returned by tar: %S") line + ) ) in loop lines -- 2.16.2
Richard W.M. Jones
2018-Apr-25 13:35 UTC
[Libguestfs] [PATCH v2 4/9] v2v: parse OVF: Export useful parse_disks function.
So you can parse out just the list of disks. --- v2v/parse_ovf_from_ova.ml | 402 +++++++++++++++++++++++---------------------- v2v/parse_ovf_from_ova.mli | 3 + 2 files changed, 211 insertions(+), 194 deletions(-) diff --git a/v2v/parse_ovf_from_ova.ml b/v2v/parse_ovf_from_ova.ml index 2ffaf7ae4..7d4f2f543 100644 --- a/v2v/parse_ovf_from_ova.ml +++ b/v2v/parse_ovf_from_ova.ml @@ -35,7 +35,7 @@ type ovf_disk = { compressed : bool; (* If the file is gzip compressed. *) } -let parse_ovf_from_ova ovf_filename +let xpathctx_of_ovf ovf_filename let xml = read_whole_file ovf_filename in let doc = Xml.parse_memory xml in @@ -50,207 +50,221 @@ let parse_ovf_from_ova ovf_filename Xml.xpath_register_ns xpathctx "vssd" "http://schemas.dmtf.org/wbem/wscim/1/cim-schema/2/CIM_VirtualSystemSettingData"; + xpathctx + +let rec parse_ovf_from_ova ovf_filename + let xpathctx = xpathctx_of_ovf ovf_filename in + let xpath_string = xpath_string xpathctx and xpath_int = xpath_int xpathctx and xpath_int64 = xpath_int64 xpathctx in - let rec parse_top () - (* Search for vm name. *) - let name - match xpath_string "/ovf:Envelope/ovf:VirtualSystem/ovf:Name/text()" with - | None | Some "" -> None - | Some _ as name -> name in + (* Search for vm name. *) + let name + match xpath_string "/ovf:Envelope/ovf:VirtualSystem/ovf:Name/text()" with + | None | Some "" -> None + | Some _ as name -> name in - (* Search for memory. *) - let memory = Option.default (1024L *^ 1024L) (xpath_int64 "/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=4]/rasd:VirtualQuantity/text()") in - let memory = memory *^ 1024L *^ 1024L in + (* Search for memory. *) + let memory = Option.default (1024L *^ 1024L) (xpath_int64 "/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=4]/rasd:VirtualQuantity/text()") in + let memory = memory *^ 1024L *^ 1024L in - (* Search for number of vCPUs. *) - let vcpu = Option.default 1 (xpath_int "/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=3]/rasd:VirtualQuantity/text()") in + (* Search for number of vCPUs. *) + let vcpu = Option.default 1 (xpath_int "/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=3]/rasd:VirtualQuantity/text()") in - (* CPU topology. coresPerSocket is a VMware proprietary extension. - * I couldn't find out how hyperthreads is specified in the OVF. - *) - let cores_per_socket = xpath_int "/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=3]/vmw:CoresPerSocket/text()" in - let cpu_topology - match cores_per_socket with - | None -> None - | Some cores_per_socket when cores_per_socket <= 0 -> - warning (f_"invalid vmw:CoresPerSocket (%d) ignored") - cores_per_socket; - None - | Some cores_per_socket -> - let sockets = vcpu / cores_per_socket in - if sockets <= 0 then ( - warning (f_"invalid vmw:CoresPerSocket < number of cores"); - None - ) - else - Some { s_cpu_sockets = sockets; s_cpu_cores = cores_per_socket; - s_cpu_threads = 1 } in - - (* BIOS or EFI firmware? *) - let firmware = Option.default "bios" (xpath_string "/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/vmw:Config[@vmw:key=\"firmware\"]/@vmw:value") in - let firmware - match firmware with - | "bios" -> BIOS - | "efi" -> UEFI - | s -> - error (f_"unknown Config:firmware value %s (expected \"bios\" or \"efi\")") s in - - name, memory, vcpu, cpu_topology, firmware, - parse_disks (), parse_removables (), parse_nics () - - (* Helper function to return the parent controller of a disk. *) - and parent_controller id - let expr = sprintf "/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:InstanceID/text()=%d]/rasd:ResourceType/text()" id in - let controller = xpath_int expr in - - (* 5: IDE, 6: SCSI controller, 20: SATA *) - match controller with - | Some 5 -> Some Source_IDE - | Some 6 -> Some Source_SCSI - | Some 20 -> Some Source_SATA - | None -> - warning (f_"ova disk has no parent controller, please report this as a bug supplying the *.ovf file extracted from the ova"); - None - | Some controller -> - warning (f_"ova disk has an unknown VMware controller type (%d), please report this as a bug supplying the *.ovf file extracted from the ova") - controller; - None - - (* Hard disks (ResourceType = 17). *) - and parse_disks () - let disks = ref [] in - let expr = "/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=17]" in - let obj = Xml.xpath_eval_expression xpathctx expr in - let nr_nodes = Xml.xpathobj_nr_nodes obj in - for i = 0 to nr_nodes-1 do - let n = Xml.xpathobj_node obj i in - Xml.xpathctx_set_current_context xpathctx n; - - (* XXX We assume the OVF lists these in order. - let address = xpath_int "rasd:AddressOnParent/text()" in - *) - - (* Find the parent controller. *) - let parent_id = xpath_int "rasd:Parent/text()" in - let controller - match parent_id with - | None -> None - | Some id -> parent_controller id in - - Xml.xpathctx_set_current_context xpathctx n; - let file_id - Option.default "" (xpath_string "rasd:HostResource/text()") in - let rex = PCRE.compile "^(?:ovf:)?/disk/(.*)" in - if PCRE.matches rex file_id then ( - (* Chase the references through to the actual file name. *) - let file_id = PCRE.sub 1 in - let expr = sprintf "/ovf:Envelope/ovf:DiskSection/ovf:Disk[@ovf:diskId='%s']/@ovf:fileRef" file_id in - let file_ref - match xpath_string expr with - | None -> error (f_"error parsing disk fileRef") - | Some s -> s in - let expr = sprintf "/ovf:Envelope/ovf:References/ovf:File[@ovf:id='%s']/@ovf:href" file_ref in - let href - match xpath_string expr with - | None -> error (f_"no href in ovf:File (id=%s)") file_ref - | Some s -> s in - - let expr = sprintf "/ovf:Envelope/ovf:References/ovf:File[@ovf:id='%s']/@ovf:compression" file_ref in - let compressed - match xpath_string expr with - | None | Some "identity" -> false - | Some "gzip" -> true - | Some s -> error (f_"unsupported compression in OVF: %s") s in - - let disk = { - source_disk = { - s_disk_id = i; - s_qemu_uri = ""; - s_format = Some "vmdk"; - s_controller = controller; - }; - href = href; - compressed = compressed - } in - List.push_front disk disks; - ) else - error (f_"could not parse disk rasd:HostResource from OVF document") - done; - List.rev !disks - - (* Floppies (ResourceType = 14), CDs (ResourceType = 15) and - * CDROMs (ResourceType = 16). (What is the difference?) Try hard - * to preserve the original ordering from the OVF. + (* CPU topology. coresPerSocket is a VMware proprietary extension. + * I couldn't find out how hyperthreads is specified in the OVF. *) - and parse_removables () - let removables = ref [] in - let expr = "/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=14 or rasd:ResourceType/text()=15 or rasd:ResourceType/text()=16]" in - let obj = Xml.xpath_eval_expression xpathctx expr in - let nr_nodes = Xml.xpathobj_nr_nodes obj in - for i = 0 to nr_nodes-1 do - let n = Xml.xpathobj_node obj i in - Xml.xpathctx_set_current_context xpathctx n; - let id - match xpath_int "rasd:ResourceType/text()" with - | None -> assert false - | Some (14|15|16 as i) -> i - | Some _ -> assert false in - - let slot = xpath_int "rasd:AddressOnParent/text()" in - - (* Find the parent controller. *) - let parent_id = xpath_int "rasd:Parent/text()" in - let controller - match parent_id with - | None -> None - | Some id -> parent_controller id in - - let typ - match id with - | 14 -> Floppy - | 15 | 16 -> CDROM - | _ -> assert false in + let cores_per_socket = xpath_int "/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=3]/vmw:CoresPerSocket/text()" in + let cpu_topology + match cores_per_socket with + | None -> None + | Some cores_per_socket when cores_per_socket <= 0 -> + warning (f_"invalid vmw:CoresPerSocket (%d) ignored") + cores_per_socket; + None + | Some cores_per_socket -> + let sockets = vcpu / cores_per_socket in + if sockets <= 0 then ( + warning (f_"invalid vmw:CoresPerSocket < number of cores"); + None + ) + else + Some { s_cpu_sockets = sockets; s_cpu_cores = cores_per_socket; + s_cpu_threads = 1 } in + + (* BIOS or EFI firmware? *) + let firmware = Option.default "bios" (xpath_string "/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/vmw:Config[@vmw:key=\"firmware\"]/@vmw:value") in + let firmware + match firmware with + | "bios" -> BIOS + | "efi" -> UEFI + | s -> + error (f_"unknown Config:firmware value %s (expected \"bios\" or \"efi\")") s in + + name, memory, vcpu, cpu_topology, firmware, + parse_disks xpathctx, parse_removables xpathctx, parse_nics xpathctx + +(* Hard disks (ResourceType = 17). *) +and parse_disks xpathctx + let xpath_string = xpath_string xpathctx + and xpath_int = xpath_int xpathctx in + + let disks = ref [] in + let expr = "/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=17]" in + let obj = Xml.xpath_eval_expression xpathctx expr in + let nr_nodes = Xml.xpathobj_nr_nodes obj in + for i = 0 to nr_nodes-1 do + let n = Xml.xpathobj_node obj i in + Xml.xpathctx_set_current_context xpathctx n; + + (* XXX We assume the OVF lists these in order. + let address = xpath_int "rasd:AddressOnParent/text()" in + *) + + (* Find the parent controller. *) + let parent_id = xpath_int "rasd:Parent/text()" in + let controller + match parent_id with + | None -> None + | Some id -> parent_controller xpathctx id in + + Xml.xpathctx_set_current_context xpathctx n; + let file_id + Option.default "" (xpath_string "rasd:HostResource/text()") in + let rex = PCRE.compile "^(?:ovf:)?/disk/(.*)" in + if PCRE.matches rex file_id then ( + (* Chase the references through to the actual file name. *) + let file_id = PCRE.sub 1 in + let expr = sprintf "/ovf:Envelope/ovf:DiskSection/ovf:Disk[@ovf:diskId='%s']/@ovf:fileRef" file_id in + let file_ref + match xpath_string expr with + | None -> error (f_"error parsing disk fileRef") + | Some s -> s in + let expr = sprintf "/ovf:Envelope/ovf:References/ovf:File[@ovf:id='%s']/@ovf:href" file_ref in + let href + match xpath_string expr with + | None -> error (f_"no href in ovf:File (id=%s)") file_ref + | Some s -> s in + + let expr = sprintf "/ovf:Envelope/ovf:References/ovf:File[@ovf:id='%s']/@ovf:compression" file_ref in + let compressed + match xpath_string expr with + | None | Some "identity" -> false + | Some "gzip" -> true + | Some s -> error (f_"unsupported compression in OVF: %s") s in + let disk = { - s_removable_type = typ; - s_removable_controller = controller; - s_removable_slot = slot; + source_disk = { + s_disk_id = i; + s_qemu_uri = ""; + s_format = Some "vmdk"; + s_controller = controller; + }; + href = href; + compressed = compressed } in - List.push_front disk removables; - done; - List.rev !removables - - (* Search for networks ResourceType: 10 *) - and parse_nics () - let nics = ref [] in - let expr = "/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=10]" in - let obj = Xml.xpath_eval_expression xpathctx expr in - let nr_nodes = Xml.xpathobj_nr_nodes obj in - for i = 0 to nr_nodes-1 do - let n = Xml.xpathobj_node obj i in - Xml.xpathctx_set_current_context xpathctx n; - let vnet, vnet_type - match xpath_string "rasd:Connection/text()" with - | Some connection -> connection, Bridge - | None -> sprintf "eth%d" i, Network in - let mac = xpath_string "rasd:Address/text()" in - let nic_model - match xpath_string "rasd:ResourceSubType/text()" with - | Some "E1000" -> Some Source_e1000 - | Some model -> Some (Source_other_nic (String.lowercase_ascii model)) - | None -> None in - let nic = { - s_mac = mac; - s_nic_model = nic_model; - s_vnet = vnet; - s_vnet_orig = vnet; - s_vnet_type = vnet_type; - } in - List.push_front nic nics - done; - List.rev !nics - in + List.push_front disk disks; + ) else + error (f_"could not parse disk rasd:HostResource from OVF document") + done; + List.rev !disks + +(* Floppies (ResourceType = 14), CDs (ResourceType = 15) and + * CDROMs (ResourceType = 16). (What is the difference?) Try hard + * to preserve the original ordering from the OVF. + *) +and parse_removables xpathctx + let xpath_int = xpath_int xpathctx in + + let removables = ref [] in + let expr = "/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=14 or rasd:ResourceType/text()=15 or rasd:ResourceType/text()=16]" in + let obj = Xml.xpath_eval_expression xpathctx expr in + let nr_nodes = Xml.xpathobj_nr_nodes obj in + for i = 0 to nr_nodes-1 do + let n = Xml.xpathobj_node obj i in + Xml.xpathctx_set_current_context xpathctx n; + let id + match xpath_int "rasd:ResourceType/text()" with + | None -> assert false + | Some (14|15|16 as i) -> i + | Some _ -> assert false in + + let slot = xpath_int "rasd:AddressOnParent/text()" in + + (* Find the parent controller. *) + let parent_id = xpath_int "rasd:Parent/text()" in + let controller + match parent_id with + | None -> None + | Some id -> parent_controller xpathctx id in + + let typ + match id with + | 14 -> Floppy + | 15 | 16 -> CDROM + | _ -> assert false in + let disk = { + s_removable_type = typ; + s_removable_controller = controller; + s_removable_slot = slot; + } in + List.push_front disk removables; + done; + List.rev !removables + +(* Search for networks ResourceType: 10 *) +and parse_nics xpathctx + let xpath_string = xpath_string xpathctx in + + let nics = ref [] in + let expr = "/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:ResourceType/text()=10]" in + let obj = Xml.xpath_eval_expression xpathctx expr in + let nr_nodes = Xml.xpathobj_nr_nodes obj in + for i = 0 to nr_nodes-1 do + let n = Xml.xpathobj_node obj i in + Xml.xpathctx_set_current_context xpathctx n; + let vnet, vnet_type + match xpath_string "rasd:Connection/text()" with + | Some connection -> connection, Bridge + | None -> sprintf "eth%d" i, Network in + let mac = xpath_string "rasd:Address/text()" in + let nic_model + match xpath_string "rasd:ResourceSubType/text()" with + | Some "E1000" -> Some Source_e1000 + | Some model -> Some (Source_other_nic (String.lowercase_ascii model)) + | None -> None in + let nic = { + s_mac = mac; + s_nic_model = nic_model; + s_vnet = vnet; + s_vnet_orig = vnet; + s_vnet_type = vnet_type; + } in + List.push_front nic nics + done; + List.rev !nics + +(* Helper function to return the parent controller of a disk. *) +and parent_controller xpathctx id + let xpath_int = xpath_int xpathctx in + + let expr = sprintf "/ovf:Envelope/ovf:VirtualSystem/ovf:VirtualHardwareSection/ovf:Item[rasd:InstanceID/text()=%d]/rasd:ResourceType/text()" id in + let controller = xpath_int expr in + + (* 5: IDE, 6: SCSI controller, 20: SATA *) + match controller with + | Some 5 -> Some Source_IDE + | Some 6 -> Some Source_SCSI + | Some 20 -> Some Source_SATA + | None -> + warning (f_"ova disk has no parent controller, please report this as a bug supplying the *.ovf file extracted from the ova"); + None + | Some controller -> + warning (f_"ova disk has an unknown VMware controller type (%d), please report this as a bug supplying the *.ovf file extracted from the ova") + controller; + None - parse_top () +let parse_disks ovf_filename + let xpathctx = xpathctx_of_ovf ovf_filename in + parse_disks xpathctx diff --git a/v2v/parse_ovf_from_ova.mli b/v2v/parse_ovf_from_ova.mli index 39bc83d2d..f217e6e41 100644 --- a/v2v/parse_ovf_from_ova.mli +++ b/v2v/parse_ovf_from_ova.mli @@ -35,3 +35,6 @@ val parse_ovf_from_ova : string -> string option * int64 * int * Types.source_cp The returned tuple is [name, memory, vcpu, cpu_topology, firmware, disks, removables, nics] *) + +val parse_disks : string -> ovf_disk list +(** As above, but returns only the disks. *) -- 2.16.2
Richard W.M. Jones
2018-Apr-25 13:35 UTC
[Libguestfs] [PATCH v2 5/9] v2v: -i ova: Factor out code for dealing with OVA files.
Factor out the complex code that handles dealing with multiple different OVA file formats into a separate Parse_ova module. This is largely straightforward code refactoring -- there should be no significant functional change. However: - Parse_ova now checks up front if the OVA contains any compressed disks and avoids the tar optimization in that case. This is a regression for the case of an OVA containing a mix of both compressed and uncompressed disks (we expect this to be rare). The change is nevertheless good because it reduces the coupling between two parts of the code. - I had to simplify an error message. --- v2v/Makefile.am | 2 + v2v/input_ova.ml | 375 +++++++++++++----------------------------------------- v2v/parse_ova.ml | 360 +++++++++++++++++++++++++++++++++++++++++++++++++++ v2v/parse_ova.mli | 73 +++++++++++ v2v/utils.ml | 59 --------- v2v/utils.mli | 7 - 6 files changed, 523 insertions(+), 353 deletions(-) diff --git a/v2v/Makefile.am b/v2v/Makefile.am index d832f75c0..c9ed1fc88 100644 --- a/v2v/Makefile.am +++ b/v2v/Makefile.am @@ -76,6 +76,7 @@ SOURCES_MLI = \ output_rhv_upload_plugin_source.mli \ output_rhv_upload_precheck_source.mli \ output_vdsm.mli \ + parse_ova.mli \ parse_ovf_from_ova.mli \ parse_libvirt_xml.mli \ parse_vmx.mli \ @@ -99,6 +100,7 @@ SOURCES_ML = \ DOM.ml \ changeuid.ml \ parse_ovf_from_ova.ml \ + parse_ova.ml \ create_ovf.ml \ linux.ml \ windows.ml \ diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml index f23a1f2a9..fc8fde4bc 100644 --- a/v2v/input_ova.ml +++ b/v2v/input_ova.ml @@ -20,242 +20,53 @@ open Printf open Std_utils open Tools_utils -open Unix_utils open Common_gettext.Gettext open Types -open Utils +open Parse_ova open Parse_ovf_from_ova open Name_from_disk -(* Return true if [libvirt] supports ["json:"] pseudo-URLs and accepts the - * ["raw"] driver. Function also returns true if [libvirt] backend is not - * used. This didn't work in libvirt < 3.1.0. - *) -let libvirt_supports_json_raw_driver () - if backend_is_libvirt () then ( - let sup = Libvirt_utils.libvirt_get_version () >= (3, 1, 0) in - debug "libvirt supports \"raw\" driver in json URL: %B" sup; - sup - ) - else - true - -let pigz_available - let test = lazy (shell_command "pigz --help >/dev/null 2>&1" = 0) in - fun () -> Lazy.force test - -let pxz_available - let test = lazy (shell_command "pxz --help >/dev/null 2>&1" = 0) in - fun () -> Lazy.force test - -let zcat_command_of_format = function - | `GZip -> - if pigz_available () then "pigz -c -d" else "gzip -c -d" - | `XZ -> - if pxz_available () then "pxz -c -d" else "xz -c -d" - -(* Untar part or all files from tar archive. If [paths] is specified it is - * a list of paths in the tar archive. - *) -let untar ?format ?(paths = []) file outdir - let paths = String.concat " " (List.map quote paths) in - let cmd - match format with - | None -> - sprintf "tar -xf %s -C %s %s" - (quote file) (quote outdir) paths - | Some ((`GZip|`XZ) as format) -> - sprintf "%s %s | tar -xf - -C %s %s" - (zcat_command_of_format format) (quote file) - (quote outdir) paths in - if shell_command cmd <> 0 then - error (f_"error unpacking %s, see earlier error messages") file - -(* Untar only ovf and manifest from the archive *) -let untar_metadata file outdir - let files = external_command (sprintf "tar -tf %s" (Filename.quote file)) in - let files - List.filter_map ( - fun f -> - if Filename.check_suffix f ".ovf" || - Filename.check_suffix f ".mf" then Some f - else None - ) files in - untar ~paths:files file outdir - -(* Uncompress the first few bytes of [file] and return it as - * [(bytes, len)]. - *) -let uncompress_head format file - let cmd = sprintf "%s %s" (zcat_command_of_format format) (quote file) in - let chan_out, chan_in, chan_err = Unix.open_process_full cmd [||] in - let b = Bytes.create 512 in - let len = input chan_out b 0 (Bytes.length b) in - (* We're expecting the subprocess to fail because we close - * the pipe early, so: - *) - ignore (Unix.close_process_full (chan_out, chan_in, chan_err)); - b, len - -(* Run [detect_file_type] on a compressed file, returning the - * type of the uncompressed content (if known). - *) -let uncompressed_type format file - let head, headlen = uncompress_head format file in - let tmpfile, chan - Filename.open_temp_file "ova.file." "" in - output chan head 0 headlen; - close_out chan; - let ret = detect_file_type tmpfile in - Sys.remove tmpfile; - ret - -(* Find files in [dir] ending with [ext]. *) -let find_files dir ext - let rec loop = function - | [] -> [] - | dir :: rest -> - let files = Array.to_list (Sys.readdir dir) in - let files = List.map (Filename.concat dir) files in - let dirs, files = List.partition Sys.is_directory files in - let files - List.filter (fun x -> Filename.check_suffix x ext) files in - files @ loop (rest @ dirs) - in - loop [dir] - -class input_ova ova - let tmpdir - let base_dir = (open_guestfs ())#get_cachedir () in - let t = Mkdtemp.temp_dir ~base_dir "ova." in - rmdir_on_exit t; - t in -object +class input_ova ova = object inherit input method as_options = "-i ova " ^ ova method source () (* Extract ova file. *) - let exploded, partial - (* The spec allows a directory to be specified as an ova. This - * is also pretty convenient. - *) - if is_directory ova then ova, false - else ( - match detect_file_type ova with - | `Tar -> - (* Normal ovas are tar file (not compressed). *) - if qemu_img_supports_offset_and_size () && - libvirt_supports_json_raw_driver () then ( - (* In newer QEMU we don't have to extract everything. - * We can access disks inside the tar archive directly. - *) - untar_metadata ova tmpdir; - tmpdir, true - ) else ( - untar ova tmpdir; - tmpdir, false - ) + let ova_t = parse_ova ova in - | `Zip -> - (* However, although not permitted by the spec, people ship - * zip files as ova too. - *) - let cmd = [ "unzip" ] @ - (if verbose () then [] else [ "-q" ]) @ - [ "-j"; "-d"; tmpdir; ova ] in - if run_command cmd <> 0 then - error (f_"error unpacking %s, see earlier error messages") ova; - tmpdir, false + (* Extract ovf file from ova. *) + let ovf = get_ovf_file ova_t in - | (`GZip|`XZ) as format -> - (match uncompressed_type format ova with - | `Tar -> - untar ~format ova tmpdir; - tmpdir, false - | `Zip | `GZip | `XZ | `Unknown -> - error (f_"%s: unsupported file format\n\nFormats which we currently understand for '-i ova' are: tar (uncompressed, compress with gzip or xz), zip") ova - ) + (* Extract the manifest from *.mf files in the ova. *) + let manifest = get_manifest ova_t in - | `Unknown -> - error (f_"%s: unsupported file format\n\nFormats which we currently understand for '-i ova' are: tar (uncompressed, compress with gzip or xz), zip") ova - ) in - - (* Exploded path must be absolute (RHBZ#1155121). *) - let exploded = absolute_path exploded in - - (* If virt-v2v is running as root, and the backend is libvirt, then - * we have to chmod the directory to 0755 and files to 0644 - * so it is readable by qemu.qemu. This is libvirt bug RHBZ#890291. - *) - if Unix.geteuid () = 0 && backend_is_libvirt () then ( - warning (f_"making OVA directory public readable to work around libvirt bug https://bugzilla.redhat.com/1045069"); - let cmd = [ "chmod"; "-R"; "go=u,go-w"; exploded ] @ - if partial then [ ova ] else [] in - ignore (run_command cmd) - ); - - (* Search for the ovf file. *) - let ovf = find_files exploded ".ovf" in - let ovf - match ovf with - | [] -> - error (f_"no .ovf file was found in %s") ova - | [x] -> x - | _ :: _ -> - error (f_"more than one .ovf file was found in %s") ova in - - (* Read any .mf (manifest) files and verify sha1. *) - let mf = find_files exploded ".mf" in - let rex = PCRE.compile "^(SHA1|SHA256)\\((.*)\\)= ([0-9a-fA-F]+)\r?$" in + (* Verify checksums of files listed in the manifest. *) List.iter ( - fun mf -> - debug "processing manifest %s" mf; - let mf_folder = Filename.dirname mf in - let mf_subfolder = subdirectory exploded mf_folder in - with_open_in mf ( - fun chan -> - let rec loop () - let line = input_line chan in - if PCRE.matches rex line then ( - let mode = PCRE.sub 1 - and disk = PCRE.sub 2 - and expected = PCRE.sub 3 in - let csum = Checksums.of_string mode expected in - match - if partial then - Checksums.verify_checksum csum - ~tar:ova (mf_subfolder // disk) - else - Checksums.verify_checksum csum (mf_folder // disk) - with - | Checksums.Good_checksum -> () - | Checksums.Mismatched_checksum (_, actual) -> - error (f_"checksum of disk %s does not match manifest %s (actual %s(%s) = %s, expected %s(%s) = %s)") - disk mf mode disk actual mode disk expected - | Checksums.Missing_file -> - (* RHBZ#1570407: Some OVA files generated by VMware - * reference non-existent components in the *.mf file. - * Generate a warning and ignore it. - *) - warning (f_"%s has a checksum for non-existent file %s (ignored)") - mf disk - ) - else - warning (f_"unable to parse line from manifest file: %S") line; - loop () - in - (try loop () with End_of_file -> ()) - ) - ) mf; - - let ovf_folder = Filename.dirname ovf in + fun (file_ref, csum) -> + let filename, r + match file_ref with + | LocalFile filename -> + filename, Checksums.verify_checksum csum filename + | TarFile (tar, filename) -> + filename, Checksums.verify_checksum csum ~tar filename in + match r with + | Checksums.Good_checksum -> () + | Checksums.Mismatched_checksum (_, actual) -> + error (f_"checksum of disk %s does not match manifest (actual = %s, expected = %s)") + filename actual (Checksums.string_of_csum_t csum) + | Checksums.Missing_file -> + (* RHBZ#1570407: Some OVA files generated by VMware + * reference non-existent components in the *.mf file. + * Generate a warning and ignore it. + *) + warning (f_"manifest has a checksum for non-existent file %s (ignored)") + filename + ) manifest; (* Parse the ovf file. *) - let name, memory, vcpu, cpu_topology, firmware, - disks, removables, nics + let name, memory, vcpu, cpu_topology, firmware, disks, removables, nics parse_ovf_from_ova ovf in let name @@ -265,81 +76,71 @@ object name_from_disk ova | Some name -> name in - let disks = List.map ( - fun ({ href; compressed } as disk) -> - let partial - if compressed && partial then ( - (* We cannot access compressed disk inside the tar; - * we have to extract it. - *) - untar ~paths:[(subdirectory exploded ovf_folder) // href] - ova tmpdir; - false - ) - else - partial in + (* Convert the disk hrefs into qemu URIs. *) + let qemu_uris = List.map ( + fun { href; compressed } -> + let file_ref = get_file_ref ova_t href in - let filename - if partial then - (subdirectory exploded ovf_folder) // href - else ( - (* Does the file exist and is it readable? *) - Unix.access (ovf_folder // href) [Unix.R_OK]; - ovf_folder // href - ) in + match compressed, file_ref with + | false, LocalFile filename -> + filename - (* The spec allows the file to be gzip-compressed, in which case - * we must uncompress it into the tmpdir. - *) - let filename - if compressed then ( - let new_filename = tmpdir // String.random8 () ^ ".vmdk" in - let cmd - sprintf "zcat %s > %s" (quote filename) (quote new_filename) in - if shell_command cmd <> 0 then - error (f_"error uncompressing %s, see earlier error messages") - filename; - new_filename - ) - else filename in + | true, LocalFile filename -> + (* The spec allows the file to be gzip-compressed, in + * which case we must uncompress it into a temporary. + *) + let temp_dir = (open_guestfs ())#get_cachedir () in + let new_filename = Filename.temp_file ~temp_dir "ova" ".vmdk" in + unlink_on_exit new_filename; + let cmd + sprintf "zcat %s > %s" (quote filename) (quote new_filename) in + if shell_command cmd <> 0 then + error (f_"error uncompressing %s, see earlier error messages") + filename; + new_filename - let qemu_uri - if not partial then ( - filename - ) - else ( - let offset, size - try find_file_in_tar ova filename - with - | Not_found -> - error (f_"file ‘%s’ not found in the ova") filename - | Failure msg -> error (f_"%s") msg in - (* QEMU requires size aligned to 512 bytes. This is safe because - * tar also works with 512 byte blocks. - *) - let size = roundup64 size 512L in + | false, TarFile (tar, filename) -> + (* This is the tar optimization. *) + let offset, size + try Parse_ova.get_tar_offet_and_size tar filename + with + | Not_found -> + error (f_"file ‘%s’ not found in the ova") filename + | Failure msg -> error (f_"%s") msg in + (* QEMU requires size aligned to 512 bytes. This is safe because + * tar also works with 512 byte blocks. + *) + let size = roundup64 size 512L in - (* Workaround for libvirt bug RHBZ#1431652. *) - let ova_path = absolute_path ova in + (* Workaround for libvirt bug RHBZ#1431652. *) + let tar_path = absolute_path tar in - let doc = [ - "file", JSON.Dict [ - "driver", JSON.String "raw"; - "offset", JSON.Int64 offset; - "size", JSON.Int64 size; - "file", JSON.Dict [ - "driver", JSON.String "file"; - "filename", JSON.String ova_path] - ] - ] in - let uri - sprintf "json:%s" (JSON.string_of_doc ~fmt:JSON.Compact doc) in - debug "json: %s" uri; - uri - ) in + let doc = [ + "file", JSON.Dict [ + "driver", JSON.String "raw"; + "offset", JSON.Int64 offset; + "size", JSON.Int64 size; + "file", JSON.Dict [ + "driver", JSON.String "file"; + "filename", JSON.String tar_path] + ] + ] in + let uri + sprintf "json:%s" (JSON.string_of_doc ~fmt:JSON.Compact doc) in + uri - { disk.source_disk with s_qemu_uri = qemu_uri } - ) disks in + | true, TarFile _ -> + (* This should not happen since {!Parse_ova} knows that + * qemu cannot handle compressed files here. + *) + assert false + ) disks in + + (* Get a final list of source disks. *) + let disks + List.map (fun ({ source_disk }, qemu_uri) -> + { source_disk with s_qemu_uri = qemu_uri }) + (List.combine disks qemu_uris) in let source = { s_hypervisor = VMware; diff --git a/v2v/parse_ova.ml b/v2v/parse_ova.ml new file mode 100644 index 000000000..431cbe8d0 --- /dev/null +++ b/v2v/parse_ova.ml @@ -0,0 +1,360 @@ +(* virt-v2v + * Copyright (C) 2009-2018 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + *) + +open Printf + +open Std_utils +open Tools_utils +open Unix_utils +open Common_gettext.Gettext + +open Utils +open Parse_ovf_from_ova + +type t = { + (* Save the original OVA name, for error messages. *) + orig_ova : string; + + (* Top directory of OVA. If the OVA was already a directory then + * this is just that directory. However in normal cases this is + * a temporary directory that we create, unpacking either just the + * OVF and MF files, or those plus the disks. This temporary + * directory will be cleaned up on exit. + *) + top_dir : string; + + ova_type : ova_type; +} + +and ova_type + (* The original OVA was a directory. Or the OVA was fully unpacked + * into a temporary directory. + * + * In either case everything is available in [top_dir]. + *) + | Directory + + (* The original OVA was an uncompressed tar file and we are able + * to optimize access to the disks by keeping them in the tarball. + * + * The OVF and MF files only have been unpacked in [top_dir]. + *) + | TarOptimized of string (* tarball *) + +type file_ref + | LocalFile of string + | TarFile of string * string + +type mf_record = file_ref * Checksums.csum_t + +let rec parse_ova ova + (* The spec allows a directory to be specified as an ova. This + * is also pretty convenient. + *) + let top_dir, ova_type + if is_directory ova then ova, Directory + else ( + let tmpdir + let base_dir = (open_guestfs ())#get_cachedir () in + let t = Mkdtemp.temp_dir ~base_dir "ova." in + rmdir_on_exit t; + t in + + match detect_file_type ova with + | `Tar -> + (* Normal ovas are tar file (not compressed). *) + + (* In newer QEMU we don't have to extract everything. + * We can access disks inside the tar archive directly. + *) + if qemu_img_supports_offset_and_size () && + libvirt_supports_json_raw_driver () && + (untar_metadata ova tmpdir; + no_disks_are_compressed ova tmpdir) then + tmpdir, TarOptimized ova + else ( + (* If qemu/libvirt is too old or any disk is compressed + * then we must fall back on the slow path. + *) + untar ova tmpdir; + tmpdir, Directory + ) + + | `Zip -> + (* However, although not permitted by the spec, people ship + * zip files as ova too. + *) + let cmd + [ "unzip" ] @ (if verbose () then [] else [ "-q" ]) @ + [ "-j"; "-d"; tmpdir; ova ] in + if run_command cmd <> 0 then + error (f_"error unpacking %s, see earlier error messages") ova; + tmpdir, Directory + + | (`GZip|`XZ) as format -> + (match uncompressed_type format ova with + | `Tar -> + untar ~format ova tmpdir; + tmpdir, Directory + | `Zip | `GZip | `XZ | `Unknown -> + error (f_"%s: unsupported file format\n\nFormats which we currently understand for '-i ova' are: tar (uncompressed, compress with gzip or xz), zip") ova + ) + + | `Unknown -> + error (f_"%s: unsupported file format\n\nFormats which we currently understand for '-i ova' are: tar (uncompressed, compress with gzip or xz), zip") ova + ) in + + (* Exploded path must be absolute (RHBZ#1155121). *) + let top_dir = absolute_path top_dir in + + (* If virt-v2v is running as root, and the backend is libvirt, then + * we have to chmod the directory to 0755 and files to 0644 + * so it is readable by qemu.qemu. This is libvirt bug RHBZ#890291. + *) + if Unix.geteuid () = 0 && backend_is_libvirt () then ( + warning (f_"making OVA directory public readable to work around libvirt bug https://bugzilla.redhat.com/1045069"); + let what + match ova_type with + | Directory -> [ top_dir ] + | TarOptimized ova -> [ top_dir; ova ] in + let cmd = [ "chmod"; "-R"; "go=u,go-w" ] @ what in + ignore (run_command cmd) + ); + + { orig_ova = ova; top_dir; ova_type } + +(* Return true if [libvirt] supports ["json:"] pseudo-URLs and accepts the + * ["raw"] driver. Function also returns true if [libvirt] backend is not + * used. This didn't work in libvirt < 3.1.0. + *) +and libvirt_supports_json_raw_driver () + if backend_is_libvirt () then ( + let sup = Libvirt_utils.libvirt_get_version () >= (3, 1, 0) in + debug "libvirt supports \"raw\" driver in json URL: %B" sup; + sup + ) + else + true + +(* No disks compressed? We need to check the OVF file. *) +and no_disks_are_compressed ova tmpdir + let t = { orig_ova = ova; top_dir = tmpdir; ova_type = Directory } in + let ovf = get_ovf_file t in + let disks = parse_disks ovf in + not (List.exists (fun { compressed } -> compressed) disks) + +and pigz_available + let test = lazy (shell_command "pigz --help >/dev/null 2>&1" = 0) in + fun () -> Lazy.force test + +and pxz_available + let test = lazy (shell_command "pxz --help >/dev/null 2>&1" = 0) in + fun () -> Lazy.force test + +and zcat_command_of_format = function + | `GZip -> + if pigz_available () then "pigz -c -d" else "gzip -c -d" + | `XZ -> + if pxz_available () then "pxz -c -d" else "xz -c -d" + +(* Untar part or all files from tar archive. If [paths] is specified it is + * a list of paths in the tar archive. + *) +and untar ?format ?(paths = []) file outdir + let paths = String.concat " " (List.map quote paths) in + let cmd + match format with + | None -> + sprintf "tar -xf %s -C %s %s" + (quote file) (quote outdir) paths + | Some ((`GZip|`XZ) as format) -> + sprintf "%s %s | tar -xf - -C %s %s" + (zcat_command_of_format format) (quote file) + (quote outdir) paths in + if shell_command cmd <> 0 then + error (f_"error unpacking %s, see earlier error messages") file + +(* Untar only ovf and manifest from the archive *) +and untar_metadata file outdir + let files = external_command (sprintf "tar -tf %s" (Filename.quote file)) in + let files + List.filter_map ( + fun f -> + if Filename.check_suffix f ".ovf" || + Filename.check_suffix f ".mf" then Some f + else None + ) files in + untar ~paths:files file outdir + +(* Uncompress the first few bytes of [file] and return it as + * [(bytes, len)]. + *) +and uncompress_head format file + let cmd = sprintf "%s %s" (zcat_command_of_format format) (quote file) in + let chan_out, chan_in, chan_err = Unix.open_process_full cmd [||] in + let b = Bytes.create 512 in + let len = input chan_out b 0 (Bytes.length b) in + (* We're expecting the subprocess to fail because we close + * the pipe early, so: + *) + ignore (Unix.close_process_full (chan_out, chan_in, chan_err)); + b, len + +(* Run [detect_file_type] on a compressed file, returning the + * type of the uncompressed content (if known). + *) +and uncompressed_type format file + let head, headlen = uncompress_head format file in + let tmpfile, chan + Filename.open_temp_file "ova.file." "" in + output chan head 0 headlen; + close_out chan; + let ret = detect_file_type tmpfile in + Sys.remove tmpfile; + ret + +(* Find files in [dir] ending with [ext]. *) +and find_files dir ext + let rec loop = function + | [] -> [] + | dir :: rest -> + let files = Array.to_list (Sys.readdir dir) in + let files = List.map (Filename.concat dir) files in + let dirs, files = List.partition Sys.is_directory files in + let files + List.filter (fun x -> Filename.check_suffix x ext) files in + files @ loop (rest @ dirs) + in + loop [dir] + +and get_ovf_file { orig_ova; top_dir } + let ovf = find_files top_dir ".ovf" in + match ovf with + | [] -> + error (f_"no .ovf file was found in %s") orig_ova + | [x] -> x + | _ :: _ -> + error (f_"more than one .ovf file was found in %s") orig_ova + +let rex = PCRE.compile "^(SHA1|SHA256)\\((.*)\\)= ([0-9a-fA-F]+)\r?$" + +let get_manifest { top_dir; ova_type } + let mf_files = find_files top_dir ".mf" in + let manifest + List.map ( + fun mf -> + debug "ova: processing manifest file %s" mf; + let mf_folder = Filename.dirname mf in + let mf_subfolder = subdirectory top_dir mf_folder in + with_open_in mf ( + fun chan -> + let ret = ref [] in + let rec loop () + let line = input_line chan in + if PCRE.matches rex line then ( + let csum_type = PCRE.sub 1 + and filename = PCRE.sub 2 + and expected = PCRE.sub 3 in + let csum = Checksums.of_string csum_type expected in + let file_ref + match ova_type with + | Directory -> + LocalFile (mf_folder // filename) + | TarOptimized tar -> + TarFile (tar, mf_subfolder // filename) in + List.push_front (file_ref, csum) ret + ) + else + warning (f_"unable to parse line from manifest file: %S") line; + loop () + in + (try loop () with End_of_file -> ()); + !ret + ) + ) mf_files in + + List.flatten manifest + +let get_file_ref ({ top_dir; ova_type } as t) href + let ovf = get_ovf_file t in + let ovf_folder = Filename.dirname ovf in + + match ova_type with + | Directory -> LocalFile (ovf_folder // href) + | TarOptimized tar -> + let filename = subdirectory top_dir ovf_folder // href in + TarFile (tar, filename) + +let ws = PCRE.compile "\\s+" +let re_tar_message = PCRE.compile "\\*\\* [^*]+ \\*\\*$" + +let get_tar_offet_and_size tar filename + let lines = external_command (sprintf "tar tRvf %s" (Filename.quote tar)) in + let rec loop lines + match lines with + | [] -> raise Not_found + | line :: lines -> ( + (* Lines have the form: + * block <offset>: <perms> <owner>/<group> <size> <mdate> <mtime> <file> + * or: + * block <offset>: ** Block of NULs ** + * block <offset>: ** End of File ** + *) + if PCRE.matches re_tar_message line then + loop lines (* ignore "** Block of NULs **" etc. *) + else ( + let elems = PCRE.nsplit ~max:8 ws line in + if List.length elems = 8 && List.hd elems = "block" then ( + let elems = Array.of_list elems in + let offset = elems.(1) in + let size = elems.(4) in + let fname = elems.(7) in + + if fname <> filename then + loop lines + else ( + let offset + try + (* There should be a colon at the end *) + let i = String.rindex offset ':' in + if i == (String.length offset)-1 then + Int64.of_string (String.sub offset 0 i) + else + failwith "colon at wrong position" + with Failure _ | Not_found -> + failwithf (f_"invalid offset returned by tar: %S") offset in + + let size + try Int64.of_string size + with Failure _ -> + failwithf (f_"invalid size returned by tar: %S") size in + + (* Note: Offset is actualy block number and there is a single + * block with tar header at the beginning of the file. So skip + * the header and convert the block number to bytes before + * returning. + *) + (offset +^ 1L) *^ 512L, size + ) + ) + else + failwithf (f_"failed to parse line returned by tar: %S") line + ) + ) + in + loop lines diff --git a/v2v/parse_ova.mli b/v2v/parse_ova.mli new file mode 100644 index 000000000..54df752ad --- /dev/null +++ b/v2v/parse_ova.mli @@ -0,0 +1,73 @@ +(* virt-v2v + * Copyright (C) 2009-2018 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + *) + +(** Helper functions for dealing with the OVA pseudo-format. *) + +type t + +val parse_ova : string -> t +(** The parameter references either an OVA file or a directory + containing an unpacked OVA. + + The OVA is "opened". If necessary, parts of the OVA are + unpacked into a temporary directory. This can consume a lot + of space, although we are able to optimize some common cases. + + This does {b not} parse or verify the OVF, MF or disks. *) + +val get_ovf_file : t -> string +(** Return the filename of the OVF file from the OVA. This will + be a local file (might be a temporary file) valid for the + lifetime of the handle. + + The filename can be passed directly to + {!Parse_ovf_from_ova.parse_ovf_from_ova}. *) + +type file_ref + | LocalFile of string (** A local filename. *) + | TarFile of string * string (** Tar file containing file. *) +(** A file reference, pointing usually to a disk. If the OVA + is unpacked during parsing then this points to a local file. + It might be a temporary file, but it is valid for the lifetime + of the handle. If we are optimizing access to the OVA then + it might also be a reference to a file within a tarball. *) + +type mf_record = file_ref * Checksums.csum_t +(** A manifest record: (file reference, checksum of file). *) + +val get_manifest : t -> mf_record list +(** Find and parse all manifest ([*.mf]) files in the OVA. + Parse out the filenames and checksums from these files + and return the full manifest as a single list. + + Note the checksums are returned, but this function does not + verify them. Also VMware-generated OVAs can return + non-existent files in this list. *) + +val get_file_ref : t -> string -> file_ref +(** Convert an OVF [href] into an actual file reference. + + Note this does not check that the file really exists. *) + +val get_tar_offet_and_size : string -> string -> int64 * int64 +(** [get_tar_offet_and_size tar filename] looks up file in the [tar] + archive and returns a tuple containing at which byte it starts + and how long the file is. + + Function raises [Not_found] if there is no such file inside [tar] and + [Failure] if there is any error parsing the tar output. *) diff --git a/v2v/utils.ml b/v2v/utils.ml index d73011f9f..67e2028f3 100644 --- a/v2v/utils.ml +++ b/v2v/utils.ml @@ -146,65 +146,6 @@ let error_if_no_ssh_agent () with Not_found -> error (f_"ssh-agent authentication has not been set up ($SSH_AUTH_SOCK is not set). This is required by qemu to do passwordless ssh access. See the virt-v2v(1) man page for more information.") -let ws = PCRE.compile "\\s+" -let re_tar_message = PCRE.compile "\\*\\* [^*]+ \\*\\*$" - -let find_file_in_tar tar filename - let lines = external_command (sprintf "tar tRvf %s" (Filename.quote tar)) in - let rec loop lines - match lines with - | [] -> raise Not_found - | line :: lines -> ( - (* Lines have the form: - * block <offset>: <perms> <owner>/<group> <size> <mdate> <mtime> <file> - * or: - * block <offset>: ** Block of NULs ** - * block <offset>: ** End of File ** - *) - if PCRE.matches re_tar_message line then - loop lines (* ignore "** Block of NULs **" etc. *) - else ( - let elems = PCRE.nsplit ~max:8 ws line in - if List.length elems = 8 && List.hd elems = "block" then ( - let elems = Array.of_list elems in - let offset = elems.(1) in - let size = elems.(4) in - let fname = elems.(7) in - - if fname <> filename then - loop lines - else ( - let offset - try - (* There should be a colon at the end *) - let i = String.rindex offset ':' in - if i == (String.length offset)-1 then - Int64.of_string (String.sub offset 0 i) - else - failwith "colon at wrong position" - with Failure _ | Not_found -> - failwithf (f_"invalid offset returned by tar: %S") offset in - - let size - try Int64.of_string size - with Failure _ -> - failwithf (f_"invalid size returned by tar: %S") size in - - (* Note: Offset is actualy block number and there is a single - * block with tar header at the beginning of the file. So skip - * the header and convert the block number to bytes before - * returning. - *) - (offset +^ 1L) *^ 512L, size - ) - ) - else - failwithf (f_"failed to parse line returned by tar: %S") line - ) - ) - in - loop lines - (* Wait for a file to appear until a timeout. *) let rec wait_for_file filename timeout if Sys.file_exists filename then true diff --git a/v2v/utils.mli b/v2v/utils.mli index 4a444aaa0..fd91387a7 100644 --- a/v2v/utils.mli +++ b/v2v/utils.mli @@ -55,13 +55,6 @@ val backend_is_libvirt : unit -> bool val error_if_no_ssh_agent : unit -> unit -val find_file_in_tar : string -> string -> int64 * int64 -(** [find_file_in_tar tar filename] looks up file in [tar] archive and returns - a tuple containing at which byte it starts and how long the file is. - - Function raises [Not_found] if there is no such file inside [tar] and - [Failure] if there is any error parsing the tar output. *) - val wait_for_file : string -> int -> bool (** [wait_for_file filename timeout] waits up to [timeout] seconds for [filename] to appear. It returns [true] if the file appeared. *) -- 2.16.2
Richard W.M. Jones
2018-Apr-25 13:35 UTC
[Libguestfs] [PATCH v2 6/9] common: mlstdutils: Inline unsafe ‘subdirectory’ function.
This function is ill-defined and unsafe. As a preparation for removing it completely, inline it in the places where it is used. --- builder/repository_main.ml | 13 +++++++++++-- common/mlstdutils/std_utils.ml | 9 --------- common/mlstdutils/std_utils.mli | 10 ---------- common/mlstdutils/std_utils_tests.ml | 7 ------- v2v/parse_ova.ml | 13 +++++++++++-- 5 files changed, 22 insertions(+), 30 deletions(-) diff --git a/builder/repository_main.ml b/builder/repository_main.ml index c020a6413..5dc4d57cd 100644 --- a/builder/repository_main.ml +++ b/builder/repository_main.ml @@ -398,6 +398,15 @@ let process_image acc_entries filename repo tmprepo index interactive | None -> extract_entry_data ~entry:file_entry () +let unsafe_remove_directory_prefix parent path + if path = parent then + "" + else if String.is_prefix path (parent // "") then ( + let len = String.length parent in + String.sub path (len+1) (String.length path - len-1) + ) else + invalid_arg (sprintf "%S is not a path prefix of %S" parent path) + let main () let cmdline = parse_cmdline () in @@ -512,8 +521,8 @@ let main () fun (id, entry) -> let { Index.file_uri } = entry in let rel_path - try - subdirectory cmdline.repo file_uri + try (* XXX wrong *) + unsafe_remove_directory_prefix cmdline.repo file_uri with | Invalid_argument _ -> file_uri in diff --git a/common/mlstdutils/std_utils.ml b/common/mlstdutils/std_utils.ml index 3fba96b5b..df443058f 100644 --- a/common/mlstdutils/std_utils.ml +++ b/common/mlstdutils/std_utils.ml @@ -376,15 +376,6 @@ end let (//) = Filename.concat let quote = Filename.quote -let subdirectory parent path - if path = parent then - "" - else if String.is_prefix path (parent // "") then ( - let len = String.length parent in - String.sub path (len+1) (String.length path - len-1) - ) else - invalid_arg (sprintf "%S is not a path prefix of %S" parent path) - let ( +^ ) = Int64.add let ( -^ ) = Int64.sub let ( *^ ) = Int64.mul diff --git a/common/mlstdutils/std_utils.mli b/common/mlstdutils/std_utils.mli index 195269a71..c887249a5 100644 --- a/common/mlstdutils/std_utils.mli +++ b/common/mlstdutils/std_utils.mli @@ -274,16 +274,6 @@ val ( // ) : string -> string -> string val quote : string -> string (** Shell-safe quoting of a string (alias for {!Filename.quote}). *) -val subdirectory : string -> string -> string -(** [subdirectory parent path] returns subdirectory part of [path] relative - to the [parent]. If [path] and [parent] point to the same directory empty - string is returned. - - Note: path normalization on arguments is {b not} performed! - - If [parent] is not a path prefix of [path] the function raises - [Invalid_argument]. *) - val ( +^ ) : int64 -> int64 -> int64 val ( -^ ) : int64 -> int64 -> int64 val ( *^ ) : int64 -> int64 -> int64 diff --git a/common/mlstdutils/std_utils_tests.ml b/common/mlstdutils/std_utils_tests.ml index 5c25650c2..aa48f5f39 100644 --- a/common/mlstdutils/std_utils_tests.ml +++ b/common/mlstdutils/std_utils_tests.ml @@ -30,12 +30,6 @@ let assert_equal_int64 = assert_equal ~printer:(fun x -> Int64.to_string x) let assert_equal_stringlist = assert_equal ~printer:(fun x -> "(" ^ (String.escaped (String.concat "," x)) ^ ")") let assert_equal_stringpair = assert_equal ~printer:(fun (x, y) -> sprintf "%S, %S" x y) -let test_subdirectory ctx - assert_equal_string "" (subdirectory "/foo" "/foo"); - assert_equal_string "" (subdirectory "/foo" "/foo/"); - assert_equal_string "bar" (subdirectory "/foo" "/foo/bar"); - assert_equal_string "bar/baz" (subdirectory "/foo" "/foo/bar/baz") - (* Test Std_utils.int_of_X and Std_utils.X_of_int byte swapping * functions. *) @@ -150,7 +144,6 @@ let test_string_chomp ctx let suite "mllib Std_utils" >::: [ - "subdirectory" >:: test_subdirectory; "numeric.byteswap" >:: test_byteswap; "char.mem" >:: test_char_mem; "strings.is_prefix" >:: test_string_is_prefix; diff --git a/v2v/parse_ova.ml b/v2v/parse_ova.ml index 431cbe8d0..c11502667 100644 --- a/v2v/parse_ova.ml +++ b/v2v/parse_ova.ml @@ -251,6 +251,15 @@ and get_ovf_file { orig_ova; top_dir } | _ :: _ -> error (f_"more than one .ovf file was found in %s") orig_ova +let unsafe_remove_directory_prefix parent path + if path = parent then + "" + else if String.is_prefix path (parent // "") then ( + let len = String.length parent in + String.sub path (len+1) (String.length path - len-1) + ) else + invalid_arg (sprintf "%S is not a path prefix of %S" parent path) + let rex = PCRE.compile "^(SHA1|SHA256)\\((.*)\\)= ([0-9a-fA-F]+)\r?$" let get_manifest { top_dir; ova_type } @@ -260,7 +269,7 @@ let get_manifest { top_dir; ova_type } fun mf -> debug "ova: processing manifest file %s" mf; let mf_folder = Filename.dirname mf in - let mf_subfolder = subdirectory top_dir mf_folder in + let mf_subfolder = unsafe_remove_directory_prefix top_dir mf_folder in with_open_in mf ( fun chan -> let ret = ref [] in @@ -297,7 +306,7 @@ let get_file_ref ({ top_dir; ova_type } as t) href match ova_type with | Directory -> LocalFile (ovf_folder // href) | TarOptimized tar -> - let filename = subdirectory top_dir ovf_folder // href in + let filename = unsafe_remove_directory_prefix top_dir ovf_folder // href in TarFile (tar, filename) let ws = PCRE.compile "\\s+" -- 2.16.2
Richard W.M. Jones
2018-Apr-25 13:35 UTC
[Libguestfs] [PATCH v2 7/9] v2v: -i ova: Replace subdirectory function with clearer inline code.
--- v2v/parse_ova.ml | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/v2v/parse_ova.ml b/v2v/parse_ova.ml index c11502667..6c4af4464 100644 --- a/v2v/parse_ova.ml +++ b/v2v/parse_ova.ml @@ -251,15 +251,6 @@ and get_ovf_file { orig_ova; top_dir } | _ :: _ -> error (f_"more than one .ovf file was found in %s") orig_ova -let unsafe_remove_directory_prefix parent path - if path = parent then - "" - else if String.is_prefix path (parent // "") then ( - let len = String.length parent in - String.sub path (len+1) (String.length path - len-1) - ) else - invalid_arg (sprintf "%S is not a path prefix of %S" parent path) - let rex = PCRE.compile "^(SHA1|SHA256)\\((.*)\\)= ([0-9a-fA-F]+)\r?$" let get_manifest { top_dir; ova_type } @@ -268,8 +259,19 @@ let get_manifest { top_dir; ova_type } List.map ( fun mf -> debug "ova: processing manifest file %s" mf; + (* (1) (2) + * mf: <top_dir>/bar.mf <top_dir>/foo/bar.mf + * mf_folder: <top_dir> <top_dir>/foo + * mf_subfolder: "" foo + *) let mf_folder = Filename.dirname mf in - let mf_subfolder = unsafe_remove_directory_prefix top_dir mf_folder in + let mf_subfolder + if String.is_prefix mf_folder (top_dir // "") then ( (* 2 *) + let len = String.length top_dir + 1 in + String.sub mf_folder len (String.length mf_folder - len) + ) + else if top_dir = mf_folder then "" (* 1 *) + else assert false in with_open_in mf ( fun chan -> let ret = ref [] in @@ -306,7 +308,19 @@ let get_file_ref ({ top_dir; ova_type } as t) href match ova_type with | Directory -> LocalFile (ovf_folder // href) | TarOptimized tar -> - let filename = unsafe_remove_directory_prefix top_dir ovf_folder // href in + (* (1) (2) + * ovf: <top_dir>/bar.ovf <top_dir>/foo/bar.ovf + * ovf_folder: <top_dir> <top_dir>/foo + * subdir: "" foo + * filename: href foo/href + *) + let filename + if String.is_prefix ovf_folder (top_dir // "") then ( (* 2 *) + let len = String.length top_dir + 1 in + String.sub ovf_folder len (String.length ovf_folder - len) // href + ) + else if top_dir = ovf_folder then href (* 1 *) + else assert false in TarFile (tar, filename) let ws = PCRE.compile "\\s+" -- 2.16.2
Richard W.M. Jones
2018-Apr-25 13:35 UTC
[Libguestfs] [PATCH v2 8/9] v2v: -i ova: Sanity check hrefs to ensure they stay inside the tarball.
Check for hrefs like "../../../../dev/sda", or dubious symlinks in the tarball, which could escape from the tarball into the host system. We use realpath(3) to do this. Since this function does not work on non-existent files, we must change the signature of get_file_ref so it can return whether the file exists or not. --- v2v/input_ova.ml | 6 +++++- v2v/parse_ova.ml | 41 +++++++++++++++++++++++++++++++++++++++-- v2v/parse_ova.mli | 7 +++---- 3 files changed, 47 insertions(+), 7 deletions(-) diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml index fc8fde4bc..9f7bc64ff 100644 --- a/v2v/input_ova.ml +++ b/v2v/input_ova.ml @@ -79,7 +79,11 @@ class input_ova ova = object (* Convert the disk hrefs into qemu URIs. *) let qemu_uris = List.map ( fun { href; compressed } -> - let file_ref = get_file_ref ova_t href in + let file_ref + match get_file_ref ova_t href with + | Some f -> f + | None -> + error (f_"-i ova: OVF references file ‘%s’ which was not found in the OVA archive") href in match compressed, file_ref with | false, LocalFile filename -> diff --git a/v2v/parse_ova.ml b/v2v/parse_ova.ml index 6c4af4464..76fa8ce13 100644 --- a/v2v/parse_ova.ml +++ b/v2v/parse_ova.ml @@ -17,6 +17,7 @@ *) open Printf +open Unix open Std_utils open Tools_utils @@ -305,9 +306,33 @@ let get_file_ref ({ top_dir; ova_type } as t) href let ovf = get_ovf_file t in let ovf_folder = Filename.dirname ovf in + (* Since [href] comes from an untrusted source, we must ensure + * that it doesn't reference a path outside [top_dir]. An + * additional complication is that [href] is relative to + * the directory containing the OVF ([ovf_folder]). A further + * complication is that the file might not exist at all. + *) match ova_type with - | Directory -> LocalFile (ovf_folder // href) + | Directory -> + let filename = ovf_folder // href in + let real_top_dir = Realpath.realpath top_dir in + (try + let filename = Realpath.realpath filename in + if not (String.is_prefix filename real_top_dir) then + error (f_"-i ova: invalid OVA file: path ‘%s’ references a file outside the archive") href; + Some (LocalFile filename) + with + Unix_error (ENOENT, "realpath", _) -> None + ) + | TarOptimized tar -> + (* Security: Since the only thing we will do with the computed + * filename is to call get_tar_offet_and_size, it doesn't + * matter if the filename is bogus or references some file + * on the filesystem outside the tarball. Therefore we don't + * need to do any sanity checking here. + *) + (* (1) (2) * ovf: <top_dir>/bar.ovf <top_dir>/foo/bar.ovf * ovf_folder: <top_dir> <top_dir>/foo @@ -321,7 +346,19 @@ let get_file_ref ({ top_dir; ova_type } as t) href ) else if top_dir = ovf_folder then href (* 1 *) else assert false in - TarFile (tar, filename) + + (* Does the file exist in the tarball? *) + let cmd = sprintf "tar tf %s %s >/dev/null 2>&1" + (quote tar) (quote filename) in + debug "ova: testing if %s exists in %s" filename tar; + if Sys.command cmd = 0 then ( + debug "ova: file exists"; + Some (TarFile (tar, filename)) + ) + else ( + debug "ova: file does not exist"; + None + ) let ws = PCRE.compile "\\s+" let re_tar_message = PCRE.compile "\\*\\* [^*]+ \\*\\*$" diff --git a/v2v/parse_ova.mli b/v2v/parse_ova.mli index 54df752ad..1ebf9022a 100644 --- a/v2v/parse_ova.mli +++ b/v2v/parse_ova.mli @@ -59,10 +59,9 @@ val get_manifest : t -> mf_record list verify them. Also VMware-generated OVAs can return non-existent files in this list. *) -val get_file_ref : t -> string -> file_ref -(** Convert an OVF [href] into an actual file reference. - - Note this does not check that the file really exists. *) +val get_file_ref : t -> string -> file_ref option +(** Convert an OVF [href] into an actual file reference. Returns [None] + if the file does not exist. *) val get_tar_offet_and_size : string -> string -> int64 * int64 (** [get_tar_offet_and_size tar filename] looks up file in the [tar] -- 2.16.2
Richard W.M. Jones
2018-Apr-25 13:35 UTC
[Libguestfs] [PATCH v2 9/9] v2v: -i ova: Handle OVAs containing snapshots (RHBZ#1570407).
Also adds a regression test. --- v2v/Makefile.am | 5 ++ v2v/input_ova.ml | 49 ++++++++++-- v2v/test-v2v-i-ova-snapshots.expected | 21 +++++ v2v/test-v2v-i-ova-snapshots.expected2 | 21 +++++ v2v/test-v2v-i-ova-snapshots.ovf | 138 +++++++++++++++++++++++++++++++++ v2v/test-v2v-i-ova-snapshots.sh | 78 +++++++++++++++++++ 6 files changed, 307 insertions(+), 5 deletions(-) diff --git a/v2v/Makefile.am b/v2v/Makefile.am index c9ed1fc88..9d9ab8d50 100644 --- a/v2v/Makefile.am +++ b/v2v/Makefile.am @@ -329,6 +329,7 @@ TESTS = \ test-v2v-i-ova-gz.sh \ test-v2v-i-ova-invalid-manifest1.sh \ test-v2v-i-ova-invalid-manifest2.sh \ + test-v2v-i-ova-snapshots.sh \ test-v2v-i-ova-subfolders.sh \ test-v2v-i-ova-tar.sh \ test-v2v-i-ova-two-disks.sh \ @@ -474,6 +475,10 @@ EXTRA_DIST += \ test-v2v-i-ova-gz.sh \ test-v2v-i-ova-invalid-manifest1.sh \ test-v2v-i-ova-invalid-manifest2.sh \ + test-v2v-i-ova-snapshots.expected \ + test-v2v-i-ova-snapshots.expected2 \ + test-v2v-i-ova-snapshots.ovf \ + test-v2v-i-ova-snapshots.sh \ test-v2v-i-ova-subfolders.expected \ test-v2v-i-ova-subfolders.expected2 \ test-v2v-i-ova-subfolders.ovf \ diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml index 9f7bc64ff..5953f3401 100644 --- a/v2v/input_ova.ml +++ b/v2v/input_ova.ml @@ -27,6 +27,49 @@ open Parse_ova open Parse_ovf_from_ova open Name_from_disk +(* RHBZ#1570407: VMware-generated OVA files found in the wild can + * contain hrefs referencing snapshots. The href will be something + * like: <File href="disk1.vmdk"/> but the actual disk will be a + * snapshot called something like "disk1.vmdk.000000000". + *) +let re_snapshot = PCRE.compile "\\.(\\d+)$" + +let rec find_file_or_snapshot ova_t href manifest + match get_file_ref ova_t href with + | Some f -> f + | None -> + (* Find all files in the manifest called [<href>.\d+] *) + let manifest = List.map fst manifest (* just the file_ref's *) in + let snapshots + List.filter_map ( + function + | LocalFile filename -> get_snapshot_if_matches href filename + | TarFile (_, filename) -> get_snapshot_if_matches href filename + ) manifest in + (* Pick highest. *) + let snapshots = List.sort (fun a b -> compare b a) snapshots in + match snapshots with + | [] -> error_missing_href href + | snapshot::_ -> + let href = sprintf "%s.%s" href snapshot in + match get_file_ref ova_t href with + | None -> error_missing_href href + | Some f -> f + +(* If [filename] matches [<href>.\d+] then return [Some snapshot]. *) +and get_snapshot_if_matches href filename + if PCRE.matches re_snapshot filename then ( + let snapshot = PCRE.sub 1 in + if String.is_suffix filename (sprintf "%s.%s" href snapshot) then + Some snapshot + else + None + ) + else None + +and error_missing_href href + error (f_"-i ova: OVF references file ‘%s’ which was not found in the OVA archive") href + class input_ova ova = object inherit input @@ -79,11 +122,7 @@ class input_ova ova = object (* Convert the disk hrefs into qemu URIs. *) let qemu_uris = List.map ( fun { href; compressed } -> - let file_ref - match get_file_ref ova_t href with - | Some f -> f - | None -> - error (f_"-i ova: OVF references file ‘%s’ which was not found in the OVA archive") href in + let file_ref = find_file_or_snapshot ova_t href manifest in match compressed, file_ref with | false, LocalFile filename -> diff --git a/v2v/test-v2v-i-ova-snapshots.expected b/v2v/test-v2v-i-ova-snapshots.expected new file mode 100644 index 000000000..97bce58ad --- /dev/null +++ b/v2v/test-v2v-i-ova-snapshots.expected @@ -0,0 +1,21 @@ +Source guest information (--print-source option): + + source name: 2K8R2EESP1_2_Medium +hypervisor type: vmware + memory: 1073741824 (bytes) + nr vCPUs: 1 + CPU vendor: + CPU model: + CPU topology: + CPU features: + firmware: uefi + display: + video: + sound: +disks: + disk1.vmdk (vmdk) [scsi] +removable media: + CD-ROM [ide] in slot 0 +NICs: + Bridge "PG-VLAN60" [e1000] + diff --git a/v2v/test-v2v-i-ova-snapshots.expected2 b/v2v/test-v2v-i-ova-snapshots.expected2 new file mode 100644 index 000000000..45be3cc46 --- /dev/null +++ b/v2v/test-v2v-i-ova-snapshots.expected2 @@ -0,0 +1,21 @@ +Source guest information (--print-source option): + + source name: 2K8R2EESP1_2_Medium +hypervisor type: vmware + memory: 1073741824 (bytes) + nr vCPUs: 1 + CPU vendor: + CPU model: + CPU topology: + CPU features: + firmware: uefi + display: + video: + sound: +disks: + json:{ "file": { "driver": "raw", "offset": x, "size": 12288, "file": { "driver": "file", "filename": "test-snapshots.ova" } } } (vmdk) [scsi] +removable media: + CD-ROM [ide] in slot 0 +NICs: + Bridge "PG-VLAN60" [e1000] + diff --git a/v2v/test-v2v-i-ova-snapshots.ovf b/v2v/test-v2v-i-ova-snapshots.ovf new file mode 100644 index 000000000..5e7c0d054 --- /dev/null +++ b/v2v/test-v2v-i-ova-snapshots.ovf @@ -0,0 +1,138 @@ +<?xml version="1.0" encoding="UTF-8"?> +<Envelope vmw:buildId="build-1750787" xmlns="http://schemas.dmtf.org/ovf/envelope/1" xmlns:cim="http://schemas.dmtf.org/wbem/wscim/1/common" xmlns:ovf="http://schemas.dmtf.org/ovf/envelope/1" xmlns:rasd="http://schemas.dmtf.org/wbem/wscim/1/cim-schema/2/CIM_ResourceAllocationSettingData" xmlns:vmw="http://www.vmware.com/schema/ovf" xmlns:vssd="http://schemas.dmtf.org/wbem/wscim/1/cim-schema/2/CIM_VirtualSystemSettingData" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <References> + <File ovf:href="disk1.vmdk" ovf:id="file1" ovf:size="12288"/> + </References> + <DiskSection> + <Info>Virtual disk information</Info> + <Disk ovf:capacity="50" ovf:capacityAllocationUnits="byte * 2^30" ovf:diskId="vmdisk1" ovf:fileRef="file1" ovf:format="http://www.vmware.com/interfaces/specifications/vmdk.html#streamOptimized" ovf:populatedSize="18975752192"/> + </DiskSection> + <NetworkSection> + <Info>The list of logical networks</Info> + <Network ovf:name="PG-VLAN60"> + <Description>The PG-VLAN60 network</Description> + </Network> + </NetworkSection> + <VirtualSystem ovf:id="2K8R2EESP1_2_Medium"> + <Info>A virtual machine</Info> + <Name>2K8R2EESP1_2_Medium</Name> + <OperatingSystemSection ovf:id="103" vmw:osType="windows7Server64Guest"> + <Info>The kind of installed guest operating system</Info> + <Description>Microsoft Windows Server 2008 R2 (64-bit)</Description> + </OperatingSystemSection> + <VirtualHardwareSection> + <Info>Virtual hardware requirements</Info> + <System> + <vssd:ElementName>Virtual Hardware Family</vssd:ElementName> + <vssd:InstanceID>0</vssd:InstanceID> + <vssd:VirtualSystemIdentifier>2K8R2EESP1_2_Medium</vssd:VirtualSystemIdentifier> + <vssd:VirtualSystemType>vmx-10</vssd:VirtualSystemType> + </System> + <Item> + <rasd:AllocationUnits>hertz * 10^6</rasd:AllocationUnits> + <rasd:Description>Number of Virtual CPUs</rasd:Description> + <rasd:ElementName>1 virtual CPU(s)</rasd:ElementName> + <rasd:InstanceID>1</rasd:InstanceID> + <rasd:ResourceType>3</rasd:ResourceType> + <rasd:VirtualQuantity>1</rasd:VirtualQuantity> + </Item> + <Item> + <rasd:AllocationUnits>byte * 2^20</rasd:AllocationUnits> + <rasd:Description>Memory Size</rasd:Description> + <rasd:ElementName>1024MB of memory</rasd:ElementName> + <rasd:InstanceID>2</rasd:InstanceID> + <rasd:ResourceType>4</rasd:ResourceType> + <rasd:VirtualQuantity>1024</rasd:VirtualQuantity> + </Item> + <Item> + <rasd:Address>0</rasd:Address> + <rasd:Description>SCSI Controller</rasd:Description> + <rasd:ElementName>SCSI controller 0</rasd:ElementName> + <rasd:InstanceID>3</rasd:InstanceID> + <rasd:ResourceSubType>lsilogicsas</rasd:ResourceSubType> + <rasd:ResourceType>6</rasd:ResourceType> + <vmw:Config ovf:required="false" vmw:key="slotInfo.pciSlotNumber" vmw:value="160"/> + </Item> + <Item> + <rasd:Address>1</rasd:Address> + <rasd:Description>IDE Controller</rasd:Description> + <rasd:ElementName>IDE 1</rasd:ElementName> + <rasd:InstanceID>4</rasd:InstanceID> + <rasd:ResourceType>5</rasd:ResourceType> + </Item> + <Item> + <rasd:Address>0</rasd:Address> + <rasd:Description>IDE Controller</rasd:Description> + <rasd:ElementName>IDE 0</rasd:ElementName> + <rasd:InstanceID>5</rasd:InstanceID> + <rasd:ResourceType>5</rasd:ResourceType> + </Item> + <Item ovf:required="false"> + <rasd:AutomaticAllocation>false</rasd:AutomaticAllocation> + <rasd:ElementName>Video card</rasd:ElementName> + <rasd:InstanceID>6</rasd:InstanceID> + <rasd:ResourceType>24</rasd:ResourceType> + <vmw:Config ovf:required="false" vmw:key="enable3DSupport" vmw:value="false"/> + <vmw:Config ovf:required="false" vmw:key="use3dRenderer" vmw:value="automatic"/> + <vmw:Config ovf:required="false" vmw:key="useAutoDetect" vmw:value="true"/> + <vmw:Config ovf:required="false" vmw:key="videoRamSizeInKB" vmw:value="4096"/> + </Item> + <Item ovf:required="false"> + <rasd:AutomaticAllocation>false</rasd:AutomaticAllocation> + <rasd:ElementName>VMCI device</rasd:ElementName> + <rasd:InstanceID>7</rasd:InstanceID> + <rasd:ResourceSubType>vmware.vmci</rasd:ResourceSubType> + <rasd:ResourceType>1</rasd:ResourceType> + <vmw:Config ovf:required="false" vmw:key="allowUnrestrictedCommunication" vmw:value="false"/> + <vmw:Config ovf:required="false" vmw:key="slotInfo.pciSlotNumber" vmw:value="32"/> + </Item> + <Item ovf:required="false"> + <rasd:AddressOnParent>0</rasd:AddressOnParent> + <rasd:AutomaticAllocation>false</rasd:AutomaticAllocation> + <rasd:ElementName>CD/DVD drive 1</rasd:ElementName> + <rasd:InstanceID>8</rasd:InstanceID> + <rasd:Parent>4</rasd:Parent> + <rasd:ResourceSubType>vmware.cdrom.atapi</rasd:ResourceSubType> + <rasd:ResourceType>15</rasd:ResourceType> + </Item> + <Item> + <rasd:AddressOnParent>0</rasd:AddressOnParent> + <rasd:ElementName>Hard disk 1</rasd:ElementName> + <rasd:HostResource>ovf:/disk/vmdisk1</rasd:HostResource> + <rasd:InstanceID>9</rasd:InstanceID> + <rasd:Parent>3</rasd:Parent> + <rasd:ResourceType>17</rasd:ResourceType> + <vmw:Config ovf:required="false" vmw:key="backing.writeThrough" vmw:value="false"/> + </Item> + <Item> + <rasd:AddressOnParent>7</rasd:AddressOnParent> + <rasd:AutomaticAllocation>true</rasd:AutomaticAllocation> + <rasd:Connection>PG-VLAN60</rasd:Connection> + <rasd:Description>E1000 ethernet adapter on "PG-VLAN60"</rasd:Description> + <rasd:ElementName>Network adapter 1</rasd:ElementName> + <rasd:InstanceID>11</rasd:InstanceID> + <rasd:ResourceSubType>E1000</rasd:ResourceSubType> + <rasd:ResourceType>10</rasd:ResourceType> + <vmw:Config ovf:required="false" vmw:key="slotInfo.pciSlotNumber" vmw:value="33"/> + <vmw:Config ovf:required="false" vmw:key="wakeOnLanEnabled" vmw:value="true"/> + </Item> + <vmw:Config ovf:required="false" vmw:key="cpuHotAddEnabled" vmw:value="false"/> + <vmw:Config ovf:required="false" vmw:key="cpuHotRemoveEnabled" vmw:value="false"/> + <vmw:Config ovf:required="false" vmw:key="firmware" vmw:value="efi"/> + <vmw:Config ovf:required="false" vmw:key="virtualICH7MPresent" vmw:value="false"/> + <vmw:Config ovf:required="false" vmw:key="virtualSMCPresent" vmw:value="false"/> + <vmw:Config ovf:required="false" vmw:key="memoryHotAddEnabled" vmw:value="false"/> + <vmw:Config ovf:required="false" vmw:key="nestedHVEnabled" vmw:value="false"/> + <vmw:Config ovf:required="false" vmw:key="powerOpInfo.powerOffType" vmw:value="soft"/> + <vmw:Config ovf:required="false" vmw:key="powerOpInfo.resetType" vmw:value="soft"/> + <vmw:Config ovf:required="false" vmw:key="powerOpInfo.standbyAction" vmw:value="checkpoint"/> + <vmw:Config ovf:required="false" vmw:key="powerOpInfo.suspendType" vmw:value="hard"/> + <vmw:Config ovf:required="false" vmw:key="tools.afterPowerOn" vmw:value="true"/> + <vmw:Config ovf:required="false" vmw:key="tools.afterResume" vmw:value="true"/> + <vmw:Config ovf:required="false" vmw:key="tools.beforeGuestShutdown" vmw:value="true"/> + <vmw:Config ovf:required="false" vmw:key="tools.beforeGuestStandby" vmw:value="true"/> + <vmw:Config ovf:required="false" vmw:key="tools.syncTimeWithHost" vmw:value="false"/> + <vmw:Config ovf:required="false" vmw:key="tools.toolsUpgradePolicy" vmw:value="upgradeAtPowerCycle"/> + </VirtualHardwareSection> + </VirtualSystem> +</Envelope> diff --git a/v2v/test-v2v-i-ova-snapshots.sh b/v2v/test-v2v-i-ova-snapshots.sh new file mode 100755 index 000000000..7649d22f9 --- /dev/null +++ b/v2v/test-v2v-i-ova-snapshots.sh @@ -0,0 +1,78 @@ +#!/bin/bash - +# libguestfs virt-v2v test script +# Copyright (C) 2014-2018 Red Hat Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +# Test -i ova option with OVA file containing snapshots. +# https://bugzilla.redhat.com/show_bug.cgi?id=1570407 + +unset CDPATH +export LANG=C +set -e + +$TEST_FUNCTIONS +skip_if_skipped +skip_if_backend uml + +export VIRT_TOOLS_DATA_DIR="$top_srcdir/test-data/fake-virt-tools" + +d=test-v2v-i-ova-snapshots.d +rm -rf $d +mkdir $d + +pushd $d + +# Create a phony OVA. This is only a test of source parsing, not +# conversion, so the contents of the disks doesn't matter. +# In these weird OVAs, disk1.vmdk does not exist, but both the +# href and manifest reference it. virt-v2v should use the +# highest numbered snapshot instead. +guestfish disk-create disk1.vmdk.000000000 raw 10k +guestfish disk-create disk1.vmdk.000000001 raw 11k +guestfish disk-create disk1.vmdk.000000002 raw 12k +sha=`do_sha1 disk1.vmdk.000000002` +echo -e "SHA1(disk1.vmdk)= $sha\r" > disk1.mf +sha=`do_sha1 disk1.vmdk.000000000` +echo -e "SHA1(disk1.vmdk.000000000)= $sha\r" > disk1.mf +sha=`do_sha1 disk1.vmdk.000000001` +echo -e "SHA1(disk1.vmdk.000000001)= $sha\r" > disk1.mf +sha=`do_sha1 disk1.vmdk.000000002` +echo -e "SHA1(disk1.vmdk.000000002)= $sha\r" > disk1.mf +cp ../test-v2v-i-ova-snapshots.ovf . +tar -cf test-snapshots.ova test-v2v-i-ova-snapshots.ovf disk1.vmdk.00000000? disk1.mf + +popd + +# Run virt-v2v but only as far as the --print-source stage +$VG virt-v2v --debug-gc --quiet \ + -i ova $d/test-snapshots.ova \ + --print-source > $d/source + +# Check the parsed source is what we expect. +if grep -sq json: $d/source ; then + # Normalize the output. + # Remove directory prefix. + # Exact offset will vary because of tar. + sed -i -e "s,\"[^\"]*/$d/,\"," \ + -e "s|\"offset\": [0-9]*,|\"offset\": x,|" $d/source + diff -u test-v2v-i-ova-snapshots.expected2 $d/source +else + # normalize the output + sed -i -e 's,[^ \t]*\(disk.*.vmdk\),\1,' $d/source + diff -u test-v2v-i-ova-snapshots.expected $d/source +fi + +rm -rf $d -- 2.16.2
Maybe Matching Threads
- [PATCH 0/2] v2v: -i ova: A couple of cleanup patches.
- GIT: [PATCH 0/5] v2v: Multiple fixes for handling semi-standard OVA files (RHBZ#1152998).
- v2v: -i libvirtxml: Map empty network or bridge name to a default (RHBZ#1257895).
- [PATCH v5 0/3] Import directly from OVA tar archive if possible
- [PATCH v3 0/6] Import directly from OVA tar archive if possible