#!/usr/bin/perl
#
# Note from the original posting:
#   I received some large data sets in Minitab Portable Worksheet (.mtp)
#   format and wanted to read them into R.  Here is a _first cut_ at a perl
#   script that transforms .mtp files into something that can be read by R.
#   Comments, bug-fixes, enhancements and extensions are welcome.
#
# Convert a Minitab Portable Worksheet to an R (or S-PLUS) list structure
# $Id: mtp2R.pl,v 1.1 1998/01/16 15:33:27 bates Exp $
# Copyright (C) 1998 Douglas M Bates
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# A copy of the GNU General Public License is available via WWW at
# http://www.gnu.org/copyleft/gpl.html.  You can also obtain it by
# writing to the Free Software Foundation, Inc., 675 Mass Ave,
# Cambridge, MA 02139, USA.
#
# Send any bug reports to Douglas Bates <bates@stat.wisc.edu>
#
# Usage: mtp2R.pl file1.mtp [file2.mtp ...]
#
# For every argument a file with the ".mtp" suffix replaced by ".R" is
# written (".R" is appended when the argument has no ".mtp" suffix).  The
# output assigns an R list, named after the output path without ".R", with
# one c(...) vector per converted worksheet record.

require 5.006;    # three-argument open() and lexical file handles
use strict;
use warnings;
use Carp;

# Width, in characters, of one field on a data line of a numeric record.
use constant NUMERIC_WIDTH => 16;

# Return the path of the R file to write for worksheet $input.
sub _output_path {
    my ($input) = @_;
    my $output = $input;

    # The output path must never equal the input path: opening it for
    # writing would truncate the worksheet before it has been read.
    $output .= '.R' unless $output =~ s/\.mtp$/.R/i;
    return $output;
}

# Return $text as a double-quoted R string literal, escaping the two
# characters (backslash and double quote) that would otherwise end or
# corrupt the literal.
sub _r_quote {
    my ($text) = @_;
    $text =~ s/(["\\])/\\$1/g;
    return "\"$text\"";
}

# Cut up to $wanted fixed-width fields of $width characters from the front
# of $line and return them with surrounding white space removed.  A trailing
# fragment shorter than $width is discarded.
sub _split_fields {
    my ( $line, $width, $wanted ) = @_;
    my @values;
    while ( @values < $wanted && length($line) >= $width ) {
        my $value = substr( $line, 0, $width );
        $line = substr( $line, $width );
        $value =~ s/^\s+//;
        $value =~ s/\s+$//;
        push @values, $value;
    }
    return @values;
}

# Return the R source text for one field: a quoted string for text records,
# the numeric value for numeric records, or NA when a numeric field does not
# hold a number (rather than silently coercing it to 0).
sub _format_value {
    my ( $value, $is_text ) = @_;
    return _r_quote($value) if $is_text;
    return 'NA'
        unless $value =~ /\A[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\z/;
    return 0 + $value;
}

# Convert the worksheet file $input into an R source file.
# Returns 1 when an output file was written; returns nothing (after a
# warning) when the input cannot be read, is not recognised as a Minitab
# Portable Worksheet, or the output cannot be created.
sub convert_file {
    my ($input) = @_;

    my $in;
    unless ( open( $in, '<', $input ) ) {
        carp("Unable to open $input for reading: $!");
        return;
    }

    # Check the signature line before creating any output file.
    my $line = <$in>;
    unless ( defined $line && $line =~ /^Minitab Portable Worksheet/ ) {
        carp("$input does not seem to be in Minitab Portable Format");
        close($in);
        return;
    }

    my $output = _output_path($input);
    my $out;
    unless ( open( $out, '>', $output ) ) {
        carp("Unable to open $output for writing: $!");
        close($in);
        return;
    }

    ( my $list_name = $output ) =~ s/\.R$//;
    print {$out} "# R data file automatically created by mtp2R\n";
    print {$out} _r_quote($list_name), " <- list(\n";

    my $initial = 1;    # true until the first list element has been written
  RECORD:
    while ( defined( $line = <$in> ) ) {

        # Every record starts with a header line beginning with "%".
        # Anything else is reported and skipped so that parsing can
        # resynchronise at the next header.
        unless ( $line =~ s/^%// ) {
            carp("Incorrect format in $input at line $.");
            next RECORD;
        }

        # Header fields: record type, record number, number of values,
        # field-type code and name.
        my ( $colmat, $number, $length, $NAs, $name ) = split ' ', $line;
        if ( grep { !defined($_) || $_ !~ /\A-?\d+\z/ }
            $colmat, $number, $length, $NAs )
        {
            carp("Incorrect format in $input at line $.");
            next RECORD;
        }

        # Record types 100..102 are not converted.
        # NOTE(review): no data lines are consumed for these records,
        # exactly as in the original script -- confirm against the format.
        next RECORD if 100 <= $colmat && $colmat <= 102;

        print {$out} ",\n" unless $initial;
        $initial = 0;

        # A missing name or a name of "." gets the default name C<number>.
        $name = "C$number" if !defined $name || $name eq '.';
        print {$out} "$name = c(\n";

        # A negative type code marks a text record whose fields are
        # (-code + 1) characters wide; otherwise the fields are numeric.
        my $is_text = $NAs < 0;
        my $width = $is_text ? 1 - $NAs : NUMERIC_WIDTH;

        while ( $length > 0 ) {
            $line = <$in>;
            unless ( defined $line ) {

                # Without this check a truncated file would loop forever.
                carp(
                    "Unexpected end of $input: $length value(s) of $name missing"
                );
                last;
            }
            chomp $line;
            chop $line if $line =~ /\r$/;    # Generated on DOS/Windows
            chop $line if $line =~ /\.$/;    # kept from the original script

            my @values = _split_fields( $line, $width, $length );
            $length -= @values;
            print {$out}
                join( ', ', map { _format_value( $_, $is_text ) } @values );
            print {$out} ", " if @values && $length;
            print {$out} "\n";
        }
        print {$out} ")";
    }
    print {$out} ")\n";

    close($in);

    # Buffered write errors (e.g. a full disk) only surface at close.
    close($out) or carp("Error writing $output: $!");
    return 1;
}

for my $path (@ARGV) {
    convert_file($path);
}

# True value so that the file can also be loaded with require.
1;

# -.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
# r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
# Send "info", "help", or "[un]subscribe"
# (in the "body", not the subject !)  To: r-devel-request@stat.math.ethz.ch
# _._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._