Hello,
I was using a bunch of cpios in initramfs as a working system, and
wondering why the unused files weren't being paged out to swap.
So I reread ramfs-rootfs-initramfs.txt and now I know.
So I wrote the attached utility. It creates a tmpfs, moves all files
from the initramfs onto it, moves the tmpfs over / and executes the real init.
It works, even with hardlinks, but it isn't the correct approach. Has
anyone implemented a small init, in klibc, that decompresses a bunch of
cpios to a tmpfs?
Regards,
Luciano Rocha
--
lfr
0/0
-------------- next part --------------
/* ----------------------------------------------------------------------- *
*
* Copyright 2004-2006 H. Peter Anvin - All Rights Reserved
* Copyright 2006 Luciano Rocha - All Rights Reserved
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall
* be included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* ----------------------------------------------------------------------- */
/*
* rtfs.c -- ramfs to tmpfs
*
* Usage: install as /init
*
* This program should be installed as /init on an initramfs;
* it does the following:
*
* - Mounts a tmpfs on an empty /newroot (or /newrootN if /newroot is in use);
* - Moves all files on the ramfs at / to the tmpfs
* - Remounts /newroot onto the root filesystem;
* - Chroots;
* - Opens /dev/console;
* - Spawns the init program (with arguments.)
*/
#include <alloca.h>
#include <assert.h>
#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <unistd.h>
#include <utime.h>
/* Make it possible to compile on glibc by providing constants that the
always-behind shipped glibc headers may not define. Classic example
of why the lack of ABI headers screws us up. */
/* Filesystem type value compared against statfs f_type in checkenv(). */
#ifndef TMPFS_MAGIC
# define TMPFS_MAGIC 0x01021994
#endif
/* Filesystem type value compared against statfs f_type in checkenv(). */
#ifndef RAMFS_MAGIC
# define RAMFS_MAGIC 0x858458f6
#endif
/* mount(2) flag used by main() to move the new root over "/". */
#ifndef MS_MOVE
# define MS_MOVE 8192
#endif
/* First candidate for the tmpfs mount point; main() appends a counter
 * when this directory exists and is not empty. */
#define NEWROOT "/newroot"
/* Files read by getprocinfo(). */
#define CMDLINE "/proc/cmdline"
#define MEMINFO "/proc/meminfo"
static const char *program; // argv[0], used as prefix of error messages
static char newroot[512]; // path of the directory the tmpfs is mounted on
static char **initargs; // argv, reused as argument vector for the real init
static struct stat root_st; // lstat of /, used to stay on the root device
static char meminfo[1024]; // contents of /proc/meminfo, after getprocinfo
static char cmdline[2048]; // contents of /proc/cmdline, after getprocinfo
/* Forward declaration: checkenv() calls getprocinfo() before its definition. */
static const char *getprocinfo(void);
/* Fatal-error reporting. Neither macro returns:
 * error(fmt, ...)  prints "program: msg" and exits with status 1
 *                  (the caller supplies the trailing newline)
 * serror(fmt, ...) prints "program: msg: <strerror(errno)>\n" and exits
 *                  with status 1
 */
#define error(...) my_error(0, __VA_ARGS__)
#define serror(...) my_error(1, __VA_ARGS__)
/* Shared implementation of error()/serror().
 * p:   non-zero to append the description of errno and a newline
 * fmt: printf-style format for the message, followed by its arguments
 */
static void __attribute__ ((noreturn,format(printf, 2, 3)))
my_error(int p, const char *fmt, ...)
{
	/* capture errno first: the stdio calls below may overwrite it */
	const int saved_errno = errno;
	va_list args;

	fprintf(stderr, "%s: ", program);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	if (p != 0)
		fprintf(stderr, ": %s\n", strerror(saved_errno));

	exit(1);
}
/* Point stdin, stdout and stderr at /dev/console.
 * Best effort: if the console cannot be opened the current descriptors
 * are left untouched. */
static void openconsole(void)
{
	int target;
	const int console = open("/dev/console", O_RDWR);

	if (console < 0)
		return;

	/* descriptors 0, 1 and 2 all become the console */
	for (target = 0; target <= 2; target++)
		dup2(console, target);

	/* drop the extra descriptor unless it is one of the standard three */
	if (console > 2)
		close(console);
}
/* If an init= parameter is present on the kernel command line (as read
 * into cmdline by getprocinfo), execute the program it names; otherwise
 * try the usual places. Dies if no init could be executed.
 *
 * When init= appears more than once the last occurrence wins. Only a
 * whole parameter is accepted: "init=" must start the command line or
 * follow whitespace, so that e.g. "rdinit=/foo" is not mistaken for
 * "init=/foo".
 *
 * The stdio descriptors are redirected to /dev/console first. cmdline is
 * modified in place (the chosen value is NUL-terminated) and initargs[0]
 * is replaced by the program being executed. */
static void __attribute__ ((noreturn)) exec_init(void)
{
	int i;
	char *s, *p;
	char *definit[] = {
		"/sbin/init",
		"/bin/init",
		"/etc/init",
		"/bin/sh",
		NULL
	};

	openconsole();

	/* find the last init= that begins a parameter */
	s = NULL;
	for (p = strstr(cmdline, "init="); p; p = strstr(p + 1, "init=")) {
		if (p == cmdline || (unsigned char)p[-1] <= ' ')
			s = p;
	}

	if (s) {
		s += 5;
		/* the value ends at the first blank or control character;
		 * compare as unsigned so bytes >= 0x80 are part of the path */
		for (p = s; (unsigned char)*p > ' '; ++p)
			;
		*p = '\0';
		initargs[0] = s;
		execv(initargs[0], initargs);
		/* an explicitly requested init that cannot run is fatal */
		serror("executing init '%s' (from cmdline) failed", s);
	}

	/* execv only returns on failure, so just try the next candidate */
	for (i = 0; definit[i]; i++) {
		initargs[0] = definit[i];
		execv(initargs[0], initargs);
	}
	error("init not found\n");
}
/* check running environment, to avoid destroying already running system */
static void checkenv(void)
{
struct statfs sfs;
/* must be run as init */
if (getpid() != 1)
error("must be run as init\n");
openconsole();
/* check / */
if (statfs("/", &sfs))
serror("statfs /");
/* no work needed if on tmpfs */
if (sfs.f_type == TMPFS_MAGIC) {
getprocinfo();
exec_init();
}
/* Make sure we're on a ramfs */
if (sfs.f_type != RAMFS_MAGIC)
error("rootfs not a ramfs\n");
if (lstat("/", &root_st))
serror("stat /");
}
/* Check whether directory p is empty. Assumes an existing directory
 * always lists '.' and '..', i.e. "empty" means exactly two entries.
 * Returns non-zero if p is empty or does not exist (ENOENT), and zero if
 * it has other entries or cannot be opened for any other reason. */
static int isdirempty(const char *p)
{
	DIR *dir = opendir(p);
	int entries = 0;

	if (dir == NULL)
		return errno == ENOENT;

	while (readdir(dir) != NULL)
		entries++;
	closedir(dir);

	return entries == 2;
}
/* Read /proc/cmdline into cmdline and, if available, /proc/meminfo into
 * meminfo. Both buffers are NUL-terminated; input longer than a buffer
 * is truncated.
 *
 * If /proc/cmdline does not exist, /proc is created and a proc
 * filesystem is mounted on it first. /proc is unmounted again before a
 * successful return.
 *
 * Returns NULL on success, or a short description of the step that
 * failed, with errno describing the failure (suitable for serror). */
static const char *getprocinfo(void)
{
	int fd;
	ssize_t got;
	size_t len;

	fd = open(CMDLINE, O_RDONLY);
	if (fd < 0 && errno == ENOENT) {
		/* mkdir may fail because /proc exists; mount() reports real trouble */
		mkdir("/proc", 0555);
		if (mount("none", "/proc", "proc", 0, NULL) < 0)
			return "mounting /proc";
		fd = open(CMDLINE, O_RDONLY);
	}
	if (fd < 0)
		return "opening " CMDLINE;

	len = 0;
	while ((got = read(fd, cmdline + len, sizeof cmdline - len - 1)) > 0)
		len += got;
	if (got < 0) {
		/* do not leak the descriptor, and keep read()'s errno for
		 * the caller's error message */
		int saved_errno = errno;

		close(fd);
		errno = saved_errno;
		return "reading " CMDLINE;
	}
	cmdline[len] = '\0';
	close(fd);

	/* meminfo is optional */
	if ((fd = open(MEMINFO, O_RDONLY)) >= 0) {
		len = 0;
		while ((got = read(fd, meminfo + len, sizeof meminfo - len - 1)) > 0)
			len += got;
		/* terminate meminfo itself (not cmdline) at the data read */
		meminfo[len] = '\0';
		close(fd);
	}

	/* NOTE(review): /proc is unmounted even if it was already mounted
	 * on entry -- presumably so it is not left behind on the old root,
	 * since move_ent skips entries on other devices; confirm */
	umount("/proc");
	return NULL;
}
/* Singly linked list of hard-linked inodes already copied to the new
 * root: one node per inode, carrying the path of the copy that later
 * links to the same inode are linked against. */
struct le {
	struct le *next;	/* next node, or NULL at the end */
	ino_t inode;		/* inode number on the source filesystem */
	char path[];		/* NUL-terminated destination path */
};
static struct le *hl;		/* list head; NULL while empty */

/* Look up inode in the hard-link list.
 *
 * If the inode is known, return the destination path recorded for it.
 * Otherwise record s as the destination for this inode and return NULL,
 * telling the caller to create the file itself.
 *
 * If the node cannot be allocated a warning is printed and NULL is
 * returned without recording anything: the file is still moved, but
 * later links to it end up as independent copies. */
static char *hlfind(ino_t inode, const char *s)
{
	struct le *l;
	size_t len;

	for (l = hl; l; l = l->next) {
		if (l->inode == inode)
			return l->path;
	}

	/* new target: push it on the front of the list */
	len = strlen(s) + 1;
	l = malloc(sizeof *l + len);
	if (!l) {
		fprintf(stderr,
			"warning: out of memory recording hard link %s\n", s);
		return NULL;
	}
	l->next = hl;
	l->inode = inode;
	memcpy(l->path, s, len);
	hl = l;
	return NULL;
}
static void move_dir(const char *s);
/* Write all len bytes of buf to fd, retrying after short writes and
 * EINTR. Returns 0 on success, or -1 with errno set. */
static int write_all(int fd, const char *buf, size_t len)
{
	while (len > 0) {
		ssize_t done = write(fd, buf, len);

		if (done < 0) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (done == 0) {
			/* no progress: fail rather than spin forever */
			errno = EIO;
			return -1;
		}
		buf += done;
		len -= done;
	}
	return 0;
}

/* Move one directory entry from the old root to the new root.
 *
 * p: absolute path of the directory containing the entry
 * n: name of the entry inside p
 *
 * The source is the absolute path p/n; the destination is the same path
 * prefixed with '.', i.e. relative to the current directory, which main()
 * sets to the new root. Directories are recreated and recursed into,
 * regular files are copied (hard links are re-linked via hlfind),
 * symbolic links and special files are recreated. Owner, access and
 * modification times and permissions are preserved where possible. The
 * source entry is removed once it has been moved.
 *
 * The new root itself and anything on a device other than the one
 * holding "/" (root_st) is left alone. Symbolic link targets longer than
 * the local buffer are truncated. Any failure is fatal. */
static void move_ent(const char *p, const char *n)
{
	size_t pl = strlen(p), nl = strlen(n);
	/* "/" already ends in a separator; adding a second one would give
	 * "//name", which never compares equal to newroot below */
	int sep = (pl == 0 || p[pl - 1] != '/');
	char s[pl + nl + 3];	/* '.' + p + '/' + n + NUL */
	struct stat st;
	struct utimbuf tm;

	/* s is the destination ("./dir/name"), s+1 the source ("/dir/name") */
	s[0] = '.';
	memcpy(s + 1, p, pl);
	if (sep)
		s[pl + 1] = '/';
	memcpy(s + 1 + pl + sep, n, nl + 1);

	if (!strcmp(s + 1, newroot))
		return;
	if (lstat(s + 1, &st))
		serror("stating %s", s + 1);
	/* never cross into other filesystems (mount points) */
	if (st.st_dev != root_st.st_dev)
		return;

	if (S_ISDIR(st.st_mode)) {
		/* restrictive mode while filling; final mode is set below */
		if (mkdir(s, 0700))
			serror("creating %s", s);
		move_dir(s + 1);
		if (rmdir(s + 1))
			serror("removing %s", s + 1);
	} else if (S_ISREG(st.st_mode)) {
		int fdi, fdo;
		char *linked, *map;

		/* a later name of an inode already moved: link to that copy */
		if (st.st_nlink > 1 && (linked = hlfind(st.st_ino, s))) {
			if (link(linked, s))
				serror("linking %s", s);
			if (unlink(s + 1))
				serror("unlink of %s", s + 1);
			return;
		}
		if ((fdi = open(s + 1, O_RDONLY)) < 0)
			serror("opening %s", s + 1);
		if ((fdo = open(s, O_CREAT|O_WRONLY|O_TRUNC, 0600)) < 0)
			serror("creating %s", s);

		map = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fdi, 0);
		if (map == MAP_FAILED) {
			/* mmap unavailable (it also fails for empty files):
			 * fall back to a plain read/write copy */
			char b[2048];
			ssize_t i;

			while ((i = read(fdi, b, sizeof b)) > 0) {
				if (write_all(fdo, b, i))
					serror("writing %s", s);
			}
			if (i < 0)
				serror("reading %s", s + 1);
		} else {
			if (write_all(fdo, map, st.st_size))
				serror("writing %s", s);
			munmap(map, st.st_size);
		}
		close(fdi);
		close(fdo);
	} else if (S_ISLNK(st.st_mode)) {
		char target[2048];
		int i;

		if ((i = readlink(s + 1, target, sizeof target - 1)) < 0)
			serror("reading link %s", s + 1);
		target[i] = '\0';
		if (symlink(target, s))
			serror("creating link %s", s);
	} else if (mknod(s, st.st_mode, st.st_rdev))
		serror("mknod of %s", s);

	if (lchown(s, st.st_uid, st.st_gid))
		serror("chown of %s", s);
	/* utime and chmod follow symbolic links, so skip them for links */
	if (!S_ISLNK(st.st_mode)) {
		tm.actime = st.st_atime;
		tm.modtime = st.st_mtime;
		if (utime(s, &tm))
			serror("settime of %s", s);
		if (chmod(s, st.st_mode & ~S_IFMT))
			serror("chmod of %s", s);
	}
	/* directories were already removed with rmdir above */
	if (!S_ISDIR(st.st_mode) && unlink(s + 1))
		serror("unlink of %s", s + 1);
}
/* Move every entry of directory s by calling move_ent on it; the
 * '.' and '..' entries are skipped. Failure to open s is fatal. */
static void move_dir(const char *s)
{
	struct dirent *entry;
	DIR *dir = opendir(s);

	if (dir == NULL)
		serror("listing %s", s);

	for (entry = readdir(dir); entry != NULL; entry = readdir(dir)) {
		const char *name = entry->d_name;

		if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
			continue;
		move_ent(s, name);
	}
	closedir(dir);
}
/* Build a tmpfs mount option of the form "size=<total>k" from the data
 * in meminfo, assuming the first number in meminfo is the MemTotal value
 * in kilobytes.
 *
 * The string is assembled in place inside meminfo, which is modified.
 * Returns a pointer into meminfo, or NULL if no number was found, if
 * fewer than five characters precede it (no room for "size="), or if
 * there is no room left in the buffer for the 'k' suffix. */
const char *getmemsize(void)
{
	size_t i, j;

	/* find the first digit; ctype functions need unsigned char values */
	for (i = 0; meminfo[i] && !isdigit((unsigned char)meminfo[i]); i++)
		;
	/* space needed for 'size=', return NULL otherwise */
	if (i < 5 || !meminfo[i])
		return NULL;

	/* find the end of the number */
	for (j = i + 1; isdigit((unsigned char)meminfo[j]); j++)
		;
	/* 'k' and the terminator must both fit inside meminfo */
	if (j + 1 >= sizeof meminfo)
		return NULL;
	meminfo[j] = 'k';
	meminfo[j + 1] = '\0';

	/* overwrite the five characters before the number with 'size=' */
	i -= 5;
	memcpy(meminfo + i, "size=", 5);
	return meminfo + i;
}
/* Entry point; meant to run as /init (PID 1) on an initramfs.
 *
 * Checks the environment, reads the kernel command line and memory
 * information, mounts a tmpfs on an unused /newroot[N] directory, moves
 * the contents of the ramfs into it, moves the tmpfs over "/", chroots
 * into it and executes the real init with the original arguments.
 * Does not return on success; every failure is fatal. */
int main(int argc, char *argv[])
{
	int nrc = 0;
	const char *e;

	(void)argc;
	program = argv[0];
	initargs = argv;
	/* modes passed to mkdir/open/mknod must be applied unmodified */
	umask(0);
	checkenv();
	if ((e = getprocinfo()))
		serror("%s", e);

	/* pick a mount point: /newroot, else /newroot0, /newroot1, ...
	 * until one is missing or empty */
	strcpy(newroot, NEWROOT);
	while (!isdirempty(newroot))
		snprintf(newroot, sizeof newroot, "%s%d", NEWROOT, nrc++);
	if (mkdir(newroot, 0755) && errno != EEXIST)
		serror("creating new root %s", newroot);

	/* create a tmpfs; getmemsize() may return NULL, which passes no
	 * mount options at all */
	if (mount("none", newroot, "tmpfs", 0, getmemsize()) < 0)
		serror("mounting tmpfs");

	/* move_ent builds destination paths relative to the current directory */
	if (chdir(newroot))
		serror("entering new root");
	move_dir("/");

	if (mount(".", "/", NULL, MS_MOVE, NULL))
		serror("overmounting root");
	if (chroot(".") || chdir("/"))
		serror("chroot");

	/* remove copy of this program */
	unlink("/init");
	exec_init();
	return 1;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
Url :
http://www.zytor.com/pipermail/klibc/attachments/20060928/a299f453/attachment.bin