加入收藏 | 设为首页 | 会员中心 | 我要投稿 李大同 (https://www.lidatong.com.cn/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 综合聚焦 > 服务器 > Linux > 正文

从根文件系统制作看loop设备

发布时间:2020-12-14 02:05:19 所属栏目:Linux 来源:网络整理
导读:一、引出 在Linux系统下,通过 dd + losetup + mkfs 创建一个根文件系统已经是创建一个Linux根文件系统的一个常规方法。由于这个是通过一个loop设备来创建的文件,所以这个loop文件是把一个文件看做是一个设备,这个还是一个相对比较高难度的一个适配,因为……

一、引出

在Linux系统下,通过 dd + losetup + mkfs 制作根文件系统已经是一种常规方法。这种方法借助loop设备来创建文件系统,而loop设备是把一个普通文件当作一个块设备来使用,这是一个难度相对较高的适配:上层必须通过块设备驱动来完成对一个文件的操作,因此还是有一些挑战性的。

二、实现

1、内核中对loop设备的注册

linux-2.6.21/drivers/block/loop.c

关于loop设备
linux-2.6.21/drivers/block/loop.c:loop_init

?if (register_blkdev(LOOP_MAJOR,"loop"))
??return -EIO;

#define LOOP_SET_FD??0x4C00
#define LOOP_SET_STATUS64?0x4C04

这里是对loop这种设备的注册,也就是为它注册了自己的主设备号。从内核文档中可以知道,loop有自己的设备号。内核说明文档 linux-2.6.21/Documentation/devices.txt 中的描述如下:

  7 block	Loopback devices
		  0 = /dev/loop0	First loop device
		  1 = /dev/loop1	Second loop device
		    ...

		The loop devices are used to mount filesystems not
		associated with block devices.  The binding to the
		loop devices is handled by mount(8) or losetup(8).

作为一种虚拟设备,内核将会一次性在内核中分配指定个数的虚拟磁盘,这个参数可以在系统启动的时候通过启动参数max_loop进行设置。然后在loop的初始化函数中创建这样指定个数的磁盘,这些就是我们用户态可以识别的磁盘个数。

	loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL);
	if (!loop_dev)
		goto out_mem1;
	memset(loop_dev, 0, max_loop * sizeof(struct loop_device));

	disks = kmalloc(max_loop * sizeof(struct gendisk *), GFP_KERNEL);
	if (!disks)
		goto out_mem2;

	for (i = 0; i < max_loop; i++) {
		disks[i] = alloc_disk(1);
		if (!disks[i])
			goto out_mem3;
	}

2、磁盘操作实体

上面只是分配的一个磁盘,注意,分配的不是设备描述符,而直接就是磁盘,也就是创建的是一个虚拟设备。这个磁盘其实本身并没有做任何实质性的操作,因为从执行的函数可以看到,它大部分情况下使用的都是通用而非定制接口,所以分配的磁盘也就是通用的磁盘,这个没有加上自己定制的接口,那么一定无法完成这个设备的虚拟。

这个虚拟磁盘可以认为是一个中转,也就是“铁打的营盘流水的兵”。这个磁盘虽然是创建了,但是它并没有实际的内容;并且即使它有了实际的内容,它的内容在通常情况下也会变化,例如在制作img之后马上被卸载。

这个设备真正的点睛之笔是通过ioctl中的set_fd操作(LOOP_SET_FD)来完成的。这是一个比较另类的操作:它并不是通过通常的open之类的接口完成,而是通过对这个设备执行ioctl来完成,所以还是比较独特的。

static int lo_ioctl(struct inode * inode,struct file * file,
?unsigned int cmd,unsigned long arg)

?case LOOP_SET_FD:
??err = loop_set_fd(lo,file,inode->i_bdev,arg);

?/*
? * set queue make_request_fn,and add limits based on lower level
? * device
? */
?blk_queue_make_request(lo->lo_queue,?loop_make_request);
?lo->lo_queue->queuedata = lo;
?lo->lo_queue->unplug_fn = loop_unplug;

?set_capacity(disks[lo->lo_number],size);
?bd_set_size(bdev,size << 9);

?set_blocksize(bdev,lo_blocksize);

?lo->lo_thread =?kthread_create(loop_thread,lo,"loop%d",
??????lo->lo_number);

这里就有两个比较关键的操作,其中第一个最为关键,就是设置一个磁盘特有的make_request接口,这也就是说,对于每个不同的disk,它可以定义自己的make_request,而其它模块希望操作这个具体磁盘的时候,它就通过向这个队列发送请求原语就可以了,至于这个具体怎么实现,那就是具体磁盘自己的事情了。这样就很好的解耦了系统中的不同模块,就是接口单一的原则。这个接口事实上是作为一个block设备对其它模块做的承诺,这样它才像是一个block设备

然后第二个就是一个不太必须但是比较特别的东西,就是在每个设备设置了文件描述符之后,都会创建一个对应的内核线程,由这个线程来完成操作。

这个内核线程我们在losetup之后可以看到内核中的确是有这个线程的。

例如,在挂载了loop文件之后,我们可以看到系统中有一个名为loop4的内核线程:
tsecer?? 18014? 0.0? 6.4? 77968 66284 pts/1??? S+?? 07:02?? 0:00 gdb vmlinux
root???? 27397? 0.0? 0.0????? 0???? 0 ???????? S<?? 23:10?? 0:00 [loop4]
root???? 27580? 0.0? 0.1? 55048? 2000 ???????? S??? 23:45?? 0:00 /usr/libexec/fp
tsecer?? 27584? 0.0? 0.0?? 4692?? 992 pts/0??? R+?? 23:45?? 0:00 ps aux
而其实这个loop_make_request的接口也比较简单,它就是把具体的设备操作转换为真正的文件对应的操作接口。例如write转换为backing文件(即loop设备背后的真实文件)的write接口,从而完成中间层的转发。

3、用户态线程的操作

util-linux-ng-2.16.2/mount/lomount.c

#define _PATH_DEV_LOOP??"/dev/loop"
#define NLOOPS_DEFAULT??8?/* /dev/loop[0-7] */
static int
looplist_open_dev(struct looplist *ll,int lnum)
{
?struct stat st;
?int used;
?int fd;

?/* create a full device path */
?snprintf(ll->name,sizeof(ll->name),
??ll->flag & LLFLG_SUBDIR ?
???_PATH_DEV_LOOP "/%d" :
???_PATH_DEV "loop%d",
??lnum);

looplist_next(struct looplist *ll)
?/* B) Classic way,try first eight loop devices (default number
? *??? of loop devices). This is enough for 99% of all cases.
? */
?if (ll->flag & LLFLG_DFLT) {
??for (++ll->ncur; ll->ncur < NLOOPS_DEFAULT; ll->ncur++) {
???fd = looplist_open_dev(ll,ll->ncur);
???if (fd != -1)
????return fd;
??}
??ll->flag &= ~LLFLG_DFLT;
?}
然后执行
???res = set_loop(device,off,slimit,encryption,pfd,&ro);--->>>
?if (ioctl(fd,LOOP_SET_FD,ffd) < 0) {

从而向内核注册了对应的文件。

4、strace输出

[tsecer@Harry linux-2.6.21]$ su -c "strace mount -t ext2 -o loop /home/tsecer/KernelDebug/helloword.c /dev/"
Password:?
execve("/bin/mount",["mount","-t","ext2","-o","loop","/home/tsecer/KernelDebug/hellowo"...,"/dev/"],[/* 44 vars */]) = 0
brk(0)????????????????????????????????? = 0x24c7000
mmap2(NULL,4096,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANONYMOUS,-1,0) = 0xb7842000
access("/etc/ld.so.preload",R_OK)????? = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache",O_RDONLY)????? = 3
fstat64(3,{st_mode=S_IFREG|0644,st_size=85504,...}) = 0
mmap2(NULL,85504,PROT_READ,MAP_PRIVATE,3,0) = 0xb782d000
close(3)??????????????????????????????? = 0
open("/lib/libblkid.so.1",O_RDONLY)??? = 3
read(3,"177ELF11133120$}004"...,512) = 512
fstat64(3,{st_mode=S_IFREG|0755,st_size=85520,86652,PROT_READ|PROT_EXEC,MAP_PRIVATE|MAP_DENYWRITE,0) = 0xfb2000
mmap2(0xfc6000,8192,MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE,0x13) = 0xfc6000
close(3)??????????????????????????????? = 0
open("/lib/libuuid.so.1",O_RDONLY)???? = 3
read(3,"177ELF111331`.306004"...,st_size=16112,17072,0) = 0xdff000
mmap2(0xe03000,0x3) = 0xe03000
close(3)??????????????????????????????? = 0
open("/lib/libselinux.so.1",O_RDONLY)? = 3
read(3,"177ELF111331`201?004"...,st_size=118316,121848,0) = 0x56d000
mmap2(0x589000,0x1b) = 0x589000
close(3)??????????????????????????????? = 0
open("/lib/libsepol.so.1","177ELF111331340257V004"...,st_size=242288,244992,0) = 0x9c9000
mmap2(0xa04000,0x3a) = 0xa04000
close(3)??????????????????????????????? = 0
open("/lib/libc.so.6",O_RDONLY)??????? = 3
read(3,"177ELF111333120r"004"...,st_size=1831904,0) = 0xb782c000
mmap2(NULL,1542504,0) = 0x110000
mprotect(0x282000,PROT_NONE)???? = 0
mmap2(0x283000,12288,0x172) = 0x283000
mmap2(0x286000,10600,MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS,0) = 0x286000
close(3)??????????????????????????????? = 0
open("/lib/libdl.so.2",O_RDONLY)?????? = 3
read(3,"177ELF1113331`Z8004"...,st_size=20480,16500,0) = 0xcf8000
mmap2(0xcfb000,0x2) = 0xcfb000
close(3)??????????????????????????????? = 0
mmap2(NULL,0) = 0xb782b000
set_thread_area({entry_number:-1 -> 6,base_addr:0xb782b750,limit:1048575,seg_32bit:1,contents:0,read_exec_only:0,limit_in_pages:1,seg_not_present:0,useable:1}) = 0
mprotect(0xcfb000,PROT_READ)???? = 0
mprotect(0x283000,PROT_READ)???? = 0
mprotect(0x589000,PROT_READ)???? = 0
mprotect(0x9af000,PROT_READ)???? = 0
munmap(0xb782d000,85504)?????????????? = 0
statfs64("/selinux",84,{f_type=0xf97cff8c,f_bsize=4096,f_blocks=0,f_bfree=0,f_bavail=0,f_files=0,f_ffree=0,f_fsid={0,0},f_namelen=255,f_frsize=4096}) = 0
brk(0)????????????????????????????????? = 0x24c7000
brk(0x24e8000)????????????????????????? = 0x24e8000
open("/usr/lib/locale/locale-archive",O_RDONLY|O_LARGEFILE) = 3
fstat64(3,st_size=98765760,2097152,0) = 0xb762b000
close(3)??????????????????????????????? = 0
umask(022)????????????????????????????? = 02
open("/dev/null",O_RDWR|O_LARGEFILE)?? = 3
close(3)??????????????????????????????? = 0
getuid32()????????????????????????????? = 0
geteuid32()???????????????????????????? = 0
readlink("/home",0xbff60c5b,4096)???? = -1 EINVAL (Invalid argument)
readlink("/home/tsecer",4096) = -1 EINVAL (Invalid argument)
readlink("/home/tsecer/KernelDebug",4096) = -1 EINVAL (Invalid argument)
readlink("/home/tsecer/KernelDebug/helloword.c",4096) = -1 EINVAL (Invalid argument)
readlink("/dev",0xbff60adb,4096)????? = -1 EINVAL (Invalid argument)
umask(077)????????????????????????????? = 022
open("/etc/mtab",O_RDONLY|O_LARGEFILE) = 3
umask(022)????????????????????????????? = 077
fstat64(3,st_size=470,0) = 0xb7841000
read(3,"/dev/mapper/vg_harry-lv_root / e"...,4096) = 470
read(3,"",4096)?????????????????????? = 0
close(3)??????????????????????????????? = 0
munmap(0xb7841000,4096)??????????????? = 0
stat64("/dev/",{st_mode=S_IFDIR|0755,st_size=4040,...}) = 0
stat64("/dev/loop",0xbff619d0)???????? = -1 ENOENT (No such file or directory)
open("/dev/loop0",{st_mode=S_IFBLK|0660,st_rdev=makedev(7,0),...}) = 0
ioctl(3,0x4c03,0xbff5f8b0)??????????? = -1 ENXIO (No such device or address)
close(3)??????????????????????????????? = 0
open("/home/tsecer/KernelDebug/helloword.c",O_RDWR|O_LARGEFILE) = 3
open("/dev/loop0",O_RDWR|O_LARGEFILE)? = 4
readlink("/home",0xbff608cb,4096) = -1 EINVAL (Invalid argument)
ioctl(4,0x4c00,0x3)?????????????????? = 0
close(3)??????????????????????????????? = 0
ioctl(4,0x4c04,0xbff61988)??????????? = 0
ioctl(4,0x4c05,0xbff61788)??????????? = 0
stat64("/sbin/mount.ext2",0xbff619f8)? = -1 ENOENT (No such file or directory)
rt_sigprocmask(SIG_BLOCK,~[TRAP SEGV RTMIN RT_1],NULL,8) = 0
stat64("/sbin/mount.ext2",0xbff619b8)? = -1 ENOENT (No such file or directory)
接下来进行了真正的mount操作,也就是在这里进行了mount系统调用,而之前已经通过对设备文件的ioctl,设置了回环设备使用的真正文件的位置。也就是说,在执行mount系统调用之前,用户态的mount程序已经完成了loop设备文件的打开和设置,然后在mount系统调用中它就作为一个普通的设备文件进行操作了:


mount("/dev/loop0","/dev/",MS_MGC_VAL,NULL) = -1 EINVAL (Invalid argument)
rt_sigprocmask(SIG_UNBLOCK,8) = 0
open("/usr/share/locale/locale.alias",O_RDONLY) = 3
fstat64(3,st_size=2512,"# Locale name alias data base.n#"...,4096) = 2512
read(3,4096)??????????????? = 0
open("/usr/share/locale/en_US.UTF-8/LC_MESSAGES/util-linux-ng.mo",O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/share/locale/en_US.utf8/LC_MESSAGES/util-linux-ng.mo",O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/share/locale/en_US/LC_MESSAGES/util-linux-ng.mo",O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/share/locale/en.UTF-8/LC_MESSAGES/util-linux-ng.mo",O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/share/locale/en.utf8/LC_MESSAGES/util-linux-ng.mo",O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/share/locale/en/LC_MESSAGES/util-linux-ng.mo",O_RDONLY) = -1 ENOENT (No such file or directory)
write(2,"mount: wrong fs type,bad option"...,121mount: wrong fs type,bad option,bad superblock on /dev/loop0,
?????? missing codepage or helper program,or other error) = 121
write(2,"n",1
)?????????????????????? = 1
stat64("/dev/loop0",...}) = 0
open("/dev/loop0",O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 3
uname({sys="Linux",node="Harry",BLKGETSIZE64,0xbff61cb8)????? = 0
write(2,"?????? (could this be the IDE de"...,111?????? (could this be the IDE device where you in fact use
?????? ide-scsi so that sr0 or sda or so is needed?)) = 111
write(2,1
)?????????????????????? = 1
close(3)??????????????????????????????? = 0
write(2,"?????? In some cases useful info"...,85?????? In some cases useful info is found in syslog - try
?????? dmesg | tail? or so
) = 85
write(2,1
)?????????????????????? = 1
exit_group(32)????????????????????????? = ?
[tsecer@Harry linux-2.6.21]$

(编辑:李大同)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容!

    推荐文章
      热点阅读