内核块设备 – 使用自旋锁时陷入死锁
发布时间:2020-12-16 07:23:03 所属栏目:百科 来源:网络整理
导读:我刚刚实现了一个虚拟块设备,我想用它来探索 linux内核如何处理块设备. 我的设备只是一个内存区域,分为两个512字节扇区. 我正在使用全局结构来存储设备信息: typedef struct{ uint32_t hard_sector_size; // Size of a device sector uint32_t sector_numbe
我刚刚实现了一个虚拟块设备,想用它来探索 Linux 内核如何处理块设备.
我的设备只是一个内存区域,分为两个512字节扇区. 我正在使用全局结构来存储设备信息: typedef struct { uint32_t hard_sector_size; // Size of a device sector uint32_t sector_number; // Number of sector on device uint32_t size; // Total size of virtual device in bytes uint8_t* data; // Device memory buffer spinlock_t device_lock; // Device structure access spinlock struct request_queue *queue; // Device request queue struct gendisk *gendisk; // Device "disk" representation int major; // Device major number attributed by kernel int minor; // Device minor number fixed at initialization uint32_t r_users; // Number of read access uint32_t w_users; // Number of write access }blk_mod_t; blk_mod_t self; [...] 现在我想保护这个结构免受并发访问.为此,我正在使用device_lock字段. 现在我只将这个螺旋锁用于以下三个功能 static int block_mod_open(struct block_device *bdev,fmode_t mode) { access_mode_t access_mode; DEBUG("Entering open functionn"); if((mode & FMODE_READ) && (mode & FMODE_WRITE)) { NOTICE("Oppened in read/write moden"); mode = ACCESS_RW; } else if(mode & FMODE_READ) { NOTICE("Oppened in read only moden"); mode = ACCESS_RONLY; } else if(mode & FMODE_WRITE) { NOTICE("Oppened in write only moden"); mode = ACCESS_WONLY; } DEBUG("<--n"); spin_lock(&self.device_lock); if(ACCESS_RW == access_mode) { self.r_users++; self.w_users++; } else if(ACCESS_RONLY == access_mode) { self.r_users++; } else { self.w_users++; } NOTICE("Read access: %dtWrite access: %dn",self.r_users,self.w_users); DEBUG("-->n"); spin_unlock(&self.device_lock); DEBUG("Exiting open functionn"); return 0; } static void block_mod_release(struct gendisk *disk,fmode_t mode) { access_mode_t access_mode; DEBUG("Entering release functionn"); if((mode & FMODE_READ) && (mode & FMODE_WRITE)) { NOTICE("Closed read/write moden"); mode = ACCESS_RW; } else if(mode & FMODE_READ) { NOTICE("Closed read only moden"); mode = ACCESS_RONLY; } else if(mode & FMODE_WRITE) { NOTICE("Closed write only moden"); mode = ACCESS_WONLY; } DEBUG("<--n"); spin_lock(&self.device_lock); if(ACCESS_RW == access_mode) { 
self.r_users--; self.w_users--; } else if(ACCESS_RONLY == access_mode) { self.r_users--; } else { self.w_users--; } NOTICE("Read access: %dtWrite access: %dn",self.w_users); DEBUG("-->n"); spin_unlock(&self.device_lock); DEBUG("Exiting release functionn"); return; } static void block_mod_transfer(unsigned long sector,unsigned long nsect,char *buffer,int write) { unsigned long offset = sector*KERNEL_SECTOR_SIZE; unsigned long nbytes = nsect*KERNEL_SECTOR_SIZE; DEBUG("Entering transfer functionn"); DEBUG("<--n"); spin_lock(&self.device_lock); if((offset + nbytes) > self.size) { WARNING("Beyond-end write (%ld %ld)n",offset,nbytes); spin_unlock(&self.device_lock); return; } if(write) { NOTICE("Writing to devicen"); memcpy(self.data + offset,buffer,nbytes); } else { NOTICE("Reading from devicen"); memcpy(buffer,self.data + offset,nbytes); } DEBUG("-->n"); spin_unlock(&self.device_lock); DEBUG("Exiting transfer functionn"); } 我用以下函数处理请求 static void block_mod_request(struct request_queue *queue) { DEBUG("Entering request functionn"); struct request *request; while(NULL != (request = blk_fetch_request(queue))) { blk_mod_t *self = request->rq_disk->private_data; // Check if request is a filesystem request (i.e. moves block of data) if(REQ_TYPE_FS != request->cmd_type) { // Close request with unsuccessful status WARNING("Skip non-fs requestn"); __blk_end_request_cur(request,-EIO); continue; } // Treat request block_mod_transfer(blk_rq_pos(request),blk_rq_cur_sectors(request),bio_data(request->bio),rq_data_dir(request)); // Close request with successful status __blk_end_request_cur(request,0); } DEBUG("Exiting request functionn"); return; } 加载模块时没有什么特别的事情发生.但是,如果我尝试从中读取,我陷入僵局,因为我的系统不再响应,我必须重新启动. 这是输出: root@PC325:~# echo 8 > /proc/sys/kernel/printk root@PC325:~# insmod block_mod.ko [ 64.546791] block_mod: loading out-of-tree module taints kernel. [ 64.548197] block_mod: module license '(c) Test license' taints kernel. 
[ 64.549951] Disabling lock debugging due to kernel taint [ 64.552816] Inserting module 'blk_mod_test' [ 64.554085] Got major number : '254' [ 64.554940] Data allocated (size = 1024) [ 64.557378] Request queue initialized [ 64.558178] Sent hard sector size to request queue [ 64.559188] Gendisk allocated [ 64.559817] Gendisk filled [ 64.560416] Gendisk capacity set [ 64.563285] Gendisk added root@PC325:~# [ 64.565280] Entering open function [ 64.566035] Oppened in read only mode [ 64.566773] <-- [ 64.567138] Read access: 1 Write access: 0 [ 64.567977] --> [ 64.568342] Exiting open function [ 64.571080] Entering release function [ 64.571855] Closed read only mode [ 64.572531] <-- [ 64.572924] Read access: 0 Write access: 0 [ 64.573749] --> [ 64.574116] Exiting release function root@PC325:~# cat /dev/blkmodtest [ 78.488228] Entering open function [ 78.488988] Oppened in read only mode [ 78.489733] <-- [ 78.490100] Read access: 1 Write access: 0 [ 78.490925] --> [ 78.491290] Exiting open function [ 78.492026] Entering request function [ 78.492743] Entering transfer function [ 78.493469] <-- -------------- DEADLOCK HERE -------------- 更新:添加init和exit函数 static int __init block_mod_init(void) { char* message = "abcdefghijklmnopqrstuvwxyz"; int i; INFO("Inserting module '%s'n",MODULE_NAME); // Initialize driver data structure memset(&self,sizeof(blk_mod_t)); self.hard_sector_size = DEVICE_HARD_SECTOR_SIZE; self.sector_number = DEVICE_SECTOR_NUMBER; self.size = self.sector_number*self.hard_sector_size; self.minor = 1; // Get a major number from kernel if(0 > (self.major = register_blkdev(self.major,MODULE_NAME))) { ERROR("Unable to get major number for '%s'n",MODULE_NAME); unregister_blkdev(self.major,MODULE_NAME); return -1; } DEBUG("Got major number : '%d'n",self.major); // Allocate data space if(NULL == (self.data = vmalloc(self.size))) { ERROR("Unable to allocate memory for '%s'n",MODULE_NAME); return -2; } for(i=0;i<self.size;i++) { self.data[i] = 
message[i%strlen(message)]; } spin_lock_init(&self.device_lock); DEBUG("Data allocated (size = %d)n",self.size); // Allocate the request queue if(NULL == (self.queue = blk_init_queue(block_mod_request,&self.device_lock))) { ERROR("Unable to initialize request queue for '%s'n",MODULE_NAME); vfree(self.data); unregister_blkdev(self.major,MODULE_NAME); return -3; } DEBUG("Request queue initializedn"); // Send device hard sector size to request queue blk_queue_logical_block_size(self.queue,self.hard_sector_size); self.queue->queuedata = &self; DEBUG("Sent hard sector size to request queuen"); // Allocate the gendisk structure if(NULL == (self.gendisk = alloc_disk(self.minor))) { ERROR("Unable to initialize gendisk for '%s'n",MODULE_NAME); blk_cleanup_queue(self.queue); vfree(self.data); unregister_blkdev(self.major,MODULE_NAME); return -4; } DEBUG("Gendisk allocatedn"); // Fill gendisk structure self.gendisk->major = self.major; self.gendisk->first_minor = self.minor; self.gendisk->fops = &self_ops; self.gendisk->queue = self.queue; self.gendisk->private_data = &self; snprintf(self.gendisk->disk_name,32,"blkmodtest"); DEBUG("Gendisk filledn"); set_capacity(self.gendisk,self.sector_number*(self.hard_sector_size/KERNEL_SECTOR_SIZE)); DEBUG("Gendisk capacity setn"); add_disk(self.gendisk); DEBUG("Gendisk addedn"); return 0; } static void __exit block_mod_cleanup(void) { del_gendisk(self.gendisk); put_disk(self.gendisk); blk_cleanup_queue(self.queue); vfree(self.data); unregister_blkdev(self.major,MODULE_NAME); INFO("Removing module '%s'n",MODULE_NAME); return; } 更新:添加宏和枚举定义 #define MODULE_NAME "blk_mod_test" #define KERNEL_SECTOR_SIZE 512 #define DEVICE_HARD_SECTOR_SIZE 512 #define DEVICE_SECTOR_NUMBER 2 typedef enum { ACCESS_RONLY = 0,ACCESS_WONLY = 1,ACCESS_RW = 2,}access_mode_t; 我不明白的是,在我尝试将它锁定到block_mod_transfer之前,已经释放了自旋锁(在block_mod_open结束时). 因此,我不明白为什么当自旋锁似乎可用时,内核陷入死锁. 为什么我在这种情况下陷入僵局?我究竟做错了什么? 解决方法
感谢 @CraigEstey 的评论,我终于发现问题的根源:请求队列在初始化时使用了与我的设备结构相同的那把自旋锁(self.device_lock).
// Allocate the request queue if(NULL == (self.queue = blk_init_queue(block_mod_request,&self.device_lock))) { ERROR("Unable to initialize request queue for '%s'n",MODULE_NAME); vfree(self.data); unregister_blkdev(self.major,MODULE_NAME); return -3; } 因此,当内核调用请求队列的回调函数(即block_mod_request)时,这把自旋锁已经被内核持有;随后block_mod_transfer又对同一把锁调用spin_lock,而自旋锁不可重入,于是陷入死锁. 解决办法是为请求队列单独使用一把自旋锁,或者不要在请求处理路径中再次获取device_lock. (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与站长联系删除相关内容! |