linux-kernel – 无法使用ib_create_qp创建队列对
我正在编写一个RDMA(InfiniBand)内核模块.
到目前为止,我已成功创建保护域,发送和接收队列的完成队列. 但每当我尝试通过调用ib_create_qp来创建队列对时,它都无法创建队列对.我写的代码如下所示: #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/list.h> #include <linux/module.h> #include <linux/err.h> #include "myClient.h" struct workqueue_struct *myClient_workqueue; struct ib_sa_client myClient_sa_client; /* static void myClient_add_one(struct ib_device *device); static void myClient_remove_one(struct ib_device *device); */ struct ib_pd *mypd; struct ib_cq *myrcvcq; struct ib_cq *myClientsendcq; struct ib_qp *myClientqp; void myClient_ib_recvcompletion(struct ib_cq *cq) { printk("A user-specified callback that is invoked when a completion event occurs on the CQ.n"); } void myClient_ib_sendcompletion(struct ib_cq *cq) { printk("A user-specified callback that is invoked when a completion event occurs on the CQ.n"); } static void my_qp_event_handler(struct ib_event *myqpAsyncEvent,void *anyPointer) { printk(KERN_INFO "Dummy affiliated asynchronous event occured function called n"); } static void myClient_add_one(struct ib_device *device) { union ib_gid tmp_gid; int ret; int hcaport = 1; int result = -ENOMEM; u16 port1Pkey; struct ib_port_attr attr; ret = ib_query_port(device,hcaport,&attr); printk("ib query port result %d n",ret); // Creating the Protection Domain for RDMA mypd = ib_alloc_pd(device); if(IS_ERR(mypd)){ printk(KERN_INFO "Failed to allocate PDn"); return; } else{ printk(KERN_INFO "1Successfully allocated the PDn"); pdset = true; } // Creating the receive completion queue for RDMA myrcvcq = ib_create_cq(device,myClient_ib_recvcompletion,NULL,myClient_recvq_size,0); if(IS_ERR(myrcvcq)){ pr_err("%s:%d error code for receive cq%dn",__func__,__LINE__,PTR_ERR(myrcvcq)); //printk("Error creating QP: %d n",PTR_ERR(myClientqp)); } else{ printk("Recieve CQ successfully created in address: %x n",myrcvcq); } // Creating the send completion queue for RDMA myClientsendcq = 
ib_create_cq(device,myClient_ib_sendcompletion,myClient_sendq_size,0 ); if(IS_ERR(myClientsendcq)){ pr_err("%s:%d scqerror code for send cq%dn",PTR_ERR(myClientsendcq)); //printk("Error creating QP: %d n",PTR_ERR(myClientqp)); } else{ printk("1Send CQ successfully created in address: %x n",myClientsendcq); } // Creating the queue pair // Creating the queue pair struct ib_qp_init_attr init_qpattr; memset(&init_qpattr,sizeof(init_qpattr)); init_qpattr.event_handler = myClient_qp_event_handler; init_qpattr.cap.max_send_wr = 2; init_qpattr.cap.max_recv_wr = 2; init_qpattr.cap.max_recv_sge = 1; init_qpattr.cap.max_send_sge = 1; init_qpattr.sq_sig_type = IB_SIGNAL_ALL_WR; init_qpattr.qp_type = IB_QPT_UD; init_qpattr.send_cq = myClientsendcq; init_qpattr.recv_cq = myrcvcq; myClientqp = ib_create_qp(mypd,&init_qpattr); if(IS_ERR(myClientqp)){ pr_err("%s:%d error code %dn",PTR_ERR(myClientqp)); //printk("Error creating QP: %d n",PTR_ERR(myClientqp)); } else{ printk(KERN_INFO "1The queue pair is successfully created n"); qpcreated = true; } } static void myClient_remove_one(struct ib_device *device) { } static struct ib_client my_client = { .name = "myRDMAclient",.add = myClient_add_one,.remove = myClient_remove_one }; static int __init myRDMAclient_init(void) { int ret; ret = ib_register_client(&my_client); if(ret){ //printk(KERN_ALERT "KERN_ERR Failed to register IB clientn"); goto err_sa; } printk(KERN_ALERT "lKERN_INFO Successfully registered myRDMAclient module n"); return 0; err_sa: return ret; } module_init(myRDMAclient_init); 除了ib_create_qp(mypd,& init_qpattr)之外,所有查询都有效.无法创建队列对. 
更新:在创建队列对之前注册了内存.但它仍然显示ib_create_qp的无效参数错误(错误代码-22) #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/list.h> #include <linux/module.h> #include <linux/err.h> #include "myClient.h" struct workqueue_struct *myClient_workqueue; struct ib_sa_client myClient_sa_client; /* static void myClient_add_one(struct ib_device *device); static void myClient_remove_one(struct ib_device *device); */ struct ib_pd *mypd; struct ib_cq *myrcvcq; struct ib_cq *myClientsendcq; struct ib_qp *myClientqp; struct ib_mr *mymr; void myClient_ib_recvcompletion(struct ib_cq *cq) { printk("A user-specified callback that is invoked when a completion event occurs on the CQ.n"); } void myClient_ib_sendcompletion(struct ib_cq *cq) { printk("A user-specified callback that is invoked when a completion event occurs on the CQ.n"); } static void my_qp_event_handler(struct ib_event *myqpAsyncEvent,ret); // Creating the Protection Domain for RDMA mypd = ib_alloc_pd(device); if(IS_ERR(mypd)){ printk(KERN_INFO "Failed to allocate PDn"); return; } else{ printk(KERN_INFO "1Successfully allocated the PDn"); pdset = true; } // Registering Memory mymr = ib_get_dma_mr(mypd,IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ| IB_ACCESS_REMOTE_WRITE); if(IS_ERR(mymr)){ printk("failed to register memory :( %d n",PTR_ERR(mymr)); }else{ printk(KERN_INFO "Successfully registered memory region :) n"); } // End Registering Memory // Creating the receive completion queue for RDMA myrcvcq = ib_create_cq(device,.remove = myClient_remove_one }; static int __init myRDMAclient_init(void) { int ret; ret = ib_register_client(&my_client); if(ret){ //printk(KERN_ALERT "KERN_ERR Failed to register IB clientn"); goto err_sa; } printk(KERN_ALERT "lKERN_INFO Successfully registered myRDMAclient module n"); return 0; err_sa: return ret; } module_init(myRDMAclient_init); 解决方法
更新:
根据下面评论中的讨论,我猜你是在发行版自带内核的基础上另外安装了Mellanox OFED驱动程序.查看Mellanox OFED内核驱动程序3.1-1.0.3版的源代码,我发现他们通过添加一些字段改变了struct ib_qp_init_attr的布局.我很确定您的问题在于:您是针对原始的SLE 3.0.76-0.11内核头文件构建模块的,因此传递给创建QP函数的init_qpattr结构中,您设置的那些值并没有落在正确的位置上. 我不知道你是如何安装新的树外(out-of-tree)驱动程序的,所以无法确切地告诉你应该如何正确构建你的模块,但你可以尝试在设置该结构的地方添加类似下面的语句: init_qpattr.qpg_type = 0; (我知道你已经用memset把整个结构清零了,但加上这一行可以确认你构建时所用的头文件中,该结构确实包含新的qpg_type成员.我认为这是OFED新增的字段,在你原来的内核头文件中并不存在,所以如果您的模块能够编译通过,就说明您是针对正确的头文件构建的.) 旧答案: 我怀疑你遇到了mlx4驱动程序中与创建如此小的QP有关的错误(max_send_wr == max_recv_wr == 2且max_send_sge == max_recv_sge == 1).我设法找到了你正在使用的3.0.76-0.11内核的源代码,不幸的是我没有看到任何明显的错误. 你可以尝试以下方法来帮助调试: > 加载mlx4_core模块时带上模块参数debug_level=1,然后把驱动程序初始化时的全部输出(关于“最大CQE:”等的一系列行)更新到您的问题中.mlx4驱动程序中有相当多的逻辑依赖于固件(firmware)在初始化期间返回的参数,这些输出可以让我们看到这些参数的具体取值. (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容! |