diff options
Diffstat (limited to 'drivers/infiniband/core')
23 files changed, 1386 insertions, 241 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index f2a84c6..e9cf51b 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -37,6 +37,7 @@ #include <linux/inetdevice.h> #include <linux/slab.h> #include <linux/workqueue.h> +#include <linux/module.h> #include <net/arp.h> #include <net/neighbour.h> #include <net/route.h> diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index f9ba7d7..9353992 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -302,7 +302,8 @@ static void ib_cache_event(struct ib_event_handler *handler, event->event == IB_EVENT_LID_CHANGE || event->event == IB_EVENT_PKEY_CHANGE || event->event == IB_EVENT_SM_CHANGE || - event->event == IB_EVENT_CLIENT_REREGISTER) { + event->event == IB_EVENT_CLIENT_REREGISTER || + event->event == IB_EVENT_GID_CHANGE) { work = kmalloc(sizeof *work, GFP_ATOMIC); if (work) { INIT_WORK(&work->work, ib_cache_task); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index fc0f2bd..8b72f39 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -36,6 +36,7 @@ #include <linux/completion.h> #include <linux/dma-mapping.h> #include <linux/device.h> +#include <linux/module.h> #include <linux/err.h> #include <linux/idr.h> #include <linux/interrupt.h> @@ -889,6 +890,8 @@ retest: break; case IB_CM_ESTABLISHED: spin_unlock_irq(&cm_id_priv->lock); + if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) + break; ib_send_cm_dreq(cm_id, NULL, 0); goto retest; case IB_CM_DREQ_SENT: @@ -1008,7 +1011,6 @@ static void cm_format_req(struct cm_req_msg *req_msg, req_msg->service_id = param->service_id; req_msg->local_ca_guid = cm_id_priv->id.device->node_guid; cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num)); - cm_req_set_resp_res(req_msg, param->responder_resources); cm_req_set_init_depth(req_msg, param->initiator_depth); cm_req_set_remote_resp_timeout(req_msg, 
param->remote_cm_response_timeout); @@ -1017,12 +1019,16 @@ static void cm_format_req(struct cm_req_msg *req_msg, cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn)); cm_req_set_local_resp_timeout(req_msg, param->local_cm_response_timeout); - cm_req_set_retry_count(req_msg, param->retry_count); req_msg->pkey = param->primary_path->pkey; cm_req_set_path_mtu(req_msg, param->primary_path->mtu); - cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); cm_req_set_max_cm_retries(req_msg, param->max_cm_retries); - cm_req_set_srq(req_msg, param->srq); + + if (param->qp_type != IB_QPT_XRC_INI) { + cm_req_set_resp_res(req_msg, param->responder_resources); + cm_req_set_retry_count(req_msg, param->retry_count); + cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); + cm_req_set_srq(req_msg, param->srq); + } if (pri_path->hop_limit <= 1) { req_msg->primary_local_lid = pri_path->slid; @@ -1080,7 +1086,8 @@ static int cm_validate_req_param(struct ib_cm_req_param *param) if (!param->primary_path) return -EINVAL; - if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC) + if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC && + param->qp_type != IB_QPT_XRC_INI) return -EINVAL; if (param->private_data && @@ -1601,18 +1608,24 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg, cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); rep_msg->local_comm_id = cm_id_priv->id.local_id; rep_msg->remote_comm_id = cm_id_priv->id.remote_id; - cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); rep_msg->resp_resources = param->responder_resources; - rep_msg->initiator_depth = param->initiator_depth; cm_rep_set_target_ack_delay(rep_msg, cm_id_priv->av.port->cm_dev->ack_delay); cm_rep_set_failover(rep_msg, param->failover_accepted); - cm_rep_set_flow_ctrl(rep_msg, param->flow_control); cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); - 
cm_rep_set_srq(rep_msg, param->srq); rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid; + if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) { + rep_msg->initiator_depth = param->initiator_depth; + cm_rep_set_flow_ctrl(rep_msg, param->flow_control); + cm_rep_set_srq(rep_msg, param->srq); + cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); + } else { + cm_rep_set_srq(rep_msg, 1); + cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num)); + } + if (param->private_data && param->private_data_len) memcpy(rep_msg->private_data, param->private_data, param->private_data_len); @@ -1660,7 +1673,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg); - cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg); + cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; @@ -1731,7 +1744,7 @@ error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); } EXPORT_SYMBOL(ib_send_cm_rtu); -static void cm_format_rep_event(struct cm_work *work) +static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type) { struct cm_rep_msg *rep_msg; struct ib_cm_rep_event_param *param; @@ -1740,7 +1753,7 @@ static void cm_format_rep_event(struct cm_work *work) param = &work->cm_event.param.rep_rcvd; param->remote_ca_guid = rep_msg->local_ca_guid; param->remote_qkey = be32_to_cpu(rep_msg->local_qkey); - param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg)); + param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type)); param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg)); param->responder_resources = rep_msg->initiator_depth; param->initiator_depth = rep_msg->resp_resources; @@ -1808,7 +1821,7 @@ static int cm_rep_handler(struct cm_work *work) return -EINVAL; } - cm_format_rep_event(work); + cm_format_rep_event(work, 
cm_id_priv->qp_type); spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { @@ -1823,7 +1836,7 @@ static int cm_rep_handler(struct cm_work *work) cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; - cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg); + cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); spin_lock(&cm.lock); /* Check for duplicate REP. */ @@ -1850,7 +1863,7 @@ static int cm_rep_handler(struct cm_work *work) cm_id_priv->id.state = IB_CM_REP_RCVD; cm_id_priv->id.remote_id = rep_msg->local_comm_id; - cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg); + cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); cm_id_priv->initiator_depth = rep_msg->resp_resources; cm_id_priv->responder_resources = rep_msg->initiator_depth; cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); @@ -3492,7 +3505,8 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); - if (cm_id_priv->qp_type == IB_QPT_RC) { + if (cm_id_priv->qp_type == IB_QPT_RC || + cm_id_priv->qp_type == IB_QPT_XRC_TGT) { *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; qp_attr->max_dest_rd_atomic = @@ -3537,15 +3551,21 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) { *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); - if (cm_id_priv->qp_type == IB_QPT_RC) { - *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | - IB_QP_RNR_RETRY | + switch (cm_id_priv->qp_type) { + case IB_QPT_RC: + case IB_QPT_XRC_INI: + *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC; - qp_attr->timeout = cm_id_priv->av.timeout; qp_attr->retry_cnt = 
cm_id_priv->retry_count; qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; - qp_attr->max_rd_atomic = - cm_id_priv->initiator_depth; + qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; + /* fall through */ + case IB_QPT_XRC_TGT: + *qp_attr_mask |= IB_QP_TIMEOUT; + qp_attr->timeout = cm_id_priv->av.timeout; + break; + default: + break; } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_PATH_MIG_STATE; diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 7e63c08..505db2a 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2011 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * @@ -86,7 +86,7 @@ struct cm_req_msg { __be16 pkey; /* path MTU:4, RDC exists:1, RNR retry count:3. */ u8 offset50; - /* max CM Retries:4, SRQ:1, rsvd:3 */ + /* max CM Retries:4, SRQ:1, extended transport type:3 */ u8 offset51; __be16 primary_local_lid; @@ -175,6 +175,11 @@ static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg) switch(transport_type) { case 0: return IB_QPT_RC; case 1: return IB_QPT_UC; + case 3: + switch (req_msg->offset51 & 0x7) { + case 1: return IB_QPT_XRC_TGT; + default: return 0; + } default: return 0; } } @@ -188,6 +193,12 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg, req_msg->offset40) & 0xFFFFFFF9) | 0x2); break; + case IB_QPT_XRC_INI: + req_msg->offset40 = cpu_to_be32((be32_to_cpu( + req_msg->offset40) & + 0xFFFFFFF9) | 0x6); + req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1; + break; default: req_msg->offset40 = cpu_to_be32(be32_to_cpu( req_msg->offset40) & @@ -527,6 +538,23 @@ static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn) (be32_to_cpu(rep_msg->offset12) & 0x000000FF)); } +static 
inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg) +{ + return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8); +} + +static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn) +{ + rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) | + (be32_to_cpu(rep_msg->offset16) & 0x000000FF)); +} + +static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type) +{ + return (qp_type == IB_QPT_XRC_INI) ? + cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg); +} + static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg) { return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index b6a33b3..d0d4aa9 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -41,6 +41,7 @@ #include <linux/idr.h> #include <linux/inetdevice.h> #include <linux/slab.h> +#include <linux/module.h> #include <net/tcp.h> #include <net/ipv6.h> @@ -81,6 +82,7 @@ static DEFINE_IDR(sdp_ps); static DEFINE_IDR(tcp_ps); static DEFINE_IDR(udp_ps); static DEFINE_IDR(ipoib_ps); +static DEFINE_IDR(ib_ps); struct cma_device { struct list_head list; @@ -359,6 +361,10 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ? 
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; + if (dev_ll != IB_LINK_LAYER_INFINIBAND && + id_priv->id.ps == RDMA_PS_IPOIB) + return -EINVAL; + mutex_lock(&lock); iboe_addr_get_sgid(dev_addr, &iboe_gid); memcpy(&gid, dev_addr->src_dev_addr + @@ -406,11 +412,6 @@ static int cma_disable_callback(struct rdma_id_private *id_priv, return 0; } -static int cma_has_cm_dev(struct rdma_id_private *id_priv) -{ - return (id_priv->id.device && id_priv->cm_id.ib); -} - struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps, enum ib_qp_type qp_type) @@ -920,11 +921,11 @@ void rdma_destroy_id(struct rdma_cm_id *id) if (id_priv->cma_dev) { switch (rdma_node_get_transport(id_priv->id.device->node_type)) { case RDMA_TRANSPORT_IB: - if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) + if (id_priv->cm_id.ib) ib_destroy_cm_id(id_priv->cm_id.ib); break; case RDMA_TRANSPORT_IWARP: - if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw)) + if (id_priv->cm_id.iw) iw_destroy_cm_id(id_priv->cm_id.iw); break; default: @@ -1085,12 +1086,12 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, if (cma_get_net_info(ib_event->private_data, listen_id->ps, &ip_ver, &port, &src, &dst)) - goto err; + return NULL; id = rdma_create_id(listen_id->event_handler, listen_id->context, listen_id->ps, ib_event->param.req_rcvd.qp_type); if (IS_ERR(id)) - goto err; + return NULL; cma_save_net_info(&id->route.addr, &listen_id->route.addr, ip_ver, port, src, dst); @@ -1100,7 +1101,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL); if (!rt->path_rec) - goto destroy_id; + goto err; rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; if (rt->num_paths == 2) @@ -1114,7 +1115,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr, 
&rt->addr.dev_addr); if (ret) - goto destroy_id; + goto err; } rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); @@ -1122,9 +1123,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, id_priv->state = RDMA_CM_CONNECT; return id_priv; -destroy_id: - rdma_destroy_id(id); err: + rdma_destroy_id(id); return NULL; } @@ -1181,6 +1181,15 @@ static void cma_set_req_event_data(struct rdma_cm_event *event, event->param.conn.qp_num = req_data->remote_qpn; } +static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) +{ + return (((ib_event->event == IB_CM_REQ_RECEIVED) && + (ib_event->param.req_rcvd.qp_type == id->qp_type)) || + ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && + (id->qp_type == IB_QPT_UD)) || + (!id->qp_type)); +} + static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *listen_id, *conn_id; @@ -1188,13 +1197,16 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) int offset, ret; listen_id = cm_id->context; + if (!cma_check_req_qp_type(&listen_id->id, ib_event)) + return -EINVAL; + if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) return -ECONNABORTED; memset(&event, 0, sizeof event); offset = cma_user_data_offset(listen_id->id.ps); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; - if (listen_id->id.qp_type == IB_QPT_UD) { + if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { conn_id = cma_new_udp_id(&listen_id->id, ib_event); event.param.ud.private_data = ib_event->private_data + offset; event.param.ud.private_data_len = @@ -1330,6 +1342,8 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) switch (iw_event->status) { case 0: event.event = RDMA_CM_EVENT_ESTABLISHED; + event.param.conn.initiator_depth = iw_event->ird; + event.param.conn.responder_resources = iw_event->ord; break; case -ECONNRESET: case -ECONNREFUSED: @@ -1345,6 +1359,8 @@ static int cma_iw_handler(struct iw_cm_id 
*iw_id, struct iw_cm_event *iw_event) break; case IW_CM_EVENT_ESTABLISHED: event.event = RDMA_CM_EVENT_ESTABLISHED; + event.param.conn.initiator_depth = iw_event->ird; + event.param.conn.responder_resources = iw_event->ord; break; default: BUG_ON(1); @@ -1435,8 +1451,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, event.event = RDMA_CM_EVENT_CONNECT_REQUEST; event.param.conn.private_data = iw_event->private_data; event.param.conn.private_data_len = iw_event->private_data_len; - event.param.conn.initiator_depth = attr.max_qp_init_rd_atom; - event.param.conn.responder_resources = attr.max_qp_rd_atom; + event.param.conn.initiator_depth = iw_event->ird; + event.param.conn.responder_resources = iw_event->ord; /* * Protect against the user destroying conn_id from another thread @@ -1468,13 +1484,15 @@ static int cma_ib_listen(struct rdma_id_private *id_priv) { struct ib_cm_compare_data compare_data; struct sockaddr *addr; + struct ib_cm_id *id; __be64 svc_id; int ret; - id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler, - id_priv); - if (IS_ERR(id_priv->cm_id.ib)) - return PTR_ERR(id_priv->cm_id.ib); + id = ib_create_cm_id(id_priv->id.device, cma_req_handler, id_priv); + if (IS_ERR(id)) + return PTR_ERR(id); + + id_priv->cm_id.ib = id; addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; svc_id = cma_get_service_id(id_priv->id.ps, addr); @@ -1497,12 +1515,15 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) { int ret; struct sockaddr_in *sin; + struct iw_cm_id *id; + + id = iw_create_cm_id(id_priv->id.device, + iw_conn_req_handler, + id_priv); + if (IS_ERR(id)) + return PTR_ERR(id); - id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device, - iw_conn_req_handler, - id_priv); - if (IS_ERR(id_priv->cm_id.iw)) - return PTR_ERR(id_priv->cm_id.iw); + id_priv->cm_id.iw = id; sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; id_priv->cm_id.iw->local_addr = *sin; @@ -2231,6 +2252,9 @@ static int 
cma_get_port(struct rdma_id_private *id_priv) case RDMA_PS_IPOIB: ps = &ipoib_ps; break; + case RDMA_PS_IB: + ps = &ib_ps; + break; default: return -EPROTONOSUPPORT; } @@ -2484,10 +2508,14 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, { struct ib_cm_sidr_req_param req; struct rdma_route *route; + struct ib_cm_id *id; int ret; req.private_data_len = sizeof(struct cma_hdr) + conn_param->private_data_len; + if (req.private_data_len < conn_param->private_data_len) + return -EINVAL; + req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC); if (!req.private_data) return -ENOMEM; @@ -2501,12 +2529,13 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, if (ret) goto out; - id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, - cma_sidr_rep_handler, id_priv); - if (IS_ERR(id_priv->cm_id.ib)) { - ret = PTR_ERR(id_priv->cm_id.ib); + id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, + id_priv); + if (IS_ERR(id)) { + ret = PTR_ERR(id); goto out; } + id_priv->cm_id.ib = id; req.path = route->path_rec; req.service_id = cma_get_service_id(id_priv->id.ps, @@ -2530,11 +2559,15 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, struct ib_cm_req_param req; struct rdma_route *route; void *private_data; + struct ib_cm_id *id; int offset, ret; memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv->id.ps); req.private_data_len = offset + conn_param->private_data_len; + if (req.private_data_len < conn_param->private_data_len) + return -EINVAL; + private_data = kzalloc(req.private_data_len, GFP_ATOMIC); if (!private_data) return -ENOMEM; @@ -2543,12 +2576,12 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, memcpy(private_data + offset, conn_param->private_data, conn_param->private_data_len); - id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler, - id_priv); - if (IS_ERR(id_priv->cm_id.ib)) { - ret = PTR_ERR(id_priv->cm_id.ib); + id = ib_create_cm_id(id_priv->id.device, 
cma_ib_handler, id_priv); + if (IS_ERR(id)) { + ret = PTR_ERR(id); goto out; } + id_priv->cm_id.ib = id; route = &id_priv->id.route; ret = cma_format_hdr(private_data, id_priv->id.ps, route); @@ -2563,7 +2596,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, req.service_id = cma_get_service_id(id_priv->id.ps, (struct sockaddr *) &route->addr.dst_addr); req.qp_num = id_priv->qp_num; - req.qp_type = IB_QPT_RC; + req.qp_type = id_priv->id.qp_type; req.starting_psn = id_priv->seq_num; req.responder_resources = conn_param->responder_resources; req.initiator_depth = conn_param->initiator_depth; @@ -2577,8 +2610,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, ret = ib_send_cm_req(id_priv->cm_id.ib, &req); out: - if (ret && !IS_ERR(id_priv->cm_id.ib)) { - ib_destroy_cm_id(id_priv->cm_id.ib); + if (ret && !IS_ERR(id)) { + ib_destroy_cm_id(id); id_priv->cm_id.ib = NULL; } @@ -2595,10 +2628,8 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, struct iw_cm_conn_param iw_param; cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); - if (IS_ERR(cm_id)) { - ret = PTR_ERR(cm_id); - goto out; - } + if (IS_ERR(cm_id)) + return PTR_ERR(cm_id); id_priv->cm_id.iw = cm_id; @@ -2612,17 +2643,19 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, if (ret) goto out; - iw_param.ord = conn_param->initiator_depth; - iw_param.ird = conn_param->responder_resources; - iw_param.private_data = conn_param->private_data; - iw_param.private_data_len = conn_param->private_data_len; - if (id_priv->id.qp) + if (conn_param) { + iw_param.ord = conn_param->initiator_depth; + iw_param.ird = conn_param->responder_resources; + iw_param.private_data = conn_param->private_data; + iw_param.private_data_len = conn_param->private_data_len; + iw_param.qpn = id_priv->id.qp ? 
id_priv->qp_num : conn_param->qp_num; + } else { + memset(&iw_param, 0, sizeof iw_param); iw_param.qpn = id_priv->qp_num; - else - iw_param.qpn = conn_param->qp_num; + } ret = iw_cm_connect(cm_id, &iw_param); out: - if (ret && !IS_ERR(cm_id)) { + if (ret) { iw_destroy_cm_id(cm_id); id_priv->cm_id.iw = NULL; } @@ -2761,14 +2794,20 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - if (id->qp_type == IB_QPT_UD) - ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, - conn_param->private_data, - conn_param->private_data_len); - else if (conn_param) - ret = cma_accept_ib(id_priv, conn_param); - else - ret = cma_rep_recv(id_priv); + if (id->qp_type == IB_QPT_UD) { + if (conn_param) + ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, + conn_param->private_data, + conn_param->private_data_len); + else + ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, + NULL, 0); + } else { + if (conn_param) + ret = cma_accept_ib(id_priv, conn_param); + else + ret = cma_rep_recv(id_priv); + } break; case RDMA_TRANSPORT_IWARP: ret = cma_accept_iw(id_priv, conn_param); @@ -2795,7 +2834,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_has_cm_dev(id_priv)) + if (!id_priv->cm_id.ib) return -EINVAL; switch (id->device->node_type) { @@ -2817,7 +2856,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_has_cm_dev(id_priv)) + if (!id_priv->cm_id.ib) return -EINVAL; switch (rdma_node_get_transport(id->device->node_type)) { @@ -2848,7 +2887,7 @@ int rdma_disconnect(struct rdma_cm_id *id) int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_has_cm_dev(id_priv)) + if (!id_priv->cm_id.ib) return -EINVAL; switch (rdma_node_get_transport(id->device->node_type)) { @@ -3456,6 +3495,7 @@ 
static void __exit cma_cleanup(void) idr_destroy(&tcp_ps); idr_destroy(&udp_ps); idr_destroy(&ipoib_ps); + idr_destroy(&ib_ps); } module_init(cma_init); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 4007f72..e711de4 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -627,6 +627,9 @@ int ib_modify_device(struct ib_device *device, int device_modify_mask, struct ib_device_modify *device_modify) { + if (!device->modify_device) + return -ENOSYS; + return device->modify_device(device, device_modify_mask, device_modify); } @@ -647,6 +650,9 @@ int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify) { + if (!device->modify_port) + return -ENOSYS; + if (port_num < start_port(device) || port_num > end_port(device)) return -EINVAL; diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 4507043..176c8f9 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -33,6 +33,7 @@ #include <linux/errno.h> #include <linux/spinlock.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/jhash.h> #include <linux/kthread.h> diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index a9c0423..2882556 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -45,6 +45,8 @@ #include <linux/workqueue.h> #include <linux/completion.h> #include <linux/slab.h> +#include <linux/module.h> +#include <linux/sysctl.h> #include <rdma/iw_cm.h> #include <rdma/ib_addr.h> @@ -64,6 +66,20 @@ struct iwcm_work { struct list_head free_list; }; +static unsigned int default_backlog = 256; + +static struct ctl_table_header *iwcm_ctl_table_hdr; +static struct ctl_table iwcm_ctl_table[] = { + { + .procname = "default_backlog", + .data = &default_backlog, + .maxlen = sizeof(default_backlog), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { 
} +}; + /* * The following services provide a mechanism for pre-allocating iwcm_work * elements. The design pre-allocates them based on the cm_id type: @@ -418,6 +434,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + if (!backlog) + backlog = default_backlog; + ret = alloc_work_entries(cm_id_priv, backlog); if (ret) return ret; @@ -1013,17 +1032,33 @@ int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, } EXPORT_SYMBOL(iw_cm_init_qp_attr); +static struct ctl_path iwcm_ctl_path[] = { + { .procname = "net" }, + { .procname = "iw_cm" }, + { } +}; + static int __init iw_cm_init(void) { iwcm_wq = create_singlethread_workqueue("iw_cm_wq"); if (!iwcm_wq) return -ENOMEM; + iwcm_ctl_table_hdr = register_net_sysctl_table(&init_net, + iwcm_ctl_path, + iwcm_ctl_table); + if (!iwcm_ctl_table_hdr) { + pr_err("iw_cm: couldn't register sysctl paths\n"); + destroy_workqueue(iwcm_wq); + return -ENOMEM; + } + return 0; } static void __exit iw_cm_cleanup(void) { + unregister_net_sysctl_table(iwcm_ctl_table_hdr); destroy_workqueue(iwcm_wq); } diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index b4d8672..2fe428b 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -35,6 +35,7 @@ */ #include <linux/dma-mapping.h> #include <linux/slab.h> +#include <linux/module.h> #include <rdma/ib_cache.h> #include "mad_priv.h" @@ -1596,6 +1597,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv, mad->mad_hdr.class_version].class; if (!class) goto out; + if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >= + IB_MGMT_MAX_METHODS) + goto out; method = class->method_table[convert_mgmt_class( mad->mad_hdr.mgmt_class)]; if (method) diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 68b4162..d2360a8 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -34,6 +34,7 @@ #include <linux/dma-mapping.h> 
#include <linux/err.h> #include <linux/interrupt.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/bitops.h> #include <linux/random.h> diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 9227f4a..d1c8196 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -32,6 +32,7 @@ #define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ +#include <linux/export.h> #include <net/netlink.h> #include <net/net_namespace.h> #include <net/sock.h> diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c index 019bd4b..1b65986 100644 --- a/drivers/infiniband/core/packer.c +++ b/drivers/infiniband/core/packer.c @@ -31,6 +31,7 @@ * SOFTWARE. */ +#include <linux/export.h> #include <linux/string.h> #include <rdma/ib_pack.h> diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 9ab5df7..c61bca3 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -35,6 +35,7 @@ #include "core_priv.h" #include <linux/slab.h> +#include <linux/stat.h> #include <linux/string.h> #include <rdma/ib_mad.h> @@ -185,17 +186,35 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; + rate = (25 * attr.active_speed) / 10; + switch (attr.active_speed) { - case 2: speed = " DDR"; break; - case 4: speed = " QDR"; break; + case 2: + speed = " DDR"; + break; + case 4: + speed = " QDR"; + break; + case 8: + speed = " FDR10"; + rate = 10; + break; + case 16: + speed = " FDR"; + rate = 14; + break; + case 32: + speed = " EDR"; + rate = 25; + break; } - rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed; + rate *= ib_width_enum_to_int(attr.active_width); if (rate < 0) return -EINVAL; return sprintf(buf, "%d%s Gb/sec (%dX%s)\n", - rate / 10, rate % 10 ? ".5" : "", + rate, (attr.active_speed == 1) ? 
".5" : "", ib_width_enum_to_int(attr.active_width), speed); } diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 08f948d..b8a0b4a 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1122,7 +1122,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; - if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) + if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; if (hdr.in + sizeof(hdr) > len) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 71be5ee..b37b0c0 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -41,6 +41,7 @@ #include <linux/miscdevice.h> #include <linux/slab.h> #include <linux/sysctl.h> +#include <linux/module.h> #include <rdma/rdma_user_cm.h> #include <rdma/ib_marshall.h> @@ -276,7 +277,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, ucma_set_event_context(ctx, event, uevent); uevent->resp.event = event->event; uevent->resp.status = event->status; - if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB) + if (cm_id->qp_type == IB_QPT_UD) ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); else ucma_copy_conn_event(&uevent->resp.param.conn, @@ -377,6 +378,9 @@ static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_ case RDMA_PS_IPOIB: *qp_type = IB_QPT_UD; return 0; + case RDMA_PS_IB: + *qp_type = cmd->qp_type; + return 0; default: return -EINVAL; } @@ -1270,7 +1274,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; - if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) + if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) return -EINVAL; if (hdr.in + sizeof(hdr) > len) diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index 9b737ff..72feee6 100644 --- 
a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -33,6 +33,7 @@ #include <linux/errno.h> #include <linux/string.h> +#include <linux/export.h> #include <linux/if_ether.h> #include <rdma/ib_pack.h> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index b645e55..c1fef27 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -35,6 +35,7 @@ #include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/sched.h> +#include <linux/export.h> #include <linux/hugetlb.h> #include <linux/dma-attrs.h> #include <linux/slab.h> @@ -93,6 +94,17 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (dmasync) dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); + if (!size) + return ERR_PTR(-EINVAL); + + /* + * If the combination of the addr and size requested for this memory + * region causes an integer overflow, return error. + */ + if (((addr + size) < addr) || + PAGE_ALIGN(addr + size) < (addr + size)) + return ERR_PTR(-EINVAL); + if (!can_do_mlock()) return ERR_PTR(-EPERM); @@ -136,7 +148,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, down_write(¤t->mm->mmap_sem); - locked = npages + current->mm->locked_vm; + locked = npages + current->mm->pinned_vm; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { @@ -206,7 +218,7 @@ out: __ib_umem_release(context->device, umem, 0); kfree(umem); } else - current->mm->locked_vm = locked; + current->mm->pinned_vm = locked; up_write(¤t->mm->mmap_sem); if (vma_list) @@ -222,7 +234,7 @@ static void ib_umem_account(struct work_struct *work) struct ib_umem *umem = container_of(work, struct ib_umem, work); down_write(&umem->mm->mmap_sem); - umem->mm->locked_vm -= umem->diff; + umem->mm->pinned_vm -= umem->diff; up_write(&umem->mm->mmap_sem); mmput(umem->mm); kfree(umem); @@ -268,7 +280,7 @@ void ib_umem_release(struct ib_umem *umem) } else 
down_write(&mm->mmap_sem); - current->mm->locked_vm -= diff; + current->mm->pinned_vm -= diff; up_write(&mm->mmap_sem); mmput(mm); kfree(umem); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 8d261b6..c685881 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -458,8 +458,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err; } - if (packet->mad.hdr.id < 0 || - packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { + if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { ret = -EINVAL; goto err; } @@ -703,7 +702,7 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); - if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { + if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { ret = -EINVAL; goto out; } @@ -781,27 +780,19 @@ static int ib_umad_open(struct inode *inode, struct file *filp) { struct ib_umad_port *port; struct ib_umad_file *file; - int ret; + int ret = -ENXIO; port = container_of(inode->i_cdev, struct ib_umad_port, cdev); - if (port) - kref_get(&port->umad_dev->ref); - else - return -ENXIO; mutex_lock(&port->file_mutex); - if (!port->ib_dev) { - ret = -ENXIO; + if (!port->ib_dev) goto out; - } + ret = -ENOMEM; file = kzalloc(sizeof *file, GFP_KERNEL); - if (!file) { - kref_put(&port->umad_dev->ref, ib_umad_release_dev); - ret = -ENOMEM; + if (!file) goto out; - } mutex_init(&file->mutex); spin_lock_init(&file->send_lock); @@ -815,6 +806,13 @@ static int ib_umad_open(struct inode *inode, struct file *filp) list_add_tail(&file->port_list, &port->file_list); ret = nonseekable_open(inode, filp); + if (ret) { + list_del(&file->port_list); + kfree(file); + goto out; + } + + kref_get(&port->umad_dev->ref); out: mutex_unlock(&port->file_mutex); @@ -881,10 +879,6 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) int ret; port = 
container_of(inode->i_cdev, struct ib_umad_port, sm_cdev); - if (port) - kref_get(&port->umad_dev->ref); - else - return -ENXIO; if (filp->f_flags & O_NONBLOCK) { if (down_trylock(&port->sm_sem)) { @@ -899,17 +893,27 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) } ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); - if (ret) { - up(&port->sm_sem); - goto fail; - } + if (ret) + goto err_up_sem; filp->private_data = port; - return nonseekable_open(inode, filp); + ret = nonseekable_open(inode, filp); + if (ret) + goto err_clr_sm_cap; + + kref_get(&port->umad_dev->ref); + + return 0; + +err_clr_sm_cap: + swap(props.set_port_cap_mask, props.clr_port_cap_mask); + ib_modify_port(port->ib_dev, port->port_num, 0, &props); + +err_up_sem: + up(&port->sm_sem); fail: - kref_put(&port->umad_dev->ref, ib_umad_release_dev); return ret; } diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index a078e56..228af18 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -69,13 +69,16 @@ */ struct ib_uverbs_device { - struct kref ref; + atomic_t refcount; int num_comp_vectors; struct completion comp; struct device *dev; struct ib_device *ib_dev; int devnum; struct cdev cdev; + struct rb_root xrcd_tree; + struct mutex xrcd_tree_mutex; + struct kobject kobj; }; struct ib_uverbs_event_file { @@ -120,6 +123,16 @@ struct ib_uevent_object { u32 events_reported; }; +struct ib_uxrcd_object { + struct ib_uobject uobject; + atomic_t refcnt; +}; + +struct ib_usrq_object { + struct ib_uevent_object uevent; + struct ib_uxrcd_object *uxrcd; +}; + struct ib_uqp_object { struct ib_uevent_object uevent; struct list_head mcast_list; @@ -142,6 +155,7 @@ extern struct idr ib_uverbs_ah_idr; extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; +extern struct idr ib_uverbs_xrcd_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); @@ -161,6 
+175,7 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); +void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd); #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ @@ -181,6 +196,7 @@ IB_UVERBS_DECLARE_CMD(poll_cq); IB_UVERBS_DECLARE_CMD(req_notify_cq); IB_UVERBS_DECLARE_CMD(destroy_cq); IB_UVERBS_DECLARE_CMD(create_qp); +IB_UVERBS_DECLARE_CMD(open_qp); IB_UVERBS_DECLARE_CMD(query_qp); IB_UVERBS_DECLARE_CMD(modify_qp); IB_UVERBS_DECLARE_CMD(destroy_qp); @@ -195,5 +211,8 @@ IB_UVERBS_DECLARE_CMD(create_srq); IB_UVERBS_DECLARE_CMD(modify_srq); IB_UVERBS_DECLARE_CMD(query_srq); IB_UVERBS_DECLARE_CMD(destroy_srq); +IB_UVERBS_DECLARE_CMD(create_xsrq); +IB_UVERBS_DECLARE_CMD(open_xrcd); +IB_UVERBS_DECLARE_CMD(close_xrcd); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index c426992..3be21aa 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -47,6 +47,7 @@ static struct lock_class_key cq_lock_key; static struct lock_class_key qp_lock_key; static struct lock_class_key ah_lock_key; static struct lock_class_key srq_lock_key; +static struct lock_class_key xrcd_lock_key; #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ do { \ @@ -240,11 +241,24 @@ static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); } +static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) +{ + struct ib_uobject *uobj; + + uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context); + return uobj ? 
uobj->object : NULL; +} + static void put_qp_read(struct ib_qp *qp) { put_uobj_read(qp->uobject); } +static void put_qp_write(struct ib_qp *qp) +{ + put_uobj_write(qp->uobject); +} + static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) { return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0); @@ -255,6 +269,18 @@ static void put_srq_read(struct ib_srq *srq) put_uobj_read(srq->uobject); } +static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context, + struct ib_uobject **uobj) +{ + *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0); + return *uobj ? (*uobj)->object : NULL; +} + +static void put_xrcd_read(struct ib_uobject *uobj) +{ + put_uobj_read(uobj); +} + ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -298,6 +324,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->qp_list); INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); + INIT_LIST_HEAD(&ucontext->xrcd_list); ucontext->closing = 0; resp.num_comp_vectors = file->device->num_comp_vectors; @@ -579,6 +606,310 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, return in_len; } +struct xrcd_table_entry { + struct rb_node node; + struct ib_xrcd *xrcd; + struct inode *inode; +}; + +static int xrcd_table_insert(struct ib_uverbs_device *dev, + struct inode *inode, + struct ib_xrcd *xrcd) +{ + struct xrcd_table_entry *entry, *scan; + struct rb_node **p = &dev->xrcd_tree.rb_node; + struct rb_node *parent = NULL; + + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->xrcd = xrcd; + entry->inode = inode; + + while (*p) { + parent = *p; + scan = rb_entry(parent, struct xrcd_table_entry, node); + + if (inode < scan->inode) { + p = &(*p)->rb_left; + } else if (inode > scan->inode) { + p = &(*p)->rb_right; + } else { + kfree(entry); + return -EEXIST; + } + } + + 
rb_link_node(&entry->node, parent, p); + rb_insert_color(&entry->node, &dev->xrcd_tree); + igrab(inode); + return 0; +} + +static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev, + struct inode *inode) +{ + struct xrcd_table_entry *entry; + struct rb_node *p = dev->xrcd_tree.rb_node; + + while (p) { + entry = rb_entry(p, struct xrcd_table_entry, node); + + if (inode < entry->inode) + p = p->rb_left; + else if (inode > entry->inode) + p = p->rb_right; + else + return entry; + } + + return NULL; +} + +static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode) +{ + struct xrcd_table_entry *entry; + + entry = xrcd_table_search(dev, inode); + if (!entry) + return NULL; + + return entry->xrcd; +} + +static void xrcd_table_delete(struct ib_uverbs_device *dev, + struct inode *inode) +{ + struct xrcd_table_entry *entry; + + entry = xrcd_table_search(dev, inode); + if (entry) { + iput(inode); + rb_erase(&entry->node, &dev->xrcd_tree); + kfree(entry); + } +} + +ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_open_xrcd cmd; + struct ib_uverbs_open_xrcd_resp resp; + struct ib_udata udata; + struct ib_uxrcd_object *obj; + struct ib_xrcd *xrcd = NULL; + struct file *f = NULL; + struct inode *inode = NULL; + int ret = 0; + int new_xrcd = 0; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + mutex_lock(&file->device->xrcd_tree_mutex); + + if (cmd.fd != -1) { + /* search for file descriptor */ + f = fget(cmd.fd); + if (!f) { + ret = -EBADF; + goto err_tree_mutex_unlock; + } + + inode = f->f_dentry->d_inode; + if (!inode) { + ret = -EBADF; + goto err_tree_mutex_unlock; + } + + xrcd = find_xrcd(file->device, inode); + if (!xrcd && !(cmd.oflags & O_CREAT)) { 
+ /* no file descriptor. Need CREATE flag */ + ret = -EAGAIN; + goto err_tree_mutex_unlock; + } + + if (xrcd && cmd.oflags & O_EXCL) { + ret = -EINVAL; + goto err_tree_mutex_unlock; + } + } + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) { + ret = -ENOMEM; + goto err_tree_mutex_unlock; + } + + init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_key); + + down_write(&obj->uobject.mutex); + + if (!xrcd) { + xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev, + file->ucontext, &udata); + if (IS_ERR(xrcd)) { + ret = PTR_ERR(xrcd); + goto err; + } + + xrcd->inode = inode; + xrcd->device = file->device->ib_dev; + atomic_set(&xrcd->usecnt, 0); + mutex_init(&xrcd->tgt_qp_mutex); + INIT_LIST_HEAD(&xrcd->tgt_qp_list); + new_xrcd = 1; + } + + atomic_set(&obj->refcnt, 0); + obj->uobject.object = xrcd; + ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); + if (ret) + goto err_idr; + + memset(&resp, 0, sizeof resp); + resp.xrcd_handle = obj->uobject.id; + + if (inode) { + if (new_xrcd) { + /* create new inode/xrcd table entry */ + ret = xrcd_table_insert(file->device, inode, xrcd); + if (ret) + goto err_insert_xrcd; + } + atomic_inc(&xrcd->usecnt); + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_copy; + } + + if (f) + fput(f); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); + mutex_unlock(&file->mutex); + + obj->uobject.live = 1; + up_write(&obj->uobject.mutex); + + mutex_unlock(&file->device->xrcd_tree_mutex); + return in_len; + +err_copy: + if (inode) { + if (new_xrcd) + xrcd_table_delete(file->device, inode); + atomic_dec(&xrcd->usecnt); + } + +err_insert_xrcd: + idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); + +err_idr: + ib_dealloc_xrcd(xrcd); + +err: + put_uobj_write(&obj->uobject); + +err_tree_mutex_unlock: + if (f) + fput(f); + + mutex_unlock(&file->device->xrcd_tree_mutex); + + return ret; +} + +ssize_t 
ib_uverbs_close_xrcd(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_close_xrcd cmd; + struct ib_uobject *uobj; + struct ib_xrcd *xrcd = NULL; + struct inode *inode = NULL; + struct ib_uxrcd_object *obj; + int live; + int ret = 0; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + mutex_lock(&file->device->xrcd_tree_mutex); + uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext); + if (!uobj) { + ret = -EINVAL; + goto out; + } + + xrcd = uobj->object; + inode = xrcd->inode; + obj = container_of(uobj, struct ib_uxrcd_object, uobject); + if (atomic_read(&obj->refcnt)) { + put_uobj_write(uobj); + ret = -EBUSY; + goto out; + } + + if (!inode || atomic_dec_and_test(&xrcd->usecnt)) { + ret = ib_dealloc_xrcd(uobj->object); + if (!ret) + uobj->live = 0; + } + + live = uobj->live; + if (inode && ret) + atomic_inc(&xrcd->usecnt); + + put_uobj_write(uobj); + + if (ret) + goto out; + + if (inode && !live) + xrcd_table_delete(file->device, inode); + + idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + ret = in_len; + +out: + mutex_unlock(&file->device->xrcd_tree_mutex); + return ret; +} + +void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, + struct ib_xrcd *xrcd) +{ + struct inode *inode; + + inode = xrcd->inode; + if (inode && !atomic_dec_and_test(&xrcd->usecnt)) + return; + + ib_dealloc_xrcd(xrcd); + + if (inode) + xrcd_table_delete(dev, inode); +} + ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -1052,9 +1383,12 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, struct ib_uverbs_create_qp_resp resp; struct ib_udata udata; struct ib_uqp_object *obj; - struct ib_pd *pd; - struct ib_cq *scq, *rcq; - struct ib_srq *srq; + struct ib_device *device; + struct ib_pd *pd = NULL; + struct ib_xrcd *xrcd = NULL; + struct 
ib_uobject *uninitialized_var(xrcd_uobj); + struct ib_cq *scq = NULL, *rcq = NULL; + struct ib_srq *srq = NULL; struct ib_qp *qp; struct ib_qp_init_attr attr; int ret; @@ -1076,15 +1410,39 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); down_write(&obj->uevent.uobject.mutex); - srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL; - pd = idr_read_pd(cmd.pd_handle, file->ucontext); - scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); - rcq = cmd.recv_cq_handle == cmd.send_cq_handle ? - scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1); + if (cmd.qp_type == IB_QPT_XRC_TGT) { + xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); + if (!xrcd) { + ret = -EINVAL; + goto err_put; + } + device = xrcd->device; + } else { + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); + if (!pd || !scq) { + ret = -EINVAL; + goto err_put; + } - if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) { - ret = -EINVAL; - goto err_put; + if (cmd.qp_type == IB_QPT_XRC_INI) { + cmd.max_recv_wr = cmd.max_recv_sge = 0; + } else { + if (cmd.is_srq) { + srq = idr_read_srq(cmd.srq_handle, file->ucontext); + if (!srq || srq->srq_type != IB_SRQT_BASIC) { + ret = -EINVAL; + goto err_put; + } + } + rcq = (cmd.recv_cq_handle == cmd.send_cq_handle) ? + scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1); + if (!rcq) { + ret = -EINVAL; + goto err_put; + } + } + device = pd->device; } attr.event_handler = ib_uverbs_qp_event_handler; @@ -1092,6 +1450,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, attr.send_cq = scq; attr.recv_cq = rcq; attr.srq = srq; + attr.xrcd = xrcd; attr.sq_sig_type = cmd.sq_sig_all ? 
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; attr.qp_type = cmd.qp_type; attr.create_flags = 0; @@ -1106,26 +1465,35 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); - qp = pd->device->create_qp(pd, &attr, &udata); + if (cmd.qp_type == IB_QPT_XRC_TGT) + qp = ib_create_qp(pd, &attr); + else + qp = device->create_qp(pd, &attr, &udata); + if (IS_ERR(qp)) { ret = PTR_ERR(qp); goto err_put; } - qp->device = pd->device; - qp->pd = pd; - qp->send_cq = attr.send_cq; - qp->recv_cq = attr.recv_cq; - qp->srq = attr.srq; - qp->uobject = &obj->uevent.uobject; - qp->event_handler = attr.event_handler; - qp->qp_context = attr.qp_context; - qp->qp_type = attr.qp_type; - atomic_inc(&pd->usecnt); - atomic_inc(&attr.send_cq->usecnt); - atomic_inc(&attr.recv_cq->usecnt); - if (attr.srq) - atomic_inc(&attr.srq->usecnt); + if (cmd.qp_type != IB_QPT_XRC_TGT) { + qp->real_qp = qp; + qp->device = device; + qp->pd = pd; + qp->send_cq = attr.send_cq; + qp->recv_cq = attr.recv_cq; + qp->srq = attr.srq; + qp->event_handler = attr.event_handler; + qp->qp_context = attr.qp_context; + qp->qp_type = attr.qp_type; + atomic_set(&qp->usecnt, 0); + atomic_inc(&pd->usecnt); + atomic_inc(&attr.send_cq->usecnt); + if (attr.recv_cq) + atomic_inc(&attr.recv_cq->usecnt); + if (attr.srq) + atomic_inc(&attr.srq->usecnt); + } + qp->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = qp; ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); @@ -1147,9 +1515,13 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, goto err_copy; } - put_pd_read(pd); - put_cq_read(scq); - if (rcq != scq) + if (xrcd) + put_xrcd_read(xrcd_uobj); + if (pd) + put_pd_read(pd); + if (scq) + put_cq_read(scq); + if (rcq && rcq != scq) put_cq_read(rcq); if (srq) put_srq_read(srq); @@ -1171,6 +1543,8 @@ err_destroy: ib_destroy_qp(qp); err_put: + if (xrcd) + put_xrcd_read(xrcd_uobj); if (pd) put_pd_read(pd); if (scq) @@ -1184,6 +1558,98 @@ 
err_put: return ret; } +ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, int out_len) +{ + struct ib_uverbs_open_qp cmd; + struct ib_uverbs_create_qp_resp resp; + struct ib_udata udata; + struct ib_uqp_object *obj; + struct ib_xrcd *xrcd; + struct ib_uobject *uninitialized_var(xrcd_uobj); + struct ib_qp *qp; + struct ib_qp_open_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); + down_write(&obj->uevent.uobject.mutex); + + xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); + if (!xrcd) { + ret = -EINVAL; + goto err_put; + } + + attr.event_handler = ib_uverbs_qp_event_handler; + attr.qp_context = file; + attr.qp_num = cmd.qpn; + attr.qp_type = cmd.qp_type; + + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); + INIT_LIST_HEAD(&obj->mcast_list); + + qp = ib_open_qp(xrcd, &attr); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto err_put; + } + + qp->uobject = &obj->uevent.uobject; + + obj->uevent.uobject.object = qp; + ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + if (ret) + goto err_destroy; + + memset(&resp, 0, sizeof resp); + resp.qpn = qp->qp_num; + resp.qp_handle = obj->uevent.uobject.id; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_remove; + } + + put_xrcd_read(xrcd_uobj); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); + mutex_unlock(&file->mutex); + + obj->uevent.uobject.live = 1; + + up_write(&obj->uevent.uobject.mutex); + + return in_len; + +err_remove: + 
idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + +err_destroy: + ib_destroy_qp(qp); + +err_put: + put_xrcd_read(xrcd_uobj); + put_uobj_write(&obj->uevent.uobject); + return ret; +} + ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -1284,6 +1750,20 @@ out: return ret ? ret : in_len; } +/* Remove ignored fields set in the attribute mask */ +static int modify_qp_mask(enum ib_qp_type qp_type, int mask) +{ + switch (qp_type) { + case IB_QPT_XRC_INI: + return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER); + case IB_QPT_XRC_TGT: + return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY); + default: + return mask; + } +} + ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -1356,7 +1836,12 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; - ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata); + if (qp->real_qp == qp) { + ret = qp->device->modify_qp(qp, attr, + modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata); + } else { + ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask)); + } put_qp_read(qp); @@ -1494,6 +1979,12 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, next->send_flags = user_wr->send_flags; if (is_ud) { + if (next->opcode != IB_WR_SEND && + next->opcode != IB_WR_SEND_WITH_IMM) { + ret = -EINVAL; + goto out_put; + } + next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext); if (!next->wr.ud.ah) { @@ -1530,9 +2021,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, user_wr->wr.atomic.compare_add; next->wr.atomic.swap = user_wr->wr.atomic.swap; next->wr.atomic.rkey = user_wr->wr.atomic.rkey; + case IB_WR_SEND: break; default: - break; + ret = -EINVAL; + goto out_put; } } @@ -1553,7 +2046,7 @@ ssize_t 
ib_uverbs_post_send(struct ib_uverbs_file *file, } resp.bad_wr = 0; - ret = qp->device->post_send(qp, wr, &bad_wr); + ret = qp->device->post_send(qp->real_qp, wr, &bad_wr); if (ret) for (next = wr; next; next = next->next) { ++resp.bad_wr; @@ -1691,7 +2184,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, goto out; resp.bad_wr = 0; - ret = qp->device->post_recv(qp, wr, &bad_wr); + ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr); put_qp_read(qp); @@ -1904,7 +2397,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = idr_write_qp(cmd.qp_handle, file->ucontext); if (!qp) return -EINVAL; @@ -1933,7 +2426,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, kfree(mcast); out_put: - put_qp_read(qp); + put_qp_write(qp); return ret ? ret : in_len; } @@ -1951,7 +2444,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = idr_write_qp(cmd.qp_handle, file->ucontext); if (!qp) return -EINVAL; @@ -1970,112 +2463,204 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, } out_put: - put_qp_read(qp); + put_qp_write(qp); return ret ? 
ret : in_len; } -ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +int __uverbs_create_xsrq(struct ib_uverbs_file *file, + struct ib_uverbs_create_xsrq *cmd, + struct ib_udata *udata) { - struct ib_uverbs_create_srq cmd; struct ib_uverbs_create_srq_resp resp; - struct ib_udata udata; - struct ib_uevent_object *obj; + struct ib_usrq_object *obj; struct ib_pd *pd; struct ib_srq *srq; + struct ib_uobject *uninitialized_var(xrcd_uobj); struct ib_srq_init_attr attr; int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); - obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; - init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key); - down_write(&obj->uobject.mutex); + init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_key); + down_write(&obj->uevent.uobject.mutex); - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = idr_read_pd(cmd->pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err; } + if (cmd->srq_type == IB_SRQT_XRC) { + attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); + if (!attr.ext.xrc.cq) { + ret = -EINVAL; + goto err_put_pd; + } + + attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj); + if (!attr.ext.xrc.xrcd) { + ret = -EINVAL; + goto err_put_cq; + } + + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); + atomic_inc(&obj->uxrcd->refcnt); + } + attr.event_handler = ib_uverbs_srq_event_handler; attr.srq_context = file; - attr.attr.max_wr = cmd.max_wr; - attr.attr.max_sge = cmd.max_sge; - attr.attr.srq_limit = cmd.srq_limit; + attr.srq_type = cmd->srq_type; + attr.attr.max_wr = cmd->max_wr; + attr.attr.max_sge = cmd->max_sge; + attr.attr.srq_limit = cmd->srq_limit; - 
obj->events_reported = 0; - INIT_LIST_HEAD(&obj->event_list); + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); - srq = pd->device->create_srq(pd, &attr, &udata); + srq = pd->device->create_srq(pd, &attr, udata); if (IS_ERR(srq)) { ret = PTR_ERR(srq); goto err_put; } - srq->device = pd->device; - srq->pd = pd; - srq->uobject = &obj->uobject; + srq->device = pd->device; + srq->pd = pd; + srq->srq_type = cmd->srq_type; + srq->uobject = &obj->uevent.uobject; srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; + + if (cmd->srq_type == IB_SRQT_XRC) { + srq->ext.xrc.cq = attr.ext.xrc.cq; + srq->ext.xrc.xrcd = attr.ext.xrc.xrcd; + atomic_inc(&attr.ext.xrc.cq->usecnt); + atomic_inc(&attr.ext.xrc.xrcd->usecnt); + } + atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); - obj->uobject.object = srq; - ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject); + obj->uevent.uobject.object = srq; + ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); if (ret) goto err_destroy; memset(&resp, 0, sizeof resp); - resp.srq_handle = obj->uobject.id; + resp.srq_handle = obj->uevent.uobject.id; resp.max_wr = attr.attr.max_wr; resp.max_sge = attr.attr.max_sge; + if (cmd->srq_type == IB_SRQT_XRC) + resp.srqn = srq->ext.xrc.srq_num; - if (copy_to_user((void __user *) (unsigned long) cmd.response, + if (copy_to_user((void __user *) (unsigned long) cmd->response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } + if (cmd->srq_type == IB_SRQT_XRC) { + put_uobj_read(xrcd_uobj); + put_cq_read(attr.ext.xrc.cq); + } put_pd_read(pd); mutex_lock(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->srq_list); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list); mutex_unlock(&file->mutex); - obj->uobject.live = 1; + obj->uevent.uobject.live = 1; - up_write(&obj->uobject.mutex); + up_write(&obj->uevent.uobject.mutex); - return in_len; + return 0; err_copy: - idr_remove_uobj(&ib_uverbs_srq_idr, 
&obj->uobject); + idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); err_destroy: ib_destroy_srq(srq); err_put: + if (cmd->srq_type == IB_SRQT_XRC) { + atomic_dec(&obj->uxrcd->refcnt); + put_uobj_read(xrcd_uobj); + } + +err_put_cq: + if (cmd->srq_type == IB_SRQT_XRC) + put_cq_read(attr.ext.xrc.cq); + +err_put_pd: put_pd_read(pd); err: - put_uobj_write(&obj->uobject); + put_uobj_write(&obj->uevent.uobject); return ret; } +ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_srq cmd; + struct ib_uverbs_create_xsrq xcmd; + struct ib_uverbs_create_srq_resp resp; + struct ib_udata udata; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + xcmd.response = cmd.response; + xcmd.user_handle = cmd.user_handle; + xcmd.srq_type = IB_SRQT_BASIC; + xcmd.pd_handle = cmd.pd_handle; + xcmd.max_wr = cmd.max_wr; + xcmd.max_sge = cmd.max_sge; + xcmd.srq_limit = cmd.srq_limit; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + ret = __uverbs_create_xsrq(file, &xcmd, &udata); + if (ret) + return ret; + + return in_len; +} + +ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, int out_len) +{ + struct ib_uverbs_create_xsrq cmd; + struct ib_uverbs_create_srq_resp resp; + struct ib_udata udata; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + ret = __uverbs_create_xsrq(file, &cmd, &udata); + if (ret) + return ret; + + return in_len; +} + ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) diff --git 
a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 56898b6..f07c6e3 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -72,6 +72,7 @@ DEFINE_IDR(ib_uverbs_ah_idr); DEFINE_IDR(ib_uverbs_cq_idr); DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); +DEFINE_IDR(ib_uverbs_xrcd_idr); static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -107,19 +108,27 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, + [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, + [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, + [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, + [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp }; static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device); -static void ib_uverbs_release_dev(struct kref *ref) +static void ib_uverbs_release_dev(struct kobject *kobj) { struct ib_uverbs_device *dev = - container_of(ref, struct ib_uverbs_device, ref); + container_of(kobj, struct ib_uverbs_device, kobj); - complete(&dev->comp); + kfree(dev); } +static struct kobj_type ib_uverbs_dev_ktype = { + .release = ib_uverbs_release_dev, +}; + static void ib_uverbs_release_event_file(struct kref *ref) { struct ib_uverbs_event_file *file = @@ -202,8 +211,12 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, container_of(uobj, struct ib_uqp_object, uevent.uobject); idr_remove_uobj(&ib_uverbs_qp_idr, uobj); - ib_uverbs_detach_umcast(qp, uqp); - ib_destroy_qp(qp); + if (qp != qp->real_qp) { + ib_close_qp(qp); + } else { + ib_uverbs_detach_umcast(qp, uqp); + ib_destroy_qp(qp); + } ib_uverbs_release_uevent(file, &uqp->uevent); kfree(uqp); } @@ -241,6 +254,18 @@ static int 
ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uobj); } + mutex_lock(&file->device->xrcd_tree_mutex); + list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) { + struct ib_xrcd *xrcd = uobj->object; + struct ib_uxrcd_object *uxrcd = + container_of(uobj, struct ib_uxrcd_object, uobject); + + idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); + ib_uverbs_dealloc_xrcd(file->device, xrcd); + kfree(uxrcd); + } + mutex_unlock(&file->device->xrcd_tree_mutex); + list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { struct ib_pd *pd = uobj->object; @@ -252,13 +277,19 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, return context->device->dealloc_ucontext(context); } +static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) +{ + complete(&dev->comp); +} + static void ib_uverbs_release_file(struct kref *ref) { struct ib_uverbs_file *file = container_of(ref, struct ib_uverbs_file, ref); module_put(file->device->ib_dev->owner); - kref_put(&file->device->ref, ib_uverbs_release_dev); + if (atomic_dec_and_test(&file->device->refcount)) + ib_uverbs_comp_dev(file->device); kfree(file); } @@ -430,6 +461,7 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, entry->desc.async.element = element; entry->desc.async.event_type = event; + entry->desc.async.reserved = 0; entry->counter = counter; list_add_tail(&entry->list, &file->async_file->event_list); @@ -557,8 +589,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (hdr.in_words * 4 != count) return -EINVAL; - if (hdr.command < 0 || - hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || + if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || !uverbs_cmd_table[hdr.command]) return -EINVAL; @@ -600,9 +631,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) int ret; dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev); - if (dev) - kref_get(&dev->ref); - else + if (!atomic_inc_not_zero(&dev->refcount)) return -ENXIO; if 
(!try_module_get(dev->ib_dev->owner)) { @@ -623,6 +652,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) mutex_init(&file->mutex); filp->private_data = file; + kobject_get(&dev->kobj); return nonseekable_open(inode, filp); @@ -630,13 +660,16 @@ err_module: module_put(dev->ib_dev->owner); err: - kref_put(&dev->ref, ib_uverbs_release_dev); + if (atomic_dec_and_test(&dev->refcount)) + ib_uverbs_comp_dev(dev); + return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; + struct ib_uverbs_device *dev = file->device; ib_uverbs_cleanup_ucontext(file, file->ucontext); @@ -644,6 +677,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) kref_put(&file->async_file->ref, ib_uverbs_release_event_file); kref_put(&file->ref, ib_uverbs_release_file); + kobject_put(&dev->kobj); return 0; } @@ -739,8 +773,11 @@ static void ib_uverbs_add_one(struct ib_device *device) if (!uverbs_dev) return; - kref_init(&uverbs_dev->ref); + atomic_set(&uverbs_dev->refcount, 1); init_completion(&uverbs_dev->comp); + uverbs_dev->xrcd_tree = RB_ROOT; + mutex_init(&uverbs_dev->xrcd_tree_mutex); + kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype); spin_lock(&map_lock); devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); @@ -767,6 +804,7 @@ static void ib_uverbs_add_one(struct ib_device *device) cdev_init(&uverbs_dev->cdev, NULL); uverbs_dev->cdev.owner = THIS_MODULE; uverbs_dev->cdev.ops = device->mmap ? 
&uverbs_mmap_fops : &uverbs_fops; + uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj; kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); if (cdev_add(&uverbs_dev->cdev, base, 1)) goto err_cdev; @@ -797,9 +835,10 @@ err_cdev: clear_bit(devnum, overflow_map); err: - kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); + if (atomic_dec_and_test(&uverbs_dev->refcount)) + ib_uverbs_comp_dev(uverbs_dev); wait_for_completion(&uverbs_dev->comp); - kfree(uverbs_dev); + kobject_put(&uverbs_dev->kobj); return; } @@ -819,9 +858,10 @@ static void ib_uverbs_remove_one(struct ib_device *device) else clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map); - kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); + if (atomic_dec_and_test(&uverbs_dev->refcount)) + ib_uverbs_comp_dev(uverbs_dev); wait_for_completion(&uverbs_dev->comp); - kfree(uverbs_dev); + kobject_put(&uverbs_dev->kobj); } static char *uverbs_devnode(struct device *dev, mode_t *mode) diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index 1b1146f..e7bee46 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -30,6 +30,7 @@ * SOFTWARE. 
*/ +#include <linux/export.h> #include <rdma/ib_marshall.h> void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index af7a8b0..575b780 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -38,7 +38,9 @@ #include <linux/errno.h> #include <linux/err.h> +#include <linux/export.h> #include <linux/string.h> +#include <linux/slab.h> #include <rdma/ib_verbs.h> #include <rdma/ib_cache.h> @@ -77,6 +79,31 @@ enum ib_rate mult_to_ib_rate(int mult) } EXPORT_SYMBOL(mult_to_ib_rate); +int ib_rate_to_mbps(enum ib_rate rate) +{ + switch (rate) { + case IB_RATE_2_5_GBPS: return 2500; + case IB_RATE_5_GBPS: return 5000; + case IB_RATE_10_GBPS: return 10000; + case IB_RATE_20_GBPS: return 20000; + case IB_RATE_30_GBPS: return 30000; + case IB_RATE_40_GBPS: return 40000; + case IB_RATE_60_GBPS: return 60000; + case IB_RATE_80_GBPS: return 80000; + case IB_RATE_120_GBPS: return 120000; + case IB_RATE_14_GBPS: return 14062; + case IB_RATE_56_GBPS: return 56250; + case IB_RATE_112_GBPS: return 112500; + case IB_RATE_168_GBPS: return 168750; + case IB_RATE_25_GBPS: return 25781; + case IB_RATE_100_GBPS: return 103125; + case IB_RATE_200_GBPS: return 206250; + case IB_RATE_300_GBPS: return 309375; + default: return -1; + } +} +EXPORT_SYMBOL(ib_rate_to_mbps); + enum rdma_transport_type rdma_node_get_transport(enum rdma_node_type node_type) { @@ -250,6 +277,13 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, srq->uobject = NULL; srq->event_handler = srq_init_attr->event_handler; srq->srq_context = srq_init_attr->srq_context; + srq->srq_type = srq_init_attr->srq_type; + if (srq->srq_type == IB_SRQT_XRC) { + srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; + srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq; + atomic_inc(&srq->ext.xrc.xrcd->usecnt); + atomic_inc(&srq->ext.xrc.cq->usecnt); + } atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); } @@ -279,16 +313,29 @@ 
EXPORT_SYMBOL(ib_query_srq); int ib_destroy_srq(struct ib_srq *srq) { struct ib_pd *pd; + enum ib_srq_type srq_type; + struct ib_xrcd *uninitialized_var(xrcd); + struct ib_cq *uninitialized_var(cq); int ret; if (atomic_read(&srq->usecnt)) return -EBUSY; pd = srq->pd; + srq_type = srq->srq_type; + if (srq_type == IB_SRQT_XRC) { + xrcd = srq->ext.xrc.xrcd; + cq = srq->ext.xrc.cq; + } ret = srq->device->destroy_srq(srq); - if (!ret) + if (!ret) { atomic_dec(&pd->usecnt); + if (srq_type == IB_SRQT_XRC) { + atomic_dec(&xrcd->usecnt); + atomic_dec(&cq->usecnt); + } + } return ret; } @@ -296,28 +343,123 @@ EXPORT_SYMBOL(ib_destroy_srq); /* Queue pairs */ +static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) +{ + struct ib_qp *qp = context; + + list_for_each_entry(event->element.qp, &qp->open_list, open_list) + event->element.qp->event_handler(event, event->element.qp->qp_context); +} + +static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) +{ + mutex_lock(&xrcd->tgt_qp_mutex); + list_add(&qp->xrcd_list, &xrcd->tgt_qp_list); + mutex_unlock(&xrcd->tgt_qp_mutex); +} + +static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp, + void (*event_handler)(struct ib_event *, void *), + void *qp_context) +{ + struct ib_qp *qp; + unsigned long flags; + + qp = kzalloc(sizeof *qp, GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + qp->real_qp = real_qp; + atomic_inc(&real_qp->usecnt); + qp->device = real_qp->device; + qp->event_handler = event_handler; + qp->qp_context = qp_context; + qp->qp_num = real_qp->qp_num; + qp->qp_type = real_qp->qp_type; + + spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); + list_add(&qp->open_list, &real_qp->open_list); + spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + + return qp; +} + +struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, + struct ib_qp_open_attr *qp_open_attr) +{ + struct ib_qp *qp, *real_qp; + + if (qp_open_attr->qp_type != IB_QPT_XRC_TGT) + return 
ERR_PTR(-EINVAL); + + qp = ERR_PTR(-EINVAL); + mutex_lock(&xrcd->tgt_qp_mutex); + list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) { + if (real_qp->qp_num == qp_open_attr->qp_num) { + qp = __ib_open_qp(real_qp, qp_open_attr->event_handler, + qp_open_attr->qp_context); + break; + } + } + mutex_unlock(&xrcd->tgt_qp_mutex); + return qp; +} +EXPORT_SYMBOL(ib_open_qp); + struct ib_qp *ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { - struct ib_qp *qp; + struct ib_qp *qp, *real_qp; + struct ib_device *device; - qp = pd->device->create_qp(pd, qp_init_attr, NULL); + device = pd ? pd->device : qp_init_attr->xrcd->device; + qp = device->create_qp(pd, qp_init_attr, NULL); if (!IS_ERR(qp)) { - qp->device = pd->device; - qp->pd = pd; - qp->send_cq = qp_init_attr->send_cq; - qp->recv_cq = qp_init_attr->recv_cq; - qp->srq = qp_init_attr->srq; - qp->uobject = NULL; - qp->event_handler = qp_init_attr->event_handler; - qp->qp_context = qp_init_attr->qp_context; - qp->qp_type = qp_init_attr->qp_type; - atomic_inc(&pd->usecnt); - atomic_inc(&qp_init_attr->send_cq->usecnt); - atomic_inc(&qp_init_attr->recv_cq->usecnt); - if (qp_init_attr->srq) - atomic_inc(&qp_init_attr->srq->usecnt); + qp->device = device; + qp->real_qp = qp; + qp->uobject = NULL; + qp->qp_type = qp_init_attr->qp_type; + + atomic_set(&qp->usecnt, 0); + if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { + qp->event_handler = __ib_shared_qp_event_handler; + qp->qp_context = qp; + qp->pd = NULL; + qp->send_cq = qp->recv_cq = NULL; + qp->srq = NULL; + qp->xrcd = qp_init_attr->xrcd; + atomic_inc(&qp_init_attr->xrcd->usecnt); + INIT_LIST_HEAD(&qp->open_list); + + real_qp = qp; + qp = __ib_open_qp(real_qp, qp_init_attr->event_handler, + qp_init_attr->qp_context); + if (!IS_ERR(qp)) + __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); + else + real_qp->device->destroy_qp(real_qp); + } else { + qp->event_handler = qp_init_attr->event_handler; + qp->qp_context = qp_init_attr->qp_context; + if 
(qp_init_attr->qp_type == IB_QPT_XRC_INI) { + qp->recv_cq = NULL; + qp->srq = NULL; + } else { + qp->recv_cq = qp_init_attr->recv_cq; + atomic_inc(&qp_init_attr->recv_cq->usecnt); + qp->srq = qp_init_attr->srq; + if (qp->srq) + atomic_inc(&qp_init_attr->srq->usecnt); + } + + qp->pd = pd; + qp->send_cq = qp_init_attr->send_cq; + qp->xrcd = NULL; + + atomic_inc(&pd->usecnt); + atomic_inc(&qp_init_attr->send_cq->usecnt); + } } return qp; @@ -326,8 +468,8 @@ EXPORT_SYMBOL(ib_create_qp); static const struct { int valid; - enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETHERTYPE + 1]; - enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETHERTYPE + 1]; + enum ib_qp_attr_mask req_param[IB_QPT_MAX]; + enum ib_qp_attr_mask opt_param[IB_QPT_MAX]; } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, @@ -343,6 +485,12 @@ static const struct { [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | @@ -365,6 +513,12 @@ static const struct { [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | @@ -384,6 +538,16 @@ static const struct { IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER), + [IB_QPT_XRC_INI] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN), + [IB_QPT_XRC_TGT] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_MIN_RNR_TIMER), }, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | @@ -394,6 +558,12 @@ static const struct { [IB_QPT_RC] = 
(IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), + [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), + [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | @@ -414,6 +584,13 @@ static const struct { IB_QP_RNR_RETRY | IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC), + [IB_QPT_XRC_INI] = (IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_SQ_PSN | + IB_QP_MAX_QP_RD_ATOMIC), + [IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT | + IB_QP_SQ_PSN), [IB_QPT_SMI] = IB_QP_SQ_PSN, [IB_QPT_GSI] = IB_QP_SQ_PSN, }, @@ -429,6 +606,15 @@ static const struct { IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | @@ -453,6 +639,15 @@ static const struct { IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE | IB_QP_MIN_RNR_TIMER), + [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE | + IB_QP_MIN_RNR_TIMER), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | @@ -465,6 +660,8 @@ static const struct { [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? 
*/ [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY } @@ -487,6 +684,15 @@ static const struct { IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | @@ -515,6 +721,25 @@ static const struct { IB_QP_PKEY_INDEX | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_INI] = (IB_QP_PORT | + IB_QP_AV | + IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_MAX_QP_RD_ATOMIC | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_TGT] = (IB_QP_PORT | + IB_QP_AV | + IB_QP_TIMEOUT | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | @@ -579,7 +804,7 @@ int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) { - return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL); + return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); } EXPORT_SYMBOL(ib_modify_qp); @@ -589,11 +814,59 @@ int ib_query_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr) { return qp->device->query_qp ? 
- qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) : + qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) : -ENOSYS; } EXPORT_SYMBOL(ib_query_qp); +int ib_close_qp(struct ib_qp *qp) +{ + struct ib_qp *real_qp; + unsigned long flags; + + real_qp = qp->real_qp; + if (real_qp == qp) + return -EINVAL; + + spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); + list_del(&qp->open_list); + spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + + atomic_dec(&real_qp->usecnt); + kfree(qp); + + return 0; +} +EXPORT_SYMBOL(ib_close_qp); + +static int __ib_destroy_shared_qp(struct ib_qp *qp) +{ + struct ib_xrcd *xrcd; + struct ib_qp *real_qp; + int ret; + + real_qp = qp->real_qp; + xrcd = real_qp->xrcd; + + mutex_lock(&xrcd->tgt_qp_mutex); + ib_close_qp(qp); + if (atomic_read(&real_qp->usecnt) == 0) + list_del(&real_qp->xrcd_list); + else + real_qp = NULL; + mutex_unlock(&xrcd->tgt_qp_mutex); + + if (real_qp) { + ret = ib_destroy_qp(real_qp); + if (!ret) + atomic_dec(&xrcd->usecnt); + else + __ib_insert_xrcd_qp(xrcd, real_qp); + } + + return 0; +} + int ib_destroy_qp(struct ib_qp *qp) { struct ib_pd *pd; @@ -601,16 +874,25 @@ int ib_destroy_qp(struct ib_qp *qp) struct ib_srq *srq; int ret; - pd = qp->pd; - scq = qp->send_cq; - rcq = qp->recv_cq; - srq = qp->srq; + if (atomic_read(&qp->usecnt)) + return -EBUSY; + + if (qp->real_qp != qp) + return __ib_destroy_shared_qp(qp); + + pd = qp->pd; + scq = qp->send_cq; + rcq = qp->recv_cq; + srq = qp->srq; ret = qp->device->destroy_qp(qp); if (!ret) { - atomic_dec(&pd->usecnt); - atomic_dec(&scq->usecnt); - atomic_dec(&rcq->usecnt); + if (pd) + atomic_dec(&pd->usecnt); + if (scq) + atomic_dec(&scq->usecnt); + if (rcq) + atomic_dec(&rcq->usecnt); if (srq) atomic_dec(&srq->usecnt); } @@ -920,3 +1202,42 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) return qp->device->detach_mcast(qp, gid, lid); } EXPORT_SYMBOL(ib_detach_mcast); + +struct ib_xrcd 
*ib_alloc_xrcd(struct ib_device *device) +{ + struct ib_xrcd *xrcd; + + if (!device->alloc_xrcd) + return ERR_PTR(-ENOSYS); + + xrcd = device->alloc_xrcd(device, NULL, NULL); + if (!IS_ERR(xrcd)) { + xrcd->device = device; + xrcd->inode = NULL; + atomic_set(&xrcd->usecnt, 0); + mutex_init(&xrcd->tgt_qp_mutex); + INIT_LIST_HEAD(&xrcd->tgt_qp_list); + } + + return xrcd; +} +EXPORT_SYMBOL(ib_alloc_xrcd); + +int ib_dealloc_xrcd(struct ib_xrcd *xrcd) +{ + struct ib_qp *qp; + int ret; + + if (atomic_read(&xrcd->usecnt)) + return -EBUSY; + + while (!list_empty(&xrcd->tgt_qp_list)) { + qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list); + ret = ib_destroy_qp(qp); + if (ret) + return ret; + } + + return xrcd->device->dealloc_xrcd(xrcd); +} +EXPORT_SYMBOL(ib_dealloc_xrcd); |