Skip to content
qp.c 48.1 KiB
Newer Older

	PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);

	/* locking hierarchy: cq lock first, then qp lock. */
	spin_lock_irqsave(&rchp->lock, flag);
	spin_lock(&qhp->lock);
	c4iw_flush_hw_cq(&rchp->cq);
	c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
	flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
	spin_unlock(&qhp->lock);
	spin_unlock_irqrestore(&rchp->lock, flag);
	if (flushed) {
		spin_lock_irqsave(&rchp->comp_handler_lock, flag);
		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
		spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
	}
	/* locking hierarchy: cq lock first, then qp lock. */
	spin_lock_irqsave(&schp->lock, flag);
	spin_lock(&qhp->lock);
	c4iw_flush_hw_cq(&schp->cq);
	c4iw_count_scqes(&schp->cq, &qhp->wq, &count);
	flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count);
	spin_unlock(&qhp->lock);
	spin_unlock_irqrestore(&schp->lock, flag);
	if (flushed) {
		spin_lock_irqsave(&schp->comp_handler_lock, flag);
		(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
		spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
	}
static void flush_qp(struct c4iw_qp *qhp)
{
	struct c4iw_cq *rchp, *schp;
	unsigned long flag;

	rchp = get_chp(qhp->rhp, qhp->attr.rcq);
	schp = get_chp(qhp->rhp, qhp->attr.scq);

	if (qhp->ibqp.uobject) {
		t4_set_wq_in_error(&qhp->wq);
		t4_set_cq_in_error(&rchp->cq);
		spin_lock_irqsave(&rchp->comp_handler_lock, flag);
		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
		spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
			t4_set_cq_in_error(&schp->cq);
			spin_lock_irqsave(&schp->comp_handler_lock, flag);
			(*schp->ibcq.comp_handler)(&schp->ibcq,
					schp->ibcq.cq_context);
			spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
	__flush_qp(qhp, rchp, schp);
static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
		     struct c4iw_ep *ep)
{
	struct fw_ri_wr *wqe;
	int ret;
	struct sk_buff *skb;

	PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
	     ep->hwtid);
	skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;
	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);

	wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
	memset(wqe, 0, sizeof *wqe);
	wqe->op_compl = cpu_to_be32(
		FW_WR_OP(FW_RI_INIT_WR) |
		FW_WR_COMPL(1));
	wqe->flowid_len16 = cpu_to_be32(
		FW_WR_FLOWID(ep->hwtid) |
		FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16)));
	wqe->cookie = (unsigned long) &ep->com.wr_wait;

	wqe->u.fini.type = FW_RI_TYPE_FINI;
	ret = c4iw_ofld_send(&rhp->rdev, skb);
	if (ret)
		goto out;

	ret = c4iw_wait_for_reply(&rhp->rdev, &ep->com.wr_wait, qhp->ep->hwtid,
			     qhp->wq.sq.qid, __func__);
out:
	PDBG("%s ret %d\n", __func__, ret);
	return ret;
}

static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
{
	PDBG("%s p2p_type = %d\n", __func__, p2p_type);
	memset(&init->u, 0, sizeof init->u);
	switch (p2p_type) {
	case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
		init->u.write.opcode = FW_RI_RDMA_WRITE_WR;
		init->u.write.stag_sink = cpu_to_be32(1);
		init->u.write.to_sink = cpu_to_be64(1);
		init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD;
		init->u.write.len16 = DIV_ROUND_UP(sizeof init->u.write +
						   sizeof(struct fw_ri_immd),
						   16);
		break;
	case FW_RI_INIT_P2PTYPE_READ_REQ:
		init->u.write.opcode = FW_RI_RDMA_READ_WR;
		init->u.read.stag_src = cpu_to_be32(1);
		init->u.read.to_src_lo = cpu_to_be32(1);
		init->u.read.stag_sink = cpu_to_be32(1);
		init->u.read.to_sink_lo = cpu_to_be32(1);
		init->u.read.len16 = DIV_ROUND_UP(sizeof init->u.read, 16);
		break;
	}
}

static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
{
	struct fw_ri_wr *wqe;
	int ret;
	struct sk_buff *skb;

	PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
	     qhp->ep->hwtid);

	skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;
	set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);

	wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
	memset(wqe, 0, sizeof *wqe);
	wqe->op_compl = cpu_to_be32(
		FW_WR_OP(FW_RI_INIT_WR) |
		FW_WR_COMPL(1));
	wqe->flowid_len16 = cpu_to_be32(
		FW_WR_FLOWID(qhp->ep->hwtid) |
		FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16)));

	wqe->cookie = (unsigned long) &qhp->ep->com.wr_wait;

	wqe->u.init.type = FW_RI_TYPE_INIT;
	wqe->u.init.mpareqbit_p2ptype =
		V_FW_RI_WR_MPAREQBIT(qhp->attr.mpa_attr.initiator) |
		V_FW_RI_WR_P2PTYPE(qhp->attr.mpa_attr.p2p_type);
	wqe->u.init.mpa_attrs = FW_RI_MPA_IETF_ENABLE;
	if (qhp->attr.mpa_attr.recv_marker_enabled)
		wqe->u.init.mpa_attrs |= FW_RI_MPA_RX_MARKER_ENABLE;
	if (qhp->attr.mpa_attr.xmit_marker_enabled)
		wqe->u.init.mpa_attrs |= FW_RI_MPA_TX_MARKER_ENABLE;
	if (qhp->attr.mpa_attr.crc_enabled)
		wqe->u.init.mpa_attrs |= FW_RI_MPA_CRC_ENABLE;

	wqe->u.init.qp_caps = FW_RI_QP_RDMA_READ_ENABLE |
			    FW_RI_QP_RDMA_WRITE_ENABLE |
			    FW_RI_QP_BIND_ENABLE;
	if (!qhp->ibqp.uobject)
		wqe->u.init.qp_caps |= FW_RI_QP_FAST_REGISTER_ENABLE |
				     FW_RI_QP_STAG0_ENABLE;
	wqe->u.init.nrqe = cpu_to_be16(t4_rqes_posted(&qhp->wq));
	wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd);
	wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid);
	wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid);
	wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
	wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq);
	wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq);
	wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord);
	wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird);
	wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq);
	wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq);
	wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
	wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
					 rhp->rdev.lldi.vr->rq.start);
	if (qhp->attr.mpa_attr.initiator)
		build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init);

	ret = c4iw_ofld_send(&rhp->rdev, skb);
	if (ret)
		goto out;

	ret = c4iw_wait_for_reply(&rhp->rdev, &qhp->ep->com.wr_wait,
				  qhp->ep->hwtid, qhp->wq.sq.qid, __func__);
out:
	PDBG("%s ret %d\n", __func__, ret);
	return ret;
}

/*
 * Called by the library when the qp has user dbs disabled due to
 * a DB_FULL condition.  This function will single-thread all user
 * DB rings to avoid overflowing the hw db-fifo.
 */
static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc)
{
	int delay = db_delay_usecs;

	mutex_lock(&qhp->rhp->db_mutex);
	do {

		/*
		 * The interrupt threshold is dbfifo_int_thresh << 6. So
		 * make sure we don't cross that and generate an interrupt.
		 */
		if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) <
		    (qhp->rhp->rdev.lldi.dbfifo_int_thresh << 5)) {
			writel(QID(qid) | PIDX(inc), qhp->wq.db);
			break;
		}
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(usecs_to_jiffies(delay));
		delay = min(delay << 1, 2000);
	} while (1);
	mutex_unlock(&qhp->rhp->db_mutex);
	return 0;
}

int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
		   enum c4iw_qp_attr_mask mask,
		   struct c4iw_qp_attributes *attrs,
		   int internal)
{
	int ret = 0;
	struct c4iw_qp_attributes newattr = qhp->attr;
	int disconnect = 0;
	int terminate = 0;
	int abort = 0;
	int free = 0;
	struct c4iw_ep *ep = NULL;

	PDBG("%s qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n", __func__,
	     qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state,
	     (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);

	mutex_lock(&qhp->mutex);

	/* Process attr changes if in IDLE */
	if (mask & C4IW_QP_ATTR_VALID_MODIFY) {
		if (qhp->attr.state != C4IW_QP_STATE_IDLE) {
			ret = -EIO;
			goto out;
		}
		if (mask & C4IW_QP_ATTR_ENABLE_RDMA_READ)
			newattr.enable_rdma_read = attrs->enable_rdma_read;
		if (mask & C4IW_QP_ATTR_ENABLE_RDMA_WRITE)
			newattr.enable_rdma_write = attrs->enable_rdma_write;
		if (mask & C4IW_QP_ATTR_ENABLE_RDMA_BIND)
			newattr.enable_bind = attrs->enable_bind;
		if (mask & C4IW_QP_ATTR_MAX_ORD) {
			if (attrs->max_ord > c4iw_max_read_depth) {
				ret = -EINVAL;
				goto out;
			}
			newattr.max_ord = attrs->max_ord;
		}
		if (mask & C4IW_QP_ATTR_MAX_IRD) {
			if (attrs->max_ird > c4iw_max_read_depth) {
				ret = -EINVAL;
				goto out;
			}
			newattr.max_ird = attrs->max_ird;
		}
		qhp->attr = newattr;
	}

	if (mask & C4IW_QP_ATTR_SQ_DB) {
		ret = ring_kernel_db(qhp, qhp->wq.sq.qid, attrs->sq_db_inc);
		goto out;
	}
	if (mask & C4IW_QP_ATTR_RQ_DB) {
		ret = ring_kernel_db(qhp, qhp->wq.rq.qid, attrs->rq_db_inc);
		goto out;
	}

	if (!(mask & C4IW_QP_ATTR_NEXT_STATE))
		goto out;
	if (qhp->attr.state == attrs->next_state)
		goto out;

	switch (qhp->attr.state) {
	case C4IW_QP_STATE_IDLE:
		switch (attrs->next_state) {
		case C4IW_QP_STATE_RTS:
			if (!(mask & C4IW_QP_ATTR_LLP_STREAM_HANDLE)) {
				ret = -EINVAL;
				goto out;
			}
			if (!(mask & C4IW_QP_ATTR_MPA_ATTR)) {
				ret = -EINVAL;
				goto out;
			}
			qhp->attr.mpa_attr = attrs->mpa_attr;
			qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
			qhp->ep = qhp->attr.llp_stream_handle;
			set_state(qhp, C4IW_QP_STATE_RTS);

			/*
			 * Ref the endpoint here and deref when we
			 * disassociate the endpoint from the QP.  This
			 * happens in CLOSING->IDLE transition or *->ERROR
			 * transition.
			 */
			c4iw_get_ep(&qhp->ep->com);
			ret = rdma_init(rhp, qhp);
			if (ret)
				goto err;
			break;
		case C4IW_QP_STATE_ERROR:
			set_state(qhp, C4IW_QP_STATE_ERROR);
			flush_qp(qhp);
			break;
		default:
			ret = -EINVAL;
			goto out;
		}
		break;
	case C4IW_QP_STATE_RTS:
		switch (attrs->next_state) {
		case C4IW_QP_STATE_CLOSING:
			BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
			set_state(qhp, C4IW_QP_STATE_CLOSING);
			ep = qhp->ep;
			if (!internal) {
				abort = 0;
				disconnect = 1;
				c4iw_get_ep(&qhp->ep->com);
			if (qhp->ibqp.uobject)
				t4_set_wq_in_error(&qhp->wq);
			ret = rdma_fini(rhp, qhp, ep);
			if (ret)
				goto err;
			break;
		case C4IW_QP_STATE_TERMINATE:
			set_state(qhp, C4IW_QP_STATE_TERMINATE);
			qhp->attr.layer_etype = attrs->layer_etype;
			qhp->attr.ecode = attrs->ecode;
			if (qhp->ibqp.uobject)
				t4_set_wq_in_error(&qhp->wq);
			if (!internal)
				terminate = 1;
			c4iw_get_ep(&qhp->ep->com);
			break;
		case C4IW_QP_STATE_ERROR:
			set_state(qhp, C4IW_QP_STATE_ERROR);
			if (qhp->ibqp.uobject)
				t4_set_wq_in_error(&qhp->wq);
			if (!internal) {
				abort = 1;
				disconnect = 1;
				ep = qhp->ep;
				c4iw_get_ep(&qhp->ep->com);
			}
			goto err;
			break;
		default:
			ret = -EINVAL;
			goto out;
		}
		break;
	case C4IW_QP_STATE_CLOSING:
		if (!internal) {
			ret = -EINVAL;
			goto out;
		}
		switch (attrs->next_state) {
		case C4IW_QP_STATE_IDLE:
			flush_qp(qhp);
			set_state(qhp, C4IW_QP_STATE_IDLE);
			qhp->attr.llp_stream_handle = NULL;
			c4iw_put_ep(&qhp->ep->com);
			qhp->ep = NULL;
			wake_up(&qhp->wait);
			break;
		case C4IW_QP_STATE_ERROR:
			goto err;
		default:
			ret = -EINVAL;
			goto err;
		}
		break;
	case C4IW_QP_STATE_ERROR:
		if (attrs->next_state != C4IW_QP_STATE_IDLE) {
			ret = -EINVAL;
			goto out;
		}
		if (!t4_sq_empty(&qhp->wq) || !t4_rq_empty(&qhp->wq)) {
			ret = -EINVAL;
			goto out;
		}
		set_state(qhp, C4IW_QP_STATE_IDLE);
		break;
	case C4IW_QP_STATE_TERMINATE:
		if (!internal) {
			ret = -EINVAL;
			goto out;
		}
		goto err;
		break;
	default:
		printk(KERN_ERR "%s in a bad state %d\n",
		       __func__, qhp->attr.state);
		ret = -EINVAL;
		goto err;
		break;
	}
	goto out;
err:
	PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
	     qhp->wq.sq.qid);

	/* disassociate the LLP connection */
	qhp->attr.llp_stream_handle = NULL;
	if (!ep)
		ep = qhp->ep;
	qhp->ep = NULL;
	set_state(qhp, C4IW_QP_STATE_ERROR);
	wake_up(&qhp->wait);
	BUG_ON(!ep);
	mutex_unlock(&qhp->mutex);
		post_terminate(qhp, NULL, internal ? GFP_ATOMIC : GFP_KERNEL);

	/*
	 * If disconnect is 1, then we need to initiate a disconnect
	 * on the EP.  This can be a normal close (RTS->CLOSING) or
	 * an abnormal close (RTS/CLOSING->ERROR).
	 */
	if (disconnect) {
		c4iw_ep_disconnect(ep, abort, internal ? GFP_ATOMIC :
							 GFP_KERNEL);
		c4iw_put_ep(&ep->com);
	}

	/*
	 * If free is 1, then we've disassociated the EP from the QP
	 * and we need to dereference the EP.
	 */
	if (free)
		c4iw_put_ep(&ep->com);
	PDBG("%s exit state %d\n", __func__, qhp->attr.state);
	return ret;
}

static int enable_qp_db(int id, void *p, void *data)
{
	struct c4iw_qp *qp = p;

	t4_enable_wq_db(&qp->wq);
	return 0;
}

int c4iw_destroy_qp(struct ib_qp *ib_qp)
{
	struct c4iw_dev *rhp;
	struct c4iw_qp *qhp;
	struct c4iw_qp_attributes attrs;
	struct c4iw_ucontext *ucontext;

	qhp = to_c4iw_qp(ib_qp);
	rhp = qhp->rhp;

	attrs.next_state = C4IW_QP_STATE_ERROR;
	if (qhp->attr.state == C4IW_QP_STATE_TERMINATE)
		c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
	else
		c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
	wait_event(qhp->wait, !qhp->ep);

	spin_lock_irq(&rhp->lock);
	remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid);
	rhp->qpcnt--;
	BUG_ON(rhp->qpcnt < 0);
	if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) {
		rhp->rdev.stats.db_state_transitions++;
		rhp->db_state = NORMAL;
		idr_for_each(&rhp->qpidr, enable_qp_db, NULL);
	}
	if (db_coalescing_threshold >= 0)
		if (rhp->qpcnt <= db_coalescing_threshold)
			cxgb4_enable_db_coalescing(rhp->rdev.lldi.ports[0]);
	spin_unlock_irq(&rhp->lock);
	atomic_dec(&qhp->refcnt);
	wait_event(qhp->wait, !atomic_read(&qhp->refcnt));

	ucontext = ib_qp->uobject ?
		   to_c4iw_ucontext(ib_qp->uobject->context) : NULL;
	destroy_qp(&rhp->rdev, &qhp->wq,
		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx);

	PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid);
	kfree(qhp);
	return 0;
}

static int disable_qp_db(int id, void *p, void *data)
{
	struct c4iw_qp *qp = p;

	t4_disable_wq_db(&qp->wq);
	return 0;
}

struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
			     struct ib_udata *udata)
{
	struct c4iw_dev *rhp;
	struct c4iw_qp *qhp;
	struct c4iw_pd *php;
	struct c4iw_cq *schp;
	struct c4iw_cq *rchp;
	struct c4iw_create_qp_resp uresp;
	int sqsize, rqsize;
	struct c4iw_ucontext *ucontext;
	int ret;
	struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL;

	PDBG("%s ib_pd %p\n", __func__, pd);

	if (attrs->qp_type != IB_QPT_RC)
		return ERR_PTR(-EINVAL);

	php = to_c4iw_pd(pd);
	rhp = php->rhp;
	schp = get_chp(rhp, ((struct c4iw_cq *)attrs->send_cq)->cq.cqid);
	rchp = get_chp(rhp, ((struct c4iw_cq *)attrs->recv_cq)->cq.cqid);
	if (!schp || !rchp)
		return ERR_PTR(-EINVAL);

	if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE)
		return ERR_PTR(-EINVAL);

	rqsize = roundup(attrs->cap.max_recv_wr + 1, 16);
	if (rqsize > T4_MAX_RQ_SIZE)
		return ERR_PTR(-E2BIG);

	sqsize = roundup(attrs->cap.max_send_wr + 1, 16);
	if (sqsize > T4_MAX_SQ_SIZE)
		return ERR_PTR(-E2BIG);

	ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;


	qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
	if (!qhp)
		return ERR_PTR(-ENOMEM);
	qhp->wq.sq.size = sqsize;
	qhp->wq.sq.memsize = (sqsize + 1) * sizeof *qhp->wq.sq.queue;
	qhp->wq.rq.size = rqsize;
	qhp->wq.rq.memsize = (rqsize + 1) * sizeof *qhp->wq.rq.queue;

	if (ucontext) {
		qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE);
		qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE);
	}

	PDBG("%s sqsize %u sqmemsize %zu rqsize %u rqmemsize %zu\n",
	     __func__, sqsize, qhp->wq.sq.memsize, rqsize, qhp->wq.rq.memsize);

	ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq,
			ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
	if (ret)
		goto err1;

	attrs->cap.max_recv_wr = rqsize - 1;
	attrs->cap.max_send_wr = sqsize - 1;
	attrs->cap.max_inline_data = T4_MAX_SEND_INLINE;

	qhp->rhp = rhp;
	qhp->attr.pd = php->pdid;
	qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid;
	qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid;
	qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
	qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
	qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
	qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
	qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
	qhp->attr.state = C4IW_QP_STATE_IDLE;
	qhp->attr.next_state = C4IW_QP_STATE_IDLE;
	qhp->attr.enable_rdma_read = 1;
	qhp->attr.enable_rdma_write = 1;
	qhp->attr.enable_bind = 1;
	qhp->attr.max_ord = 1;
	qhp->attr.max_ird = 1;
	spin_lock_init(&qhp->lock);
	mutex_init(&qhp->mutex);
	init_waitqueue_head(&qhp->wait);
	atomic_set(&qhp->refcnt, 1);

	spin_lock_irq(&rhp->lock);
	if (rhp->db_state != NORMAL)
		t4_disable_wq_db(&qhp->wq);
	rhp->qpcnt++;
	if (rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) {
		rhp->rdev.stats.db_state_transitions++;
		rhp->db_state = FLOW_CONTROL;
		idr_for_each(&rhp->qpidr, disable_qp_db, NULL);
	}
	if (db_coalescing_threshold >= 0)
		if (rhp->qpcnt > db_coalescing_threshold)
			cxgb4_disable_db_coalescing(rhp->rdev.lldi.ports[0]);
	ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
	spin_unlock_irq(&rhp->lock);
	if (ret)
		goto err2;

	if (udata) {
		mm1 = kmalloc(sizeof *mm1, GFP_KERNEL);
		if (!mm1) {
			ret = -ENOMEM;
		}
		mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
		if (!mm2) {
			ret = -ENOMEM;
		}
		mm3 = kmalloc(sizeof *mm3, GFP_KERNEL);
		if (!mm3) {
			ret = -ENOMEM;
		}
		mm4 = kmalloc(sizeof *mm4, GFP_KERNEL);
		if (!mm4) {
			ret = -ENOMEM;
		if (t4_sq_onchip(&qhp->wq.sq)) {
			mm5 = kmalloc(sizeof *mm5, GFP_KERNEL);
			if (!mm5) {
				ret = -ENOMEM;
				goto err7;
			}
			uresp.flags = C4IW_QPF_ONCHIP;
		} else
			uresp.flags = 0;
		uresp.qid_mask = rhp->rdev.qpmask;
		uresp.sqid = qhp->wq.sq.qid;
		uresp.sq_size = qhp->wq.sq.size;
		uresp.sq_memsize = qhp->wq.sq.memsize;
		uresp.rqid = qhp->wq.rq.qid;
		uresp.rq_size = qhp->wq.rq.size;
		uresp.rq_memsize = qhp->wq.rq.memsize;
		spin_lock(&ucontext->mmap_lock);
		if (mm5) {
			uresp.ma_sync_key = ucontext->key;
			ucontext->key += PAGE_SIZE;
		} else {
			uresp.ma_sync_key =  0;
		uresp.sq_key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		uresp.rq_key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		uresp.sq_db_gts_key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		uresp.rq_db_gts_key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		spin_unlock(&ucontext->mmap_lock);
		ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
		if (ret)
			goto err8;
		mm1->key = uresp.sq_key;
		mm1->addr = qhp->wq.sq.phys_addr;
		mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize);
		insert_mmap(ucontext, mm1);
		mm2->key = uresp.rq_key;
		mm2->addr = virt_to_phys(qhp->wq.rq.queue);
		mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize);
		insert_mmap(ucontext, mm2);
		mm3->key = uresp.sq_db_gts_key;
		mm3->addr = qhp->wq.sq.udb;
		mm3->len = PAGE_SIZE;
		insert_mmap(ucontext, mm3);
		mm4->key = uresp.rq_db_gts_key;
		mm4->addr = qhp->wq.rq.udb;
		mm4->len = PAGE_SIZE;
		insert_mmap(ucontext, mm4);
		if (mm5) {
			mm5->key = uresp.ma_sync_key;
			mm5->addr = (pci_resource_start(rhp->rdev.lldi.pdev, 0)
				    + A_PCIE_MA_SYNC) & PAGE_MASK;
			mm5->len = PAGE_SIZE;
			insert_mmap(ucontext, mm5);
		}
	}
	qhp->ibqp.qp_num = qhp->wq.sq.qid;
	init_timer(&(qhp->timer));
	PDBG("%s qhp %p sq_num_entries %d, rq_num_entries %d qpid 0x%0x\n",
	     __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
	     qhp->wq.sq.qid);
	return &qhp->ibqp;
err8:
	kfree(mm5);
err3:
	remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
err2:
	destroy_qp(&rhp->rdev, &qhp->wq,
		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
err1:
	kfree(qhp);
	return ERR_PTR(ret);
}

int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		      int attr_mask, struct ib_udata *udata)
{
	struct c4iw_dev *rhp;
	struct c4iw_qp *qhp;
	enum c4iw_qp_attr_mask mask = 0;
	struct c4iw_qp_attributes attrs;

	PDBG("%s ib_qp %p\n", __func__, ibqp);

	/* iwarp does not support the RTR state */
	if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
		attr_mask &= ~IB_QP_STATE;

	/* Make sure we still have something left to do */
	if (!attr_mask)
		return 0;

	memset(&attrs, 0, sizeof attrs);
	qhp = to_c4iw_qp(ibqp);
	rhp = qhp->rhp;

	attrs.next_state = c4iw_convert_state(attr->qp_state);
	attrs.enable_rdma_read = (attr->qp_access_flags &
			       IB_ACCESS_REMOTE_READ) ?  1 : 0;
	attrs.enable_rdma_write = (attr->qp_access_flags &
				IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
	attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;


	mask |= (attr_mask & IB_QP_STATE) ? C4IW_QP_ATTR_NEXT_STATE : 0;
	mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ?
			(C4IW_QP_ATTR_ENABLE_RDMA_READ |
			 C4IW_QP_ATTR_ENABLE_RDMA_WRITE |
			 C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0;

	/*
	 * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
	 * ringing the queue db when we're in DB_FULL mode.
	 */
	attrs.sq_db_inc = attr->sq_psn;
	attrs.rq_db_inc = attr->rq_psn;
	mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
	mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;

	return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
}

struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn)
{
	PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
	return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn);
}

int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		     int attr_mask, struct ib_qp_init_attr *init_attr)
{
	struct c4iw_qp *qhp = to_c4iw_qp(ibqp);

	memset(attr, 0, sizeof *attr);
	memset(init_attr, 0, sizeof *init_attr);
	attr->qp_state = to_ib_qp_state(qhp->attr.state);
	return 0;
}