Commit 65959522 authored by Maor Gottlieb's avatar Maor Gottlieb Committed by Jason Gunthorpe
Browse files

RDMA: Add support to dump resource tracker in RAW format

Add support to get resource dump in raw format. It enable drivers to
return the entire device specific QP/CQ/MR context without a need from the
driver to set each field separately.

The raw query returns only the device specific data, general data is still
returned by using the existing queries.

Example:

$ rdma res show mr dev mlx5_1 mrn 2 -r -j
[{"ifindex":7,"ifname":"mlx5_1",
"data":[0,4,255,254,0,0,0,0,0,0,0,0,16,28,0,216,...]}]

Link: https://lore.kernel.org/r/20200623113043.1228482-9-leon@kernel.org


Signed-off-by: default avatarMaor Gottlieb <maorg@mellanox.com>
Signed-off-by: default avatarLeon Romanovsky <leonro@mellanox.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@nvidia.com>
parent 211cd945
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -2619,8 +2619,11 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
	SET_DEVICE_OP(dev_ops, enable_driver);
	SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry);
	SET_DEVICE_OP(dev_ops, fill_res_cq_entry);
	SET_DEVICE_OP(dev_ops, fill_res_cq_entry_raw);
	SET_DEVICE_OP(dev_ops, fill_res_mr_entry);
	SET_DEVICE_OP(dev_ops, fill_res_mr_entry_raw);
	SET_DEVICE_OP(dev_ops, fill_res_qp_entry);
	SET_DEVICE_OP(dev_ops, fill_res_qp_entry_raw);
	SET_DEVICE_OP(dev_ops, fill_stat_mr_entry);
	SET_DEVICE_OP(dev_ops, get_dev_fw_str);
	SET_DEVICE_OP(dev_ops, get_dma_mr);
+118 −62
Original line number Diff line number Diff line
@@ -114,6 +114,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_RAW]		= { .type = NLA_BINARY },
	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
@@ -446,11 +447,11 @@ static int fill_res_name_pid(struct sk_buff *msg,
	return err ? -EMSGSIZE : 0;
}

static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
static int fill_res_qp_entry_query(struct sk_buff *msg,
				   struct rdma_restrack_entry *res,
				   struct ib_device *dev,
				   struct ib_qp *qp)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;
	struct ib_qp_init_attr qp_init_attr;
	struct ib_qp_attr qp_attr;
	int ret;
@@ -459,16 +460,6 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
	if (ret)
		return ret;

	if (port && port != qp_attr.port_num)
		return -EAGAIN;

	/* In create_qp() port is not set yet */
	if (qp_attr.port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
		goto err;
	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
				qp_attr.dest_qp_num))
@@ -492,13 +483,6 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
		goto err;

	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (dev->ops.fill_res_qp_entry)
		return dev->ops.fill_res_qp_entry(msg, qp);
	return 0;
@@ -506,6 +490,48 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
err:	return -EMSGSIZE;
}

static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;
	int ret;

	if (port && port != qp->port)
		return -EAGAIN;

	/* In create_qp() port is not set yet */
	if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
		return -EINVAL;

	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
	if (ret)
		return -EMSGSIZE;

	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
		return -EMSGSIZE;

	ret = fill_res_name_pid(msg, res);
	if (ret)
		return -EMSGSIZE;

	return fill_res_qp_entry_query(msg, res, dev, qp);
}

static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;

	if (port && port != qp->port)
		return -EAGAIN;
	if (!dev->ops.fill_res_qp_entry_raw)
		return -EINVAL;
	return dev->ops.fill_res_qp_entry_raw(msg, qp);
}

static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
				struct rdma_restrack_entry *res, uint32_t port)
{
@@ -565,34 +591,42 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
	struct ib_device *dev = cq->device;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
		goto err;
		return -EMSGSIZE;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
		goto err;
		return -EMSGSIZE;

	/* Poll context is only valid for kernel CQs */
	if (rdma_is_kernel_res(res) &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
		goto err;
		return -EMSGSIZE;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
		goto err;
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
		goto err;
		return -EMSGSIZE;
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
			cq->uobject->uevent.uobject.context->res.id))
		goto err;
		return -EMSGSIZE;

	if (fill_res_name_pid(msg, res))
		goto err;
		return -EMSGSIZE;

	if (dev->ops.fill_res_cq_entry)
		return dev->ops.fill_res_cq_entry(msg, cq);
	return 0;
	return (dev->ops.fill_res_cq_entry) ?
		dev->ops.fill_res_cq_entry(msg, cq) : 0;
}

err:	return -EMSGSIZE;
static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);
	struct ib_device *dev = cq->device;

	if (!dev->ops.fill_res_cq_entry_raw)
		return -EINVAL;
	return dev->ops.fill_res_cq_entry_raw(msg, cq);
}

static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
@@ -603,30 +637,39 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,

	if (has_cap_net_admin) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
			goto err;
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
			goto err;
			return -EMSGSIZE;
	}

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
			      RDMA_NLDEV_ATTR_PAD))
		goto err;
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
		goto err;
		return -EMSGSIZE;

	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
		goto err;
		return -EMSGSIZE;

	if (fill_res_name_pid(msg, res))
		goto err;
		return -EMSGSIZE;

	if (dev->ops.fill_res_mr_entry)
		return dev->ops.fill_res_mr_entry(msg, mr);
	return 0;
	return (dev->ops.fill_res_mr_entry) ?
		       dev->ops.fill_res_mr_entry(msg, mr) :
		       0;
}

err:	return -EMSGSIZE;
static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	if (!dev->ops.fill_res_mr_entry_raw)
		return -EINVAL;
	return dev->ops.fill_res_mr_entry_raw(msg, mr);
}

static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
@@ -1149,7 +1192,6 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,

struct nldev_fill_res_entry {
	enum rdma_nldev_attr nldev_attr;
	enum rdma_nldev_command nldev_cmd;
	u8 flags;
	u32 entry;
	u32 id;
@@ -1161,40 +1203,34 @@ enum nldev_res_flags {

static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
	[RDMA_RESTRACK_QP] = {
		.nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_LQPN,
	},
	[RDMA_RESTRACK_CM_ID] = {
		.nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
	},
	[RDMA_RESTRACK_CQ] = {
		.nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CQN,
	},
	[RDMA_RESTRACK_MR] = {
		.nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_MRN,
	},
	[RDMA_RESTRACK_PD] = {
		.nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_PDN,
	},
	[RDMA_RESTRACK_COUNTER] = {
		.nldev_cmd = RDMA_NLDEV_CMD_STAT_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
		.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
		.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
@@ -1253,7 +1289,8 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NL_GET_OP(nlh->nlmsg_type)),
			0, 0);

	if (fill_nldev_handle(msg, device)) {
@@ -1331,7 +1368,8 @@ static int res_get_common_dumpit(struct sk_buff *skb,
	}

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
			0, NLM_F_MULTI);

	if (fill_nldev_handle(skb, device)) {
@@ -1429,10 +1467,13 @@ next: idx++;
	}

RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);

static LIST_HEAD(link_ops);
@@ -2117,6 +2158,21 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
		.doit = nldev_stat_del_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
		.doit = nldev_res_get_qp_raw_doit,
		.dump = nldev_res_get_qp_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
		.doit = nldev_res_get_cq_raw_doit,
		.dump = nldev_res_get_cq_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
		.doit = nldev_res_get_mr_raw_doit,
		.dump = nldev_res_get_mr_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
};

void __init nldev_init(void)
+3 −0
Original line number Diff line number Diff line
@@ -2583,8 +2583,11 @@ struct ib_device_ops {
	 * Allows rdma drivers to add their own restrack attributes.
	 */
	int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr);
	int (*fill_res_mr_entry_raw)(struct sk_buff *msg, struct ib_mr *ibmr);
	int (*fill_res_cq_entry)(struct sk_buff *msg, struct ib_cq *ibcq);
	int (*fill_res_cq_entry_raw)(struct sk_buff *msg, struct ib_cq *ibcq);
	int (*fill_res_qp_entry)(struct sk_buff *msg, struct ib_qp *ibqp);
	int (*fill_res_qp_entry_raw)(struct sk_buff *msg, struct ib_qp *ibqp);
	int (*fill_res_cm_id_entry)(struct sk_buff *msg, struct rdma_cm_id *id);

	/* Device lifecycle callbacks */
+8 −0
Original line number Diff line number Diff line
@@ -287,6 +287,12 @@ enum rdma_nldev_command {

	RDMA_NLDEV_CMD_STAT_DEL,

	RDMA_NLDEV_CMD_RES_QP_GET_RAW,

	RDMA_NLDEV_CMD_RES_CQ_GET_RAW,

	RDMA_NLDEV_CMD_RES_MR_GET_RAW,

	RDMA_NLDEV_NUM_OPS
};

@@ -525,6 +531,8 @@ enum rdma_nldev_attr {
	 */
	RDMA_NLDEV_ATTR_DEV_DIM,                /* u8 */

	RDMA_NLDEV_ATTR_RES_RAW,	/* binary */

	/*
	 * Always the end
	 */