Newer
Older
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
#include <linux/security.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/cdev.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-lo-hi.h>
* This implements the PCI exclusive functionality for a CXL device as it is
* defined by the Compute Express Link specification. CXL devices may surface
* certain functionality even if it isn't CXL enabled.
*
* The driver has several responsibilities, mainly:
* - Create the memX device and register on the CXL bus.
* - Enumerate device's register interface and map them.
* - Probe the device attributes to establish sysfs interface.
* - Provide an IOCTL interface to userspace to communicate with the device for
* things like firmware update.
*/
#define cxl_doorbell_busy(cxlm) \
(readl((cxlm)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) & \
CXLDEV_MBOX_CTRL_DOORBELL)
/* CXL 2.0 - 8.2.8.4 */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
enum opcode {
CXL_MBOX_OP_GET_FW_INFO = 0x0200,
CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400,
CXL_MBOX_OP_GET_LOG = 0x0401,
CXL_MBOX_OP_GET_PARTITION_INFO = 0x4100,
CXL_MBOX_OP_SET_PARTITION_INFO = 0x4101,
CXL_MBOX_OP_GET_HEALTH_INFO = 0x4200,
CXL_MBOX_OP_SET_SHUTDOWN_STATE = 0x4204,
CXL_MBOX_OP_SCAN_MEDIA = 0x4304,
CXL_MBOX_OP_GET_SCAN_MEDIA = 0x4305,
CXL_MBOX_OP_MAX = 0x10000
};
/**
* struct mbox_cmd - A command to be submitted to hardware.
* @opcode: (input) The command set and command submitted to hardware.
* @payload_in: (input) Pointer to the input payload.
* @payload_out: (output) Pointer to the output payload. Must be allocated by
* the caller.
* @size_in: (input) Number of bytes to load from @payload_in.
* @size_out: (input) Max number of bytes loaded into @payload_out.
* (output) Number of bytes generated by the device. For fixed size
* outputs commands this is always expected to be deterministic. For
* variable sized output commands, it tells the exact number of bytes
* written.
* @return_code: (output) Error code returned from hardware.
*
* This is the primary mechanism used to send commands to the hardware.
* All the fields except @payload_* correspond exactly to the fields described in
* Command Register section of the CXL 2.0 8.2.8.4.5. @payload_in and
* @payload_out are written to, and read from the Command Payload Registers
* defined in CXL 2.0 8.2.8.4.8.
*/
struct mbox_cmd {
u16 opcode;
void *payload_in;
void *payload_out;
size_t size_in;
size_t size_out;
u16 return_code;
#define CXL_MBOX_SUCCESS 0
};
static int cxl_mem_major;
static DEFINE_IDA(cxl_memdev_ida);
Dan Williams
committed
static DECLARE_RWSEM(cxl_memdev_rwsem);
static struct dentry *cxl_debugfs;
static bool cxl_raw_allow_all;
enum {
CEL_UUID,
VENDOR_DEBUG_UUID,
};
/* See CXL 2.0 Table 170. Get Log Input Payload */
static const uuid_t log_uuid[] = {
[CEL_UUID] = UUID_INIT(0xda9c0b5, 0xbf41, 0x4b78, 0x8f, 0x79, 0x96,
0xb1, 0x62, 0x3b, 0x3f, 0x17),
[VENDOR_DEBUG_UUID] = UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f,
0xd6, 0x07, 0x19, 0x40, 0x3d, 0x86),
};
/**
* struct cxl_mem_command - Driver representation of a memory device command
* @info: Command information as it exists for the UAPI
* @opcode: The actual bits used for the mailbox protocol
* @flags: Set of flags effecting driver behavior.
*
* * %CXL_CMD_FLAG_FORCE_ENABLE: In cases of error, commands with this flag
* will be enabled by the driver regardless of what hardware may have
* advertised.
*
* The cxl_mem_command is the driver's internal representation of commands that
* are supported by the driver. Some of these commands may not be supported by
* the hardware. The driver will use @info to validate the fields passed in by
* the user then submit the @opcode to the hardware.
*
* See struct cxl_command_info.
*/
struct cxl_mem_command {
struct cxl_command_info info;
enum opcode opcode;
u32 flags;
#define CXL_CMD_FLAG_NONE 0
#define CXL_CMD_FLAG_FORCE_ENABLE BIT(0)
#define CXL_CMD(_id, sin, sout, _flags) \
[CXL_MEM_COMMAND_ID_##_id] = { \
.info = { \
.id = CXL_MEM_COMMAND_ID_##_id, \
.size_in = sin, \
.size_out = sout, \
}, \
.opcode = CXL_MBOX_OP_##_id, \
}
/*
* This table defines the supported mailbox commands for the driver. This table
* is made up of a UAPI structure. Non-negative values as parameters in the
* table will be validated against the user's input. For example, if size_in is
* 0, and the user passed in 1, it is an error.
*/
static struct cxl_mem_command mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE),
CXL_CMD(GET_SUPPORTED_LOGS, 0, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
CXL_CMD(GET_FW_INFO, 0, 0x50, 0),
CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0),
CXL_CMD(GET_LSA, 0x8, ~0, 0),
CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0),
CXL_CMD(GET_LOG, 0x18, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
};
/*
* Commands that RAW doesn't permit. The rationale for each:
*
* CXL_MBOX_OP_ACTIVATE_FW: Firmware activation requires adjustment /
* coordination of transaction timeout values at the root bridge level.
*
* CXL_MBOX_OP_SET_PARTITION_INFO: The device memory map may change live
* and needs to be coordinated with HDM updates.
*
* CXL_MBOX_OP_SET_LSA: The label storage area may be cached by the
* driver and any writes from userspace invalidates those contents.
*
* CXL_MBOX_OP_SET_SHUTDOWN_STATE: Set shutdown state assumes no writes
* to the device after it is marked clean, userspace can not make that
* assertion.
*
* CXL_MBOX_OP_[GET_]SCAN_MEDIA: The kernel provides a native error list that
* is kept up to date with patrol notifications and error management.
*/
static u16 cxl_disabled_raw_commands[] = {
CXL_MBOX_OP_ACTIVATE_FW,
CXL_MBOX_OP_SET_PARTITION_INFO,
CXL_MBOX_OP_SET_LSA,
CXL_MBOX_OP_SET_SHUTDOWN_STATE,
CXL_MBOX_OP_SCAN_MEDIA,
CXL_MBOX_OP_GET_SCAN_MEDIA,
};
/*
* Command sets that RAW doesn't permit. All opcodes in this set are
* disabled because they pass plain text security payloads over the
* user/kernel boundary. This functionality is intended to be wrapped
* behind the keys ABI which allows for encrypted payloads in the UAPI
*/
static u8 security_command_sets[] = {
0x44, /* Sanitize */
0x45, /* Persistent Memory Data-at-rest Security */
0x46, /* Security Passthrough */
};
#define cxl_for_each_cmd(cmd) \
for ((cmd) = &mem_commands[0]; \
((cmd) - mem_commands) < ARRAY_SIZE(mem_commands); (cmd)++)
#define cxl_cmd_count ARRAY_SIZE(mem_commands)
static int cxl_mem_wait_for_doorbell(struct cxl_mem *cxlm)
{
const unsigned long start = jiffies;
unsigned long end = start;
while (cxl_doorbell_busy(cxlm)) {
end = jiffies;
if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
/* Check again in case preempted before timeout test */
if (!cxl_doorbell_busy(cxlm))
break;
return -ETIMEDOUT;
}
cpu_relax();
}
dev_dbg(&cxlm->pdev->dev, "Doorbell wait took %dms",
jiffies_to_msecs(end) - jiffies_to_msecs(start));
return 0;
}
static bool cxl_is_security_command(u16 opcode)
{
int i;
for (i = 0; i < ARRAY_SIZE(security_command_sets); i++)
if (security_command_sets[i] == (opcode >> 8))
return true;
return false;
}
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
static void cxl_mem_mbox_timeout(struct cxl_mem *cxlm,
struct mbox_cmd *mbox_cmd)
{
struct device *dev = &cxlm->pdev->dev;
dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n",
mbox_cmd->opcode, mbox_cmd->size_in);
}
/**
* __cxl_mem_mbox_send_cmd() - Execute a mailbox command
* @cxlm: The CXL memory device to communicate with.
* @mbox_cmd: Command to send to the memory device.
*
* Context: Any context. Expects mbox_mutex to be held.
* Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
* Caller should check the return code in @mbox_cmd to make sure it
* succeeded.
*
* This is a generic form of the CXL mailbox send command thus only using the
* registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
* devices, and perhaps other types of CXL devices may have further information
* available upon error conditions. Driver facilities wishing to send mailbox
* commands should use the wrapper command.
*
* The CXL spec allows for up to two mailboxes. The intention is for the primary
* mailbox to be OS controlled and the secondary mailbox to be used by system
* firmware. This allows the OS and firmware to communicate with the device and
* not need to coordinate with each other. The driver only uses the primary
* mailbox.
*/
static int __cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm,
struct mbox_cmd *mbox_cmd)
{
void __iomem *payload = cxlm->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
u64 cmd_reg, status_reg;
size_t out_len;
int rc;
lockdep_assert_held(&cxlm->mbox_mutex);
/*
* Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
* 1. Caller reads MB Control Register to verify doorbell is clear
* 2. Caller writes Command Register
* 3. Caller writes Command Payload Registers if input payload is non-empty
* 4. Caller writes MB Control Register to set doorbell
* 5. Caller either polls for doorbell to be clear or waits for interrupt if configured
* 6. Caller reads MB Status Register to fetch Return code
* 7. If command successful, Caller reads Command Register to get Payload Length
* 8. If output payload is non-empty, host reads Command Payload Registers
*
* Hardware is free to do whatever it wants before the doorbell is rung,
* and isn't allowed to change anything after it clears the doorbell. As
* such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
* also happen in any order (though some orders might not make sense).
*/
/* #1 */
if (cxl_doorbell_busy(cxlm)) {
dev_err_ratelimited(&cxlm->pdev->dev,
"Mailbox re-busy after acquiring\n");
return -EBUSY;
}
cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
mbox_cmd->opcode);
if (mbox_cmd->size_in) {
if (WARN_ON(!mbox_cmd->payload_in))
return -EINVAL;
cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
mbox_cmd->size_in);
memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
}
/* #2, #3 */
writeq(cmd_reg, cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
/* #4 */
dev_dbg(&cxlm->pdev->dev, "Sending command\n");
writel(CXLDEV_MBOX_CTRL_DOORBELL,
cxlm->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
/* #5 */
rc = cxl_mem_wait_for_doorbell(cxlm);
if (rc == -ETIMEDOUT) {
cxl_mem_mbox_timeout(cxlm, mbox_cmd);
return rc;
}
/* #6 */
status_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
mbox_cmd->return_code =
FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);
if (mbox_cmd->return_code != 0) {
dev_dbg(&cxlm->pdev->dev, "Mailbox operation had an error\n");
return 0;
}
/* #7 */
cmd_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);
/* #8 */
if (out_len && mbox_cmd->payload_out) {
/*
* Sanitize the copy. If hardware misbehaves, out_len per the
* spec can actually be greater than the max allowed size (21
* bits available but spec defined 1M max). The caller also may
* have requested less data than the hardware supplied even
* within spec.
*/
size_t n = min3(mbox_cmd->size_out, cxlm->payload_size, out_len);
memcpy_fromio(mbox_cmd->payload_out, payload, n);
mbox_cmd->size_out = n;
} else {
mbox_cmd->size_out = 0;
}
return 0;
}
/**
* cxl_mem_mbox_get() - Acquire exclusive access to the mailbox.
* @cxlm: The memory device to gain access to.
*
* Context: Any context. Takes the mbox_mutex.
* Return: 0 if exclusive access was acquired.
*/
static int cxl_mem_mbox_get(struct cxl_mem *cxlm)
{
struct device *dev = &cxlm->pdev->dev;
u64 md_status;
int rc;
mutex_lock_io(&cxlm->mbox_mutex);
/*
* XXX: There is some amount of ambiguity in the 2.0 version of the spec
* around the mailbox interface ready (8.2.8.5.1.1). The purpose of the
* bit is to allow firmware running on the device to notify the driver
* that it's ready to receive commands. It is unclear if the bit needs
* to be read for each transaction mailbox, ie. the firmware can switch
* it on and off as needed. Second, there is no defined timeout for
* mailbox ready, like there is for the doorbell interface.
*
* Assumptions:
* 1. The firmware might toggle the Mailbox Interface Ready bit, check
* it for every command.
*
* 2. If the doorbell is clear, the firmware should have first set the
* Mailbox Interface Ready bit. Therefore, waiting for the doorbell
* to be ready is sufficient.
*/
rc = cxl_mem_wait_for_doorbell(cxlm);
if (rc) {
dev_warn(dev, "Mailbox interface not ready\n");
goto out;
}
md_status = readq(cxlm->regs.memdev + CXLMDEV_STATUS_OFFSET);
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
if (!(md_status & CXLMDEV_MBOX_IF_READY && CXLMDEV_READY(md_status))) {
dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n");
rc = -EBUSY;
goto out;
}
/*
* Hardware shouldn't allow a ready status but also have failure bits
* set. Spit out an error, this should be a bug report
*/
rc = -EFAULT;
if (md_status & CXLMDEV_DEV_FATAL) {
dev_err(dev, "mbox: reported ready, but fatal\n");
goto out;
}
if (md_status & CXLMDEV_FW_HALT) {
dev_err(dev, "mbox: reported ready, but halted\n");
goto out;
}
if (CXLMDEV_RESET_NEEDED(md_status)) {
dev_err(dev, "mbox: reported ready, but reset needed\n");
goto out;
}
/* with lock held */
return 0;
out:
mutex_unlock(&cxlm->mbox_mutex);
return rc;
}
/**
* cxl_mem_mbox_put() - Release exclusive access to the mailbox.
* @cxlm: The CXL memory device to communicate with.
*
* Context: Any context. Expects mbox_mutex to be held.
*/
static void cxl_mem_mbox_put(struct cxl_mem *cxlm)
{
mutex_unlock(&cxlm->mbox_mutex);
}
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
/**
* handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
* @cxlm: The CXL memory device to communicate with.
* @cmd: The validated command.
* @in_payload: Pointer to userspace's input payload.
* @out_payload: Pointer to userspace's output payload.
* @size_out: (Input) Max payload size to copy out.
* (Output) Payload size hardware generated.
* @retval: Hardware generated return code from the operation.
*
* Return:
* * %0 - Mailbox transaction succeeded. This implies the mailbox
* protocol completed successfully not that the operation itself
* was successful.
* * %-ENOMEM - Couldn't allocate a bounce buffer.
* * %-EFAULT - Something happened with copy_to/from_user.
* * %-EINTR - Mailbox acquisition interrupted.
* * %-EXXX - Transaction level failures.
*
* Creates the appropriate mailbox command and dispatches it on behalf of a
* userspace request. The input and output payloads are copied between
* userspace.
*
* See cxl_send_cmd().
*/
static int handle_mailbox_cmd_from_user(struct cxl_mem *cxlm,
const struct cxl_mem_command *cmd,
u64 in_payload, u64 out_payload,
s32 *size_out, u32 *retval)
{
struct device *dev = &cxlm->pdev->dev;
struct mbox_cmd mbox_cmd = {
.opcode = cmd->opcode,
.size_in = cmd->info.size_in,
.size_out = cmd->info.size_out,
};
int rc;
if (cmd->info.size_out) {
mbox_cmd.payload_out = kvzalloc(cmd->info.size_out, GFP_KERNEL);
if (!mbox_cmd.payload_out)
return -ENOMEM;
}
if (cmd->info.size_in) {
mbox_cmd.payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
cmd->info.size_in);
if (IS_ERR(mbox_cmd.payload_in)) {
kvfree(mbox_cmd.payload_out);
}
rc = cxl_mem_mbox_get(cxlm);
if (rc)
goto out;
dev_dbg(dev,
"Submitting %s command for user\n"
"\topcode: %x\n"
"\tsize: %ub\n",
cxl_command_names[cmd->info.id].name, mbox_cmd.opcode,
cmd->info.size_in);
dev_WARN_ONCE(dev, cmd->info.id == CXL_MEM_COMMAND_ID_RAW,
"raw command path used\n");
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
cxl_mem_mbox_put(cxlm);
if (rc)
goto out;
/*
* @size_out contains the max size that's allowed to be written back out
* to userspace. While the payload may have written more output than
* this it will have to be ignored.
*/
if (mbox_cmd.size_out) {
dev_WARN_ONCE(dev, mbox_cmd.size_out > *size_out,
"Invalid return size\n");
if (copy_to_user(u64_to_user_ptr(out_payload),
mbox_cmd.payload_out, mbox_cmd.size_out)) {
rc = -EFAULT;
goto out;
}
}
*size_out = mbox_cmd.size_out;
*retval = mbox_cmd.return_code;
out:
kvfree(mbox_cmd.payload_in);
kvfree(mbox_cmd.payload_out);
return rc;
}
static bool cxl_mem_raw_command_allowed(u16 opcode)
{
int i;
if (!IS_ENABLED(CONFIG_CXL_MEM_RAW_COMMANDS))
return false;
if (security_locked_down(LOCKDOWN_NONE))
return false;
if (cxl_raw_allow_all)
return true;
if (cxl_is_security_command(opcode))
return false;
for (i = 0; i < ARRAY_SIZE(cxl_disabled_raw_commands); i++)
if (cxl_disabled_raw_commands[i] == opcode)
return false;
return true;
}
/**
* cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
* @cxlm: &struct cxl_mem device whose mailbox will be used.
* @send_cmd: &struct cxl_send_command copied in from userspace.
* @out_cmd: Sanitized and populated &struct cxl_mem_command.
*
* Return:
* * %0 - @out_cmd is ready to send.
* * %-ENOTTY - Invalid command specified.
* * %-EINVAL - Reserved fields or invalid values were used.
* * %-ENOMEM - Input or output buffer wasn't sized properly.
* * %-EPERM - Attempted to use a protected command.
*
* The result of this command is a fully validated command in @out_cmd that is
* safe to send to the hardware.
*
* See handle_mailbox_cmd_from_user()
*/
static int cxl_validate_cmd_from_user(struct cxl_mem *cxlm,
const struct cxl_send_command *send_cmd,
struct cxl_mem_command *out_cmd)
{
const struct cxl_command_info *info;
struct cxl_mem_command *c;
if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX)
return -ENOTTY;
/*
* The user can never specify an input payload larger than what hardware
* supports, but output can be arbitrarily large (simply write out as
* much data as the hardware provides).
*/
if (send_cmd->in.size > cxlm->payload_size)
return -EINVAL;
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
/*
* Checks are bypassed for raw commands but a WARN/taint will occur
* later in the callchain
*/
if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) {
const struct cxl_mem_command temp = {
.info = {
.id = CXL_MEM_COMMAND_ID_RAW,
.flags = 0,
.size_in = send_cmd->in.size,
.size_out = send_cmd->out.size,
},
.opcode = send_cmd->raw.opcode
};
if (send_cmd->raw.rsvd)
return -EINVAL;
/*
* Unlike supported commands, the output size of RAW commands
* gets passed along without further checking, so it must be
* validated here.
*/
if (send_cmd->out.size > cxlm->payload_size)
return -EINVAL;
if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
return -EPERM;
memcpy(out_cmd, &temp, sizeof(temp));
return 0;
}
if (send_cmd->flags & ~CXL_MEM_COMMAND_FLAG_MASK)
return -EINVAL;
if (send_cmd->rsvd)
return -EINVAL;
if (send_cmd->in.rsvd || send_cmd->out.rsvd)
return -EINVAL;
/* Convert user's command into the internal representation */
c = &mem_commands[send_cmd->id];
info = &c->info;
/* Check that the command is enabled for hardware */
if (!test_bit(info->id, cxlm->enabled_cmds))
return -ENOTTY;
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
/* Check the input buffer is the expected size */
if (info->size_in >= 0 && info->size_in != send_cmd->in.size)
return -ENOMEM;
/* Check the output buffer is at least large enough */
if (info->size_out >= 0 && send_cmd->out.size < info->size_out)
return -ENOMEM;
memcpy(out_cmd, c, sizeof(*c));
out_cmd->info.size_in = send_cmd->in.size;
/*
* XXX: out_cmd->info.size_out will be controlled by the driver, and the
* specified number of bytes @send_cmd->out.size will be copied back out
* to userspace.
*/
return 0;
}
static int cxl_query_cmd(struct cxl_memdev *cxlmd,
struct cxl_mem_query_commands __user *q)
{
struct device *dev = &cxlmd->dev;
struct cxl_mem_command *cmd;
u32 n_commands;
int j = 0;
dev_dbg(dev, "Query IOCTL\n");
if (get_user(n_commands, &q->n_commands))
return -EFAULT;
/* returns the total number if 0 elements are requested. */
if (n_commands == 0)
return put_user(cxl_cmd_count, &q->n_commands);
/*
* otherwise, return max(n_commands, total commands) cxl_command_info
* structures.
*/
cxl_for_each_cmd(cmd) {
const struct cxl_command_info *info = &cmd->info;
if (copy_to_user(&q->commands[j++], info, sizeof(*info)))
return -EFAULT;
if (j == n_commands)
break;
}
return 0;
}
static int cxl_send_cmd(struct cxl_memdev *cxlmd,
struct cxl_send_command __user *s)
{
struct cxl_mem *cxlm = cxlmd->cxlm;
struct device *dev = &cxlmd->dev;
struct cxl_send_command send;
struct cxl_mem_command c;
int rc;
dev_dbg(dev, "Send IOCTL\n");
if (copy_from_user(&send, s, sizeof(send)))
return -EFAULT;
rc = cxl_validate_cmd_from_user(cxlmd->cxlm, &send, &c);
if (rc)
return rc;
/* Prepare to handle a full payload for variable sized output */
if (c.info.size_out < 0)
c.info.size_out = cxlm->payload_size;
rc = handle_mailbox_cmd_from_user(cxlm, &c, send.in.payload,
send.out.payload, &send.out.size,
&send.retval);
if (rc)
return rc;
if (copy_to_user(s, &send, sizeof(send)))
return -EFAULT;
return 0;
}
static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
unsigned long arg)
{
switch (cmd) {
case CXL_MEM_QUERY_COMMANDS:
return cxl_query_cmd(cxlmd, (void __user *)arg);
case CXL_MEM_SEND_COMMAND:
return cxl_send_cmd(cxlmd, (void __user *)arg);
default:
return -ENOTTY;
}
}
static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
Dan Williams
committed
struct cxl_memdev *cxlmd = file->private_data;
int rc = -ENXIO;
Dan Williams
committed
down_read(&cxl_memdev_rwsem);
if (cxlmd->cxlm)
rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
up_read(&cxl_memdev_rwsem);
Dan Williams
committed
return rc;
}
Dan Williams
committed
static int cxl_memdev_open(struct inode *inode, struct file *file)
{
struct cxl_memdev *cxlmd =
container_of(inode->i_cdev, typeof(*cxlmd), cdev);
Dan Williams
committed
get_device(&cxlmd->dev);
file->private_data = cxlmd;
Dan Williams
committed
return 0;
}
static int cxl_memdev_release_file(struct inode *inode, struct file *file)
{
struct cxl_memdev *cxlmd =
container_of(inode->i_cdev, typeof(*cxlmd), cdev);
put_device(&cxlmd->dev);
return 0;
}
static const struct file_operations cxl_memdev_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = cxl_memdev_ioctl,
Dan Williams
committed
.open = cxl_memdev_open,
.release = cxl_memdev_release_file,
.compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
static inline struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
{
struct cxl_mem_command *c;
cxl_for_each_cmd(c)
if (c->opcode == opcode)
return c;
return NULL;
}
/**
* cxl_mem_mbox_send_cmd() - Send a mailbox command to a memory device.
* @cxlm: The CXL memory device to communicate with.
* @opcode: Opcode for the mailbox command.
* @in: The input payload for the mailbox command.
* @in_size: The length of the input payload
* @out: Caller allocated buffer for the output.
* @out_size: Expected size of output.
*
* Context: Any context. Will acquire and release mbox_mutex.
* Return:
* * %>=0 - Number of bytes returned in @out.
* * %-E2BIG - Payload is too large for hardware.
* * %-EBUSY - Couldn't acquire exclusive mailbox access.
* * %-EFAULT - Hardware error occurred.
* * %-ENXIO - Command completed, but device reported an error.
* * %-EIO - Unexpected output size.
*
* Mailbox commands may execute successfully yet the device itself reported an
* error. While this distinction can be useful for commands from userspace, the
* kernel will only be able to use results when both are successful.
*
* See __cxl_mem_mbox_send_cmd()
*/
static int cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm, u16 opcode,
void *in, size_t in_size,
void *out, size_t out_size)
{
const struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
struct mbox_cmd mbox_cmd = {
.opcode = opcode,
.payload_in = in,
.size_in = in_size,
.size_out = out_size,
.payload_out = out,
};
int rc;
if (out_size > cxlm->payload_size)
return -E2BIG;
rc = cxl_mem_mbox_get(cxlm);
if (rc)
return rc;
rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
cxl_mem_mbox_put(cxlm);
if (rc)
return rc;
/* TODO: Map return code to proper kernel style errno */
if (mbox_cmd.return_code != CXL_MBOX_SUCCESS)
return -ENXIO;
/*
* Variable sized commands can't be validated and so it's up to the
* caller to do that if they wish.
*/
if (cmd->info.size_out >= 0 && mbox_cmd.size_out != out_size)
return -EIO;
return 0;
}
static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
{
const int cap = readl(cxlm->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
cxlm->payload_size =
1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);
/*
* CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
*
* If the size is too small, mandatory commands will not work and so
* there's no point in going forward. If the size is too large, there's
* no harm is soft limiting it.
*/
cxlm->payload_size = min_t(size_t, cxlm->payload_size, SZ_1M);
if (cxlm->payload_size < 256) {
dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
cxlm->payload_size);
return -ENXIO;
}
dev_dbg(&cxlm->pdev->dev, "Mailbox payload sized %zu",
cxlm->payload_size);
return 0;
}
static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
struct cxl_mem *cxlm;
cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL);
if (!cxlm) {
dev_err(dev, "No memory available\n");
return ERR_PTR(-ENOMEM);
}
mutex_init(&cxlm->mbox_mutex);
cxlm->pdev = pdev;
cxlm->enabled_cmds =
devm_kmalloc_array(dev, BITS_TO_LONGS(cxl_cmd_count),
sizeof(unsigned long),
GFP_KERNEL | __GFP_ZERO);
if (!cxlm->enabled_cmds) {
dev_err(dev, "No memory available for bitmap\n");
return ERR_PTR(-ENOMEM);
static void __iomem *cxl_mem_map_regblock(struct cxl_mem *cxlm,
u8 bar, u64 offset)
{
struct pci_dev *pdev = cxlm->pdev;
struct device *dev = &pdev->dev;
/* Basic sanity check that BAR is big enough */
if (pci_resource_len(pdev, bar) < offset) {
dev_err(dev, "BAR%d: %pr: too small (offset: %#llx)\n", bar,
&pdev->resource[bar], (unsigned long long)offset);
addr = pcim_iomap(pdev, bar, 0);
if (!addr) {
dev_err(dev, "failed to map registers\n");
dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %#llx\n",
bar, offset);
return pcim_iomap_table(pdev)[bar] + offset;
static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
{
int pos;
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
if (!pos)
return 0;
while (pos) {
u16 vendor, id;
pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
return pos;
pos = pci_find_next_ext_capability(pdev, pos,
PCI_EXT_CAP_ID_DVSEC);
}
return 0;
}
static void cxl_decode_register_block(u32 reg_lo, u32 reg_hi,
u8 *bar, u64 *offset, u8 *reg_type)
{
*offset = ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK);
*bar = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);
*reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo);
}
/**
* cxl_mem_setup_regs() - Setup necessary MMIO.
* @cxlm: The CXL memory device to communicate with.
*
* Return: 0 if all necessary registers mapped.
*
* A memory device is required by spec to implement a certain set of MMIO
* regions. The purpose of this function is to enumerate and map those
* registers.
*/
static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
{
struct cxl_regs *regs = &cxlm->regs;
struct pci_dev *pdev = cxlm->pdev;
struct device *dev = &pdev->dev;
u32 regloc_size, regblocks;