Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
/*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2012 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* BSD LICENSE
*
* Copyright(c) 2012 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copy
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Intel PCIe NTB Linux driver
*
* Contact Information:
* Jon Mason <jon.mason@intel.com>
*/
#include <linux/debugfs.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "ntb_hw.h"
#include "ntb_regs.h"
#define NTB_NAME "Intel(R) PCI-E Non-Transparent Bridge Driver"
MODULE_DESCRIPTION(NTB_NAME);
MODULE_VERSION(NTB_VER);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Intel Corporation");
static bool xeon_errata_workaround = true;
module_param(xeon_errata_workaround, bool, 0644);
MODULE_PARM_DESC(xeon_errata_workaround, "Workaround for the Xeon Errata");
/* Connection types compared against the PPD config register field by
 * ntb_xeon_setup() and ntb_bwd_setup(); both accept only NTB_CONN_B2B.
 */
enum {
NTB_CONN_CLASSIC = 0,
NTB_CONN_B2B,
NTB_CONN_RP,
};
/* Device side stored in ntb_device.dev_type; ntb_device_setup() logs
 * NTB_DEV_USD as "USD/DSP" and anything else as "DSD/USP".
 */
enum {
NTB_DEV_USD = 0,
NTB_DEV_DSD,
};
/* Hardware family stored in ntb_device.hw_type: SNB_HW is set by
 * ntb_xeon_setup(), BWD_HW by ntb_bwd_setup().
 */
enum {
SNB_HW = 0,
BWD_HW,
};
/* NOTE(review): presumably the driver's debugfs directory; its users are not
 * visible in this part of the file - confirm.
 */
static struct dentry *debugfs_dir;
/* Translate memory window 0,1 to BAR 2,4
 * NOTE(review): this comment appears to describe the MW_TO_BAR() helper used
 * by ntb_set_mw_addr(); that definition does not follow it here - confirm.
 */
/* PCI IDs this driver binds to; the same IDs are dispatched on in
 * ntb_device_setup().  Published through MODULE_DEVICE_TABLE() below.
 */
static DEFINE_PCI_DEVICE_TABLE(ntb_pci_tbl) = {
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BWD)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_JSF)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_HSX)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_JSF)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_SNB)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_IVT)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_HSX)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_JSF)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_SNB)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_IVT)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_HSX)},
/* all-zero entry terminates the table */
{0}
};
MODULE_DEVICE_TABLE(pci, ntb_pci_tbl);
/**
* ntb_register_event_callback() - register event callback
* @ndev: pointer to ntb_device instance
* @func: callback function to register
*
* This function registers a callback for any HW driver events such as link
* up/down, power management notices and etc.
*
* RETURNS: An appropriate -ERRNO error value on error, or zero for success.
*/
int ntb_register_event_callback(struct ntb_device *ndev,
void (*func)(void *handle, enum ntb_hw_event event))
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
{
if (ndev->event_cb)
return -EINVAL;
ndev->event_cb = func;
return 0;
}
/**
 * ntb_unregister_event_callback() - drop the registered event callback
 * @ndev: pointer to ntb_device instance
 *
 * Clears the event callback pointer so that ntb_link_event() no longer
 * notifies anyone.  Safe to call when no callback is registered.
 */
void ntb_unregister_event_callback(struct ntb_device *ndev)
{
	ndev->event_cb = NULL;
}
/**
 * ntb_register_db_callback() - attach a handler to a doorbell interrupt
 * @ndev: pointer to ntb_device instance
 * @idx: zero based doorbell callback slot
 * @data: opaque pointer stored with the callback and passed back to it
 * @func: handler to call, receiving @data and the doorbell number
 *
 * Stores @func and @data in slot @idx, then clears bit
 * (@idx * bits_per_vector) in the primary doorbell mask register so the
 * doorbell interrupt is no longer masked.
 *
 * RETURNS: -EINVAL if @idx is out of range or the slot already has a
 * callback, or zero for success.
 */
int ntb_register_db_callback(struct ntb_device *ndev, unsigned int idx,
			     void *data, void (*func)(void *data, int db_num))
{
	unsigned long db_mask;

	if (idx < ndev->max_cbs && !ndev->db_cb[idx].callback) {
		ndev->db_cb[idx].callback = func;
		ndev->db_cb[idx].data = data;

		/* unmask interrupt */
		db_mask = readw(ndev->reg_ofs.pdb_mask);
		clear_bit(idx * ndev->bits_per_vector, &db_mask);
		writew(db_mask, ndev->reg_ofs.pdb_mask);

		return 0;
	}

	dev_warn(&ndev->pdev->dev, "Invalid Index.\n");
	return -EINVAL;
}
/**
 * ntb_unregister_db_callback() - detach the handler from a doorbell interrupt
 * @ndev: pointer to ntb_device instance
 * @idx: zero based doorbell callback slot
 *
 * Sets bit (@idx * bits_per_vector) in the primary doorbell mask register
 * and then clears the slot's callback pointer.  Does nothing if @idx is out
 * of range or the slot has no callback.
 */
void ntb_unregister_db_callback(struct ntb_device *ndev, unsigned int idx)
{
	unsigned long db_mask;

	if (idx < ndev->max_cbs && ndev->db_cb[idx].callback) {
		/* mask interrupt before dropping the handler */
		db_mask = readw(ndev->reg_ofs.pdb_mask);
		set_bit(idx * ndev->bits_per_vector, &db_mask);
		writew(db_mask, ndev->reg_ofs.pdb_mask);

		ndev->db_cb[idx].callback = NULL;
	}
}
/**
 * ntb_find_transport() - find the transport pointer
 * @pdev: pointer to pci device
 *
 * Looks up the ntb_device stored as the PCI device's driver data and returns
 * the transport pointer recorded in it by ntb_register_transport().
 *
 * RETURNS: pointer to transport (NULL if none is registered).
 */
void *ntb_find_transport(struct pci_dev *pdev)
{
	struct ntb_device *ndev;

	ndev = pci_get_drvdata(pdev);

	return ndev->ntb_transport;
}
/**
 * ntb_register_transport() - Register NTB transport with NTB HW driver
 * @pdev: pci device whose driver data is the ntb_device to reserve
 * @transport: transport identifier
 *
 * Records @transport in the ntb_device, reserving the hardware driver for
 * that transport.  Only one transport can be registered at a time.
 *
 * RETURNS: pointer to ntb_device, NULL if a transport is already registered.
 */
struct ntb_device *ntb_register_transport(struct pci_dev *pdev, void *transport)
{
	struct ntb_device *ndev = pci_get_drvdata(pdev);

	if (!ndev->ntb_transport) {
		ndev->ntb_transport = transport;
		return ndev;
	}

	return NULL;
}
/**
 * ntb_unregister_transport() - Unregister the transport with the NTB HW driver
 * @ndev: ntb_device of the transport to be freed
 *
 * Unregisters every doorbell callback and the event callback, then clears
 * the transport pointer.  Does nothing if no transport is registered.
 */
void ntb_unregister_transport(struct ntb_device *ndev)
{
	int cb;

	if (ndev->ntb_transport) {
		for (cb = 0; cb < ndev->max_cbs; cb++)
			ntb_unregister_db_callback(ndev, cb);

		ntb_unregister_event_callback(ndev);

		ndev->ntb_transport = NULL;
	}
}
/**
 * ntb_write_local_spad() - write to a local scratchpad register
 * @ndev: pointer to ntb_device instance
 * @idx: index to the scratchpad register, 0 based
 * @val: the data value to put into the register
 *
 * Writes the 32bit @val to scratchpad @idx in the reg_ofs.spad_read
 * register bank (4 bytes per scratchpad), overwriting whatever value was
 * previously held there.
 *
 * RETURNS: -EINVAL if @idx is not below limits.max_spads, or zero for
 * success.
 */
int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val)
{
	if (idx < ndev->limits.max_spads) {
		dev_dbg(&ndev->pdev->dev,
			"Writing %x to local scratch pad index %d\n",
			val, idx);
		writel(val, ndev->reg_ofs.spad_read + idx * 4);

		return 0;
	}

	return -EINVAL;
}
/**
 * ntb_read_local_spad() - read from a local scratchpad register
 * @ndev: pointer to ntb_device instance
 * @idx: index to scratchpad register, 0 based
 * @val: pointer to 32bit integer for storing the register value
 *
 * Reads the 32bit scratchpad @idx from the reg_ofs.spad_write register bank
 * (4 bytes per scratchpad) and stores it in @val.
 *
 * RETURNS: -EINVAL if @idx is not below limits.max_spads (@val is left
 * untouched), or zero for success.
 */
int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val)
{
	u32 spad;

	if (idx >= ndev->limits.max_spads)
		return -EINVAL;

	spad = readl(ndev->reg_ofs.spad_write + idx * 4);
	*val = spad;
	dev_dbg(&ndev->pdev->dev,
		"Reading %x from local scratch pad index %d\n", spad, idx);

	return 0;
}
/**
 * ntb_write_remote_spad() - write to a remote scratchpad register
 * @ndev: pointer to ntb_device instance
 * @idx: index to the scratchpad register, 0 based
 * @val: the data value to put into the register
 *
 * Writes the 32bit @val to scratchpad @idx in the reg_ofs.spad_write
 * register bank (4 bytes per scratchpad).  This is how the local system
 * hands data to the remote system's scratchpad.
 *
 * RETURNS: -EINVAL if @idx is not below limits.max_spads, or zero for
 * success.
 */
int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val)
{
	if (idx < ndev->limits.max_spads) {
		dev_dbg(&ndev->pdev->dev,
			"Writing %x to remote scratch pad index %d\n",
			val, idx);
		writel(val, ndev->reg_ofs.spad_write + idx * 4);

		return 0;
	}

	return -EINVAL;
}
/**
 * ntb_read_remote_spad() - read back a remote scratchpad register
 * @ndev: pointer to ntb_device instance
 * @idx: index to scratchpad register, 0 based
 * @val: pointer to 32bit integer for storing the register value
 *
 * Reads the 32bit scratchpad @idx from the reg_ofs.spad_read register bank
 * (4 bytes per scratchpad) and stores it in @val.  This allows the local
 * system to read the data it wrote to be mirrored on the remote system.
 *
 * RETURNS: -EINVAL if @idx is not below limits.max_spads (@val is left
 * untouched), or zero for success.
 */
int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val)
{
	u32 spad;

	if (idx >= ndev->limits.max_spads)
		return -EINVAL;

	spad = readl(ndev->reg_ofs.spad_read + idx * 4);
	*val = spad;
	dev_dbg(&ndev->pdev->dev,
		"Reading %x from remote scratch pad index %d\n", spad, idx);

	return 0;
}
/**
 * ntb_get_mw_vbase() - get virtual addr for the NTB memory window
 * @ndev: pointer to ntb_device instance
 * @mw: memory window number
 *
 * This function provides the base virtual address of the memory window
 * specified.
 *
 * RETURNS: pointer to virtual address, or NULL on error.
 */
void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw)
{
	/* NOTE(review): the range check was missing, leaving the NULL return
	 * unconditional.  limits.max_mw is the window count set up by
	 * ntb_xeon_setup(); confirm every hardware setup path initialises it.
	 */
	if (mw >= ndev->limits.max_mw)
		return NULL;

	return ndev->mw[mw].vbase;
}
/**
* ntb_get_mw_size() - return size of NTB memory window
* @ndev: pointer to ntb_device instance
* @mw: memory window number
*
* This function provides the physical size of the memory window specified
*
* RETURNS: the size of the memory window or zero on error
*/
u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw)
return 0;
return ndev->mw[mw].bar_sz;
}
/**
 * ntb_set_mw_addr - set the memory window address
 * @ndev: pointer to ntb_device instance
 * @mw: memory window number
 * @addr: base address for data
 *
 * This function sets the base physical address of the memory window.  This
 * memory address is where data from the remote system will be transferred
 * into or out of depending on how the transport is configured.  The address
 * is recorded in the window's phys_addr and written to the secondary BAR
 * translation register selected by MW_TO_BAR(@mw).  An out-of-range @mw is
 * ignored.
 */
void ntb_set_mw_addr(struct ntb_device *ndev, unsigned int mw, u64 addr)
{
	/* NOTE(review): the range check was missing, leaving the early return
	 * unconditional.  limits.max_mw is the window count set up by
	 * ntb_xeon_setup(); confirm every hardware setup path initialises it.
	 */
	if (mw >= ndev->limits.max_mw)
		return;

	dev_dbg(&ndev->pdev->dev, "Writing addr %Lx to BAR %d\n", addr,
		MW_TO_BAR(mw));

	ndev->mw[mw].phys_addr = addr;

	switch (MW_TO_BAR(mw)) {
	case NTB_BAR_23:
		writeq(addr, ndev->reg_ofs.sbar2_xlat);
		break;
	case NTB_BAR_45:
		writeq(addr, ndev->reg_ofs.sbar4_xlat);
		break;
	}
}
/**
 * ntb_ring_sdb() - Set the doorbell on the secondary/external side
 * @ndev: pointer to ntb_device instance
 * @db: doorbell to ring
 *
 * This function triggers a doorbell on the secondary/external side, which
 * will initiate an interrupt on the remote host.  On BWD hardware a single
 * bit (1 << @db) is written as a 64bit value; on other hardware all
 * bits_per_vector bits belonging to doorbell @db are written as a 16bit
 * value.  Nothing is returned.
 */
void ntb_ring_sdb(struct ntb_device *ndev, unsigned int db)
{
	dev_dbg(&ndev->pdev->dev, "%s: ringing doorbell %d\n", __func__, db);

	if (ndev->hw_type != BWD_HW) {
		writew(((1 << ndev->bits_per_vector) - 1) <<
		       (db * ndev->bits_per_vector), ndev->reg_ofs.sdb);
		return;
	}

	writeq((u64) 1 << db, ndev->reg_ofs.sdb);
}
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
/*
 * bwd_recover_link() - run the BWD link recovery register sequence
 * @ndev: device whose reg_base relative registers are accessed
 *
 * Called from bwd_link_recovery().  Writes a fixed byte sequence to the
 * ModPhy PCS registers, sleeps 100ms, then performs a read / log / modify /
 * write-back pass over several status registers and finally clears the
 * FORCEDETECT bit in LTSSMSTATEJMP.  The order of the accesses is preserved
 * exactly as written; do not reorder.  Sleeps, so it must not be called from
 * atomic context.
 */
static void bwd_recover_link(struct ntb_device *ndev)
{
u32 status;
/* Driver resets the NTB ModPhy lanes - magic! */
writeb(0xe0, ndev->reg_base + BWD_MODPHY_PCSREG6);
writeb(0x40, ndev->reg_base + BWD_MODPHY_PCSREG4);
writeb(0x60, ndev->reg_base + BWD_MODPHY_PCSREG4);
writeb(0x60, ndev->reg_base + BWD_MODPHY_PCSREG6);
/* Driver waits 100ms to allow the NTB ModPhy to settle */
msleep(100);
/* Clear AER Errors, write to clear */
status = readl(ndev->reg_base + BWD_ERRCORSTS_OFFSET);
dev_dbg(&ndev->pdev->dev, "ERRCORSTS = %x\n", status);
/* only the REP_ROLL bit (if it was set) is written back */
status &= PCI_ERR_COR_REP_ROLL;
writel(status, ndev->reg_base + BWD_ERRCORSTS_OFFSET);
/* Clear unexpected electrical idle event in LTSSM, write to clear */
status = readl(ndev->reg_base + BWD_LTSSMERRSTS0_OFFSET);
dev_dbg(&ndev->pdev->dev, "LTSSMERRSTS0 = %x\n", status);
status |= BWD_LTSSMERRSTS0_UNEXPECTEDEI;
writel(status, ndev->reg_base + BWD_LTSSMERRSTS0_OFFSET);
/* Clear DeSkew Buffer error, write to clear */
status = readl(ndev->reg_base + BWD_DESKEWSTS_OFFSET);
dev_dbg(&ndev->pdev->dev, "DESKEWSTS = %x\n", status);
status |= BWD_DESKEWSTS_DBERR;
writel(status, ndev->reg_base + BWD_DESKEWSTS_OFFSET);
/* IBSTERRRCRVSTS0: only the ERR_OFLOW bit (if it was set) is written back;
 * NOTE(review): presumably write-to-clear like the registers above - confirm
 */
status = readl(ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET);
dev_dbg(&ndev->pdev->dev, "IBSTERRRCRVSTS0 = %x\n", status);
status &= BWD_IBIST_ERR_OFLOW;
writel(status, ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET);
/* Releases the NTB state machine to allow the link to retrain */
status = readl(ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET);
dev_dbg(&ndev->pdev->dev, "LTSSMSTATEJMP = %x\n", status);
status &= ~BWD_LTSSMSTATEJMP_FORCEDETECT;
writel(status, ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET);
}
/*
 * ntb_link_event() - record a link state change and notify the upper layer
 * @ndev: pointer to ntb_device instance
 * @link_state: newly observed state, NTB_LINK_UP or NTB_LINK_DOWN
 *
 * Does nothing when @link_state matches the recorded link_status.  On link
 * up, the negotiated width and speed are read from the link status register
 * (MMIO on BWD, PCI config space otherwise) and cached in @ndev.  On link
 * down the cached width/speed are deliberately kept.  The registered event
 * callback, if any, is then invoked with the transport pointer.
 */
static void ntb_link_event(struct ntb_device *ndev, int link_state)
{
	unsigned int event;

	if (ndev->link_status == link_state)
		return;

	if (link_state == NTB_LINK_UP) {
		u16 status;

		dev_info(&ndev->pdev->dev, "Link Up\n");
		ndev->link_status = NTB_LINK_UP;
		event = NTB_EVENT_HW_LINK_UP;

		if (ndev->hw_type == BWD_HW)
			status = readw(ndev->reg_ofs.lnk_stat);
		else {
			int rc = pci_read_config_word(ndev->pdev,
						      SNB_LINK_STATUS_OFFSET,
						      &status);
			/* NOTE(review): on a failed config read the state has
			 * already been recorded as up but no event is sent.
			 */
			if (rc)
				return;
		}

		ndev->link_width = (status & NTB_LINK_WIDTH_MASK) >> 4;
		ndev->link_speed = (status & NTB_LINK_SPEED_MASK);
		dev_info(&ndev->pdev->dev, "Link Width %d, Link Speed %d\n",
			 ndev->link_width, ndev->link_speed);
	} else {
		dev_info(&ndev->pdev->dev, "Link Down\n");
		ndev->link_status = NTB_LINK_DOWN;
		event = NTB_EVENT_HW_LINK_DOWN;
		/* Don't modify link width/speed, we need it in link recovery */
	}

	/* notify the upper layer if we have an event change */
	if (ndev->event_cb)
		ndev->event_cb(ndev->ntb_transport, event);
}
/*
 * ntb_link_status() - sample the hardware link state and report changes
 * @ndev: pointer to ntb_device instance
 *
 * On BWD the state comes from the BWD_CNTL_LINK_DOWN bit of the link control
 * register; on other hardware from the NTB_LINK_STATUS_ACTIVE bit of the
 * link status word in PCI config space.  The result is handed to
 * ntb_link_event().
 *
 * RETURNS: the pci_read_config_word() error if the config read fails (no
 * event is generated), otherwise zero.
 */
static int ntb_link_status(struct ntb_device *ndev)
{
	int link_state;

	if (ndev->hw_type != BWD_HW) {
		u16 lnk_stat;
		int rc = pci_read_config_word(ndev->pdev,
					      SNB_LINK_STATUS_OFFSET,
					      &lnk_stat);

		if (rc)
			return rc;

		link_state = (lnk_stat & NTB_LINK_STATUS_ACTIVE) ?
			     NTB_LINK_UP : NTB_LINK_DOWN;
	} else {
		u32 ntb_cntl = readl(ndev->reg_ofs.lnk_cntl);

		link_state = (ntb_cntl & BWD_CNTL_LINK_DOWN) ?
			     NTB_LINK_DOWN : NTB_LINK_UP;
	}

	ntb_link_event(ndev, link_state);

	return 0;
}
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
static void bwd_link_recovery(struct work_struct *work)
{
struct ntb_device *ndev = container_of(work, struct ntb_device,
lr_timer.work);
u32 status32;
bwd_recover_link(ndev);
/* There is a potential race between the 2 NTB devices recovering at the
* same time. If the times are the same, the link will not recover and
* the driver will be stuck in this loop forever. Add a random interval
* to the recovery time to prevent this race.
*/
msleep(BWD_LINK_RECOVERY_TIME + prandom_u32() % BWD_LINK_RECOVERY_TIME);
status32 = readl(ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET);
if (status32 & BWD_LTSSMSTATEJMP_FORCEDETECT)
goto retry;
status32 = readl(ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET);
if (status32 & BWD_IBIST_ERR_OFLOW)
goto retry;
status32 = readl(ndev->reg_ofs.lnk_cntl);
if (!(status32 & BWD_CNTL_LINK_DOWN)) {
unsigned char speed, width;
u16 status16;
status16 = readw(ndev->reg_ofs.lnk_stat);
width = (status16 & NTB_LINK_WIDTH_MASK) >> 4;
speed = (status16 & NTB_LINK_SPEED_MASK);
if (ndev->link_width != width || ndev->link_speed != speed)
goto retry;
}
schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
return;
retry:
schedule_delayed_work(&ndev->lr_timer, NTB_HB_TIMEOUT);
}
/*
 * bwd_link_poll() - heartbeat work handler polling the BWD link state
 * @work: the work member of the device's hb_timer
 *
 * BWD doesn't have link status interrupt, poll on that platform.  If no
 * doorbell interrupt has refreshed last_ts within NTB_HB_TIMEOUT, the link
 * state is sampled.  When the link is down and the LTSSM FORCEDETECT bit is
 * set, link recovery (lr_timer) is queued immediately instead of the next
 * poll; otherwise the poll re-arms itself.
 */
static void bwd_link_poll(struct work_struct *work)
{
	struct ntb_device *ndev = container_of(work, struct ntb_device,
					       hb_timer.work);
	unsigned long ts = jiffies;

	/* If we haven't gotten an interrupt in a while, check the BWD link
	 * status bit.  time_after() keeps the comparison correct across
	 * jiffies wrap-around, unlike a plain '>'.
	 */
	if (time_after(ts, ndev->last_ts + NTB_HB_TIMEOUT)) {
		int rc = ntb_link_status(ndev);

		if (rc)
			dev_err(&ndev->pdev->dev,
				"Error determining link status\n");

		/* Check to see if a link error is the cause of the link down */
		if (ndev->link_status == NTB_LINK_DOWN) {
			u32 status32 = readl(ndev->reg_base +
					     BWD_LTSSMSTATEJMP_OFFSET);

			if (status32 & BWD_LTSSMSTATEJMP_FORCEDETECT) {
				schedule_delayed_work(&ndev->lr_timer, 0);
				return;
			}
		}
	}

	schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
}
static int ntb_xeon_setup(struct ntb_device *ndev)
{
int rc;
u8 val;
ndev->hw_type = SNB_HW;
rc = pci_read_config_byte(ndev->pdev, NTB_PPD_OFFSET, &val);
if (rc)
return rc;
switch (val & SNB_PPD_CONN_TYPE) {
case NTB_CONN_B2B:
ndev->conn_type = NTB_CONN_B2B;
break;
case NTB_CONN_CLASSIC:
case NTB_CONN_RP:
default:
dev_err(&ndev->pdev->dev, "Only B2B supported at this time\n");
return -EINVAL;
}
if (val & SNB_PPD_DEV_TYPE)
ndev->dev_type = NTB_DEV_USD;
ndev->reg_ofs.pdb = ndev->reg_base + SNB_PDOORBELL_OFFSET;
ndev->reg_ofs.pdb_mask = ndev->reg_base + SNB_PDBMSK_OFFSET;
ndev->reg_ofs.sbar2_xlat = ndev->reg_base + SNB_SBAR2XLAT_OFFSET;
ndev->reg_ofs.sbar4_xlat = ndev->reg_base + SNB_SBAR4XLAT_OFFSET;
ndev->reg_ofs.lnk_cntl = ndev->reg_base + SNB_NTBCNTL_OFFSET;
ndev->reg_ofs.lnk_stat = ndev->reg_base + SNB_LINK_STATUS_OFFSET;
ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET;
ndev->reg_ofs.spci_cmd = ndev->reg_base + SNB_PCICMD_OFFSET;
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
/* There is a Xeon hardware errata related to writes to
* SDOORBELL or B2BDOORBELL in conjunction with inbound access
* to NTB MMIO Space, which may hang the system. To workaround
* this use the second memory window to access the interrupt and
* scratch pad registers on the remote system.
*/
if (xeon_errata_workaround) {
if (!ndev->mw[1].bar_sz)
return -EINVAL;
ndev->limits.max_mw = SNB_ERRATA_MAX_MW;
ndev->reg_ofs.spad_write = ndev->mw[1].vbase +
SNB_SPAD_OFFSET;
ndev->reg_ofs.sdb = ndev->mw[1].vbase +
SNB_PDOORBELL_OFFSET;
/* Set the Limit register to 4k, the minimum size, to
* prevent an illegal access
*/
writeq(ndev->mw[1].bar_sz + 0x1000, ndev->reg_base +
SNB_PBAR4LMT_OFFSET);
} else {
ndev->limits.max_mw = SNB_MAX_MW;
ndev->reg_ofs.spad_write = ndev->reg_base +
SNB_B2B_SPAD_OFFSET;
ndev->reg_ofs.sdb = ndev->reg_base +
SNB_B2B_DOORBELL_OFFSET;
/* Disable the Limit register, just in case it is set to
* something silly
*/
writeq(0, ndev->reg_base + SNB_PBAR4LMT_OFFSET);
}
/* The Xeon errata workaround requires setting SBAR Base
* addresses to known values, so that the PBAR XLAT can be
* pointed at SBAR0 of the remote system.
*/
if (ndev->dev_type == NTB_DEV_USD) {
writeq(SNB_MBAR23_DSD_ADDR, ndev->reg_base +
SNB_PBAR2XLAT_OFFSET);
if (xeon_errata_workaround)
writeq(SNB_MBAR01_DSD_ADDR, ndev->reg_base +
SNB_PBAR4XLAT_OFFSET);
else {
writeq(SNB_MBAR45_DSD_ADDR, ndev->reg_base +
SNB_PBAR4XLAT_OFFSET);
/* B2B_XLAT_OFFSET is a 64bit register, but can
* only take 32bit writes
*/
writel(SNB_MBAR01_USD_ADDR & 0xffffffff,
ndev->reg_base + SNB_B2B_XLAT_OFFSETL);
writel(SNB_MBAR01_DSD_ADDR >> 32,
ndev->reg_base + SNB_B2B_XLAT_OFFSETU);
}
writeq(SNB_MBAR01_USD_ADDR, ndev->reg_base +
SNB_SBAR0BASE_OFFSET);
writeq(SNB_MBAR23_USD_ADDR, ndev->reg_base +
SNB_SBAR2BASE_OFFSET);
writeq(SNB_MBAR45_USD_ADDR, ndev->reg_base +
SNB_SBAR4BASE_OFFSET);
writeq(SNB_MBAR23_USD_ADDR, ndev->reg_base +
SNB_PBAR2XLAT_OFFSET);
if (xeon_errata_workaround)
writeq(SNB_MBAR01_USD_ADDR, ndev->reg_base +
SNB_PBAR4XLAT_OFFSET);
else {
writeq(SNB_MBAR45_USD_ADDR, ndev->reg_base +
SNB_PBAR4XLAT_OFFSET);
/* B2B_XLAT_OFFSET is a 64bit register, but can
* only take 32bit writes
*/
writel(SNB_MBAR01_USD_ADDR & 0xffffffff,
ndev->reg_base + SNB_B2B_XLAT_OFFSETL);
writel(SNB_MBAR01_USD_ADDR >> 32,
ndev->reg_base + SNB_B2B_XLAT_OFFSETU);
}
writeq(SNB_MBAR01_DSD_ADDR, ndev->reg_base +
SNB_SBAR0BASE_OFFSET);
writeq(SNB_MBAR23_DSD_ADDR, ndev->reg_base +
SNB_SBAR2BASE_OFFSET);
writeq(SNB_MBAR45_DSD_ADDR, ndev->reg_base +
SNB_SBAR4BASE_OFFSET);
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
ndev->limits.max_db_bits = SNB_MAX_DB_BITS;
ndev->limits.msix_cnt = SNB_MSIX_CNT;
ndev->bits_per_vector = SNB_DB_BITS_PER_VEC;
return 0;
}
/*
 * ntb_bwd_setup() - hardware specific initialisation for BWD devices
 * @ndev: pointer to ntb_device instance
 *
 * Reads the PPD config register to determine connection and device type,
 * starts PCI-E link training, fills in the register pointers and limits in
 * @ndev, and starts the heartbeat poll that stands in for the missing link
 * interrupt.
 *
 * RETURNS: a PCI config access error, -EINVAL if the connection type is not
 * B2B, or zero for success.
 */
static int ntb_bwd_setup(struct ntb_device *ndev)
{
	u32 ppd;
	int err;

	ndev->hw_type = BWD_HW;

	err = pci_read_config_dword(ndev->pdev, NTB_PPD_OFFSET, &ppd);
	if (err)
		return err;

	if (((ppd & BWD_PPD_CONN_TYPE) >> 8) != NTB_CONN_B2B) {
		dev_err(&ndev->pdev->dev, "Only B2B supported at this time\n");
		return -EINVAL;
	}
	ndev->conn_type = NTB_CONN_B2B;

	ndev->dev_type = (ppd & BWD_PPD_DEV_TYPE) ? NTB_DEV_DSD : NTB_DEV_USD;

	/* Initiate PCI-E link training */
	err = pci_write_config_dword(ndev->pdev, NTB_PPD_OFFSET,
				     ppd | BWD_PPD_INIT_LINK);
	if (err)
		return err;

	/* Registers that do not depend on the connection type */
	ndev->reg_ofs.pdb = ndev->reg_base + BWD_PDOORBELL_OFFSET;
	ndev->reg_ofs.pdb_mask = ndev->reg_base + BWD_PDBMSK_OFFSET;
	ndev->reg_ofs.sbar2_xlat = ndev->reg_base + BWD_SBAR2XLAT_OFFSET;
	ndev->reg_ofs.sbar4_xlat = ndev->reg_base + BWD_SBAR4XLAT_OFFSET;
	ndev->reg_ofs.lnk_cntl = ndev->reg_base + BWD_NTBCNTL_OFFSET;
	ndev->reg_ofs.lnk_stat = ndev->reg_base + BWD_LINK_STATUS_OFFSET;
	ndev->reg_ofs.spad_read = ndev->reg_base + BWD_SPAD_OFFSET;
	ndev->reg_ofs.spci_cmd = ndev->reg_base + BWD_PCICMD_OFFSET;

	/* Doorbell/scratchpad write targets depend on the connection type */
	if (ndev->conn_type != NTB_CONN_B2B) {
		ndev->reg_ofs.sdb = ndev->reg_base + BWD_PDOORBELL_OFFSET;
		ndev->reg_ofs.spad_write = ndev->reg_base + BWD_SPAD_OFFSET;
		ndev->limits.max_spads = BWD_MAX_COMPAT_SPADS;
	} else {
		ndev->reg_ofs.sdb = ndev->reg_base + BWD_B2B_DOORBELL_OFFSET;
		ndev->reg_ofs.spad_write = ndev->reg_base + BWD_B2B_SPAD_OFFSET;
		ndev->limits.max_spads = BWD_MAX_SPADS;
	}

	ndev->limits.max_db_bits = BWD_MAX_DB_BITS;
	ndev->limits.msix_cnt = BWD_MSIX_CNT;
	ndev->bits_per_vector = BWD_DB_BITS_PER_VEC;

	/* Since bwd doesn't have a link interrupt, setup a poll timer */
	INIT_DELAYED_WORK(&ndev->hb_timer, bwd_link_poll);
	INIT_DELAYED_WORK(&ndev->lr_timer, bwd_link_recovery);
	schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);

	return 0;
}
/*
 * ntb_device_setup() - dispatch to the hardware specific setup routine
 * @ndev: pointer to ntb_device instance
 *
 * Selects ntb_xeon_setup() or ntb_bwd_setup() from the PCI device ID.  Only
 * after that routine succeeds is the device type logged and Bus Master and
 * Memory Space enabled on the secondary side, because the spci_cmd register
 * pointer is filled in by the hardware specific setup.
 *
 * RETURNS: -ENODEV for an unknown device ID, the hardware specific setup
 * error, or zero for success.
 */
static int ntb_device_setup(struct ntb_device *ndev)
{
	int rc;

	switch (ndev->pdev->device) {
	case PCI_DEVICE_ID_INTEL_NTB_SS_JSF:
	case PCI_DEVICE_ID_INTEL_NTB_SS_SNB:
	case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
	case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
	case PCI_DEVICE_ID_INTEL_NTB_PS_JSF:
	case PCI_DEVICE_ID_INTEL_NTB_PS_SNB:
	case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
	case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
	case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
	case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
	case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
	case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
		rc = ntb_xeon_setup(ndev);
		break;
	case PCI_DEVICE_ID_INTEL_NTB_B2B_BWD:
		rc = ntb_bwd_setup(ndev);
		break;
	default:
		rc = -ENODEV;
	}

	/* Do not touch dev_type or spci_cmd if setup did not complete */
	if (rc)
		return rc;

	dev_info(&ndev->pdev->dev, "Device Type = %s\n",
		 ndev->dev_type == NTB_DEV_USD ? "USD/DSP" : "DSD/USP");

	/* Enable Bus Master and Memory Space on the secondary side */
	writew(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER, ndev->reg_ofs.spci_cmd);

	return 0;
}
static void ntb_device_free(struct ntb_device *ndev)
{
cancel_delayed_work_sync(&ndev->hb_timer);
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
}
/*
 * bwd_callback_msix_irq() - per-doorbell interrupt handler for BWD
 * @irq: interrupt number, used for logging only
 * @data: the struct ntb_db_cb slot for this doorbell
 *
 * Runs the slot's callback if one is registered, refreshes the heartbeat
 * timestamp and writes the doorbell's bit back to the primary doorbell
 * register.
 *
 * RETURNS: always IRQ_HANDLED.
 */
static irqreturn_t bwd_callback_msix_irq(int irq, void *data)
{
	struct ntb_db_cb *cb = data;
	struct ntb_device *ndev = cb->ndev;

	dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for DB %d\n", irq,
		cb->db_num);

	if (cb->callback)
		cb->callback(cb->data, cb->db_num);

	/* Any interrupt at all proves the link is alive, so there is no
	 * need to look for a dedicated heartbeat irq.
	 */
	ndev->last_ts = jiffies;

	writeq((u64) 1 << cb->db_num, ndev->reg_ofs.pdb);

	return IRQ_HANDLED;
}
/*
 * xeon_callback_msix_irq() - per-doorbell interrupt handler for Xeon
 * @irq: interrupt number, used for logging only
 * @data: the struct ntb_db_cb slot for this doorbell
 *
 * Runs the slot's callback if one is registered, then writes all
 * bits_per_vector bits belonging to this doorbell back to the primary
 * doorbell register.
 *
 * RETURNS: always IRQ_HANDLED.
 */
static irqreturn_t xeon_callback_msix_irq(int irq, void *data)
{
	struct ntb_db_cb *cb = data;
	struct ntb_device *ndev = cb->ndev;

	dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for DB %d\n", irq,
		cb->db_num);

	if (cb->callback)
		cb->callback(cb->data, cb->db_num);

	/* Sandybridge has a 16 bit interrupt register but just 4 vectors:
	 * the first 3 vectors own 5 bits each and the 4th owns the single
	 * link interrupt bit.
	 */
	writew(((1 << ndev->bits_per_vector) - 1) <<
	       (cb->db_num * ndev->bits_per_vector), ndev->reg_ofs.pdb);

	return IRQ_HANDLED;
}
/*
 * xeon_event_msix_irq() - link event interrupt handler
 * @irq: interrupt number, used for logging only
 * @dev: the struct ntb_device the interrupt belongs to
 *
 * Since we do not have a HW doorbell in BWD, this is only used in JF/JT.
 * Samples the link state via ntb_link_status() and then writes the link bit
 * back to the primary doorbell register.
 *
 * RETURNS: always IRQ_HANDLED.
 */
static irqreturn_t xeon_event_msix_irq(int irq, void *dev)
{
	struct ntb_device *ndev = dev;

	dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for Events\n", irq);

	if (ntb_link_status(ndev))
		dev_err(&ndev->pdev->dev, "Error determining link status\n");

	/* bit 15 is always the link bit */
	writew(1 << ndev->limits.max_db_bits, ndev->reg_ofs.pdb);

	return IRQ_HANDLED;
}
/*
 * ntb_interrupt() - shared interrupt handler that demultiplexes doorbells
 * @irq: interrupt number, passed through to the per-doorbell handlers
 * @dev: the struct ntb_device the interrupt belongs to
 *
 * Reads the primary doorbell register and calls the per-doorbell handler for
 * every set bit, lowest bit first, using the bit number as the db_cb index.
 * On non-BWD hardware the SNB_DB_HW_LINK bit is handled first through
 * xeon_event_msix_irq() and excluded from the doorbell walk.
 *
 * RETURNS: always IRQ_HANDLED.
 */
static irqreturn_t ntb_interrupt(int irq, void *dev)
{
	struct ntb_device *ndev = dev;

	if (ndev->hw_type == BWD_HW) {
		u64 pending = readq(ndev->reg_ofs.pdb);

		dev_dbg(&ndev->pdev->dev, "irq %d - pdb = %Lx\n", irq, pending);

		/* pending &= pending - 1 drops the lowest set bit */
		for (; pending; pending &= pending - 1)
			bwd_callback_msix_irq(irq,
					      &ndev->db_cb[__ffs(pending)]);
	} else {
		u16 pending = readw(ndev->reg_ofs.pdb);

		dev_dbg(&ndev->pdev->dev, "irq %d - pdb = %x sdb %x\n", irq,
			pending, readw(ndev->reg_ofs.sdb));

		if (pending & SNB_DB_HW_LINK) {
			xeon_event_msix_irq(irq, dev);
			pending &= ~SNB_DB_HW_LINK;
		}

		for (; pending; pending &= pending - 1)
			xeon_callback_msix_irq(irq,
					       &ndev->db_cb[__ffs(pending)]);
	}

	return IRQ_HANDLED;
}
static int ntb_setup_msix(struct ntb_device *ndev)
{
struct pci_dev *pdev = ndev->pdev;
struct msix_entry *msix;
int msix_entries;
int rc, i, pos;
u16 val;
pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
if (!pos) {
rc = -EIO;
goto err;
}
rc = pci_read_config_word(pdev, pos + PCI_MSIX_FLAGS, &val);
if (rc)
goto err;
msix_entries = msix_table_size(val);
if (msix_entries > ndev->limits.msix_cnt) {
rc = -EINVAL;
goto err;
}
ndev->msix_entries = kmalloc(sizeof(struct msix_entry) * msix_entries,
GFP_KERNEL);
if (!ndev->msix_entries) {
rc = -ENOMEM;
goto err;
}
for (i = 0; i < msix_entries; i++)
ndev->msix_entries[i].entry = i;
rc = pci_enable_msix(pdev, ndev->msix_entries, msix_entries);
if (rc < 0)
goto err1;
if (rc > 0) {
/* On SNB, the link interrupt is always tied to 4th vector. If