Skip to content
cxgb4_main.c 92.7 KiB
Newer Older
	{
		int pf, vf;

		for (pf = 0; pf < ARRAY_SIZE(num_vf); pf++) {
			if (num_vf[pf] <= 0)
				continue;

			/* VF numbering starts at 1! */
			for (vf = 1; vf <= num_vf[pf]; vf++) {
				ret = t4_cfg_pfvf(adap, 0, pf, vf,
						  VFRES_NEQ, VFRES_NETHCTRL,
						  VFRES_NIQFLINT, VFRES_NIQ,
						  VFRES_TC, VFRES_NVI,
						  FW_PFVF_CMD_CMASK_MASK,
						  pfvfres_pmask(adap, pf, vf),
						  VFRES_NEXACTF,
						  VFRES_R_CAPS, VFRES_WX_CAPS);
				if (ret < 0)
					dev_warn(adap->pdev_dev, "failed to "
						 "provision pf/vf=%d/%d; "
						 "err=%d\n", pf, vf, ret);
			}
		}
	}
#endif

	return 0;

	/*
	 * If a command timed out or failed with EIO FW does not operate within
	 * its spec or something catastrophic happened to HW/FW, stop issuing
	 * commands.
	 */
bye:	if (ret != -ETIMEDOUT && ret != -EIO)
		t4_fw_bye(adap, 0);
	return ret;
}

/* EEH callbacks */

static pci_ers_result_t eeh_err_detected(struct pci_dev *pdev,
					 pci_channel_state_t state)
{
	int i;
	struct adapter *adap = pci_get_drvdata(pdev);

	if (!adap)
		goto out;

	rtnl_lock();
	adap->flags &= ~FW_OK;
	notify_ulds(adap, CXGB4_STATE_START_RECOVERY);
	for_each_port(adap, i) {
		struct net_device *dev = adap->port[i];

		netif_device_detach(dev);
		netif_carrier_off(dev);
	}
	if (adap->flags & FULL_INIT_DONE)
		cxgb_down(adap);
	rtnl_unlock();
	pci_disable_device(pdev);
out:	return state == pci_channel_io_perm_failure ?
		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
}

static pci_ers_result_t eeh_slot_reset(struct pci_dev *pdev)
{
	int i, ret;
	struct fw_caps_config_cmd c;
	struct adapter *adap = pci_get_drvdata(pdev);

	if (!adap) {
		pci_restore_state(pdev);
		pci_save_state(pdev);
		return PCI_ERS_RESULT_RECOVERED;
	}

	if (pci_enable_device(pdev)) {
		dev_err(&pdev->dev, "cannot reenable PCI device after reset\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}

	pci_set_master(pdev);
	pci_restore_state(pdev);
	pci_save_state(pdev);
	pci_cleanup_aer_uncorrect_error_status(pdev);

	if (t4_wait_dev_ready(adap) < 0)
		return PCI_ERS_RESULT_DISCONNECT;
	if (t4_fw_hello(adap, 0, 0, MASTER_MUST, NULL))
		return PCI_ERS_RESULT_DISCONNECT;
	adap->flags |= FW_OK;
	if (adap_init1(adap, &c))
		return PCI_ERS_RESULT_DISCONNECT;

	for_each_port(adap, i) {
		struct port_info *p = adap2pinfo(adap, i);

		ret = t4_alloc_vi(adap, 0, p->tx_chan, 0, 0, 1, NULL, NULL);
		if (ret < 0)
			return PCI_ERS_RESULT_DISCONNECT;
		p->viid = ret;
		p->xact_addr_filt = -1;
	}

	t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
		     adap->params.b_wnd);
	if (cxgb_up(adap))
		return PCI_ERS_RESULT_DISCONNECT;
	return PCI_ERS_RESULT_RECOVERED;
}

static void eeh_resume(struct pci_dev *pdev)
{
	int i;
	struct adapter *adap = pci_get_drvdata(pdev);

	if (!adap)
		return;

	rtnl_lock();
	for_each_port(adap, i) {
		struct net_device *dev = adap->port[i];

		if (netif_running(dev)) {
			link_start(dev);
			cxgb_set_rxmode(dev);
		}
		netif_device_attach(dev);
	}
	rtnl_unlock();
}

static struct pci_error_handlers cxgb4_eeh = {
	.error_detected = eeh_err_detected,
	.slot_reset     = eeh_slot_reset,
	.resume         = eeh_resume,
};

static inline bool is_10g_port(const struct link_config *lc)
{
	return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0;
}

static inline void init_rspq(struct sge_rspq *q, u8 timer_idx, u8 pkt_cnt_idx,
			     unsigned int size, unsigned int iqe_size)
{
	q->intr_params = QINTR_TIMER_IDX(timer_idx) |
			 (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0);
	q->pktcnt_idx = pkt_cnt_idx < SGE_NCOUNTERS ? pkt_cnt_idx : 0;
	q->iqe_len = iqe_size;
	q->size = size;
}

/*
 * Perform default configuration of DMA queues depending on the number and type
 * of ports we found and the number of available CPUs.  Most settings can be
 * modified by the admin prior to actual use.
 */
static void __devinit cfg_queues(struct adapter *adap)
{
	struct sge *s = &adap->sge;
	int i, q10g = 0, n10g = 0, qidx = 0;

	for_each_port(adap, i)
		n10g += is_10g_port(&adap2pinfo(adap, i)->link_cfg);

	/*
	 * We default to 1 queue per non-10G port and up to # of cores queues
	 * per 10G port.
	 */
	if (n10g)
		q10g = (MAX_ETH_QSETS - (adap->params.nports - n10g)) / n10g;
	if (q10g > num_online_cpus())
		q10g = num_online_cpus();

	for_each_port(adap, i) {
		struct port_info *pi = adap2pinfo(adap, i);

		pi->first_qset = qidx;
		pi->nqsets = is_10g_port(&pi->link_cfg) ? q10g : 1;
		qidx += pi->nqsets;
	}

	s->ethqsets = qidx;
	s->max_ethqsets = qidx;   /* MSI-X may lower it later */

	if (is_offload(adap)) {
		/*
		 * For offload we use 1 queue/channel if all ports are up to 1G,
		 * otherwise we divide all available queues amongst the channels
		 * capped by the number of available cores.
		 */
		if (n10g) {
			i = min_t(int, ARRAY_SIZE(s->ofldrxq),
				  num_online_cpus());
			s->ofldqsets = roundup(i, adap->params.nports);
		} else
			s->ofldqsets = adap->params.nports;
		/* For RDMA one Rx queue per channel suffices */
		s->rdmaqs = adap->params.nports;
	}

	for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
		struct sge_eth_rxq *r = &s->ethrxq[i];

		init_rspq(&r->rspq, 0, 0, 1024, 64);
		r->fl.size = 72;
	}

	for (i = 0; i < ARRAY_SIZE(s->ethtxq); i++)
		s->ethtxq[i].q.size = 1024;

	for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++)
		s->ctrlq[i].q.size = 512;

	for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++)
		s->ofldtxq[i].q.size = 1024;

	for (i = 0; i < ARRAY_SIZE(s->ofldrxq); i++) {
		struct sge_ofld_rxq *r = &s->ofldrxq[i];

		init_rspq(&r->rspq, 0, 0, 1024, 64);
		r->rspq.uld = CXGB4_ULD_ISCSI;
		r->fl.size = 72;
	}

	for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) {
		struct sge_ofld_rxq *r = &s->rdmarxq[i];

		init_rspq(&r->rspq, 0, 0, 511, 64);
		r->rspq.uld = CXGB4_ULD_RDMA;
		r->fl.size = 72;
	}

	init_rspq(&s->fw_evtq, 6, 0, 512, 64);
	init_rspq(&s->intrq, 6, 0, 2 * MAX_INGQ, 64);
}

/*
 * Reduce the number of Ethernet queues across all ports to at most n.
 * n provides at least one queue per port.
 */
static void __devinit reduce_ethqs(struct adapter *adap, int n)
{
	int i;
	struct port_info *pi;

	while (n < adap->sge.ethqsets)
		for_each_port(adap, i) {
			pi = adap2pinfo(adap, i);
			if (pi->nqsets > 1) {
				pi->nqsets--;
				adap->sge.ethqsets--;
				if (adap->sge.ethqsets <= n)
					break;
			}
		}

	n = 0;
	for_each_port(adap, i) {
		pi = adap2pinfo(adap, i);
		pi->first_qset = n;
		n += pi->nqsets;
	}
}

/* 2 MSI-X vectors needed for the FW queue and non-data interrupts */
#define EXTRA_VECS 2

static int __devinit enable_msix(struct adapter *adap)
{
	int ofld_need = 0;
	int i, err, want, need;
	struct sge *s = &adap->sge;
	unsigned int nchan = adap->params.nports;
	struct msix_entry entries[MAX_INGQ + 1];

	for (i = 0; i < ARRAY_SIZE(entries); ++i)
		entries[i].entry = i;

	want = s->max_ethqsets + EXTRA_VECS;
	if (is_offload(adap)) {
		want += s->rdmaqs + s->ofldqsets;
		/* need nchan for each possible ULD */
		ofld_need = 2 * nchan;
	}
	need = adap->params.nports + EXTRA_VECS + ofld_need;

	while ((err = pci_enable_msix(adap->pdev, entries, want)) >= need)
		want = err;

	if (!err) {
		/*
		 * Distribute available vectors to the various queue groups.
		 * Every group gets its minimum requirement and NIC gets top
		 * priority for leftovers.
		 */
		i = want - EXTRA_VECS - ofld_need;
		if (i < s->max_ethqsets) {
			s->max_ethqsets = i;
			if (i < s->ethqsets)
				reduce_ethqs(adap, i);
		}
		if (is_offload(adap)) {
			i = want - EXTRA_VECS - s->max_ethqsets;
			i -= ofld_need - nchan;
			s->ofldqsets = (i / nchan) * nchan;  /* round down */
		}
		for (i = 0; i < want; ++i)
			adap->msix_info[i].vec = entries[i].vector;
	} else if (err > 0)
		dev_info(adap->pdev_dev,
			 "only %d MSI-X vectors left, not using MSI-X\n", err);
	return err;
}

#undef EXTRA_VECS

static void __devinit print_port_info(struct adapter *adap)
{
	static const char *base[] = {
		"R XFI", "R XAUI", "T SGMII", "T XFI", "T XAUI", "KX4", "CX4",
		"KX", "KR", "KR SFP+", "KR FEC"
	const char *spd = "";

	if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_2_5GB)
		spd = " 2.5 GT/s";
	else if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_5_0GB)
		spd = " 5 GT/s";

	for_each_port(adap, i) {
		struct net_device *dev = adap->port[i];
		const struct port_info *pi = netdev_priv(dev);
		char *bufp = buf;

		if (!test_bit(i, &adap->registered_device_map))
			continue;

		if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100M)
			bufp += sprintf(bufp, "100/");
		if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_1G)
			bufp += sprintf(bufp, "1000/");
		if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G)
			bufp += sprintf(bufp, "10G/");
		if (bufp != buf)
			--bufp;
		sprintf(bufp, "BASE-%s", base[pi->port_type]);

		netdev_info(dev, "Chelsio %s rev %d %s %sNIC PCIe x%d%s%s\n",
			    adap->params.vpd.id, adap->params.rev,
			    buf, is_offload(adap) ? "R" : "",
			    adap->params.pci.width, spd,
			    (adap->flags & USING_MSIX) ? " MSI-X" :
			    (adap->flags & USING_MSI) ? " MSI" : "");
		if (adap->name == dev->name)
			netdev_info(dev, "S/N: %s, E/C: %s\n",
				    adap->params.vpd.sn, adap->params.vpd.ec);
	}
}

/*
 * Free the following resources:
 * - memory used for tables
 * - MSI/MSI-X
 * - net devices
 * - resources FW is holding for us
 */
static void free_some_resources(struct adapter *adapter)
{
	unsigned int i;

	t4_free_mem(adapter->l2t);
	t4_free_mem(adapter->tids.tid_tab);
	disable_msi(adapter);

	for_each_port(adapter, i)
		if (adapter->port[i])
			free_netdev(adapter->port[i]);

	if (adapter->flags & FW_OK)
		t4_fw_bye(adapter, 0);
}

#define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | NETIF_F_TSO6 |\
		   NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)

static int __devinit init_one(struct pci_dev *pdev,
			      const struct pci_device_id *ent)
{
	int func, i, err;
	struct port_info *pi;
	unsigned int highdma = 0;
	struct adapter *adapter = NULL;

	printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);

	err = pci_request_regions(pdev, KBUILD_MODNAME);
	if (err) {
		/* Just info, some other driver may have claimed the device. */
		dev_info(&pdev->dev, "cannot obtain PCI resources\n");
		return err;
	}

	/* We control everything through PF 0 */
	func = PCI_FUNC(pdev->devfn);
	if (func > 0) {
		pci_save_state(pdev);        /* to restore SR-IOV later */

	err = pci_enable_device(pdev);
	if (err) {
		dev_err(&pdev->dev, "cannot enable PCI device\n");
		goto out_release_regions;
	}

	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
		highdma = NETIF_F_HIGHDMA;
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
		if (err) {
			dev_err(&pdev->dev, "unable to obtain 64-bit DMA for "
				"coherent allocations\n");
			goto out_disable_device;
		}
	} else {
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "no usable DMA configuration\n");
			goto out_disable_device;
		}
	}

	pci_enable_pcie_error_reporting(pdev);
	pci_set_master(pdev);
	pci_save_state(pdev);

	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
	if (!adapter) {
		err = -ENOMEM;
		goto out_disable_device;
	}

	adapter->regs = pci_ioremap_bar(pdev, 0);
	if (!adapter->regs) {
		dev_err(&pdev->dev, "cannot map device registers\n");
		err = -ENOMEM;
		goto out_free_adapter;
	}

	adapter->pdev = pdev;
	adapter->pdev_dev = &pdev->dev;
	adapter->name = pci_name(pdev);
	adapter->msg_enable = dflt_msg_enable;
	memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));

	spin_lock_init(&adapter->stats_lock);
	spin_lock_init(&adapter->tid_release_lock);

	INIT_WORK(&adapter->tid_release_task, process_tid_release_list);

	err = t4_prep_adapter(adapter);
	if (err)
		goto out_unmap_bar;
	err = adap_init0(adapter);
	if (err)
		goto out_unmap_bar;

	for_each_port(adapter, i) {
		struct net_device *netdev;

		netdev = alloc_etherdev_mq(sizeof(struct port_info),
					   MAX_ETH_QSETS);
		if (!netdev) {
			err = -ENOMEM;
			goto out_free_dev;
		}

		SET_NETDEV_DEV(netdev, &pdev->dev);

		adapter->port[i] = netdev;
		pi = netdev_priv(netdev);
		pi->adapter = adapter;
		pi->xact_addr_filt = -1;
		pi->rx_offload = RX_CSO;
		pi->port_id = i;
		netif_carrier_off(netdev);
		netif_tx_stop_all_queues(netdev);
		netdev->irq = pdev->irq;

		netdev->features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6;
		netdev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
		netdev->features |= NETIF_F_GRO | NETIF_F_RXHASH | highdma;
		netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
		netdev->vlan_features = netdev->features & VLAN_FEAT;

		netdev->netdev_ops = &cxgb4_netdev_ops;
		SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops);
	}

	pci_set_drvdata(pdev, adapter);

	if (adapter->flags & FW_OK) {
		err = t4_port_init(adapter, 0, 0, 0);
		if (err)
			goto out_free_dev;
	}

	/*
	 * Configure queues and allocate tables now, they can be needed as
	 * soon as the first register_netdev completes.
	 */
	cfg_queues(adapter);

	adapter->l2t = t4_init_l2t();
	if (!adapter->l2t) {
		/* We tolerate a lack of L2T, giving up some functionality */
		dev_warn(&pdev->dev, "could not allocate L2T, continuing\n");
		adapter->params.offload = 0;
	}

	if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
		dev_warn(&pdev->dev, "could not allocate TID table, "
			 "continuing\n");
		adapter->params.offload = 0;
	}

	/* See what interrupts we'll be using */
	if (msi > 1 && enable_msix(adapter) == 0)
		adapter->flags |= USING_MSIX;
	else if (msi > 0 && pci_enable_msi(pdev) == 0)
		adapter->flags |= USING_MSI;

	/*
	 * The card is now ready to go.  If any errors occur during device
	 * registration we do not fail the whole card but rather proceed only
	 * with the ports we manage to register successfully.  However we must
	 * register at least one net device.
	 */
	for_each_port(adapter, i) {
		err = register_netdev(adapter->port[i]);
		if (err)
			dev_warn(&pdev->dev,
				 "cannot register net device %s, skipping\n",
				 adapter->port[i]->name);
		else {
			/*
			 * Change the name we use for messages to the name of
			 * the first successfully registered interface.
			 */
			if (!adapter->registered_device_map)
				adapter->name = adapter->port[i]->name;

			__set_bit(i, &adapter->registered_device_map);
			adapter->chan_map[adap2pinfo(adapter, i)->tx_chan] = i;
		}
	}
	if (!adapter->registered_device_map) {
		dev_err(&pdev->dev, "could not register any net devices\n");
		goto out_free_dev;
	}

	if (cxgb4_debugfs_root) {
		adapter->debugfs_root = debugfs_create_dir(pci_name(pdev),
							   cxgb4_debugfs_root);
		setup_debugfs(adapter);
	}

	if (is_offload(adapter))
		attach_ulds(adapter);

	print_port_info(adapter);

sriov:
#ifdef CONFIG_PCI_IOV
	if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0)
		if (pci_enable_sriov(pdev, num_vf[func]) == 0)
			dev_info(&pdev->dev,
				 "instantiated %u virtual functions\n",
				 num_vf[func]);
#endif
	return 0;

 out_free_dev:
	free_some_resources(adapter);
 out_unmap_bar:
	iounmap(adapter->regs);
 out_free_adapter:
	kfree(adapter);
 out_disable_device:
	pci_disable_pcie_error_reporting(pdev);
	pci_disable_device(pdev);
 out_release_regions:
	pci_release_regions(pdev);
	pci_set_drvdata(pdev, NULL);
	return err;
}

static void __devexit remove_one(struct pci_dev *pdev)
{
	struct adapter *adapter = pci_get_drvdata(pdev);

	pci_disable_sriov(pdev);

	if (adapter) {
		int i;

		if (is_offload(adapter))
			detach_ulds(adapter);

		for_each_port(adapter, i)
			if (test_bit(i, &adapter->registered_device_map))
				unregister_netdev(adapter->port[i]);

		if (adapter->debugfs_root)
			debugfs_remove_recursive(adapter->debugfs_root);

		if (adapter->flags & FULL_INIT_DONE)
			cxgb_down(adapter);
		free_some_resources(adapter);
		iounmap(adapter->regs);
		kfree(adapter);
		pci_disable_pcie_error_reporting(pdev);
		pci_disable_device(pdev);
		pci_release_regions(pdev);
		pci_set_drvdata(pdev, NULL);
	} else if (PCI_FUNC(pdev->devfn) > 0)
		pci_release_regions(pdev);
}

static struct pci_driver cxgb4_driver = {
	.name     = KBUILD_MODNAME,
	.id_table = cxgb4_pci_tbl,
	.probe    = init_one,
	.remove   = __devexit_p(remove_one),
	.err_handler = &cxgb4_eeh,
};

static int __init cxgb4_init_module(void)
{
	int ret;

	/* Debugfs support is optional, just warn if this fails */
	cxgb4_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
	if (!cxgb4_debugfs_root)
		pr_warning("could not create debugfs entry, continuing\n");

	ret = pci_register_driver(&cxgb4_driver);
	if (ret < 0)
		debugfs_remove(cxgb4_debugfs_root);
	return ret;
}

static void __exit cxgb4_cleanup_module(void)
{
	pci_unregister_driver(&cxgb4_driver);
	debugfs_remove(cxgb4_debugfs_root);  /* NULL ok */
}

module_init(cxgb4_init_module);
module_exit(cxgb4_cleanup_module);