Sandy Bridge Xeon and Extreme chips have integrated memory controllers
with (rather limited) onboard SMBUS masters. This driver gives access
to the bus.
Signed-off-by: Andy Lutomirski <luto at amacapital.net>
---
drivers/i2c/busses/Kconfig | 15 ++
drivers/i2c/busses/Makefile | 1 +
drivers/i2c/busses/i2c-imc.c | 548 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 564 insertions(+)
create mode 100644 drivers/i2c/busses/i2c-imc.c
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index e837f0e..7636782 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -137,6 +137,21 @@ config I2C_DIMM_BUS
tristate
default n
+config I2C_IMC
+ tristate "Intel iMC (LGA 2011) SMBus Controller"
+ depends on PCI && X86
+ select I2C_DIMM_BUS
+ help
+ If you say yes to this option, support will be included for the Intel
+ Integrated Memory Controller SMBus host controller interface. This
+ controller is found on LGA 2011 Xeons and Core i7 Extremes.
+
+ It is possibly, although unlikely, that the use of this driver will
+ interfere with your platform's RAM thermal management.
+
+ This driver can also be built as a module. If so, the module will be
+ called i2c-imc.
+
config I2C_PIIX4
tristate "Intel PIIX4 and compatible (ATI/AMD/Serverworks/Broadcom/SMSC)"
depends on PCI
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 226bb2e..790b63d 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_I2C_AMD8111) += i2c-amd8111.o
obj-$(CONFIG_I2C_I801) += i2c-i801.o
obj-$(CONFIG_I2C_ISCH) += i2c-isch.o
obj-$(CONFIG_I2C_ISMT) += i2c-ismt.o
+obj-$(CONFIG_I2C_IMC) += i2c-imc.o
obj-$(CONFIG_I2C_NFORCE2) += i2c-nforce2.o
obj-$(CONFIG_I2C_NFORCE2_S4985) += i2c-nforce2-s4985.o
obj-$(CONFIG_I2C_PIIX4) += i2c-piix4.o
diff --git a/drivers/i2c/busses/i2c-imc.c b/drivers/i2c/busses/i2c-imc.c
new file mode 100644
index 0000000..9643aeb
--- /dev/null
+++ b/drivers/i2c/busses/i2c-imc.c
@@ -0,0 +1,548 @@
+/*
+ * Copyright (c) 2013 Andrew Lutomirski <luto at amacapital.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/i2c/dimm-bus.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pm_qos.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/i2c.h>
+
+/*
+ * The datasheet can be found here, for example:
+ * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e5-1600-2600-vol-2-datasheet.pdf
+ *
+ * There seem to be quite a few bugs or spec errors, though:
+ *
+ * - A successful transaction sets WOD and RDO.
+ *
+ * - The docs for TSOD_POLL_EN make no sense (see imc_channel_claim)
+ *
+ * - Erratum BT109, which says:
+ *
+ * The processor may not complete SMBus (System Management Bus)
+ * transactions targeting the TSOD (Temperature Sensor On DIMM)
+ * when Package C-States are enabled. Due to this erratum, if the
+ * processor transitions into a Package C-State while an SMBus
+ * transaction with the TSOD is in process, the processor will
+ * suspend receipt of the transaction. The transaction completes
+ * while the processor is in a Package C-State. Upon exiting
+ * Package C-State, the processor will attempt to resume the
+ * SMBus transaction, detect a protocol violation, and log an
+ * error.
+ *
+ * That description seems to be wrong. If the package enters a
+ * C-State while a software-initiated SMBUS transaction is running,
+ * then the result of the transaction seems to be wrong, but no
+ * error is signaled. It does not seem to matter whether the
+ * transaction is targeting a TSOD.
+ *
+ * For added fun, the relevant bits in MSR_PKG_CST_CONFIG_CONTROL
+ * are lockable, so we can't just disable package C-States.
+ *
+ * The upshot is that we need to use a hack to keep a package awake
+ * while we're using its SMBUS master. pm_qos is the easiest, so we
+ * use it for now. It has the unfortunate side-effect that each
+ * claim and each release will result in an IPI to all CPUs and
+ * that, while any SMBUS transaction is running, the whole system
+ * will be forced to a high-power state.
+ */
+
+/* Register offsets (in PCI configuration space) */
+#define SMBSTAT(i) (0x180 + 0x10*i)
+#define SMBCMD(i) (0x184 + 0x10*i)
+#define SMBCNTL(i) (0x188 + 0x10*i)
+#define SMB_TSOD_POLL_RATE_CNTR(i) (0x18C + 0x10*i)
+#define SMB_TSOD_POLL_RATE (0x1A8)
+
+/* SMBSTAT fields */
+#define SMBSTAT_RDO (1U << 31) /* Read Data Valid */
+#define SMBSTAT_WOD (1U << 30) /* Write Operation Done */
+#define SMBSTAT_SBE (1U << 29) /* SMBus Error */
+#define SMBSTAT_SMB_BUSY (1U << 28) /* SMBus Busy State */
+/* 26:24 is the last automatically polled TSOD address */
+#define SMBSTAT_RDATA_MASK 0xffff /* result of a read */
+
+/* SMBCMD fields */
+#define SMBCMD_TRIGGER (1U << 31) /* CMD Trigger */
+#define SMBCMD_PNTR_SEL (1U << 30) /* HW polls TSOD with pointer */
+#define SMBCMD_WORD_ACCESS (1U << 29) /* word (vs byte) access */
+#define SMBCMD_TYPE_MASK (3U << 27) /* Mask for access type */
+#define SMBCMD_TYPE_READ (0U << 27) /* Read */
+#define SMBCMD_TYPE_WRITE (1U << 27) /* Write */
+#define SMBCMD_TYPE_PNTR_WRITE (3U << 27) /* Write to pointer */
+#define SMBCMD_SA_MASK (7U << 24) /* Slave Address high bits */
+#define SMBCMD_SA_SHIFT 24
+#define SMBCMD_BA_MASK 0xff0000 /* Bus Txn address */
+#define SMBCMD_BA_SHIFT 16
+#define SMBCMD_WDATA_MASK 0xffff /* data to write */
+
+/* SMBCNTL fields */
+#define SMBCNTL_DTI_MASK 0xf0000000 /* Slave Address low bits */
+#define SMBCNTL_DTI_SHIFT 28 /* Slave Address low bits */
+#define SMBCNTL_CKOVRD (1U << 27) /* # Clock Override */
+#define SMBCNTL_DIS_WRT (1U << 26) /* Disable Write (sadly) */
+#define SMBCNTL_SOFT_RST (1U << 10) /* Soft Reset */
+#define SMBCNTL_TSOD_POLL_EN (1U << 8) /* TSOD Polling Enable */
+/* Bits 0-3 and 4-6 indicate TSOD presence in various slots */
+
+/* System Address Controller, PCI dev 13 fn 6, 8086.3cf5 */
+#define SAD_CONTROL 0xf4
+
+/*
+ * The clock is around 100kHz, and transactions are nine cycles per byte
+ * plus a few start/stop cycles, plus whatever clock streching is involved.
+ * This is a guess at the polling interval.
+ */
+
+#define TXN_LEN_US (20 * 10)
+
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_BR 0x3cf5 /* 13.6 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA 0x3ca8 /* 15.0 */
+
+struct imc_channel {
+ struct i2c_adapter adapter;
+ struct mutex mutex;
+ bool can_write, suspended;
+};
+
+struct imc_priv {
+ struct pci_dev *pci_dev;
+ struct imc_channel channels[2];
+};
+
+/*
+ * There's some cost to just having a pm_qos request installed, so we
+ * just use one globally instead of one per device.
+ */
+static struct pm_qos_request imc_pm_qos;
+static int imc_pm_qos_count;
+static DEFINE_MUTEX(imc_pm_qos_mutex);
+
+static void imc_pmqos_get(void)
+{
+ mutex_lock(&imc_pm_qos_mutex);
+ if (++imc_pm_qos_count == 1)
+ pm_qos_update_request(&imc_pm_qos, 5);
+ mutex_unlock(&imc_pm_qos_mutex);
+}
+
+static void imc_pmqos_put(void)
+{
+ mutex_lock(&imc_pm_qos_mutex);
+ if (--imc_pm_qos_count == 0)
+ pm_qos_update_request(&imc_pm_qos, PM_QOS_DEFAULT_VALUE);
+ mutex_unlock(&imc_pm_qos_mutex);
+}
+
+static void imc_channel_release(struct imc_priv *priv, int chan)
+{
+ /* Return to HW control. */
+ u32 cntl;
+ pci_read_config_dword(priv->pci_dev, SMBCNTL(chan), &cntl);
+ cntl |= SMBCNTL_TSOD_POLL_EN;
+ pci_write_config_dword(priv->pci_dev, SMBCNTL(chan), cntl);
+ imc_pmqos_put();
+}
+
+static int imc_channel_claim(struct imc_priv *priv, int chan)
+{
+ /*
+ * The docs are a bit confused here. We're supposed to disable TSOD
+ * polling, then wait for busy to be cleared, then set
+ * SMBCNTL_TSOD_POLL_EN to zero to switch to software control. But
+ * SMBCNTL_TSOD_POLL_EN is the only documented way to turn off polling.
+ */
+
+ u32 cntl, stat;
+ int i;
+
+ if (priv->channels[chan].suspended)
+ return -EIO;
+
+ imc_pmqos_get();
+
+ pci_read_config_dword(priv->pci_dev, SMBCNTL(chan), &cntl);
+ cntl &= ~SMBCNTL_TSOD_POLL_EN;
+ pci_write_config_dword(priv->pci_dev, SMBCNTL(chan), cntl);
+
+ for (i = 0; i < 20; i++) {
+ pci_read_config_dword(priv->pci_dev, SMBSTAT(chan), &stat);
+ if (!(stat & SMBSTAT_SMB_BUSY))
+ return 0; /* The channel is ours. */
+ usleep_range(TXN_LEN_US, 3*TXN_LEN_US);
+ }
+
+ /* We failed to take control of the channel. Return to HW control. */
+ imc_channel_release(priv, chan);
+ imc_pmqos_put();
+ return -EBUSY;
+}
+
+/*
+ * The iMC supports five access types. The terminology is rather
+ * inconsistent. These are the types:
+ *
+ * "Write to pointer register SMBus": I2C_SMBUS_WRITE, I2C_SMBUS_BYTE
+ *
+ * Read byte/word: I2C_SMBUS_READ, I2C_SMBUS_{BYTE|WORD}_DATA
+ *
+ * Write byte/word: I2C_SMBUS_WRITE, I2C_SMBUS_{BYTE|WORD}_DATA
+ *
+ * The pointer write operations is AFAICT completely useless for
+ * software control, for two reasons. First, HW periodically polls any
+ * TSODs on the bus, so it will corrupt the pointer in between SW
+ * transactions. More importantly, the matching "read byte"/"receive
+ * byte" (the address-less single-byte read) is not available for SW
+ * control. Therefore, this driver doesn't implement pointer writes
+ *
+ * There is no PEC support.
+ */
+
+static u32 imc_func(struct i2c_adapter *adapter)
+{
+ int chan;
+ struct imc_channel *ch;
+ struct imc_priv *priv = i2c_get_adapdata(adapter);
+
+ chan = (adapter == &priv->channels[0].adapter ? 0 : 1);
+ ch = &priv->channels[chan];
+
+ if (ch->can_write)
+ return I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA;
+ else
+ return I2C_FUNC_SMBUS_READ_BYTE_DATA |
+ I2C_FUNC_SMBUS_READ_WORD_DATA;
+}
+
+static s32 imc_smbus_xfer(struct i2c_adapter *adap, u16 addr,
+ unsigned short flags, char read_write, u8 command,
+ int size, union i2c_smbus_data *data)
+{
+ int i, ret, chan;
+ u32 tmp, cmdbits = 0, cntlbits = 0, stat;
+ struct imc_channel *ch;
+ struct imc_priv *priv = i2c_get_adapdata(adap);
+
+ chan = (adap == &priv->channels[0].adapter ? 0 : 1);
+ ch = &priv->channels[chan];
+
+ if (addr > 0x7f)
+ return -EOPNOTSUPP; /* No large address support */
+ if (flags)
+ return -EOPNOTSUPP; /* No PEC */
+
+ cmdbits |= ((u32)addr & 0x7) << SMBCMD_SA_SHIFT;
+ cntlbits |= ((u32)addr >> 3) << SMBCNTL_DTI_SHIFT;
+
+ switch (size) {
+ case I2C_SMBUS_BYTE_DATA:
+ cmdbits |= ((u32)command) << SMBCMD_BA_SHIFT;
+ if (read_write == I2C_SMBUS_READ)
+ cmdbits |= SMBCMD_TYPE_READ;
+ else
+ cmdbits |= SMBCMD_TYPE_WRITE | data->byte;
+ break;
+ case I2C_SMBUS_WORD_DATA:
+ cmdbits |= ((u32)command) << SMBCMD_BA_SHIFT;
+ cmdbits |= SMBCMD_WORD_ACCESS;
+ if (read_write == I2C_SMBUS_READ)
+ cmdbits |= SMBCMD_TYPE_READ;
+ else
+ cmdbits |= SMBCMD_TYPE_WRITE | swab16(data->word);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ mutex_lock(&ch->mutex);
+
+ ret = imc_channel_claim(priv, chan);
+ if (ret)
+ goto out_unlock;
+
+ pci_read_config_dword(priv->pci_dev, SMBCNTL(chan), &tmp);
+ tmp &= ~SMBCNTL_DTI_MASK;
+ tmp |= cntlbits;
+ pci_write_config_dword(priv->pci_dev, SMBCNTL(chan), tmp);
+
+ /*
+ * This clears SMBCMD_PNTR_SEL. We leave it cleared so that we don't
+ * need to think about keeping the TSOD pointer state consistent with
+ * the hardware's expectation. This probably has some miniscule
+ * power cost, as TSOD polls will take 9 extra cycles.
+ */
+ cmdbits |= SMBCMD_TRIGGER;
+ pci_write_config_dword(priv->pci_dev, SMBCMD(chan), cmdbits);
+
+ for (i = 0; ; i++) {
+ pci_read_config_dword(priv->pci_dev, SMBSTAT(chan), &stat);
+ if (!(stat & SMBSTAT_SMB_BUSY))
+ break;
+ if (i < 50) {
+ usleep_range(TXN_LEN_US, 3*TXN_LEN_US);
+ continue;
+ }
+
+ /* Timeout. TODO: Reset the controller? */
+ ret = -EIO;
+ dev_err(&priv->pci_dev->dev, "controller is wedged\n");
+ goto out_release;
+ }
+
+ if (stat & SMBSTAT_SBE) {
+ /*
+ * Clear the error to re-enable TSOD polling. The docs say
+ * that, as long as SBE is set, TSOD polling won't happen.
+ * The docs also say that writing zero to this bit (which is
+ * the only writable bit in the whole register) will clear
+ * the error. Empirically, writing 0 does not clear SBE, but
+ * it's probably still good to do the write in compliance with
+ * the spec. (TSOD polling still happens and seems to
+ * clear SBE on its own.)
+ */
+ pci_write_config_dword(priv->pci_dev, SMBSTAT(chan), 0);
+ ret = -ENXIO;
+ goto out_release;
+ }
+
+ if (read_write == I2C_SMBUS_READ) {
+ if (stat & SMBSTAT_RDO) {
+ /*
+ * The iMC SMBUS controller thinks of SMBUS
+ * words as being big-endian (MSB first).
+ * Linux treats them as little-endian, we need
+ * to swap them.
+ *
+ * Note: the controller will often (always?) set
+ * WOD here. This is probably a bug.
+ */
+ if (size == I2C_SMBUS_WORD_DATA)
+ data->word = swab16(stat & SMBSTAT_RDATA_MASK);
+ else
+ data->byte = stat & 0xFF;
+ ret = 0;
+ } else {
+ dev_err(&priv->pci_dev->dev,
+ "Unexpected read status 0x%08X\n", stat);
+ ret = -EIO;
+ }
+ } else {
+ if (stat & SMBSTAT_WOD) {
+ /* Same bug (?) as in the read case. */
+ ret = 0;
+ } else {
+ dev_err(&priv->pci_dev->dev,
+ "Unexpected write status 0x%08X\n", stat);
+ ret = -EIO;
+ }
+ }
+
+out_release:
+ imc_channel_release(priv, chan);
+
+out_unlock:
+ mutex_unlock(&ch->mutex);
+
+ return ret;
+}
+
+static const struct i2c_algorithm imc_smbus_algorithm = {
+ .smbus_xfer = imc_smbus_xfer,
+ .functionality = imc_func,
+};
+
+static int imc_init_channel(struct imc_priv *priv, int i, int socket)
+{
+ int err;
+ u32 val;
+ struct imc_channel *ch = &priv->channels[i];
+
+ i2c_set_adapdata(&ch->adapter, priv);
+ ch->adapter.owner = THIS_MODULE;
+ ch->adapter.algo = &imc_smbus_algorithm;
+ ch->adapter.dev.parent = &priv->pci_dev->dev;
+
+ pci_read_config_dword(priv->pci_dev, SMBCNTL(i), &val);
+ ch->can_write = !(val & SMBCNTL_DIS_WRT);
+
+ /*
+ * i2c_add_addapter can call imc_smbus_xfer, so we need to be
+ * ready immediately.
+ */
+ mutex_init(&ch->mutex);
+
+ snprintf(ch->adapter.name, sizeof(ch->adapter.name),
+ "iMC socket %d channel %d", socket, i);
+ err = i2c_add_adapter(&ch->adapter);
+ if (err) {
+ mutex_destroy(&ch->mutex);
+ return err;
+ }
+
+ i2c_scan_dimm_bus(&ch->adapter);
+
+ return 0;
+}
+
+static void imc_free_channel(struct imc_priv *priv, int i)
+{
+ struct imc_channel *ch = &priv->channels[i];
+
+ mutex_lock(&ch->mutex);
+ i2c_del_adapter(&ch->adapter);
+ mutex_unlock(&ch->mutex);
+ mutex_destroy(&ch->mutex);
+}
+
+static int imc_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+ int i, err;
+ struct imc_priv *priv;
+ struct pci_dev *sad; /* System Address Decoder */
+ u32 sad_control;
+
+ /* Paranoia: the datasheet says this is always at 15.0 */
+ if (dev->devfn != PCI_DEVFN(15, 0))
+ return -ENODEV;
+
+ /*
+ * The socket number is hidden away on a different PCI device.
+ * There's another copy at devfn 11.0 offset 0x40, and an even
+ * less convincing copy at 5.0 0x140. The actual APICID register
+ * is "not used ... and is still implemented in hardware because
+ * of FUD".
+ *
+ * In principle we could double-check that the socket matches
+ * the numa_node from SRAT, but this is probably not worth it.
+ */
+ sad = pci_get_slot(dev->bus, PCI_DEVFN(13, 6));
+ if (!sad)
+ return -ENODEV;
+ if (sad->vendor != PCI_VENDOR_ID_INTEL ||
+ sad->device != PCI_DEVICE_ID_INTEL_SBRIDGE_BR) {
+ pci_dev_put(sad);
+ return -ENODEV;
+ }
+ pci_read_config_dword(sad, SAD_CONTROL, &sad_control);
+ pci_dev_put(sad);
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+ priv->pci_dev = dev;
+
+ pci_set_drvdata(dev, priv);
+
+ for (i = 0; i < 2; i++) {
+ int j;
+ err = imc_init_channel(priv, i, sad_control & 0x7);
+ if (err) {
+ for (j = 0; j < i; j++)
+ imc_free_channel(priv, j);
+ goto exit_free;
+ }
+ }
+
+ return 0;
+
+exit_free:
+ kfree(priv);
+ return err;
+}
+
+static void imc_remove(struct pci_dev *dev)
+{
+ int i;
+ struct imc_priv *priv = pci_get_drvdata(dev);
+
+ for (i = 0; i < 2; i++)
+ imc_free_channel(priv, i);
+
+ kfree(priv);
+}
+
+static int imc_suspend(struct pci_dev *dev, pm_message_t mesg)
+{
+ int i;
+ struct imc_priv *priv = pci_get_drvdata(dev);
+
+ /* BIOS is in charge. We should finish any pending transaction */
+ for (i = 0; i < 2; i++) {
+ mutex_lock(&priv->channels[i].mutex);
+ priv->channels[i].suspended = true;
+ mutex_unlock(&priv->channels[i].mutex);
+ }
+
+ return 0;
+}
+
+static int imc_resume(struct pci_dev *dev)
+{
+ int i;
+ struct imc_priv *priv = pci_get_drvdata(dev);
+
+ for (i = 0; i < 2; i++) {
+ mutex_lock(&priv->channels[i].mutex);
+ priv->channels[i].suspended = false;
+ mutex_unlock(&priv->channels[i].mutex);
+ }
+
+ return 0;
+}
+
+static DEFINE_PCI_DEVICE_TABLE(imc_ids) = {
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA) },
+ { 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, imc_ids);
+
+static struct pci_driver imc_pci_driver = {
+ .name = "imc_smbus",
+ .id_table = imc_ids,
+ .probe = imc_probe,
+ .remove = imc_remove,
+ .suspend = imc_suspend,
+ .resume = imc_resume,
+};
+
+static int __init imc_init(void)
+{
+ int err;
+
+ pm_qos_add_request(&imc_pm_qos,
+ PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE);
+ err = pci_register_driver(&imc_pci_driver);
+ if (err)
+ pm_qos_remove_request(&imc_pm_qos);
+ return err;
+}
+
+static void __exit imc_exit(void)
+{
+ pci_unregister_driver(&imc_pci_driver);
+ pm_qos_remove_request(&imc_pm_qos);
+}
+
+module_init(imc_init);
+module_exit(imc_exit);
+
+MODULE_AUTHOR("Andrew Lutomirski <luto at amacapital.net>");
+MODULE_DESCRIPTION("iMC SMBus driver");
+MODULE_LICENSE("GPL");
--
1.8.1.4