diff options
author | Shannon Nelson <shannon.nelson@intel.com> | 2007-11-14 16:59:51 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-11-14 18:45:41 -0800 |
commit | 7bb67c14fd3778504fb77da30ce11582336dfced (patch) | |
tree | 24b65f267a98716824c7955be02af8879cfda688 | |
parent | cc9f2f8f68efcc73d8793a4df2c4c50196e90080 (diff) | |
download | kernel_samsung_smdk4412-7bb67c14fd3778504fb77da30ce11582336dfced.zip kernel_samsung_smdk4412-7bb67c14fd3778504fb77da30ce11582336dfced.tar.gz kernel_samsung_smdk4412-7bb67c14fd3778504fb77da30ce11582336dfced.tar.bz2 |
I/OAT: Add support for version 2 of ioatdma device
Add support for version 2 of the ioatdma device. This device handles
the descriptor chain and DCA services slightly differently:
- Instead of moving the dma descriptors between a busy and an idle chain,
this new version uses a single circular chain so that we don't have
to rewrite the next_descriptor pointers as we add new requests, and the
device doesn't need to re-read the last descriptor.
- The new device has the DCA tags defined internally instead of needing
them defined statically.
Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Cc: "Williams, Dan J" <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | drivers/dma/ioat.c | 11 | ||||
-rw-r--r-- | drivers/dma/ioat_dca.c | 164 | ||||
-rw-r--r-- | drivers/dma/ioat_dma.c | 578 | ||||
-rw-r--r-- | drivers/dma/ioatdma.h | 32 | ||||
-rw-r--r-- | drivers/dma/ioatdma_hw.h | 33 | ||||
-rw-r--r-- | drivers/dma/ioatdma_registers.h | 106 | ||||
-rw-r--r-- | include/linux/pci_ids.h | 1 |
7 files changed, 780 insertions, 145 deletions
diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c index f204c39..16e0fd8 100644 --- a/drivers/dma/ioat.c +++ b/drivers/dma/ioat.c @@ -39,10 +39,14 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Intel Corporation"); static struct pci_device_id ioat_pci_tbl[] = { + /* I/OAT v1 platforms */ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, + + /* I/OAT v2 platforms */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, { 0, } }; @@ -74,10 +78,17 @@ static int ioat_setup_functionality(struct pci_dev *pdev, void __iomem *iobase) if (device->dma && ioat_dca_enabled) device->dca = ioat_dca_init(pdev, iobase); break; + case IOAT_VER_2_0: + device->dma = ioat_dma_probe(pdev, iobase); + if (device->dma && ioat_dca_enabled) + device->dca = ioat2_dca_init(pdev, iobase); + break; default: err = -ENODEV; break; } + if (!device->dma) + err = -ENODEV; return err; } diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat_dca.c index ba98571..0fa8a98 100644 --- a/drivers/dma/ioat_dca.c +++ b/drivers/dma/ioat_dca.c @@ -261,3 +261,167 @@ struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) return dca; } + +static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 id; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + id = dcaid_from_pcidev(pdev); + + if (ioatdca->requester_count == ioatdca->max_requesters) + return -ENODEV; + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == NULL) { + /* found an empty slot */ + ioatdca->requester_count++; + ioatdca->req_slots[i].pdev = pdev; 
+ ioatdca->req_slots[i].rid = id; + global_req_table = + readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); + writel(id | IOAT_DCA_GREQID_VALID, + ioatdca->iobase + global_req_table + (i * 4)); + return i; + } + } + /* Error, ioatdma->requester_count is out of whack */ + return -EFAULT; +} + +static int ioat2_dca_remove_requester(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) { + global_req_table = + readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); + writel(0, ioatdca->iobase + global_req_table + (i * 4)); + ioatdca->req_slots[i].pdev = NULL; + ioatdca->req_slots[i].rid = 0; + ioatdca->requester_count--; + return i; + } + } + return -ENODEV; +} + +static u8 ioat2_dca_get_tag(struct dca_provider *dca, int cpu) +{ + u8 tag; + + tag = ioat_dca_get_tag(dca, cpu); + tag = (~tag) & 0x1F; + return tag; +} + +static struct dca_ops ioat2_dca_ops = { + .add_requester = ioat2_dca_add_requester, + .remove_requester = ioat2_dca_remove_requester, + .get_tag = ioat2_dca_get_tag, +}; + +static int ioat2_dca_count_dca_slots(void *iobase, u16 dca_offset) +{ + int slots = 0; + u32 req; + u16 global_req_table; + + global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET); + if (global_req_table == 0) + return 0; + do { + req = readl(iobase + global_req_table + (slots * sizeof(u32))); + slots++; + } while ((req & IOAT_DCA_GREQID_LASTID) == 0); + + return slots; +} + +struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) +{ + struct dca_provider *dca; + struct ioat_dca_priv *ioatdca; + int slots; + int i; + int err; + u32 tag_map; + u16 dca_offset; + u16 csi_fsb_control; + u16 pcie_control; + u8 bit; + + if 
(!system_has_dca_enabled(pdev)) + return NULL; + + dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); + if (dca_offset == 0) + return NULL; + + slots = ioat2_dca_count_dca_slots(iobase, dca_offset); + if (slots == 0) + return NULL; + + dca = alloc_dca_provider(&ioat2_dca_ops, + sizeof(*ioatdca) + + (sizeof(struct ioat_dca_slot) * slots)); + if (!dca) + return NULL; + + ioatdca = dca_priv(dca); + ioatdca->iobase = iobase; + ioatdca->dca_base = iobase + dca_offset; + ioatdca->max_requesters = slots; + + /* some bios might not know to turn these on */ + csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); + if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) { + csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH; + writew(csi_fsb_control, + ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); + } + pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); + if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) { + pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR; + writew(pcie_control, + ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); + } + + + /* TODO version, compatibility and configuration checks */ + + /* copy out the APIC to DCA tag map */ + tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET); + for (i = 0; i < 5; i++) { + bit = (tag_map >> (4 * i)) & 0x0f; + if (bit < 8) + ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID; + else + ioatdca->tag_map[i] = 0; + } + + err = register_dca_provider(dca, &pdev->dev); + if (err) { + free_dca_provider(dca); + return NULL; + } + + return dca; +} diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c index 7e4a785..c1c2dcc 100644 --- a/drivers/dma/ioat_dma.c +++ b/drivers/dma/ioat_dma.c @@ -36,18 +36,24 @@ #include "ioatdma_registers.h" #include "ioatdma_hw.h" -#define INITIAL_IOAT_DESC_COUNT 128 - #define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common) #define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) #define to_ioat_desc(lh) 
container_of(lh, struct ioat_desc_sw, node) #define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx) +static int ioat_pending_level = 4; +module_param(ioat_pending_level, int, 0644); +MODULE_PARM_DESC(ioat_pending_level, + "high-water mark for pushing ioat descriptors (default: 4)"); + /* internal functions */ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan); static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan); + +static struct ioat_desc_sw * +ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); static struct ioat_desc_sw * -ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); +ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); static inline struct ioat_dma_chan *ioat_lookup_chan_by_index( struct ioatdma_device *device, @@ -130,6 +136,12 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) ioat_chan->device = device; ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1)); ioat_chan->xfercap = xfercap; + ioat_chan->desccount = 0; + if (ioat_chan->device->version != IOAT_VER_1_2) { + writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE + | IOAT_DMA_DCA_ANY_CPU, + ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); + } spin_lock_init(&ioat_chan->cleanup_lock); spin_lock_init(&ioat_chan->desc_lock); INIT_LIST_HEAD(&ioat_chan->free_desc); @@ -161,13 +173,17 @@ static void ioat_set_dest(dma_addr_t addr, tx_to_ioat_desc(tx)->dst = addr; } -static dma_cookie_t ioat_tx_submit(struct dma_async_tx_descriptor *tx) +static inline void __ioat1_dma_memcpy_issue_pending( + struct ioat_dma_chan *ioat_chan); +static inline void __ioat2_dma_memcpy_issue_pending( + struct ioat_dma_chan *ioat_chan); + +static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); struct ioat_desc_sw *first = tx_to_ioat_desc(tx); struct ioat_desc_sw *prev, *new; struct ioat_dma_descriptor *hw; - int append = 0; dma_cookie_t cookie; 
LIST_HEAD(new_chain); u32 copy; @@ -209,7 +225,7 @@ static dma_cookie_t ioat_tx_submit(struct dma_async_tx_descriptor *tx) list_add_tail(&new->node, &new_chain); desc_count++; prev = new; - } while (len && (new = ioat_dma_get_next_descriptor(ioat_chan))); + } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan))); hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; if (new->async_tx.callback) { @@ -246,20 +262,98 @@ static dma_cookie_t ioat_tx_submit(struct dma_async_tx_descriptor *tx) first->async_tx.phys; __list_splice(&new_chain, ioat_chan->used_desc.prev); + ioat_chan->dmacount += desc_count; ioat_chan->pending += desc_count; - if (ioat_chan->pending >= 4) { - append = 1; - ioat_chan->pending = 0; - } + if (ioat_chan->pending >= ioat_pending_level) + __ioat1_dma_memcpy_issue_pending(ioat_chan); spin_unlock_bh(&ioat_chan->desc_lock); - if (append) - writeb(IOAT_CHANCMD_APPEND, - ioat_chan->reg_base + IOAT_CHANCMD_OFFSET); + return cookie; +} + +static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); + struct ioat_desc_sw *first = tx_to_ioat_desc(tx); + struct ioat_desc_sw *new; + struct ioat_dma_descriptor *hw; + dma_cookie_t cookie; + u32 copy; + size_t len; + dma_addr_t src, dst; + int orig_ack; + unsigned int desc_count = 0; + + /* src and dest and len are stored in the initial descriptor */ + len = first->len; + src = first->src; + dst = first->dst; + orig_ack = first->async_tx.ack; + new = first; + + /* ioat_chan->desc_lock is still in force in version 2 path */ + + do { + copy = min((u32) len, ioat_chan->xfercap); + + new->async_tx.ack = 1; + + hw = new->hw; + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dst; + + len -= copy; + dst += copy; + src += copy; + desc_count++; + } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan))); + + hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; + if (new->async_tx.callback) { + hw->ctl |= 
IOAT_DMA_DESCRIPTOR_CTL_INT_GN; + if (first != new) { + /* move callback into to last desc */ + new->async_tx.callback = first->async_tx.callback; + new->async_tx.callback_param + = first->async_tx.callback_param; + first->async_tx.callback = NULL; + first->async_tx.callback_param = NULL; + } + } + + new->tx_cnt = desc_count; + new->async_tx.ack = orig_ack; /* client is in control of this ack */ + + /* store the original values for use in later cleanup */ + if (new != first) { + new->src = first->src; + new->dst = first->dst; + new->len = first->len; + } + + /* cookie incr and addition to used_list must be atomic */ + cookie = ioat_chan->common.cookie; + cookie++; + if (cookie < 0) + cookie = 1; + ioat_chan->common.cookie = new->async_tx.cookie = cookie; + + ioat_chan->dmacount += desc_count; + ioat_chan->pending += desc_count; + if (ioat_chan->pending >= ioat_pending_level) + __ioat2_dma_memcpy_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); return cookie; } +/** + * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair + * @ioat_chan: the channel supplying the memory pool for the descriptors + * @flags: allocation flags + */ static struct ioat_desc_sw *ioat_dma_alloc_descriptor( struct ioat_dma_chan *ioat_chan, gfp_t flags) @@ -284,15 +378,57 @@ static struct ioat_desc_sw *ioat_dma_alloc_descriptor( dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common); desc_sw->async_tx.tx_set_src = ioat_set_src; desc_sw->async_tx.tx_set_dest = ioat_set_dest; - desc_sw->async_tx.tx_submit = ioat_tx_submit; + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + desc_sw->async_tx.tx_submit = ioat1_tx_submit; + break; + case IOAT_VER_2_0: + desc_sw->async_tx.tx_submit = ioat2_tx_submit; + break; + } INIT_LIST_HEAD(&desc_sw->async_tx.tx_list); + desc_sw->hw = desc; desc_sw->async_tx.phys = phys; return desc_sw; } -/* returns the actual number of allocated descriptors */ +static int ioat_initial_desc_count = 256; 
+module_param(ioat_initial_desc_count, int, 0644); +MODULE_PARM_DESC(ioat_initial_desc_count, + "initial descriptors per channel (default: 256)"); + +/** + * ioat2_dma_massage_chan_desc - link the descriptors into a circle + * @ioat_chan: the channel to be massaged + */ +static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan) +{ + struct ioat_desc_sw *desc, *_desc; + + /* setup used_desc */ + ioat_chan->used_desc.next = ioat_chan->free_desc.next; + ioat_chan->used_desc.prev = NULL; + + /* pull free_desc out of the circle so that every node is a hw + * descriptor, but leave it pointing to the list + */ + ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next; + ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev; + + /* circle link the hw descriptors */ + desc = to_ioat_desc(ioat_chan->free_desc.next); + desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; + list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) { + desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; + } +} + +/** + * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors + * @chan: the channel to be filled out + */ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); @@ -304,7 +440,7 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) /* have we already been set up? 
*/ if (!list_empty(&ioat_chan->free_desc)) - return INITIAL_IOAT_DESC_COUNT; + return ioat_chan->desccount; /* Setup register to interrupt and write completion status on error */ chanctrl = IOAT_CHANCTRL_ERR_INT_EN | @@ -320,7 +456,7 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) } /* Allocate descriptors */ - for (i = 0; i < INITIAL_IOAT_DESC_COUNT; i++) { + for (i = 0; i < ioat_initial_desc_count; i++) { desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL); if (!desc) { dev_err(&ioat_chan->device->pdev->dev, @@ -330,7 +466,10 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) list_add_tail(&desc->node, &tmp_list); } spin_lock_bh(&ioat_chan->desc_lock); + ioat_chan->desccount = i; list_splice(&tmp_list, &ioat_chan->free_desc); + if (ioat_chan->device->version != IOAT_VER_1_2) + ioat2_dma_massage_chan_desc(ioat_chan); spin_unlock_bh(&ioat_chan->desc_lock); /* allocate a completion writeback area */ @@ -347,10 +486,14 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); tasklet_enable(&ioat_chan->cleanup_task); - ioat_dma_start_null_desc(ioat_chan); - return i; + ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */ + return ioat_chan->desccount; } +/** + * ioat_dma_free_chan_resources - release all the descriptors + * @chan: the channel to be cleaned + */ static void ioat_dma_free_chan_resources(struct dma_chan *chan) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); @@ -364,22 +507,45 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan) /* Delay 100ms after reset to allow internal DMA logic to quiesce * before removing DMA descriptor resources. 
*/ - writeb(IOAT_CHANCMD_RESET, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET); + writeb(IOAT_CHANCMD_RESET, + ioat_chan->reg_base + + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); mdelay(100); spin_lock_bh(&ioat_chan->desc_lock); - list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) { - in_use_descs++; - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); - kfree(desc); - } - list_for_each_entry_safe(desc, _desc, &ioat_chan->free_desc, node) { - list_del(&desc->node); + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + list_for_each_entry_safe(desc, _desc, + &ioat_chan->used_desc, node) { + in_use_descs++; + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->async_tx.phys); + kfree(desc); + } + list_for_each_entry_safe(desc, _desc, + &ioat_chan->free_desc, node) { + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->async_tx.phys); + kfree(desc); + } + break; + case IOAT_VER_2_0: + list_for_each_entry_safe(desc, _desc, + ioat_chan->free_desc.next, node) { + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->async_tx.phys); + kfree(desc); + } + desc = to_ioat_desc(ioat_chan->free_desc.next); pci_pool_free(ioatdma_device->dma_pool, desc->hw, desc->async_tx.phys); kfree(desc); + INIT_LIST_HEAD(&ioat_chan->free_desc); + INIT_LIST_HEAD(&ioat_chan->used_desc); + break; } spin_unlock_bh(&ioat_chan->desc_lock); @@ -395,6 +561,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan) ioat_chan->last_completion = ioat_chan->completion_addr = 0; ioat_chan->pending = 0; + ioat_chan->dmacount = 0; } /** @@ -406,7 +573,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan) * has run out. 
*/ static struct ioat_desc_sw * -ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) +ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) { struct ioat_desc_sw *new = NULL; @@ -425,7 +592,82 @@ ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) return new; } -static struct dma_async_tx_descriptor *ioat_dma_prep_memcpy( +static struct ioat_desc_sw * +ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) +{ + struct ioat_desc_sw *new = NULL; + + /* + * used.prev points to where to start processing + * used.next points to next free descriptor + * if used.prev == NULL, there are none waiting to be processed + * if used.next == used.prev.prev, there is only one free descriptor, + * and we need to use it to as a noop descriptor before + * linking in a new set of descriptors, since the device + * has probably already read the pointer to it + */ + if (ioat_chan->used_desc.prev && + ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) { + + struct ioat_desc_sw *desc = NULL; + struct ioat_desc_sw *noop_desc = NULL; + int i; + + /* set up the noop descriptor */ + noop_desc = to_ioat_desc(ioat_chan->used_desc.next); + noop_desc->hw->size = 0; + noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; + noop_desc->hw->src_addr = 0; + noop_desc->hw->dst_addr = 0; + + ioat_chan->used_desc.next = ioat_chan->used_desc.next->next; + ioat_chan->pending++; + ioat_chan->dmacount++; + + /* get a few more descriptors */ + for (i = 16; i; i--) { + desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); + BUG_ON(!desc); + list_add_tail(&desc->node, ioat_chan->used_desc.next); + + desc->hw->next + = to_ioat_desc(desc->node.next)->async_tx.phys; + to_ioat_desc(desc->node.prev)->hw->next + = desc->async_tx.phys; + ioat_chan->desccount++; + } + + ioat_chan->used_desc.next = noop_desc->node.next; + } + new = to_ioat_desc(ioat_chan->used_desc.next); + prefetch(new); + ioat_chan->used_desc.next = new->node.next; + + if (ioat_chan->used_desc.prev == NULL) + 
ioat_chan->used_desc.prev = &new->node; + + prefetch(new->hw); + return new; +} + +static struct ioat_desc_sw *ioat_dma_get_next_descriptor( + struct ioat_dma_chan *ioat_chan) +{ + if (!ioat_chan) + return NULL; + + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + return ioat1_dma_get_next_descriptor(ioat_chan); + break; + case IOAT_VER_2_0: + return ioat2_dma_get_next_descriptor(ioat_chan); + break; + } + return NULL; +} + +static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy( struct dma_chan *chan, size_t len, int int_en) @@ -441,19 +683,62 @@ static struct dma_async_tx_descriptor *ioat_dma_prep_memcpy( return new ? &new->async_tx : NULL; } +static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy( + struct dma_chan *chan, + size_t len, + int int_en) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioat_desc_sw *new; + + spin_lock_bh(&ioat_chan->desc_lock); + new = ioat2_dma_get_next_descriptor(ioat_chan); + new->len = len; + + /* leave ioat_chan->desc_lock set in version 2 path */ + return new ? 
&new->async_tx : NULL; +} + + /** * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended * descriptors to hw * @chan: DMA channel handle */ -static void ioat_dma_memcpy_issue_pending(struct dma_chan *chan) +static inline void __ioat1_dma_memcpy_issue_pending( + struct ioat_dma_chan *ioat_chan) +{ + ioat_chan->pending = 0; + writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET); +} + +static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); if (ioat_chan->pending != 0) { - ioat_chan->pending = 0; - writeb(IOAT_CHANCMD_APPEND, - ioat_chan->reg_base + IOAT_CHANCMD_OFFSET); + spin_lock_bh(&ioat_chan->desc_lock); + __ioat1_dma_memcpy_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); + } +} + +static inline void __ioat2_dma_memcpy_issue_pending( + struct ioat_dma_chan *ioat_chan) +{ + ioat_chan->pending = 0; + writew(ioat_chan->dmacount, + ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); +} + +static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + + if (ioat_chan->pending != 0) { + spin_lock_bh(&ioat_chan->desc_lock); + __ioat2_dma_memcpy_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); } } @@ -465,11 +750,17 @@ static void ioat_dma_cleanup_tasklet(unsigned long data) chan->reg_base + IOAT_CHANCTRL_OFFSET); } +/** + * ioat_dma_memcpy_cleanup - cleanup up finished descriptors + * @chan: ioat channel to be cleaned up + */ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) { unsigned long phys_complete; struct ioat_desc_sw *desc, *_desc; dma_cookie_t cookie = 0; + unsigned long desc_phys; + struct ioat_desc_sw *latest_desc; prefetch(ioat_chan->completion_virt); @@ -507,56 +798,115 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) cookie = 0; spin_lock_bh(&ioat_chan->desc_lock); - list_for_each_entry_safe(desc, _desc, 
&ioat_chan->used_desc, node) { - - /* - * Incoming DMA requests may use multiple descriptors, due to - * exceeding xfercap, perhaps. If so, only the last one will - * have a cookie, and require unmapping. - */ - if (desc->async_tx.cookie) { - cookie = desc->async_tx.cookie; + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + list_for_each_entry_safe(desc, _desc, + &ioat_chan->used_desc, node) { /* - * yes we are unmapping both _page and _single alloc'd - * regions with unmap_page. Is this *really* that bad? + * Incoming DMA requests may use multiple descriptors, + * due to exceeding xfercap, perhaps. If so, only the + * last one will have a cookie, and require unmapping. */ - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); - if (desc->async_tx.callback) { - desc->async_tx.callback( - desc->async_tx.callback_param); - desc->async_tx.callback = NULL; + if (desc->async_tx.cookie) { + cookie = desc->async_tx.cookie; + + /* + * yes we are unmapping both _page and _single + * alloc'd regions with unmap_page. Is this + * *really* that bad? 
+ */ + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, dst), + pci_unmap_len(desc, len), + PCI_DMA_FROMDEVICE); + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, src), + pci_unmap_len(desc, len), + PCI_DMA_TODEVICE); + + if (desc->async_tx.callback) { + desc->async_tx.callback(desc->async_tx.callback_param); + desc->async_tx.callback = NULL; + } } - } - if (desc->async_tx.phys != phys_complete) { - /* - * a completed entry, but not the last, so cleanup - * if the client is done with the descriptor - */ - if (desc->async_tx.ack) { - list_del(&desc->node); - list_add_tail(&desc->node, - &ioat_chan->free_desc); - } else + if (desc->async_tx.phys != phys_complete) { + /* + * a completed entry, but not the last, so clean + * up if the client is done with the descriptor + */ + if (desc->async_tx.ack) { + list_del(&desc->node); + list_add_tail(&desc->node, + &ioat_chan->free_desc); + } else + desc->async_tx.cookie = 0; + } else { + /* + * last used desc. Do not remove, so we can + * append from it, but don't look at it next + * time, either + */ desc->async_tx.cookie = 0; - } else { - /* - * last used desc. Do not remove, so we can append from - * it, but don't look at it next time, either - */ - desc->async_tx.cookie = 0; - /* TODO check status bits? */ + /* TODO check status bits? */ + break; + } + } + break; + case IOAT_VER_2_0: + /* has some other thread has already cleaned up? 
*/ + if (ioat_chan->used_desc.prev == NULL) break; + + /* work backwards to find latest finished desc */ + desc = to_ioat_desc(ioat_chan->used_desc.next); + latest_desc = NULL; + do { + desc = to_ioat_desc(desc->node.prev); + desc_phys = (unsigned long)desc->async_tx.phys + & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; + if (desc_phys == phys_complete) { + latest_desc = desc; + break; + } + } while (&desc->node != ioat_chan->used_desc.prev); + + if (latest_desc != NULL) { + + /* work forwards to clear finished descriptors */ + for (desc = to_ioat_desc(ioat_chan->used_desc.prev); + &desc->node != latest_desc->node.next && + &desc->node != ioat_chan->used_desc.next; + desc = to_ioat_desc(desc->node.next)) { + if (desc->async_tx.cookie) { + cookie = desc->async_tx.cookie; + desc->async_tx.cookie = 0; + + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, dst), + pci_unmap_len(desc, len), + PCI_DMA_FROMDEVICE); + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, src), + pci_unmap_len(desc, len), + PCI_DMA_TODEVICE); + + if (desc->async_tx.callback) { + desc->async_tx.callback(desc->async_tx.callback_param); + desc->async_tx.callback = NULL; + } + } + } + + /* move used.prev up beyond those that are finished */ + if (&desc->node == ioat_chan->used_desc.next) + ioat_chan->used_desc.prev = NULL; + else + ioat_chan->used_desc.prev = &desc->node; } + break; } spin_unlock_bh(&ioat_chan->desc_lock); @@ -621,8 +971,6 @@ static enum dma_status ioat_dma_is_complete(struct dma_chan *chan, return dma_async_is_complete(cookie, last_complete, last_used); } -/* PCI API */ - static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) { struct ioat_desc_sw *desc; @@ -633,21 +981,34 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL | IOAT_DMA_DESCRIPTOR_CTL_INT_GN | IOAT_DMA_DESCRIPTOR_CTL_CP_STS; - desc->hw->next = 0; desc->hw->size = 0; desc->hw->src_addr = 0; desc->hw->dst_addr = 0; 
desc->async_tx.ack = 1; - - list_add_tail(&desc->node, &ioat_chan->used_desc); + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + desc->hw->next = 0; + list_add_tail(&desc->node, &ioat_chan->used_desc); + + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->async_tx.phys) >> 32, + ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); + + writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); + break; + case IOAT_VER_2_0: + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->async_tx.phys) >> 32, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + + ioat_chan->dmacount++; + __ioat2_dma_memcpy_issue_pending(ioat_chan); + break; + } spin_unlock_bh(&ioat_chan->desc_lock); - - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, - ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_HIGH); - - writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET); } /* @@ -693,14 +1054,14 @@ static int ioat_dma_self_test(struct ioatdma_device *device) dma_chan = container_of(device->common.channels.next, struct dma_chan, device_node); - if (ioat_dma_alloc_chan_resources(dma_chan) < 1) { + if (device->common.device_alloc_chan_resources(dma_chan) < 1) { dev_err(&device->pdev->dev, "selftest cannot allocate chan resource\n"); err = -ENODEV; goto out; } - tx = ioat_dma_prep_memcpy(dma_chan, IOAT_TEST_SIZE, 0); + tx = device->common.device_prep_dma_memcpy(dma_chan, IOAT_TEST_SIZE, 0); if (!tx) { dev_err(&device->pdev->dev, "Self-test prep failed, disabling\n"); @@ -710,24 +1071,25 @@ static int ioat_dma_self_test(struct ioatdma_device *device) async_tx_ack(tx); addr = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE, - DMA_TO_DEVICE); - ioat_set_src(addr, tx, 
0); + DMA_TO_DEVICE); + tx->tx_set_src(addr, tx, 0); addr = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE, - DMA_FROM_DEVICE); - ioat_set_dest(addr, tx, 0); + DMA_FROM_DEVICE); + tx->tx_set_dest(addr, tx, 0); tx->callback = ioat_dma_test_callback; tx->callback_param = (void *)0x8086; - cookie = ioat_tx_submit(tx); + cookie = tx->tx_submit(tx); if (cookie < 0) { dev_err(&device->pdev->dev, "Self-test setup failed, disabling\n"); err = -ENODEV; goto free_resources; } - ioat_dma_memcpy_issue_pending(dma_chan); + device->common.device_issue_pending(dma_chan); msleep(1); - if (ioat_dma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + if (device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL) + != DMA_SUCCESS) { dev_err(&device->pdev->dev, "Self-test copy timed out, disabling\n"); err = -ENODEV; @@ -741,7 +1103,7 @@ static int ioat_dma_self_test(struct ioatdma_device *device) } free_resources: - ioat_dma_free_chan_resources(dma_chan); + device->common.device_free_chan_resources(dma_chan); out: kfree(src); kfree(dest); @@ -941,16 +1303,28 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, INIT_LIST_HEAD(&device->common.channels); ioat_dma_enumerate_channels(device); - dma_cap_set(DMA_MEMCPY, device->common.cap_mask); device->common.device_alloc_chan_resources = ioat_dma_alloc_chan_resources; device->common.device_free_chan_resources = ioat_dma_free_chan_resources; - device->common.device_prep_dma_memcpy = ioat_dma_prep_memcpy; + device->common.dev = &pdev->dev; + + dma_cap_set(DMA_MEMCPY, device->common.cap_mask); device->common.device_is_tx_complete = ioat_dma_is_complete; - device->common.device_issue_pending = ioat_dma_memcpy_issue_pending; device->common.device_dependency_added = ioat_dma_dependency_added; - device->common.dev = &pdev->dev; + switch (device->version) { + case IOAT_VER_1_2: + device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy; + device->common.device_issue_pending = + 
ioat1_dma_memcpy_issue_pending; + break; + case IOAT_VER_2_0: + device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy; + device->common.device_issue_pending = + ioat2_dma_memcpy_issue_pending; + break; + } + dev_err(&device->pdev->dev, "Intel(R) I/OAT DMA Engine found," " %d channels, device version 0x%02x, driver version %s\n", diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h index 5f9881e7..b668234 100644 --- a/drivers/dma/ioatdma.h +++ b/drivers/dma/ioatdma.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. + * Copyright(c) 2004 - 2007 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -28,7 +28,7 @@ #include <linux/cache.h> #include <linux/pci_ids.h> -#define IOAT_DMA_VERSION "1.26" +#define IOAT_DMA_VERSION "2.04" enum ioat_interrupt { none = 0, @@ -39,6 +39,8 @@ enum ioat_interrupt { }; #define IOAT_LOW_COMPLETION_MASK 0xffffffc0 +#define IOAT_DMA_DCA_ANY_CPU ~0 + /** * struct ioatdma_device - internal representation of a IOAT device @@ -47,6 +49,9 @@ enum ioat_interrupt { * @dma_pool: for allocating DMA descriptors * @common: embedded struct dma_device * @version: version of ioatdma device + * @irq_mode: which style irq to use + * @msix_entries: irq handlers + * @idx: per channel data */ struct ioatdma_device { @@ -63,23 +68,7 @@ struct ioatdma_device { /** * struct ioat_dma_chan - internal representation of a DMA channel - * @device: - * @reg_base: - * @sw_in_use: - * @completion: - * @completion_low: - * @completion_high: - * @completed_cookie: last cookie seen completed on cleanup - * @cookie: value of last cookie given to client - * @last_completion: - * @xfercap: - * @desc_lock: - * @free_desc: - * @used_desc: - * @resource: - * @device_node: */ - struct ioat_dma_chan { void __iomem *reg_base; @@ -95,6 +84,8 @@ struct ioat_dma_chan { struct list_head 
used_desc; int pending; + int dmacount; + int desccount; struct ioatdma_device *device; struct dma_chan common; @@ -134,12 +125,13 @@ struct ioat_desc_sw { struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, void __iomem *iobase); void ioat_dma_remove(struct ioatdma_device *device); -struct dca_provider *ioat_dca_init(struct pci_dev *pdev, - void __iomem *iobase); +struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); +struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); #else #define ioat_dma_probe(pdev, iobase) NULL #define ioat_dma_remove(device) do { } while (0) #define ioat_dca_init(pdev, iobase) NULL +#define ioat2_dca_init(pdev, iobase) NULL #endif #endif /* IOATDMA_H */ diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h index 9e7434e..dd470fa 100644 --- a/drivers/dma/ioatdma_hw.h +++ b/drivers/dma/ioatdma_hw.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. + * Copyright(c) 2004 - 2007 Intel Corporation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -22,12 +22,19 @@ #define _IOAT_HW_H_ /* PCI Configuration Space Values */ -#define IOAT_PCI_VID 0x8086 -#define IOAT_PCI_DID 0x1A38 -#define IOAT_PCI_RID 0x00 -#define IOAT_PCI_SVID 0x8086 -#define IOAT_PCI_SID 0x8086 -#define IOAT_VER_1_2 0x12 /* Version 1.2 */ +#define IOAT_PCI_VID 0x8086 + +/* CB device ID's */ +#define IOAT_PCI_DID_5000 0x1A38 +#define IOAT_PCI_DID_CNB 0x360B +#define IOAT_PCI_DID_SCNB 0x65FF +#define IOAT_PCI_DID_SNB 0x402F + +#define IOAT_PCI_RID 0x00 +#define IOAT_PCI_SVID 0x8086 +#define IOAT_PCI_SID 0x8086 +#define IOAT_VER_1_2 0x12 /* Version 1.2 */ +#define IOAT_VER_2_0 0x20 /* Version 2.0 */ struct ioat_dma_descriptor { uint32_t size; @@ -47,6 +54,16 @@ struct ioat_dma_descriptor { #define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008 #define IOAT_DMA_DESCRIPTOR_CTL_FRAME 0x00000010 #define IOAT_DMA_DESCRIPTOR_NUL 0x00000020 -#define IOAT_DMA_DESCRIPTOR_OPCODE 0xFF000000 +#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040 +#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080 +#define IOAT_DMA_DESCRIPTOR_CTL_BNDL 0x00000100 +#define IOAT_DMA_DESCRIPTOR_CTL_DCA 0x00000200 +#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT 0x00000400 + +#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000 +#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA 0x00000000 + +#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA 0x00000001 +#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK 0xFF000000 #endif diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioatdma_registers.h index baaab5e..9832d7e 100644 --- a/drivers/dma/ioatdma_registers.h +++ b/drivers/dma/ioatdma_registers.h @@ -42,26 +42,25 @@ #define IOAT_INTRCTRL_MASTER_INT_EN 0x01 /* Master Interrupt Enable */ #define IOAT_INTRCTRL_INT_STATUS 0x02 /* ATTNSTATUS -or- Channel Int */ #define IOAT_INTRCTRL_INT 0x04 /* INT_STATUS -and- MASTER_INT_EN */ -#define 
IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */ +#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */ #define IOAT_ATTNSTATUS_OFFSET 0x04 /* Each bit is a channel */ #define IOAT_VER_OFFSET 0x08 /* 8-bit */ #define IOAT_VER_MAJOR_MASK 0xF0 #define IOAT_VER_MINOR_MASK 0x0F -#define GET_IOAT_VER_MAJOR(x) ((x) & IOAT_VER_MAJOR_MASK) +#define GET_IOAT_VER_MAJOR(x) (((x) & IOAT_VER_MAJOR_MASK) >> 4) #define GET_IOAT_VER_MINOR(x) ((x) & IOAT_VER_MINOR_MASK) #define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */ #define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */ #define IOAT_INTRDELAY_INT_DELAY_MASK 0x3FFF /* Interrupt Delay Time */ -#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalesing Supported */ +#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */ #define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ #define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001 - #define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ /* DMA Channel Registers */ @@ -74,25 +73,101 @@ #define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 #define IOAT_CHANCTRL_INT_DISABLE 0x0001 -#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatability */ -#define IOAT_DMA_COMP_V1 0x0001 /* Compatability with DMA version 1 */ - -#define IOAT_CHANSTS_OFFSET 0x04 /* 64-bit Channel Status Register */ -#define IOAT_CHANSTS_OFFSET_LOW 0x04 -#define IOAT_CHANSTS_OFFSET_HIGH 0x08 -#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR 0xFFFFFFFFFFFFFFC0UL +#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */ +#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */ +#define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */ + + +#define IOAT1_CHANSTS_OFFSET 0x04 /* 64-bit Channel Status Register */ +#define IOAT2_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */ +#define IOAT_CHANSTS_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? 
IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET) +#define IOAT1_CHANSTS_OFFSET_LOW 0x04 +#define IOAT2_CHANSTS_OFFSET_LOW 0x08 +#define IOAT_CHANSTS_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW) +#define IOAT1_CHANSTS_OFFSET_HIGH 0x08 +#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C +#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH) +#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F #define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010 +#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED 0x3 -#define IOAT_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */ -#define IOAT_CHAINADDR_OFFSET_LOW 0x0C -#define IOAT_CHAINADDR_OFFSET_HIGH 0x10 -#define IOAT_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */ + +#define IOAT_CHAN_DMACOUNT_OFFSET 0x06 /* 16-bit DMA Count register */ + +#define IOAT_DCACTRL_OFFSET 0x30 /* 32 bit Direct Cache Access Control Register */ +#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000 +#define IOAT_DCACTRL_TARGET_CPU_MASK 0xFFFF /* APIC ID */ + +/* CB DCA Memory Space Registers */ +#define IOAT_DCAOFFSET_OFFSET 0x14 +/* CB_BAR + IOAT_DCAOFFSET value */ +#define IOAT_DCA_VER_OFFSET 0x00 +#define IOAT_DCA_VER_MAJOR_MASK 0xF0 +#define IOAT_DCA_VER_MINOR_MASK 0x0F + +#define IOAT_DCA_COMP_OFFSET 0x02 +#define IOAT_DCA_COMP_V1 0x1 + +#define IOAT_FSB_CAPABILITY_OFFSET 0x04 +#define IOAT_FSB_CAPABILITY_PREFETCH 0x1 + +#define IOAT_PCI_CAPABILITY_OFFSET 0x06 +#define IOAT_PCI_CAPABILITY_MEMWR 0x1 + +#define IOAT_FSB_CAP_ENABLE_OFFSET 0x08 +#define IOAT_FSB_CAP_ENABLE_PREFETCH 0x1 + +#define IOAT_PCI_CAP_ENABLE_OFFSET 0x0A +#define IOAT_PCI_CAP_ENABLE_MEMWR 
0x1 + +#define IOAT_APICID_TAG_MAP_OFFSET 0x0C +#define IOAT_APICID_TAG_MAP_TAG0 0x0000000F +#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0 +#define IOAT_APICID_TAG_MAP_TAG1 0x000000F0 +#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4 +#define IOAT_APICID_TAG_MAP_TAG2 0x00000F00 +#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8 +#define IOAT_APICID_TAG_MAP_TAG3 0x0000F000 +#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12 +#define IOAT_APICID_TAG_MAP_TAG4 0x000F0000 +#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16 +#define IOAT_APICID_TAG_CB2_VALID 0x8080808080 + +#define IOAT_DCA_GREQID_OFFSET 0x10 +#define IOAT_DCA_GREQID_SIZE 0x04 +#define IOAT_DCA_GREQID_MASK 0xFFFF +#define IOAT_DCA_GREQID_IGNOREFUN 0x10000000 +#define IOAT_DCA_GREQID_VALID 0x20000000 +#define IOAT_DCA_GREQID_LASTID 0x80000000 + + + +#define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */ +#define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */ +#define IOAT_CHAINADDR_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET) +#define IOAT1_CHAINADDR_OFFSET_LOW 0x0C +#define IOAT2_CHAINADDR_OFFSET_LOW 0x10 +#define IOAT_CHAINADDR_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW) +#define IOAT1_CHAINADDR_OFFSET_HIGH 0x10 +#define IOAT2_CHAINADDR_OFFSET_HIGH 0x14 +#define IOAT_CHAINADDR_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH) + +#define IOAT1_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */ +#define IOAT2_CHANCMD_OFFSET 0x04 /* 8-bit DMA Channel Command Register */ +#define IOAT_CHANCMD_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? 
IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET) #define IOAT_CHANCMD_RESET 0x20 #define IOAT_CHANCMD_RESUME 0x10 #define IOAT_CHANCMD_ABORT 0x08 @@ -124,6 +199,7 @@ #define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000 #define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000 #define IOAT_CHANERR_SOFT_ERR 0x4000 +#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000 #define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index cd6cdb3..1ee009e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2332,6 +2332,7 @@ #define PCI_DEVICE_ID_INTEL_MCH_PC1 0x359a #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e #define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b +#define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f #define PCI_DEVICE_ID_INTEL_IOAT_SCNB 0x65ff #define PCI_DEVICE_ID_INTEL_TOLAPAI_0 0x5031 #define PCI_DEVICE_ID_INTEL_TOLAPAI_1 0x5032 |