aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/xen
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/xen')
-rw-r--r--drivers/xen/Kconfig86
-rw-r--r--drivers/xen/Makefile8
-rw-r--r--drivers/xen/balloon.c215
-rw-r--r--drivers/xen/events.c131
-rw-r--r--drivers/xen/evtchn.c31
-rw-r--r--drivers/xen/gntdev.c118
-rw-r--r--drivers/xen/grant-table.c10
-rw-r--r--drivers/xen/manage.c21
-rw-r--r--drivers/xen/pci.c107
-rw-r--r--drivers/xen/swiotlb-xen.c62
-rw-r--r--drivers/xen/tmem.c170
-rw-r--r--drivers/xen/xen-balloon.c17
-rw-r--r--drivers/xen/xenbus/xenbus_client.c16
-rw-r--r--drivers/xen/xenbus/xenbus_comms.c4
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c151
-rw-r--r--drivers/xen/xenbus/xenbus_probe.h5
-rw-r--r--drivers/xen/xenbus/xenbus_probe_backend.c12
-rw-r--r--drivers/xen/xenbus/xenbus_probe_frontend.c128
-rw-r--r--drivers/xen/xenbus/xenbus_xs.c4
19 files changed, 996 insertions, 300 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index a59638b..8795480 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -9,6 +9,53 @@ config XEN_BALLOON
the system to expand the domain's memory allocation, or alternatively
return unneeded memory to the system.
+config XEN_SELFBALLOONING
+ bool "Dynamically self-balloon kernel memory to target"
+ depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP && XEN_TMEM
+ default n
+ help
+ Self-ballooning dynamically balloons available kernel memory driven
+ by the current usage of anonymous memory ("committed AS") and
+ controlled by various sysfs-settable parameters. Configuring
+ FRONTSWAP is highly recommended; if it is not configured, self-
+ ballooning is disabled by default but can be enabled with the
+ 'selfballooning' kernel boot parameter. If FRONTSWAP is configured,
+ frontswap-selfshrinking is enabled by default but can be disabled
+ with the 'noselfshrink' kernel boot parameter; and self-ballooning
+ is enabled by default but can be disabled with the 'noselfballooning'
+ kernel boot parameter. Note that systems without a sufficiently
+ large swap device should not enable self-ballooning.
+
+config XEN_BALLOON_MEMORY_HOTPLUG
+ bool "Memory hotplug support for Xen balloon driver"
+ default n
+ depends on XEN_BALLOON && MEMORY_HOTPLUG
+ help
+ Memory hotplug support for Xen balloon driver allows expanding memory
+ available for the system above limit declared at system startup.
+ It is very useful on critical systems which require long
+ run without rebooting.
+
+ Memory could be hotplugged in following steps:
+
+ 1) dom0: xl mem-max <domU> <maxmem>
+ where <maxmem> is >= requested memory size,
+
+ 2) dom0: xl mem-set <domU> <memory>
+ where <memory> is requested memory size; alternatively memory
+ could be added by writing proper value to
+ /sys/devices/system/xen_memory/xen_memory0/target or
+ /sys/devices/system/xen_memory/xen_memory0/target_kb on dumU,
+
+ 3) domU: for i in /sys/devices/system/memory/memory*/state; do \
+ [ "`cat "$i"`" = offline ] && echo online > "$i"; done
+
+ Memory could be onlined automatically on domU by adding following line to udev rules:
+
+ SUBSYSTEM=="memory", ACTION=="add", RUN+="/bin/sh -c '[ -f /sys$devpath/state ] && echo online > /sys$devpath/state'"
+
+ In that case step 3 should be omitted.
+
config XEN_SCRUB_PAGES
bool "Scrub pages before returning them to system"
depends on XEN_BALLOON
@@ -90,19 +137,38 @@ config XEN_GRANT_DEV_ALLOC
to other domains. This can be used to implement frontend drivers
or as part of an inter-domain shared memory channel.
-config XEN_PLATFORM_PCI
- tristate "xen platform pci device driver"
- depends on XEN_PVHVM && PCI
- default m
- help
- Driver for the Xen PCI Platform device: it is responsible for
- initializing xenbus and grant_table when running in a Xen HVM
- domain. As a consequence this driver is required to run any Xen PV
- frontend on Xen HVM.
-
config SWIOTLB_XEN
def_bool y
depends on PCI
select SWIOTLB
+config XEN_TMEM
+ bool
+ default y if (CLEANCACHE || FRONTSWAP)
+ help
+ Shim to interface in-kernel Transcendent Memory hooks
+ (e.g. cleancache and frontswap) to Xen tmem hypercalls.
+
+config XEN_PCIDEV_BACKEND
+ tristate "Xen PCI-device backend driver"
+ depends on PCI && X86 && XEN
+ depends on XEN_BACKEND
+ default m
+ help
+ The PCI device backend driver allows the kernel to export arbitrary
+ PCI devices to other guests. If you select this to be a module, you
+ will need to make sure no other driver has bound to the device(s)
+ you want to make visible to other guests.
+
+ The parameter "passthrough" allows you specify how you want the PCI
+ devices to appear in the guest. You can choose the default (0) where
+ PCI topology starts at 00.00.0, or (1) for passthrough if you want
+ the PCI devices topology appear the same as in the host.
+
+ The "hide" parameter (only applicable if backend driver is compiled
+ into the kernel) allows you to bind the PCI devices to this module
+ from the default device drivers. The argument is the list of PCI BDFs:
+ xen-pciback.hide=(03:00.0)(04:00.0)
+
+ If in doubt, say m.
endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index bbc1825..974fffd 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,6 +1,5 @@
obj-y += grant-table.o features.o events.o manage.o balloon.o
obj-y += xenbus/
-obj-y += tmem.o
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_features.o := $(nostackp)
@@ -9,17 +8,18 @@ obj-$(CONFIG_BLOCK) += biomerge.o
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o
+obj-$(CONFIG_XEN_SELFBALLOONING) += xen-selfballoon.o
obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o
obj-$(CONFIG_XENFS) += xenfs/
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
-obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o
+obj-$(CONFIG_XEN_PVHVM) += platform-pci.o
+obj-$(CONFIG_XEN_TMEM) += tmem.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
obj-$(CONFIG_XEN_DOM0) += pci.o
+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
xen-evtchn-y := evtchn.o
xen-gntdev-y := gntdev.o
xen-gntalloc-y := gntalloc.o
-
-xen-platform-pci-y := platform-pci.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index f54290b..31ab82f 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -4,6 +4,12 @@
* Copyright (c) 2003, B Dragovic
* Copyright (c) 2003-2004, M Williamson, K Fraser
* Copyright (c) 2005 Dan M. Smith, IBM Corporation
+ * Copyright (c) 2010 Daniel Kiper
+ *
+ * Memory hotplug support was written by Daniel Kiper. Work on
+ * it was sponsored by Google under Google Summer of Code 2010
+ * program. Jeremy Fitzhardinge from Citrix was the mentor for
+ * this project.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
@@ -33,6 +39,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/errno.h>
+#include <linux/module.h>
#include <linux/mm.h>
#include <linux/bootmem.h>
#include <linux/pagemap.h>
@@ -40,6 +47,9 @@
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/gfp.h>
+#include <linux/notifier.h>
+#include <linux/memory.h>
+#include <linux/memory_hotplug.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
@@ -84,8 +94,8 @@ static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
#define inc_totalhigh_pages() (totalhigh_pages++)
#define dec_totalhigh_pages() (totalhigh_pages--)
#else
-#define inc_totalhigh_pages() do {} while(0)
-#define dec_totalhigh_pages() do {} while(0)
+#define inc_totalhigh_pages() do {} while (0)
+#define dec_totalhigh_pages() do {} while (0)
#endif
/* List of ballooned pages, threaded through the mem_map array. */
@@ -145,8 +155,7 @@ static struct page *balloon_retrieve(bool prefer_highmem)
if (PageHighMem(page)) {
balloon_stats.balloon_high--;
inc_totalhigh_pages();
- }
- else
+ } else
balloon_stats.balloon_low--;
totalram_pages++;
@@ -194,6 +203,87 @@ static enum bp_state update_schedule(enum bp_state state)
return BP_EAGAIN;
}
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+static long current_credit(void)
+{
+ return balloon_stats.target_pages - balloon_stats.current_pages -
+ balloon_stats.hotplug_pages;
+}
+
+static bool balloon_is_inflated(void)
+{
+ if (balloon_stats.balloon_low || balloon_stats.balloon_high ||
+ balloon_stats.balloon_hotplug)
+ return true;
+ else
+ return false;
+}
+
+/*
+ * reserve_additional_memory() adds memory region of size >= credit above
+ * max_pfn. New region is section aligned and size is modified to be multiple
+ * of section size. Those features allow optimal use of address space and
+ * establish proper alignment when this function is called first time after
+ * boot (last section not fully populated at boot time contains unused memory
+ * pages with PG_reserved bit not set; online_pages_range() does not allow page
+ * onlining in whole range if first onlined page does not have PG_reserved
+ * bit set). Real size of added memory is established at page onlining stage.
+ */
+
+static enum bp_state reserve_additional_memory(long credit)
+{
+ int nid, rc;
+ u64 hotplug_start_paddr;
+ unsigned long balloon_hotplug = credit;
+
+ hotplug_start_paddr = PFN_PHYS(SECTION_ALIGN_UP(max_pfn));
+ balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION);
+ nid = memory_add_physaddr_to_nid(hotplug_start_paddr);
+
+ rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT);
+
+ if (rc) {
+ pr_info("xen_balloon: %s: add_memory() failed: %i\n", __func__, rc);
+ return BP_EAGAIN;
+ }
+
+ balloon_hotplug -= credit;
+
+ balloon_stats.hotplug_pages += credit;
+ balloon_stats.balloon_hotplug = balloon_hotplug;
+
+ return BP_DONE;
+}
+
+static void xen_online_page(struct page *page)
+{
+ __online_page_set_limits(page);
+
+ mutex_lock(&balloon_mutex);
+
+ __balloon_append(page);
+
+ if (balloon_stats.hotplug_pages)
+ --balloon_stats.hotplug_pages;
+ else
+ --balloon_stats.balloon_hotplug;
+
+ mutex_unlock(&balloon_mutex);
+}
+
+static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v)
+{
+ if (val == MEM_ONLINE)
+ schedule_delayed_work(&balloon_worker, 0);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block xen_memory_nb = {
+ .notifier_call = xen_memory_notifier,
+ .priority = 0
+};
+#else
static long current_credit(void)
{
unsigned long target = balloon_stats.target_pages;
@@ -206,6 +296,21 @@ static long current_credit(void)
return target - balloon_stats.current_pages;
}
+static bool balloon_is_inflated(void)
+{
+ if (balloon_stats.balloon_low || balloon_stats.balloon_high)
+ return true;
+ else
+ return false;
+}
+
+static enum bp_state reserve_additional_memory(long credit)
+{
+ balloon_stats.target_pages = balloon_stats.current_pages;
+ return BP_DONE;
+}
+#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
+
static enum bp_state increase_reservation(unsigned long nr_pages)
{
int rc;
@@ -217,6 +322,15 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
.domid = DOMID_SELF
};
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+ if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) {
+ nr_pages = min(nr_pages, balloon_stats.balloon_hotplug);
+ balloon_stats.hotplug_pages += nr_pages;
+ balloon_stats.balloon_hotplug -= nr_pages;
+ return BP_DONE;
+ }
+#endif
+
if (nr_pages > ARRAY_SIZE(frame_list))
nr_pages = ARRAY_SIZE(frame_list);
@@ -279,6 +393,15 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
.domid = DOMID_SELF
};
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+ if (balloon_stats.hotplug_pages) {
+ nr_pages = min(nr_pages, balloon_stats.hotplug_pages);
+ balloon_stats.hotplug_pages -= nr_pages;
+ balloon_stats.balloon_hotplug += nr_pages;
+ return BP_DONE;
+ }
+#endif
+
if (nr_pages > ARRAY_SIZE(frame_list))
nr_pages = ARRAY_SIZE(frame_list);
@@ -299,7 +422,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
(unsigned long)__va(pfn << PAGE_SHIFT),
__pte_ma(0), 0);
BUG_ON(ret);
- }
+ }
}
@@ -340,8 +463,12 @@ static void balloon_process(struct work_struct *work)
do {
credit = current_credit();
- if (credit > 0)
- state = increase_reservation(credit);
+ if (credit > 0) {
+ if (balloon_is_inflated())
+ state = increase_reservation(credit);
+ else
+ state = reserve_additional_memory(credit);
+ }
if (credit < 0)
state = decrease_reservation(-credit, GFP_BALLOON);
@@ -374,20 +501,24 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target);
* alloc_xenballooned_pages - get pages that have been ballooned out
* @nr_pages: Number of pages to get
* @pages: pages returned
+ * @highmem: allow highmem pages
* @return 0 on success, error otherwise
*/
-int alloc_xenballooned_pages(int nr_pages, struct page** pages)
+int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
{
int pgno = 0;
- struct page* page;
+ struct page *page;
mutex_lock(&balloon_mutex);
while (pgno < nr_pages) {
- page = balloon_retrieve(true);
- if (page) {
+ page = balloon_retrieve(highmem);
+ if (page && (highmem || !PageHighMem(page))) {
pages[pgno++] = page;
} else {
enum bp_state st;
- st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER);
+ if (page)
+ balloon_append(page);
+ st = decrease_reservation(nr_pages - pgno,
+ highmem ? GFP_HIGHUSER : GFP_USER);
if (st != BP_DONE)
goto out_undo;
}
@@ -409,7 +540,7 @@ EXPORT_SYMBOL(alloc_xenballooned_pages);
* @nr_pages: Number of pages
* @pages: pages to return
*/
-void free_xenballooned_pages(int nr_pages, struct page** pages)
+void free_xenballooned_pages(int nr_pages, struct page **pages)
{
int i;
@@ -428,17 +559,40 @@ void free_xenballooned_pages(int nr_pages, struct page** pages)
}
EXPORT_SYMBOL(free_xenballooned_pages);
-static int __init balloon_init(void)
+static void __init balloon_add_region(unsigned long start_pfn,
+ unsigned long pages)
{
unsigned long pfn, extra_pfn_end;
struct page *page;
+ /*
+ * If the amount of usable memory has been limited (e.g., with
+ * the 'mem' command line parameter), don't add pages beyond
+ * this limit.
+ */
+ extra_pfn_end = min(max_pfn, start_pfn + pages);
+
+ for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
+ page = pfn_to_page(pfn);
+ /* totalram_pages and totalhigh_pages do not
+ include the boot-time balloon extension, so
+ don't subtract from it. */
+ __balloon_append(page);
+ }
+}
+
+static int __init balloon_init(void)
+{
+ int i;
+
if (!xen_domain())
return -ENODEV;
pr_info("xen/balloon: Initialising balloon driver.\n");
- balloon_stats.current_pages = xen_pv_domain() ? min(xen_start_info->nr_pages, max_pfn) : max_pfn;
+ balloon_stats.current_pages = xen_pv_domain()
+ ? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
+ : max_pfn;
balloon_stats.target_pages = balloon_stats.current_pages;
balloon_stats.balloon_low = 0;
balloon_stats.balloon_high = 0;
@@ -448,25 +602,22 @@ static int __init balloon_init(void)
balloon_stats.retry_count = 1;
balloon_stats.max_retry_count = RETRY_UNLIMITED;
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+ balloon_stats.hotplug_pages = 0;
+ balloon_stats.balloon_hotplug = 0;
+
+ set_online_page_callback(&xen_online_page);
+ register_memory_notifier(&xen_memory_nb);
+#endif
+
/*
- * Initialise the balloon with excess memory space. We need
- * to make sure we don't add memory which doesn't exist or
- * logically exist. The E820 map can be trimmed to be smaller
- * than the amount of physical memory due to the mem= command
- * line parameter. And if this is a 32-bit non-HIGHMEM kernel
- * on a system with memory which requires highmem to access,
- * don't try to use it.
+ * Initialize the balloon with pages from the extra memory
+ * regions (see arch/x86/xen/setup.c).
*/
- extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()),
- (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size));
- for (pfn = PFN_UP(xen_extra_mem_start);
- pfn < extra_pfn_end;
- pfn++) {
- page = pfn_to_page(pfn);
- /* totalram_pages and totalhigh_pages do not include the boot-time
- balloon extension, so don't subtract from it. */
- __balloon_append(page);
- }
+ for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++)
+ if (xen_extra_mem[i].size)
+ balloon_add_region(PFN_UP(xen_extra_mem[i].start),
+ PFN_DOWN(xen_extra_mem[i].size));
return 0;
}
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 7ba4d0e..bcf7711 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -54,7 +54,7 @@
* This lock protects updates to the following mapping and reference-count
* arrays. The lock does not need to be acquired to read the mapping tables.
*/
-static DEFINE_SPINLOCK(irq_mapping_update_lock);
+static DEFINE_MUTEX(irq_mapping_update_lock);
static LIST_HEAD(xen_irq_list_head);
@@ -85,8 +85,7 @@ enum xen_irq_type {
* IPI - IPI vector
* EVTCHN -
*/
-struct irq_info
-{
+struct irq_info {
struct list_head list;
enum xen_irq_type type; /* type */
unsigned irq;
@@ -282,9 +281,9 @@ static inline unsigned long active_evtchns(unsigned int cpu,
struct shared_info *sh,
unsigned int idx)
{
- return (sh->evtchn_pending[idx] &
+ return sh->evtchn_pending[idx] &
per_cpu(cpu_evtchn_mask, cpu)[idx] &
- ~sh->evtchn_mask[idx]);
+ ~sh->evtchn_mask[idx];
}
static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
@@ -432,7 +431,8 @@ static int __must_check xen_allocate_irq_dynamic(void)
irq = irq_alloc_desc_from(first, -1);
- xen_irq_init(irq);
+ if (irq >= 0)
+ xen_irq_init(irq);
return irq;
}
@@ -600,7 +600,7 @@ static void disable_pirq(struct irq_data *data)
disable_dynirq(data);
}
-static int find_irq_by_gsi(unsigned gsi)
+int xen_irq_from_gsi(unsigned gsi)
{
struct irq_info *info;
@@ -614,11 +614,7 @@ static int find_irq_by_gsi(unsigned gsi)
return -1;
}
-
-int xen_allocate_pirq_gsi(unsigned gsi)
-{
- return gsi;
-}
+EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
/*
* Do not make any assumptions regarding the relationship between the
@@ -636,9 +632,9 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
int irq = -1;
struct physdev_irq irq_op;
- spin_lock(&irq_mapping_update_lock);
+ mutex_lock(&irq_mapping_update_lock);
- irq = find_irq_by_gsi(gsi);
+ irq = xen_irq_from_gsi(gsi);
if (irq != -1) {
printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n",
irq, gsi);
@@ -689,7 +685,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
handle_edge_irq, name);
out:
- spin_unlock(&irq_mapping_update_lock);
+ mutex_unlock(&irq_mapping_update_lock);
return irq;
}
@@ -715,10 +711,10 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
{
int irq, ret;
- spin_lock(&irq_mapping_update_lock);
+ mutex_lock(&irq_mapping_update_lock);
irq = xen_allocate_irq_dynamic();
- if (irq == -1)
+ if (irq < 0)
goto out;
irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
@@ -729,12 +725,12 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
if (ret < 0)
goto error_irq;
out:
- spin_unlock(&irq_mapping_update_lock);
+ mutex_unlock(&irq_mapping_update_lock);
return irq;
error_irq:
- spin_unlock(&irq_mapping_update_lock);
+ mutex_unlock(&irq_mapping_update_lock);
xen_free_irq(irq);
- return -1;
+ return ret;
}
#endif
@@ -745,7 +741,7 @@ int xen_destroy_irq(int irq)
struct irq_info *info = info_for_irq(irq);
int rc = -ENOENT;
- spin_lock(&irq_mapping_update_lock);
+ mutex_lock(&irq_mapping_update_lock);
desc = irq_to_desc(irq);
if (!desc)
@@ -771,7 +767,7 @@ int xen_destroy_irq(int irq)
xen_free_irq(irq);
out:
- spin_unlock(&irq_mapping_update_lock);
+ mutex_unlock(&irq_mapping_update_lock);
return rc;
}
@@ -781,10 +777,10 @@ int xen_irq_from_pirq(unsigned pirq)
struct irq_info *info;
- spin_lock(&irq_mapping_update_lock);
+ mutex_lock(&irq_mapping_update_lock);
list_for_each_entry(info, &xen_irq_list_head, list) {
- if (info == NULL || info->type != IRQT_PIRQ)
+ if (info->type != IRQT_PIRQ)
continue;
irq = info->irq;
if (info->u.pirq.pirq == pirq)
@@ -792,7 +788,7 @@ int xen_irq_from_pirq(unsigned pirq)
}
irq = -1;
out:
- spin_unlock(&irq_mapping_update_lock);
+ mutex_unlock(&irq_mapping_update_lock);
return irq;
}
@@ -807,7 +803,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
{
int irq;
- spin_lock(&irq_mapping_update_lock);
+ mutex_lock(&irq_mapping_update_lock);
irq = evtchn_to_irq[evtchn];
@@ -823,7 +819,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
}
out:
- spin_unlock(&irq_mapping_update_lock);
+ mutex_unlock(&irq_mapping_update_lock);
return irq;
}
@@ -834,7 +830,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
struct evtchn_bind_ipi bind_ipi;
int evtchn, irq;
- spin_lock(&irq_mapping_update_lock);
+ mutex_lock(&irq_mapping_update_lock);
irq = per_cpu(ipi_to_irq, cpu)[ipi];
@@ -858,7 +854,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
}
out:
- spin_unlock(&irq_mapping_update_lock);
+ mutex_unlock(&irq_mapping_update_lock);
return irq;
}
@@ -877,13 +873,34 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
}
+static int find_virq(unsigned int virq, unsigned int cpu)
+{
+ struct evtchn_status status;
+ int port, rc = -ENOENT;
-int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+ memset(&status, 0, sizeof(status));
+ for (port = 0; port <= NR_EVENT_CHANNELS; port++) {
+ status.dom = DOMID_SELF;
+ status.port = port;
+ rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
+ if (rc < 0)
+ continue;
+ if (status.status != EVTCHNSTAT_virq)
+ continue;
+ if (status.u.virq == virq && status.vcpu == cpu) {
+ rc = port;
+ break;
+ }
+ }
+ return rc;
+}
+
+int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
{
struct evtchn_bind_virq bind_virq;
- int evtchn, irq;
+ int evtchn, irq, ret;
- spin_lock(&irq_mapping_update_lock);
+ mutex_lock(&irq_mapping_update_lock);
irq = per_cpu(virq_to_irq, cpu)[virq];
@@ -892,15 +909,25 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
if (irq == -1)
goto out;
- irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
- handle_percpu_irq, "virq");
+ if (percpu)
+ irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
+ handle_percpu_irq, "virq");
+ else
+ irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
+ handle_edge_irq, "virq");
bind_virq.virq = virq;
bind_virq.vcpu = cpu;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
- &bind_virq) != 0)
- BUG();
- evtchn = bind_virq.port;
+ ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+ &bind_virq);
+ if (ret == 0)
+ evtchn = bind_virq.port;
+ else {
+ if (ret == -EEXIST)
+ ret = find_virq(virq, cpu);
+ BUG_ON(ret < 0);
+ evtchn = ret;
+ }
xen_irq_info_virq_init(cpu, irq, evtchn, virq);
@@ -908,7 +935,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
}
out:
- spin_unlock(&irq_mapping_update_lock);
+ mutex_unlock(&irq_mapping_update_lock);
return irq;
}
@@ -918,7 +945,7 @@ static void unbind_from_irq(unsigned int irq)
struct evtchn_close close;
int evtchn = evtchn_from_irq(irq);
- spin_lock(&irq_mapping_update_lock);
+ mutex_lock(&irq_mapping_update_lock);
if (VALID_EVTCHN(evtchn)) {
close.port = evtchn;
@@ -948,7 +975,7 @@ static void unbind_from_irq(unsigned int irq)
xen_free_irq(irq);
- spin_unlock(&irq_mapping_update_lock);
+ mutex_unlock(&irq_mapping_update_lock);
}
int bind_evtchn_to_irqhandler(unsigned int evtchn,
@@ -1000,7 +1027,7 @@ int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
{
int irq, retval;
- irq = bind_virq_to_irq(virq, cpu);
+ irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU);
if (irq < 0)
return irq;
retval = request_irq(irq, handler, irqflags, devname, dev_id);
@@ -1157,7 +1184,7 @@ static void __xen_evtchn_do_upcall(void)
int cpu = get_cpu();
struct shared_info *s = HYPERVISOR_shared_info;
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
- unsigned count;
+ unsigned count;
do {
unsigned long pending_words;
@@ -1295,7 +1322,7 @@ void rebind_evtchn_irq(int evtchn, int irq)
will also be masked. */
disable_irq(irq);
- spin_lock(&irq_mapping_update_lock);
+ mutex_lock(&irq_mapping_update_lock);
/* After resume the irq<->evtchn mappings are all cleared out */
BUG_ON(evtchn_to_irq[evtchn] != -1);
@@ -1305,7 +1332,7 @@ void rebind_evtchn_irq(int evtchn, int irq)
xen_irq_info_evtchn_init(irq, evtchn);
- spin_unlock(&irq_mapping_update_lock);
+ mutex_unlock(&irq_mapping_update_lock);
/* new event channels are always bound to cpu 0 */
irq_set_affinity(irq, cpumask_of(0));
@@ -1317,8 +1344,10 @@ void rebind_evtchn_irq(int evtchn, int irq)
/* Rebind an evtchn so that it gets delivered to a specific cpu */
static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
{
+ struct shared_info *s = HYPERVISOR_shared_info;
struct evtchn_bind_vcpu bind_vcpu;
int evtchn = evtchn_from_irq(irq);
+ int masked;
if (!VALID_EVTCHN(evtchn))
return -1;
@@ -1335,6 +1364,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
bind_vcpu.vcpu = tcpu;
/*
+ * Mask the event while changing the VCPU binding to prevent
+ * it being delivered on an unexpected VCPU.
+ */
+ masked = sync_test_and_set_bit(evtchn, s->evtchn_mask);
+
+ /*
* If this fails, it usually just indicates that we're dealing with a
* virq or IPI channel, which don't actually need to be rebound. Ignore
* it, but don't do the xenlinux-level rebind in that case.
@@ -1342,6 +1377,9 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
bind_evtchn_to_cpu(evtchn, tcpu);
+ if (!masked)
+ unmask_evtchn(evtchn);
+
return 0;
}
@@ -1686,6 +1724,7 @@ void __init xen_init_IRQ(void)
evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
GFP_KERNEL);
+ BUG_ON(!evtchn_to_irq);
for (i = 0; i < NR_EVENT_CHANNELS; i++)
evtchn_to_irq[i] = -1;
@@ -1704,6 +1743,6 @@ void __init xen_init_IRQ(void)
} else {
irq_ctx_init(smp_processor_id());
if (xen_initial_domain())
- xen_setup_pirqs();
+ pci_xen_initial_domain();
}
}
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index ce3a0f5..c93d59e 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -269,6 +269,14 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
u->name, (void *)(unsigned long)port);
if (rc >= 0)
rc = 0;
+ else {
+ /* bind failed, should close the port now */
+ struct evtchn_close close;
+ close.port = port;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+ BUG();
+ set_port_user(port, NULL);
+ }
return rc;
}
@@ -277,6 +285,8 @@ static void evtchn_unbind_from_user(struct per_user_data *u, int port)
{
int irq = irq_from_evtchn(port);
+ BUG_ON(irq < 0);
+
unbind_from_irqhandler(irq, (void *)(unsigned long)port);
set_port_user(port, NULL);
@@ -367,12 +377,18 @@ static long evtchn_ioctl(struct file *file,
if (unbind.port >= NR_EVENT_CHANNELS)
break;
+ spin_lock_irq(&port_user_lock);
+
rc = -ENOTCONN;
- if (get_port_user(unbind.port) != u)
+ if (get_port_user(unbind.port) != u) {
+ spin_unlock_irq(&port_user_lock);
break;
+ }
disable_irq(irq_from_evtchn(unbind.port));
+ spin_unlock_irq(&port_user_lock);
+
evtchn_unbind_from_user(u, unbind.port);
rc = 0;
@@ -472,15 +488,26 @@ static int evtchn_release(struct inode *inode, struct file *filp)
int i;
struct per_user_data *u = filp->private_data;
+ spin_lock_irq(&port_user_lock);
+
+ free_page((unsigned long)u->ring);
+
for (i = 0; i < NR_EVENT_CHANNELS; i++) {
if (get_port_user(i) != u)
continue;
disable_irq(irq_from_evtchn(i));
+ }
+
+ spin_unlock_irq(&port_user_lock);
+
+ for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+ if (get_port_user(i) != u)
+ continue;
+
evtchn_unbind_from_user(get_port_user(i), i);
}
- free_page((unsigned long)u->ring);
kfree(u->name);
kfree(u);
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index b4e830e..5027662 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -60,7 +60,7 @@ static int use_ptemod;
struct gntdev_priv {
struct list_head maps;
/* lock protects maps from concurrent changes */
- spinlock_t lock;
+ struct mutex lock;
struct mm_struct *mm;
struct mmu_notifier mn;
};
@@ -83,6 +83,7 @@ struct grant_map {
struct ioctl_gntdev_grant_ref *grants;
struct gnttab_map_grant_ref *map_ops;
struct gnttab_unmap_grant_ref *unmap_ops;
+ struct gnttab_map_grant_ref *kmap_ops;
struct page **pages;
};
@@ -104,6 +105,21 @@ static void gntdev_print_maps(struct gntdev_priv *priv,
#endif
}
+static void gntdev_free_map(struct grant_map *map)
+{
+ if (map == NULL)
+ return;
+
+ if (map->pages)
+ free_xenballooned_pages(map->count, map->pages);
+ kfree(map->pages);
+ kfree(map->grants);
+ kfree(map->map_ops);
+ kfree(map->unmap_ops);
+ kfree(map->kmap_ops);
+ kfree(map);
+}
+
static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
{
struct grant_map *add;
@@ -113,22 +129,25 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
if (NULL == add)
return NULL;
- add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL);
- add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL);
- add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL);
- add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL);
+ add->grants = kcalloc(count, sizeof(add->grants[0]), GFP_KERNEL);
+ add->map_ops = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL);
+ add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL);
+ add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
+ add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
if (NULL == add->grants ||
NULL == add->map_ops ||
NULL == add->unmap_ops ||
+ NULL == add->kmap_ops ||
NULL == add->pages)
goto err;
- if (alloc_xenballooned_pages(count, add->pages))
+ if (alloc_xenballooned_pages(count, add->pages, false /* lowmem */))
goto err;
for (i = 0; i < count; i++) {
add->map_ops[i].handle = -1;
add->unmap_ops[i].handle = -1;
+ add->kmap_ops[i].handle = -1;
}
add->index = 0;
@@ -138,11 +157,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
return add;
err:
- kfree(add->pages);
- kfree(add->grants);
- kfree(add->map_ops);
- kfree(add->unmap_ops);
- kfree(add);
+ gntdev_free_map(add);
return NULL;
}
@@ -188,21 +203,12 @@ static void gntdev_put_map(struct grant_map *map)
atomic_sub(map->count, &pages_mapped);
- if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
+ if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
notify_remote_via_evtchn(map->notify.event);
- }
-
- if (map->pages) {
- if (!use_ptemod)
- unmap_grant_pages(map, 0, map->count);
- free_xenballooned_pages(map->count, map->pages);
- }
- kfree(map->pages);
- kfree(map->grants);
- kfree(map->map_ops);
- kfree(map->unmap_ops);
- kfree(map);
+ if (map->pages && !use_ptemod)
+ unmap_grant_pages(map, 0, map->count);
+ gntdev_free_map(map);
}
/* ------------------------------------------------------------------ */
@@ -243,10 +249,35 @@ static int map_grant_pages(struct grant_map *map)
gnttab_set_unmap_op(&map->unmap_ops[i], addr,
map->flags, -1 /* handle */);
}
+ } else {
+ /*
+ * Setup the map_ops corresponding to the pte entries pointing
+ * to the kernel linear addresses of the struct pages.
+ * These ptes are completely different from the user ptes dealt
+ * with find_grant_ptes.
+ */
+ for (i = 0; i < map->count; i++) {
+ unsigned level;
+ unsigned long address = (unsigned long)
+ pfn_to_kaddr(page_to_pfn(map->pages[i]));
+ pte_t *ptep;
+ u64 pte_maddr = 0;
+ BUG_ON(PageHighMem(map->pages[i]));
+
+ ptep = lookup_address(address, &level);
+ pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
+ gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
+ map->flags |
+ GNTMAP_host_map |
+ GNTMAP_contains_pte,
+ map->grants[i].ref,
+ map->grants[i].domid);
+ }
}
pr_debug("map %d+%d\n", map->index, map->count);
- err = gnttab_map_refs(map->map_ops, map->pages, map->count);
+ err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
+ map->pages, map->count);
if (err)
return err;
@@ -364,7 +395,7 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
unsigned long mstart, mend;
int err;
- spin_lock(&priv->lock);
+ mutex_lock(&priv->lock);
list_for_each_entry(map, &priv->maps, next) {
if (!map->vma)
continue;
@@ -383,7 +414,7 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
(mend - mstart) >> PAGE_SHIFT);
WARN_ON(err);
}
- spin_unlock(&priv->lock);
+ mutex_unlock(&priv->lock);
}
static void mn_invl_page(struct mmu_notifier *mn,
@@ -400,7 +431,7 @@ static void mn_release(struct mmu_notifier *mn,
struct grant_map *map;
int err;
- spin_lock(&priv->lock);
+ mutex_lock(&priv->lock);
list_for_each_entry(map, &priv->maps, next) {
if (!map->vma)
continue;
@@ -410,7 +441,7 @@ static void mn_release(struct mmu_notifier *mn,
err = unmap_grant_pages(map, /* offset */ 0, map->count);
WARN_ON(err);
}
- spin_unlock(&priv->lock);
+ mutex_unlock(&priv->lock);
}
struct mmu_notifier_ops gntdev_mmu_ops = {
@@ -431,7 +462,7 @@ static int gntdev_open(struct inode *inode, struct file *flip)
return -ENOMEM;
INIT_LIST_HEAD(&priv->maps);
- spin_lock_init(&priv->lock);
+ mutex_init(&priv->lock);
if (use_ptemod) {
priv->mm = get_task_mm(current);
@@ -462,13 +493,13 @@ static int gntdev_release(struct inode *inode, struct file *flip)
pr_debug("priv %p\n", priv);
- spin_lock(&priv->lock);
+ mutex_lock(&priv->lock);
while (!list_empty(&priv->maps)) {
map = list_entry(priv->maps.next, struct grant_map, next);
list_del(&map->next);
gntdev_put_map(map);
}
- spin_unlock(&priv->lock);
+ mutex_unlock(&priv->lock);
if (use_ptemod)
mmu_notifier_unregister(&priv->mn, priv->mm);
@@ -506,10 +537,10 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
return err;
}
- spin_lock(&priv->lock);
+ mutex_lock(&priv->lock);
gntdev_add_map(priv, map);
op.index = map->index << PAGE_SHIFT;
- spin_unlock(&priv->lock);
+ mutex_unlock(&priv->lock);
if (copy_to_user(u, &op, sizeof(op)) != 0)
return -EFAULT;
@@ -528,14 +559,15 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
return -EFAULT;
pr_debug("priv %p, del %d+%d\n", priv, (int)op.index, (int)op.count);
- spin_lock(&priv->lock);
+ mutex_lock(&priv->lock);
map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
if (map) {
list_del(&map->next);
- gntdev_put_map(map);
err = 0;
}
- spin_unlock(&priv->lock);
+ mutex_unlock(&priv->lock);
+ if (map)
+ gntdev_put_map(map);
return err;
}
@@ -578,7 +610,7 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
return -EINVAL;
- spin_lock(&priv->lock);
+ mutex_lock(&priv->lock);
list_for_each_entry(map, &priv->maps, next) {
uint64_t begin = map->index << PAGE_SHIFT;
@@ -601,7 +633,7 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
map->notify.event = op.event_channel_port;
rc = 0;
unlock_out:
- spin_unlock(&priv->lock);
+ mutex_unlock(&priv->lock);
return rc;
}
@@ -646,7 +678,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
pr_debug("map %d+%d at %lx (pgoff %lx)\n",
index, count, vma->vm_start, vma->vm_pgoff);
- spin_lock(&priv->lock);
+ mutex_lock(&priv->lock);
map = gntdev_find_map_index(priv, index, count);
if (!map)
goto unlock_out;
@@ -681,7 +713,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
map->flags |= GNTMAP_readonly;
}
- spin_unlock(&priv->lock);
+ mutex_unlock(&priv->lock);
if (use_ptemod) {
err = apply_to_page_range(vma->vm_mm, vma->vm_start,
@@ -709,11 +741,11 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
return 0;
unlock_out:
- spin_unlock(&priv->lock);
+ mutex_unlock(&priv->lock);
return err;
out_unlock_put:
- spin_unlock(&priv->lock);
+ mutex_unlock(&priv->lock);
out_put_map:
if (use_ptemod)
map->vma = NULL;
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 949af52..b657de6 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -82,7 +82,7 @@ static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
static int get_free_entries(unsigned count)
{
unsigned long flags;
- int ref, rc;
+ int ref, rc = 0;
grant_ref_t head;
spin_lock_irqsave(&gnttab_list_lock, flags);
@@ -193,7 +193,7 @@ int gnttab_query_foreign_access(grant_ref_t ref)
nflags = shared[ref].flags;
- return (nflags & (GTF_reading|GTF_writing));
+ return nflags & (GTF_reading|GTF_writing);
}
EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
@@ -457,7 +457,8 @@ unsigned int gnttab_max_grant_frames(void)
EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
- struct page **pages, unsigned int count)
+ struct gnttab_map_grant_ref *kmap_ops,
+ struct page **pages, unsigned int count)
{
int i, ret;
pte_t *pte;
@@ -497,8 +498,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
*/
return -EOPNOTSUPP;
}
- ret = m2p_add_override(mfn, pages[i],
- map_ops[i].flags & GNTMAP_contains_pte);
+ ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
if (ret)
return ret;
}
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 0b5366b..caa3969 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -9,6 +9,7 @@
#include <linux/stop_machine.h>
#include <linux/freezer.h>
#include <linux/syscore_ops.h>
+#include <linux/export.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
@@ -92,7 +93,6 @@ static int xen_suspend(void *data)
if (!si->cancelled) {
xen_irq_resume();
- xen_console_resume();
xen_timer_resume();
}
@@ -108,16 +108,17 @@ static void do_suspend(void)
shutting_down = SHUTDOWN_SUSPEND;
-#ifdef CONFIG_PREEMPT
- /* If the kernel is preemptible, we need to freeze all the processes
- to prevent them from being in the middle of a pagetable update
- during suspend. */
err = freeze_processes();
if (err) {
- printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
+ pr_err("%s: freeze processes failed %d\n", __func__, err);
goto out;
}
-#endif
+
+ err = freeze_kernel_threads();
+ if (err) {
+ pr_err("%s: freeze kernel threads failed %d\n", __func__, err);
+ goto out_thaw;
+ }
err = dpm_suspend_start(PMSG_FREEZE);
if (err) {
@@ -148,6 +149,10 @@ static void do_suspend(void)
err = stop_machine(xen_suspend, &si, cpumask_of(0));
+ /* Resume console as early as possible. */
+ if (!si.cancelled)
+ xen_console_resume();
+
dpm_resume_noirq(si.cancelled ? PMSG_THAW : PMSG_RESTORE);
if (err) {
@@ -168,10 +173,8 @@ out_resume:
clock_was_set();
out_thaw:
-#ifdef CONFIG_PREEMPT
thaw_processes();
out:
-#endif
shutting_down = SHUTDOWN_INVALID;
}
#endif /* CONFIG_HIBERNATE_CALLBACKS */
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index cef4baf..b84bf0b 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -18,6 +18,7 @@
*/
#include <linux/pci.h>
+#include <linux/acpi.h>
#include <xen/xen.h>
#include <xen/interface/physdev.h>
#include <xen/interface/xen.h>
@@ -26,26 +27,85 @@
#include <asm/xen/hypercall.h>
#include "../pci/pci.h"
+static bool __read_mostly pci_seg_supported = true;
+
static int xen_add_device(struct device *dev)
{
int r;
struct pci_dev *pci_dev = to_pci_dev(dev);
+#ifdef CONFIG_PCI_IOV
+ struct pci_dev *physfn = pci_dev->physfn;
+#endif
+
+ if (pci_seg_supported) {
+ struct physdev_pci_device_add add = {
+ .seg = pci_domain_nr(pci_dev->bus),
+ .bus = pci_dev->bus->number,
+ .devfn = pci_dev->devfn
+ };
+#ifdef CONFIG_ACPI
+ acpi_handle handle;
+#endif
+
+#ifdef CONFIG_PCI_IOV
+ if (pci_dev->is_virtfn) {
+ add.flags = XEN_PCI_DEV_VIRTFN;
+ add.physfn.bus = physfn->bus->number;
+ add.physfn.devfn = physfn->devfn;
+ } else
+#endif
+ if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn))
+ add.flags = XEN_PCI_DEV_EXTFN;
+
+#ifdef CONFIG_ACPI
+ handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
+ if (!handle)
+ handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
+#ifdef CONFIG_PCI_IOV
+ if (!handle && pci_dev->is_virtfn)
+ handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
+#endif
+ if (handle) {
+ acpi_status status;
+
+ do {
+ unsigned long long pxm;
+
+ status = acpi_evaluate_integer(handle, "_PXM",
+ NULL, &pxm);
+ if (ACPI_SUCCESS(status)) {
+ add.optarr[0] = pxm;
+ add.flags |= XEN_PCI_DEV_PXM;
+ break;
+ }
+ status = acpi_get_parent(handle, &handle);
+ } while (ACPI_SUCCESS(status));
+ }
+#endif /* CONFIG_ACPI */
+
+ r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add);
+ if (r != -ENOSYS)
+ return r;
+ pci_seg_supported = false;
+ }
+ if (pci_domain_nr(pci_dev->bus))
+ r = -ENOSYS;
#ifdef CONFIG_PCI_IOV
- if (pci_dev->is_virtfn) {
+ else if (pci_dev->is_virtfn) {
struct physdev_manage_pci_ext manage_pci_ext = {
.bus = pci_dev->bus->number,
.devfn = pci_dev->devfn,
.is_virtfn = 1,
- .physfn.bus = pci_dev->physfn->bus->number,
- .physfn.devfn = pci_dev->physfn->devfn,
+ .physfn.bus = physfn->bus->number,
+ .physfn.devfn = physfn->devfn,
};
r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
&manage_pci_ext);
- } else
+ }
#endif
- if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
+ else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
struct physdev_manage_pci_ext manage_pci_ext = {
.bus = pci_dev->bus->number,
.devfn = pci_dev->devfn,
@@ -56,7 +116,7 @@ static int xen_add_device(struct device *dev)
&manage_pci_ext);
} else {
struct physdev_manage_pci manage_pci = {
- .bus = pci_dev->bus->number,
+ .bus = pci_dev->bus->number,
.devfn = pci_dev->devfn,
};
@@ -71,13 +131,27 @@ static int xen_remove_device(struct device *dev)
{
int r;
struct pci_dev *pci_dev = to_pci_dev(dev);
- struct physdev_manage_pci manage_pci;
- manage_pci.bus = pci_dev->bus->number;
- manage_pci.devfn = pci_dev->devfn;
+ if (pci_seg_supported) {
+ struct physdev_pci_device device = {
+ .seg = pci_domain_nr(pci_dev->bus),
+ .bus = pci_dev->bus->number,
+ .devfn = pci_dev->devfn
+ };
- r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
- &manage_pci);
+ r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_remove,
+ &device);
+ } else if (pci_domain_nr(pci_dev->bus))
+ r = -ENOSYS;
+ else {
+ struct physdev_manage_pci manage_pci = {
+ .bus = pci_dev->bus->number,
+ .devfn = pci_dev->devfn
+ };
+
+ r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
+ &manage_pci);
+ }
return r;
}
@@ -96,13 +170,16 @@ static int xen_pci_notifier(struct notifier_block *nb,
r = xen_remove_device(dev);
break;
default:
- break;
+ return NOTIFY_DONE;
}
-
- return r;
+ if (r)
+ dev_err(dev, "Failed to %s - passthrough or MSI/MSI-X might fail!\n",
+ action == BUS_NOTIFY_ADD_DEVICE ? "add" :
+ (action == BUS_NOTIFY_DEL_DEVICE ? "delete" : "?"));
+ return NOTIFY_OK;
}
-struct notifier_block device_nb = {
+static struct notifier_block device_nb = {
.notifier_call = xen_pci_notifier,
};
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index fd60dff..89588e7 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -35,9 +35,11 @@
#include <linux/bootmem.h>
#include <linux/dma-mapping.h>
+#include <linux/export.h>
#include <xen/swiotlb-xen.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
+#include <xen/hvc-console.h>
/*
* Used to do a quick range check in swiotlb_tbl_unmap_single and
* swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
@@ -146,8 +148,10 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
void __init xen_swiotlb_init(int verbose)
{
unsigned long bytes;
- int rc;
+ int rc = -ENOMEM;
unsigned long nr_tbl;
+ char *m = NULL;
+ unsigned int repeat = 3;
nr_tbl = swioltb_nr_tbl();
if (nr_tbl)
@@ -156,16 +160,17 @@ void __init xen_swiotlb_init(int verbose)
xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
}
-
+retry:
bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
/*
* Get IO TLB memory from any location.
*/
xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes));
- if (!xen_io_tlb_start)
- panic("Cannot allocate SWIOTLB buffer");
-
+ if (!xen_io_tlb_start) {
+ m = "Cannot allocate Xen-SWIOTLB buffer!\n";
+ goto error;
+ }
xen_io_tlb_end = xen_io_tlb_start + bytes;
/*
* And replace that memory with pages under 4GB.
@@ -173,17 +178,28 @@ void __init xen_swiotlb_init(int verbose)
rc = xen_swiotlb_fixup(xen_io_tlb_start,
bytes,
xen_io_tlb_nslabs);
- if (rc)
+ if (rc) {
+ free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes));
+ m = "Failed to get contiguous memory for DMA from Xen!\n"\
+ "You either: don't have the permissions, do not have"\
+ " enough free memory under 4GB, or the hypervisor memory"\
+ "is too fragmented!";
goto error;
-
+ }
start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
return;
error:
- panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\
- "We either don't have the permission or you do not have enough"\
- "free memory under 4GB!\n", rc);
+ if (repeat--) {
+ xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
+ (xen_io_tlb_nslabs >> 1));
+ printk(KERN_INFO "Xen-SWIOTLB: Lowering to %luMB\n",
+ (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
+ goto retry;
+ }
+ xen_raw_printk("%s (rc:%d)", m, rc);
+ panic("%s (rc:%d)", m, rc);
}
void *
@@ -194,6 +210,8 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
int order = get_order(size);
u64 dma_mask = DMA_BIT_MASK(32);
unsigned long vstart;
+ phys_addr_t phys;
+ dma_addr_t dev_addr;
/*
* Ignore region specifiers - the kernel's ideas of
@@ -209,18 +227,26 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
vstart = __get_free_pages(flags, order);
ret = (void *)vstart;
+ if (!ret)
+ return ret;
+
if (hwdev && hwdev->coherent_dma_mask)
dma_mask = dma_alloc_coherent_mask(hwdev, flags);
- if (ret) {
+ phys = virt_to_phys(ret);
+ dev_addr = xen_phys_to_bus(phys);
+ if (((dev_addr + size - 1 <= dma_mask)) &&
+ !range_straddles_page_boundary(phys, size))
+ *dma_handle = dev_addr;
+ else {
if (xen_create_contiguous_region(vstart, order,
fls64(dma_mask)) != 0) {
free_pages(vstart, order);
return NULL;
}
- memset(ret, 0, size);
*dma_handle = virt_to_machine(ret).maddr;
}
+ memset(ret, 0, size);
return ret;
}
EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent);
@@ -230,11 +256,21 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
dma_addr_t dev_addr)
{
int order = get_order(size);
+ phys_addr_t phys;
+ u64 dma_mask = DMA_BIT_MASK(32);
if (dma_release_from_coherent(hwdev, order, vaddr))
return;
- xen_destroy_contiguous_region((unsigned long)vaddr, order);
+ if (hwdev && hwdev->coherent_dma_mask)
+ dma_mask = hwdev->coherent_dma_mask;
+
+ phys = virt_to_phys(vaddr);
+
+ if (((dev_addr + size - 1 > dma_mask)) ||
+ range_straddles_page_boundary(phys, size))
+ xen_destroy_contiguous_region((unsigned long)vaddr, order);
+
free_pages((unsigned long)vaddr, order);
}
EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent);
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index 816a449..d369965 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -1,7 +1,7 @@
/*
* Xen implementation for transcendent memory (tmem)
*
- * Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
+ * Copyright (C) 2009-2011 Oracle Corp. All rights reserved.
* Author: Dan Magenheimer
*/
@@ -9,8 +9,14 @@
#include <linux/types.h>
#include <linux/init.h>
#include <linux/pagemap.h>
+#include <linux/module.h>
#include <linux/cleancache.h>
+/* temporary ifdef until include/linux/frontswap.h is upstream */
+#ifdef CONFIG_FRONTSWAP
+#include <linux/frontswap.h>
+#endif
+
#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <asm/xen/hypercall.h>
@@ -122,14 +128,8 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid)
return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0);
}
-static int xen_tmem_destroy_pool(u32 pool_id)
-{
- struct tmem_oid oid = { { 0 } };
-
- return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0);
-}
-
-int tmem_enabled;
+int tmem_enabled __read_mostly;
+EXPORT_SYMBOL(tmem_enabled);
static int __init enable_tmem(char *s)
{
@@ -139,6 +139,14 @@ static int __init enable_tmem(char *s)
__setup("tmem", enable_tmem);
+#ifdef CONFIG_CLEANCACHE
+static int xen_tmem_destroy_pool(u32 pool_id)
+{
+ struct tmem_oid oid = { { 0 } };
+
+ return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0);
+}
+
/* cleancache ops */
static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key,
@@ -240,18 +248,156 @@ static struct cleancache_ops tmem_cleancache_ops = {
.init_shared_fs = tmem_cleancache_init_shared_fs,
.init_fs = tmem_cleancache_init_fs
};
+#endif
-static int __init xen_tmem_init(void)
+#ifdef CONFIG_FRONTSWAP
+/* frontswap tmem operations */
+
+/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
+static int tmem_frontswap_poolid;
+
+/*
+ * Swizzling increases objects per swaptype, increasing tmem concurrency
+ * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS
+ */
+#define SWIZ_BITS 4
+#define SWIZ_MASK ((1 << SWIZ_BITS) - 1)
+#define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
+#define iswiz(_ind) (_ind >> SWIZ_BITS)
+
+static inline struct tmem_oid oswiz(unsigned type, u32 ind)
{
- struct cleancache_ops old_ops;
+ struct tmem_oid oid = { .oid = { 0 } };
+ oid.oid[0] = _oswiz(type, ind);
+ return oid;
+}
+/* returns 0 if the page was successfully put into frontswap, -1 if not */
+static int tmem_frontswap_put_page(unsigned type, pgoff_t offset,
+ struct page *page)
+{
+ u64 ind64 = (u64)offset;
+ u32 ind = (u32)offset;
+ unsigned long pfn = page_to_pfn(page);
+ int pool = tmem_frontswap_poolid;
+ int ret;
+
+ if (pool < 0)
+ return -1;
+ if (ind64 != ind)
+ return -1;
+ mb(); /* ensure page is quiescent; tmem may address it with an alias */
+ ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn);
+ /* translate Xen tmem return values to linux semantics */
+ if (ret == 1)
+ return 0;
+ else
+ return -1;
+}
+
+/*
+ * returns 0 if the page was successfully gotten from frontswap, -1 if
+ * was not present (should never happen!)
+ */
+static int tmem_frontswap_get_page(unsigned type, pgoff_t offset,
+ struct page *page)
+{
+ u64 ind64 = (u64)offset;
+ u32 ind = (u32)offset;
+ unsigned long pfn = page_to_pfn(page);
+ int pool = tmem_frontswap_poolid;
+ int ret;
+
+ if (pool < 0)
+ return -1;
+ if (ind64 != ind)
+ return -1;
+ ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn);
+ /* translate Xen tmem return values to linux semantics */
+ if (ret == 1)
+ return 0;
+ else
+ return -1;
+}
+
+/* flush a single page from frontswap */
+static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset)
+{
+ u64 ind64 = (u64)offset;
+ u32 ind = (u32)offset;
+ int pool = tmem_frontswap_poolid;
+
+ if (pool < 0)
+ return;
+ if (ind64 != ind)
+ return;
+ (void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind));
+}
+
+/* flush all pages from the passed swaptype */
+static void tmem_frontswap_flush_area(unsigned type)
+{
+ int pool = tmem_frontswap_poolid;
+ int ind;
+
+ if (pool < 0)
+ return;
+ for (ind = SWIZ_MASK; ind >= 0; ind--)
+ (void)xen_tmem_flush_object(pool, oswiz(type, ind));
+}
+
+static void tmem_frontswap_init(unsigned ignored)
+{
+ struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID;
+
+ /* a single tmem poolid is used for all frontswap "types" (swapfiles) */
+ if (tmem_frontswap_poolid < 0)
+ tmem_frontswap_poolid =
+ xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE);
+}
+
+static int __initdata use_frontswap = 1;
+
+static int __init no_frontswap(char *s)
+{
+ use_frontswap = 0;
+ return 1;
+}
+
+__setup("nofrontswap", no_frontswap);
+
+static struct frontswap_ops tmem_frontswap_ops = {
+ .put_page = tmem_frontswap_put_page,
+ .get_page = tmem_frontswap_get_page,
+ .flush_page = tmem_frontswap_flush_page,
+ .flush_area = tmem_frontswap_flush_area,
+ .init = tmem_frontswap_init
+};
+#endif
+
+static int __init xen_tmem_init(void)
+{
if (!xen_domain())
return 0;
+#ifdef CONFIG_FRONTSWAP
+ if (tmem_enabled && use_frontswap) {
+ char *s = "";
+ struct frontswap_ops old_ops =
+ frontswap_register_ops(&tmem_frontswap_ops);
+
+ tmem_frontswap_poolid = -1;
+ if (old_ops.init != NULL)
+ s = " (WARNING: frontswap_ops overridden)";
+ printk(KERN_INFO "frontswap enabled, RAM provided by "
+ "Xen Transcendent Memory\n");
+ }
+#endif
#ifdef CONFIG_CLEANCACHE
BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
if (tmem_enabled && use_cleancache) {
char *s = "";
- old_ops = cleancache_register_ops(&tmem_cleancache_ops);
+ struct cleancache_ops old_ops =
+ cleancache_register_ops(&tmem_cleancache_ops);
if (old_ops.init_fs != NULL)
s = " (WARNING: cleancache_ops overridden)";
printk(KERN_INFO "cleancache enabled, RAM provided by "
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index a4ff225..9cc2259 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -50,11 +50,6 @@ static struct sys_device balloon_sysdev;
static int register_balloon(struct sys_device *sysdev);
-static struct xenbus_watch target_watch =
-{
- .node = "memory/target"
-};
-
/* React to a change in the target key */
static void watch_target(struct xenbus_watch *watch,
const char **vec, unsigned int len)
@@ -73,6 +68,11 @@ static void watch_target(struct xenbus_watch *watch,
*/
balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
}
+static struct xenbus_watch target_watch = {
+ .node = "memory/target",
+ .callback = watch_target,
+};
+
static int balloon_init_watcher(struct notifier_block *notifier,
unsigned long event,
@@ -87,7 +87,9 @@ static int balloon_init_watcher(struct notifier_block *notifier,
return NOTIFY_DONE;
}
-static struct notifier_block xenstore_notifier;
+static struct notifier_block xenstore_notifier = {
+ .notifier_call = balloon_init_watcher,
+};
static int __init balloon_init(void)
{
@@ -98,8 +100,7 @@ static int __init balloon_init(void)
register_balloon(&balloon_sysdev);
- target_watch.callback = watch_target;
- xenstore_notifier.notifier_call = balloon_init_watcher;
+ register_xen_selfballooning(&balloon_sysdev);
register_xenstore_notifier(&xenstore_notifier);
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index cdacf92..1906125 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -33,7 +33,9 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/vmalloc.h>
+#include <linux/export.h>
#include <asm/xen/hypervisor.h>
+#include <asm/xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
#include <xen/events.h>
@@ -435,25 +437,26 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
{
struct gnttab_map_grant_ref op = {
- .flags = GNTMAP_host_map,
+ .flags = GNTMAP_host_map | GNTMAP_contains_pte,
.ref = gnt_ref,
.dom = dev->otherend_id,
};
struct vm_struct *area;
+ pte_t *pte;
*vaddr = NULL;
- area = xen_alloc_vm_area(PAGE_SIZE);
+ area = alloc_vm_area(PAGE_SIZE, &pte);
if (!area)
return -ENOMEM;
- op.host_addr = (unsigned long)area->addr;
+ op.host_addr = arbitrary_virt_to_machine(pte).maddr;
if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
BUG();
if (op.status != GNTST_okay) {
- xen_free_vm_area(area);
+ free_vm_area(area);
xenbus_dev_fatal(dev, op.status,
"mapping in shared page %d from domain %d",
gnt_ref, dev->otherend_id);
@@ -526,6 +529,7 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
struct gnttab_unmap_grant_ref op = {
.host_addr = (unsigned long)vaddr,
};
+ unsigned int level;
/* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
* method so that we don't have to muck with vmalloc internals here.
@@ -547,12 +551,14 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
}
op.handle = (grant_handle_t)area->phys_addr;
+ op.host_addr = arbitrary_virt_to_machine(
+ lookup_address((unsigned long)vaddr, &level)).maddr;
if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
BUG();
if (op.status == GNTST_okay)
- xen_free_vm_area(area);
+ free_vm_area(area);
else
xenbus_dev_error(dev, op.status,
"unmapping page at handle %d error %d",
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index 090c61e..2eff7a6 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -212,7 +212,9 @@ int xb_init_comms(void)
printk(KERN_WARNING "XENBUS response ring is not quiescent "
"(%08x:%08x): fixing up\n",
intf->rsp_cons, intf->rsp_prod);
- intf->rsp_cons = intf->rsp_prod;
+ /* breaks kdump */
+ if (!reset_devices)
+ intf->rsp_cons = intf->rsp_prod;
}
if (xenbus_irq) {
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 7397695..1b178c6 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -46,6 +46,7 @@
#include <linux/mutex.h>
#include <linux/io.h>
#include <linux/slab.h>
+#include <linux/module.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -309,8 +310,7 @@ void xenbus_unregister_driver(struct xenbus_driver *drv)
}
EXPORT_SYMBOL_GPL(xenbus_unregister_driver);
-struct xb_find_info
-{
+struct xb_find_info {
struct xenbus_device *dev;
const char *nodename;
};
@@ -378,26 +378,32 @@ static void xenbus_dev_release(struct device *dev)
kfree(to_xenbus_device(dev));
}
-static ssize_t xendev_show_nodename(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t nodename_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
}
-static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
-static ssize_t xendev_show_devtype(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t devtype_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
}
-static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
-static ssize_t xendev_show_modalias(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t modalias_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
+ return sprintf(buf, "%s:%s\n", dev->bus->name,
+ to_xenbus_device(dev)->devicetype);
}
-static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
+
+struct device_attribute xenbus_dev_attrs[] = {
+ __ATTR_RO(nodename),
+ __ATTR_RO(devtype),
+ __ATTR_RO(modalias),
+ __ATTR_NULL
+};
+EXPORT_SYMBOL_GPL(xenbus_dev_attrs);
int xenbus_probe_node(struct xen_bus_type *bus,
const char *type,
@@ -449,25 +455,7 @@ int xenbus_probe_node(struct xen_bus_type *bus,
if (err)
goto fail;
- err = device_create_file(&xendev->dev, &dev_attr_nodename);
- if (err)
- goto fail_unregister;
-
- err = device_create_file(&xendev->dev, &dev_attr_devtype);
- if (err)
- goto fail_remove_nodename;
-
- err = device_create_file(&xendev->dev, &dev_attr_modalias);
- if (err)
- goto fail_remove_devtype;
-
return 0;
-fail_remove_devtype:
- device_remove_file(&xendev->dev, &dev_attr_devtype);
-fail_remove_nodename:
- device_remove_file(&xendev->dev, &dev_attr_nodename);
-fail_unregister:
- device_unregister(&xendev->dev);
fail:
kfree(xendev);
return err;
@@ -651,7 +639,7 @@ int xenbus_dev_cancel(struct device *dev)
EXPORT_SYMBOL_GPL(xenbus_dev_cancel);
/* A flag to determine if xenstored is 'ready' (i.e. has started) */
-int xenstored_ready = 0;
+int xenstored_ready;
int register_xenstore_notifier(struct notifier_block *nb)
@@ -696,64 +684,74 @@ static int __init xenbus_probe_initcall(void)
device_initcall(xenbus_probe_initcall);
-static int __init xenbus_init(void)
+/* Set up event channel for xenstored which is run as a local process
+ * (this is normally used only in dom0)
+ */
+static int __init xenstored_local_init(void)
{
int err = 0;
unsigned long page = 0;
+ struct evtchn_alloc_unbound alloc_unbound;
- DPRINTK("");
+ /* Allocate Xenstore page */
+ page = get_zeroed_page(GFP_KERNEL);
+ if (!page)
+ goto out_err;
- err = -ENODEV;
- if (!xen_domain())
- return err;
+ xen_store_mfn = xen_start_info->store_mfn =
+ pfn_to_mfn(virt_to_phys((void *)page) >>
+ PAGE_SHIFT);
- /*
- * Domain0 doesn't have a store_evtchn or store_mfn yet.
- */
- if (xen_initial_domain()) {
- struct evtchn_alloc_unbound alloc_unbound;
+ /* Next allocate a local port which xenstored can bind to */
+ alloc_unbound.dom = DOMID_SELF;
+ alloc_unbound.remote_dom = DOMID_SELF;
- /* Allocate Xenstore page */
- page = get_zeroed_page(GFP_KERNEL);
- if (!page)
- goto out_error;
+ err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+ &alloc_unbound);
+ if (err == -ENOSYS)
+ goto out_err;
- xen_store_mfn = xen_start_info->store_mfn =
- pfn_to_mfn(virt_to_phys((void *)page) >>
- PAGE_SHIFT);
+ BUG_ON(err);
+ xen_store_evtchn = xen_start_info->store_evtchn =
+ alloc_unbound.port;
- /* Next allocate a local port which xenstored can bind to */
- alloc_unbound.dom = DOMID_SELF;
- alloc_unbound.remote_dom = 0;
+ return 0;
- err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
- &alloc_unbound);
- if (err == -ENOSYS)
- goto out_error;
+ out_err:
+ if (page != 0)
+ free_page(page);
+ return err;
+}
- BUG_ON(err);
- xen_store_evtchn = xen_start_info->store_evtchn =
- alloc_unbound.port;
+static int __init xenbus_init(void)
+{
+ int err = 0;
- xen_store_interface = mfn_to_virt(xen_store_mfn);
+ if (!xen_domain())
+ return -ENODEV;
+
+ if (xen_hvm_domain()) {
+ uint64_t v = 0;
+ err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
+ if (err)
+ goto out_error;
+ xen_store_evtchn = (int)v;
+ err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+ if (err)
+ goto out_error;
+ xen_store_mfn = (unsigned long)v;
+ xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
} else {
- if (xen_hvm_domain()) {
- uint64_t v = 0;
- err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
- if (err)
- goto out_error;
- xen_store_evtchn = (int)v;
- err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+ xen_store_evtchn = xen_start_info->store_evtchn;
+ xen_store_mfn = xen_start_info->store_mfn;
+ if (xen_store_evtchn)
+ xenstored_ready = 1;
+ else {
+ err = xenstored_local_init();
if (err)
goto out_error;
- xen_store_mfn = (unsigned long)v;
- xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
- } else {
- xen_store_evtchn = xen_start_info->store_evtchn;
- xen_store_mfn = xen_start_info->store_mfn;
- xen_store_interface = mfn_to_virt(xen_store_mfn);
- xenstored_ready = 1;
}
+ xen_store_interface = mfn_to_virt(xen_store_mfn);
}
/* Initialize the interface to xenstore. */
@@ -772,12 +770,7 @@ static int __init xenbus_init(void)
proc_mkdir("xen", NULL);
#endif
- return 0;
-
- out_error:
- if (page != 0)
- free_page(page);
-
+out_error:
return err;
}
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h
index 888b990..9b1de4e 100644
--- a/drivers/xen/xenbus/xenbus_probe.h
+++ b/drivers/xen/xenbus/xenbus_probe.h
@@ -36,8 +36,7 @@
#define XEN_BUS_ID_SIZE 20
-struct xen_bus_type
-{
+struct xen_bus_type {
char *root;
unsigned int levels;
int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename);
@@ -48,6 +47,8 @@ struct xen_bus_type
struct bus_type bus;
};
+extern struct device_attribute xenbus_dev_attrs[];
+
extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
extern int xenbus_dev_probe(struct device *_dev);
extern int xenbus_dev_remove(struct device *_dev);
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 6cf467b..c3c7cd1 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -42,6 +42,7 @@
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/notifier.h>
+#include <linux/export.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -104,8 +105,9 @@ static int xenbus_uevent_backend(struct device *dev,
xdev = to_xenbus_device(dev);
bus = container_of(xdev->dev.bus, struct xen_bus_type, bus);
- if (xdev == NULL)
- return -ENODEV;
+
+ if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype))
+ return -ENOMEM;
/* stuff we want to pass to /sbin/hotplug */
if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype))
@@ -183,10 +185,6 @@ static void frontend_changed(struct xenbus_watch *watch,
xenbus_otherend_changed(watch, vec, len, 0);
}
-static struct device_attribute xenbus_backend_dev_attrs[] = {
- __ATTR_NULL
-};
-
static struct xen_bus_type xenbus_backend = {
.root = "backend",
.levels = 3, /* backend/type/<frontend>/<id> */
@@ -200,7 +198,7 @@ static struct xen_bus_type xenbus_backend = {
.probe = xenbus_dev_probe,
.remove = xenbus_dev_remove,
.shutdown = xenbus_dev_shutdown,
- .dev_attrs = xenbus_backend_dev_attrs,
+ .dev_attrs = xenbus_dev_attrs,
},
};
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index 560f217..2ce95c0 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -13,6 +13,7 @@
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/io.h>
+#include <linux/module.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -81,10 +82,6 @@ static void backend_changed(struct xenbus_watch *watch,
xenbus_otherend_changed(watch, vec, len, 1);
}
-static struct device_attribute xenbus_frontend_dev_attrs[] = {
- __ATTR_NULL
-};
-
static const struct dev_pm_ops xenbus_pm_ops = {
.suspend = xenbus_dev_suspend,
.resume = xenbus_dev_resume,
@@ -106,7 +103,7 @@ static struct xen_bus_type xenbus_frontend = {
.probe = xenbus_dev_probe,
.remove = xenbus_dev_remove,
.shutdown = xenbus_dev_shutdown,
- .dev_attrs = xenbus_frontend_dev_attrs,
+ .dev_attrs = xenbus_dev_attrs,
.pm = &xenbus_pm_ops,
},
@@ -289,10 +286,131 @@ int __xenbus_register_frontend(struct xenbus_driver *drv,
}
EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
+static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
+static int backend_state;
+
+static void xenbus_reset_backend_state_changed(struct xenbus_watch *w,
+ const char **v, unsigned int l)
+{
+ xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state);
+ printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+ v[XS_WATCH_PATH], xenbus_strstate(backend_state));
+ wake_up(&backend_state_wq);
+}
+
+static void xenbus_reset_wait_for_backend(char *be, int expected)
+{
+ long timeout;
+ timeout = wait_event_interruptible_timeout(backend_state_wq,
+ backend_state == expected, 5 * HZ);
+ if (timeout <= 0)
+ printk(KERN_INFO "XENBUS: backend %s timed out.\n", be);
+}
+
+/*
+ * Reset frontend if it is in Connected or Closed state.
+ * Wait for backend to catch up.
+ * State Connected happens during kdump, Closed after kexec.
+ */
+static void xenbus_reset_frontend(char *fe, char *be, int be_state)
+{
+ struct xenbus_watch be_watch;
+
+ printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+ be, xenbus_strstate(be_state));
+
+ memset(&be_watch, 0, sizeof(be_watch));
+ be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be);
+ if (!be_watch.node)
+ return;
+
+ be_watch.callback = xenbus_reset_backend_state_changed;
+ backend_state = XenbusStateUnknown;
+
+ printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be);
+ register_xenbus_watch(&be_watch);
+
+ /* fall through to forward backend to state XenbusStateInitialising */
+ switch (be_state) {
+ case XenbusStateConnected:
+ xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing);
+ xenbus_reset_wait_for_backend(be, XenbusStateClosing);
+
+ case XenbusStateClosing:
+ xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed);
+ xenbus_reset_wait_for_backend(be, XenbusStateClosed);
+
+ case XenbusStateClosed:
+ xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising);
+ xenbus_reset_wait_for_backend(be, XenbusStateInitWait);
+ }
+
+ unregister_xenbus_watch(&be_watch);
+ printk(KERN_INFO "XENBUS: reconnect done on %s\n", be);
+ kfree(be_watch.node);
+}
+
+static void xenbus_check_frontend(char *class, char *dev)
+{
+ int be_state, fe_state, err;
+ char *backend, *frontend;
+
+ frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev);
+ if (!frontend)
+ return;
+
+ err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state);
+ if (err != 1)
+ goto out;
+
+ switch (fe_state) {
+ case XenbusStateConnected:
+ case XenbusStateClosed:
+ printk(KERN_DEBUG "XENBUS: frontend %s %s\n",
+ frontend, xenbus_strstate(fe_state));
+ backend = xenbus_read(XBT_NIL, frontend, "backend", NULL);
+ if (!backend || IS_ERR(backend))
+ goto out;
+ err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state);
+ if (err == 1)
+ xenbus_reset_frontend(frontend, backend, be_state);
+ kfree(backend);
+ break;
+ default:
+ break;
+ }
+out:
+ kfree(frontend);
+}
+
+static void xenbus_reset_state(void)
+{
+ char **devclass, **dev;
+ int devclass_n, dev_n;
+ int i, j;
+
+ devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n);
+ if (IS_ERR(devclass))
+ return;
+
+ for (i = 0; i < devclass_n; i++) {
+ dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n);
+ if (IS_ERR(dev))
+ continue;
+ for (j = 0; j < dev_n; j++)
+ xenbus_check_frontend(devclass[i], dev[j]);
+ kfree(dev);
+ }
+ kfree(devclass);
+}
+
static int frontend_probe_and_watch(struct notifier_block *notifier,
unsigned long event,
void *data)
{
+ /* reset devices in Connected or Closed state */
+ if (xen_hvm_domain())
+ xenbus_reset_state();
/* Enumerate devices in xenstore and watch for changes. */
xenbus_probe_devices(&xenbus_frontend);
register_xenbus_watch(&fe_watch);
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index daee5db..a580b17 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -45,6 +45,7 @@
#include <linux/module.h>
#include <linux/mutex.h>
#include <xen/xenbus.h>
+#include <xen/xen.h>
#include "xenbus_comms.h"
struct xs_stored_msg {
@@ -638,8 +639,7 @@ int register_xenbus_watch(struct xenbus_watch *watch)
err = xs_watch(watch->node, token);
- /* Ignore errors due to multiple registration. */
- if ((err != 0) && (err != -EEXIST)) {
+ if (err) {
spin_lock(&watches_lock);
list_del(&watch->list);
spin_unlock(&watches_lock);