Newer
Older
/*
* Core of Xen paravirt_ops implementation.
*
* This file contains the xen_paravirt_ops structure itself, and the
* implementations for:
* - privileged instructions
* - interrupt flags
* - segment operations
* - booting and setup
*
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/preempt.h>
#include <linux/percpu.h>
#include <linux/delay.h>
#include <linux/start_kernel.h>
#include <linux/sched.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/highmem.h>
#include <xen/interface/xen.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
#include <xen/features.h>
#include <xen/page.h>
#include <asm/paravirt.h>
#include <asm/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/setup.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include "multicalls.h"
EXPORT_SYMBOL_GPL(hypercall_page);
DEFINE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode);
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
DEFINE_PER_CPU(unsigned long, xen_cr3);
struct start_info *xen_start_info;
EXPORT_SYMBOL_GPL(xen_start_info);
static /* __initdata */ struct shared_info dummy_shared_info;
/*
* Point at some empty memory to start with. We map the real shared_info
* page as soon as fixmap is up and running.
*/
struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
/*
* Flag to determine whether vcpu info placement is available on all
* VCPUs. We assume it is to start with, and then set it to zero on
* the first failure. This is because it can succeed on some VCPUs
* and not others, since it can involve hypervisor memory allocation,
* or because the guest failed to guarantee all the appropriate
* constraints on all VCPUs (ie buffer can't cross a page boundary).
*
* Note that any particular CPU may be using a placed vcpu structure,
* but we can only optimise if the all are.
*
* 0: not available, 1: available
*/
static int have_vcpu_info_placement = 1;
static void __init xen_vcpu_setup(int cpu)
struct vcpu_register_vcpu_info info;
int err;
struct vcpu_info *vcpup;
per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
if (!have_vcpu_info_placement)
return; /* already tested, not available */
vcpup = &per_cpu(xen_vcpu_info, cpu);
info.mfn = virt_to_mfn(vcpup);
info.offset = offset_in_page(vcpup);
printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %x, offset %d\n",
cpu, vcpup, info.mfn, info.offset);
/* Check to see if the hypervisor will put the vcpu_info
structure where we want it, which allows direct access via
a percpu-variable. */
err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
if (err) {
printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
have_vcpu_info_placement = 0;
} else {
/* This cpu is using the registered vcpu info, even if
later ones fail to. */
per_cpu(xen_vcpu, cpu) = vcpup;
printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n",
cpu, vcpup);
}
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
}
static void __init xen_banner(void)
{
printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
paravirt_ops.name);
printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
}
static void xen_cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
unsigned maskedx = ~0;
/*
* Mask out inconvenient features, to try and disable as many
* unsupported kernel subsystems as possible.
*/
if (*eax == 1)
maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */
(1 << X86_FEATURE_ACPI) | /* disable ACPI */
(1 << X86_FEATURE_ACC)); /* thermal monitoring */
asm(XEN_EMULATE_PREFIX "cpuid"
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (*eax), "2" (*ecx));
*edx &= maskedx;
}
static void xen_set_debugreg(int reg, unsigned long val)
{
HYPERVISOR_set_debugreg(reg, val);
}
static unsigned long xen_get_debugreg(int reg)
{
return HYPERVISOR_get_debugreg(reg);
}
static unsigned long xen_save_fl(void)
{
struct vcpu_info *vcpu;
unsigned long flags;
vcpu = x86_read_percpu(xen_vcpu);
/* flag has opposite sense of mask */
flags = !vcpu->evtchn_upcall_mask;
/* convert to IF type flag
-0 -> 0x00000000
-1 -> 0xffffffff
*/
return (-flags) & X86_EFLAGS_IF;
}
static void xen_restore_fl(unsigned long flags)
{
struct vcpu_info *vcpu;
/* convert from IF type flag */
flags = !(flags & X86_EFLAGS_IF);
/* There's a one instruction preempt window here. We need to
make sure we're don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
vcpu = x86_read_percpu(xen_vcpu);
vcpu->evtchn_upcall_mask = flags;
/* Doesn't matter if we get preempted here, because any
pending event will get dealt with anyway. */
if (flags == 0) {
preempt_check_resched();
barrier(); /* unmask then check (avoid races) */
if (unlikely(vcpu->evtchn_upcall_pending))
force_evtchn_callback();
}
static void xen_irq_disable(void)
{
/* There's a one instruction preempt window here. We need to
make sure we're don't switch CPUs between getting the vcpu
pointer and updating the mask. */
x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
preempt_enable_no_resched();
}
static void xen_irq_enable(void)
{
struct vcpu_info *vcpu;
/* There's a one instruction preempt window here. We need to
make sure we're don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
vcpu = x86_read_percpu(xen_vcpu);
vcpu->evtchn_upcall_mask = 0;
/* Doesn't matter if we get preempted here, because any
pending event will get dealt with anyway. */
barrier(); /* unmask then check (avoid races) */
if (unlikely(vcpu->evtchn_upcall_pending))
force_evtchn_callback();
}
static void xen_safe_halt(void)
{
/* Blocking includes an implicit local_irq_enable(). */
if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0)
BUG();
}
static void xen_halt(void)
{
if (irqs_disabled())
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
else
xen_safe_halt();
}
static void xen_set_lazy_mode(enum paravirt_lazy_mode mode)
{
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
switch (mode) {
case PARAVIRT_LAZY_NONE:
BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE);
break;
case PARAVIRT_LAZY_MMU:
case PARAVIRT_LAZY_CPU:
BUG_ON(x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE);
break;
case PARAVIRT_LAZY_FLUSH:
/* flush if necessary, but don't change state */
if (x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE)
xen_mc_flush();
return;
}
xen_mc_flush();
x86_write_percpu(xen_lazy_mode, mode);
}
static unsigned long xen_store_tr(void)
{
return 0;
}
static void xen_set_ldt(const void *addr, unsigned entries)
{
unsigned long linear_addr = (unsigned long)addr;
struct mmuext_op *op;
struct multicall_space mcs = xen_mc_entry(sizeof(*op));
op = mcs.args;
op->cmd = MMUEXT_SET_LDT;
if (linear_addr) {
/* ldt my be vmalloced, use arbitrary_virt_to_machine */
xmaddr_t maddr;
maddr = arbitrary_virt_to_machine((unsigned long)addr);
linear_addr = (unsigned long)maddr.maddr;
}
op->arg1.linear_addr = linear_addr;
op->arg2.nr_ents = entries;
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
xen_mc_issue(PARAVIRT_LAZY_CPU);
}
static void xen_load_gdt(const struct Xgt_desc_struct *dtr)
{
unsigned long *frames;
unsigned long va = dtr->address;
unsigned int size = dtr->size + 1;
unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
int f;
struct multicall_space mcs;
/* A GDT can be up to 64k in size, which corresponds to 8192
8-byte entries, or 16 4k pages.. */
BUG_ON(size > 65536);
BUG_ON(va & ~PAGE_MASK);
mcs = xen_mc_entry(sizeof(*frames) * pages);
frames = mcs.args;
for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
frames[f] = virt_to_mfn(va);
make_lowmem_page_readonly((void *)va);
}
MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct));
xen_mc_issue(PARAVIRT_LAZY_CPU);
}
static void load_TLS_descriptor(struct thread_struct *t,
unsigned int cpu, unsigned int i)
{
struct desc_struct *gdt = get_cpu_gdt_table(cpu);
xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
struct multicall_space mc = __xen_mc_entry(0);
MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
}
static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
{
xen_mc_batch();
load_TLS_descriptor(t, cpu, 0);
load_TLS_descriptor(t, cpu, 1);
load_TLS_descriptor(t, cpu, 2);
xen_mc_issue(PARAVIRT_LAZY_CPU);
/*
* XXX sleazy hack: If we're being called in a lazy-cpu zone,
* it means we're in a context switch, and %gs has just been
* saved. This means we can zero it out to prevent faults on
* exit from the hypervisor if the next process has no %gs.
* Either way, it has been saved, and the new value will get
* loaded properly. This will go away as soon as Xen has been
* modified to not save/restore %gs for normal hypercalls.
*/
if (xen_get_lazy_mode() == PARAVIRT_LAZY_CPU)
loadsegment(gs, 0);
}
static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
u32 low, u32 high)
{
unsigned long lp = (unsigned long)&dt[entrynum];
xmaddr_t mach_lp = virt_to_machine(lp);
u64 entry = (u64)high << 32 | low;
xen_mc_flush();
if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
BUG();
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
}
static int cvt_gate_to_trap(int vector, u32 low, u32 high,
struct trap_info *info)
{
u8 type, dpl;
type = (high >> 8) & 0x1f;
dpl = (high >> 13) & 3;
if (type != 0xf && type != 0xe)
return 0;
info->vector = vector;
info->address = (high & 0xffff0000) | (low & 0x0000ffff);
info->cs = low >> 16;
info->flags = dpl;
/* interrupt gates clear IF */
if (type == 0xe)
info->flags |= 4;
return 1;
}
/* Locations of each CPU's IDT */
static DEFINE_PER_CPU(struct Xgt_desc_struct, idt_desc);
/* Set an IDT entry. If the entry is part of the current IDT, then
also update Xen. */
static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
u32 low, u32 high)
{
unsigned long p = (unsigned long)&dt[entrynum];
unsigned long start, end;
preempt_disable();
start = __get_cpu_var(idt_desc).address;
end = start + __get_cpu_var(idt_desc).size + 1;
xen_mc_flush();
write_dt_entry(dt, entrynum, low, high);
if (p >= start && (p + 8) <= end) {
struct trap_info info[2];
info[1].address = 0;
if (cvt_gate_to_trap(entrynum, low, high, &info[0]))
if (HYPERVISOR_set_trap_table(info))
BUG();
}
static void xen_convert_trap_info(const struct Xgt_desc_struct *desc,
struct trap_info *traps)
{
unsigned in, out, count;
count = (desc->size+1) / 8;
BUG_ON(count > 256);
for (in = out = 0; in < count; in++) {
const u32 *entry = (u32 *)(desc->address + in * 8);
if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out]))
out++;
}
traps[out].address = 0;
}
void xen_copy_trap_info(struct trap_info *traps)
{
const struct Xgt_desc_struct *desc = &__get_cpu_var(idt_desc);
xen_convert_trap_info(desc, traps);
}
/* Load a new IDT into Xen. In principle this can be per-CPU, so we
hold a spinlock to protect the static traps[] array (static because
it avoids allocation, and saves stack space). */
static void xen_load_idt(const struct Xgt_desc_struct *desc)
{
static DEFINE_SPINLOCK(lock);
static struct trap_info traps[257];
spin_lock(&lock);
__get_cpu_var(idt_desc) = *desc;
xen_mc_flush();
if (HYPERVISOR_set_trap_table(traps))
BUG();
spin_unlock(&lock);
}
/* Write a GDT descriptor entry. Ignore LDT descriptors, since
they're handled differently. */
static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
u32 low, u32 high)
{
switch ((high >> 8) & 0xff) {
case DESCTYPE_LDT:
case DESCTYPE_TSS:
/* ignore */
break;
default: {
xmaddr_t maddr = virt_to_machine(&dt[entry]);
u64 desc = (u64)high << 32 | low;
xen_mc_flush();
if (HYPERVISOR_update_descriptor(maddr.maddr, desc))
BUG();
}
}
}
static void xen_load_esp0(struct tss_struct *tss,
{
struct multicall_space mcs = xen_mc_entry(0);
MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0);
xen_mc_issue(PARAVIRT_LAZY_CPU);
}
static void xen_set_iopl_mask(unsigned mask)
{
struct physdev_set_iopl set_iopl;
/* Force the change at ring 0. */
set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
}
static void xen_io_delay(void)
{
}
#ifdef CONFIG_X86_LOCAL_APIC
static unsigned long xen_apic_read(unsigned long reg)
{
return 0;
}
static void xen_apic_write(unsigned long reg, unsigned long val)
{
/* Warn to see if there's any stray references */
WARN_ON(1);
}
#endif
static void xen_flush_tlb(void)
{
struct mmuext_op *op;
struct multicall_space mcs = xen_mc_entry(sizeof(*op));
op = mcs.args;
op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
static void xen_flush_tlb_single(unsigned long addr)
{
struct mmuext_op *op;
struct multicall_space mcs = xen_mc_entry(sizeof(*op));
op = mcs.args;
op->cmd = MMUEXT_INVLPG_LOCAL;
op->arg1.linear_addr = addr & PAGE_MASK;
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
xen_mc_issue(PARAVIRT_LAZY_MMU);
static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
unsigned long va)
{
struct {
struct mmuext_op op;
cpumask_t mask;
} *args;
/*
* A couple of (to be removed) sanity checks:
*
* - current CPU must not be in mask
* - mask must exist :)
*/
BUG_ON(cpus_empty(cpumask));
BUG_ON(cpu_isset(smp_processor_id(), cpumask));
BUG_ON(!mm);
/* If a CPU which we ran on has gone down, OK. */
cpus_and(cpumask, cpumask, cpu_online_map);
if (cpus_empty(cpumask))
return;
mcs = xen_mc_entry(sizeof(*args));
args = mcs.args;
args->mask = cpumask;
args->op.arg2.vcpumask = &args->mask;
args->op.cmd = MMUEXT_INVLPG_MULTI;
args->op.arg1.linear_addr = va;
MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
xen_mc_issue(PARAVIRT_LAZY_MMU);
static void xen_write_cr2(unsigned long cr2)
{
x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
}
static unsigned long xen_read_cr2(void)
{
return x86_read_percpu(xen_vcpu)->arch.cr2;
}
static unsigned long xen_read_cr2_direct(void)
{
return x86_read_percpu(xen_vcpu_info.arch.cr2);
}
static void xen_write_cr4(unsigned long cr4)
{
/* never allow TSC to be disabled */
native_write_cr4(cr4 & ~X86_CR4_TSD);
}
static unsigned long xen_read_cr3(void)
{
return x86_read_percpu(xen_cr3);
}
static void xen_write_cr3(unsigned long cr3)
{
if (cr3 == x86_read_percpu(xen_cr3)) {
/* just a simple tlb flush */
xen_flush_tlb();
return;
}
x86_write_percpu(xen_cr3, cr3);
{
struct mmuext_op *op;
struct multicall_space mcs = xen_mc_entry(sizeof(*op));
unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
op = mcs.args;
op->cmd = MMUEXT_NEW_BASEPTR;
op->arg1.mfn = mfn;
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
xen_mc_issue(PARAVIRT_LAZY_CPU);
}
}
/* Early in boot, while setting up the initial pagetable, assume
everything is pinned. */
static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn)
BUG_ON(mem_map); /* should only be used early */
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
}
/* This needs to make sure the new pte page is pinned iff its being
attached to a pinned pagetable. */
static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
struct page *page = pfn_to_page(pfn);
if (PagePinned(virt_to_page(mm->pgd))) {
SetPagePinned(page);
if (!PageHighMem(page))
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
else
/* make sure there are no stray mappings of
this page */
kmap_flush_unused();
}
/* This should never happen until we're OK to use struct page */
static void xen_release_pt(u32 pfn)
{
struct page *page = pfn_to_page(pfn);
if (PagePinned(page)) {
if (!PageHighMem(page))
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
#ifdef CONFIG_HIGHPTE
static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
pgprot_t prot = PAGE_KERNEL;
if (PagePinned(page))
prot = PAGE_KERNEL_RO;
if (0 && PageHighMem(page))
printk("mapping highpte %lx type %d prot %s\n",
page_to_pfn(page), type,
(unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ");
return kmap_atomic_prot(page, type, prot);
static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
{
/* If there's an existing pte, then don't allow _PAGE_RW to be set */
if (pte_val_ma(*ptep) & _PAGE_PRESENT)
pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
pte_val_ma(pte));
return pte;
}
/* Init-time set_pte while constructing initial pagetables, which
doesn't allow RO pagetable pages to be remapped RW */
static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
{
pte = mask_rw_pte(ptep, pte);
xen_set_pte(ptep, pte);
}
static __init void xen_pagetable_setup_start(pgd_t *base)
{
pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
/* special set_pte for pagetable initialization */
paravirt_ops.set_pte = xen_set_pte_init;
init_mm.pgd = base;
/*
* copy top-level of Xen-supplied pagetable into place. For
* !PAE we can use this as-is, but for PAE it is a stand-in
* while we copy the pmd pages.
*/
memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
if (PTRS_PER_PMD > 1) {
int i;
/*
* For PAE, need to allocate new pmds, rather than
* share Xen's, since Xen doesn't like pmd's being
* shared between address spaces.
*/
for (i = 0; i < PTRS_PER_PGD; i++) {
if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
PAGE_SIZE);
set_pgd(&base[i], __pgd(1 + __pa(pmd)));
} else
pgd_clear(&base[i]);
}
}
/* make sure zero_page is mapped RO so we can use it in pagetables */
make_lowmem_page_readonly(empty_zero_page);
make_lowmem_page_readonly(base);
/*
* Switch to new pagetable. This is done before
* pagetable_init has done anything so that the new pages
* added to the table can be prepared properly for Xen.
*/
xen_write_cr3(__pa(base));
}
static __init void xen_pagetable_setup_done(pgd_t *base)
{
/* This will work as long as patching hasn't happened yet
(which it hasn't) */
paravirt_ops.alloc_pt = xen_alloc_pt;
paravirt_ops.set_pte = xen_set_pte;
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
/*
* Create a mapping for the shared info page.
* Should be set_fixmap(), but shared_info is a machine
* address with no corresponding pseudo-phys address.
*/
set_pte_mfn(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
PFN_DOWN(xen_start_info->shared_info),
PAGE_KERNEL);
HYPERVISOR_shared_info =
(struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
} else
HYPERVISOR_shared_info =
(struct shared_info *)__va(xen_start_info->shared_info);
/* Actually pin the pagetable down, but we can't set PG_pinned
yet because the page structures don't exist yet. */
{
struct mmuext_op op;
#ifdef CONFIG_X86_PAE
op.cmd = MMUEXT_PIN_L3_TABLE;
#else
op.cmd = MMUEXT_PIN_L3_TABLE;
#endif
op.arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(base)));
if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
BUG();
}
/* This is called once we have the cpu_possible_map */
void __init xen_setup_vcpu_info_placement(void)
{
int cpu;
for_each_possible_cpu(cpu)
xen_vcpu_setup(cpu);
/* xen_vcpu_setup managed to place the vcpu_info within the
percpu area for all cpus, so make use of it */
if (have_vcpu_info_placement) {
printk(KERN_INFO "Xen: using vcpu_info placement\n");
paravirt_ops.save_fl = xen_save_fl_direct;
paravirt_ops.restore_fl = xen_restore_fl_direct;
paravirt_ops.irq_disable = xen_irq_disable_direct;
paravirt_ops.irq_enable = xen_irq_enable_direct;
paravirt_ops.read_cr2 = xen_read_cr2_direct;
paravirt_ops.iret = xen_iret_direct;
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
static unsigned xen_patch(u8 type, u16 clobbers, void *insns, unsigned len)
{
char *start, *end, *reloc;
unsigned ret;
start = end = reloc = NULL;
#define SITE(x) \
case PARAVIRT_PATCH(x): \
if (have_vcpu_info_placement) { \
start = (char *)xen_##x##_direct; \
end = xen_##x##_direct_end; \
reloc = xen_##x##_direct_reloc; \
} \
goto patch_site
switch (type) {
SITE(irq_enable);
SITE(irq_disable);
SITE(save_fl);
SITE(restore_fl);
#undef SITE
patch_site:
if (start == NULL || (end-start) > len)
goto default_patch;
ret = paravirt_patch_insns(insns, len, start, end);
/* Note: because reloc is assigned from something that
appears to be an array, gcc assumes it's non-null,
but doesn't know its relationship with start and
end. */
if (reloc > start && reloc < end) {
int reloc_off = reloc - start;
long *relocp = (long *)(insns + reloc_off);
long delta = start - (char *)insns;
*relocp += delta;
}
break;
default_patch:
default:
ret = paravirt_patch_default(type, clobbers, insns, len);
break;
}
return ret;
}
static const struct paravirt_ops xen_paravirt_ops __initdata = {
.paravirt_enabled = 1,
.shared_kernel_pmd = 0,
.name = "Xen",
.banner = xen_banner,
.patch = xen_patch,
.memory_setup = xen_memory_setup,
.arch_setup = xen_arch_setup,
.post_allocator_init = xen_mark_init_mm_pinned,
.time_init = xen_time_init,
.set_wallclock = xen_set_wallclock,
.get_wallclock = xen_get_wallclock,
.get_cpu_khz = xen_cpu_khz,
.cpuid = xen_cpuid,
.set_debugreg = xen_set_debugreg,
.get_debugreg = xen_get_debugreg,
.clts = native_clts,
.read_cr0 = native_read_cr0,
.write_cr0 = native_write_cr0,
.read_cr2 = xen_read_cr2,
.write_cr2 = xen_write_cr2,
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
.read_cr3 = xen_read_cr3,
.write_cr3 = xen_write_cr3,
.read_cr4 = native_read_cr4,
.read_cr4_safe = native_read_cr4_safe,
.write_cr4 = xen_write_cr4,
.save_fl = xen_save_fl,
.restore_fl = xen_restore_fl,
.irq_disable = xen_irq_disable,
.irq_enable = xen_irq_enable,
.safe_halt = xen_safe_halt,
.halt = xen_halt,
.wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe,
.write_msr = native_write_msr_safe,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
.iret = (void *)&hypercall_page[__HYPERVISOR_iret],
.irq_enable_sysexit = NULL, /* never called */
.load_tr_desc = paravirt_nop,
.set_ldt = xen_set_ldt,
.load_gdt = xen_load_gdt,
.load_idt = xen_load_idt,
.load_tls = xen_load_tls,
.store_gdt = native_store_gdt,
.store_idt = native_store_idt,
.store_tr = xen_store_tr,
.write_ldt_entry = xen_write_ldt_entry,
.write_gdt_entry = xen_write_gdt_entry,
.write_idt_entry = xen_write_idt_entry,
.load_esp0 = xen_load_esp0,
.set_iopl_mask = xen_set_iopl_mask,
.io_delay = xen_io_delay,
#ifdef CONFIG_X86_LOCAL_APIC
.apic_write = xen_apic_write,
.apic_write_atomic = xen_apic_write,
.apic_read = xen_apic_read,
.setup_boot_clock = paravirt_nop,
.setup_secondary_clock = paravirt_nop,
.startup_ipi_hook = paravirt_nop,
#endif
.flush_tlb_user = xen_flush_tlb,
.flush_tlb_kernel = xen_flush_tlb,
.flush_tlb_single = xen_flush_tlb_single,
.pte_update = paravirt_nop,
.pte_update_defer = paravirt_nop,
.pagetable_setup_start = xen_pagetable_setup_start,
.pagetable_setup_done = xen_pagetable_setup_done,
.alloc_pd = paravirt_nop,
.alloc_pd_clone = paravirt_nop,
.release_pd = paravirt_nop,
#ifdef CONFIG_HIGHPTE
.kmap_atomic_pte = xen_kmap_atomic_pte,
#endif
.set_pte = NULL, /* see xen_pagetable_setup_* */