aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/lguest/segments.c
blob: f675a41a80da57cdc11d5d4825d2c7ce84a9254f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
/*P:600 The x86 architecture has segments, which involve a table of descriptors
 * which can be used to do funky things with virtual address interpretation.
 * We originally used to use segments so the Guest couldn't alter the
 * Guest<->Host Switcher, and then we had to trim Guest segments, and restore
 * for userspace per-thread segments, but trim again for on userspace->kernel
 * transitions...  This nightmarish creation was contained within this file,
 * where we knew not to tread without heavy armament and a change of underwear.
 *
 * In these modern times, the segment handling code consists of simple sanity
 * checks, and the worst you'll experience reading this code is butterfly-rash
 * from frolicking through its parklike serenity. :*/
#include "lg.h"

/*H:600
 * We've almost completed the Host; there's just one file to go!
 *
 * Segments & The Global Descriptor Table
 *
 * (That title sounds like a bad Nerdcore group.  Not to suggest that there are
 * any good Nerdcore groups, but in high school a friend of mine had a band
 * called Joe Fish and the Chips, so there are definitely worse band names).
 *
 * To refresh: the GDT is a table of 8-byte values describing segments.  Once
 * set up, these segments can be loaded into one of the 6 "segment registers".
 *
 * GDT entries are passed around as "struct desc_struct"s, which like IDT
 * entries are split into two 32-bit members, "a" and "b".  One day, someone
 * will clean that up, and be declared a Hero.  (No pressure, I'm just saying).
 *
 * Anyway, the GDT entry contains a base (the start address of the segment), a
 * limit (the size of the segment - 1), and some flags.  Sounds simple, and it
 * would be, except those zany Intel engineers decided that it was too boring
 * to put the base at one end, the limit at the other, and the flags in
 * between.  They decided to shotgun the bits at random throughout the 8 bytes,
 * like so:
 *
 * 0               16                     40       48  52  56     63
 * [ limit part 1 ][     base part 1     ][ flags ][li][fl][base ]
 *                                                  mit ags part 2
 *                                                part 2
 *
 * As a result, this file contains a certain amount of magic numeracy.  Let's
 * begin.
 */

/* Is the descriptor the Guest wants us to put in OK?
 *
 * The flag which Intel says must be zero: must be zero.  The descriptor must
 * be present, (this is actually checked earlier but is here for thorougness),
 * and the descriptor type must be 1 (a memory segment).  */
static int desc_ok(const struct desc_struct *gdt)
{
	return ((gdt->b & 0x00209000) == 0x00009000);
}

/* Is the segment present?  (Otherwise it can't be used by the Guest). */
static int segment_present(const struct desc_struct *gdt)
{
	return gdt->b & 0x8000;
}

/* There are several entries we don't let the Guest set.  The TSS entry is the
 * "Task State Segment" which controls all kinds of delicate things.  The
 * LGUEST_CS and LGUEST_DS entries are reserved for the Switcher, and the
 * the Guest can't be trusted to deal with double faults. */
static int ignored_gdt(unsigned int num)
{
	return (num == GDT_ENTRY_TSS
		|| num == GDT_ENTRY_LGUEST_CS
		|| num == GDT_ENTRY_LGUEST_DS
		|| num == GDT_ENTRY_DOUBLEFAULT_TSS);
}

/* If the Guest asks us to remove an entry from the GDT, we have to be careful.
 * If one of the segment registers is pointing at that entry the Switcher will
 * crash when it tries to reload the segment registers for the Guest.
 *
 * It doesn't make much sense for the Guest to try to remove its own code, data
 * or stack segments while they're in use: assume that's a Guest bug.  If it's
 * one of the lesser segment registers using the removed entry, we simply set
 * that register to 0 (unusable). */
static void check_segment_use(struct lguest *lg, unsigned int desc)
{
	/* GDT entries are 8 bytes long, so we divide to get the index and
	 * ignore the bottom bits. */
	if (lg->regs->gs / 8 == desc)
		lg->regs->gs = 0;
	if (lg->regs->fs / 8 == desc)
		lg->regs->fs = 0;
	if (lg->regs->es / 8 == desc)
		lg->regs->es = 0;
	if (lg->regs->ds / 8 == desc
	    || lg->regs->cs / 8 == desc
	    || lg->regs->ss / 8 == desc)
		kill_guest(lg, "Removed live GDT entry %u", desc);
}
/*:*/
/*M:009 We wouldn't need to check for removal of in-use segments if we handled
 * faults in the Switcher.  However, it's probably not a worthwhile
 * optimization. :*/

/*H:610 Once the GDT has been changed, we look through the changed entries and
 * see if they're OK.  If not, we'll call kill_guest() and the Guest will never
 * get to use the invalid entries. */
static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end)
{
	unsigned int i;

	for (i = start; i < end; i++) {
		/* We never copy these ones to real GDT, so we don't care what
		 * they say */
		if (ignored_gdt(i))
			continue;

		/* We could fault in switch_to_guest if they are using
		 * a removed segment. */
		if (!segment_present(&lg->gdt[i])) {
			check_segment_use(lg, i);
			continue;
		}

		if (!desc_ok(&lg->gdt[i]))
			kill_guest(lg, "Bad GDT descriptor %i", i);

		/* Segment descriptors contain a privilege level: the Guest is
		 * sometimes careless and leaves this as 0, even though it's
		 * running at privilege level 1.  If so, we fix it here. */
		if ((lg->gdt[i].b & 0x00006000) == 0)
			lg->gdt[i].b |= (GUEST_PL << 13);

		/* Each descriptor has an "accessed" bit.  If we don't set it
		 * now, the CPU will try to set it when the Guest first loads
		 * that entry into a segment register.  But the GDT isn't
		 * writable by the Guest, so bad things can happen. */
		lg->gdt[i].b |= 0x00000100;
	}
}

/* This routine is called at boot or modprobe time for each CPU to set up the
 * "constant" GDT entries for Guests running on that CPU. */
void setup_default_gdt_entries(struct lguest_ro_state *state)
{
	struct desc_struct *gdt = state->guest_gdt;
	unsigned long tss = (unsigned long)&state->guest_tss;

	/* The hypervisor segments are full 0-4G segments, privilege level 0 */
	gdt[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
	gdt[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;

	/* The TSS segment refers to the TSS entry for this CPU, so we cannot
	 * copy it from the Guest.  Forgive the magic flags */
	gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16);
	gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000)
		| ((tss >> 16) & 0x000000FF);
}

/* This routine is called before the Guest is run for the first time. */
void setup_guest_gdt(struct lguest *lg)
{
	/* Start with full 0-4G segments... */
	lg->gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT;
	lg->gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT;
	/* ...except the Guest is allowed to use them, so set the privilege
	 * level appropriately in the flags. */
	lg->gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13);
	lg->gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13);
}

/* Like the IDT, we never simply use the GDT the Guest gives us.  We set up the
 * GDTs for each CPU, then we copy across the entries each time we want to run
 * a different Guest on that CPU. */

/* A partial GDT load, for the three "thead-local storage" entries.  Otherwise
 * it's just like load_guest_gdt().  So much, in fact, it would probably be
 * neater to have a single hypercall to cover both. */
void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt)
{
	unsigned int i;

	for (i = GDT_ENTRY_TLS_MIN; i <= GDT_ENTRY_TLS_MAX; i++)
		gdt[i] = lg->gdt[i];
}

/* This is the full version */
void copy_gdt(const struct lguest *lg, struct desc_struct *gdt)
{
	unsigned int i;

	/* The default entries from setup_default_gdt_entries() are not
	 * replaced.  See ignored_gdt() above. */
	for (i = 0; i < GDT_ENTRIES; i++)
		if (!ignored_gdt(i))
			gdt[i] = lg->gdt[i];
}

/* This is where the Guest asks us to load a new GDT (LHCALL_LOAD_GDT). */
void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num)
{
	/* We assume the Guest has the same number of GDT entries as the
	 * Host, otherwise we'd have to dynamically allocate the Guest GDT. */
	if (num > ARRAY_SIZE(lg->gdt))
		kill_guest(lg, "too many gdt entries %i", num);

	/* We read the whole thing in, then fix it up. */
	lgread(lg, lg->gdt, table, num * sizeof(lg->gdt[0]));
	fixup_gdt_table(lg, 0, ARRAY_SIZE(lg->gdt));
	/* Mark that the GDT changed so the core knows it has to copy it again,
	 * even if the Guest is run on the same CPU. */
	lg->changed |= CHANGED_GDT;
}

void guest_load_tls(struct lguest *lg, unsigned long gtls)
{
	struct desc_struct *tls = &lg->gdt[GDT_ENTRY_TLS_MIN];

	lgread(lg, tls, gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES);
	fixup_gdt_table(lg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1);
	lg->changed |= CHANGED_GDT_TLS;
}

/*
 * With this, we have finished the Host.
 *
 * Five of the seven parts of our task are complete.  You have made it through
 * the Bit of Despair (I think that's somewhere in the page table code,
 * myself).
 *
 * Next, we examine "make Switcher".  It's short, but intense.
 */