tags/2.6.22-2/20069_xen-split-pt-lock.patch1


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220

From: jbeulich@novell.com
Subject: allow use of split page table locks
Patch-mainline: obsolete

---
 arch/i386/mm/pgtable-xen.c    |   66 +++++++++++++++++++++++++++++++++++++++---
 arch/x86_64/mm/pageattr-xen.c |   66 +++++++++++++++++++++++++++++++++++++++---
 mm/Kconfig                    |    3 -
 3 files changed, 124 insertions(+), 11 deletions(-)

--- a/arch/i386/mm/pgtable-xen.c	2007-08-27 14:01:27.000000000 -0400
+++ b/arch/i386/mm/pgtable-xen.c	2007-08-27 14:01:27.000000000 -0400
@@ -658,6 +658,64 @@ void make_pages_writable(void *va, unsig
 	}
 }
 
+static void _pin_lock(struct mm_struct *mm, int lock) {
+	if (lock)
+		spin_lock(&mm->page_table_lock);
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+	/* While mm->page_table_lock protects us against insertions and
+	 * removals of higher level page table pages, it doesn't protect
+	 * against updates of pte-s. Such updates, however, require the
+	 * pte pages to be in consistent state (unpinned+writable or
+	 * pinned+readonly). The pinning and attribute changes, however
+	 * cannot be done atomically, which is why such updates must be
+	 * prevented from happening concurrently.
+	 * Note that no pte lock can ever elsewhere be acquired nesting
+	 * with an already acquired one in the same mm, or with the mm's
+	 * page_table_lock already acquired, as that would break in the
+	 * non-split case (where all these are actually resolving to the
+	 * one page_table_lock). Thus acquiring all of them here is not
+	 * going to result in dead locks, and the order of acquires
+	 * doesn't matter.
+	 */
+	{
+		pgd_t *pgd = mm->pgd;
+		unsigned g;
+
+		for (g = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
+			pud_t *pud;
+			unsigned u;
+
+			if (pgd_none(*pgd))
+				continue;
+			pud = pud_offset(pgd, 0);
+			for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
+				pmd_t *pmd;
+				unsigned m;
+
+				if (pud_none(*pud))
+					continue;
+				pmd = pmd_offset(pud, 0);
+				for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
+					spinlock_t *ptl;
+
+					if (pmd_none(*pmd))
+						continue;
+					ptl = pte_lockptr(0, pmd);
+					if (lock)
+						spin_lock(ptl);
+					else
+						spin_unlock(ptl);
+				}
+			}
+		}
+	}
+#endif
+	if (!lock)
+		spin_unlock(&mm->page_table_lock);
+}
+#define pin_lock(mm) _pin_lock(mm, 1)
+#define pin_unlock(mm) _pin_lock(mm, 0)
+
 static inline void pgd_walk_set_prot(struct page *page, pgprot_t flags)
 {
 	unsigned long pfn = page_to_pfn(page);
@@ -740,18 +798,18 @@ void mm_pin(struct mm_struct *mm)
 {
 	if (xen_feature(XENFEAT_writable_page_tables))
 		return;
-	spin_lock(&mm->page_table_lock);
+	pin_lock(mm);
 	__pgd_pin(mm->pgd);
-	spin_unlock(&mm->page_table_lock);
+	pin_unlock(mm);
 }
 
 void mm_unpin(struct mm_struct *mm)
 {
 	if (xen_feature(XENFEAT_writable_page_tables))
 		return;
-	spin_lock(&mm->page_table_lock);
+	pin_lock(mm);
 	__pgd_unpin(mm->pgd);
-	spin_unlock(&mm->page_table_lock);
+	pin_unlock(mm);
 }
 
 void mm_pin_all(void)
--- a/arch/x86_64/mm/pageattr-xen.c	2007-08-27 14:01:27.000000000 -0400
+++ b/arch/x86_64/mm/pageattr-xen.c	2007-08-27 14:01:27.000000000 -0400
@@ -20,6 +20,64 @@
 LIST_HEAD(mm_unpinned);
 DEFINE_SPINLOCK(mm_unpinned_lock);
 
+static void _pin_lock(struct mm_struct *mm, int lock) {
+	if (lock)
+		spin_lock(&mm->page_table_lock);
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+	/* While mm->page_table_lock protects us against insertions and
+	 * removals of higher level page table pages, it doesn't protect
+	 * against updates of pte-s. Such updates, however, require the
+	 * pte pages to be in consistent state (unpinned+writable or
+	 * pinned+readonly). The pinning and attribute changes, however
+	 * cannot be done atomically, which is why such updates must be
+	 * prevented from happening concurrently.
+	 * Note that no pte lock can ever elsewhere be acquired nesting
+	 * with an already acquired one in the same mm, or with the mm's
+	 * page_table_lock already acquired, as that would break in the
+	 * non-split case (where all these are actually resolving to the
+	 * one page_table_lock). Thus acquiring all of them here is not
+	 * going to result in dead locks, and the order of acquires
+	 * doesn't matter.
+	 */
+	{
+		pgd_t *pgd = mm->pgd;
+		unsigned g;
+
+		for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
+			pud_t *pud;
+			unsigned u;
+
+			if (pgd_none(*pgd))
+				continue;
+			pud = pud_offset(pgd, 0);
+			for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
+				pmd_t *pmd;
+				unsigned m;
+
+				if (pud_none(*pud))
+					continue;
+				pmd = pmd_offset(pud, 0);
+				for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
+					spinlock_t *ptl;
+
+					if (pmd_none(*pmd))
+						continue;
+					ptl = pte_lockptr(0, pmd);
+					if (lock)
+						spin_lock(ptl);
+					else
+						spin_unlock(ptl);
+				}
+			}
+		}
+	}
+#endif
+	if (!lock)
+		spin_unlock(&mm->page_table_lock);
+}
+#define pin_lock(mm) _pin_lock(mm, 1)
+#define pin_unlock(mm) _pin_lock(mm, 0)
+
 static inline void mm_walk_set_prot(void *pt, pgprot_t flags)
 {
 	struct page *page = virt_to_page(pt);
@@ -76,7 +134,7 @@ void mm_pin(struct mm_struct *mm)
 	if (xen_feature(XENFEAT_writable_page_tables))
 		return;
 
-	spin_lock(&mm->page_table_lock);
+	pin_lock(mm);
 
 	mm_walk(mm, PAGE_KERNEL_RO);
 	if (HYPERVISOR_update_va_mapping(
@@ -97,7 +155,7 @@ void mm_pin(struct mm_struct *mm)
 	list_del(&mm->context.unpinned);
 	spin_unlock(&mm_unpinned_lock);
 
-	spin_unlock(&mm->page_table_lock);
+	pin_unlock(mm);
 }
 
 void mm_unpin(struct mm_struct *mm)
@@ -105,7 +163,7 @@ void mm_unpin(struct mm_struct *mm)
 	if (xen_feature(XENFEAT_writable_page_tables))
 		return;
 
-	spin_lock(&mm->page_table_lock);
+	pin_lock(mm);
 
 	xen_pgd_unpin(__pa(mm->pgd));
 	xen_pgd_unpin(__pa(__user_pgd(mm->pgd)));
@@ -125,7 +183,7 @@ void mm_unpin(struct mm_struct *mm)
 	list_add(&mm->context.unpinned, &mm_unpinned);
 	spin_unlock(&mm_unpinned_lock);
 
-	spin_unlock(&mm->page_table_lock);
+	pin_unlock(mm);
 }
 
 void mm_pin_all(void)
--- a/mm/Kconfig	2007-08-27 14:01:25.000000000 -0400
+++ b/mm/Kconfig	2007-08-27 14:01:27.000000000 -0400
@@ -132,14 +132,11 @@ config MEMORY_HOTPLUG_SPARSE
 # Default to 4 for wider testing, though 8 might be more appropriate.
 # ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
 # PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes.
-# XEN on x86 architecture uses the mapping field on pagetable pages to store a
-# pointer to the destructor. This conflicts with pte_lock_deinit().
 #
 config SPLIT_PTLOCK_CPUS
 	int
 	default "4096" if ARM && !CPU_CACHE_VIPT
 	default "4096" if PARISC && !PA20
-	default "4096" if X86_XEN || X86_64_XEN
 	default "4"
 
 #