summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'trunk/2.6.18/30079a_amd64-fix-zeroing-on-exception-in-copy_user-pre.patch')
-rw-r--r--trunk/2.6.18/30079a_amd64-fix-zeroing-on-exception-in-copy_user-pre.patch798
1 files changed, 798 insertions, 0 deletions
diff --git a/trunk/2.6.18/30079a_amd64-fix-zeroing-on-exception-in-copy_user-pre.patch b/trunk/2.6.18/30079a_amd64-fix-zeroing-on-exception-in-copy_user-pre.patch
new file mode 100644
index 0000000..17e1346
--- /dev/null
+++ b/trunk/2.6.18/30079a_amd64-fix-zeroing-on-exception-in-copy_user-pre.patch
@@ -0,0 +1,798 @@
+commit 8d379dad8f1670d233ac67b76b1c5a42ad3714a3
+Author: Jan Beulich <jbeulich@novell.com>
+Date: Tue Sep 26 10:52:32 2006 +0200
+
+ [PATCH] annotate arch/x86_64/lib/*.S
+
+ Add unwind annotations to arch/x86_64/lib/*.S, and also use the macros
+ provided by linux/linkage.h where-ever possible.
+
+ Some of the alternative instructions handling needed to be adjusted so
+ that the replacement code would also have valid unwind information.
+
+ Signed-off-by: Jan Beulich <jbeulich@novell.com>
+ Signed-off-by: Andi Kleen <ak@suse.de>
+
+diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S
+index 1f81b79..9a10a78 100644
+--- a/arch/x86_64/lib/clear_page.S
++++ b/arch/x86_64/lib/clear_page.S
+@@ -1,10 +1,22 @@
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++
+ /*
+ * Zero a page.
+ * rdi page
+ */
+- .globl clear_page
+- .p2align 4
+-clear_page:
++ ALIGN
++clear_page_c:
++ CFI_STARTPROC
++ movl $4096/8,%ecx
++ xorl %eax,%eax
++ rep stosq
++ ret
++ CFI_ENDPROC
++ENDPROC(clear_page)
++
++ENTRY(clear_page)
++ CFI_STARTPROC
+ xorl %eax,%eax
+ movl $4096/64,%ecx
+ .p2align 4
+@@ -23,28 +35,25 @@ clear_page:
+ jnz .Lloop
+ nop
+ ret
+-clear_page_end:
++ CFI_ENDPROC
++.Lclear_page_end:
++ENDPROC(clear_page)
+
+ /* Some CPUs run faster using the string instructions.
+ It is also a lot simpler. Use this when possible */
+
+ #include <asm/cpufeature.h>
+
++ .section .altinstr_replacement,"ax"
++1: .byte 0xeb /* jmp <disp8> */
++ .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
++2:
++ .previous
+ .section .altinstructions,"a"
+ .align 8
+- .quad clear_page
+- .quad clear_page_c
+- .byte X86_FEATURE_REP_GOOD
+- .byte clear_page_end-clear_page
+- .byte clear_page_c_end-clear_page_c
+- .previous
+-
+- .section .altinstr_replacement,"ax"
+-clear_page_c:
+- movl $4096/8,%ecx
+- xorl %eax,%eax
+- rep
+- stosq
+- ret
+-clear_page_c_end:
++ .quad clear_page
++ .quad 1b
++ .byte X86_FEATURE_REP_GOOD
++ .byte .Lclear_page_end - clear_page
++ .byte 2b - 1b
+ .previous
+diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S
+index 8fa19d9..0ebb03b 100644
+--- a/arch/x86_64/lib/copy_page.S
++++ b/arch/x86_64/lib/copy_page.S
+@@ -1,17 +1,33 @@
+ /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
+
++#include <linux/config.h>
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++
++ ALIGN
++copy_page_c:
++ CFI_STARTPROC
++ movl $4096/8,%ecx
++ rep movsq
++ ret
++ CFI_ENDPROC
++ENDPROC(copy_page_c)
++
+ /* Don't use streaming store because it's better when the target
+ ends up in cache. */
+
+ /* Could vary the prefetch distance based on SMP/UP */
+
+- .globl copy_page
+- .p2align 4
+-copy_page:
++ENTRY(copy_page)
++ CFI_STARTPROC
+ subq $3*8,%rsp
++ CFI_ADJUST_CFA_OFFSET 3*8
+ movq %rbx,(%rsp)
++ CFI_REL_OFFSET rbx, 0
+ movq %r12,1*8(%rsp)
++ CFI_REL_OFFSET r12, 1*8
+ movq %r13,2*8(%rsp)
++ CFI_REL_OFFSET r13, 2*8
+
+ movl $(4096/64)-5,%ecx
+ .p2align 4
+@@ -72,30 +88,33 @@ copy_page:
+ jnz .Loop2
+
+ movq (%rsp),%rbx
++ CFI_RESTORE rbx
+ movq 1*8(%rsp),%r12
++ CFI_RESTORE r12
+ movq 2*8(%rsp),%r13
++ CFI_RESTORE r13
+ addq $3*8,%rsp
++ CFI_ADJUST_CFA_OFFSET -3*8
+ ret
++.Lcopy_page_end:
++ CFI_ENDPROC
++ENDPROC(copy_page)
+
+ /* Some CPUs run faster using the string copy instructions.
+ It is also a lot simpler. Use this when possible */
+
+ #include <asm/cpufeature.h>
+
++ .section .altinstr_replacement,"ax"
++1: .byte 0xeb /* jmp <disp8> */
++ .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */
++2:
++ .previous
+ .section .altinstructions,"a"
+ .align 8
+- .quad copy_page
+- .quad copy_page_c
+- .byte X86_FEATURE_REP_GOOD
+- .byte copy_page_c_end-copy_page_c
+- .byte copy_page_c_end-copy_page_c
+- .previous
+-
+- .section .altinstr_replacement,"ax"
+-copy_page_c:
+- movl $4096/8,%ecx
+- rep
+- movsq
+- ret
+-copy_page_c_end:
++ .quad copy_page
++ .quad 1b
++ .byte X86_FEATURE_REP_GOOD
++ .byte .Lcopy_page_end - copy_page
++ .byte 2b - 1b
+ .previous
+diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S
+index f64569b..962f3a6 100644
+--- a/arch/x86_64/lib/copy_user.S
++++ b/arch/x86_64/lib/copy_user.S
+@@ -4,6 +4,9 @@
+ * Functions to copy from and to user space.
+ */
+
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++
+ #define FIX_ALIGNMENT 1
+
+ #include <asm/current.h>
+@@ -12,9 +15,8 @@
+ #include <asm/cpufeature.h>
+
+ /* Standard copy_to_user with segment limit checking */
+- .globl copy_to_user
+- .p2align 4
+-copy_to_user:
++ENTRY(copy_to_user)
++ CFI_STARTPROC
+ GET_THREAD_INFO(%rax)
+ movq %rdi,%rcx
+ addq %rdx,%rcx
+@@ -25,9 +27,11 @@ copy_to_user:
+ .byte 0xe9 /* 32bit jump */
+ .long .Lcug-1f
+ 1:
++ CFI_ENDPROC
++ENDPROC(copy_to_user)
+
+ .section .altinstr_replacement,"ax"
+-3: .byte 0xe9 /* replacement jmp with 8 bit immediate */
++3: .byte 0xe9 /* replacement jmp with 32 bit immediate */
+ .long copy_user_generic_c-1b /* offset */
+ .previous
+ .section .altinstructions,"a"
+@@ -40,9 +44,8 @@ copy_to_user:
+ .previous
+
+ /* Standard copy_from_user with segment limit checking */
+- .globl copy_from_user
+- .p2align 4
+-copy_from_user:
++ENTRY(copy_from_user)
++ CFI_STARTPROC
+ GET_THREAD_INFO(%rax)
+ movq %rsi,%rcx
+ addq %rdx,%rcx
+@@ -50,10 +53,13 @@ copy_from_user:
+ cmpq threadinfo_addr_limit(%rax),%rcx
+ jae bad_from_user
+ /* FALL THROUGH to copy_user_generic */
++ CFI_ENDPROC
++ENDPROC(copy_from_user)
+
+ .section .fixup,"ax"
+ /* must zero dest */
+ bad_from_user:
++ CFI_STARTPROC
+ movl %edx,%ecx
+ xorl %eax,%eax
+ rep
+@@ -61,6 +67,8 @@ bad_from_user:
+ bad_to_user:
+ movl %edx,%eax
+ ret
++ CFI_ENDPROC
++END(bad_from_user)
+ .previous
+
+
+@@ -75,9 +83,8 @@ bad_to_user:
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
+- .globl copy_user_generic
+- .p2align 4
+-copy_user_generic:
++ENTRY(copy_user_generic)
++ CFI_STARTPROC
+ .byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */
+ .byte 0x66,0x90
+ 1:
+@@ -95,6 +102,8 @@ copy_user_generic:
+ .previous
+ .Lcug:
+ pushq %rbx
++ CFI_ADJUST_CFA_OFFSET 8
++ CFI_REL_OFFSET rbx, 0
+ xorl %eax,%eax /*zero for the exception handler */
+
+ #ifdef FIX_ALIGNMENT
+@@ -168,9 +177,13 @@ copy_user_generic:
+ decl %ecx
+ jnz .Lloop_1
+
++ CFI_REMEMBER_STATE
+ .Lende:
+ popq %rbx
++ CFI_ADJUST_CFA_OFFSET -8
++ CFI_RESTORE rbx
+ ret
++ CFI_RESTORE_STATE
+
+ #ifdef FIX_ALIGNMENT
+ /* align destination */
+@@ -261,6 +274,9 @@ copy_user_generic:
+ .Le_zero:
+ movq %rdx,%rax
+ jmp .Lende
++ CFI_ENDPROC
++ENDPROC(copy_user_generic)
++
+
+ /* Some CPUs run faster using the string copy instructions.
+ This is also a lot simpler. Use them when possible.
+@@ -282,6 +298,7 @@ copy_user_generic:
+ * this please consider this.
+ */
+ copy_user_generic_c:
++ CFI_STARTPROC
+ movl %edx,%ecx
+ shrl $3,%ecx
+ andl $7,%edx
+@@ -294,6 +311,8 @@ copy_user_generic_c:
+ ret
+ 3: lea (%rdx,%rcx,8),%rax
+ ret
++ CFI_ENDPROC
++END(copy_user_generic_c)
+
+ .section __ex_table,"a"
+ .quad 1b,3b
+diff --git a/arch/x86_64/lib/csum-copy.S b/arch/x86_64/lib/csum-copy.S
+index 72fd55e..f0dba36 100644
+--- a/arch/x86_64/lib/csum-copy.S
++++ b/arch/x86_64/lib/csum-copy.S
+@@ -5,8 +5,9 @@
+ * License. See the file COPYING in the main directory of this archive
+ * for more details. No warranty for anything given at all.
+ */
+- #include <linux/linkage.h>
+- #include <asm/errno.h>
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++#include <asm/errno.h>
+
+ /*
+ * Checksum copy with exception handling.
+@@ -53,19 +54,24 @@
+ .endm
+
+
+- .globl csum_partial_copy_generic
+- .p2align 4
+-csum_partial_copy_generic:
++ENTRY(csum_partial_copy_generic)
++ CFI_STARTPROC
+ cmpl $3*64,%edx
+ jle .Lignore
+
+ .Lignore:
+ subq $7*8,%rsp
++ CFI_ADJUST_CFA_OFFSET 7*8
+ movq %rbx,2*8(%rsp)
++ CFI_REL_OFFSET rbx, 2*8
+ movq %r12,3*8(%rsp)
++ CFI_REL_OFFSET r12, 3*8
+ movq %r14,4*8(%rsp)
++ CFI_REL_OFFSET r14, 4*8
+ movq %r13,5*8(%rsp)
++ CFI_REL_OFFSET r13, 5*8
+ movq %rbp,6*8(%rsp)
++ CFI_REL_OFFSET rbp, 6*8
+
+ movq %r8,(%rsp)
+ movq %r9,1*8(%rsp)
+@@ -208,14 +214,22 @@ csum_partial_copy_generic:
+ addl %ebx,%eax
+ adcl %r9d,%eax /* carry */
+
++ CFI_REMEMBER_STATE
+ .Lende:
+ movq 2*8(%rsp),%rbx
++ CFI_RESTORE rbx
+ movq 3*8(%rsp),%r12
++ CFI_RESTORE r12
+ movq 4*8(%rsp),%r14
++ CFI_RESTORE r14
+ movq 5*8(%rsp),%r13
++ CFI_RESTORE r13
+ movq 6*8(%rsp),%rbp
++ CFI_RESTORE rbp
+ addq $7*8,%rsp
++ CFI_ADJUST_CFA_OFFSET -7*8
+ ret
++ CFI_RESTORE_STATE
+
+ /* Exception handlers. Very simple, zeroing is done in the wrappers */
+ .Lbad_source:
+@@ -231,3 +245,5 @@ csum_partial_copy_generic:
+ jz .Lende
+ movl $-EFAULT,(%rax)
+ jmp .Lende
++ CFI_ENDPROC
++ENDPROC(csum_partial_copy_generic)
+diff --git a/arch/x86_64/lib/getuser.S b/arch/x86_64/lib/getuser.S
+index 3844d5e..5448876 100644
+--- a/arch/x86_64/lib/getuser.S
++++ b/arch/x86_64/lib/getuser.S
+@@ -27,25 +27,26 @@
+ */
+
+ #include <linux/linkage.h>
++#include <asm/dwarf2.h>
+ #include <asm/page.h>
+ #include <asm/errno.h>
+ #include <asm/asm-offsets.h>
+ #include <asm/thread_info.h>
+
+ .text
+- .p2align 4
+-.globl __get_user_1
+-__get_user_1:
++ENTRY(__get_user_1)
++ CFI_STARTPROC
+ GET_THREAD_INFO(%r8)
+ cmpq threadinfo_addr_limit(%r8),%rcx
+ jae bad_get_user
+ 1: movzb (%rcx),%edx
+ xorl %eax,%eax
+ ret
++ CFI_ENDPROC
++ENDPROC(__get_user_1)
+
+- .p2align 4
+-.globl __get_user_2
+-__get_user_2:
++ENTRY(__get_user_2)
++ CFI_STARTPROC
+ GET_THREAD_INFO(%r8)
+ addq $1,%rcx
+ jc 20f
+@@ -57,10 +58,11 @@ __get_user_2:
+ ret
+ 20: decq %rcx
+ jmp bad_get_user
++ CFI_ENDPROC
++ENDPROC(__get_user_2)
+
+- .p2align 4
+-.globl __get_user_4
+-__get_user_4:
++ENTRY(__get_user_4)
++ CFI_STARTPROC
+ GET_THREAD_INFO(%r8)
+ addq $3,%rcx
+ jc 30f
+@@ -72,10 +74,11 @@ __get_user_4:
+ ret
+ 30: subq $3,%rcx
+ jmp bad_get_user
++ CFI_ENDPROC
++ENDPROC(__get_user_4)
+
+- .p2align 4
+-.globl __get_user_8
+-__get_user_8:
++ENTRY(__get_user_8)
++ CFI_STARTPROC
+ GET_THREAD_INFO(%r8)
+ addq $7,%rcx
+ jc 40f
+@@ -87,11 +90,16 @@ __get_user_8:
+ ret
+ 40: subq $7,%rcx
+ jmp bad_get_user
++ CFI_ENDPROC
++ENDPROC(__get_user_8)
+
+ bad_get_user:
++ CFI_STARTPROC
+ xorl %edx,%edx
+ movq $(-EFAULT),%rax
+ ret
++ CFI_ENDPROC
++END(bad_get_user)
+
+ .section __ex_table,"a"
+ .quad 1b,bad_get_user
+diff --git a/arch/x86_64/lib/iomap_copy.S b/arch/x86_64/lib/iomap_copy.S
+index 8bbade5..05a95e7 100644
+--- a/arch/x86_64/lib/iomap_copy.S
++++ b/arch/x86_64/lib/iomap_copy.S
+@@ -15,12 +15,16 @@
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++
+ /*
+ * override generic version in lib/iomap_copy.c
+ */
+- .globl __iowrite32_copy
+- .p2align 4
+-__iowrite32_copy:
++ENTRY(__iowrite32_copy)
++ CFI_STARTPROC
+ movl %edx,%ecx
+ rep movsd
+ ret
++ CFI_ENDPROC
++ENDPROC(__iowrite32_copy)
+diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
+index 5554948..967b22f 100644
+--- a/arch/x86_64/lib/memcpy.S
++++ b/arch/x86_64/lib/memcpy.S
+@@ -1,6 +1,10 @@
+ /* Copyright 2002 Andi Kleen */
+
+- #include <asm/cpufeature.h>
++#include <linux/config.h>
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++#include <asm/cpufeature.h>
++
+ /*
+ * memcpy - Copy a memory block.
+ *
+@@ -13,12 +17,26 @@
+ * rax original destination
+ */
+
+- .globl __memcpy
+- .globl memcpy
+- .p2align 4
+-__memcpy:
+-memcpy:
++ ALIGN
++memcpy_c:
++ CFI_STARTPROC
++ movq %rdi,%rax
++ movl %edx,%ecx
++ shrl $3,%ecx
++ andl $7,%edx
++ rep movsq
++ movl %edx,%ecx
++ rep movsb
++ ret
++ CFI_ENDPROC
++ENDPROC(memcpy_c)
++
++ENTRY(__memcpy)
++ENTRY(memcpy)
++ CFI_STARTPROC
+ pushq %rbx
++ CFI_ADJUST_CFA_OFFSET 8
++ CFI_REL_OFFSET rbx, 0
+ movq %rdi,%rax
+
+ movl %edx,%ecx
+@@ -86,36 +104,27 @@ memcpy:
+
+ .Lende:
+ popq %rbx
++ CFI_ADJUST_CFA_OFFSET -8
++ CFI_RESTORE rbx
+ ret
+ .Lfinal:
++ CFI_ENDPROC
++ENDPROC(memcpy)
++ENDPROC(__memcpy)
+
+ /* Some CPUs run faster using the string copy instructions.
+ It is also a lot simpler. Use this when possible */
+
++ .section .altinstr_replacement,"ax"
++1: .byte 0xeb /* jmp <disp8> */
++ .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
++2:
++ .previous
+ .section .altinstructions,"a"
+ .align 8
+- .quad memcpy
+- .quad memcpy_c
+- .byte X86_FEATURE_REP_GOOD
+- .byte .Lfinal-memcpy
+- .byte memcpy_c_end-memcpy_c
+- .previous
+-
+- .section .altinstr_replacement,"ax"
+- /* rdi destination
+- * rsi source
+- * rdx count
+- */
+-memcpy_c:
+- movq %rdi,%rax
+- movl %edx,%ecx
+- shrl $3,%ecx
+- andl $7,%edx
+- rep
+- movsq
+- movl %edx,%ecx
+- rep
+- movsb
+- ret
+-memcpy_c_end:
++ .quad memcpy
++ .quad 1b
++ .byte X86_FEATURE_REP_GOOD
++ .byte .Lfinal - memcpy
++ .byte 2b - 1b
+ .previous
+diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
+index ad397f2..09ed1f6 100644
+--- a/arch/x86_64/lib/memset.S
++++ b/arch/x86_64/lib/memset.S
+@@ -1,4 +1,9 @@
+ /* Copyright 2002 Andi Kleen, SuSE Labs */
++
++#include <linux/config.h>
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++
+ /*
+ * ISO C memset - set a memory block to a byte value.
+ *
+@@ -8,11 +13,29 @@
+ *
+ * rax original destination
+ */
+- .globl __memset
+- .globl memset
+- .p2align 4
+-memset:
+-__memset:
++ ALIGN
++memset_c:
++ CFI_STARTPROC
++ movq %rdi,%r9
++ movl %edx,%r8d
++ andl $7,%r8d
++ movl %edx,%ecx
++ shrl $3,%ecx
++ /* expand byte value */
++ movzbl %sil,%esi
++ movabs $0x0101010101010101,%rax
++ mulq %rsi /* with rax, clobbers rdx */
++ rep stosq
++ movl %r8d,%ecx
++ rep stosb
++ movq %r9,%rax
++ ret
++ CFI_ENDPROC
++ENDPROC(memset_c)
++
++ENTRY(memset)
++ENTRY(__memset)
++ CFI_STARTPROC
+ movq %rdi,%r10
+ movq %rdx,%r11
+
+@@ -25,6 +48,7 @@ __memset:
+ movl %edi,%r9d
+ andl $7,%r9d
+ jnz .Lbad_alignment
++ CFI_REMEMBER_STATE
+ .Lafter_bad_alignment:
+
+ movl %r11d,%ecx
+@@ -75,6 +99,7 @@ __memset:
+ movq %r10,%rax
+ ret
+
++ CFI_RESTORE_STATE
+ .Lbad_alignment:
+ cmpq $7,%r11
+ jbe .Lhandle_7
+@@ -84,42 +109,26 @@ __memset:
+ addq %r8,%rdi
+ subq %r8,%r11
+ jmp .Lafter_bad_alignment
++.Lfinal:
++ CFI_ENDPROC
++ENDPROC(memset)
++ENDPROC(__memset)
+
+ /* Some CPUs run faster using the string instructions.
+ It is also a lot simpler. Use this when possible */
+
+ #include <asm/cpufeature.h>
+
++ .section .altinstr_replacement,"ax"
++1: .byte 0xeb /* jmp <disp8> */
++ .byte (memset_c - memset) - (2f - 1b) /* offset */
++2:
++ .previous
+ .section .altinstructions,"a"
+ .align 8
+- .quad memset
+- .quad memset_c
+- .byte X86_FEATURE_REP_GOOD
+- .byte memset_c_end-memset_c
+- .byte memset_c_end-memset_c
+- .previous
+-
+- .section .altinstr_replacement,"ax"
+- /* rdi destination
+- * rsi value
+- * rdx count
+- */
+-memset_c:
+- movq %rdi,%r9
+- movl %edx,%r8d
+- andl $7,%r8d
+- movl %edx,%ecx
+- shrl $3,%ecx
+- /* expand byte value */
+- movzbl %sil,%esi
+- movabs $0x0101010101010101,%rax
+- mulq %rsi /* with rax, clobbers rdx */
+- rep
+- stosq
+- movl %r8d,%ecx
+- rep
+- stosb
+- movq %r9,%rax
+- ret
+-memset_c_end:
++ .quad memset
++ .quad 1b
++ .byte X86_FEATURE_REP_GOOD
++ .byte .Lfinal - memset
++ .byte 2b - 1b
+ .previous
+diff --git a/arch/x86_64/lib/putuser.S b/arch/x86_64/lib/putuser.S
+index 7f55939..4989f5a 100644
+--- a/arch/x86_64/lib/putuser.S
++++ b/arch/x86_64/lib/putuser.S
+@@ -25,25 +25,26 @@
+ */
+
+ #include <linux/linkage.h>
++#include <asm/dwarf2.h>
+ #include <asm/page.h>
+ #include <asm/errno.h>
+ #include <asm/asm-offsets.h>
+ #include <asm/thread_info.h>
+
+ .text
+- .p2align 4
+-.globl __put_user_1
+-__put_user_1:
++ENTRY(__put_user_1)
++ CFI_STARTPROC
+ GET_THREAD_INFO(%r8)
+ cmpq threadinfo_addr_limit(%r8),%rcx
+ jae bad_put_user
+ 1: movb %dl,(%rcx)
+ xorl %eax,%eax
+ ret
++ CFI_ENDPROC
++ENDPROC(__put_user_1)
+
+- .p2align 4
+-.globl __put_user_2
+-__put_user_2:
++ENTRY(__put_user_2)
++ CFI_STARTPROC
+ GET_THREAD_INFO(%r8)
+ addq $1,%rcx
+ jc 20f
+@@ -55,10 +56,11 @@ __put_user_2:
+ ret
+ 20: decq %rcx
+ jmp bad_put_user
++ CFI_ENDPROC
++ENDPROC(__put_user_2)
+
+- .p2align 4
+-.globl __put_user_4
+-__put_user_4:
++ENTRY(__put_user_4)
++ CFI_STARTPROC
+ GET_THREAD_INFO(%r8)
+ addq $3,%rcx
+ jc 30f
+@@ -70,10 +72,11 @@ __put_user_4:
+ ret
+ 30: subq $3,%rcx
+ jmp bad_put_user
++ CFI_ENDPROC
++ENDPROC(__put_user_4)
+
+- .p2align 4
+-.globl __put_user_8
+-__put_user_8:
++ENTRY(__put_user_8)
++ CFI_STARTPROC
+ GET_THREAD_INFO(%r8)
+ addq $7,%rcx
+ jc 40f
+@@ -85,10 +88,15 @@ __put_user_8:
+ ret
+ 40: subq $7,%rcx
+ jmp bad_put_user
++ CFI_ENDPROC
++ENDPROC(__put_user_8)
+
+ bad_put_user:
++ CFI_STARTPROC
+ movq $(-EFAULT),%rax
+ ret
++ CFI_ENDPROC
++END(bad_put_user)
+
+ .section __ex_table,"a"
+ .quad 1b,bad_put_user