From 61d3b98bad64bdb6e8f61dec73ee540beb49539a Mon Sep 17 00:00:00 2001 From: Michael Marineau Date: Sat, 23 Feb 2008 23:34:18 +0000 Subject: Releasing 2.6.22-2 svn path=/patches/; revision=76 --- tags/2.6.22-2/00000_README | 36 + tags/2.6.22-2/01012_linux-2.6.22.13.patch | 42 + tags/2.6.22-2/01013_linux-2.6.22.14.patch | 1319 + tags/2.6.22-2/01014_linux-2.6.22.15.patch | 1096 + tags/2.6.22-2/01015_linux-2.6.22.16.patch | 27 + tags/2.6.22-2/01016_linux-2.6.22.17.patch | 1360 + tags/2.6.22-2/01017_linux-2.6.22.18.patch | 14 + .../20001_x86-early-quirks-unificiation.patch1 | 237 + tags/2.6.22-2/20002_add-console-use-vt.patch1 | 58 + ...-2.6.19-rc1-kexec-move_segment_code-i386.patch1 | 172 + ....6.19-rc1-kexec-move_segment_code-x86_64.patch1 | 164 + tags/2.6.22-2/20005_blktap-aio-16_03_06.patch1 | 209 + tags/2.6.22-2/20006_fix-ide-cd-pio-mode.patch1 | 36 + tags/2.6.22-2/20007_i386-mach-io-check-nmi.patch1 | 53 + tags/2.6.22-2/20008_net-csum.patch1 | 50 + tags/2.6.22-2/20009_xenoprof-generic.patch1 | 669 + tags/2.6.22-2/20010_softlockup-no-idle-hz.patch1 | 75 + tags/2.6.22-2/20011_xen3-auto-xen-arch.patch1 | 49825 +++++++++++++++++++ tags/2.6.22-2/20012_xen3-auto-xen-drivers.patch1 | 28404 +++++++++++ .../20013_xen3-auto-include-xen-interface.patch1 | 8771 ++++ tags/2.6.22-2/20014_xen3-auto-xen-kconfig.patch1 | 887 + tags/2.6.22-2/20015_xen3-auto-common.patch1 | 2101 + tags/2.6.22-2/20016_xen3-auto-arch-i386.patch1 | 483 + tags/2.6.22-2/20017_xen3-auto-arch-x86_64.patch1 | 502 + .../20018_15130-x86_64-vsyscall-user.patch1 | 51 + tags/2.6.22-2/20019_15181-dma-tracking.patch1 | 551 + tags/2.6.22-2/20020_30-bit-field-booleans.patch1 | 38 + tags/2.6.22-2/20021_42-freeze.patch1 | 67 + tags/2.6.22-2/20022_67-edd.patch1 | 209 + tags/2.6.22-2/20023_70-edid.patch1 | 118 + tags/2.6.22-2/20024_79-balloon-highmem.patch1 | 42 + tags/2.6.22-2/20025_80-blk-teardown.patch1 | 57 + tags/2.6.22-2/20026_81-clock-was-set.patch1 | 48 + tags/2.6.22-2/20027_82-blkdev-wait.patch1 | 92 + tags/2.6.22-2/20028_93-swiotlb.patch1 | 146 + tags/2.6.22-2/20029_95-privcmd-wrlock.patch1 | 72 + .../2.6.22-2/20030_136-pae-vmalloc-sync-all.patch1 | 63 + .../20031_137-netfront-copy-release.patch1 | 128 + tags/2.6.22-2/20032_141-driver-autoload.patch1 | 120 + tags/2.6.22-2/20033_144-xenbus-dev-wait.patch1 | 104 + tags/2.6.22-2/20034_145-xenbus-error-path.patch1 | 24 + .../20035_148-blkfront-no-bounce-bufs.patch1 | 25 + .../20036_152-netloop-check-cloned-skb.patch1 | 35 + tags/2.6.22-2/20037_157-netfront-skb-deref.patch1 | 35 + .../20038_252-l1-entry-update-highpte.patch1 | 27 + tags/2.6.22-2/20039_265-ptep_get_and_clear.patch1 | 74 + tags/2.6.22-2/20040_xen3-fixup-common.patch1 | 365 + tags/2.6.22-2/20041_xen3-fixup-arch-i386.patch1 | 76 + tags/2.6.22-2/20042_xen3-fixup-arch-x86_64.patch1 | 103 + tags/2.6.22-2/20043_xen3-patch-2.6.18.patch1 | 394 + tags/2.6.22-2/20044_xen3-patch-2.6.19.patch1 | 12637 +++++ tags/2.6.22-2/20045_xen3-patch-2.6.20.patch1 | 7592 +++ tags/2.6.22-2/20046_xen3-patch-2.6.21.patch1 | 5107 ++ tags/2.6.22-2/20047_xen3-patch-2.6.22.patch1 | 7866 +++ tags/2.6.22-2/20048_xen3-patch-2.6.22.5-6.patch1 | 30 + tags/2.6.22-2/20049_xen3-patch-2.6.22.6-7.patch1 | 57 + tags/2.6.22-2/20050_xen3-patch-2.6.22.11-12.patch1 | 88 + ...20051_xen3-x86-early-quirks-unificiation.patch1 | 25 + tags/2.6.22-2/20052_xen3-x86-fam10-l3cache.patch1 | 28 + tags/2.6.22-2/20053_xen3-aux-at_vector_size.patch1 | 47 + tags/2.6.22-2/20054_xen-balloon-min.patch1 | 77 + tags/2.6.22-2/20055_xen-modular-blktap.patch1 | 41 + tags/2.6.22-2/20056_xen-x86-panic-no-reboot.patch1 | 58 + tags/2.6.22-2/20057_xen-i386-panic-on-oops.patch1 | 27 + .../20058_xen-x86-kconfig-no-cpu_freq.patch1 | 35 + .../2.6.22-2/20059_xen-configurable-console.patch1 | 181 + tags/2.6.22-2/20060_xen-x86_64-init-cleanup.patch1 | 294 + tags/2.6.22-2/20061_xen-balloon-max-target.patch1 | 32 + tags/2.6.22-2/20062_xen-x86-dcr-fallback.patch1 | 158 + tags/2.6.22-2/20063_xen-x86-consistent-nmi.patch1 | 345 + tags/2.6.22-2/20064_xen-x86-no-lapic.patch1 | 1426 + tags/2.6.22-2/20065_xen-no-video-select.patch1 | 21 + .../2.6.22-2/20066_xen-blkback-bimodal-suse.patch1 | 39 + tags/2.6.22-2/20067_xen-console-default.patch1 | 41 + tags/2.6.22-2/20068_xen-x86-panic-smp.patch1 | 96 + tags/2.6.22-2/20069_xen-split-pt-lock.patch1 | 220 + .../20070_xen-blkif-protocol-fallback-hack.patch1 | 229 + tags/2.6.22-2/20071_xen-x86-pXX_val.patch1 | 434 + tags/2.6.22-2/20072_xen-x86_64-physmap-nx.patch1 | 36 + tags/2.6.22-2/20073_xen-i386-kconfig-msr.patch1 | 18 + tags/2.6.22-2/20074_xen-x86_64-entry.patch1 | 42 + tags/2.6.22-2/20075_xen-intel-agp.patch1 | 33 + tags/2.6.22-2/20076_xen-blkback-cdrom.patch1 | 277 + tags/2.6.22-2/20077_xen-isa-dma.patch1 | 543 + tags/2.6.22-2/20078_xen-i386-set-fixmap.patch1 | 126 + tags/2.6.22-2/20079_xenfb-module-param.patch1 | 108 + tags/2.6.22-2/50001_make-install.patch | 52 + .../2.6.22-2/50002_always-enable-xen-genapic.patch | 12 + tags/2.6.22-2/50009_gentooify-tls-warning.patch | 16 + 89 files changed, 138148 insertions(+) create mode 100644 tags/2.6.22-2/00000_README create mode 100644 tags/2.6.22-2/01012_linux-2.6.22.13.patch create mode 100644 tags/2.6.22-2/01013_linux-2.6.22.14.patch create mode 100644 tags/2.6.22-2/01014_linux-2.6.22.15.patch create mode 100644 tags/2.6.22-2/01015_linux-2.6.22.16.patch create mode 100644 tags/2.6.22-2/01016_linux-2.6.22.17.patch create mode 100644 tags/2.6.22-2/01017_linux-2.6.22.18.patch create mode 100644 tags/2.6.22-2/20001_x86-early-quirks-unificiation.patch1 create mode 100644 tags/2.6.22-2/20002_add-console-use-vt.patch1 create mode 100644 tags/2.6.22-2/20003_linux-2.6.19-rc1-kexec-move_segment_code-i386.patch1 create mode 100644 tags/2.6.22-2/20004_linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch1 create mode 100644 tags/2.6.22-2/20005_blktap-aio-16_03_06.patch1 create mode 100644 tags/2.6.22-2/20006_fix-ide-cd-pio-mode.patch1 create mode 100644 tags/2.6.22-2/20007_i386-mach-io-check-nmi.patch1 create mode 100644 tags/2.6.22-2/20008_net-csum.patch1 create mode 100644 tags/2.6.22-2/20009_xenoprof-generic.patch1 create mode 100644 tags/2.6.22-2/20010_softlockup-no-idle-hz.patch1 create mode 100644 tags/2.6.22-2/20011_xen3-auto-xen-arch.patch1 create mode 100644 tags/2.6.22-2/20012_xen3-auto-xen-drivers.patch1 create mode 100644 tags/2.6.22-2/20013_xen3-auto-include-xen-interface.patch1 create mode 100644 tags/2.6.22-2/20014_xen3-auto-xen-kconfig.patch1 create mode 100644 tags/2.6.22-2/20015_xen3-auto-common.patch1 create mode 100644 tags/2.6.22-2/20016_xen3-auto-arch-i386.patch1 create mode 100644 tags/2.6.22-2/20017_xen3-auto-arch-x86_64.patch1 create mode 100644 tags/2.6.22-2/20018_15130-x86_64-vsyscall-user.patch1 create mode 100644 tags/2.6.22-2/20019_15181-dma-tracking.patch1 create mode 100644 tags/2.6.22-2/20020_30-bit-field-booleans.patch1 create mode 100644 tags/2.6.22-2/20021_42-freeze.patch1 create mode 100644 tags/2.6.22-2/20022_67-edd.patch1 create mode 100644 tags/2.6.22-2/20023_70-edid.patch1 create mode 100644 tags/2.6.22-2/20024_79-balloon-highmem.patch1 create mode 100644 tags/2.6.22-2/20025_80-blk-teardown.patch1 create mode 100644 tags/2.6.22-2/20026_81-clock-was-set.patch1 create mode 100644 tags/2.6.22-2/20027_82-blkdev-wait.patch1 create mode 100644 tags/2.6.22-2/20028_93-swiotlb.patch1 create mode 100644 tags/2.6.22-2/20029_95-privcmd-wrlock.patch1 create mode 100644 tags/2.6.22-2/20030_136-pae-vmalloc-sync-all.patch1 create mode 100644 tags/2.6.22-2/20031_137-netfront-copy-release.patch1 create mode 100644 tags/2.6.22-2/20032_141-driver-autoload.patch1 create mode 100644 tags/2.6.22-2/20033_144-xenbus-dev-wait.patch1 create mode 100644 tags/2.6.22-2/20034_145-xenbus-error-path.patch1 create mode 100644 tags/2.6.22-2/20035_148-blkfront-no-bounce-bufs.patch1 create mode 100644 tags/2.6.22-2/20036_152-netloop-check-cloned-skb.patch1 create mode 100644 tags/2.6.22-2/20037_157-netfront-skb-deref.patch1 create mode 100644 tags/2.6.22-2/20038_252-l1-entry-update-highpte.patch1 create mode 100644 tags/2.6.22-2/20039_265-ptep_get_and_clear.patch1 create mode 100644 tags/2.6.22-2/20040_xen3-fixup-common.patch1 create mode 100644 tags/2.6.22-2/20041_xen3-fixup-arch-i386.patch1 create mode 100644 tags/2.6.22-2/20042_xen3-fixup-arch-x86_64.patch1 create mode 100644 tags/2.6.22-2/20043_xen3-patch-2.6.18.patch1 create mode 100644 tags/2.6.22-2/20044_xen3-patch-2.6.19.patch1 create mode 100644 tags/2.6.22-2/20045_xen3-patch-2.6.20.patch1 create mode 100644 tags/2.6.22-2/20046_xen3-patch-2.6.21.patch1 create mode 100644 tags/2.6.22-2/20047_xen3-patch-2.6.22.patch1 create mode 100644 tags/2.6.22-2/20048_xen3-patch-2.6.22.5-6.patch1 create mode 100644 tags/2.6.22-2/20049_xen3-patch-2.6.22.6-7.patch1 create mode 100644 tags/2.6.22-2/20050_xen3-patch-2.6.22.11-12.patch1 create mode 100644 tags/2.6.22-2/20051_xen3-x86-early-quirks-unificiation.patch1 create mode 100644 tags/2.6.22-2/20052_xen3-x86-fam10-l3cache.patch1 create mode 100644 tags/2.6.22-2/20053_xen3-aux-at_vector_size.patch1 create mode 100644 tags/2.6.22-2/20054_xen-balloon-min.patch1 create mode 100644 tags/2.6.22-2/20055_xen-modular-blktap.patch1 create mode 100644 tags/2.6.22-2/20056_xen-x86-panic-no-reboot.patch1 create mode 100644 tags/2.6.22-2/20057_xen-i386-panic-on-oops.patch1 create mode 100644 tags/2.6.22-2/20058_xen-x86-kconfig-no-cpu_freq.patch1 create mode 100644 tags/2.6.22-2/20059_xen-configurable-console.patch1 create mode 100644 tags/2.6.22-2/20060_xen-x86_64-init-cleanup.patch1 create mode 100644 tags/2.6.22-2/20061_xen-balloon-max-target.patch1 create mode 100644 tags/2.6.22-2/20062_xen-x86-dcr-fallback.patch1 create mode 100644 tags/2.6.22-2/20063_xen-x86-consistent-nmi.patch1 create mode 100644 tags/2.6.22-2/20064_xen-x86-no-lapic.patch1 create mode 100644 tags/2.6.22-2/20065_xen-no-video-select.patch1 create mode 100644 tags/2.6.22-2/20066_xen-blkback-bimodal-suse.patch1 create mode 100644 tags/2.6.22-2/20067_xen-console-default.patch1 create mode 100644 tags/2.6.22-2/20068_xen-x86-panic-smp.patch1 create mode 100644 tags/2.6.22-2/20069_xen-split-pt-lock.patch1 create mode 100644 tags/2.6.22-2/20070_xen-blkif-protocol-fallback-hack.patch1 create mode 100644 tags/2.6.22-2/20071_xen-x86-pXX_val.patch1 create mode 100644 tags/2.6.22-2/20072_xen-x86_64-physmap-nx.patch1 create mode 100644 tags/2.6.22-2/20073_xen-i386-kconfig-msr.patch1 create mode 100644 tags/2.6.22-2/20074_xen-x86_64-entry.patch1 create mode 100644 tags/2.6.22-2/20075_xen-intel-agp.patch1 create mode 100644 tags/2.6.22-2/20076_xen-blkback-cdrom.patch1 create mode 100644 tags/2.6.22-2/20077_xen-isa-dma.patch1 create mode 100644 tags/2.6.22-2/20078_xen-i386-set-fixmap.patch1 create mode 100644 tags/2.6.22-2/20079_xenfb-module-param.patch1 create mode 100644 tags/2.6.22-2/50001_make-install.patch create mode 100644 tags/2.6.22-2/50002_always-enable-xen-genapic.patch create mode 100644 tags/2.6.22-2/50009_gentooify-tls-warning.patch diff --git a/tags/2.6.22-2/00000_README b/tags/2.6.22-2/00000_README new file mode 100644 index 0000000..82c139d --- /dev/null +++ b/tags/2.6.22-2/00000_README @@ -0,0 +1,36 @@ +Xen Patches README +------------------ + +These patches are intended to be stacked on top of genpatches-base. + +Many of the patches included here are swiped from various sources which +use their own four digit patch numbering scheme, so we are stuck with five +digits to indiciate the source for easier tracking and re-syncing. + +Numbering +--------- + +0xxxx Gentoo, not related to Xen. (in case we pull something from extras) +2xxxx Suse, we are using their Xen patch for 2.6.22 +5xxxx Gentoo, Xen and other fixes for Redhat and/or Debian patches. + +Patches +------- + +0xxxx_linux-2.6.22.??? + Kernel.org 2.6.22.y releases that are not included in genpatches. + +2xxxx_??? + Xen patches from Suse's kernel. Note that they are named *.patch1 + to make sure unipatch does the correct thing. + +50001_make-install.patch + Handle make install in a semi-sane way that plays nice with + split domU/dom0 kernels. + +50002_always-enable-xen-genapic.patch + Compile fix for non-SMP (UP) kernels. Since UP support is broken in + upstream Xen I'm not sure if I trust it or not. :-P + +50009_gentooify-tls-warning.patch + Change tls warning instructions to apply directly to Gentoo. diff --git a/tags/2.6.22-2/01012_linux-2.6.22.13.patch b/tags/2.6.22-2/01012_linux-2.6.22.13.patch new file mode 100644 index 0000000..cfd8333 --- /dev/null +++ b/tags/2.6.22-2/01012_linux-2.6.22.13.patch @@ -0,0 +1,42 @@ +Subject: Linux 2.6.22.13 +From: Greg Kroah-Hartman + +Signed-off-by: Greg Kroah-Hartman + +diff --git a/kernel/exit.c b/kernel/exit.c +index 5c8ecba..e3adc46 100644 +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -1336,8 +1336,7 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader, + int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; + + exit_code = p->exit_code; +- if (unlikely(!exit_code) || +- unlikely(p->state & TASK_TRACED)) ++ if (unlikely(!exit_code) || unlikely(p->exit_state)) + goto bail_ref; + return wait_noreap_copyout(p, pid, uid, + why, (exit_code << 8) | 0x7f, +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index e33fb3d..2e1d8e7 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -994,6 +994,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ + if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window)) + return 0; + ++ if (!tp->packets_out) ++ goto out; ++ + /* SACK fastpath: + * if the only SACK change is the increase of the end_seq of + * the first block then only apply that SACK block +@@ -1262,6 +1265,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ + (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) + tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0); + ++out: ++ + #if FASTRETRANS_DEBUG > 0 + BUG_TRAP((int)tp->sacked_out >= 0); + BUG_TRAP((int)tp->lost_out >= 0); diff --git a/tags/2.6.22-2/01013_linux-2.6.22.14.patch b/tags/2.6.22-2/01013_linux-2.6.22.14.patch new file mode 100644 index 0000000..aea3379 --- /dev/null +++ b/tags/2.6.22-2/01013_linux-2.6.22.14.patch @@ -0,0 +1,1319 @@ +Subject: Linux 2.6.22.14 +From: Greg Kroah-Hartman + +Signed-off-by: Greg Kroah-Hartman + +diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c +index f64b81f..8e02ed6 100644 +--- a/arch/i386/kernel/tsc.c ++++ b/arch/i386/kernel/tsc.c +@@ -122,7 +122,7 @@ unsigned long native_calculate_cpu_khz(void) + { + unsigned long long start, end; + unsigned long count; +- u64 delta64; ++ u64 delta64 = (u64)ULLONG_MAX; + int i; + unsigned long flags; + +@@ -134,6 +134,7 @@ unsigned long native_calculate_cpu_khz(void) + rdtscll(start); + mach_countup(&count); + rdtscll(end); ++ delta64 = min(delta64, (end - start)); + } + /* + * Error: ECTCNEVERSET +@@ -144,8 +145,6 @@ unsigned long native_calculate_cpu_khz(void) + if (count <= 1) + goto err; + +- delta64 = end - start; +- + /* cpu freq too fast: */ + if (delta64 > (1ULL<<32)) + goto err; +diff --git a/drivers/i2c/busses/i2c-pasemi.c b/drivers/i2c/busses/i2c-pasemi.c +index 58e3271..dcf5dec 100644 +--- a/drivers/i2c/busses/i2c-pasemi.c ++++ b/drivers/i2c/busses/i2c-pasemi.c +@@ -51,6 +51,7 @@ struct pasemi_smbus { + #define MRXFIFO_DATA_M 0x000000ff + + #define SMSTA_XEN 0x08000000 ++#define SMSTA_MTN 0x00200000 + + #define CTL_MRR 0x00000400 + #define CTL_MTR 0x00000200 +@@ -98,6 +99,10 @@ static unsigned int pasemi_smb_waitready(struct pasemi_smbus *smbus) + status = reg_read(smbus, REG_SMSTA); + } + ++ /* Got NACK? */ ++ if (status & SMSTA_MTN) ++ return -ENXIO; ++ + if (timeout < 0) { + dev_warn(&smbus->dev->dev, "Timeout, status 0x%08x\n", status); + reg_write(smbus, REG_SMSTA, status); +diff --git a/drivers/i2c/chips/eeprom.c b/drivers/i2c/chips/eeprom.c +index bfce13c..5ad36ab 100644 +--- a/drivers/i2c/chips/eeprom.c ++++ b/drivers/i2c/chips/eeprom.c +@@ -125,13 +125,20 @@ static ssize_t eeprom_read(struct kobject *kobj, char *buf, loff_t off, size_t c + for (slice = off >> 5; slice <= (off + count - 1) >> 5; slice++) + eeprom_update_client(client, slice); + +- /* Hide Vaio security settings to regular users (16 first bytes) */ +- if (data->nature == VAIO && off < 16 && !capable(CAP_SYS_ADMIN)) { +- size_t in_row1 = 16 - off; +- in_row1 = min(in_row1, count); +- memset(buf, 0, in_row1); +- if (count - in_row1 > 0) +- memcpy(buf + in_row1, &data->data[16], count - in_row1); ++ /* Hide Vaio private settings to regular users: ++ - BIOS passwords: bytes 0x00 to 0x0f ++ - UUID: bytes 0x10 to 0x1f ++ - Serial number: 0xc0 to 0xdf */ ++ if (data->nature == VAIO && !capable(CAP_SYS_ADMIN)) { ++ int i; ++ ++ for (i = 0; i < count; i++) { ++ if ((off + i <= 0x1f) || ++ (off + i >= 0xc0 && off + i <= 0xdf)) ++ buf[i] = 0; ++ else ++ buf[i] = data->data[off + i]; ++ } + } else { + memcpy(buf, &data->data[off], count); + } +@@ -195,14 +202,18 @@ static int eeprom_detect(struct i2c_adapter *adapter, int address, int kind) + goto exit_kfree; + + /* Detect the Vaio nature of EEPROMs. +- We use the "PCG-" prefix as the signature. */ ++ We use the "PCG-" or "VGN-" prefix as the signature. */ + if (address == 0x57) { +- if (i2c_smbus_read_byte_data(new_client, 0x80) == 'P' +- && i2c_smbus_read_byte(new_client) == 'C' +- && i2c_smbus_read_byte(new_client) == 'G' +- && i2c_smbus_read_byte(new_client) == '-') { ++ char name[4]; ++ ++ name[0] = i2c_smbus_read_byte_data(new_client, 0x80); ++ name[1] = i2c_smbus_read_byte(new_client); ++ name[2] = i2c_smbus_read_byte(new_client); ++ name[3] = i2c_smbus_read_byte(new_client); ++ ++ if (!memcmp(name, "PCG-", 4) || !memcmp(name, "VGN-", 4)) { + dev_info(&new_client->dev, "Vaio EEPROM detected, " +- "enabling password protection\n"); ++ "enabling privacy protection\n"); + data->nature = VAIO; + } + } +diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c +index d9c4fd1..096a081 100644 +--- a/drivers/ide/pci/serverworks.c ++++ b/drivers/ide/pci/serverworks.c +@@ -101,6 +101,7 @@ static u8 svwks_udma_filter(ide_drive_t *drive) + mode = 2; + + switch(mode) { ++ case 3: mask = 0x3f; break; + case 2: mask = 0x1f; break; + case 1: mask = 0x07; break; + default: mask = 0x00; break; +diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c +index 7a69a18..4484a64 100644 +--- a/drivers/isdn/hardware/avm/b1.c ++++ b/drivers/isdn/hardware/avm/b1.c +@@ -321,12 +321,15 @@ void b1_reset_ctr(struct capi_ctr *ctrl) + avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata); + avmcard *card = cinfo->card; + unsigned int port = card->port; ++ unsigned long flags; + + b1_reset(port); + b1_reset(port); + + memset(cinfo->version, 0, sizeof(cinfo->version)); ++ spin_lock_irqsave(&card->lock, flags); + capilib_release(&cinfo->ncci_head); ++ spin_unlock_irqrestore(&card->lock, flags); + capi_ctr_reseted(ctrl); + } + +@@ -361,9 +364,8 @@ void b1_release_appl(struct capi_ctr *ctrl, u16 appl) + unsigned int port = card->port; + unsigned long flags; + +- capilib_release_appl(&cinfo->ncci_head, appl); +- + spin_lock_irqsave(&card->lock, flags); ++ capilib_release_appl(&cinfo->ncci_head, appl); + b1_put_byte(port, SEND_RELEASE); + b1_put_word(port, appl); + spin_unlock_irqrestore(&card->lock, flags); +@@ -380,27 +382,27 @@ u16 b1_send_message(struct capi_ctr *ctrl, struct sk_buff *skb) + u8 subcmd = CAPIMSG_SUBCOMMAND(skb->data); + u16 dlen, retval; + ++ spin_lock_irqsave(&card->lock, flags); + if (CAPICMD(cmd, subcmd) == CAPI_DATA_B3_REQ) { + retval = capilib_data_b3_req(&cinfo->ncci_head, + CAPIMSG_APPID(skb->data), + CAPIMSG_NCCI(skb->data), + CAPIMSG_MSGID(skb->data)); +- if (retval != CAPI_NOERROR) ++ if (retval != CAPI_NOERROR) { ++ spin_unlock_irqrestore(&card->lock, flags); + return retval; ++ } + + dlen = CAPIMSG_DATALEN(skb->data); + +- spin_lock_irqsave(&card->lock, flags); + b1_put_byte(port, SEND_DATA_B3_REQ); + b1_put_slice(port, skb->data, len); + b1_put_slice(port, skb->data + len, dlen); +- spin_unlock_irqrestore(&card->lock, flags); + } else { +- spin_lock_irqsave(&card->lock, flags); + b1_put_byte(port, SEND_MESSAGE); + b1_put_slice(port, skb->data, len); +- spin_unlock_irqrestore(&card->lock, flags); + } ++ spin_unlock_irqrestore(&card->lock, flags); + + dev_kfree_skb_any(skb); + return CAPI_NOERROR; +@@ -534,17 +536,17 @@ irqreturn_t b1_interrupt(int interrupt, void *devptr) + + ApplId = (unsigned) b1_get_word(card->port); + MsgLen = b1_get_slice(card->port, card->msgbuf); +- spin_unlock_irqrestore(&card->lock, flags); + if (!(skb = alloc_skb(MsgLen, GFP_ATOMIC))) { + printk(KERN_ERR "%s: incoming packet dropped\n", + card->name); ++ spin_unlock_irqrestore(&card->lock, flags); + } else { + memcpy(skb_put(skb, MsgLen), card->msgbuf, MsgLen); + if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_CONF) + capilib_data_b3_conf(&cinfo->ncci_head, ApplId, + CAPIMSG_NCCI(skb->data), + CAPIMSG_MSGID(skb->data)); +- ++ spin_unlock_irqrestore(&card->lock, flags); + capi_ctr_handle_message(ctrl, ApplId, skb); + } + break; +@@ -554,21 +556,17 @@ irqreturn_t b1_interrupt(int interrupt, void *devptr) + ApplId = b1_get_word(card->port); + NCCI = b1_get_word(card->port); + WindowSize = b1_get_word(card->port); +- spin_unlock_irqrestore(&card->lock, flags); +- + capilib_new_ncci(&cinfo->ncci_head, ApplId, NCCI, WindowSize); +- ++ spin_unlock_irqrestore(&card->lock, flags); + break; + + case RECEIVE_FREE_NCCI: + + ApplId = b1_get_word(card->port); + NCCI = b1_get_word(card->port); +- spin_unlock_irqrestore(&card->lock, flags); +- + if (NCCI != 0xffffffff) + capilib_free_ncci(&cinfo->ncci_head, ApplId, NCCI); +- ++ spin_unlock_irqrestore(&card->lock, flags); + break; + + case RECEIVE_START: +diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c +index d58f927..8710cf6 100644 +--- a/drivers/isdn/hardware/avm/c4.c ++++ b/drivers/isdn/hardware/avm/c4.c +@@ -727,6 +727,7 @@ static void c4_send_init(avmcard *card) + { + struct sk_buff *skb; + void *p; ++ unsigned long flags; + + skb = alloc_skb(15, GFP_ATOMIC); + if (!skb) { +@@ -744,12 +745,15 @@ static void c4_send_init(avmcard *card) + skb_put(skb, (u8 *)p - (u8 *)skb->data); + + skb_queue_tail(&card->dma->send_queue, skb); ++ spin_lock_irqsave(&card->lock, flags); + c4_dispatch_tx(card); ++ spin_unlock_irqrestore(&card->lock, flags); + } + + static int queue_sendconfigword(avmcard *card, u32 val) + { + struct sk_buff *skb; ++ unsigned long flags; + void *p; + + skb = alloc_skb(3+4, GFP_ATOMIC); +@@ -766,7 +770,9 @@ static int queue_sendconfigword(avmcard *card, u32 val) + skb_put(skb, (u8 *)p - (u8 *)skb->data); + + skb_queue_tail(&card->dma->send_queue, skb); ++ spin_lock_irqsave(&card->lock, flags); + c4_dispatch_tx(card); ++ spin_unlock_irqrestore(&card->lock, flags); + return 0; + } + +@@ -986,7 +992,9 @@ static void c4_release_appl(struct capi_ctr *ctrl, u16 appl) + struct sk_buff *skb; + void *p; + ++ spin_lock_irqsave(&card->lock, flags); + capilib_release_appl(&cinfo->ncci_head, appl); ++ spin_unlock_irqrestore(&card->lock, flags); + + if (ctrl->cnr == card->cardnr) { + skb = alloc_skb(7, GFP_ATOMIC); +@@ -1019,7 +1027,8 @@ static u16 c4_send_message(struct capi_ctr *ctrl, struct sk_buff *skb) + u16 retval = CAPI_NOERROR; + unsigned long flags; + +- if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_REQ) { ++ spin_lock_irqsave(&card->lock, flags); ++ if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_REQ) { + retval = capilib_data_b3_req(&cinfo->ncci_head, + CAPIMSG_APPID(skb->data), + CAPIMSG_NCCI(skb->data), +@@ -1027,10 +1036,9 @@ static u16 c4_send_message(struct capi_ctr *ctrl, struct sk_buff *skb) + } + if (retval == CAPI_NOERROR) { + skb_queue_tail(&card->dma->send_queue, skb); +- spin_lock_irqsave(&card->lock, flags); + c4_dispatch_tx(card); +- spin_unlock_irqrestore(&card->lock, flags); + } ++ spin_unlock_irqrestore(&card->lock, flags); + return retval; + } + +diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c +index 765fb75..06f6ec3 100644 +--- a/drivers/net/forcedeth.c ++++ b/drivers/net/forcedeth.c +@@ -987,7 +987,7 @@ static void nv_enable_irq(struct net_device *dev) + if (np->msi_flags & NV_MSI_X_ENABLED) + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector); + else +- enable_irq(dev->irq); ++ enable_irq(np->pci_dev->irq); + } else { + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); +@@ -1003,7 +1003,7 @@ static void nv_disable_irq(struct net_device *dev) + if (np->msi_flags & NV_MSI_X_ENABLED) + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector); + else +- disable_irq(dev->irq); ++ disable_irq(np->pci_dev->irq); + } else { + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); +@@ -1600,7 +1600,7 @@ static void nv_do_rx_refill(unsigned long data) + if (np->msi_flags & NV_MSI_X_ENABLED) + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector); + else +- disable_irq(dev->irq); ++ disable_irq(np->pci_dev->irq); + } else { + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + } +@@ -1618,7 +1618,7 @@ static void nv_do_rx_refill(unsigned long data) + if (np->msi_flags & NV_MSI_X_ENABLED) + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector); + else +- enable_irq(dev->irq); ++ enable_irq(np->pci_dev->irq); + } else { + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + } +@@ -3556,10 +3556,12 @@ static int nv_request_irq(struct net_device *dev, int intr_test) + if (ret != 0 && np->msi_flags & NV_MSI_CAPABLE) { + if ((ret = pci_enable_msi(np->pci_dev)) == 0) { + np->msi_flags |= NV_MSI_ENABLED; ++ dev->irq = np->pci_dev->irq; + if (request_irq(np->pci_dev->irq, handler, IRQF_SHARED, dev->name, dev) != 0) { + printk(KERN_INFO "forcedeth: request_irq failed %d\n", ret); + pci_disable_msi(np->pci_dev); + np->msi_flags &= ~NV_MSI_ENABLED; ++ dev->irq = np->pci_dev->irq; + goto out_err; + } + +@@ -3622,7 +3624,7 @@ static void nv_do_nic_poll(unsigned long data) + if (np->msi_flags & NV_MSI_X_ENABLED) + disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector); + else +- disable_irq_lockdep(dev->irq); ++ disable_irq_lockdep(np->pci_dev->irq); + mask = np->irqmask; + } else { + if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) { +@@ -3640,6 +3642,8 @@ static void nv_do_nic_poll(unsigned long data) + } + np->nic_poll_irq = 0; + ++ /* disable_irq() contains synchronize_irq, thus no irq handler can run now */ ++ + if (np->recover_error) { + np->recover_error = 0; + printk(KERN_INFO "forcedeth: MAC in recoverable error state\n"); +@@ -3676,7 +3680,6 @@ static void nv_do_nic_poll(unsigned long data) + } + } + +- /* FIXME: Do we need synchronize_irq(dev->irq) here? */ + + writel(mask, base + NvRegIrqMask); + pci_push(base); +@@ -3689,7 +3692,7 @@ static void nv_do_nic_poll(unsigned long data) + if (np->msi_flags & NV_MSI_X_ENABLED) + enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector); + else +- enable_irq_lockdep(dev->irq); ++ enable_irq_lockdep(np->pci_dev->irq); + } else { + if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) { + nv_nic_irq_rx(0, dev); +@@ -4943,7 +4946,7 @@ static int nv_close(struct net_device *dev) + np->in_shutdown = 1; + spin_unlock_irq(&np->lock); + netif_poll_disable(dev); +- synchronize_irq(dev->irq); ++ synchronize_irq(np->pci_dev->irq); + + del_timer_sync(&np->oom_kick); + del_timer_sync(&np->nic_poll); +diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c +index bec83cb..7e40105 100644 +--- a/drivers/scsi/hptiop.c ++++ b/drivers/scsi/hptiop.c +@@ -377,8 +377,9 @@ static void hptiop_host_request_callback(struct hptiop_hba *hba, u32 tag) + scp->result = SAM_STAT_CHECK_CONDITION; + memset(&scp->sense_buffer, + 0, sizeof(scp->sense_buffer)); +- memcpy(&scp->sense_buffer, +- &req->sg_list, le32_to_cpu(req->dataxfer_length)); ++ memcpy(&scp->sense_buffer, &req->sg_list, ++ min(sizeof(scp->sense_buffer), ++ le32_to_cpu(req->dataxfer_length))); + break; + + default: +diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h +index ef50fa4..87f6467 100644 +--- a/drivers/usb/core/hcd.h ++++ b/drivers/usb/core/hcd.h +@@ -19,6 +19,8 @@ + + #ifdef __KERNEL__ + ++#include ++ + /* This file contains declarations of usbcore internals that are mostly + * used or exposed by Host Controller Drivers. + */ +@@ -464,5 +466,9 @@ static inline void usbmon_urb_complete(struct usb_bus *bus, struct urb *urb) {} + : (in_interrupt () ? "in_interrupt" : "can sleep")) + + +-#endif /* __KERNEL__ */ ++/* This rwsem is for use only by the hub driver and ehci-hcd. ++ * Nobody else should touch it. ++ */ ++extern struct rw_semaphore ehci_cf_port_reset_rwsem; + ++#endif /* __KERNEL__ */ +diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c +index a1c1a11..bc93e06 100644 +--- a/drivers/usb/core/hub.c ++++ b/drivers/usb/core/hub.c +@@ -117,6 +117,12 @@ MODULE_PARM_DESC(use_both_schemes, + "try the other device initialization scheme if the " + "first one fails"); + ++/* Mutual exclusion for EHCI CF initialization. This interferes with ++ * port reset on some companion controllers. ++ */ ++DECLARE_RWSEM(ehci_cf_port_reset_rwsem); ++EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); ++ + + static inline char *portspeed(int portstatus) + { +@@ -1513,6 +1519,11 @@ static int hub_port_reset(struct usb_hub *hub, int port1, + { + int i, status; + ++ /* Block EHCI CF initialization during the port reset. ++ * Some companion controllers don't like it when they mix. ++ */ ++ down_read(&ehci_cf_port_reset_rwsem); ++ + /* Reset the port */ + for (i = 0; i < PORT_RESET_TRIES; i++) { + status = set_port_feature(hub->hdev, +@@ -1543,7 +1554,7 @@ static int hub_port_reset(struct usb_hub *hub, int port1, + usb_set_device_state(udev, status + ? USB_STATE_NOTATTACHED + : USB_STATE_DEFAULT); +- return status; ++ goto done; + } + + dev_dbg (hub->intfdev, +@@ -1556,6 +1567,8 @@ static int hub_port_reset(struct usb_hub *hub, int port1, + "Cannot enable port %i. Maybe the USB cable is bad?\n", + port1); + ++ done: ++ up_read(&ehci_cf_port_reset_rwsem); + return status; + } + +diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c +index 099aff6..5caa8b3 100644 +--- a/drivers/usb/host/ehci-hcd.c ++++ b/drivers/usb/host/ehci-hcd.c +@@ -566,10 +566,18 @@ static int ehci_run (struct usb_hcd *hcd) + * are explicitly handed to companion controller(s), so no TT is + * involved with the root hub. (Except where one is integrated, + * and there's no companion controller unless maybe for USB OTG.) ++ * ++ * Turning on the CF flag will transfer ownership of all ports ++ * from the companions to the EHCI controller. If any of the ++ * companions are in the middle of a port reset at the time, it ++ * could cause trouble. Write-locking ehci_cf_port_reset_rwsem ++ * guarantees that no resets are in progress. + */ ++ down_write(&ehci_cf_port_reset_rwsem); + hcd->state = HC_STATE_RUNNING; + ehci_writel(ehci, FLAG_CF, &ehci->regs->configured_flag); + ehci_readl(ehci, &ehci->regs->command); /* unblock posted writes */ ++ up_write(&ehci_cf_port_reset_rwsem); + + temp = HC_VERSION(ehci_readl(ehci, &ehci->caps->hc_capbase)); + ehci_info (ehci, +diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c +index 4f8282a..c36eb79 100644 +--- a/drivers/usb/serial/generic.c ++++ b/drivers/usb/serial/generic.c +@@ -190,14 +190,15 @@ int usb_serial_generic_write(struct usb_serial_port *port, const unsigned char * + + /* only do something if we have a bulk out endpoint */ + if (serial->num_bulk_out) { +- spin_lock_bh(&port->lock); ++ unsigned long flags; ++ spin_lock_irqsave(&port->lock, flags); + if (port->write_urb_busy) { +- spin_unlock_bh(&port->lock); ++ spin_unlock_irqrestore(&port->lock, flags); + dbg("%s - already writing", __FUNCTION__); + return 0; + } + port->write_urb_busy = 1; +- spin_unlock_bh(&port->lock); ++ spin_unlock_irqrestore(&port->lock, flags); + + count = (count > port->bulk_out_size) ? port->bulk_out_size : count; + +diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c +index 0683b51..6f22419 100644 +--- a/drivers/usb/serial/kobil_sct.c ++++ b/drivers/usb/serial/kobil_sct.c +@@ -82,6 +82,7 @@ static int kobil_tiocmset(struct usb_serial_port *port, struct file *file, + unsigned int set, unsigned int clear); + static void kobil_read_int_callback( struct urb *urb ); + static void kobil_write_callback( struct urb *purb ); ++static void kobil_set_termios(struct usb_serial_port *port, struct ktermios *old); + + + static struct usb_device_id id_table [] = { +@@ -119,6 +120,7 @@ static struct usb_serial_driver kobil_device = { + .attach = kobil_startup, + .shutdown = kobil_shutdown, + .ioctl = kobil_ioctl, ++ .set_termios = kobil_set_termios, + .tiocmget = kobil_tiocmget, + .tiocmset = kobil_tiocmset, + .open = kobil_open, +@@ -137,7 +139,6 @@ struct kobil_private { + int cur_pos; // index of the next char to send in buf + __u16 device_type; + int line_state; +- struct ktermios internal_termios; + }; + + +@@ -216,7 +217,7 @@ static void kobil_shutdown (struct usb_serial *serial) + + static int kobil_open (struct usb_serial_port *port, struct file *filp) + { +- int i, result = 0; ++ int result = 0; + struct kobil_private *priv; + unsigned char *transfer_buffer; + int transfer_buffer_length = 8; +@@ -242,16 +243,6 @@ static int kobil_open (struct usb_serial_port *port, struct file *filp) + port->tty->termios->c_iflag = IGNBRK | IGNPAR | IXOFF; + port->tty->termios->c_oflag &= ~ONLCR; // do NOT translate CR to CR-NL (0x0A -> 0x0A 0x0D) + +- // set up internal termios structure +- priv->internal_termios.c_iflag = port->tty->termios->c_iflag; +- priv->internal_termios.c_oflag = port->tty->termios->c_oflag; +- priv->internal_termios.c_cflag = port->tty->termios->c_cflag; +- priv->internal_termios.c_lflag = port->tty->termios->c_lflag; +- +- for (i=0; iinternal_termios.c_cc[i] = port->tty->termios->c_cc[i]; +- } +- + // allocate memory for transfer buffer + transfer_buffer = kzalloc(transfer_buffer_length, GFP_KERNEL); + if (! transfer_buffer) { +@@ -358,24 +349,26 @@ static void kobil_close (struct usb_serial_port *port, struct file *filp) + } + + +-static void kobil_read_int_callback( struct urb *purb) ++static void kobil_read_int_callback(struct urb *urb) + { + int result; +- struct usb_serial_port *port = (struct usb_serial_port *) purb->context; ++ struct usb_serial_port *port = urb->context; + struct tty_struct *tty; +- unsigned char *data = purb->transfer_buffer; ++ unsigned char *data = urb->transfer_buffer; ++ int status = urb->status; + // char *dbg_data; + + dbg("%s - port %d", __FUNCTION__, port->number); + +- if (purb->status) { +- dbg("%s - port %d Read int status not zero: %d", __FUNCTION__, port->number, purb->status); ++ if (status) { ++ dbg("%s - port %d Read int status not zero: %d", ++ __FUNCTION__, port->number, status); + return; + } +- +- tty = port->tty; +- if (purb->actual_length) { +- ++ ++ tty = port->tty; ++ if (urb->actual_length) { ++ + // BEGIN DEBUG + /* + dbg_data = kzalloc((3 * purb->actual_length + 10) * sizeof(char), GFP_KERNEL); +@@ -390,15 +383,15 @@ static void kobil_read_int_callback( struct urb *purb) + */ + // END DEBUG + +- tty_buffer_request_room(tty, purb->actual_length); +- tty_insert_flip_string(tty, data, purb->actual_length); ++ tty_buffer_request_room(tty, urb->actual_length); ++ tty_insert_flip_string(tty, data, urb->actual_length); + tty_flip_buffer_push(tty); + } + + // someone sets the dev to 0 if the close method has been called + port->interrupt_in_urb->dev = port->serial->dev; + +- result = usb_submit_urb( port->interrupt_in_urb, GFP_ATOMIC ); ++ result = usb_submit_urb(port->interrupt_in_urb, GFP_ATOMIC); + dbg("%s - port %d Send read URB returns: %i", __FUNCTION__, port->number, result); + } + +@@ -605,102 +598,79 @@ static int kobil_tiocmset(struct usb_serial_port *port, struct file *file, + return (result < 0) ? result : 0; + } + +- +-static int kobil_ioctl(struct usb_serial_port *port, struct file *file, +- unsigned int cmd, unsigned long arg) ++static void kobil_set_termios(struct usb_serial_port *port, struct ktermios *old) + { + struct kobil_private * priv; + int result; + unsigned short urb_val = 0; +- unsigned char *transfer_buffer; +- int transfer_buffer_length = 8; +- char *settings; +- void __user *user_arg = (void __user *)arg; ++ int c_cflag = port->tty->termios->c_cflag; ++ speed_t speed; ++ void * settings; + + priv = usb_get_serial_port_data(port); +- if ((priv->device_type == KOBIL_USBTWIN_PRODUCT_ID) || (priv->device_type == KOBIL_KAAN_SIM_PRODUCT_ID)) { ++ if (priv->device_type == KOBIL_USBTWIN_PRODUCT_ID || priv->device_type == KOBIL_KAAN_SIM_PRODUCT_ID) + // This device doesn't support ioctl calls +- return 0; +- } +- +- switch (cmd) { +- case TCGETS: // 0x5401 +- if (!access_ok(VERIFY_WRITE, user_arg, sizeof(struct ktermios))) { +- dbg("%s - port %d Error in access_ok", __FUNCTION__, port->number); +- return -EFAULT; +- } +- if (kernel_termios_to_user_termios((struct ktermios __user *)arg, +- &priv->internal_termios)) +- return -EFAULT; +- return 0; +- +- case TCSETS: // 0x5402 +- if (!(port->tty->termios)) { +- dbg("%s - port %d Error: port->tty->termios is NULL", __FUNCTION__, port->number); +- return -ENOTTY; +- } +- if (!access_ok(VERIFY_READ, user_arg, sizeof(struct ktermios))) { +- dbg("%s - port %d Error in access_ok", __FUNCTION__, port->number); +- return -EFAULT; +- } +- if (user_termios_to_kernel_termios(&priv->internal_termios, +- (struct ktermios __user *)arg)) +- return -EFAULT; +- +- settings = kzalloc(50, GFP_KERNEL); +- if (! settings) { +- return -ENOBUFS; +- } ++ return; + +- switch (priv->internal_termios.c_cflag & CBAUD) { +- case B1200: ++ switch (speed = tty_get_baud_rate(port->tty)) { ++ case 1200: + urb_val = SUSBCR_SBR_1200; +- strcat(settings, "1200 "); + break; +- case B9600: ++ case 9600: + default: + urb_val = SUSBCR_SBR_9600; +- strcat(settings, "9600 "); + break; +- } ++ } ++ urb_val |= (c_cflag & CSTOPB) ? SUSBCR_SPASB_2StopBits : SUSBCR_SPASB_1StopBit; + +- urb_val |= (priv->internal_termios.c_cflag & CSTOPB) ? SUSBCR_SPASB_2StopBits : SUSBCR_SPASB_1StopBit; +- strcat(settings, (priv->internal_termios.c_cflag & CSTOPB) ? "2 StopBits " : "1 StopBit "); ++ settings = kzalloc(50, GFP_KERNEL); ++ if (! settings) ++ return; + +- if (priv->internal_termios.c_cflag & PARENB) { +- if (priv->internal_termios.c_cflag & PARODD) { +- urb_val |= SUSBCR_SPASB_OddParity; +- strcat(settings, "Odd Parity"); +- } else { +- urb_val |= SUSBCR_SPASB_EvenParity; +- strcat(settings, "Even Parity"); +- } ++ sprintf(settings, "%d ", speed); ++ ++ if (c_cflag & PARENB) { ++ if (c_cflag & PARODD) { ++ urb_val |= SUSBCR_SPASB_OddParity; ++ strcat(settings, "Odd Parity"); + } else { +- urb_val |= SUSBCR_SPASB_NoParity; +- strcat(settings, "No Parity"); ++ urb_val |= SUSBCR_SPASB_EvenParity; ++ strcat(settings, "Even Parity"); + } +- dbg("%s - port %d setting port to: %s", __FUNCTION__, port->number, settings ); ++ } else { ++ urb_val |= SUSBCR_SPASB_NoParity; ++ strcat(settings, "No Parity"); ++ } + +- result = usb_control_msg( port->serial->dev, +- usb_rcvctrlpipe(port->serial->dev, 0 ), +- SUSBCRequest_SetBaudRateParityAndStopBits, +- USB_TYPE_VENDOR | USB_RECIP_ENDPOINT | USB_DIR_OUT, +- urb_val, +- 0, +- settings, +- 0, +- KOBIL_TIMEOUT +- ); ++ result = usb_control_msg( port->serial->dev, ++ usb_rcvctrlpipe(port->serial->dev, 0 ), ++ SUSBCRequest_SetBaudRateParityAndStopBits, ++ USB_TYPE_VENDOR | USB_RECIP_ENDPOINT | USB_DIR_OUT, ++ urb_val, ++ 0, ++ settings, ++ 0, ++ KOBIL_TIMEOUT ++ ); ++ kfree(settings); ++} + +- dbg("%s - port %d Send set_baudrate URB returns: %i", __FUNCTION__, port->number, result); +- kfree(settings); ++static int kobil_ioctl(struct usb_serial_port *port, struct file * file, unsigned int cmd, unsigned long arg) ++{ ++ struct kobil_private * priv = usb_get_serial_port_data(port); ++ unsigned char *transfer_buffer; ++ int transfer_buffer_length = 8; ++ int result; ++ ++ if (priv->device_type == KOBIL_USBTWIN_PRODUCT_ID || priv->device_type == KOBIL_KAAN_SIM_PRODUCT_ID) ++ // This device doesn't support ioctl calls + return 0; + ++ switch (cmd) { + case TCFLSH: // 0x540B + transfer_buffer = kmalloc(transfer_buffer_length, GFP_KERNEL); +- if (! transfer_buffer) { ++ if (! transfer_buffer) + return -ENOBUFS; +- } + + result = usb_control_msg( port->serial->dev, + usb_rcvctrlpipe(port->serial->dev, 0 ), +@@ -714,15 +684,13 @@ static int kobil_ioctl(struct usb_serial_port *port, struct file *file, + ); + + dbg("%s - port %d Send reset_all_queues (FLUSH) URB returns: %i", __FUNCTION__, port->number, result); +- + kfree(transfer_buffer); +- return ((result < 0) ? -EFAULT : 0); +- ++ return (result < 0) ? -EFAULT : 0; ++ default: ++ return -ENOIOCTLCMD; + } +- return -ENOIOCTLCMD; + } + +- + static int __init kobil_init (void) + { + int retval; +diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c +index a480b09..3175288 100644 +--- a/fs/ocfs2/aops.c ++++ b/fs/ocfs2/aops.c +@@ -661,6 +661,27 @@ static void ocfs2_clear_page_regions(struct page *page, + } + + /* ++ * Nonsparse file systems fully allocate before we get to the write ++ * code. This prevents ocfs2_write() from tagging the write as an ++ * allocating one, which means ocfs2_map_page_blocks() might try to ++ * read-in the blocks at the tail of our file. Avoid reading them by ++ * testing i_size against each block offset. ++ */ ++static int ocfs2_should_read_blk(struct inode *inode, struct page *page, ++ unsigned int block_start) ++{ ++ u64 offset = page_offset(page) + block_start; ++ ++ if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) ++ return 1; ++ ++ if (i_size_read(inode) > offset) ++ return 1; ++ ++ return 0; ++} ++ ++/* + * Some of this taken from block_prepare_write(). We already have our + * mapping by now though, and the entire write will be allocating or + * it won't, so not much need to use BH_New. +@@ -711,7 +732,8 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, + if (!buffer_uptodate(bh)) + set_buffer_uptodate(bh); + } else if (!buffer_uptodate(bh) && !buffer_delay(bh) && +- (block_start < from || block_end > to)) { ++ ocfs2_should_read_blk(inode, page, block_start) && ++ (block_start < from || block_end > to)) { + ll_rw_block(READ, 1, &bh); + *wait_bh++=bh; + } +diff --git a/include/linux/netlink.h b/include/linux/netlink.h +index 2e23353..b2834d8 100644 +--- a/include/linux/netlink.h ++++ b/include/linux/netlink.h +@@ -173,7 +173,7 @@ extern int netlink_unregister_notifier(struct notifier_block *nb); + /* finegrained unicast helpers: */ + struct sock *netlink_getsockbyfilp(struct file *filp); + int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, +- long timeo, struct sock *ssk); ++ long *timeo, struct sock *ssk); + void netlink_detachskb(struct sock *sk, struct sk_buff *skb); + int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol); + +diff --git a/ipc/mqueue.c b/ipc/mqueue.c +index a242c83..1eef14b 100644 +--- a/ipc/mqueue.c ++++ b/ipc/mqueue.c +@@ -1014,6 +1014,8 @@ asmlinkage long sys_mq_notify(mqd_t mqdes, + return -EINVAL; + } + if (notification.sigev_notify == SIGEV_THREAD) { ++ long timeo; ++ + /* create the notify skb */ + nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL); + ret = -ENOMEM; +@@ -1042,8 +1044,8 @@ retry: + goto out; + } + +- ret = netlink_attachskb(sock, nc, 0, +- MAX_SCHEDULE_TIMEOUT, NULL); ++ timeo = MAX_SCHEDULE_TIMEOUT; ++ ret = netlink_attachskb(sock, nc, 0, &timeo, NULL); + if (ret == 1) + goto retry; + if (ret) { +diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c +index 7e52eb0..589b1e4 100644 +--- a/kernel/futex_compat.c ++++ b/kernel/futex_compat.c +@@ -29,6 +29,15 @@ fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, + return 0; + } + ++static void __user *futex_uaddr(struct robust_list *entry, ++ compat_long_t futex_offset) ++{ ++ compat_uptr_t base = ptr_to_compat(entry); ++ void __user *uaddr = compat_ptr(base + futex_offset); ++ ++ return uaddr; ++} ++ + /* + * Walk curr->robust_list (very carefully, it's a userspace list!) + * and mark any locks found there dead, and notify any waiters. +@@ -61,18 +70,23 @@ void compat_exit_robust_list(struct task_struct *curr) + if (fetch_robust_entry(&upending, &pending, + &head->list_op_pending, &pip)) + return; +- if (pending) +- handle_futex_death((void __user *)pending + futex_offset, curr, pip); ++ if (pending) { ++ void __user *uaddr = futex_uaddr(pending, ++ futex_offset); ++ handle_futex_death(uaddr, curr, pip); ++ } + + while (entry != (struct robust_list __user *) &head->list) { + /* + * A pending lock might already be on the list, so + * dont process it twice: + */ +- if (entry != pending) +- if (handle_futex_death((void __user *)entry + futex_offset, +- curr, pi)) ++ if (entry != pending) { ++ void __user *uaddr = futex_uaddr(entry, ++ futex_offset); ++ if (handle_futex_death(uaddr, curr, pi)) + return; ++ } + + /* + * Fetch the next entry in the list: +diff --git a/kernel/params.c b/kernel/params.c +index 8e8ca8f..1f17b58 100644 +--- a/kernel/params.c ++++ b/kernel/params.c +@@ -591,19 +591,16 @@ static void __init param_sysfs_builtin(void) + + for (i=0; i < __stop___param - __start___param; i++) { + char *dot; +- size_t kplen; ++ size_t max_name_len; + + kp = &__start___param[i]; +- kplen = strlen(kp->name); ++ max_name_len = ++ min_t(size_t, MAX_KBUILD_MODNAME, strlen(kp->name)); + +- /* We do not handle args without periods. */ +- if (kplen > MAX_KBUILD_MODNAME) { +- DEBUGP("kernel parameter name is too long: %s\n", kp->name); +- continue; +- } +- dot = memchr(kp->name, '.', kplen); ++ dot = memchr(kp->name, '.', max_name_len); + if (!dot) { +- DEBUGP("couldn't find period in %s\n", kp->name); ++ DEBUGP("couldn't find period in first %d characters " ++ "of %s\n", MAX_KBUILD_MODNAME, kp->name); + continue; + } + name_len = dot - kp->name; +diff --git a/mm/page-writeback.c b/mm/page-writeback.c +index eec1481..2d39627 100644 +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -674,8 +674,10 @@ retry: + + ret = (*writepage)(page, wbc, data); + +- if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) ++ if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { + unlock_page(page); ++ ret = 0; ++ } + if (ret || (--(wbc->nr_to_write) <= 0)) + done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { +diff --git a/mm/shmem.c b/mm/shmem.c +index b6aae2b..2320b60 100644 +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -911,6 +911,21 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) + struct inode *inode; + + BUG_ON(!PageLocked(page)); ++ /* ++ * shmem_backing_dev_info's capabilities prevent regular writeback or ++ * sync from ever calling shmem_writepage; but a stacking filesystem ++ * may use the ->writepage of its underlying filesystem, in which case ++ * we want to do nothing when that underlying filesystem is tmpfs ++ * (writing out to swap is useful as a response to memory pressure, but ++ * of no use to stabilize the data) - just redirty the page, unlock it ++ * and claim success in this case. AOP_WRITEPAGE_ACTIVATE, and the ++ * page_mapped check below, must be avoided unless we're in reclaim. ++ */ ++ if (!wbc->for_reclaim) { ++ set_page_dirty(page); ++ unlock_page(page); ++ return 0; ++ } + BUG_ON(page_mapped(page)); + + mapping = page->mapping; +diff --git a/mm/slub.c b/mm/slub.c +index e0cf621..648f2c7 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -1431,28 +1431,8 @@ new_slab: + page = new_slab(s, gfpflags, node); + if (page) { + cpu = smp_processor_id(); +- if (s->cpu_slab[cpu]) { +- /* +- * Someone else populated the cpu_slab while we +- * enabled interrupts, or we have gotten scheduled +- * on another cpu. The page may not be on the +- * requested node even if __GFP_THISNODE was +- * specified. So we need to recheck. +- */ +- if (node == -1 || +- page_to_nid(s->cpu_slab[cpu]) == node) { +- /* +- * Current cpuslab is acceptable and we +- * want the current one since its cache hot +- */ +- discard_slab(s, page); +- page = s->cpu_slab[cpu]; +- slab_lock(page); +- goto load_freelist; +- } +- /* New slab does not fit our expectations */ ++ if (s->cpu_slab[cpu]) + flush_slab(s, s->cpu_slab[cpu], cpu); +- } + slab_lock(page); + SetSlabFrozen(page); + s->cpu_slab[cpu] = page; +diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c +index ab86137..630ebb7 100644 +--- a/net/ipv4/ipcomp.c ++++ b/net/ipv4/ipcomp.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -355,7 +356,7 @@ static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) + for_each_possible_cpu(cpu) { + struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, + CRYPTO_ALG_ASYNC); +- if (!tfm) ++ if (IS_ERR(tfm)) + goto error; + *per_cpu_ptr(tfms, cpu) = tfm; + } +diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c +index 1ee50b5..3680f64 100644 +--- a/net/ipv6/ipcomp6.c ++++ b/net/ipv6/ipcomp6.c +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -366,7 +367,7 @@ static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name) + for_each_possible_cpu(cpu) { + struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, + CRYPTO_ALG_ASYNC); +- if (!tfm) ++ if (IS_ERR(tfm)) + goto error; + *per_cpu_ptr(tfms, cpu) = tfm; + } +diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c +index ccdd5d2..2721ff4 100644 +--- a/net/netfilter/nf_conntrack_proto_tcp.c ++++ b/net/netfilter/nf_conntrack_proto_tcp.c +@@ -839,6 +839,22 @@ static int tcp_packet(struct nf_conn *conntrack, + new_state = tcp_conntracks[dir][index][old_state]; + + switch (new_state) { ++ case TCP_CONNTRACK_SYN_SENT: ++ if (old_state < TCP_CONNTRACK_TIME_WAIT) ++ break; ++ if ((conntrack->proto.tcp.seen[!dir].flags & ++ IP_CT_TCP_FLAG_CLOSE_INIT) ++ || (conntrack->proto.tcp.last_dir == dir ++ && conntrack->proto.tcp.last_index == TCP_RST_SET)) { ++ /* Attempt to reopen a closed/aborted connection. ++ * Delete this connection and look up again. */ ++ write_unlock_bh(&tcp_lock); ++ if (del_timer(&conntrack->timeout)) ++ conntrack->timeout.function((unsigned long) ++ conntrack); ++ return -NF_REPEAT; ++ } ++ /* Fall through */ + case TCP_CONNTRACK_IGNORE: + /* Ignored packets: + * +@@ -888,27 +904,6 @@ static int tcp_packet(struct nf_conn *conntrack, + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_tcp: invalid state "); + return -NF_ACCEPT; +- case TCP_CONNTRACK_SYN_SENT: +- if (old_state < TCP_CONNTRACK_TIME_WAIT) +- break; +- if ((conntrack->proto.tcp.seen[dir].flags & +- IP_CT_TCP_FLAG_CLOSE_INIT) +- || after(ntohl(th->seq), +- conntrack->proto.tcp.seen[dir].td_end)) { +- /* Attempt to reopen a closed connection. +- * Delete this connection and look up again. */ +- write_unlock_bh(&tcp_lock); +- if (del_timer(&conntrack->timeout)) +- conntrack->timeout.function((unsigned long) +- conntrack); +- return -NF_REPEAT; +- } else { +- write_unlock_bh(&tcp_lock); +- if (LOG_INVALID(IPPROTO_TCP)) +- nf_log_packet(pf, 0, skb, NULL, NULL, +- NULL, "nf_ct_tcp: invalid SYN"); +- return -NF_ACCEPT; +- } + case TCP_CONNTRACK_CLOSE: + if (index == TCP_RST_SET + && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) +@@ -941,6 +936,7 @@ static int tcp_packet(struct nf_conn *conntrack, + in_window: + /* From now on we have got in-window packets */ + conntrack->proto.tcp.last_index = index; ++ conntrack->proto.tcp.last_dir = dir; + + DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " + "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", +diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c +index 1f15821..6ac83c2 100644 +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -732,7 +732,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp) + * 1: repeat lookup - reference dropped while waiting for socket memory. + */ + int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, +- long timeo, struct sock *ssk) ++ long *timeo, struct sock *ssk) + { + struct netlink_sock *nlk; + +@@ -741,7 +741,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, + if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || + test_bit(0, &nlk->state)) { + DECLARE_WAITQUEUE(wait, current); +- if (!timeo) { ++ if (!*timeo) { + if (!ssk || nlk_sk(ssk)->pid == 0) + netlink_overrun(sk); + sock_put(sk); +@@ -755,7 +755,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, + if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || + test_bit(0, &nlk->state)) && + !sock_flag(sk, SOCK_DEAD)) +- timeo = schedule_timeout(timeo); ++ *timeo = schedule_timeout(*timeo); + + __set_current_state(TASK_RUNNING); + remove_wait_queue(&nlk->wait, &wait); +@@ -763,7 +763,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, + + if (signal_pending(current)) { + kfree_skb(skb); +- return sock_intr_errno(timeo); ++ return sock_intr_errno(*timeo); + } + return 1; + } +@@ -827,7 +827,7 @@ retry: + kfree_skb(skb); + return PTR_ERR(sk); + } +- err = netlink_attachskb(sk, skb, nonblock, timeo, ssk); ++ err = netlink_attachskb(sk, skb, nonblock, &timeo, ssk); + if (err == 1) + goto retry; + if (err) +diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c +index f2686ea..1d36265 100644 +--- a/net/sched/cls_u32.c ++++ b/net/sched/cls_u32.c +@@ -107,7 +107,7 @@ static struct tc_u_common *u32_list; + + static __inline__ unsigned u32_hash_fold(u32 key, struct tc_u32_sel *sel, u8 fshift) + { +- unsigned h = (key & sel->hmask)>>fshift; ++ unsigned h = ntohl(key & sel->hmask)>>fshift; + + return h; + } +@@ -631,7 +631,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, + n->handle = handle; + { + u8 i = 0; +- u32 mask = s->hmask; ++ u32 mask = ntohl(s->hmask); + if (mask) { + while (!(mask & 1)) { + i++; +diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c +index f05ad9a..656ccd9 100644 +--- a/net/sched/sch_teql.c ++++ b/net/sched/sch_teql.c +@@ -263,6 +263,9 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * + static __inline__ int + teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) + { ++ if (dev->qdisc == &noop_qdisc) ++ return -ENODEV; ++ + if (dev->hard_header == NULL || + skb->dst == NULL || + skb->dst->neighbour == NULL) +diff --git a/net/socket.c b/net/socket.c +index 48bd793..8211578 100644 +--- a/net/socket.c ++++ b/net/socket.c +@@ -1246,11 +1246,14 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, + goto out_release_both; + + fd1 = sock_alloc_fd(&newfile1); +- if (unlikely(fd1 < 0)) ++ if (unlikely(fd1 < 0)) { ++ err = fd1; + goto out_release_both; ++ } + + fd2 = sock_alloc_fd(&newfile2); + if (unlikely(fd2 < 0)) { ++ err = fd2; + put_filp(newfile1); + put_unused_fd(fd1); + goto out_release_both; +diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c +index e3964fc..d5b2f53 100644 +--- a/sound/pci/hda/patch_sigmatel.c ++++ b/sound/pci/hda/patch_sigmatel.c +@@ -153,8 +153,9 @@ static hda_nid_t stac925x_dac_nids[1] = { + 0x02, + }; + +-static hda_nid_t stac925x_dmic_nids[1] = { +- 0x15, ++#define STAC925X_NUM_DMICS 1 ++static hda_nid_t stac925x_dmic_nids[STAC925X_NUM_DMICS + 1] = { ++ 0x15, 0 + }; + + static hda_nid_t stac922x_adc_nids[2] = { +@@ -181,8 +182,9 @@ static hda_nid_t stac9205_mux_nids[2] = { + 0x19, 0x1a + }; + +-static hda_nid_t stac9205_dmic_nids[2] = { +- 0x17, 0x18, ++#define STAC9205_NUM_DMICS 2 ++static hda_nid_t stac9205_dmic_nids[STAC9205_NUM_DMICS + 1] = { ++ 0x17, 0x18, 0 + }; + + static hda_nid_t stac9200_pin_nids[8] = { +@@ -1972,7 +1974,7 @@ static int patch_stac925x(struct hda_codec *codec) + case 0x83847633: /* STAC9202D */ + case 0x83847636: /* STAC9251 */ + case 0x83847637: /* STAC9251D */ +- spec->num_dmics = 1; ++ spec->num_dmics = STAC925X_NUM_DMICS; + spec->dmic_nids = stac925x_dmic_nids; + break; + default: +@@ -2202,7 +2204,7 @@ static int patch_stac9205(struct hda_codec *codec) + spec->mux_nids = stac9205_mux_nids; + spec->num_muxes = ARRAY_SIZE(stac9205_mux_nids); + spec->dmic_nids = stac9205_dmic_nids; +- spec->num_dmics = ARRAY_SIZE(stac9205_dmic_nids); ++ spec->num_dmics = STAC9205_NUM_DMICS; + spec->dmux_nid = 0x1d; + + spec->init = stac9205_core_init; +diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c +index 3b3ef65..75dcb9a 100644 +--- a/sound/pci/rme9652/hdsp.c ++++ b/sound/pci/rme9652/hdsp.c +@@ -3108,6 +3108,9 @@ static int hdsp_dds_offset(struct hdsp *hdsp) + unsigned int dds_value = hdsp->dds_value; + int system_sample_rate = hdsp->system_sample_rate; + ++ if (!dds_value) ++ return 0; ++ + n = DDS_NUMERATOR; + /* + * dds_value = n / rate diff --git a/tags/2.6.22-2/01014_linux-2.6.22.15.patch b/tags/2.6.22-2/01014_linux-2.6.22.15.patch new file mode 100644 index 0000000..320c021 --- /dev/null +++ b/tags/2.6.22-2/01014_linux-2.6.22.15.patch @@ -0,0 +1,1096 @@ +Subject: Linux 2.6.22.15 +From: Greg Kroah-Hartman + +Signed-off-by: Greg Kroah-Hartman + +diff --git a/crypto/algapi.c b/crypto/algapi.c +index f137a43..ec286a2 100644 +--- a/crypto/algapi.c ++++ b/crypto/algapi.c +@@ -98,6 +98,9 @@ static void crypto_remove_spawn(struct crypto_spawn *spawn, + return; + + inst->alg.cra_flags |= CRYPTO_ALG_DEAD; ++ if (hlist_unhashed(&inst->list)) ++ return; ++ + if (!tmpl || !crypto_tmpl_get(tmpl)) + return; + +@@ -333,9 +336,6 @@ int crypto_register_instance(struct crypto_template *tmpl, + LIST_HEAD(list); + int err = -EINVAL; + +- if (inst->alg.cra_destroy) +- goto err; +- + err = crypto_check_alg(&inst->alg); + if (err) + goto err; +diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c +index 3400b3e..e722f83 100644 +--- a/drivers/ata/ahci.c ++++ b/drivers/ata/ahci.c +@@ -1241,7 +1241,7 @@ static void ahci_host_intr(struct ata_port *ap) + struct ata_eh_info *ehi = &ap->eh_info; + struct ahci_port_priv *pp = ap->private_data; + u32 status, qc_active; +- int rc, known_irq = 0; ++ int rc; + + status = readl(port_mmio + PORT_IRQ_STAT); + writel(status, port_mmio + PORT_IRQ_STAT); +@@ -1257,74 +1257,11 @@ static void ahci_host_intr(struct ata_port *ap) + qc_active = readl(port_mmio + PORT_CMD_ISSUE); + + rc = ata_qc_complete_multiple(ap, qc_active, NULL); +- if (rc > 0) +- return; + if (rc < 0) { + ehi->err_mask |= AC_ERR_HSM; + ehi->action |= ATA_EH_SOFTRESET; + ata_port_freeze(ap); +- return; +- } +- +- /* hmmm... a spurious interupt */ +- +- /* if !NCQ, ignore. No modern ATA device has broken HSM +- * implementation for non-NCQ commands. +- */ +- if (!ap->sactive) +- return; +- +- if (status & PORT_IRQ_D2H_REG_FIS) { +- if (!pp->ncq_saw_d2h) +- ata_port_printk(ap, KERN_INFO, +- "D2H reg with I during NCQ, " +- "this message won't be printed again\n"); +- pp->ncq_saw_d2h = 1; +- known_irq = 1; +- } +- +- if (status & PORT_IRQ_DMAS_FIS) { +- if (!pp->ncq_saw_dmas) +- ata_port_printk(ap, KERN_INFO, +- "DMAS FIS during NCQ, " +- "this message won't be printed again\n"); +- pp->ncq_saw_dmas = 1; +- known_irq = 1; +- } +- +- if (status & PORT_IRQ_SDB_FIS) { +- const __le32 *f = pp->rx_fis + RX_FIS_SDB; +- +- if (le32_to_cpu(f[1])) { +- /* SDB FIS containing spurious completions +- * might be dangerous, whine and fail commands +- * with HSM violation. EH will turn off NCQ +- * after several such failures. +- */ +- ata_ehi_push_desc(ehi, +- "spurious completions during NCQ " +- "issue=0x%x SAct=0x%x FIS=%08x:%08x", +- readl(port_mmio + PORT_CMD_ISSUE), +- readl(port_mmio + PORT_SCR_ACT), +- le32_to_cpu(f[0]), le32_to_cpu(f[1])); +- ehi->err_mask |= AC_ERR_HSM; +- ehi->action |= ATA_EH_SOFTRESET; +- ata_port_freeze(ap); +- } else { +- if (!pp->ncq_saw_sdb) +- ata_port_printk(ap, KERN_INFO, +- "spurious SDB FIS %08x:%08x during NCQ, " +- "this message won't be printed again\n", +- le32_to_cpu(f[0]), le32_to_cpu(f[1])); +- pp->ncq_saw_sdb = 1; +- } +- known_irq = 1; + } +- +- if (!known_irq) +- ata_port_printk(ap, KERN_INFO, "spurious interrupt " +- "(irq_stat 0x%x active_tag 0x%x sactive 0x%x)\n", +- status, ap->active_tag, ap->sactive); + } + + static void ahci_irq_clear(struct ata_port *ap) +diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c +index e6e403f..22b6368 100644 +--- a/drivers/ata/libata-core.c ++++ b/drivers/ata/libata-core.c +@@ -3785,6 +3785,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { + /* Devices where NCQ should be avoided */ + /* NCQ is slow */ + { "WDC WD740ADFD-00", NULL, ATA_HORKAGE_NONCQ }, ++ { "WDC WD740ADFD-00NLR1", NULL, ATA_HORKAGE_NONCQ, }, + /* http://thread.gmane.org/gmane.linux.ide/14907 */ + { "FUJITSU MHT2060BH", NULL, ATA_HORKAGE_NONCQ }, + /* NCQ is broken */ +@@ -3803,15 +3804,6 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { + { "HTS541060G9SA00", "MB3OC60D", ATA_HORKAGE_NONCQ, }, + { "HTS541080G9SA00", "MB4OC60D", ATA_HORKAGE_NONCQ, }, + { "HTS541010G9SA00", "MBZOC60D", ATA_HORKAGE_NONCQ, }, +- /* Drives which do spurious command completion */ +- { "HTS541680J9SA00", "SB2IC7EP", ATA_HORKAGE_NONCQ, }, +- { "HTS541612J9SA00", "SBDIC7JP", ATA_HORKAGE_NONCQ, }, +- { "Hitachi HTS541616J9SA00", "SB4OC70P", ATA_HORKAGE_NONCQ, }, +- { "WDC WD740ADFD-00NLR1", NULL, ATA_HORKAGE_NONCQ, }, +- { "FUJITSU MHV2080BH", "00840028", ATA_HORKAGE_NONCQ, }, +- { "ST9160821AS", "3.CLF", ATA_HORKAGE_NONCQ, }, +- { "ST3160812AS", "3.AD", ATA_HORKAGE_NONCQ, }, +- { "SAMSUNG HD401LJ", "ZZ100-15", ATA_HORKAGE_NONCQ, }, + + /* End Marker */ + { } +diff --git a/drivers/atm/he.c b/drivers/atm/he.c +index d33aba6..3b64a99 100644 +--- a/drivers/atm/he.c ++++ b/drivers/atm/he.c +@@ -394,6 +394,11 @@ he_init_one(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent) + he_dev->atm_dev->dev_data = he_dev; + atm_dev->dev_data = he_dev; + he_dev->number = atm_dev->number; ++#ifdef USE_TASKLET ++ tasklet_init(&he_dev->tasklet, he_tasklet, (unsigned long) he_dev); ++#endif ++ spin_lock_init(&he_dev->global_lock); ++ + if (he_start(atm_dev)) { + he_stop(he_dev); + err = -ENODEV; +@@ -1173,11 +1178,6 @@ he_start(struct atm_dev *dev) + if ((err = he_init_irq(he_dev)) != 0) + return err; + +-#ifdef USE_TASKLET +- tasklet_init(&he_dev->tasklet, he_tasklet, (unsigned long) he_dev); +-#endif +- spin_lock_init(&he_dev->global_lock); +- + /* 4.11 enable pci bus controller state machines */ + host_cntl |= (OUTFF_ENB | CMDFF_ENB | + QUICK_RD_RETRY | QUICK_WR_RETRY | PERR_INT_ENB); +diff --git a/drivers/block/rd.c b/drivers/block/rd.c +index a1512da..e30bd9e 100644 +--- a/drivers/block/rd.c ++++ b/drivers/block/rd.c +@@ -189,6 +189,18 @@ static int ramdisk_set_page_dirty(struct page *page) + return 0; + } + ++/* ++ * releasepage is called by pagevec_strip/try_to_release_page if ++ * buffers_heads_over_limit is true. Without a releasepage function ++ * try_to_free_buffers is called instead. That can unset the dirty ++ * bit of our ram disk pages, which will be eventually freed, even ++ * if the page is still in use. ++ */ ++static int ramdisk_releasepage(struct page *page, gfp_t dummy) ++{ ++ return 0; ++} ++ + static const struct address_space_operations ramdisk_aops = { + .readpage = ramdisk_readpage, + .prepare_write = ramdisk_prepare_write, +@@ -196,6 +208,7 @@ static const struct address_space_operations ramdisk_aops = { + .writepage = ramdisk_writepage, + .set_page_dirty = ramdisk_set_page_dirty, + .writepages = ramdisk_writepages, ++ .releasepage = ramdisk_releasepage, + }; + + static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector, +diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c +index c97330b..eb9a247 100644 +--- a/drivers/isdn/i4l/isdn_common.c ++++ b/drivers/isdn/i4l/isdn_common.c +@@ -1514,6 +1514,7 @@ isdn_ioctl(struct inode *inode, struct file *file, uint cmd, ulong arg) + if (copy_from_user(&iocts, argp, + sizeof(isdn_ioctl_struct))) + return -EFAULT; ++ iocts.drvid[sizeof(iocts.drvid)-1] = 0; + if (strlen(iocts.drvid)) { + if ((p = strchr(iocts.drvid, ','))) + *p = 0; +@@ -1598,6 +1599,7 @@ isdn_ioctl(struct inode *inode, struct file *file, uint cmd, ulong arg) + if (copy_from_user(&iocts, argp, + sizeof(isdn_ioctl_struct))) + return -EFAULT; ++ iocts.drvid[sizeof(iocts.drvid)-1] = 0; + if (strlen(iocts.drvid)) { + drvidx = -1; + for (i = 0; i < ISDN_MAX_DRIVERS; i++) +@@ -1642,7 +1644,7 @@ isdn_ioctl(struct inode *inode, struct file *file, uint cmd, ulong arg) + } else { + p = (char __user *) iocts.arg; + for (i = 0; i < 10; i++) { +- sprintf(bname, "%s%s", ++ snprintf(bname, sizeof(bname), "%s%s", + strlen(dev->drv[drvidx]->msn2eaz[i]) ? + dev->drv[drvidx]->msn2eaz[i] : "_", + (i < 9) ? "," : "\0"); +@@ -1672,6 +1674,7 @@ isdn_ioctl(struct inode *inode, struct file *file, uint cmd, ulong arg) + char *p; + if (copy_from_user(&iocts, argp, sizeof(isdn_ioctl_struct))) + return -EFAULT; ++ iocts.drvid[sizeof(iocts.drvid)-1] = 0; + if (strlen(iocts.drvid)) { + if ((p = strchr(iocts.drvid, ','))) + *p = 0; +diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c +index aa83277..75e1423 100644 +--- a/drivers/isdn/i4l/isdn_net.c ++++ b/drivers/isdn/i4l/isdn_net.c +@@ -2126,7 +2126,7 @@ isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) + u_long flags; + isdn_net_dev *p; + isdn_net_phone *n; +- char nr[32]; ++ char nr[ISDN_MSNLEN]; + char *my_eaz; + + /* Search name in netdev-chain */ +@@ -2135,7 +2135,7 @@ isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) + nr[1] = '\0'; + printk(KERN_INFO "isdn_net: Incoming call without OAD, assuming '0'\n"); + } else +- strcpy(nr, setup->phone); ++ strlcpy(nr, setup->phone, ISDN_MSNLEN); + si1 = (int) setup->si1; + si2 = (int) setup->si2; + if (!setup->eazmsn[0]) { +@@ -2802,7 +2802,7 @@ isdn_net_setcfg(isdn_net_ioctl_cfg * cfg) + chidx = -1; + } + } +- strcpy(lp->msn, cfg->eaz); ++ strlcpy(lp->msn, cfg->eaz, sizeof(lp->msn)); + lp->pre_device = drvidx; + lp->pre_channel = chidx; + lp->onhtime = cfg->onhtime; +@@ -2951,7 +2951,7 @@ isdn_net_addphone(isdn_net_ioctl_phone * phone) + if (p) { + if (!(n = kmalloc(sizeof(isdn_net_phone), GFP_KERNEL))) + return -ENOMEM; +- strcpy(n->num, phone->phone); ++ strlcpy(n->num, phone->phone, sizeof(n->num)); + n->next = p->local->phone[phone->outgoing & 1]; + p->local->phone[phone->outgoing & 1] = n; + return 0; +diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c +index 6862c11..1b7a5a8 100644 +--- a/drivers/net/atl1/atl1_main.c ++++ b/drivers/net/atl1/atl1_main.c +@@ -2097,21 +2097,26 @@ static int __devinit atl1_probe(struct pci_dev *pdev, + struct net_device *netdev; + struct atl1_adapter *adapter; + static int cards_found = 0; +- bool pci_using_64 = true; + int err; + + err = pci_enable_device(pdev); + if (err) + return err; + +- err = pci_set_dma_mask(pdev, DMA_64BIT_MASK); ++ /* ++ * The atl1 chip can DMA to 64-bit addresses, but it uses a single ++ * shared register for the high 32 bits, so only a single, aligned, ++ * 4 GB physical address range can be used at a time. ++ * ++ * Supporting 64-bit DMA on this hardware is more trouble than it's ++ * worth. It is far easier to limit to 32-bit DMA than update ++ * various kernel subsystems to support the mechanics required by a ++ * fixed-high-32-bit system. ++ */ ++ err = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + if (err) { +- err = pci_set_dma_mask(pdev, DMA_32BIT_MASK); +- if (err) { +- dev_err(&pdev->dev, "no usable DMA configuration\n"); +- goto err_dma; +- } +- pci_using_64 = false; ++ dev_err(&pdev->dev, "no usable DMA configuration\n"); ++ goto err_dma; + } + /* Mark all PCI regions associated with PCI device + * pdev as being reserved by owner atl1_driver_name +@@ -2176,7 +2181,6 @@ static int __devinit atl1_probe(struct pci_dev *pdev, + + netdev->ethtool_ops = &atl1_ethtool_ops; + adapter->bd_number = cards_found; +- adapter->pci_using_64 = pci_using_64; + + /* setup the private structure */ + err = atl1_sw_init(adapter); +@@ -2193,9 +2197,6 @@ static int __devinit atl1_probe(struct pci_dev *pdev, + */ + /* netdev->features |= NETIF_F_TSO; */ + +- if (pci_using_64) +- netdev->features |= NETIF_F_HIGHDMA; +- + netdev->features |= NETIF_F_LLTX; + + /* +diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c +index 06f6ec3..36b3a66 100644 +--- a/drivers/net/forcedeth.c ++++ b/drivers/net/forcedeth.c +@@ -5283,19 +5283,15 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i + if (readl(base + NvRegTransmitterControl) & NVREG_XMITCTL_SYNC_PHY_INIT) { + np->mac_in_use = readl(base + NvRegTransmitterControl) & NVREG_XMITCTL_MGMT_ST; + dprintk(KERN_INFO "%s: mgmt unit is running. mac in use %x.\n", pci_name(pci_dev), np->mac_in_use); +- for (i = 0; i < 5000; i++) { +- msleep(1); +- if (nv_mgmt_acquire_sema(dev)) { +- /* management unit setup the phy already? */ +- if ((readl(base + NvRegTransmitterControl) & NVREG_XMITCTL_SYNC_MASK) == +- NVREG_XMITCTL_SYNC_PHY_INIT) { +- /* phy is inited by mgmt unit */ +- phyinitialized = 1; +- dprintk(KERN_INFO "%s: Phy already initialized by mgmt unit.\n", pci_name(pci_dev)); +- } else { +- /* we need to init the phy */ +- } +- break; ++ if (nv_mgmt_acquire_sema(dev)) { ++ /* management unit setup the phy already? */ ++ if ((readl(base + NvRegTransmitterControl) & NVREG_XMITCTL_SYNC_MASK) == ++ NVREG_XMITCTL_SYNC_PHY_INIT) { ++ /* phy is inited by mgmt unit */ ++ phyinitialized = 1; ++ dprintk(KERN_INFO "%s: Phy already initialized by mgmt unit.\n", pci_name(pci_dev)); ++ } else { ++ /* we need to init the phy */ + } + } + } +@@ -5553,6 +5549,22 @@ static struct pci_device_id pci_tbl[] = { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_27), + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT, + }, ++ { /* MCP79 Ethernet Controller */ ++ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_36), ++ .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT, ++ }, ++ { /* MCP79 Ethernet Controller */ ++ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_37), ++ .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT, ++ }, ++ { /* MCP79 Ethernet Controller */ ++ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_38), ++ .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT, ++ }, ++ { /* MCP79 Ethernet Controller */ ++ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_39), ++ .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT, ++ }, + {0,}, + }; + +diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c +index 5caa8b3..ba78f8e 100644 +--- a/drivers/usb/host/ehci-hcd.c ++++ b/drivers/usb/host/ehci-hcd.c +@@ -571,12 +571,15 @@ static int ehci_run (struct usb_hcd *hcd) + * from the companions to the EHCI controller. If any of the + * companions are in the middle of a port reset at the time, it + * could cause trouble. Write-locking ehci_cf_port_reset_rwsem +- * guarantees that no resets are in progress. ++ * guarantees that no resets are in progress. After we set CF, ++ * a short delay lets the hardware catch up; new resets shouldn't ++ * be started before the port switching actions could complete. + */ + down_write(&ehci_cf_port_reset_rwsem); + hcd->state = HC_STATE_RUNNING; + ehci_writel(ehci, FLAG_CF, &ehci->regs->configured_flag); + ehci_readl(ehci, &ehci->regs->command); /* unblock posted writes */ ++ msleep(5); + up_write(&ehci_cf_port_reset_rwsem); + + temp = HC_VERSION(ehci_readl(ehci, &ehci->caps->hc_capbase)); +diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c +index 51bd80d..3acfd1a 100644 +--- a/drivers/usb/image/microtek.c ++++ b/drivers/usb/image/microtek.c +@@ -823,7 +823,7 @@ static int mts_usb_probe(struct usb_interface *intf, + goto out_kfree2; + + new_desc->host->hostdata[0] = (unsigned long)new_desc; +- if (scsi_add_host(new_desc->host, NULL)) { ++ if (scsi_add_host(new_desc->host, &dev->dev)) { + err_retval = -EIO; + goto out_host_put; + } +diff --git a/drivers/video/fb_ddc.c b/drivers/video/fb_ddc.c +index f836137..a0df632 100644 +--- a/drivers/video/fb_ddc.c ++++ b/drivers/video/fb_ddc.c +@@ -56,13 +56,12 @@ unsigned char *fb_ddc_read(struct i2c_adapter *adapter) + int i, j; + + algo_data->setscl(algo_data->data, 1); +- algo_data->setscl(algo_data->data, 0); + + for (i = 0; i < 3; i++) { + /* For some old monitors we need the + * following process to initialize/stop DDC + */ +- algo_data->setsda(algo_data->data, 0); ++ algo_data->setsda(algo_data->data, 1); + msleep(13); + + algo_data->setscl(algo_data->data, 1); +@@ -97,14 +96,15 @@ unsigned char *fb_ddc_read(struct i2c_adapter *adapter) + algo_data->setsda(algo_data->data, 1); + msleep(15); + algo_data->setscl(algo_data->data, 0); ++ algo_data->setsda(algo_data->data, 0); + if (edid) + break; + } + /* Release the DDC lines when done or the Apple Cinema HD display + * will switch off + */ +- algo_data->setsda(algo_data->data, 0); +- algo_data->setscl(algo_data->data, 0); ++ algo_data->setsda(algo_data->data, 1); ++ algo_data->setscl(algo_data->data, 1); + + return edid; + } +diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c +index 6ca2d24..f83d235 100644 +--- a/fs/nfsd/nfsfh.c ++++ b/fs/nfsd/nfsfh.c +@@ -565,13 +565,23 @@ enum fsid_source fsid_source(struct svc_fh *fhp) + case FSID_DEV: + case FSID_ENCODE_DEV: + case FSID_MAJOR_MINOR: +- return FSIDSOURCE_DEV; ++ if (fhp->fh_export->ex_dentry->d_inode->i_sb->s_type->fs_flags ++ & FS_REQUIRES_DEV) ++ return FSIDSOURCE_DEV; ++ break; + case FSID_NUM: +- return FSIDSOURCE_FSID; +- default: + if (fhp->fh_export->ex_flags & NFSEXP_FSID) + return FSIDSOURCE_FSID; +- else +- return FSIDSOURCE_UUID; ++ break; ++ default: ++ break; + } ++ /* either a UUID type filehandle, or the filehandle doesn't ++ * match the export. ++ */ ++ if (fhp->fh_export->ex_flags & NFSEXP_FSID) ++ return FSIDSOURCE_FSID; ++ if (fhp->fh_export->ex_uuid) ++ return FSIDSOURCE_UUID; ++ return FSIDSOURCE_DEV; + } +diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h +index c1ffa1b..887c2ce 100644 +--- a/include/linux/pci_ids.h ++++ b/include/linux/pci_ids.h +@@ -1239,6 +1239,10 @@ + #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE 0x0560 + #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_IDE 0x056C + #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE 0x0759 ++#define PCI_DEVICE_ID_NVIDIA_NVENET_36 0x0AB0 ++#define PCI_DEVICE_ID_NVIDIA_NVENET_37 0x0AB1 ++#define PCI_DEVICE_ID_NVIDIA_NVENET_38 0x0AB2 ++#define PCI_DEVICE_ID_NVIDIA_NVENET_39 0x0AB3 + + #define PCI_VENDOR_ID_IMS 0x10e0 + #define PCI_DEVICE_ID_IMS_TT128 0x9128 +diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h +index 1c4eb41..9c4ad75 100644 +--- a/include/linux/thread_info.h ++++ b/include/linux/thread_info.h +@@ -7,12 +7,25 @@ + #ifndef _LINUX_THREAD_INFO_H + #define _LINUX_THREAD_INFO_H + ++#include ++ + /* +- * System call restart block. ++ * System call restart block. + */ + struct restart_block { + long (*fn)(struct restart_block *); +- unsigned long arg0, arg1, arg2, arg3; ++ union { ++ struct { ++ unsigned long arg0, arg1, arg2, arg3; ++ }; ++ /* For futex_wait */ ++ struct { ++ u32 *uaddr; ++ u32 val; ++ u32 flags; ++ u64 time; ++ } futex; ++ }; + }; + + extern long do_no_restart_syscall(struct restart_block *parm); +diff --git a/include/net/tcp.h b/include/net/tcp.h +index a99b4f6..c05e018 100644 +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -1258,6 +1258,9 @@ static inline void tcp_insert_write_queue_before(struct sk_buff *new, + struct sock *sk) + { + __skb_insert(new, skb->prev, skb, &sk->sk_write_queue); ++ ++ if (sk->sk_send_head == skb) ++ sk->sk_send_head = new; + } + + static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk) +diff --git a/kernel/exit.c b/kernel/exit.c +index e3adc46..369dae2 100644 +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -1339,7 +1339,7 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader, + if (unlikely(!exit_code) || unlikely(p->exit_state)) + goto bail_ref; + return wait_noreap_copyout(p, pid, uid, +- why, (exit_code << 8) | 0x7f, ++ why, exit_code, + infop, ru); + } + +diff --git a/kernel/futex.c b/kernel/futex.c +index 9b57f7e..592cf07 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1129,9 +1129,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, + + /* + * In case we must use restart_block to restart a futex_wait, +- * we encode in the 'arg3' shared capability ++ * we encode in the 'flags' shared capability + */ +-#define ARG3_SHARED 1 ++#define FLAGS_SHARED 1 + + static long futex_wait_restart(struct restart_block *restart); + static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, +@@ -1272,12 +1272,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, + struct restart_block *restart; + restart = ¤t_thread_info()->restart_block; + restart->fn = futex_wait_restart; +- restart->arg0 = (unsigned long)uaddr; +- restart->arg1 = (unsigned long)val; +- restart->arg2 = (unsigned long)abs_time; +- restart->arg3 = 0; ++ restart->futex.uaddr = (u32 *)uaddr; ++ restart->futex.val = val; ++ restart->futex.time = abs_time->tv64; ++ restart->futex.flags = 0; ++ + if (fshared) +- restart->arg3 |= ARG3_SHARED; ++ restart->futex.flags |= FLAGS_SHARED; + return -ERESTART_RESTARTBLOCK; + } + +@@ -1293,15 +1294,15 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, + + static long futex_wait_restart(struct restart_block *restart) + { +- u32 __user *uaddr = (u32 __user *)restart->arg0; +- u32 val = (u32)restart->arg1; +- ktime_t *abs_time = (ktime_t *)restart->arg2; ++ u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; + struct rw_semaphore *fshared = NULL; ++ ktime_t t; + ++ t.tv64 = restart->futex.time; + restart->fn = do_no_restart_syscall; +- if (restart->arg3 & ARG3_SHARED) ++ if (restart->futex.flags & FLAGS_SHARED) + fshared = ¤t->mm->mmap_sem; +- return (long)futex_wait(uaddr, fshared, val, abs_time); ++ return (long)futex_wait(uaddr, fshared, restart->futex.val, &t); + } + + +diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c +index 23c03f4..355e867 100644 +--- a/kernel/hrtimer.c ++++ b/kernel/hrtimer.c +@@ -825,6 +825,14 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) + #ifdef CONFIG_TIME_LOW_RES + tim = ktime_add(tim, base->resolution); + #endif ++ /* ++ * Careful here: User space might have asked for a ++ * very long sleep, so the add above might result in a ++ * negative number, which enqueues the timer in front ++ * of the queue. ++ */ ++ if (tim.tv64 < 0) ++ tim.tv64 = KTIME_MAX; + } + timer->expires = tim; + +diff --git a/kernel/sys.c b/kernel/sys.c +index afd9b93..28e8364 100644 +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -31,7 +31,6 @@ + #include + #include + #include +-#include + + #include + #include +@@ -866,7 +865,6 @@ EXPORT_SYMBOL_GPL(kernel_halt); + void kernel_power_off(void) + { + kernel_shutdown_prepare(SYSTEM_POWER_OFF); +- disable_nonboot_cpus(); + printk(KERN_EMERG "Power down.\n"); + machine_power_off(); + } +diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c +index 60f4680..1f3a52e 100644 +--- a/lib/libcrc32c.c ++++ b/lib/libcrc32c.c +@@ -33,7 +33,6 @@ + #include + #include + #include +-#include + + MODULE_AUTHOR("Clay Haapala "); + MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations"); +@@ -161,15 +160,13 @@ static const u32 crc32c_table[256] = { + */ + + u32 __attribute_pure__ +-crc32c_le(u32 seed, unsigned char const *data, size_t length) ++crc32c_le(u32 crc, unsigned char const *data, size_t length) + { +- u32 crc = __cpu_to_le32(seed); +- + while (length--) + crc = + crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8); + +- return __le32_to_cpu(crc); ++ return crc; + } + + #endif /* CRC_LE_BITS == 8 */ +diff --git a/lib/textsearch.c b/lib/textsearch.c +index 88c98a2..be8bda3 100644 +--- a/lib/textsearch.c ++++ b/lib/textsearch.c +@@ -7,7 +7,7 @@ + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf +- * Pablo Neira Ayuso ++ * Pablo Neira Ayuso + * + * ========================================================================== + * +@@ -250,7 +250,8 @@ unsigned int textsearch_find_continuous(struct ts_config *conf, + * the various search algorithms. + * + * Returns a new textsearch configuration according to the specified +- * parameters or a ERR_PTR(). ++ * parameters or a ERR_PTR(). If a zero length pattern is passed, this ++ * function returns EINVAL. + */ + struct ts_config *textsearch_prepare(const char *algo, const void *pattern, + unsigned int len, gfp_t gfp_mask, int flags) +@@ -259,6 +260,9 @@ struct ts_config *textsearch_prepare(const char *algo, const void *pattern, + struct ts_config *conf; + struct ts_ops *ops; + ++ if (len == 0) ++ return ERR_PTR(-EINVAL); ++ + ops = lookup_ts_algo(algo); + #ifdef CONFIG_KMOD + /* +diff --git a/mm/shmem.c b/mm/shmem.c +index 2320b60..d1c65fb 100644 +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -1066,7 +1066,7 @@ shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info, + pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); + pvma.vm_pgoff = idx; + pvma.vm_end = PAGE_SIZE; +- page = alloc_page_vma(gfp | __GFP_ZERO, &pvma, 0); ++ page = alloc_page_vma(gfp, &pvma, 0); + mpol_free(pvma.vm_policy); + return page; + } +@@ -1086,7 +1086,7 @@ shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx) + static inline struct page * + shmem_alloc_page(gfp_t gfp,struct shmem_inode_info *info, unsigned long idx) + { +- return alloc_page(gfp | __GFP_ZERO); ++ return alloc_page(gfp); + } + #endif + +@@ -1295,6 +1295,7 @@ repeat: + + info->alloced++; + spin_unlock(&info->lock); ++ clear_highpage(filepage); + flush_dcache_page(filepage); + SetPageUptodate(filepage); + } +diff --git a/net/bridge/br.c b/net/bridge/br.c +index 848b8fa..94ae4d2 100644 +--- a/net/bridge/br.c ++++ b/net/bridge/br.c +@@ -39,7 +39,7 @@ static int __init br_init(void) + + err = br_fdb_init(); + if (err) +- goto err_out1; ++ goto err_out; + + err = br_netfilter_init(); + if (err) +@@ -65,6 +65,8 @@ err_out3: + err_out2: + br_netfilter_fini(); + err_out1: ++ br_fdb_fini(); ++err_out: + llc_sap_put(br_stp_sap); + return err; + } +diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c +index 420bbb9..fb2c7cc 100644 +--- a/net/bridge/br_input.c ++++ b/net/bridge/br_input.c +@@ -127,6 +127,7 @@ static inline int is_link_local(const unsigned char *dest) + struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) + { + const unsigned char *dest = eth_hdr(skb)->h_dest; ++ int (*rhook)(struct sk_buff **pskb); + + if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) + goto drop; +@@ -148,9 +149,9 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) + + switch (p->state) { + case BR_STATE_FORWARDING: +- +- if (br_should_route_hook) { +- if (br_should_route_hook(&skb)) ++ rhook = rcu_dereference(br_should_route_hook); ++ if (rhook != NULL) { ++ if (rhook(&skb)) + return skb; + dest = eth_hdr(skb)->h_dest; + } +diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c +index d37ce04..bc17cf5 100644 +--- a/net/bridge/netfilter/ebtable_broute.c ++++ b/net/bridge/netfilter/ebtable_broute.c +@@ -70,13 +70,13 @@ static int __init ebtable_broute_init(void) + if (ret < 0) + return ret; + /* see br_input.c */ +- br_should_route_hook = ebt_broute; ++ rcu_assign_pointer(br_should_route_hook, ebt_broute); + return ret; + } + + static void __exit ebtable_broute_fini(void) + { +- br_should_route_hook = NULL; ++ rcu_assign_pointer(br_should_route_hook, NULL); + synchronize_net(); + ebt_unregister_table(&broute_table); + } +diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c +index d46e453..b51ee15 100644 +--- a/net/decnet/dn_dev.c ++++ b/net/decnet/dn_dev.c +@@ -651,16 +651,18 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) + struct dn_dev *dn_db; + struct ifaddrmsg *ifm; + struct dn_ifaddr *ifa, **ifap; +- int err = -EADDRNOTAVAIL; ++ int err; + + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); + if (err < 0) + goto errout; + ++ err = -ENODEV; + ifm = nlmsg_data(nlh); + if ((dn_db = dn_dev_by_index(ifm->ifa_index)) == NULL) + goto errout; + ++ err = -EADDRNOTAVAIL; + for (ifap = &dn_db->ifa_list; (ifa = *ifap); ifap = &ifa->ifa_next) { + if (tb[IFA_LOCAL] && + nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2)) +diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c +index e00767e..84097ee 100644 +--- a/net/ipv4/arp.c ++++ b/net/ipv4/arp.c +@@ -110,12 +110,8 @@ + #include + #include + #include +-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) + #include +-#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) + #include +-#endif +-#endif + #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) + #include + struct neigh_table *clip_tbl_hook; +@@ -729,20 +725,10 @@ static int arp_process(struct sk_buff *skb) + htons(dev_type) != arp->ar_hrd) + goto out; + break; +-#ifdef CONFIG_NET_ETHERNET + case ARPHRD_ETHER: +-#endif +-#ifdef CONFIG_TR + case ARPHRD_IEEE802_TR: +-#endif +-#ifdef CONFIG_FDDI + case ARPHRD_FDDI: +-#endif +-#ifdef CONFIG_NET_FC + case ARPHRD_IEEE802: +-#endif +-#if defined(CONFIG_NET_ETHERNET) || defined(CONFIG_TR) || \ +- defined(CONFIG_FDDI) || defined(CONFIG_NET_FC) + /* + * ETHERNET, Token Ring and Fibre Channel (which are IEEE 802 + * devices, according to RFC 2625) devices will accept ARP +@@ -757,21 +743,16 @@ static int arp_process(struct sk_buff *skb) + arp->ar_pro != htons(ETH_P_IP)) + goto out; + break; +-#endif +-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) + case ARPHRD_AX25: + if (arp->ar_pro != htons(AX25_P_IP) || + arp->ar_hrd != htons(ARPHRD_AX25)) + goto out; + break; +-#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) + case ARPHRD_NETROM: + if (arp->ar_pro != htons(AX25_P_IP) || + arp->ar_hrd != htons(ARPHRD_NETROM)) + goto out; + break; +-#endif +-#endif + } + + /* Understand only these message types */ +diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c +index ea02f00..3b01a5f 100644 +--- a/net/ipv4/netfilter/nf_nat_core.c ++++ b/net/ipv4/netfilter/nf_nat_core.c +@@ -633,7 +633,7 @@ static int clean_nat(struct nf_conn *i, void *data) + + if (!nat) + return 0; +- memset(nat, 0, sizeof(nat)); ++ memset(nat, 0, sizeof(*nat)); + i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); + return 0; + } +diff --git a/net/ipv4/route.c b/net/ipv4/route.c +index 29ca63e..4aa2551 100644 +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -3150,18 +3150,14 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset, + offset /= sizeof(u32); + + if (length > 0) { +- u32 *src = ((u32 *) IP_RT_ACCT_CPU(0)) + offset; + u32 *dst = (u32 *) buffer; + +- /* Copy first cpu. */ + *start = buffer; +- memcpy(dst, src, length); ++ memset(dst, 0, length); + +- /* Add the other cpus in, one int at a time */ + for_each_possible_cpu(i) { + unsigned int j; +- +- src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset; ++ u32 *src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset; + + for (j = 0; j < length/4; j++) + dst[j] += src[j]; +diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c +index 53ef0f4..6ea1306 100644 +--- a/net/ipv4/sysctl_net_ipv4.c ++++ b/net/ipv4/sysctl_net_ipv4.c +@@ -121,7 +121,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, + + tcp_get_default_congestion_control(val); + ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); +- if (ret == 0 && newval && newlen) ++ if (ret == 1 && newval && newlen) + ret = tcp_set_default_congestion_control(val); + return ret; + } +diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c +index b2b2256..31dd8c5 100644 +--- a/net/ipv4/tcp_illinois.c ++++ b/net/ipv4/tcp_illinois.c +@@ -300,7 +300,7 @@ static u32 tcp_illinois_ssthresh(struct sock *sk) + struct illinois *ca = inet_csk_ca(sk); + + /* Multiplicative decrease */ +- return max((tp->snd_cwnd * ca->beta) >> BETA_SHIFT, 2U); ++ return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U); + } + + +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 53232dd..eee57e6 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1279,7 +1279,6 @@ static int tcp_mtu_probe(struct sock *sk) + + skb = tcp_send_head(sk); + tcp_insert_write_queue_before(nskb, skb, sk); +- tcp_advance_send_head(sk, skb); + + TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; + TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; +diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c +index e26b473..6d614c0 100644 +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -2285,6 +2285,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, + break; + } + ++ if (!idev && dev->mtu >= IPV6_MIN_MTU) ++ idev = ipv6_add_dev(dev); ++ + if (idev) + idev->if_flags |= IF_READY; + } else { +@@ -2349,12 +2352,18 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, + break; + + case NETDEV_CHANGEMTU: +- if ( idev && dev->mtu >= IPV6_MIN_MTU) { ++ if (idev && dev->mtu >= IPV6_MIN_MTU) { + rt6_mtu_change(dev, dev->mtu); + idev->cnf.mtu6 = dev->mtu; + break; + } + ++ if (!idev && dev->mtu >= IPV6_MIN_MTU) { ++ idev = ipv6_add_dev(dev); ++ if (idev) ++ break; ++ } ++ + /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */ + + case NETDEV_DOWN: +diff --git a/net/key/af_key.c b/net/key/af_key.c +index 0f8304b..ca0db0f 100644 +--- a/net/key/af_key.c ++++ b/net/key/af_key.c +@@ -1543,7 +1543,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, + + out_hdr = (struct sadb_msg *) out_skb->data; + out_hdr->sadb_msg_version = hdr->sadb_msg_version; +- out_hdr->sadb_msg_type = SADB_DUMP; ++ out_hdr->sadb_msg_type = SADB_GET; + out_hdr->sadb_msg_satype = pfkey_proto2satype(proto); + out_hdr->sadb_msg_errno = 0; + out_hdr->sadb_msg_reserved = 0; +diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c +index 15fe8f6..fe7b3d8 100644 +--- a/net/netfilter/xt_TCPMSS.c ++++ b/net/netfilter/xt_TCPMSS.c +@@ -178,10 +178,8 @@ xt_tcpmss_target6(struct sk_buff **pskb, + + nexthdr = ipv6h->nexthdr; + tcphoff = ipv6_skip_exthdr(*pskb, sizeof(*ipv6h), &nexthdr); +- if (tcphoff < 0) { +- WARN_ON(1); ++ if (tcphoff < 0) + return NF_DROP; +- } + ret = tcpmss_mangle_packet(pskb, targinfo, tcphoff, + sizeof(*ipv6h) + sizeof(struct tcphdr)); + if (ret < 0) +diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig +index e662f1d..0d3103c 100644 +--- a/net/rxrpc/Kconfig ++++ b/net/rxrpc/Kconfig +@@ -5,6 +5,7 @@ + config AF_RXRPC + tristate "RxRPC session sockets" + depends on INET && EXPERIMENTAL ++ select CRYPTO + select KEYS + help + Say Y or M here to include support for RxRPC session sockets (just +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index d70fa30..ae80150 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -1608,8 +1608,15 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, + mutex_lock(&u->readlock); + + skb = skb_recv_datagram(sk, flags, noblock, &err); +- if (!skb) ++ if (!skb) { ++ unix_state_lock(sk); ++ /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ ++ if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && ++ (sk->sk_shutdown & RCV_SHUTDOWN)) ++ err = 0; ++ unix_state_unlock(sk); + goto out_unlock; ++ } + + wake_up_interruptible(&u->peer_wait); + +diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c +index dfacb9c..7775488 100644 +--- a/net/xfrm/xfrm_state.c ++++ b/net/xfrm/xfrm_state.c +@@ -371,7 +371,7 @@ int __xfrm_state_delete(struct xfrm_state *x) + * The xfrm_state_alloc call gives a reference, and that + * is what we are dropping here. + */ +- __xfrm_state_put(x); ++ xfrm_state_put(x); + err = 0; + } + diff --git a/tags/2.6.22-2/01015_linux-2.6.22.16.patch b/tags/2.6.22-2/01015_linux-2.6.22.16.patch new file mode 100644 index 0000000..34ae110 --- /dev/null +++ b/tags/2.6.22-2/01015_linux-2.6.22.16.patch @@ -0,0 +1,27 @@ +Subject: Linux 2.6.22.16 +From: Greg Kroah-Hartman + +Signed-off-by: Greg Kroah-Hartman + +diff --git a/fs/namei.c b/fs/namei.c +index 5e2d98d..8e209ce 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -1543,7 +1543,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) + if (S_ISLNK(inode->i_mode)) + return -ELOOP; + +- if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) ++ if (S_ISDIR(inode->i_mode) && (acc_mode & MAY_WRITE)) + return -EISDIR; + + error = vfs_permission(nd, acc_mode); +@@ -1562,7 +1562,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) + return -EACCES; + + flag &= ~O_TRUNC; +- } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE)) ++ } else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE)) + return -EROFS; + /* + * An append-only file must be opened in append mode for writing. diff --git a/tags/2.6.22-2/01016_linux-2.6.22.17.patch b/tags/2.6.22-2/01016_linux-2.6.22.17.patch new file mode 100644 index 0000000..4f735d5 --- /dev/null +++ b/tags/2.6.22-2/01016_linux-2.6.22.17.patch @@ -0,0 +1,1360 @@ +Subject: Linux 2.6.22.17 +From: Greg Kroah-Hartman + +Signed-off-by: Greg Kroah-Hartman + +diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c +index 4f2f453..c84b7cc 100644 +--- a/arch/powerpc/mm/hash_utils_64.c ++++ b/arch/powerpc/mm/hash_utils_64.c +@@ -795,7 +795,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, + + #ifdef CONFIG_PPC_MM_SLICES + /* We only prefault standard pages for now */ +- if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize)); ++ if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize)) + return; + #endif + +diff --git a/arch/sparc64/kernel/chmc.c b/arch/sparc64/kernel/chmc.c +index 777d345..6d4f02e 100644 +--- a/arch/sparc64/kernel/chmc.c ++++ b/arch/sparc64/kernel/chmc.c +@@ -1,7 +1,6 @@ +-/* $Id: chmc.c,v 1.4 2002/01/08 16:00:14 davem Exp $ +- * memctrlr.c: Driver for UltraSPARC-III memory controller. ++/* memctrlr.c: Driver for UltraSPARC-III memory controller. + * +- * Copyright (C) 2001 David S. Miller (davem@redhat.com) ++ * Copyright (C) 2001, 2007 David S. Miller (davem@davemloft.net) + */ + + #include +@@ -16,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -242,8 +242,11 @@ int chmc_getunumber(int syndrome_code, + */ + static u64 read_mcreg(struct mctrl_info *mp, unsigned long offset) + { +- unsigned long ret; +- int this_cpu = get_cpu(); ++ unsigned long ret, this_cpu; ++ ++ preempt_disable(); ++ ++ this_cpu = real_hard_smp_processor_id(); + + if (mp->portid == this_cpu) { + __asm__ __volatile__("ldxa [%1] %2, %0" +@@ -255,7 +258,8 @@ static u64 read_mcreg(struct mctrl_info *mp, unsigned long offset) + : "r" (mp->regs + offset), + "i" (ASI_PHYS_BYPASS_EC_E)); + } +- put_cpu(); ++ ++ preempt_enable(); + + return ret; + } +diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S +index 8059531..193791c 100644 +--- a/arch/sparc64/kernel/entry.S ++++ b/arch/sparc64/kernel/entry.S +@@ -2593,3 +2593,15 @@ sun4v_mmustat_info: + retl + nop + .size sun4v_mmustat_info, .-sun4v_mmustat_info ++ ++ .globl sun4v_mmu_demap_all ++ .type sun4v_mmu_demap_all,#function ++sun4v_mmu_demap_all: ++ clr %o0 ++ clr %o1 ++ mov HV_MMU_ALL, %o2 ++ mov HV_FAST_MMU_DEMAP_ALL, %o5 ++ ta HV_FAST_TRAP ++ retl ++ nop ++ .size sun4v_mmu_demap_all, .-sun4v_mmu_demap_all +diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c +index 4dcd7d0..3ddd99c 100644 +--- a/arch/sparc64/kernel/smp.c ++++ b/arch/sparc64/kernel/smp.c +@@ -403,7 +403,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c + */ + static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) + { +- u64 pstate, ver; ++ u64 pstate, ver, busy_mask; + int nack_busy_id, is_jbus, need_more; + + if (cpus_empty(mask)) +@@ -435,14 +435,20 @@ retry: + "i" (ASI_INTR_W)); + + nack_busy_id = 0; ++ busy_mask = 0; + { + int i; + + for_each_cpu_mask(i, mask) { + u64 target = (i << 14) | 0x70; + +- if (!is_jbus) ++ if (is_jbus) { ++ busy_mask |= (0x1UL << (i * 2)); ++ } else { + target |= (nack_busy_id << 24); ++ busy_mask |= (0x1UL << ++ (nack_busy_id * 2)); ++ } + __asm__ __volatile__( + "stxa %%g0, [%0] %1\n\t" + "membar #Sync\n\t" +@@ -458,15 +464,16 @@ retry: + + /* Now, poll for completion. */ + { +- u64 dispatch_stat; ++ u64 dispatch_stat, nack_mask; + long stuck; + + stuck = 100000 * nack_busy_id; ++ nack_mask = busy_mask << 1; + do { + __asm__ __volatile__("ldxa [%%g0] %1, %0" + : "=r" (dispatch_stat) + : "i" (ASI_INTR_DISPATCH_STAT)); +- if (dispatch_stat == 0UL) { ++ if (!(dispatch_stat & (busy_mask | nack_mask))) { + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" + : : "r" (pstate)); + if (unlikely(need_more)) { +@@ -483,12 +490,12 @@ retry: + } + if (!--stuck) + break; +- } while (dispatch_stat & 0x5555555555555555UL); ++ } while (dispatch_stat & busy_mask); + + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" + : : "r" (pstate)); + +- if ((dispatch_stat & ~(0x5555555555555555UL)) == 0) { ++ if (dispatch_stat & busy_mask) { + /* Busy bits will not clear, continue instead + * of freezing up on this cpu. + */ +diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c +index 3010227..ed2484d 100644 +--- a/arch/sparc64/mm/init.c ++++ b/arch/sparc64/mm/init.c +@@ -1135,14 +1135,9 @@ static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) + } + } + +-static void __init kernel_physical_mapping_init(void) ++static void __init init_kpte_bitmap(void) + { + unsigned long i; +-#ifdef CONFIG_DEBUG_PAGEALLOC +- unsigned long mem_alloced = 0UL; +-#endif +- +- read_obp_memory("reg", &pall[0], &pall_ents); + + for (i = 0; i < pall_ents; i++) { + unsigned long phys_start, phys_end; +@@ -1151,14 +1146,24 @@ static void __init kernel_physical_mapping_init(void) + phys_end = phys_start + pall[i].reg_size; + + mark_kpte_bitmap(phys_start, phys_end); ++ } ++} + ++static void __init kernel_physical_mapping_init(void) ++{ + #ifdef CONFIG_DEBUG_PAGEALLOC ++ unsigned long i, mem_alloced = 0UL; ++ ++ for (i = 0; i < pall_ents; i++) { ++ unsigned long phys_start, phys_end; ++ ++ phys_start = pall[i].phys_addr; ++ phys_end = phys_start + pall[i].reg_size; ++ + mem_alloced += kernel_map_range(phys_start, phys_end, + PAGE_KERNEL); +-#endif + } + +-#ifdef CONFIG_DEBUG_PAGEALLOC + printk("Allocated %ld bytes for kernel page tables.\n", + mem_alloced); + +@@ -1400,6 +1405,10 @@ void __init paging_init(void) + + inherit_prom_mappings(); + ++ read_obp_memory("reg", &pall[0], &pall_ents); ++ ++ init_kpte_bitmap(); ++ + /* Ok, we can use our TLB miss and window trap handlers safely. */ + setup_tba(); + +@@ -1854,7 +1863,9 @@ void __flush_tlb_all(void) + "wrpr %0, %1, %%pstate" + : "=r" (pstate) + : "i" (PSTATE_IE)); +- if (tlb_type == spitfire) { ++ if (tlb_type == hypervisor) { ++ sun4v_mmu_demap_all(); ++ } else if (tlb_type == spitfire) { + for (i = 0; i < 64; i++) { + /* Spitfire Errata #32 workaround */ + /* NOTE: Always runs on spitfire, so no +diff --git a/drivers/acpi/dispatcher/dsobject.c b/drivers/acpi/dispatcher/dsobject.c +index a474ca2..954ac8c 100644 +--- a/drivers/acpi/dispatcher/dsobject.c ++++ b/drivers/acpi/dispatcher/dsobject.c +@@ -137,6 +137,71 @@ acpi_ds_build_internal_object(struct acpi_walk_state *walk_state, + return_ACPI_STATUS(status); + } + } ++ ++ /* Special object resolution for elements of a package */ ++ ++ if ((op->common.parent->common.aml_opcode == AML_PACKAGE_OP) || ++ (op->common.parent->common.aml_opcode == ++ AML_VAR_PACKAGE_OP)) { ++ /* ++ * Attempt to resolve the node to a value before we insert it into ++ * the package. If this is a reference to a common data type, ++ * resolve it immediately. According to the ACPI spec, package ++ * elements can only be "data objects" or method references. ++ * Attempt to resolve to an Integer, Buffer, String or Package. ++ * If cannot, return the named reference (for things like Devices, ++ * Methods, etc.) Buffer Fields and Fields will resolve to simple ++ * objects (int/buf/str/pkg). ++ * ++ * NOTE: References to things like Devices, Methods, Mutexes, etc. ++ * will remain as named references. This behavior is not described ++ * in the ACPI spec, but it appears to be an oversight. ++ */ ++ obj_desc = (union acpi_operand_object *)op->common.node; ++ ++ status = ++ acpi_ex_resolve_node_to_value(ACPI_CAST_INDIRECT_PTR ++ (struct ++ acpi_namespace_node, ++ &obj_desc), ++ walk_state); ++ if (ACPI_FAILURE(status)) { ++ return_ACPI_STATUS(status); ++ } ++ ++ switch (op->common.node->type) { ++ /* ++ * For these types, we need the actual node, not the subobject. ++ * However, the subobject got an extra reference count above. ++ */ ++ case ACPI_TYPE_MUTEX: ++ case ACPI_TYPE_METHOD: ++ case ACPI_TYPE_POWER: ++ case ACPI_TYPE_PROCESSOR: ++ case ACPI_TYPE_EVENT: ++ case ACPI_TYPE_REGION: ++ case ACPI_TYPE_DEVICE: ++ case ACPI_TYPE_THERMAL: ++ ++ obj_desc = ++ (union acpi_operand_object *)op->common. ++ node; ++ break; ++ ++ default: ++ break; ++ } ++ ++ /* ++ * If above resolved to an operand object, we are done. Otherwise, ++ * we have a NS node, we must create the package entry as a named ++ * reference. ++ */ ++ if (ACPI_GET_DESCRIPTOR_TYPE(obj_desc) != ++ ACPI_DESC_TYPE_NAMED) { ++ goto exit; ++ } ++ } + } + + /* Create and init a new internal ACPI object */ +@@ -156,6 +221,7 @@ acpi_ds_build_internal_object(struct acpi_walk_state *walk_state, + return_ACPI_STATUS(status); + } + ++ exit: + *obj_desc_ptr = obj_desc; + return_ACPI_STATUS(AE_OK); + } +@@ -356,12 +422,25 @@ acpi_ds_build_internal_package_obj(struct acpi_walk_state *walk_state, + arg = arg->common.next; + for (i = 0; arg && (i < element_count); i++) { + if (arg->common.aml_opcode == AML_INT_RETURN_VALUE_OP) { +- +- /* This package element is already built, just get it */ +- +- obj_desc->package.elements[i] = +- ACPI_CAST_PTR(union acpi_operand_object, +- arg->common.node); ++ if (arg->common.node->type == ACPI_TYPE_METHOD) { ++ /* ++ * A method reference "looks" to the parser to be a method ++ * invocation, so we special case it here ++ */ ++ arg->common.aml_opcode = AML_INT_NAMEPATH_OP; ++ status = ++ acpi_ds_build_internal_object(walk_state, ++ arg, ++ &obj_desc-> ++ package. ++ elements[i]); ++ } else { ++ /* This package element is already built, just get it */ ++ ++ obj_desc->package.elements[i] = ++ ACPI_CAST_PTR(union acpi_operand_object, ++ arg->common.node); ++ } + } else { + status = acpi_ds_build_internal_object(walk_state, arg, + &obj_desc-> +diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c +index 14ced85..0c205b0 100644 +--- a/drivers/atm/nicstar.c ++++ b/drivers/atm/nicstar.c +@@ -625,14 +625,6 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) + if (mac[i] == NULL) + nicstar_init_eprom(card->membase); + +- if (request_irq(pcidev->irq, &ns_irq_handler, IRQF_DISABLED | IRQF_SHARED, "nicstar", card) != 0) +- { +- printk("nicstar%d: can't allocate IRQ %d.\n", i, pcidev->irq); +- error = 9; +- ns_init_card_error(card, error); +- return error; +- } +- + /* Set the VPI/VCI MSb mask to zero so we can receive OAM cells */ + writel(0x00000000, card->membase + VPM); + +@@ -858,8 +850,6 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) + card->iovpool.count++; + } + +- card->intcnt = 0; +- + /* Configure NICStAR */ + if (card->rct_size == 4096) + ns_cfg_rctsize = NS_CFG_RCTSIZE_4096_ENTRIES; +@@ -868,6 +858,15 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) + + card->efbie = 1; + ++ card->intcnt = 0; ++ if (request_irq(pcidev->irq, &ns_irq_handler, IRQF_DISABLED | IRQF_SHARED, "nicstar", card) != 0) ++ { ++ printk("nicstar%d: can't allocate IRQ %d.\n", i, pcidev->irq); ++ error = 9; ++ ns_init_card_error(card, error); ++ return error; ++ } ++ + /* Register device */ + card->atmdev = atm_dev_register("nicstar", &atm_ops, -1, NULL); + if (card->atmdev == NULL) +diff --git a/drivers/char/drm/drm_vm.c b/drivers/char/drm/drm_vm.c +index b5c5b9f..e2d7be9 100644 +--- a/drivers/char/drm/drm_vm.c ++++ b/drivers/char/drm/drm_vm.c +@@ -520,6 +520,7 @@ static int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma) + vma->vm_ops = &drm_vm_dma_ops; + + vma->vm_flags |= VM_RESERVED; /* Don't swap */ ++ vma->vm_flags |= VM_DONTEXPAND; + + vma->vm_file = filp; /* Needed for drm_vm_open() */ + drm_vm_open_locked(vma); +@@ -669,6 +670,7 @@ static int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma) + return -EINVAL; /* This should never happen. */ + } + vma->vm_flags |= VM_RESERVED; /* Don't swap */ ++ vma->vm_flags |= VM_DONTEXPAND; + + vma->vm_file = filp; /* Needed for drm_vm_open() */ + drm_vm_open_locked(vma); +diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c +index 7ac3061..5685b7a 100644 +--- a/drivers/char/mspec.c ++++ b/drivers/char/mspec.c +@@ -265,7 +265,8 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma, int type) + vdata->refcnt = ATOMIC_INIT(1); + vma->vm_private_data = vdata; + +- vma->vm_flags |= (VM_IO | VM_LOCKED | VM_RESERVED | VM_PFNMAP); ++ vma->vm_flags |= (VM_IO | VM_LOCKED | VM_RESERVED | VM_PFNMAP | ++ VM_DONTEXPAND); + if (vdata->type == MSPEC_FETCHOP || vdata->type == MSPEC_UNCACHED) + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_ops = &mspec_vm_ops; +diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c +index 296f510..12ceed5 100644 +--- a/drivers/connector/cn_queue.c ++++ b/drivers/connector/cn_queue.c +@@ -99,8 +99,8 @@ int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id + spin_unlock_bh(&dev->queue_lock); + + if (found) { +- atomic_dec(&dev->refcnt); + cn_queue_free_callback(cbq); ++ atomic_dec(&dev->refcnt); + return -EINVAL; + } + +diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c +index 59b9943..ad55baa 100644 +--- a/drivers/net/cassini.c ++++ b/drivers/net/cassini.c +@@ -336,30 +336,6 @@ static inline void cas_mask_intr(struct cas *cp) + cas_disable_irq(cp, i); + } + +-static inline void cas_buffer_init(cas_page_t *cp) +-{ +- struct page *page = cp->buffer; +- atomic_set((atomic_t *)&page->lru.next, 1); +-} +- +-static inline int cas_buffer_count(cas_page_t *cp) +-{ +- struct page *page = cp->buffer; +- return atomic_read((atomic_t *)&page->lru.next); +-} +- +-static inline void cas_buffer_inc(cas_page_t *cp) +-{ +- struct page *page = cp->buffer; +- atomic_inc((atomic_t *)&page->lru.next); +-} +- +-static inline void cas_buffer_dec(cas_page_t *cp) +-{ +- struct page *page = cp->buffer; +- atomic_dec((atomic_t *)&page->lru.next); +-} +- + static void cas_enable_irq(struct cas *cp, const int ring) + { + if (ring == 0) { /* all but TX_DONE */ +@@ -497,7 +473,6 @@ static int cas_page_free(struct cas *cp, cas_page_t *page) + { + pci_unmap_page(cp->pdev, page->dma_addr, cp->page_size, + PCI_DMA_FROMDEVICE); +- cas_buffer_dec(page); + __free_pages(page->buffer, cp->page_order); + kfree(page); + return 0; +@@ -527,7 +502,6 @@ static cas_page_t *cas_page_alloc(struct cas *cp, const gfp_t flags) + page->buffer = alloc_pages(flags, cp->page_order); + if (!page->buffer) + goto page_err; +- cas_buffer_init(page); + page->dma_addr = pci_map_page(cp->pdev, page->buffer, 0, + cp->page_size, PCI_DMA_FROMDEVICE); + return page; +@@ -606,7 +580,7 @@ static void cas_spare_recover(struct cas *cp, const gfp_t flags) + list_for_each_safe(elem, tmp, &list) { + cas_page_t *page = list_entry(elem, cas_page_t, list); + +- if (cas_buffer_count(page) > 1) ++ if (page_count(page->buffer) > 1) + continue; + + list_del(elem); +@@ -1374,7 +1348,7 @@ static inline cas_page_t *cas_page_spare(struct cas *cp, const int index) + cas_page_t *page = cp->rx_pages[1][index]; + cas_page_t *new; + +- if (cas_buffer_count(page) == 1) ++ if (page_count(page->buffer) == 1) + return page; + + new = cas_page_dequeue(cp); +@@ -1394,7 +1368,7 @@ static cas_page_t *cas_page_swap(struct cas *cp, const int ring, + cas_page_t **page1 = cp->rx_pages[1]; + + /* swap if buffer is in use */ +- if (cas_buffer_count(page0[index]) > 1) { ++ if (page_count(page0[index]->buffer) > 1) { + cas_page_t *new = cas_page_spare(cp, index); + if (new) { + page1[index] = page0[index]; +@@ -1979,6 +1953,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, + struct cas_page *page; + struct sk_buff *skb; + void *addr, *crcaddr; ++ __sum16 csum; + char *p; + + hlen = CAS_VAL(RX_COMP2_HDR_SIZE, words[1]); +@@ -2062,10 +2037,10 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, + + skb_shinfo(skb)->nr_frags++; + skb->data_len += hlen - swivel; ++ skb->truesize += hlen - swivel; + skb->len += hlen - swivel; + + get_page(page->buffer); +- cas_buffer_inc(page); + frag->page = page->buffer; + frag->page_offset = off; + frag->size = hlen - swivel; +@@ -2090,7 +2065,6 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, + frag++; + + get_page(page->buffer); +- cas_buffer_inc(page); + frag->page = page->buffer; + frag->page_offset = 0; + frag->size = hlen; +@@ -2158,14 +2132,15 @@ end_copy_pkt: + skb_put(skb, alloclen); + } + +- i = CAS_VAL(RX_COMP4_TCP_CSUM, words[3]); ++ csum = (__force __sum16)htons(CAS_VAL(RX_COMP4_TCP_CSUM, words[3])); + if (cp->crc_size) { + /* checksum includes FCS. strip it out. */ +- i = csum_fold(csum_partial(crcaddr, cp->crc_size, i)); ++ csum = csum_fold(csum_partial(crcaddr, cp->crc_size, ++ csum_unfold(csum))); + if (addr) + cas_page_unmap(addr); + } +- skb->csum = ntohs(i ^ 0xffff); ++ skb->csum = csum_unfold(~csum); + skb->ip_summed = CHECKSUM_COMPLETE; + skb->protocol = eth_type_trans(skb, cp->dev); + return len; +@@ -2253,7 +2228,7 @@ static int cas_post_rxds_ringN(struct cas *cp, int ring, int num) + released = 0; + while (entry != last) { + /* make a new buffer if it's still in use */ +- if (cas_buffer_count(page[entry]) > 1) { ++ if (page_count(page[entry]->buffer) > 1) { + cas_page_t *new = cas_page_dequeue(cp); + if (!new) { + /* let the timer know that we need to +diff --git a/drivers/net/cassini.h b/drivers/net/cassini.h +index a970804..a201431 100644 +--- a/drivers/net/cassini.h ++++ b/drivers/net/cassini.h +@@ -4122,8 +4122,8 @@ cas_saturn_patch_t cas_saturn_patch[] = { + inserted into + outgoing frame. */ + struct cas_tx_desc { +- u64 control; +- u64 buffer; ++ __le64 control; ++ __le64 buffer; + }; + + /* descriptor ring for free buffers contains page-sized buffers. the index +@@ -4131,8 +4131,8 @@ struct cas_tx_desc { + * the completion ring. + */ + struct cas_rx_desc { +- u64 index; +- u64 buffer; ++ __le64 index; ++ __le64 buffer; + }; + + /* received packets are put on the completion ring. */ +@@ -4210,10 +4210,10 @@ struct cas_rx_desc { + #define RX_INDEX_RELEASE 0x0000000000002000ULL + + struct cas_rx_comp { +- u64 word1; +- u64 word2; +- u64 word3; +- u64 word4; ++ __le64 word1; ++ __le64 word2; ++ __le64 word3; ++ __le64 word4; + }; + + enum link_state { +@@ -4252,7 +4252,7 @@ struct cas_init_block { + struct cas_rx_comp rxcs[N_RX_COMP_RINGS][INIT_BLOCK_RX_COMP]; + struct cas_rx_desc rxds[N_RX_DESC_RINGS][INIT_BLOCK_RX_DESC]; + struct cas_tx_desc txds[N_TX_RINGS][INIT_BLOCK_TX]; +- u64 tx_compwb; ++ __le64 tx_compwb; + }; + + /* tiny buffers to deal with target abort issue. we allocate a bit +diff --git a/drivers/net/chelsio/cxgb2.c b/drivers/net/chelsio/cxgb2.c +index 231ce43..a82a1fa 100644 +--- a/drivers/net/chelsio/cxgb2.c ++++ b/drivers/net/chelsio/cxgb2.c +@@ -370,6 +370,8 @@ static char stats_strings[][ETH_GSTRING_LEN] = { + "TxInternalMACXmitError", + "TxFramesWithExcessiveDeferral", + "TxFCSErrors", ++ "TxJumboFramesOk", ++ "TxJumboOctetsOk", + + "RxOctetsOK", + "RxOctetsBad", +@@ -388,15 +390,16 @@ static char stats_strings[][ETH_GSTRING_LEN] = { + "RxInRangeLengthErrors", + "RxOutOfRangeLengthField", + "RxFrameTooLongErrors", ++ "RxJumboFramesOk", ++ "RxJumboOctetsOk", + + /* Port stats */ +- "RxPackets", + "RxCsumGood", +- "TxPackets", + "TxCsumOffload", + "TxTso", + "RxVlan", + "TxVlan", ++ "TxNeedHeadroom", + + /* Interrupt stats */ + "rx drops", +@@ -454,23 +457,56 @@ static void get_stats(struct net_device *dev, struct ethtool_stats *stats, + const struct cmac_statistics *s; + const struct sge_intr_counts *t; + struct sge_port_stats ss; +- unsigned int len; + + s = mac->ops->statistics_update(mac, MAC_STATS_UPDATE_FULL); +- +- len = sizeof(u64)*(&s->TxFCSErrors + 1 - &s->TxOctetsOK); +- memcpy(data, &s->TxOctetsOK, len); +- data += len; +- +- len = sizeof(u64)*(&s->RxFrameTooLongErrors + 1 - &s->RxOctetsOK); +- memcpy(data, &s->RxOctetsOK, len); +- data += len; +- ++ t = t1_sge_get_intr_counts(adapter->sge); + t1_sge_get_port_stats(adapter->sge, dev->if_port, &ss); +- memcpy(data, &ss, sizeof(ss)); +- data += sizeof(ss); + +- t = t1_sge_get_intr_counts(adapter->sge); ++ *data++ = s->TxOctetsOK; ++ *data++ = s->TxOctetsBad; ++ *data++ = s->TxUnicastFramesOK; ++ *data++ = s->TxMulticastFramesOK; ++ *data++ = s->TxBroadcastFramesOK; ++ *data++ = s->TxPauseFrames; ++ *data++ = s->TxFramesWithDeferredXmissions; ++ *data++ = s->TxLateCollisions; ++ *data++ = s->TxTotalCollisions; ++ *data++ = s->TxFramesAbortedDueToXSCollisions; ++ *data++ = s->TxUnderrun; ++ *data++ = s->TxLengthErrors; ++ *data++ = s->TxInternalMACXmitError; ++ *data++ = s->TxFramesWithExcessiveDeferral; ++ *data++ = s->TxFCSErrors; ++ *data++ = s->TxJumboFramesOK; ++ *data++ = s->TxJumboOctetsOK; ++ ++ *data++ = s->RxOctetsOK; ++ *data++ = s->RxOctetsBad; ++ *data++ = s->RxUnicastFramesOK; ++ *data++ = s->RxMulticastFramesOK; ++ *data++ = s->RxBroadcastFramesOK; ++ *data++ = s->RxPauseFrames; ++ *data++ = s->RxFCSErrors; ++ *data++ = s->RxAlignErrors; ++ *data++ = s->RxSymbolErrors; ++ *data++ = s->RxDataErrors; ++ *data++ = s->RxSequenceErrors; ++ *data++ = s->RxRuntErrors; ++ *data++ = s->RxJabberErrors; ++ *data++ = s->RxInternalMACRcvError; ++ *data++ = s->RxInRangeLengthErrors; ++ *data++ = s->RxOutOfRangeLengthField; ++ *data++ = s->RxFrameTooLongErrors; ++ *data++ = s->RxJumboFramesOK; ++ *data++ = s->RxJumboOctetsOK; ++ ++ *data++ = ss.rx_cso_good; ++ *data++ = ss.tx_cso; ++ *data++ = ss.tx_tso; ++ *data++ = ss.vlan_xtract; ++ *data++ = ss.vlan_insert; ++ *data++ = ss.tx_need_hdrroom; ++ + *data++ = t->rx_drops; + *data++ = t->pure_rsps; + *data++ = t->unhandled_irqs; +diff --git a/drivers/net/chelsio/pm3393.c b/drivers/net/chelsio/pm3393.c +index 678778a..2117c4f 100644 +--- a/drivers/net/chelsio/pm3393.c ++++ b/drivers/net/chelsio/pm3393.c +@@ -45,7 +45,7 @@ + + #include + +-#define OFFSET(REG_ADDR) (REG_ADDR << 2) ++#define OFFSET(REG_ADDR) ((REG_ADDR) << 2) + + /* Max frame size PM3393 can handle. Includes Ethernet header and CRC. */ + #define MAX_FRAME_SIZE 9600 +@@ -428,69 +428,26 @@ static int pm3393_set_speed_duplex_fc(struct cmac *cmac, int speed, int duplex, + return 0; + } + +-static void pm3393_rmon_update(struct adapter *adapter, u32 offs, u64 *val, +- int over) +-{ +- u32 val0, val1, val2; +- +- t1_tpi_read(adapter, offs, &val0); +- t1_tpi_read(adapter, offs + 4, &val1); +- t1_tpi_read(adapter, offs + 8, &val2); +- +- *val &= ~0ull << 40; +- *val |= val0 & 0xffff; +- *val |= (val1 & 0xffff) << 16; +- *val |= (u64)(val2 & 0xff) << 32; +- +- if (over) +- *val += 1ull << 40; ++#define RMON_UPDATE(mac, name, stat_name) \ ++{ \ ++ t1_tpi_read((mac)->adapter, OFFSET(name), &val0); \ ++ t1_tpi_read((mac)->adapter, OFFSET((name)+1), &val1); \ ++ t1_tpi_read((mac)->adapter, OFFSET((name)+2), &val2); \ ++ (mac)->stats.stat_name = (u64)(val0 & 0xffff) | \ ++ ((u64)(val1 & 0xffff) << 16) | \ ++ ((u64)(val2 & 0xff) << 32) | \ ++ ((mac)->stats.stat_name & \ ++ 0xffffff0000000000ULL); \ ++ if (ro & \ ++ (1ULL << ((name - SUNI1x10GEXP_REG_MSTAT_COUNTER_0_LOW) >> 2))) \ ++ (mac)->stats.stat_name += 1ULL << 40; \ + } + + static const struct cmac_statistics *pm3393_update_statistics(struct cmac *mac, + int flag) + { +- static struct { +- unsigned int reg; +- unsigned int offset; +- } hw_stats [] = { +- +-#define HW_STAT(name, stat_name) \ +- { name, (&((struct cmac_statistics *)NULL)->stat_name) - (u64 *)NULL } +- +- /* Rx stats */ +- HW_STAT(RxOctetsReceivedOK, RxOctetsOK), +- HW_STAT(RxUnicastFramesReceivedOK, RxUnicastFramesOK), +- HW_STAT(RxMulticastFramesReceivedOK, RxMulticastFramesOK), +- HW_STAT(RxBroadcastFramesReceivedOK, RxBroadcastFramesOK), +- HW_STAT(RxPAUSEMACCtrlFramesReceived, RxPauseFrames), +- HW_STAT(RxFrameCheckSequenceErrors, RxFCSErrors), +- HW_STAT(RxFramesLostDueToInternalMACErrors, +- RxInternalMACRcvError), +- HW_STAT(RxSymbolErrors, RxSymbolErrors), +- HW_STAT(RxInRangeLengthErrors, RxInRangeLengthErrors), +- HW_STAT(RxFramesTooLongErrors , RxFrameTooLongErrors), +- HW_STAT(RxJabbers, RxJabberErrors), +- HW_STAT(RxFragments, RxRuntErrors), +- HW_STAT(RxUndersizedFrames, RxRuntErrors), +- HW_STAT(RxJumboFramesReceivedOK, RxJumboFramesOK), +- HW_STAT(RxJumboOctetsReceivedOK, RxJumboOctetsOK), +- +- /* Tx stats */ +- HW_STAT(TxOctetsTransmittedOK, TxOctetsOK), +- HW_STAT(TxFramesLostDueToInternalMACTransmissionError, +- TxInternalMACXmitError), +- HW_STAT(TxTransmitSystemError, TxFCSErrors), +- HW_STAT(TxUnicastFramesTransmittedOK, TxUnicastFramesOK), +- HW_STAT(TxMulticastFramesTransmittedOK, TxMulticastFramesOK), +- HW_STAT(TxBroadcastFramesTransmittedOK, TxBroadcastFramesOK), +- HW_STAT(TxPAUSEMACCtrlFramesTransmitted, TxPauseFrames), +- HW_STAT(TxJumboFramesReceivedOK, TxJumboFramesOK), +- HW_STAT(TxJumboOctetsReceivedOK, TxJumboOctetsOK) +- }, *p = hw_stats; +- u64 ro; +- u32 val0, val1, val2, val3; +- u64 *stats = (u64 *) &mac->stats; +- unsigned int i; ++ u64 ro; ++ u32 val0, val1, val2, val3; + + /* Snap the counters */ + pmwrite(mac, SUNI1x10GEXP_REG_MSTAT_CONTROL, +@@ -504,14 +461,35 @@ static const struct cmac_statistics *pm3393_update_statistics(struct cmac *mac, + ro = ((u64)val0 & 0xffff) | (((u64)val1 & 0xffff) << 16) | + (((u64)val2 & 0xffff) << 32) | (((u64)val3 & 0xffff) << 48); + +- for (i = 0; i < ARRAY_SIZE(hw_stats); i++) { +- unsigned reg = p->reg - SUNI1x10GEXP_REG_MSTAT_COUNTER_0_LOW; +- +- pm3393_rmon_update((mac)->adapter, OFFSET(p->reg), +- stats + p->offset, ro & (reg >> 2)); +- } +- +- ++ /* Rx stats */ ++ RMON_UPDATE(mac, RxOctetsReceivedOK, RxOctetsOK); ++ RMON_UPDATE(mac, RxUnicastFramesReceivedOK, RxUnicastFramesOK); ++ RMON_UPDATE(mac, RxMulticastFramesReceivedOK, RxMulticastFramesOK); ++ RMON_UPDATE(mac, RxBroadcastFramesReceivedOK, RxBroadcastFramesOK); ++ RMON_UPDATE(mac, RxPAUSEMACCtrlFramesReceived, RxPauseFrames); ++ RMON_UPDATE(mac, RxFrameCheckSequenceErrors, RxFCSErrors); ++ RMON_UPDATE(mac, RxFramesLostDueToInternalMACErrors, ++ RxInternalMACRcvError); ++ RMON_UPDATE(mac, RxSymbolErrors, RxSymbolErrors); ++ RMON_UPDATE(mac, RxInRangeLengthErrors, RxInRangeLengthErrors); ++ RMON_UPDATE(mac, RxFramesTooLongErrors , RxFrameTooLongErrors); ++ RMON_UPDATE(mac, RxJabbers, RxJabberErrors); ++ RMON_UPDATE(mac, RxFragments, RxRuntErrors); ++ RMON_UPDATE(mac, RxUndersizedFrames, RxRuntErrors); ++ RMON_UPDATE(mac, RxJumboFramesReceivedOK, RxJumboFramesOK); ++ RMON_UPDATE(mac, RxJumboOctetsReceivedOK, RxJumboOctetsOK); ++ ++ /* Tx stats */ ++ RMON_UPDATE(mac, TxOctetsTransmittedOK, TxOctetsOK); ++ RMON_UPDATE(mac, TxFramesLostDueToInternalMACTransmissionError, ++ TxInternalMACXmitError); ++ RMON_UPDATE(mac, TxTransmitSystemError, TxFCSErrors); ++ RMON_UPDATE(mac, TxUnicastFramesTransmittedOK, TxUnicastFramesOK); ++ RMON_UPDATE(mac, TxMulticastFramesTransmittedOK, TxMulticastFramesOK); ++ RMON_UPDATE(mac, TxBroadcastFramesTransmittedOK, TxBroadcastFramesOK); ++ RMON_UPDATE(mac, TxPAUSEMACCtrlFramesTransmitted, TxPauseFrames); ++ RMON_UPDATE(mac, TxJumboFramesReceivedOK, TxJumboFramesOK); ++ RMON_UPDATE(mac, TxJumboOctetsReceivedOK, TxJumboOctetsOK); + + return &mac->stats; + } +diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c +index e4f874a..d77f1eb 100644 +--- a/drivers/net/chelsio/sge.c ++++ b/drivers/net/chelsio/sge.c +@@ -986,11 +986,10 @@ void t1_sge_get_port_stats(const struct sge *sge, int port, + for_each_possible_cpu(cpu) { + struct sge_port_stats *st = per_cpu_ptr(sge->port_stats[port], cpu); + +- ss->rx_packets += st->rx_packets; + ss->rx_cso_good += st->rx_cso_good; +- ss->tx_packets += st->tx_packets; + ss->tx_cso += st->tx_cso; + ss->tx_tso += st->tx_tso; ++ ss->tx_need_hdrroom += st->tx_need_hdrroom; + ss->vlan_xtract += st->vlan_xtract; + ss->vlan_insert += st->vlan_insert; + } +@@ -1379,11 +1378,10 @@ static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) + } + __skb_pull(skb, sizeof(*p)); + +- skb->dev->last_rx = jiffies; + st = per_cpu_ptr(sge->port_stats[p->iff], smp_processor_id()); +- st->rx_packets++; + + skb->protocol = eth_type_trans(skb, adapter->port[p->iff].dev); ++ skb->dev->last_rx = jiffies; + if ((adapter->flags & RX_CSUM_ENABLED) && p->csum == 0xffff && + skb->protocol == htons(ETH_P_IP) && + (skb->data[9] == IPPROTO_TCP || skb->data[9] == IPPROTO_UDP)) { +@@ -1851,7 +1849,8 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct adapter *adapter = dev->priv; + struct sge *sge = adapter->sge; +- struct sge_port_stats *st = per_cpu_ptr(sge->port_stats[dev->if_port], smp_processor_id()); ++ struct sge_port_stats *st = per_cpu_ptr(sge->port_stats[dev->if_port], ++ smp_processor_id()); + struct cpl_tx_pkt *cpl; + struct sk_buff *orig_skb = skb; + int ret; +@@ -1859,6 +1858,18 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) + if (skb->protocol == htons(ETH_P_CPL5)) + goto send; + ++ /* ++ * We are using a non-standard hard_header_len. ++ * Allocate more header room in the rare cases it is not big enough. ++ */ ++ if (unlikely(skb_headroom(skb) < dev->hard_header_len - ETH_HLEN)) { ++ skb = skb_realloc_headroom(skb, sizeof(struct cpl_tx_pkt_lso)); ++ ++st->tx_need_hdrroom; ++ dev_kfree_skb_any(orig_skb); ++ if (!skb) ++ return NETDEV_TX_OK; ++ } ++ + if (skb_shinfo(skb)->gso_size) { + int eth_type; + struct cpl_tx_pkt_lso *hdr; +@@ -1892,24 +1903,6 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) + return NETDEV_TX_OK; + } + +- /* +- * We are using a non-standard hard_header_len and some kernel +- * components, such as pktgen, do not handle it right. +- * Complain when this happens but try to fix things up. +- */ +- if (unlikely(skb_headroom(skb) < dev->hard_header_len - ETH_HLEN)) { +- pr_debug("%s: headroom %d header_len %d\n", dev->name, +- skb_headroom(skb), dev->hard_header_len); +- +- if (net_ratelimit()) +- printk(KERN_ERR "%s: inadequate headroom in " +- "Tx packet\n", dev->name); +- skb = skb_realloc_headroom(skb, sizeof(*cpl)); +- dev_kfree_skb_any(orig_skb); +- if (!skb) +- return NETDEV_TX_OK; +- } +- + if (!(adapter->flags & UDP_CSUM_CAPABLE) && + skb->ip_summed == CHECKSUM_PARTIAL && + ip_hdr(skb)->protocol == IPPROTO_UDP) { +@@ -1955,7 +1948,6 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) + cpl->vlan_valid = 0; + + send: +- st->tx_packets++; + dev->trans_start = jiffies; + ret = t1_sge_tx(skb, adapter, 0, dev); + +diff --git a/drivers/net/chelsio/sge.h b/drivers/net/chelsio/sge.h +index d132a0e..80165f9 100644 +--- a/drivers/net/chelsio/sge.h ++++ b/drivers/net/chelsio/sge.h +@@ -57,13 +57,12 @@ struct sge_intr_counts { + }; + + struct sge_port_stats { +- u64 rx_packets; /* # of Ethernet packets received */ + u64 rx_cso_good; /* # of successful RX csum offloads */ +- u64 tx_packets; /* # of TX packets */ + u64 tx_cso; /* # of TX checksum offloads */ + u64 tx_tso; /* # of TSO requests */ + u64 vlan_xtract; /* # of VLAN tag extractions */ + u64 vlan_insert; /* # of VLAN tag insertions */ ++ u64 tx_need_hdrroom; /* # of TX skbs in need of more header room */ + }; + + struct sk_buff; +diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c +index 60d2944..4ebb6ea 100644 +--- a/drivers/net/usb/kaweth.c ++++ b/drivers/net/usb/kaweth.c +@@ -70,7 +70,7 @@ + #define KAWETH_TX_TIMEOUT (5 * HZ) + #define KAWETH_SCRATCH_SIZE 32 + #define KAWETH_FIRMWARE_BUF_SIZE 4096 +-#define KAWETH_CONTROL_TIMEOUT (30 * HZ) ++#define KAWETH_CONTROL_TIMEOUT (30000) + + #define KAWETH_STATUS_BROKEN 0x0000001 + #define KAWETH_STATUS_CLOSING 0x0000002 +diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c +index 6240b97..3bbc5c4 100644 +--- a/drivers/net/usb/mcs7830.c ++++ b/drivers/net/usb/mcs7830.c +@@ -94,7 +94,7 @@ static int mcs7830_get_reg(struct usbnet *dev, u16 index, u16 size, void *data) + + ret = usb_control_msg(xdev, usb_rcvctrlpipe(xdev, 0), MCS7830_RD_BREQ, + MCS7830_RD_BMREQ, 0x0000, index, data, +- size, msecs_to_jiffies(MCS7830_CTRL_TIMEOUT)); ++ size, MCS7830_CTRL_TIMEOUT); + return ret; + } + +@@ -105,7 +105,7 @@ static int mcs7830_set_reg(struct usbnet *dev, u16 index, u16 size, void *data) + + ret = usb_control_msg(xdev, usb_sndctrlpipe(xdev, 0), MCS7830_WR_BREQ, + MCS7830_WR_BMREQ, 0x0000, index, data, +- size, msecs_to_jiffies(MCS7830_CTRL_TIMEOUT)); ++ size, MCS7830_CTRL_TIMEOUT); + return ret; + } + +diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c +index 749e7d8..9f90c10 100644 +--- a/drivers/pci/quirks.c ++++ b/drivers/pci/quirks.c +@@ -465,6 +465,12 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31, quirk + DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_0, quirk_ich6_lpc_acpi ); + DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_2, quirk_ich6_lpc_acpi ); + DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_3, quirk_ich6_lpc_acpi ); ++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, quirk_ich6_lpc_acpi ); ++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4, quirk_ich6_lpc_acpi ); ++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_2, quirk_ich6_lpc_acpi ); ++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_4, quirk_ich6_lpc_acpi ); ++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, quirk_ich6_lpc_acpi ); ++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_8, quirk_ich6_lpc_acpi ); + + /* + * VIA ACPI: One IO region pointed to by longword at +diff --git a/fs/exec.c b/fs/exec.c +index 3da429d..224e973 100644 +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -1561,6 +1561,12 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) + but keep the previous behaviour for now. */ + if (!ispipe && !S_ISREG(inode->i_mode)) + goto close_fail; ++ /* ++ * Dont allow local users get cute and trick others to coredump ++ * into their pre-created files: ++ */ ++ if (inode->i_uid != current->fsuid) ++ goto close_fail; + if (!file->f_op) + goto close_fail; + if (!file->f_op->write) +diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c +index 70a6911..f87de97 100644 +--- a/fs/ncpfs/mmap.c ++++ b/fs/ncpfs/mmap.c +@@ -47,9 +47,6 @@ static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area, + pos = address - area->vm_start + (area->vm_pgoff << PAGE_SHIFT); + + count = PAGE_SIZE; +- if (address + PAGE_SIZE > area->vm_end) { +- count = area->vm_end - address; +- } + /* what we can read in one go */ + bufsize = NCP_SERVER(inode)->buffer_size; + +diff --git a/include/asm-sparc64/hypervisor.h b/include/asm-sparc64/hypervisor.h +index db2130a..a63a1f6 100644 +--- a/include/asm-sparc64/hypervisor.h ++++ b/include/asm-sparc64/hypervisor.h +@@ -709,6 +709,10 @@ extern unsigned long sun4v_mmu_tsb_ctx0(unsigned long num_descriptions, + */ + #define HV_FAST_MMU_DEMAP_ALL 0x24 + ++#ifndef __ASSEMBLY__ ++extern void sun4v_mmu_demap_all(void); ++#endif ++ + /* mmu_map_perm_addr() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_MAP_PERM_ADDR +diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h +index 887c2ce..c6c9d48 100644 +--- a/include/linux/pci_ids.h ++++ b/include/linux/pci_ids.h +@@ -2285,6 +2285,8 @@ + #define PCI_DEVICE_ID_INTEL_ICH9_4 0x2914 + #define PCI_DEVICE_ID_INTEL_ICH9_5 0x2919 + #define PCI_DEVICE_ID_INTEL_ICH9_6 0x2930 ++#define PCI_DEVICE_ID_INTEL_ICH9_7 0x2916 ++#define PCI_DEVICE_ID_INTEL_ICH9_8 0x2918 + #define PCI_DEVICE_ID_INTEL_82855PM_HB 0x3340 + #define PCI_DEVICE_ID_INTEL_82830_HB 0x3575 + #define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577 +diff --git a/kernel/relay.c b/kernel/relay.c +index 95db8c7..24db7e8 100644 +--- a/kernel/relay.c ++++ b/kernel/relay.c +@@ -91,6 +91,7 @@ int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) + return -EINVAL; + + vma->vm_ops = &relay_file_mmap_ops; ++ vma->vm_flags |= VM_DONTEXPAND; + vma->vm_private_data = buf; + buf->chan->cb->buf_mapped(buf, filp); + +diff --git a/mm/mmap.c b/mm/mmap.c +index 906ed40..33fb671 100644 +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -2157,7 +2157,7 @@ int install_special_mapping(struct mm_struct *mm, + vma->vm_start = addr; + vma->vm_end = addr + len; + +- vma->vm_flags = vm_flags | mm->def_flags; ++ vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND; + vma->vm_page_prot = protection_map[vma->vm_flags & 7]; + + vma->vm_ops = &special_mapping_vmops; +diff --git a/net/atm/mpc.c b/net/atm/mpc.c +index 7c85aa5..181c1c8 100644 +--- a/net/atm/mpc.c ++++ b/net/atm/mpc.c +@@ -542,6 +542,13 @@ static int mpc_send_packet(struct sk_buff *skb, struct net_device *dev) + if (eth->h_proto != htons(ETH_P_IP)) + goto non_ip; /* Multi-Protocol Over ATM :-) */ + ++ /* Weed out funny packets (e.g., AF_PACKET or raw). */ ++ if (skb->len < ETH_HLEN + sizeof(struct iphdr)) ++ goto non_ip; ++ skb_set_network_header(skb, ETH_HLEN); ++ if (skb->len < ETH_HLEN + ip_hdr(skb)->ihl * 4 || ip_hdr(skb)->ihl < 5) ++ goto non_ip; ++ + while (i < mpc->number_of_mps_macs) { + if (!compare_ether_addr(eth->h_dest, (mpc->mps_macs + i*ETH_ALEN))) + if ( send_via_shortcut(skb, mpc) == 0 ) /* try shortcut */ +diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c +index 0ddaff0..8a9f0ac 100644 +--- a/net/ax25/ax25_in.c ++++ b/net/ax25/ax25_in.c +@@ -124,7 +124,7 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb) + } + + skb_pull(skb, 1); /* Remove PID */ +- skb_reset_mac_header(skb); ++ skb->mac_header = skb->network_header; + skb_reset_network_header(skb); + skb->dev = ax25->ax25_dev->dev; + skb->pkt_type = PACKET_HOST; +diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c +index 0dcc245..9607d78 100644 +--- a/net/ipv4/devinet.c ++++ b/net/ipv4/devinet.c +@@ -1030,7 +1030,7 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) + memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + if (named++ == 0) + continue; +- dot = strchr(ifa->ifa_label, ':'); ++ dot = strchr(old, ':'); + if (dot == NULL) { + sprintf(old, ":%d", named); + dot = old; +diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c +index 6328293..724b612 100644 +--- a/net/ipv4/ip_gre.c ++++ b/net/ipv4/ip_gre.c +@@ -613,7 +613,7 @@ static int ipgre_rcv(struct sk_buff *skb) + offset += 4; + } + +- skb_reset_mac_header(skb); ++ skb->mac_header = skb->network_header; + __pskb_pull(skb, offset); + skb_reset_network_header(skb); + skb_postpull_rcsum(skb, skb_transport_header(skb), offset); +diff --git a/net/ipv4/route.c b/net/ipv4/route.c +index 4aa2551..8f443ed 100644 +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -2885,11 +2885,10 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) + int idx, s_idx; + + s_h = cb->args[0]; ++ if (s_h < 0) ++ s_h = 0; + s_idx = idx = cb->args[1]; +- for (h = 0; h <= rt_hash_mask; h++) { +- if (h < s_h) continue; +- if (h > s_h) +- s_idx = 0; ++ for (h = s_h; h <= rt_hash_mask; h++) { + rcu_read_lock_bh(); + for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; + rt = rcu_dereference(rt->u.dst.rt_next), idx++) { +@@ -2906,6 +2905,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) + dst_release(xchg(&skb->dst, NULL)); + } + rcu_read_unlock_bh(); ++ s_idx = 0; + } + + done: +diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c +index dcd7e32..73708b5 100644 +--- a/net/irda/af_irda.c ++++ b/net/irda/af_irda.c +@@ -1115,8 +1115,6 @@ static int irda_create(struct socket *sock, int protocol) + self->max_sdu_size_rx = TTP_SAR_UNBOUND; + break; + default: +- IRDA_ERROR("%s: protocol not supported!\n", +- __FUNCTION__); + return -ESOCKTNOSUPPORT; + } + break; +diff --git a/net/key/af_key.c b/net/key/af_key.c +index ca0db0f..0be3be2 100644 +--- a/net/key/af_key.c ++++ b/net/key/af_key.c +@@ -2777,12 +2777,22 @@ static struct sadb_msg *pfkey_get_base_msg(struct sk_buff *skb, int *errp) + + static inline int aalg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d) + { +- return t->aalgos & (1 << d->desc.sadb_alg_id); ++ unsigned int id = d->desc.sadb_alg_id; ++ ++ if (id >= sizeof(t->aalgos) * 8) ++ return 0; ++ ++ return (t->aalgos >> id) & 1; + } + + static inline int ealg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d) + { +- return t->ealgos & (1 << d->desc.sadb_alg_id); ++ unsigned int id = d->desc.sadb_alg_id; ++ ++ if (id >= sizeof(t->ealgos) * 8) ++ return 0; ++ ++ return (t->ealgos >> id) & 1; + } + + static int count_ah_combs(struct xfrm_tmpl *t) +diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c +index c7b5d93..69e77d5 100644 +--- a/net/netrom/nr_dev.c ++++ b/net/netrom/nr_dev.c +@@ -56,7 +56,7 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) + + /* Spoof incoming device */ + skb->dev = dev; +- skb_reset_mac_header(skb); ++ skb->mac_header = skb->network_header; + skb_reset_network_header(skb); + skb->pkt_type = PACKET_HOST; + +diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c +index 8738ec7..3447803 100644 +--- a/net/x25/x25_forward.c ++++ b/net/x25/x25_forward.c +@@ -118,13 +118,14 @@ int x25_forward_data(int lci, struct x25_neigh *from, struct sk_buff *skb) { + goto out; + + if ( (skbn = pskb_copy(skb, GFP_ATOMIC)) == NULL){ +- goto out; ++ goto output; + + } + x25_transmit_link(skbn, nb); + +- x25_neigh_put(nb); + rc = 1; ++output: ++ x25_neigh_put(nb); + out: + return rc; + } +diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c +index b48f06f..1c86a23 100644 +--- a/net/xfrm/xfrm_policy.c ++++ b/net/xfrm/xfrm_policy.c +@@ -1479,8 +1479,9 @@ restart: + + if (sk && sk->sk_policy[1]) { + policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); ++ err = PTR_ERR(policy); + if (IS_ERR(policy)) +- return PTR_ERR(policy); ++ goto dropdst; + } + + if (!policy) { +@@ -1491,8 +1492,9 @@ restart: + + policy = flow_cache_lookup(fl, dst_orig->ops->family, + dir, xfrm_policy_lookup); ++ err = PTR_ERR(policy); + if (IS_ERR(policy)) +- return PTR_ERR(policy); ++ goto dropdst; + } + + if (!policy) +@@ -1661,8 +1663,9 @@ restart: + return 0; + + error: +- dst_release(dst_orig); + xfrm_pols_put(pols, npols); ++dropdst: ++ dst_release(dst_orig); + *dst_p = NULL; + return err; + } +diff --git a/sound/oss/via82cxxx_audio.c b/sound/oss/via82cxxx_audio.c +index 5d3c037..f95aa09 100644 +--- a/sound/oss/via82cxxx_audio.c ++++ b/sound/oss/via82cxxx_audio.c +@@ -2104,6 +2104,7 @@ static struct page * via_mm_nopage (struct vm_area_struct * vma, + { + struct via_info *card = vma->vm_private_data; + struct via_channel *chan = &card->ch_out; ++ unsigned long max_bufs; + struct page *dmapage; + unsigned long pgoff; + int rd, wr; +@@ -2127,14 +2128,11 @@ static struct page * via_mm_nopage (struct vm_area_struct * vma, + rd = card->ch_in.is_mapped; + wr = card->ch_out.is_mapped; + +-#ifndef VIA_NDEBUG +- { +- unsigned long max_bufs = chan->frag_number; +- if (rd && wr) max_bufs *= 2; +- /* via_dsp_mmap() should ensure this */ +- assert (pgoff < max_bufs); +- } +-#endif ++ max_bufs = chan->frag_number; ++ if (rd && wr) ++ max_bufs *= 2; ++ if (pgoff >= max_bufs) ++ return NOPAGE_SIGBUS; + + /* if full-duplex (read+write) and we have two sets of bufs, + * then the playback buffers come first, sez soundcard.c */ +diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c +index b76b3dd..e617d7e 100644 +--- a/sound/usb/usx2y/usX2Yhwdep.c ++++ b/sound/usb/usx2y/usX2Yhwdep.c +@@ -88,7 +88,7 @@ static int snd_us428ctls_mmap(struct snd_hwdep * hw, struct file *filp, struct v + us428->us428ctls_sharedmem->CtlSnapShotLast = -2; + } + area->vm_ops = &us428ctls_vm_ops; +- area->vm_flags |= VM_RESERVED; ++ area->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + area->vm_private_data = hw->private_data; + return 0; + } +diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c +index a5e7bcd..6e70520 100644 +--- a/sound/usb/usx2y/usx2yhwdeppcm.c ++++ b/sound/usb/usx2y/usx2yhwdeppcm.c +@@ -728,7 +728,7 @@ static int snd_usX2Y_hwdep_pcm_mmap(struct snd_hwdep * hw, struct file *filp, st + return -ENODEV; + } + area->vm_ops = &snd_usX2Y_hwdep_pcm_vm_ops; +- area->vm_flags |= VM_RESERVED; ++ area->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + area->vm_private_data = hw->private_data; + return 0; + } diff --git a/tags/2.6.22-2/01017_linux-2.6.22.18.patch b/tags/2.6.22-2/01017_linux-2.6.22.18.patch new file mode 100644 index 0000000..4f87816 --- /dev/null +++ b/tags/2.6.22-2/01017_linux-2.6.22.18.patch @@ -0,0 +1,14 @@ +diff --git a/fs/splice.c b/fs/splice.c +index e263d3b..dbbe267 100644 +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -1182,6 +1182,9 @@ static int get_iovec_page_array(const struct iovec __user *iov, + if (unlikely(!base)) + break; + ++ if (!access_ok(VERIFY_READ, base, len)) ++ break; ++ + /* + * Get this base offset and number of pages, then map + * in the user pages. diff --git a/tags/2.6.22-2/20001_x86-early-quirks-unificiation.patch1 b/tags/2.6.22-2/20001_x86-early-quirks-unificiation.patch1 new file mode 100644 index 0000000..12dd843 --- /dev/null +++ b/tags/2.6.22-2/20001_x86-early-quirks-unificiation.patch1 @@ -0,0 +1,237 @@ +Subject: x86: Unify i386 and x86-64 early quirks + +They were already very similar; just use the same file now. + +Cc: lenb@kernel.org + +Signed-off-by: Andi Kleen + +--- + arch/i386/kernel/Makefile | 2 + arch/i386/kernel/acpi/Makefile | 3 - + arch/i386/kernel/acpi/earlyquirk.c | 84 ------------------------------------- + arch/i386/kernel/setup.c | 4 - + arch/x86_64/kernel/early-quirks.c | 11 ++++ + include/asm-i386/acpi.h | 6 -- + include/asm-i386/dma.h | 2 + include/asm-x86_64/io_apic.h | 2 + include/asm-x86_64/proto.h | 2 + 9 files changed, 18 insertions(+), 98 deletions(-) + +--- a/arch/i386/kernel/Makefile 2007-08-27 14:01:19.000000000 -0400 ++++ b/arch/i386/kernel/Makefile 2007-08-27 14:02:11.000000000 -0400 +@@ -17,6 +17,7 @@ obj-$(CONFIG_MCA) += mca.o + obj-$(CONFIG_X86_MSR) += msr.o + obj-$(CONFIG_X86_CPUID) += cpuid.o + obj-$(CONFIG_MICROCODE) += microcode.o ++obj-$(CONFIG_PCI) += early-quirks.o + obj-$(CONFIG_APM) += apm.o + obj-$(CONFIG_X86_SMP) += smp.o smpboot.o tsc_sync.o + obj-$(CONFIG_SMP) += smpcommon.o +@@ -84,4 +85,5 @@ $(obj)/vsyscall-syms.o: $(src)/vsyscall. + + k8-y += ../../x86_64/kernel/k8.o + stacktrace-y += ../../x86_64/kernel/stacktrace.o ++early-quirks-y += ../../x86_64/kernel/early-quirks.o + +--- a/arch/i386/kernel/acpi/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ b/arch/i386/kernel/acpi/Makefile 2007-08-27 14:02:11.000000000 -0400 +@@ -1,7 +1,4 @@ + obj-$(CONFIG_ACPI) += boot.o +-ifneq ($(CONFIG_PCI),) +-obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o +-endif + obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o + + ifneq ($(CONFIG_ACPI_PROCESSOR),) +--- a/arch/i386/kernel/acpi/earlyquirk.c 2007-07-08 19:32:17.000000000 -0400 ++++ /dev/null 1970-01-01 00:00:00.000000000 +0000 +@@ -1,84 +0,0 @@ +-/* +- * Do early PCI probing for bug detection when the main PCI subsystem is +- * not up yet. +- */ +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +- +-#ifdef CONFIG_ACPI +- +-static int __init nvidia_hpet_check(struct acpi_table_header *header) +-{ +- return 0; +-} +-#endif +- +-static int __init check_bridge(int vendor, int device) +-{ +-#ifdef CONFIG_ACPI +- static int warned; +- /* According to Nvidia all timer overrides are bogus unless HPET +- is enabled. */ +- if (!acpi_use_timer_override && vendor == PCI_VENDOR_ID_NVIDIA) { +- if (!warned && acpi_table_parse(ACPI_SIG_HPET, +- nvidia_hpet_check)) { +- warned = 1; +- acpi_skip_timer_override = 1; +- printk(KERN_INFO "Nvidia board " +- "detected. Ignoring ACPI " +- "timer override.\n"); +- printk(KERN_INFO "If you got timer trouble " +- "try acpi_use_timer_override\n"); +- +- } +- } +-#endif +- if (vendor == PCI_VENDOR_ID_ATI && timer_over_8254 == 1) { +- timer_over_8254 = 0; +- printk(KERN_INFO "ATI board detected. Disabling timer routing " +- "over 8254.\n"); +- } +- return 0; +-} +- +-void __init check_acpi_pci(void) +-{ +- int num, slot, func; +- +- /* Assume the machine supports type 1. If not it will +- always read ffffffff and should not have any side effect. +- Actually a few buggy systems can machine check. Allow the user +- to disable it by command line option at least -AK */ +- if (!early_pci_allowed()) +- return; +- +- /* Poor man's PCI discovery */ +- for (num = 0; num < 32; num++) { +- for (slot = 0; slot < 32; slot++) { +- for (func = 0; func < 8; func++) { +- u32 class; +- u32 vendor; +- class = read_pci_config(num, slot, func, +- PCI_CLASS_REVISION); +- if (class == 0xffffffff) +- break; +- +- if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) +- continue; +- +- vendor = read_pci_config(num, slot, func, +- PCI_VENDOR_ID); +- +- if (check_bridge(vendor & 0xffff, vendor >> 16)) +- return; +- } +- +- } +- } +-} +--- a/arch/i386/kernel/setup.c 2007-07-08 19:32:17.000000000 -0400 ++++ b/arch/i386/kernel/setup.c 2007-08-27 14:01:19.000000000 -0400 +@@ -627,9 +627,7 @@ void __init setup_arch(char **cmdline_p) + #endif + + #ifdef CONFIG_PCI +-#ifdef CONFIG_X86_IO_APIC +- check_acpi_pci(); /* Checks more than just ACPI actually */ +-#endif ++ early_quirks(); + #endif + + #ifdef CONFIG_ACPI +--- a/arch/x86_64/kernel/early-quirks.c 2007-07-08 19:32:17.000000000 -0400 ++++ b/arch/x86_64/kernel/early-quirks.c 2007-08-27 14:04:27.000000000 -0400 +@@ -13,9 +13,14 @@ + #include + #include + #include +-#include ++#include ++#include + #include + ++#ifdef CONFIG_X86_64 ++#include ++#endif ++ + static void __init via_bugs(void) + { + #ifdef CONFIG_IOMMU +@@ -39,6 +44,7 @@ static int __init nvidia_hpet_check(stru + static void __init nvidia_bugs(void) + { + #ifdef CONFIG_ACPI ++#ifdef CONFIG_X86_IO_APIC + /* + * All timer overrides on Nvidia are + * wrong unless HPET is enabled. +@@ -58,17 +64,20 @@ static void __init nvidia_bugs(void) + "try acpi_use_timer_override\n"); + } + #endif ++#endif + /* RED-PEN skip them on mptables too? */ + + } + + static void __init ati_bugs(void) + { ++#ifdef CONFIG_X86_IO_APIC + if (timer_over_8254 == 1) { + timer_over_8254 = 0; + printk(KERN_INFO + "ATI board detected. Disabling timer routing over 8254.\n"); + } ++#endif + } + + struct chipset { +--- a/include/asm-i386/acpi.h 2007-07-08 19:32:17.000000000 -0400 ++++ b/include/asm-i386/acpi.h 2007-08-27 14:02:03.000000000 -0400 +@@ -81,11 +81,7 @@ int __acpi_release_global_lock(unsigned + :"=r"(n_hi), "=r"(n_lo) \ + :"0"(n_hi), "1"(n_lo)) + +-#ifdef CONFIG_X86_IO_APIC +-extern void check_acpi_pci(void); +-#else +-static inline void check_acpi_pci(void) { } +-#endif ++extern void early_quirks(void); + + #ifdef CONFIG_ACPI + extern int acpi_lapic; +--- a/include/asm-i386/dma.h 2007-07-08 19:32:17.000000000 -0400 ++++ b/include/asm-i386/dma.h 2007-08-27 14:01:19.000000000 -0400 +@@ -294,4 +294,6 @@ extern int isa_dma_bridge_buggy; + #define isa_dma_bridge_buggy (0) + #endif + ++#define MAX_DMA32_PFN ((4UL*1024*1024*1024) >> PAGE_SHIFT) ++ + #endif /* _ASM_DMA_H */ +--- a/include/asm-x86_64/io_apic.h 2007-07-08 19:32:17.000000000 -0400 ++++ b/include/asm-x86_64/io_apic.h 2007-08-27 14:01:51.000000000 -0400 +@@ -127,4 +127,6 @@ void enable_NMI_through_LVT0 (void * dum + + extern spinlock_t i8259A_lock; + ++extern int timer_over_8254; ++ + #endif +--- a/include/asm-x86_64/proto.h 2007-07-08 19:32:17.000000000 -0400 ++++ b/include/asm-x86_64/proto.h 2007-08-27 14:01:19.000000000 -0400 +@@ -106,8 +106,6 @@ extern int fix_aperture; + extern int reboot_force; + extern int notsc_setup(char *); + +-extern int timer_over_8254; +- + extern int gsi_irq_sharing(int gsi); + + extern void smp_local_timer_interrupt(void); diff --git a/tags/2.6.22-2/20002_add-console-use-vt.patch1 b/tags/2.6.22-2/20002_add-console-use-vt.patch1 new file mode 100644 index 0000000..a63a027 --- /dev/null +++ b/tags/2.6.22-2/20002_add-console-use-vt.patch1 @@ -0,0 +1,58 @@ +Subject: add console_use_vt +From: kraxel@suse.de +Patch-mainline: no + +$subject says all + +--- + drivers/char/tty_io.c | 7 ++++++- + include/linux/console.h | 1 + + 2 files changed, 7 insertions(+), 1 deletion(-) + +--- a/drivers/char/tty_io.c 2007-08-27 14:01:21.000000000 -0400 ++++ b/drivers/char/tty_io.c 2007-08-27 14:01:24.000000000 -0400 +@@ -133,6 +133,8 @@ LIST_HEAD(tty_drivers); /* linked list + DEFINE_MUTEX(tty_mutex); + EXPORT_SYMBOL(tty_mutex); + ++int console_use_vt = 1; ++ + #ifdef CONFIG_UNIX98_PTYS + extern struct tty_driver *ptm_driver; /* Unix98 pty masters; for /dev/ptmx */ + extern int pty_limit; /* Config limit on Unix98 ptys */ +@@ -2581,7 +2583,7 @@ retry_open: + goto got_driver; + } + #ifdef CONFIG_VT +- if (device == MKDEV(TTY_MAJOR,0)) { ++ if (console_use_vt && device == MKDEV(TTY_MAJOR,0)) { + extern struct tty_driver *console_driver; + driver = console_driver; + index = fg_console; +@@ -4048,6 +4050,8 @@ static int __init tty_init(void) + #endif + + #ifdef CONFIG_VT ++ if (!console_use_vt) ++ goto out_vt; + cdev_init(&vc0_cdev, &console_fops); + if (cdev_add(&vc0_cdev, MKDEV(TTY_MAJOR, 0), 1) || + register_chrdev_region(MKDEV(TTY_MAJOR, 0), 1, "/dev/vc/0") < 0) +@@ -4055,6 +4059,7 @@ static int __init tty_init(void) + device_create(tty_class, NULL, MKDEV(TTY_MAJOR, 0), "tty0"); + + vty_init(); ++ out_vt: + #endif + return 0; + } +--- a/include/linux/console.h 2007-08-27 14:01:24.000000000 -0400 ++++ b/include/linux/console.h 2007-08-27 14:01:24.000000000 -0400 +@@ -63,6 +63,7 @@ extern const struct consw dummy_con; /* + extern const struct consw vga_con; /* VGA text console */ + extern const struct consw newport_con; /* SGI Newport console */ + extern const struct consw prom_con; /* SPARC PROM console */ ++extern int console_use_vt; + + int con_is_bound(const struct consw *csw); + int register_con_driver(const struct consw *csw, int first, int last); diff --git a/tags/2.6.22-2/20003_linux-2.6.19-rc1-kexec-move_segment_code-i386.patch1 b/tags/2.6.22-2/20003_linux-2.6.19-rc1-kexec-move_segment_code-i386.patch1 new file mode 100644 index 0000000..0e266e1 --- /dev/null +++ b/tags/2.6.22-2/20003_linux-2.6.19-rc1-kexec-move_segment_code-i386.patch1 @@ -0,0 +1,172 @@ +Subject: kexec: Move asm segment handling code to the assembly file (i386) +From: http://xenbits.xensource.com/xen-unstable.hg (tip 13816) +Patch-mainline: obsolete + +This patch moves the idt, gdt, and segment handling code from machine_kexec.c +to relocate_kernel.S. The main reason behind this move is to avoid code +duplication in the Xen hypervisor. With this patch all code required to kexec +is put on the control page. + +On top of that this patch also counts as a cleanup - I think it is much +nicer to write assembly directly in assembly files than wrap inline assembly +in C functions for no apparent reason. + +Signed-off-by: Magnus Damm +Acked-by: jbeulich@novell.com +--- + + Applies to 2.6.19-rc1. + + arch/i386/kernel/machine_kexec.c | 59 ------------------------------------- + arch/i386/kernel/relocate_kernel.S | 58 +++++++++++++++++++++++++++++++++--- + 2 files changed, 53 insertions(+), 64 deletions(-) + +--- a/arch/i386/kernel/machine_kexec.c 2007-08-27 12:09:26.000000000 -0400 ++++ b/arch/i386/kernel/machine_kexec.c 2007-08-27 14:02:11.000000000 -0400 +@@ -29,48 +29,6 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED + static u32 kexec_pte0[1024] PAGE_ALIGNED; + static u32 kexec_pte1[1024] PAGE_ALIGNED; + +-static void set_idt(void *newidt, __u16 limit) +-{ +- struct Xgt_desc_struct curidt; +- +- /* ia32 supports unaliged loads & stores */ +- curidt.size = limit; +- curidt.address = (unsigned long)newidt; +- +- load_idt(&curidt); +-}; +- +- +-static void set_gdt(void *newgdt, __u16 limit) +-{ +- struct Xgt_desc_struct curgdt; +- +- /* ia32 supports unaligned loads & stores */ +- curgdt.size = limit; +- curgdt.address = (unsigned long)newgdt; +- +- load_gdt(&curgdt); +-}; +- +-static void load_segments(void) +-{ +-#define __STR(X) #X +-#define STR(X) __STR(X) +- +- __asm__ __volatile__ ( +- "\tljmp $"STR(__KERNEL_CS)",$1f\n" +- "\t1:\n" +- "\tmovl $"STR(__KERNEL_DS)",%%eax\n" +- "\tmovl %%eax,%%ds\n" +- "\tmovl %%eax,%%es\n" +- "\tmovl %%eax,%%fs\n" +- "\tmovl %%eax,%%gs\n" +- "\tmovl %%eax,%%ss\n" +- ::: "eax", "memory"); +-#undef STR +-#undef __STR +-} +- + /* + * A architecture hook called to validate the + * proposed image and prepare the control pages +@@ -127,23 +85,6 @@ NORET_TYPE void machine_kexec(struct kim + page_list[PA_PTE_1] = __pa(kexec_pte1); + page_list[VA_PTE_1] = (unsigned long)kexec_pte1; + +- /* The segment registers are funny things, they have both a +- * visible and an invisible part. Whenever the visible part is +- * set to a specific selector, the invisible part is loaded +- * with from a table in memory. At no other time is the +- * descriptor table in memory accessed. +- * +- * I take advantage of this here by force loading the +- * segments, before I zap the gdt with an invalid value. +- */ +- load_segments(); +- /* The gdt & idt are now invalid. +- * If you want to load them you must set up your own idt & gdt. +- */ +- set_gdt(phys_to_virt(0),0); +- set_idt(phys_to_virt(0),0); +- +- /* now call it */ + relocate_kernel((unsigned long)image->head, (unsigned long)page_list, + image->start, cpu_has_pae); + } +--- a/arch/i386/kernel/relocate_kernel.S 2007-08-27 12:09:26.000000000 -0400 ++++ b/arch/i386/kernel/relocate_kernel.S 2007-08-27 14:01:24.000000000 -0400 +@@ -154,14 +154,45 @@ relocate_new_kernel: + movl PTR(PA_PGD)(%ebp), %eax + movl %eax, %cr3 + ++ /* setup idt */ ++ movl %edi, %eax ++ addl $(idt_48 - relocate_kernel), %eax ++ lidtl (%eax) ++ ++ /* setup gdt */ ++ movl %edi, %eax ++ addl $(gdt - relocate_kernel), %eax ++ movl %edi, %esi ++ addl $((gdt_48 - relocate_kernel) + 2), %esi ++ movl %eax, (%esi) ++ ++ movl %edi, %eax ++ addl $(gdt_48 - relocate_kernel), %eax ++ lgdtl (%eax) ++ ++ /* setup data segment registers */ ++ mov $(gdt_ds - gdt), %eax ++ mov %eax, %ds ++ mov %eax, %es ++ mov %eax, %fs ++ mov %eax, %gs ++ mov %eax, %ss ++ + /* setup a new stack at the end of the physical control page */ + lea 4096(%edi), %esp + +- /* jump to identity mapped page */ +- movl %edi, %eax +- addl $(identity_mapped - relocate_kernel), %eax +- pushl %eax +- ret ++ /* load new code segment and jump to identity mapped page */ ++ movl %edi, %esi ++ xorl %eax, %eax ++ pushl %eax ++ pushl %esi ++ pushl %eax ++ movl $(gdt_cs - gdt), %eax ++ pushl %eax ++ movl %edi, %eax ++ addl $(identity_mapped - relocate_kernel),%eax ++ pushl %eax ++ iretl + + identity_mapped: + /* store the start address on the stack */ +@@ -250,3 +281,20 @@ identity_mapped: + xorl %edi, %edi + xorl %ebp, %ebp + ret ++ ++ .align 16 ++gdt: ++ .quad 0x0000000000000000 /* NULL descriptor */ ++gdt_cs: ++ .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ ++gdt_ds: ++ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ ++gdt_end: ++ ++gdt_48: ++ .word gdt_end - gdt - 1 /* limit */ ++ .long 0 /* base - filled in by code above */ ++ ++idt_48: ++ .word 0 /* limit */ ++ .long 0 /* base */ diff --git a/tags/2.6.22-2/20004_linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch1 b/tags/2.6.22-2/20004_linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch1 new file mode 100644 index 0000000..d3abb10 --- /dev/null +++ b/tags/2.6.22-2/20004_linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch1 @@ -0,0 +1,164 @@ +Subject: kexec: Move asm segment handling code to the assembly file (x86_64) +From: http://xenbits.xensource.com/xen-unstable.hg (tip 13816) +Patch-mainline: obsolete + +This patch moves the idt, gdt, and segment handling code from machine_kexec.c +to relocate_kernel.S. The main reason behind this move is to avoid code +duplication in the Xen hypervisor. With this patch all code required to kexec +is put on the control page. + +On top of that this patch also counts as a cleanup - I think it is much +nicer to write assembly directly in assembly files than wrap inline assembly +in C functions for no apparent reason. + +Signed-off-by: Magnus Damm +Acked-by: jbeulich@novell.com +--- + + Applies to 2.6.19-rc1. + + arch/x86_64/kernel/machine_kexec.c | 58 ----------------------------------- + arch/x86_64/kernel/relocate_kernel.S | 50 +++++++++++++++++++++++++++--- + 2 files changed, 45 insertions(+), 63 deletions(-) + +--- a/arch/x86_64/kernel/machine_kexec.c 2007-08-27 12:09:26.000000000 -0400 ++++ b/arch/x86_64/kernel/machine_kexec.c 2007-08-27 14:02:11.000000000 -0400 +@@ -112,47 +112,6 @@ static int init_pgtable(struct kimage *i + return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT); + } + +-static void set_idt(void *newidt, u16 limit) +-{ +- struct desc_ptr curidt; +- +- /* x86-64 supports unaliged loads & stores */ +- curidt.size = limit; +- curidt.address = (unsigned long)newidt; +- +- __asm__ __volatile__ ( +- "lidtq %0\n" +- : : "m" (curidt) +- ); +-}; +- +- +-static void set_gdt(void *newgdt, u16 limit) +-{ +- struct desc_ptr curgdt; +- +- /* x86-64 supports unaligned loads & stores */ +- curgdt.size = limit; +- curgdt.address = (unsigned long)newgdt; +- +- __asm__ __volatile__ ( +- "lgdtq %0\n" +- : : "m" (curgdt) +- ); +-}; +- +-static void load_segments(void) +-{ +- __asm__ __volatile__ ( +- "\tmovl %0,%%ds\n" +- "\tmovl %0,%%es\n" +- "\tmovl %0,%%ss\n" +- "\tmovl %0,%%fs\n" +- "\tmovl %0,%%gs\n" +- : : "a" (__KERNEL_DS) : "memory" +- ); +-} +- + int machine_kexec_prepare(struct kimage *image) + { + unsigned long start_pgtable; +@@ -209,23 +168,6 @@ NORET_TYPE void machine_kexec(struct kim + page_list[PA_TABLE_PAGE] = + (unsigned long)__pa(page_address(image->control_code_page)); + +- /* The segment registers are funny things, they have both a +- * visible and an invisible part. Whenever the visible part is +- * set to a specific selector, the invisible part is loaded +- * with from a table in memory. At no other time is the +- * descriptor table in memory accessed. +- * +- * I take advantage of this here by force loading the +- * segments, before I zap the gdt with an invalid value. +- */ +- load_segments(); +- /* The gdt & idt are now invalid. +- * If you want to load them you must set up your own idt & gdt. +- */ +- set_gdt(phys_to_virt(0),0); +- set_idt(phys_to_virt(0),0); +- +- /* now call it */ + relocate_kernel((unsigned long)image->head, (unsigned long)page_list, + image->start); + } +--- a/arch/x86_64/kernel/relocate_kernel.S 2007-08-27 12:09:26.000000000 -0400 ++++ b/arch/x86_64/kernel/relocate_kernel.S 2007-08-27 14:01:24.000000000 -0400 +@@ -159,13 +159,39 @@ relocate_new_kernel: + movq PTR(PA_PGD)(%rsi), %r9 + movq %r9, %cr3 + ++ /* setup idt */ ++ movq %r8, %rax ++ addq $(idt_80 - relocate_kernel), %rax ++ lidtq (%rax) ++ ++ /* setup gdt */ ++ movq %r8, %rax ++ addq $(gdt - relocate_kernel), %rax ++ movq %r8, %r9 ++ addq $((gdt_80 - relocate_kernel) + 2), %r9 ++ movq %rax, (%r9) ++ ++ movq %r8, %rax ++ addq $(gdt_80 - relocate_kernel), %rax ++ lgdtq (%rax) ++ ++ /* setup data segment registers */ ++ xorl %eax, %eax ++ movl %eax, %ds ++ movl %eax, %es ++ movl %eax, %fs ++ movl %eax, %gs ++ movl %eax, %ss ++ + /* setup a new stack at the end of the physical control page */ + lea 4096(%r8), %rsp + +- /* jump to identity mapped page */ +- addq $(identity_mapped - relocate_kernel), %r8 +- pushq %r8 +- ret ++ /* load new code segment and jump to identity mapped page */ ++ movq %r8, %rax ++ addq $(identity_mapped - relocate_kernel), %rax ++ pushq $(gdt_cs - gdt) ++ pushq %rax ++ lretq + + identity_mapped: + /* store the start address on the stack */ +@@ -272,5 +298,19 @@ identity_mapped: + xorq %r13, %r13 + xorq %r14, %r14 + xorq %r15, %r15 +- + ret ++ ++ .align 16 ++gdt: ++ .quad 0x0000000000000000 /* NULL descriptor */ ++gdt_cs: ++ .quad 0x00af9a000000ffff ++gdt_end: ++ ++gdt_80: ++ .word gdt_end - gdt - 1 /* limit */ ++ .quad 0 /* base - filled in by code above */ ++ ++idt_80: ++ .word 0 /* limit */ ++ .quad 0 /* base */ diff --git a/tags/2.6.22-2/20005_blktap-aio-16_03_06.patch1 b/tags/2.6.22-2/20005_blktap-aio-16_03_06.patch1 new file mode 100644 index 0000000..9331e9f --- /dev/null +++ b/tags/2.6.22-2/20005_blktap-aio-16_03_06.patch1 @@ -0,0 +1,209 @@ +Subject: AIO/POLL interaction +From: http://xenbits.xensource.com/xen-3.1-testing.hg (tip 15042) +Acked-by: jbeulich@novell.com + +--- + fs/aio.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++---- + include/linux/aio.h | 5 ++ + 2 files changed, 116 insertions(+), 9 deletions(-) + +--- a/fs/aio.c 2007-08-27 12:09:26.000000000 -0400 ++++ b/fs/aio.c 2007-08-27 14:01:24.000000000 -0400 +@@ -36,6 +36,11 @@ + #include + #include + ++#ifdef CONFIG_EPOLL ++#include ++#include ++#endif ++ + #if DEBUG > 1 + #define dprintk printk + #else +@@ -1009,6 +1014,11 @@ put_rq: + if (waitqueue_active(&ctx->wait)) + wake_up(&ctx->wait); + ++#ifdef CONFIG_EPOLL ++ if (ctx->file && waitqueue_active(&ctx->poll_wait)) ++ wake_up(&ctx->poll_wait); ++#endif ++ + spin_unlock_irqrestore(&ctx->ctx_lock, flags); + return ret; + } +@@ -1016,6 +1026,8 @@ put_rq: + /* aio_read_evt + * Pull an event off of the ioctx's event ring. Returns the number of + * events fetched (0 or 1 ;-) ++ * If ent parameter is 0, just returns the number of events that would ++ * be fetched. + * FIXME: make this use cmpxchg. + * TODO: make the ringbuffer user mmap()able (requires FIXME). + */ +@@ -1038,13 +1050,18 @@ static int aio_read_evt(struct kioctx *i + + head = ring->head % info->nr; + if (head != ring->tail) { +- struct io_event *evp = aio_ring_event(info, head, KM_USER1); +- *ent = *evp; +- head = (head + 1) % info->nr; +- smp_mb(); /* finish reading the event before updatng the head */ +- ring->head = head; +- ret = 1; +- put_aio_ring_event(evp, KM_USER1); ++ if (ent) { /* event requested */ ++ struct io_event *evp = ++ aio_ring_event(info, head, KM_USER1); ++ *ent = *evp; ++ head = (head + 1) % info->nr; ++ /* finish reading the event before updatng the head */ ++ smp_mb(); ++ ring->head = head; ++ ret = 1; ++ put_aio_ring_event(evp, KM_USER1); ++ } else /* only need to know availability */ ++ ret = 1; + } + spin_unlock(&info->ring_lock); + +@@ -1227,9 +1244,78 @@ static void io_destroy(struct kioctx *io + + aio_cancel_all(ioctx); + wait_for_all_aios(ioctx); ++#ifdef CONFIG_EPOLL ++ /* forget the poll file, but it's up to the user to close it */ ++ if (ioctx->file) { ++ ioctx->file->private_data = 0; ++ ioctx->file = 0; ++ } ++#endif + put_ioctx(ioctx); /* once for the lookup */ + } + ++#ifdef CONFIG_EPOLL ++ ++static int aio_queue_fd_close(struct inode *inode, struct file *file) ++{ ++ struct kioctx *ioctx = file->private_data; ++ if (ioctx) { ++ file->private_data = 0; ++ spin_lock_irq(&ioctx->ctx_lock); ++ ioctx->file = 0; ++ spin_unlock_irq(&ioctx->ctx_lock); ++ } ++ return 0; ++} ++ ++static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait) ++{ unsigned int pollflags = 0; ++ struct kioctx *ioctx = file->private_data; ++ ++ if (ioctx) { ++ ++ spin_lock_irq(&ioctx->ctx_lock); ++ /* Insert inside our poll wait queue */ ++ poll_wait(file, &ioctx->poll_wait, wait); ++ ++ /* Check our condition */ ++ if (aio_read_evt(ioctx, 0)) ++ pollflags = POLLIN | POLLRDNORM; ++ spin_unlock_irq(&ioctx->ctx_lock); ++ } ++ ++ return pollflags; ++} ++ ++static const struct file_operations aioq_fops = { ++ .release = aio_queue_fd_close, ++ .poll = aio_queue_fd_poll ++}; ++ ++/* make_aio_fd: ++ * Create a file descriptor that can be used to poll the event queue. ++ * Based on the excellent epoll code. ++ */ ++ ++static int make_aio_fd(struct kioctx *ioctx) ++{ ++ int error, fd; ++ struct inode *inode; ++ struct file *file; ++ ++ error = anon_inode_getfd(&fd, &inode, &file, "[aioq]", ++ &aioq_fops, ioctx); ++ if (error) ++ return error; ++ ++ /* associate the file with the IO context */ ++ ioctx->file = file; ++ init_waitqueue_head(&ioctx->poll_wait); ++ return fd; ++} ++#endif ++ ++ + /* sys_io_setup: + * Create an aio_context capable of receiving at least nr_events. + * ctxp must not point to an aio_context that already exists, and +@@ -1242,18 +1328,30 @@ static void io_destroy(struct kioctx *io + * resources are available. May fail with -EFAULT if an invalid + * pointer is passed for ctxp. Will fail with -ENOSYS if not + * implemented. ++ * ++ * To request a selectable fd, the user context has to be initialized ++ * to 1, instead of 0, and the return value is the fd. ++ * This keeps the system call compatible, since a non-zero value ++ * was not allowed so far. + */ + asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp) + { + struct kioctx *ioctx = NULL; + unsigned long ctx; + long ret; ++ int make_fd = 0; + + ret = get_user(ctx, ctxp); + if (unlikely(ret)) + goto out; + + ret = -EINVAL; ++#ifdef CONFIG_EPOLL ++ if (ctx == 1) { ++ make_fd = 1; ++ ctx = 0; ++ } ++#endif + if (unlikely(ctx || nr_events == 0)) { + pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n", + ctx, nr_events); +@@ -1264,8 +1362,12 @@ asmlinkage long sys_io_setup(unsigned nr + ret = PTR_ERR(ioctx); + if (!IS_ERR(ioctx)) { + ret = put_user(ioctx->user_id, ctxp); +- if (!ret) +- return 0; ++#ifdef CONFIG_EPOLL ++ if (make_fd && ret >= 0) ++ ret = make_aio_fd(ioctx); ++#endif ++ if (ret >= 0) ++ return ret; + + get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */ + io_destroy(ioctx); +--- a/include/linux/aio.h 2007-08-27 12:09:26.000000000 -0400 ++++ b/include/linux/aio.h 2007-08-27 14:01:24.000000000 -0400 +@@ -201,6 +201,11 @@ struct kioctx { + struct aio_ring_info ring_info; + + struct delayed_work wq; ++#ifdef CONFIG_EPOLL ++ // poll integration ++ wait_queue_head_t poll_wait; ++ struct file *file; ++#endif + }; + + /* prototypes */ diff --git a/tags/2.6.22-2/20006_fix-ide-cd-pio-mode.patch1 b/tags/2.6.22-2/20006_fix-ide-cd-pio-mode.patch1 new file mode 100644 index 0000000..1ca415c --- /dev/null +++ b/tags/2.6.22-2/20006_fix-ide-cd-pio-mode.patch1 @@ -0,0 +1,36 @@ +Subject: Fix IDE CD-drive PIO mode. +From: http://xenbits.xensource.com/xen-3.1-testing.hg (tip 15042) +Signed-off-by: Christian Limpach + +CD drives in PIO mode don't work under Xen because of a change in Linux +between 2.6.12 and 2.6.16, as a result of the following thread: +http://lists.parisc-linux.org/pipermail/parisc-linux/2005-August/027197.html + +The change breaks systems which have highmem and a swiotlb because the +ide-cd driver doesn't use the swiotlb, resulting in read/writes to/from +highmem pages in PIO mode not working any longer. Xen kernels usually have +both highmem and a swiotlb. + +Acked-by: Jan Beulich + +--- + drivers/ide/ide-lib.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/ide/ide-lib.c 2007-08-27 12:09:26.000000000 -0400 ++++ b/drivers/ide/ide-lib.c 2007-08-27 14:02:05.000000000 -0400 +@@ -341,10 +341,10 @@ void ide_toggle_bounce(ide_drive_t *driv + { + u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */ + +- if (!PCI_DMA_BUS_IS_PHYS) { +- addr = BLK_BOUNCE_ANY; +- } else if (on && drive->media == ide_disk) { +- if (HWIF(drive)->pci_dev) ++ if (on && drive->media == ide_disk) { ++ if (!PCI_DMA_BUS_IS_PHYS) ++ addr = BLK_BOUNCE_ANY; ++ else if (HWIF(drive)->pci_dev) + addr = HWIF(drive)->pci_dev->dma_mask; + } + diff --git a/tags/2.6.22-2/20007_i386-mach-io-check-nmi.patch1 b/tags/2.6.22-2/20007_i386-mach-io-check-nmi.patch1 new file mode 100644 index 0000000..c6fccf6 --- /dev/null +++ b/tags/2.6.22-2/20007_i386-mach-io-check-nmi.patch1 @@ -0,0 +1,53 @@ +Subject: Separate out clearing of NMI I/O check error +From: http://xenbits.xensource.com/xen-3.1-testing.hg (tip 15042) +Patch-mainline: obsolete +Acked-by: jbeulich@novell.com + +--- + arch/i386/kernel/traps.c | 9 +-------- + include/asm-i386/mach-default/mach_traps.h | 12 ++++++++++++ + 2 files changed, 13 insertions(+), 8 deletions(-) + +--- a/arch/i386/kernel/traps.c 2007-08-27 14:01:24.000000000 -0400 ++++ b/arch/i386/kernel/traps.c 2007-08-27 14:01:24.000000000 -0400 +@@ -656,18 +656,11 @@ mem_parity_error(unsigned char reason, s + static __kprobes void + io_check_error(unsigned char reason, struct pt_regs * regs) + { +- unsigned long i; +- + printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); + show_registers(regs); + + /* Re-enable the IOCK line, wait for a few seconds */ +- reason = (reason & 0xf) | 8; +- outb(reason, 0x61); +- i = 2000; +- while (--i) udelay(1000); +- reason &= ~8; +- outb(reason, 0x61); ++ clear_io_check_error(reason); + } + + static __kprobes void +--- a/include/asm-i386/mach-default/mach_traps.h 2007-08-27 12:09:26.000000000 -0400 ++++ b/include/asm-i386/mach-default/mach_traps.h 2007-08-27 14:01:24.000000000 -0400 +@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig + outb(reason, 0x61); + } + ++static inline void clear_io_check_error(unsigned char reason) ++{ ++ unsigned long i; ++ ++ reason = (reason & 0xf) | 8; ++ outb(reason, 0x61); ++ i = 2000; ++ while (--i) udelay(1000); ++ reason &= ~8; ++ outb(reason, 0x61); ++} ++ + static inline unsigned char get_nmi_reason(void) + { + return inb(0x61); diff --git a/tags/2.6.22-2/20008_net-csum.patch1 b/tags/2.6.22-2/20008_net-csum.patch1 new file mode 100644 index 0000000..f52f99b --- /dev/null +++ b/tags/2.6.22-2/20008_net-csum.patch1 @@ -0,0 +1,50 @@ +Subject: xen3 TCP/UDP checksumming +From: http://xenbits.xensource.com/xen-3.1-testing.hg (tip 15042) +Acked-by: jbeulich@novell.com + +This is only a guess, based on suggestions from Keir Fraser. + +--- + net/ipv4/netfilter/nf_nat_proto_tcp.c | 3 +++ + net/ipv4/netfilter/nf_nat_proto_udp.c | 4 ++++ + net/ipv4/xfrm4_output.c | 4 ++++ + 3 files changed, 11 insertions(+) + +--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c 2007-08-27 12:09:26.000000000 -0400 ++++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c 2007-08-27 14:01:24.000000000 -0400 +@@ -132,6 +132,9 @@ tcp_manip_pkt(struct sk_buff **pskb, + if (hdrsize < sizeof(*hdr)) + return 1; + ++ if (skb_checksum_setup(*pskb)) ++ return 0; ++ + nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1); + nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0); + return 1; +--- a/net/ipv4/netfilter/nf_nat_proto_udp.c 2007-08-27 12:09:26.000000000 -0400 ++++ b/net/ipv4/netfilter/nf_nat_proto_udp.c 2007-08-27 14:01:24.000000000 -0400 +@@ -116,6 +116,10 @@ udp_manip_pkt(struct sk_buff **pskb, + newport = tuple->dst.u.udp.port; + portptr = &hdr->dest; + } ++ ++ if (skb_checksum_setup(*pskb)) ++ return 0; ++ + if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) { + nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1); + nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport, +--- a/net/ipv4/xfrm4_output.c 2007-08-27 12:09:26.000000000 -0400 ++++ b/net/ipv4/xfrm4_output.c 2007-08-27 14:01:24.000000000 -0400 +@@ -47,6 +47,10 @@ static int xfrm4_output_one(struct sk_bu + struct xfrm_state *x = dst->xfrm; + int err; + ++ err = skb_checksum_setup(skb); ++ if (err) ++ goto error_nolock; ++ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); + if (err) diff --git a/tags/2.6.22-2/20009_xenoprof-generic.patch1 b/tags/2.6.22-2/20009_xenoprof-generic.patch1 new file mode 100644 index 0000000..5b73392 --- /dev/null +++ b/tags/2.6.22-2/20009_xenoprof-generic.patch1 @@ -0,0 +1,669 @@ +Subject: Xen oprofile +From: http://xenbits.xensource.com/xen-3.1-testing.hg (tip 15042) +Acked-by: jbeulich@novell.com + +--- + drivers/oprofile/buffer_sync.c | 87 ++++++++++++---- + drivers/oprofile/cpu_buffer.c | 51 +++++++-- + drivers/oprofile/cpu_buffer.h | 9 + + drivers/oprofile/event_buffer.h | 7 + + drivers/oprofile/oprof.c | 32 +++++- + drivers/oprofile/oprof.h | 3 + drivers/oprofile/oprofile_files.c | 201 +++++++++++++++++++++++++++++++++++++- + include/linux/oprofile.h | 9 + + 8 files changed, 360 insertions(+), 39 deletions(-) + +--- a/drivers/oprofile/buffer_sync.c 2007-08-27 12:09:26.000000000 -0400 ++++ b/drivers/oprofile/buffer_sync.c 2007-08-27 14:02:05.000000000 -0400 +@@ -6,6 +6,10 @@ + * + * @author John Levon + * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. ++ * + * This is the core of the buffer management. Each + * CPU buffer is processed and entered into the + * global event buffer. Such processing is necessary +@@ -39,6 +43,7 @@ static cpumask_t marked_cpus = CPU_MASK_ + static DEFINE_SPINLOCK(task_mortuary); + static void process_task_mortuary(void); + ++static int cpu_current_domain[NR_CPUS]; + + /* Take ownership of the task struct and place it on the + * list for processing. Only after two full buffer syncs +@@ -147,6 +152,11 @@ static void end_sync(void) + int sync_start(void) + { + int err; ++ int i; ++ ++ for (i = 0; i < NR_CPUS; i++) { ++ cpu_current_domain[i] = COORDINATOR_DOMAIN; ++ } + + start_cpu_work(); + +@@ -276,15 +286,31 @@ static void add_cpu_switch(int i) + last_cookie = INVALID_COOKIE; + } + +-static void add_kernel_ctx_switch(unsigned int in_kernel) ++static void add_cpu_mode_switch(unsigned int cpu_mode) + { + add_event_entry(ESCAPE_CODE); +- if (in_kernel) +- add_event_entry(KERNEL_ENTER_SWITCH_CODE); +- else +- add_event_entry(KERNEL_EXIT_SWITCH_CODE); ++ switch (cpu_mode) { ++ case CPU_MODE_USER: ++ add_event_entry(USER_ENTER_SWITCH_CODE); ++ break; ++ case CPU_MODE_KERNEL: ++ add_event_entry(KERNEL_ENTER_SWITCH_CODE); ++ break; ++ case CPU_MODE_XEN: ++ add_event_entry(XEN_ENTER_SWITCH_CODE); ++ break; ++ default: ++ break; ++ } + } +- ++ ++static void add_domain_switch(unsigned long domain_id) ++{ ++ add_event_entry(ESCAPE_CODE); ++ add_event_entry(DOMAIN_SWITCH_CODE); ++ add_event_entry(domain_id); ++} ++ + static void + add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) + { +@@ -349,9 +375,9 @@ static int add_us_sample(struct mm_struc + * for later lookup from userspace. + */ + static int +-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel) ++add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode) + { +- if (in_kernel) { ++ if (cpu_mode >= CPU_MODE_KERNEL) { + add_sample_entry(s->eip, s->event); + return 1; + } else if (mm) { +@@ -497,15 +523,21 @@ void sync_buffer(int cpu) + struct mm_struct *mm = NULL; + struct task_struct * new; + unsigned long cookie = 0; +- int in_kernel = 1; ++ int cpu_mode = 1; + unsigned int i; + sync_buffer_state state = sb_buffer_start; + unsigned long available; ++ int domain_switch = 0; + + mutex_lock(&buffer_mutex); + + add_cpu_switch(cpu); + ++ /* We need to assign the first samples in this CPU buffer to the ++ same domain that we were processing at the last sync_buffer */ ++ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) { ++ add_domain_switch(cpu_current_domain[cpu]); ++ } + /* Remember, only we can modify tail_pos */ + + available = get_slots(cpu_buf); +@@ -513,16 +545,18 @@ void sync_buffer(int cpu) + for (i = 0; i < available; ++i) { + struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; + +- if (is_code(s->eip)) { +- if (s->event <= CPU_IS_KERNEL) { +- /* kernel/userspace switch */ +- in_kernel = s->event; ++ if (is_code(s->eip) && !domain_switch) { ++ if (s->event <= CPU_MODE_XEN) { ++ /* xen/kernel/userspace switch */ ++ cpu_mode = s->event; + if (state == sb_buffer_start) + state = sb_sample_start; +- add_kernel_ctx_switch(s->event); ++ add_cpu_mode_switch(s->event); + } else if (s->event == CPU_TRACE_BEGIN) { + state = sb_bt_start; + add_trace_begin(); ++ } else if (s->event == CPU_DOMAIN_SWITCH) { ++ domain_switch = 1; + } else { + struct mm_struct * oldmm = mm; + +@@ -536,11 +570,21 @@ void sync_buffer(int cpu) + add_user_ctx_switch(new, cookie); + } + } else { +- if (state >= sb_bt_start && +- !add_sample(mm, s, in_kernel)) { +- if (state == sb_bt_start) { +- state = sb_bt_ignore; +- atomic_inc(&oprofile_stats.bt_lost_no_mapping); ++ if (domain_switch) { ++ cpu_current_domain[cpu] = s->eip; ++ add_domain_switch(s->eip); ++ domain_switch = 0; ++ } else { ++ if (cpu_current_domain[cpu] != ++ COORDINATOR_DOMAIN) { ++ add_sample_entry(s->eip, s->event); ++ } ++ else if (state >= sb_bt_start && ++ !add_sample(mm, s, cpu_mode)) { ++ if (state == sb_bt_start) { ++ state = sb_bt_ignore; ++ atomic_inc(&oprofile_stats.bt_lost_no_mapping); ++ } + } + } + } +@@ -549,6 +593,11 @@ void sync_buffer(int cpu) + } + release_mm(mm); + ++ /* We reset domain to COORDINATOR at each CPU switch */ ++ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) { ++ add_domain_switch(COORDINATOR_DOMAIN); ++ } ++ + mark_done(cpu); + + mutex_unlock(&buffer_mutex); +--- a/drivers/oprofile/cpu_buffer.c 2007-08-27 12:09:26.000000000 -0400 ++++ b/drivers/oprofile/cpu_buffer.c 2007-08-27 14:02:05.000000000 -0400 +@@ -6,6 +6,10 @@ + * + * @author John Levon + * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. ++ * + * Each CPU has a local buffer that stores PC value/event + * pairs. We also log context switches when we notice them. + * Eventually each CPU's buffer is processed into the global +@@ -34,6 +38,8 @@ static void wq_sync_buffer(struct work_s + #define DEFAULT_TIMER_EXPIRE (HZ / 10) + static int work_enabled; + ++static int32_t current_domain = COORDINATOR_DOMAIN; ++ + void free_cpu_buffers(void) + { + int i; +@@ -57,7 +63,7 @@ int alloc_cpu_buffers(void) + goto fail; + + b->last_task = NULL; +- b->last_is_kernel = -1; ++ b->last_cpu_mode = -1; + b->tracing = 0; + b->buffer_size = buffer_size; + b->tail_pos = 0; +@@ -113,7 +119,7 @@ void cpu_buffer_reset(struct oprofile_cp + * collected will populate the buffer with proper + * values to initialize the buffer + */ +- cpu_buf->last_is_kernel = -1; ++ cpu_buf->last_cpu_mode = -1; + cpu_buf->last_task = NULL; + } + +@@ -163,13 +169,13 @@ add_code(struct oprofile_cpu_buffer * bu + * because of the head/tail separation of the writer and reader + * of the CPU buffer. + * +- * is_kernel is needed because on some architectures you cannot ++ * cpu_mode is needed because on some architectures you cannot + * tell if you are in kernel or user space simply by looking at +- * pc. We tag this in the buffer by generating kernel enter/exit +- * events whenever is_kernel changes ++ * pc. We tag this in the buffer by generating kernel/user (and xen) ++ * enter events whenever cpu_mode changes + */ + static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc, +- int is_kernel, unsigned long event) ++ int cpu_mode, unsigned long event) + { + struct task_struct * task; + +@@ -180,18 +186,18 @@ static int log_sample(struct oprofile_cp + return 0; + } + +- is_kernel = !!is_kernel; +- + task = current; + + /* notice a switch from user->kernel or vice versa */ +- if (cpu_buf->last_is_kernel != is_kernel) { +- cpu_buf->last_is_kernel = is_kernel; +- add_code(cpu_buf, is_kernel); ++ if (cpu_buf->last_cpu_mode != cpu_mode) { ++ cpu_buf->last_cpu_mode = cpu_mode; ++ add_code(cpu_buf, cpu_mode); + } +- ++ + /* notice a task switch */ +- if (cpu_buf->last_task != task) { ++ /* if not processing other domain samples */ ++ if ((cpu_buf->last_task != task) && ++ (current_domain == COORDINATOR_DOMAIN)) { + cpu_buf->last_task = task; + add_code(cpu_buf, (unsigned long)task); + } +@@ -275,6 +281,25 @@ void oprofile_add_trace(unsigned long pc + add_sample(cpu_buf, pc, 0); + } + ++int oprofile_add_domain_switch(int32_t domain_id) ++{ ++ struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()]; ++ ++ /* should have space for switching into and out of domain ++ (2 slots each) plus one sample and one cpu mode switch */ ++ if (((nr_available_slots(cpu_buf) < 6) && ++ (domain_id != COORDINATOR_DOMAIN)) || ++ (nr_available_slots(cpu_buf) < 2)) ++ return 0; ++ ++ add_code(cpu_buf, CPU_DOMAIN_SWITCH); ++ add_sample(cpu_buf, domain_id, 0); ++ ++ current_domain = domain_id; ++ ++ return 1; ++} ++ + /* + * This serves to avoid cpu buffer overflow, and makes sure + * the task mortuary progresses +--- a/drivers/oprofile/cpu_buffer.h 2007-08-27 12:09:26.000000000 -0400 ++++ b/drivers/oprofile/cpu_buffer.h 2007-08-27 14:01:24.000000000 -0400 +@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer { + volatile unsigned long tail_pos; + unsigned long buffer_size; + struct task_struct * last_task; +- int last_is_kernel; ++ int last_cpu_mode; + int tracing; + struct op_sample * buffer; + unsigned long sample_received; +@@ -51,7 +51,10 @@ extern struct oprofile_cpu_buffer cpu_bu + void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf); + + /* transient events for the CPU buffer -> event buffer */ +-#define CPU_IS_KERNEL 1 +-#define CPU_TRACE_BEGIN 2 ++#define CPU_MODE_USER 0 ++#define CPU_MODE_KERNEL 1 ++#define CPU_MODE_XEN 2 ++#define CPU_TRACE_BEGIN 3 ++#define CPU_DOMAIN_SWITCH 4 + + #endif /* OPROFILE_CPU_BUFFER_H */ +--- a/drivers/oprofile/event_buffer.h 2007-08-27 12:09:26.000000000 -0400 ++++ b/drivers/oprofile/event_buffer.h 2007-08-27 14:01:24.000000000 -0400 +@@ -29,15 +29,20 @@ void wake_up_buffer_waiter(void); + #define CPU_SWITCH_CODE 2 + #define COOKIE_SWITCH_CODE 3 + #define KERNEL_ENTER_SWITCH_CODE 4 +-#define KERNEL_EXIT_SWITCH_CODE 5 ++#define USER_ENTER_SWITCH_CODE 5 + #define MODULE_LOADED_CODE 6 + #define CTX_TGID_CODE 7 + #define TRACE_BEGIN_CODE 8 + #define TRACE_END_CODE 9 ++#define XEN_ENTER_SWITCH_CODE 10 ++#define DOMAIN_SWITCH_CODE 11 + + #define INVALID_COOKIE ~0UL + #define NO_COOKIE 0UL + ++/* Constant used to refer to coordinator domain (Xen) */ ++#define COORDINATOR_DOMAIN -1 ++ + /* add data to the event buffer */ + void add_event_entry(unsigned long data); + +--- a/drivers/oprofile/oprof.c 2007-08-27 12:09:26.000000000 -0400 ++++ b/drivers/oprofile/oprof.c 2007-08-27 14:02:05.000000000 -0400 +@@ -5,6 +5,10 @@ + * @remark Read the file COPYING + * + * @author John Levon ++ * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. + */ + + #include +@@ -19,7 +23,7 @@ + #include "cpu_buffer.h" + #include "buffer_sync.h" + #include "oprofile_stats.h" +- ++ + struct oprofile_operations oprofile_ops; + + unsigned long oprofile_started; +@@ -33,6 +37,32 @@ static DEFINE_MUTEX(start_mutex); + */ + static int timer = 0; + ++int oprofile_set_active(int active_domains[], unsigned int adomains) ++{ ++ int err; ++ ++ if (!oprofile_ops.set_active) ++ return -EINVAL; ++ ++ mutex_lock(&start_mutex); ++ err = oprofile_ops.set_active(active_domains, adomains); ++ mutex_unlock(&start_mutex); ++ return err; ++} ++ ++int oprofile_set_passive(int passive_domains[], unsigned int pdomains) ++{ ++ int err; ++ ++ if (!oprofile_ops.set_passive) ++ return -EINVAL; ++ ++ mutex_lock(&start_mutex); ++ err = oprofile_ops.set_passive(passive_domains, pdomains); ++ mutex_unlock(&start_mutex); ++ return err; ++} ++ + int oprofile_setup(void) + { + int err; +--- a/drivers/oprofile/oprof.h 2007-08-27 12:09:26.000000000 -0400 ++++ b/drivers/oprofile/oprof.h 2007-08-27 14:01:24.000000000 -0400 +@@ -35,5 +35,8 @@ void oprofile_create_files(struct super_ + void oprofile_timer_init(struct oprofile_operations * ops); + + int oprofile_set_backtrace(unsigned long depth); ++ ++int oprofile_set_active(int active_domains[], unsigned int adomains); ++int oprofile_set_passive(int passive_domains[], unsigned int pdomains); + + #endif /* OPROF_H */ +--- a/drivers/oprofile/oprofile_files.c 2007-08-27 12:09:26.000000000 -0400 ++++ b/drivers/oprofile/oprofile_files.c 2007-08-27 14:02:05.000000000 -0400 +@@ -5,15 +5,21 @@ + * @remark Read the file COPYING + * + * @author John Levon ++ * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. + */ + + #include + #include ++#include ++#include + + #include "event_buffer.h" + #include "oprofile_stats.h" + #include "oprof.h" +- ++ + unsigned long fs_buffer_size = 131072; + unsigned long fs_cpu_buffer_size = 8192; + unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ +@@ -117,11 +123,202 @@ static ssize_t dump_write(struct file * + static const struct file_operations dump_fops = { + .write = dump_write, + }; +- ++ ++#define TMPBUFSIZE 512 ++ ++static unsigned int adomains = 0; ++static int active_domains[MAX_OPROF_DOMAINS + 1]; ++static DEFINE_MUTEX(adom_mutex); ++ ++static ssize_t adomain_write(struct file * file, char const __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char *tmpbuf; ++ char *startp, *endp; ++ int i; ++ unsigned long val; ++ ssize_t retval = count; ++ ++ if (*offset) ++ return -EINVAL; ++ if (count > TMPBUFSIZE - 1) ++ return -EINVAL; ++ ++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) ++ return -ENOMEM; ++ ++ if (copy_from_user(tmpbuf, buf, count)) { ++ kfree(tmpbuf); ++ return -EFAULT; ++ } ++ tmpbuf[count] = 0; ++ ++ mutex_lock(&adom_mutex); ++ ++ startp = tmpbuf; ++ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */ ++ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) { ++ val = simple_strtoul(startp, &endp, 0); ++ if (endp == startp) ++ break; ++ while (ispunct(*endp) || isspace(*endp)) ++ endp++; ++ active_domains[i] = val; ++ if (active_domains[i] != val) ++ /* Overflow, force error below */ ++ i = MAX_OPROF_DOMAINS + 1; ++ startp = endp; ++ } ++ /* Force error on trailing junk */ ++ adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i; ++ ++ kfree(tmpbuf); ++ ++ if (adomains > MAX_OPROF_DOMAINS ++ || oprofile_set_active(active_domains, adomains)) { ++ adomains = 0; ++ retval = -EINVAL; ++ } ++ ++ mutex_unlock(&adom_mutex); ++ return retval; ++} ++ ++static ssize_t adomain_read(struct file * file, char __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char * tmpbuf; ++ size_t len; ++ int i; ++ ssize_t retval; ++ ++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) ++ return -ENOMEM; ++ ++ mutex_lock(&adom_mutex); ++ ++ len = 0; ++ for (i = 0; i < adomains; i++) ++ len += snprintf(tmpbuf + len, ++ len < TMPBUFSIZE ? TMPBUFSIZE - len : 0, ++ "%u ", active_domains[i]); ++ WARN_ON(len > TMPBUFSIZE); ++ if (len != 0 && len <= TMPBUFSIZE) ++ tmpbuf[len-1] = '\n'; ++ ++ mutex_unlock(&adom_mutex); ++ ++ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len); ++ ++ kfree(tmpbuf); ++ return retval; ++} ++ ++ ++static struct file_operations active_domain_ops = { ++ .read = adomain_read, ++ .write = adomain_write, ++}; ++ ++static unsigned int pdomains = 0; ++static int passive_domains[MAX_OPROF_DOMAINS]; ++static DEFINE_MUTEX(pdom_mutex); ++ ++static ssize_t pdomain_write(struct file * file, char const __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char *tmpbuf; ++ char *startp, *endp; ++ int i; ++ unsigned long val; ++ ssize_t retval = count; ++ ++ if (*offset) ++ return -EINVAL; ++ if (count > TMPBUFSIZE - 1) ++ return -EINVAL; ++ ++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) ++ return -ENOMEM; ++ ++ if (copy_from_user(tmpbuf, buf, count)) { ++ kfree(tmpbuf); ++ return -EFAULT; ++ } ++ tmpbuf[count] = 0; ++ ++ mutex_lock(&pdom_mutex); ++ ++ startp = tmpbuf; ++ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */ ++ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) { ++ val = simple_strtoul(startp, &endp, 0); ++ if (endp == startp) ++ break; ++ while (ispunct(*endp) || isspace(*endp)) ++ endp++; ++ passive_domains[i] = val; ++ if (passive_domains[i] != val) ++ /* Overflow, force error below */ ++ i = MAX_OPROF_DOMAINS + 1; ++ startp = endp; ++ } ++ /* Force error on trailing junk */ ++ pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i; ++ ++ kfree(tmpbuf); ++ ++ if (pdomains > MAX_OPROF_DOMAINS ++ || oprofile_set_passive(passive_domains, pdomains)) { ++ pdomains = 0; ++ retval = -EINVAL; ++ } ++ ++ mutex_unlock(&pdom_mutex); ++ return retval; ++} ++ ++static ssize_t pdomain_read(struct file * file, char __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char * tmpbuf; ++ size_t len; ++ int i; ++ ssize_t retval; ++ ++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) ++ return -ENOMEM; ++ ++ mutex_lock(&pdom_mutex); ++ ++ len = 0; ++ for (i = 0; i < pdomains; i++) ++ len += snprintf(tmpbuf + len, ++ len < TMPBUFSIZE ? TMPBUFSIZE - len : 0, ++ "%u ", passive_domains[i]); ++ WARN_ON(len > TMPBUFSIZE); ++ if (len != 0 && len <= TMPBUFSIZE) ++ tmpbuf[len-1] = '\n'; ++ ++ mutex_unlock(&pdom_mutex); ++ ++ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len); ++ ++ kfree(tmpbuf); ++ return retval; ++} ++ ++static struct file_operations passive_domain_ops = { ++ .read = pdomain_read, ++ .write = pdomain_write, ++}; ++ + void oprofile_create_files(struct super_block * sb, struct dentry * root) + { + oprofilefs_create_file(sb, root, "enable", &enable_fops); + oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); ++ oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops); ++ oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops); + oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); + oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size); + oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed); +--- a/include/linux/oprofile.h 2007-08-27 12:09:26.000000000 -0400 ++++ b/include/linux/oprofile.h 2007-08-27 14:02:05.000000000 -0400 +@@ -16,6 +16,8 @@ + #include + #include + #include ++ ++#include + + struct super_block; + struct dentry; +@@ -27,6 +29,11 @@ struct oprofile_operations { + /* create any necessary configuration files in the oprofile fs. + * Optional. */ + int (*create_files)(struct super_block * sb, struct dentry * root); ++ /* setup active domains with Xen */ ++ int (*set_active)(int *active_domains, unsigned int adomains); ++ /* setup passive domains with Xen */ ++ int (*set_passive)(int *passive_domains, unsigned int pdomains); ++ + /* Do any necessary interrupt setup. Optional. */ + int (*setup)(void); + /* Do any necessary interrupt shutdown. Optional. */ +@@ -78,6 +85,8 @@ void oprofile_add_pc(unsigned long pc, i + /* add a backtrace entry, to be called from the ->backtrace callback */ + void oprofile_add_trace(unsigned long eip); + ++/* add a domain switch entry */ ++int oprofile_add_domain_switch(int32_t domain_id); + + /** + * Create a file of the given name as a child of the given root, with diff --git a/tags/2.6.22-2/20010_softlockup-no-idle-hz.patch1 b/tags/2.6.22-2/20010_softlockup-no-idle-hz.patch1 new file mode 100644 index 0000000..e67c89c --- /dev/null +++ b/tags/2.6.22-2/20010_softlockup-no-idle-hz.patch1 @@ -0,0 +1,75 @@ +Subject: xen3 softlockup - no-idle-hz interaction fix +From: http://xenbits.xensource.com/xen-3.1-testing.hg (tip 15042) +Acked-by: jbeulich@novell.com + +Index: head-2007-09-25/include/linux/sched.h +=================================================================== +--- head-2007-09-25.orig/include/linux/sched.h 2007-09-25 14:22:37.000000000 +0200 ++++ head-2007-09-25/include/linux/sched.h 2007-09-25 14:32:18.000000000 +0200 +@@ -236,11 +236,16 @@ extern void update_process_times(int use + extern void scheduler_tick(void); + + #ifdef CONFIG_DETECT_SOFTLOCKUP ++extern unsigned long softlockup_get_next_event(void); + extern void softlockup_tick(void); + extern void spawn_softlockup_task(void); + extern void touch_softlockup_watchdog(void); + extern void touch_all_softlockup_watchdogs(void); + #else ++static inline unsigned long softlockup_get_next_event(void) ++{ ++ return MAX_JIFFY_OFFSET; ++} + static inline void softlockup_tick(void) + { + } +Index: head-2007-09-25/kernel/softlockup.c +=================================================================== +--- head-2007-09-25.orig/kernel/softlockup.c 2007-09-25 14:22:37.000000000 +0200 ++++ head-2007-09-25/kernel/softlockup.c 2007-09-25 14:32:18.000000000 +0200 +@@ -60,6 +60,19 @@ void touch_all_softlockup_watchdogs(void + } + EXPORT_SYMBOL(touch_all_softlockup_watchdogs); + ++unsigned long softlockup_get_next_event(void) ++{ ++ int this_cpu = smp_processor_id(); ++ unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu); ++ ++ if (per_cpu(print_timestamp, this_cpu) == touch_timestamp || ++ did_panic || ++ !per_cpu(watchdog_task, this_cpu)) ++ return MAX_JIFFY_OFFSET; ++ ++ return max_t(long, 0, touch_timestamp + HZ - jiffies); ++} ++ + /* + * This callback runs from the timer interrupt, and checks + * whether the watchdog thread has hung or not: +Index: head-2007-09-25/kernel/timer.c +=================================================================== +--- head-2007-09-25.orig/kernel/timer.c 2007-09-25 14:22:37.000000000 +0200 ++++ head-2007-09-25/kernel/timer.c 2007-09-25 14:32:18.000000000 +0200 +@@ -781,7 +781,7 @@ static unsigned long cmp_next_hrtimer_ev + unsigned long get_next_timer_interrupt(unsigned long now) + { + tvec_base_t *base = __get_cpu_var(tvec_bases); +- unsigned long expires; ++ unsigned long expires, sl_next; + + spin_lock(&base->lock); + expires = __next_timer_interrupt(base); +@@ -790,7 +790,11 @@ unsigned long get_next_timer_interrupt(u + if (time_before_eq(expires, now)) + return now; + +- return cmp_next_hrtimer_event(now, expires); ++ expires = cmp_next_hrtimer_event(now, expires); ++ sl_next = softlockup_get_next_event(); ++ ++ return expires <= now || expires - now < sl_next ++ ? expires : now + sl_next; + } + + #ifdef CONFIG_NO_IDLE_HZ diff --git a/tags/2.6.22-2/20011_xen3-auto-xen-arch.patch1 b/tags/2.6.22-2/20011_xen3-auto-xen-arch.patch1 new file mode 100644 index 0000000..0252943 --- /dev/null +++ b/tags/2.6.22-2/20011_xen3-auto-xen-arch.patch1 @@ -0,0 +1,49825 @@ +Subject: xen3 xen-arch +From: http://xenbits.xensource.com/xen-3.1-testing.hg (tip 15042) +Patch-mainline: obsolete +Acked-by: jbeulich@novell.com + +List of files having Xen derivates (perhaps created during the merging +of newer kernel versions), for xen-port-patches.py to pick up (i.e. this +must be retained here until the XenSource tree has picked up the +respective kernel.org version): + ++++ linux/arch/i386/kernel/e820-xen.c ++++ linux/drivers/char/mem-xen.c ++++ linux/lib/swiotlb-xen.c + +--- + arch/i386/boot-xen/Makefile | 21 + arch/i386/kernel/acpi/boot-xen.c | 1168 ++++++++ + arch/i386/kernel/apic-xen.c | 155 + + arch/i386/kernel/cpu/common-xen.c | 743 +++++ + arch/i386/kernel/cpu/mtrr/main-xen.c | 197 + + arch/i386/kernel/early_printk-xen.c | 2 + arch/i386/kernel/entry-xen.S | 1216 ++++++++ + arch/i386/kernel/fixup.c | 88 + arch/i386/kernel/head-xen.S | 207 + + arch/i386/kernel/init_task-xen.c | 51 + arch/i386/kernel/io_apic-xen.c | 2777 ++++++++++++++++++++ + arch/i386/kernel/ioport-xen.c | 122 + arch/i386/kernel/irq-xen.c | 324 ++ + arch/i386/kernel/ldt-xen.c | 270 + + arch/i386/kernel/microcode-xen.c | 144 + + arch/i386/kernel/mpparse-xen.c | 1185 ++++++++ + arch/i386/kernel/pci-dma-xen.c | 366 ++ + arch/i386/kernel/process-xen.c | 853 ++++++ + arch/i386/kernel/quirks-xen.c | 47 + arch/i386/kernel/setup-xen.c | 1871 +++++++++++++ + arch/i386/kernel/smp-xen.c | 624 ++++ + arch/i386/kernel/swiotlb.c | 716 +++++ + arch/i386/kernel/time-xen.c | 1141 ++++++++ + arch/i386/kernel/traps-xen.c | 1186 ++++++++ + arch/i386/kernel/vsyscall-note-xen.S | 32 + arch/i386/mach-xen/Makefile | 5 + arch/i386/mach-xen/setup.c | 147 + + arch/i386/mm/fault-xen.c | 769 +++++ + arch/i386/mm/highmem-xen.c | 136 + arch/i386/mm/hypervisor.c | 451 +++ + arch/i386/mm/init-xen.c | 850 ++++++ + arch/i386/mm/ioremap-xen.c | 443 +++ + arch/i386/mm/pgtable-xen.c | 727 +++++ + arch/i386/oprofile/xenoprof.c | 179 + + arch/i386/pci/irq-xen.c | 1205 ++++++++ + arch/i386/pci/pcifront.c | 55 + arch/x86_64/ia32/ia32entry-xen.S | 743 +++++ + arch/x86_64/ia32/syscall32-xen.c | 128 + arch/x86_64/ia32/syscall32_syscall-xen.S | 28 + arch/x86_64/ia32/vsyscall-int80.S | 58 + arch/x86_64/kernel/apic-xen.c | 197 + + arch/x86_64/kernel/e820-xen.c | 774 +++++ + arch/x86_64/kernel/early_printk-xen.c | 302 ++ + arch/x86_64/kernel/entry-xen.S | 1325 +++++++++ + arch/x86_64/kernel/genapic-xen.c | 143 + + arch/x86_64/kernel/genapic_xen.c | 161 + + arch/x86_64/kernel/head-xen.S | 203 + + arch/x86_64/kernel/head64-xen.c | 162 + + arch/x86_64/kernel/io_apic-xen.c | 2269 ++++++++++++++++ + arch/x86_64/kernel/ioport-xen.c | 99 + arch/x86_64/kernel/irq-xen.c | 197 + + arch/x86_64/kernel/ldt-xen.c | 282 ++ + arch/x86_64/kernel/mpparse-xen.c | 1011 +++++++ + arch/x86_64/kernel/pci-swiotlb-xen.c | 55 + arch/x86_64/kernel/process-xen.c | 829 +++++ + arch/x86_64/kernel/setup-xen.c | 1650 +++++++++++ + arch/x86_64/kernel/setup64-xen.c | 361 ++ + arch/x86_64/kernel/smp-xen.c | 600 ++++ + arch/x86_64/kernel/traps-xen.c | 1175 ++++++++ + arch/x86_64/kernel/vsyscall-xen.c | 239 + + arch/x86_64/kernel/xen_entry.S | 40 + arch/x86_64/mm/fault-xen.c | 724 +++++ + arch/x86_64/mm/init-xen.c | 1241 ++++++++ + arch/x86_64/mm/pageattr-xen.c | 433 +++ + include/asm-i386/mach-xen/asm/agp.h | 37 + include/asm-i386/mach-xen/asm/desc.h | 164 + + include/asm-i386/mach-xen/asm/dma-mapping.h | 157 + + include/asm-i386/mach-xen/asm/fixmap.h | 155 + + include/asm-i386/mach-xen/asm/floppy.h | 147 + + include/asm-i386/mach-xen/asm/highmem.h | 80 + include/asm-i386/mach-xen/asm/hw_irq.h | 72 + include/asm-i386/mach-xen/asm/hypercall.h | 407 ++ + include/asm-i386/mach-xen/asm/hypervisor.h | 258 + + include/asm-i386/mach-xen/asm/io.h | 390 ++ + include/asm-i386/mach-xen/asm/irqflags.h | 127 + include/asm-i386/mach-xen/asm/maddr.h | 193 + + include/asm-i386/mach-xen/asm/mmu.h | 29 + include/asm-i386/mach-xen/asm/mmu_context.h | 108 + include/asm-i386/mach-xen/asm/page.h | 229 + + include/asm-i386/mach-xen/asm/param.h | 23 + include/asm-i386/mach-xen/asm/pci.h | 146 + + include/asm-i386/mach-xen/asm/pgalloc.h | 59 + include/asm-i386/mach-xen/asm/pgtable-2level-defs.h | 20 + include/asm-i386/mach-xen/asm/pgtable-2level.h | 118 + include/asm-i386/mach-xen/asm/pgtable-3level-defs.h | 24 + include/asm-i386/mach-xen/asm/pgtable-3level.h | 206 + + include/asm-i386/mach-xen/asm/pgtable.h | 530 +++ + include/asm-i386/mach-xen/asm/processor.h | 741 +++++ + include/asm-i386/mach-xen/asm/ptrace.h | 90 + include/asm-i386/mach-xen/asm/scatterlist.h | 22 + include/asm-i386/mach-xen/asm/segment.h | 117 + include/asm-i386/mach-xen/asm/setup.h | 81 + include/asm-i386/mach-xen/asm/smp.h | 103 + include/asm-i386/mach-xen/asm/spinlock.h | 202 + + include/asm-i386/mach-xen/asm/swiotlb.h | 43 + include/asm-i386/mach-xen/asm/synch_bitops.h | 145 + + include/asm-i386/mach-xen/asm/system.h | 488 +++ + include/asm-i386/mach-xen/asm/tlbflush.h | 101 + include/asm-i386/mach-xen/asm/vga.h | 20 + include/asm-i386/mach-xen/asm/xenoprof.h | 48 + include/asm-i386/mach-xen/irq_vectors.h | 125 + include/asm-i386/mach-xen/mach_traps.h | 33 + include/asm-i386/mach-xen/setup_arch.h | 5 + include/asm-x86_64/mach-xen/asm/agp.h | 35 + include/asm-x86_64/mach-xen/asm/arch_hooks.h | 27 + include/asm-x86_64/mach-xen/asm/bootsetup.h | 42 + include/asm-x86_64/mach-xen/asm/desc.h | 263 + + include/asm-x86_64/mach-xen/asm/dma-mapping.h | 207 + + include/asm-x86_64/mach-xen/asm/e820.h | 66 + include/asm-x86_64/mach-xen/asm/fixmap.h | 112 + include/asm-x86_64/mach-xen/asm/floppy.h | 206 + + include/asm-x86_64/mach-xen/asm/hw_irq.h | 136 + include/asm-x86_64/mach-xen/asm/hypercall.h | 406 ++ + include/asm-x86_64/mach-xen/asm/hypervisor.h | 2 + include/asm-x86_64/mach-xen/asm/io.h | 330 ++ + include/asm-x86_64/mach-xen/asm/irq.h | 38 + include/asm-x86_64/mach-xen/asm/irqflags.h | 139 + + include/asm-x86_64/mach-xen/asm/maddr.h | 161 + + include/asm-x86_64/mach-xen/asm/mmu.h | 38 + include/asm-x86_64/mach-xen/asm/mmu_context.h | 136 + include/asm-x86_64/mach-xen/asm/msr.h | 399 ++ + include/asm-x86_64/mach-xen/asm/nmi.h | 93 + include/asm-x86_64/mach-xen/asm/page.h | 214 + + include/asm-x86_64/mach-xen/asm/pci.h | 166 + + include/asm-x86_64/mach-xen/asm/pgalloc.h | 204 + + include/asm-x86_64/mach-xen/asm/pgtable.h | 573 ++++ + include/asm-x86_64/mach-xen/asm/processor.h | 506 +++ + include/asm-x86_64/mach-xen/asm/ptrace.h | 127 + include/asm-x86_64/mach-xen/asm/smp.h | 150 + + include/asm-x86_64/mach-xen/asm/synch_bitops.h | 2 + include/asm-x86_64/mach-xen/asm/system.h | 262 + + include/asm-x86_64/mach-xen/asm/timer.h | 67 + include/asm-x86_64/mach-xen/asm/tlbflush.h | 103 + include/asm-x86_64/mach-xen/asm/vga.h | 20 + include/asm-x86_64/mach-xen/asm/xenoprof.h | 1 + include/asm-x86_64/mach-xen/asm/xor.h | 328 ++ + include/asm-x86_64/mach-xen/irq_vectors.h | 123 + include/asm-x86_64/mach-xen/mach_time.h | 111 + include/asm-x86_64/mach-xen/mach_timer.h | 50 + include/asm-x86_64/mach-xen/setup_arch_post.h | 63 + include/asm-x86_64/mach-xen/setup_arch_pre.h | 5 + include/xen/balloon.h | 57 + include/xen/blkif.h | 97 + include/xen/cpu_hotplug.h | 44 + include/xen/driver_util.h | 14 + include/xen/evtchn.h | 126 + include/xen/features.h | 20 + include/xen/gnttab.h | 138 + include/xen/hvm.h | 23 + include/xen/hypercall.h | 24 + include/xen/hypervisor_sysfs.h | 32 + include/xen/pcifront.h | 76 + include/xen/public/evtchn.h | 88 + include/xen/public/gntdev.h | 105 + include/xen/public/privcmd.h | 79 + include/xen/xen_proc.h | 12 + include/xen/xenbus.h | 302 ++ + include/xen/xencons.h | 19 + include/xen/xenoprof.h | 42 + scripts/Makefile.xen | 14 + 160 files changed, 49168 insertions(+) + +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/boot-xen/Makefile 2007-08-27 14:01:24.000000000 -0400 +@@ -0,0 +1,21 @@ ++ ++OBJCOPYFLAGS := -g --strip-unneeded ++ ++vmlinuz: vmlinux-stripped FORCE ++ $(call if_changed,gzip) ++ ++vmlinux-stripped: vmlinux FORCE ++ $(call if_changed,objcopy) ++ ++INSTALL_ROOT := $(patsubst %/boot,%,$(INSTALL_PATH)) ++ ++XINSTALL_NAME ?= $(KERNELRELEASE) ++install: ++ mkdir -p $(INSTALL_ROOT)/boot ++ ln -f -s vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX) $(INSTALL_ROOT)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(XENGUEST)$(INSTALL_SUFFIX) ++ rm -f $(INSTALL_ROOT)/boot/vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX) ++ install -m0644 vmlinuz $(INSTALL_ROOT)/boot/vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX) ++ install -m0644 vmlinux $(INSTALL_ROOT)/boot/vmlinux-syms-$(XINSTALL_NAME)$(INSTALL_SUFFIX) ++ install -m0664 .config $(INSTALL_ROOT)/boot/config-$(XINSTALL_NAME)$(INSTALL_SUFFIX) ++ install -m0664 System.map $(INSTALL_ROOT)/boot/System.map-$(XINSTALL_NAME)$(INSTALL_SUFFIX) ++ ln -f -s vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX) $(INSTALL_ROOT)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL)$(XENGUEST)$(INSTALL_SUFFIX) +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/acpi/boot-xen.c 2007-08-27 14:02:03.000000000 -0400 +@@ -0,0 +1,1168 @@ ++/* ++ * boot.c - Architecture-Specific Low-Level ACPI Boot Support ++ * ++ * Copyright (C) 2001, 2002 Paul Diefenbaugh ++ * Copyright (C) 2001 Jun Nakajima ++ * ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_X86_64 ++ ++extern void __init clustered_apic_check(void); ++ ++extern int gsi_irq_sharing(int gsi); ++#include ++ ++static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } ++ ++ ++#else /* X86 */ ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++#include ++#include ++#endif /* CONFIG_X86_LOCAL_APIC */ ++ ++static inline int gsi_irq_sharing(int gsi) { return gsi; } ++ ++#endif /* X86 */ ++ ++#define BAD_MADT_ENTRY(entry, end) ( \ ++ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ ++ ((acpi_table_entry_header *)entry)->length < sizeof(*entry)) ++ ++#define PREFIX "ACPI: " ++ ++int acpi_noirq __initdata; /* skip ACPI IRQ initialization */ ++int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */ ++int acpi_ht __initdata = 1; /* enable HT */ ++ ++int acpi_lapic; ++int acpi_ioapic; ++int acpi_strict; ++EXPORT_SYMBOL(acpi_strict); ++ ++acpi_interrupt_flags acpi_sci_flags __initdata; ++int acpi_sci_override_gsi __initdata; ++int acpi_skip_timer_override __initdata; ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; ++#endif ++ ++#ifndef __HAVE_ARCH_CMPXCHG ++#warning ACPI uses CMPXCHG, i486 and later hardware ++#endif ++ ++#define MAX_MADT_ENTRIES 256 ++u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] = ++ {[0 ... MAX_MADT_ENTRIES - 1] = 0xff }; ++EXPORT_SYMBOL(x86_acpiid_to_apicid); ++ ++/* -------------------------------------------------------------------------- ++ Boot-time Configuration ++ -------------------------------------------------------------------------- */ ++ ++/* ++ * The default interrupt routing model is PIC (8259). This gets ++ * overriden if IOAPICs are enumerated (below). ++ */ ++enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; ++ ++#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN) ++ ++/* rely on all ACPI tables being in the direct mapping */ ++char *__acpi_map_table(unsigned long phys_addr, unsigned long size) ++{ ++ if (!phys_addr || !size) ++ return NULL; ++ ++ if (phys_addr+size <= (end_pfn_map << PAGE_SHIFT) + PAGE_SIZE) ++ return __va(phys_addr); ++ ++ return NULL; ++} ++ ++#else ++ ++/* ++ * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, ++ * to map the target physical address. The problem is that set_fixmap() ++ * provides a single page, and it is possible that the page is not ++ * sufficient. ++ * By using this area, we can map up to MAX_IO_APICS pages temporarily, ++ * i.e. until the next __va_range() call. ++ * ++ * Important Safety Note: The fixed I/O APIC page numbers are *subtracted* ++ * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and ++ * count idx down while incrementing the phys address. ++ */ ++char *__acpi_map_table(unsigned long phys, unsigned long size) ++{ ++ unsigned long base, offset, mapped_size; ++ int idx; ++ ++#ifndef CONFIG_XEN ++ if (phys + size < 8 * 1024 * 1024) ++ return __va(phys); ++#endif ++ ++ offset = phys & (PAGE_SIZE - 1); ++ mapped_size = PAGE_SIZE - offset; ++ set_fixmap(FIX_ACPI_END, phys); ++ base = fix_to_virt(FIX_ACPI_END); ++ ++ /* ++ * Most cases can be covered by the below. ++ */ ++ idx = FIX_ACPI_END; ++ while (mapped_size < size) { ++ if (--idx < FIX_ACPI_BEGIN) ++ return NULL; /* cannot handle this */ ++ phys += PAGE_SIZE; ++ set_fixmap(idx, phys); ++ mapped_size += PAGE_SIZE; ++ } ++ ++ return ((unsigned char *)base + offset); ++} ++#endif ++ ++#ifdef CONFIG_PCI_MMCONFIG ++/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ ++struct acpi_table_mcfg_config *pci_mmcfg_config; ++int pci_mmcfg_config_num; ++ ++int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size) ++{ ++ struct acpi_table_mcfg *mcfg; ++ unsigned long i; ++ int config_size; ++ ++ if (!phys_addr || !size) ++ return -EINVAL; ++ ++ mcfg = (struct acpi_table_mcfg *)__acpi_map_table(phys_addr, size); ++ if (!mcfg) { ++ printk(KERN_WARNING PREFIX "Unable to map MCFG\n"); ++ return -ENODEV; ++ } ++ ++ /* how many config structures do we have */ ++ pci_mmcfg_config_num = 0; ++ i = size - sizeof(struct acpi_table_mcfg); ++ while (i >= sizeof(struct acpi_table_mcfg_config)) { ++ ++pci_mmcfg_config_num; ++ i -= sizeof(struct acpi_table_mcfg_config); ++ }; ++ if (pci_mmcfg_config_num == 0) { ++ printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); ++ return -ENODEV; ++ } ++ ++ config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config); ++ pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); ++ if (!pci_mmcfg_config) { ++ printk(KERN_WARNING PREFIX ++ "No memory for MCFG config tables\n"); ++ return -ENOMEM; ++ } ++ ++ memcpy(pci_mmcfg_config, &mcfg->config, config_size); ++ for (i = 0; i < pci_mmcfg_config_num; ++i) { ++ if (mcfg->config[i].base_reserved) { ++ printk(KERN_ERR PREFIX ++ "MMCONFIG not in low 4GB of memory\n"); ++ kfree(pci_mmcfg_config); ++ pci_mmcfg_config_num = 0; ++ return -ENODEV; ++ } ++ } ++ ++ return 0; ++} ++#endif /* CONFIG_PCI_MMCONFIG */ ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size) ++{ ++ struct acpi_table_madt *madt = NULL; ++ ++ if (!phys_addr || !size || !cpu_has_apic) ++ return -EINVAL; ++ ++ madt = (struct acpi_table_madt *)__acpi_map_table(phys_addr, size); ++ if (!madt) { ++ printk(KERN_WARNING PREFIX "Unable to map MADT\n"); ++ return -ENODEV; ++ } ++ ++ if (madt->lapic_address) { ++ acpi_lapic_addr = (u64) madt->lapic_address; ++ ++ printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n", ++ madt->lapic_address); ++ } ++ ++ acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); ++ ++ return 0; ++} ++ ++static int __init ++acpi_parse_lapic(acpi_table_entry_header * header, const unsigned long end) ++{ ++ struct acpi_table_lapic *processor = NULL; ++ ++ processor = (struct acpi_table_lapic *)header; ++ ++ if (BAD_MADT_ENTRY(processor, end)) ++ return -EINVAL; ++ ++ acpi_table_print_madt_entry(header); ++ ++ /* Record local apic id only when enabled */ ++ if (processor->flags.enabled) ++ x86_acpiid_to_apicid[processor->acpi_id] = processor->id; ++ ++ /* ++ * We need to register disabled CPU as well to permit ++ * counting disabled CPUs. This allows us to size ++ * cpus_possible_map more accurately, to permit ++ * to not preallocating memory for all NR_CPUS ++ * when we use CPU hotplug. ++ */ ++ mp_register_lapic(processor->id, /* APIC ID */ ++ processor->flags.enabled); /* Enabled? */ ++ ++ return 0; ++} ++ ++static int __init ++acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header, ++ const unsigned long end) ++{ ++ struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL; ++ ++ lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr *)header; ++ ++ if (BAD_MADT_ENTRY(lapic_addr_ovr, end)) ++ return -EINVAL; ++ ++ acpi_lapic_addr = lapic_addr_ovr->address; ++ ++ return 0; ++} ++ ++static int __init ++acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end) ++{ ++ struct acpi_table_lapic_nmi *lapic_nmi = NULL; ++ ++ lapic_nmi = (struct acpi_table_lapic_nmi *)header; ++ ++ if (BAD_MADT_ENTRY(lapic_nmi, end)) ++ return -EINVAL; ++ ++ acpi_table_print_madt_entry(header); ++ ++ if (lapic_nmi->lint != 1) ++ printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); ++ ++ return 0; ++} ++ ++#endif /*CONFIG_X86_LOCAL_APIC */ ++ ++#ifdef CONFIG_X86_IO_APIC ++ ++static int __init ++acpi_parse_ioapic(acpi_table_entry_header * header, const unsigned long end) ++{ ++ struct acpi_table_ioapic *ioapic = NULL; ++ ++ ioapic = (struct acpi_table_ioapic *)header; ++ ++ if (BAD_MADT_ENTRY(ioapic, end)) ++ return -EINVAL; ++ ++ acpi_table_print_madt_entry(header); ++ ++ mp_register_ioapic(ioapic->id, ++ ioapic->address, ioapic->global_irq_base); ++ ++ return 0; ++} ++ ++/* ++ * Parse Interrupt Source Override for the ACPI SCI ++ */ ++static void acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) ++{ ++ if (trigger == 0) /* compatible SCI trigger is level */ ++ trigger = 3; ++ ++ if (polarity == 0) /* compatible SCI polarity is low */ ++ polarity = 3; ++ ++ /* Command-line over-ride via acpi_sci= */ ++ if (acpi_sci_flags.trigger) ++ trigger = acpi_sci_flags.trigger; ++ ++ if (acpi_sci_flags.polarity) ++ polarity = acpi_sci_flags.polarity; ++ ++ /* ++ * mp_config_acpi_legacy_irqs() already setup IRQs < 16 ++ * If GSI is < 16, this will update its flags, ++ * else it will create a new mp_irqs[] entry. ++ */ ++ mp_override_legacy_irq(gsi, polarity, trigger, gsi); ++ ++ /* ++ * stash over-ride to indicate we've been here ++ * and for later update of acpi_fadt ++ */ ++ acpi_sci_override_gsi = gsi; ++ return; ++} ++ ++static int __init ++acpi_parse_int_src_ovr(acpi_table_entry_header * header, ++ const unsigned long end) ++{ ++ struct acpi_table_int_src_ovr *intsrc = NULL; ++ ++ intsrc = (struct acpi_table_int_src_ovr *)header; ++ ++ if (BAD_MADT_ENTRY(intsrc, end)) ++ return -EINVAL; ++ ++ acpi_table_print_madt_entry(header); ++ ++ if (intsrc->bus_irq == acpi_fadt.sci_int) { ++ acpi_sci_ioapic_setup(intsrc->global_irq, ++ intsrc->flags.polarity, ++ intsrc->flags.trigger); ++ return 0; ++ } ++ ++ if (acpi_skip_timer_override && ++ intsrc->bus_irq == 0 && intsrc->global_irq == 2) { ++ printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); ++ return 0; ++ } ++ ++ mp_override_legacy_irq(intsrc->bus_irq, ++ intsrc->flags.polarity, ++ intsrc->flags.trigger, intsrc->global_irq); ++ ++ return 0; ++} ++ ++static int __init ++acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end) ++{ ++ struct acpi_table_nmi_src *nmi_src = NULL; ++ ++ nmi_src = (struct acpi_table_nmi_src *)header; ++ ++ if (BAD_MADT_ENTRY(nmi_src, end)) ++ return -EINVAL; ++ ++ acpi_table_print_madt_entry(header); ++ ++ /* TBD: Support nimsrc entries? */ ++ ++ return 0; ++} ++ ++#endif /* CONFIG_X86_IO_APIC */ ++ ++/* ++ * acpi_pic_sci_set_trigger() ++ * ++ * use ELCR to set PIC-mode trigger type for SCI ++ * ++ * If a PIC-mode SCI is not recognized or gives spurious IRQ7's ++ * it may require Edge Trigger -- use "acpi_sci=edge" ++ * ++ * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers ++ * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge. ++ * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0) ++ * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0) ++ */ ++ ++void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) ++{ ++ unsigned int mask = 1 << irq; ++ unsigned int old, new; ++ ++ /* Real old ELCR mask */ ++ old = inb(0x4d0) | (inb(0x4d1) << 8); ++ ++ /* ++ * If we use ACPI to set PCI irq's, then we should clear ELCR ++ * since we will set it correctly as we enable the PCI irq ++ * routing. ++ */ ++ new = acpi_noirq ? old : 0; ++ ++ /* ++ * Update SCI information in the ELCR, it isn't in the PCI ++ * routing tables.. ++ */ ++ switch (trigger) { ++ case 1: /* Edge - clear */ ++ new &= ~mask; ++ break; ++ case 3: /* Level - set */ ++ new |= mask; ++ break; ++ } ++ ++ if (old == new) ++ return; ++ ++ printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old); ++ outb(new, 0x4d0); ++ outb(new >> 8, 0x4d1); ++} ++ ++int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) ++{ ++#ifdef CONFIG_X86_IO_APIC ++ if (use_pci_vector() && !platform_legacy_irq(gsi)) ++ *irq = IO_APIC_VECTOR(gsi); ++ else ++#endif ++ *irq = gsi_irq_sharing(gsi); ++ return 0; ++} ++ ++/* ++ * success: return IRQ number (>=0) ++ * failure: return < 0 ++ */ ++int acpi_register_gsi(u32 gsi, int triggering, int polarity) ++{ ++ unsigned int irq; ++ unsigned int plat_gsi = gsi; ++ ++#ifdef CONFIG_PCI ++ /* ++ * Make sure all (legacy) PCI IRQs are set as level-triggered. ++ */ ++ if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { ++ extern void eisa_set_level_irq(unsigned int irq); ++ ++ if (triggering == ACPI_LEVEL_SENSITIVE) ++ eisa_set_level_irq(gsi); ++ } ++#endif ++ ++#ifdef CONFIG_X86_IO_APIC ++ if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { ++ plat_gsi = mp_register_gsi(gsi, triggering, polarity); ++ } ++#endif ++ acpi_gsi_to_irq(plat_gsi, &irq); ++ return irq; ++} ++ ++EXPORT_SYMBOL(acpi_register_gsi); ++ ++/* ++ * ACPI based hotplug support for CPU ++ */ ++#ifdef CONFIG_ACPI_HOTPLUG_CPU ++int acpi_map_lsapic(acpi_handle handle, int *pcpu) ++{ ++ /* TBD */ ++ return -EINVAL; ++} ++ ++EXPORT_SYMBOL(acpi_map_lsapic); ++ ++int acpi_unmap_lsapic(int cpu) ++{ ++ /* TBD */ ++ return -EINVAL; ++} ++ ++EXPORT_SYMBOL(acpi_unmap_lsapic); ++#endif /* CONFIG_ACPI_HOTPLUG_CPU */ ++ ++int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) ++{ ++ /* TBD */ ++ return -EINVAL; ++} ++ ++EXPORT_SYMBOL(acpi_register_ioapic); ++ ++int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) ++{ ++ /* TBD */ ++ return -EINVAL; ++} ++ ++EXPORT_SYMBOL(acpi_unregister_ioapic); ++ ++static unsigned long __init ++acpi_scan_rsdp(unsigned long start, unsigned long length) ++{ ++ unsigned long offset = 0; ++ unsigned long sig_len = sizeof("RSD PTR ") - 1; ++ unsigned long vstart = (unsigned long)isa_bus_to_virt(start); ++ ++ /* ++ * Scan all 16-byte boundaries of the physical memory region for the ++ * RSDP signature. ++ */ ++ for (offset = 0; offset < length; offset += 16) { ++ if (strncmp((char *)(vstart + offset), "RSD PTR ", sig_len)) ++ continue; ++ return (start + offset); ++ } ++ ++ return 0; ++} ++ ++static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size) ++{ ++ struct acpi_table_sbf *sb; ++ ++ if (!phys_addr || !size) ++ return -EINVAL; ++ ++ sb = (struct acpi_table_sbf *)__acpi_map_table(phys_addr, size); ++ if (!sb) { ++ printk(KERN_WARNING PREFIX "Unable to map SBF\n"); ++ return -ENODEV; ++ } ++ ++ sbf_port = sb->sbf_cmos; /* Save CMOS port */ ++ ++ return 0; ++} ++ ++#ifdef CONFIG_HPET_TIMER ++ ++static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) ++{ ++ struct acpi_table_hpet *hpet_tbl; ++ ++ if (!phys || !size) ++ return -EINVAL; ++ ++ hpet_tbl = (struct acpi_table_hpet *)__acpi_map_table(phys, size); ++ if (!hpet_tbl) { ++ printk(KERN_WARNING PREFIX "Unable to map HPET\n"); ++ return -ENODEV; ++ } ++ ++ if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) { ++ printk(KERN_WARNING PREFIX "HPET timers must be located in " ++ "memory.\n"); ++ return -1; ++ } ++#ifdef CONFIG_X86_64 ++ vxtime.hpet_address = hpet_tbl->addr.addrl | ++ ((long)hpet_tbl->addr.addrh << 32); ++ ++ printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", ++ hpet_tbl->id, vxtime.hpet_address); ++#else /* X86 */ ++ { ++ extern unsigned long hpet_address; ++ ++ hpet_address = hpet_tbl->addr.addrl; ++ printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", ++ hpet_tbl->id, hpet_address); ++ } ++#endif /* X86 */ ++ ++ return 0; ++} ++#else ++#define acpi_parse_hpet NULL ++#endif ++ ++#ifdef CONFIG_X86_PM_TIMER ++extern u32 pmtmr_ioport; ++#endif ++ ++static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) ++{ ++ struct fadt_descriptor *fadt = NULL; ++ ++ fadt = (struct fadt_descriptor *)__acpi_map_table(phys, size); ++ if (!fadt) { ++ printk(KERN_WARNING PREFIX "Unable to map FADT\n"); ++ return 0; ++ } ++ /* initialize sci_int early for INT_SRC_OVR MADT parsing */ ++ acpi_fadt.sci_int = fadt->sci_int; ++ ++ /* initialize rev and apic_phys_dest_mode for x86_64 genapic */ ++ acpi_fadt.revision = fadt->revision; ++ acpi_fadt.force_apic_physical_destination_mode = ++ fadt->force_apic_physical_destination_mode; ++ ++#if defined(CONFIG_X86_PM_TIMER) && !defined(CONFIG_XEN) ++ /* detect the location of the ACPI PM Timer */ ++ if (fadt->revision >= FADT2_REVISION_ID) { ++ /* FADT rev. 2 */ ++ if (fadt->xpm_tmr_blk.address_space_id != ++ ACPI_ADR_SPACE_SYSTEM_IO) ++ return 0; ++ ++ pmtmr_ioport = fadt->xpm_tmr_blk.address; ++ /* ++ * "X" fields are optional extensions to the original V1.0 ++ * fields, so we must selectively expand V1.0 fields if the ++ * corresponding X field is zero. ++ */ ++ if (!pmtmr_ioport) ++ pmtmr_ioport = fadt->V1_pm_tmr_blk; ++ } else { ++ /* FADT rev. 1 */ ++ pmtmr_ioport = fadt->V1_pm_tmr_blk; ++ } ++ if (pmtmr_ioport) ++ printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", ++ pmtmr_ioport); ++#endif ++ return 0; ++} ++ ++unsigned long __init acpi_find_rsdp(void) ++{ ++ unsigned long rsdp_phys = 0; ++ ++ if (efi_enabled) { ++ if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) ++ return efi.acpi20; ++ else if (efi.acpi != EFI_INVALID_TABLE_ADDR) ++ return efi.acpi; ++ } ++ /* ++ * Scan memory looking for the RSDP signature. First search EBDA (low ++ * memory) paragraphs and then search upper memory (E0000-FFFFF). ++ */ ++ rsdp_phys = acpi_scan_rsdp(0, 0x400); ++ if (!rsdp_phys) ++ rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000); ++ ++ return rsdp_phys; ++} ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++/* ++ * Parse LAPIC entries in MADT ++ * returns 0 on success, < 0 on error ++ */ ++static int __init acpi_parse_madt_lapic_entries(void) ++{ ++ int count; ++ ++ if (!cpu_has_apic) ++ return -ENODEV; ++ ++ /* ++ * Note that the LAPIC address is obtained from the MADT (32-bit value) ++ * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). ++ */ ++ ++ count = ++ acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, ++ acpi_parse_lapic_addr_ovr, 0); ++ if (count < 0) { ++ printk(KERN_ERR PREFIX ++ "Error parsing LAPIC address override entry\n"); ++ return count; ++ } ++ ++ mp_register_lapic_address(acpi_lapic_addr); ++ ++ count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic, ++ MAX_APICS); ++ if (!count) { ++ printk(KERN_ERR PREFIX "No LAPIC entries present\n"); ++ /* TBD: Cleanup to allow fallback to MPS */ ++ return -ENODEV; ++ } else if (count < 0) { ++ printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n"); ++ /* TBD: Cleanup to allow fallback to MPS */ ++ return count; ++ } ++ ++ count = ++ acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0); ++ if (count < 0) { ++ printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); ++ /* TBD: Cleanup to allow fallback to MPS */ ++ return count; ++ } ++ return 0; ++} ++#endif /* CONFIG_X86_LOCAL_APIC */ ++ ++#ifdef CONFIG_X86_IO_APIC ++/* ++ * Parse IOAPIC related entries in MADT ++ * returns 0 on success, < 0 on error ++ */ ++static int __init acpi_parse_madt_ioapic_entries(void) ++{ ++ int count; ++ ++ /* ++ * ACPI interpreter is required to complete interrupt setup, ++ * so if it is off, don't enumerate the io-apics with ACPI. ++ * If MPS is present, it will handle them, ++ * otherwise the system will stay in PIC mode ++ */ ++ if (acpi_disabled || acpi_noirq) { ++ return -ENODEV; ++ } ++ ++ if (!cpu_has_apic) ++ return -ENODEV; ++ ++ /* ++ * if "noapic" boot option, don't look for IO-APICs ++ */ ++ if (skip_ioapic_setup) { ++ printk(KERN_INFO PREFIX "Skipping IOAPIC probe " ++ "due to 'noapic' option.\n"); ++ return -ENODEV; ++ } ++ ++ count = ++ acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic, ++ MAX_IO_APICS); ++ if (!count) { ++ printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); ++ return -ENODEV; ++ } else if (count < 0) { ++ printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n"); ++ return count; ++ } ++ ++ count = ++ acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, ++ NR_IRQ_VECTORS); ++ if (count < 0) { ++ printk(KERN_ERR PREFIX ++ "Error parsing interrupt source overrides entry\n"); ++ /* TBD: Cleanup to allow fallback to MPS */ ++ return count; ++ } ++ ++ /* ++ * If BIOS did not supply an INT_SRC_OVR for the SCI ++ * pretend we got one so we can set the SCI flags. ++ */ ++ if (!acpi_sci_override_gsi) ++ acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0); ++ ++ /* Fill in identity legacy mapings where no override */ ++ mp_config_acpi_legacy_irqs(); ++ ++ count = ++ acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, ++ NR_IRQ_VECTORS); ++ if (count < 0) { ++ printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); ++ /* TBD: Cleanup to allow fallback to MPS */ ++ return count; ++ } ++ ++ return 0; ++} ++#else ++static inline int acpi_parse_madt_ioapic_entries(void) ++{ ++ return -1; ++} ++#endif /* !CONFIG_X86_IO_APIC */ ++ ++static void __init acpi_process_madt(void) ++{ ++#ifdef CONFIG_X86_LOCAL_APIC ++ int count, error; ++ ++ count = acpi_table_parse(ACPI_APIC, acpi_parse_madt); ++ if (count >= 1) { ++ ++ /* ++ * Parse MADT LAPIC entries ++ */ ++ error = acpi_parse_madt_lapic_entries(); ++ if (!error) { ++ acpi_lapic = 1; ++ ++#ifdef CONFIG_X86_GENERICARCH ++ generic_bigsmp_probe(); ++#endif ++ /* ++ * Parse MADT IO-APIC entries ++ */ ++ error = acpi_parse_madt_ioapic_entries(); ++ if (!error) { ++ acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; ++ acpi_irq_balance_set(NULL); ++ acpi_ioapic = 1; ++ ++ smp_found_config = 1; ++ clustered_apic_check(); ++ } ++ } ++ if (error == -EINVAL) { ++ /* ++ * Dell Precision Workstation 410, 610 come here. ++ */ ++ printk(KERN_ERR PREFIX ++ "Invalid BIOS MADT, disabling ACPI\n"); ++ disable_acpi(); ++ } ++ } ++#endif ++ return; ++} ++ ++extern int acpi_force; ++ ++#ifdef __i386__ ++ ++static int __init disable_acpi_irq(struct dmi_system_id *d) ++{ ++ if (!acpi_force) { ++ printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n", ++ d->ident); ++ acpi_noirq_set(); ++ } ++ return 0; ++} ++ ++static int __init disable_acpi_pci(struct dmi_system_id *d) ++{ ++ if (!acpi_force) { ++ printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n", ++ d->ident); ++ acpi_disable_pci(); ++ } ++ return 0; ++} ++ ++static int __init dmi_disable_acpi(struct dmi_system_id *d) ++{ ++ if (!acpi_force) { ++ printk(KERN_NOTICE "%s detected: acpi off\n", d->ident); ++ disable_acpi(); ++ } else { ++ printk(KERN_NOTICE ++ "Warning: DMI blacklist says broken, but acpi forced\n"); ++ } ++ return 0; ++} ++ ++/* ++ * Limit ACPI to CPU enumeration for HT ++ */ ++static int __init force_acpi_ht(struct dmi_system_id *d) ++{ ++ if (!acpi_force) { ++ printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", ++ d->ident); ++ disable_acpi(); ++ acpi_ht = 1; ++ } else { ++ printk(KERN_NOTICE ++ "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); ++ } ++ return 0; ++} ++ ++/* ++ * If your system is blacklisted here, but you find that acpi=force ++ * works for you, please contact acpi-devel@sourceforge.net ++ */ ++static struct dmi_system_id __initdata acpi_dmi_table[] = { ++ /* ++ * Boxes that need ACPI disabled ++ */ ++ { ++ .callback = dmi_disable_acpi, ++ .ident = "IBM Thinkpad", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), ++ DMI_MATCH(DMI_BOARD_NAME, "2629H1G"), ++ }, ++ }, ++ ++ /* ++ * Boxes that need acpi=ht ++ */ ++ { ++ .callback = force_acpi_ht, ++ .ident = "FSC Primergy T850", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), ++ }, ++ }, ++ { ++ .callback = force_acpi_ht, ++ .ident = "DELL GX240", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"), ++ DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"), ++ }, ++ }, ++ { ++ .callback = force_acpi_ht, ++ .ident = "HP VISUALIZE NT Workstation", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"), ++ }, ++ }, ++ { ++ .callback = force_acpi_ht, ++ .ident = "Compaq Workstation W8000", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), ++ }, ++ }, ++ { ++ .callback = force_acpi_ht, ++ .ident = "ASUS P4B266", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), ++ DMI_MATCH(DMI_BOARD_NAME, "P4B266"), ++ }, ++ }, ++ { ++ .callback = force_acpi_ht, ++ .ident = "ASUS P2B-DS", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), ++ DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"), ++ }, ++ }, ++ { ++ .callback = force_acpi_ht, ++ .ident = "ASUS CUR-DLS", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), ++ DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"), ++ }, ++ }, ++ { ++ .callback = force_acpi_ht, ++ .ident = "ABIT i440BX-W83977", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "ABIT "), ++ DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), ++ }, ++ }, ++ { ++ .callback = force_acpi_ht, ++ .ident = "IBM Bladecenter", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), ++ DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), ++ }, ++ }, ++ { ++ .callback = force_acpi_ht, ++ .ident = "IBM eServer xSeries 360", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), ++ DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), ++ }, ++ }, ++ { ++ .callback = force_acpi_ht, ++ .ident = "IBM eserver xSeries 330", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), ++ DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), ++ }, ++ }, ++ { ++ .callback = force_acpi_ht, ++ .ident = "IBM eserver xSeries 440", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), ++ }, ++ }, ++ ++ /* ++ * Boxes that need ACPI PCI IRQ routing disabled ++ */ ++ { ++ .callback = disable_acpi_irq, ++ .ident = "ASUS A7V", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), ++ DMI_MATCH(DMI_BOARD_NAME, ""), ++ /* newer BIOS, Revision 1011, does work */ ++ DMI_MATCH(DMI_BIOS_VERSION, ++ "ASUS A7V ACPI BIOS Revision 1007"), ++ }, ++ }, ++ ++ /* ++ * Boxes that need ACPI PCI IRQ routing and PCI scan disabled ++ */ ++ { /* _BBN 0 bug */ ++ .callback = disable_acpi_pci, ++ .ident = "ASUS PR-DLS", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), ++ DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"), ++ DMI_MATCH(DMI_BIOS_VERSION, ++ "ASUS PR-DLS ACPI BIOS Revision 1010"), ++ DMI_MATCH(DMI_BIOS_DATE, "03/21/2003") ++ }, ++ }, ++ { ++ .callback = disable_acpi_pci, ++ .ident = "Acer TravelMate 36x Laptop", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), ++ }, ++ }, ++ {} ++}; ++ ++#endif /* __i386__ */ ++ ++/* ++ * acpi_boot_table_init() and acpi_boot_init() ++ * called from setup_arch(), always. ++ * 1. checksums all tables ++ * 2. enumerates lapics ++ * 3. enumerates io-apics ++ * ++ * acpi_table_init() is separate to allow reading SRAT without ++ * other side effects. ++ * ++ * side effects of acpi_boot_init: ++ * acpi_lapic = 1 if LAPIC found ++ * acpi_ioapic = 1 if IOAPIC found ++ * if (acpi_lapic && acpi_ioapic) smp_found_config = 1; ++ * if acpi_blacklisted() acpi_disabled = 1; ++ * acpi_irq_model=... ++ * ... ++ * ++ * return value: (currently ignored) ++ * 0: success ++ * !0: failure ++ */ ++ ++int __init acpi_boot_table_init(void) ++{ ++ int error; ++ ++#ifdef __i386__ ++ dmi_check_system(acpi_dmi_table); ++#endif ++ ++ /* ++ * If acpi_disabled, bail out ++ * One exception: acpi=ht continues far enough to enumerate LAPICs ++ */ ++ if (acpi_disabled && !acpi_ht) ++ return 1; ++ ++ /* ++ * Initialize the ACPI boot-time table parser. ++ */ ++ error = acpi_table_init(); ++ if (error) { ++ disable_acpi(); ++ return error; ++ } ++ ++ acpi_table_parse(ACPI_BOOT, acpi_parse_sbf); ++ ++ /* ++ * blacklist may disable ACPI entirely ++ */ ++ error = acpi_blacklisted(); ++ if (error) { ++ if (acpi_force) { ++ printk(KERN_WARNING PREFIX "acpi=force override\n"); ++ } else { ++ printk(KERN_WARNING PREFIX "Disabling ACPI support\n"); ++ disable_acpi(); ++ return error; ++ } ++ } ++ ++ return 0; ++} ++ ++int __init acpi_boot_init(void) ++{ ++ /* ++ * If acpi_disabled, bail out ++ * One exception: acpi=ht continues far enough to enumerate LAPICs ++ */ ++ if (acpi_disabled && !acpi_ht) ++ return 1; ++ ++ acpi_table_parse(ACPI_BOOT, acpi_parse_sbf); ++ ++ /* ++ * set sci_int and PM timer address ++ */ ++ acpi_table_parse(ACPI_FADT, acpi_parse_fadt); ++ ++ /* ++ * Process the Multiple APIC Description Table (MADT), if present ++ */ ++ acpi_process_madt(); ++ ++ acpi_table_parse(ACPI_HPET, acpi_parse_hpet); ++ ++ return 0; ++} +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/apic-xen.c 2007-08-27 14:02:03.000000000 -0400 +@@ -0,0 +1,155 @@ ++/* ++ * Local APIC handling, local APIC timers ++ * ++ * (c) 1999, 2000 Ingo Molnar ++ * ++ * Fixes ++ * Maciej W. Rozycki : Bits for genuine 82489DX APICs; ++ * thanks to Eric Gilmore ++ * and Rolf G. Tews ++ * for testing these extensively. ++ * Maciej W. Rozycki : Various updates and fixes. ++ * Mikael Pettersson : Power Management for UP-APIC. ++ * Pavel Machek and ++ * Mikael Pettersson : PM converted to driver model. ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include "io_ports.h" ++ ++#ifndef CONFIG_XEN ++/* ++ * cpu_mask that denotes the CPUs that needs timer interrupt coming in as ++ * IPIs in place of local APIC timers ++ */ ++static cpumask_t timer_bcast_ipi; ++#endif ++ ++/* ++ * Knob to control our willingness to enable the local APIC. ++ */ ++int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ ++ ++/* ++ * Debug level ++ */ ++int apic_verbosity; ++ ++#ifndef CONFIG_XEN ++static int modern_apic(void) ++{ ++ unsigned int lvr, version; ++ /* AMD systems use old APIC versions, so check the CPU */ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && ++ boot_cpu_data.x86 >= 0xf) ++ return 1; ++ lvr = apic_read(APIC_LVR); ++ version = GET_APIC_VERSION(lvr); ++ return version >= 0x14; ++} ++#endif /* !CONFIG_XEN */ ++ ++/* ++ * 'what should we do if we get a hw irq event on an illegal vector'. ++ * each architecture has to answer this themselves. ++ */ ++void ack_bad_irq(unsigned int irq) ++{ ++ printk("unexpected IRQ trap at vector %02x\n", irq); ++ /* ++ * Currently unexpected vectors happen only on SMP and APIC. ++ * We _must_ ack these because every local APIC has only N ++ * irq slots per priority level, and a 'hanging, unacked' IRQ ++ * holds up an irq slot - in excessive cases (when multiple ++ * unexpected vectors occur) that might lock up the APIC ++ * completely. ++ * But only ack when the APIC is enabled -AK ++ */ ++ if (cpu_has_apic) ++ ack_APIC_irq(); ++} ++ ++int get_physical_broadcast(void) ++{ ++ return 0xff; ++} ++ ++#ifndef CONFIG_XEN ++#ifndef CONFIG_SMP ++static void up_apic_timer_interrupt_call(struct pt_regs *regs) ++{ ++ int cpu = smp_processor_id(); ++ ++ /* ++ * the NMI deadlock-detector uses this. ++ */ ++ per_cpu(irq_stat, cpu).apic_timer_irqs++; ++ ++ smp_local_timer_interrupt(regs); ++} ++#endif ++ ++void smp_send_timer_broadcast_ipi(struct pt_regs *regs) ++{ ++ cpumask_t mask; ++ ++ cpus_and(mask, cpu_online_map, timer_bcast_ipi); ++ if (!cpus_empty(mask)) { ++#ifdef CONFIG_SMP ++ send_IPI_mask(mask, LOCAL_TIMER_VECTOR); ++#else ++ /* ++ * We can directly call the apic timer interrupt handler ++ * in UP case. Minus all irq related functions ++ */ ++ up_apic_timer_interrupt_call(regs); ++#endif ++ } ++} ++#endif ++ ++int setup_profiling_timer(unsigned int multiplier) ++{ ++ return -EINVAL; ++} ++ ++/* ++ * This initializes the IO-APIC and APIC hardware if this is ++ * a UP kernel. ++ */ ++int __init APIC_init_uniprocessor (void) ++{ ++#ifdef CONFIG_X86_IO_APIC ++ if (smp_found_config) ++ if (!skip_ioapic_setup && nr_ioapics) ++ setup_IO_APIC(); ++#endif ++ ++ return 0; ++} +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/cpu/common-xen.c 2007-08-27 14:02:03.000000000 -0400 +@@ -0,0 +1,743 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_X86_LOCAL_APIC ++#include ++#include ++#include ++#else ++#ifdef CONFIG_XEN ++#define phys_pkg_id(a,b) a ++#endif ++#endif ++#include ++ ++#include "cpu.h" ++ ++DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); ++EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); ++ ++#ifndef CONFIG_XEN ++DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); ++EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); ++#endif ++ ++static int cachesize_override __cpuinitdata = -1; ++static int disable_x86_fxsr __cpuinitdata; ++static int disable_x86_serial_nr __cpuinitdata = 1; ++static int disable_x86_sep __cpuinitdata; ++ ++struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; ++ ++extern int disable_pse; ++ ++static void default_init(struct cpuinfo_x86 * c) ++{ ++ /* Not much we can do here... */ ++ /* Check if at least it has cpuid */ ++ if (c->cpuid_level == -1) { ++ /* No cpuid. It must be an ancient CPU */ ++ if (c->x86 == 4) ++ strcpy(c->x86_model_id, "486"); ++ else if (c->x86 == 3) ++ strcpy(c->x86_model_id, "386"); ++ } ++} ++ ++static struct cpu_dev default_cpu = { ++ .c_init = default_init, ++ .c_vendor = "Unknown", ++}; ++static struct cpu_dev * this_cpu = &default_cpu; ++ ++static int __init cachesize_setup(char *str) ++{ ++ get_option (&str, &cachesize_override); ++ return 1; ++} ++__setup("cachesize=", cachesize_setup); ++ ++int __cpuinit get_model_name(struct cpuinfo_x86 *c) ++{ ++ unsigned int *v; ++ char *p, *q; ++ ++ if (cpuid_eax(0x80000000) < 0x80000004) ++ return 0; ++ ++ v = (unsigned int *) c->x86_model_id; ++ cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); ++ cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); ++ cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); ++ c->x86_model_id[48] = 0; ++ ++ /* Intel chips right-justify this string for some dumb reason; ++ undo that brain damage */ ++ p = q = &c->x86_model_id[0]; ++ while ( *p == ' ' ) ++ p++; ++ if ( p != q ) { ++ while ( *p ) ++ *q++ = *p++; ++ while ( q <= &c->x86_model_id[48] ) ++ *q++ = '\0'; /* Zero-pad the rest */ ++ } ++ ++ return 1; ++} ++ ++ ++void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) ++{ ++ unsigned int n, dummy, ecx, edx, l2size; ++ ++ n = cpuid_eax(0x80000000); ++ ++ if (n >= 0x80000005) { ++ cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); ++ printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", ++ edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); ++ c->x86_cache_size=(ecx>>24)+(edx>>24); ++ } ++ ++ if (n < 0x80000006) /* Some chips just has a large L1. */ ++ return; ++ ++ ecx = cpuid_ecx(0x80000006); ++ l2size = ecx >> 16; ++ ++ /* do processor-specific cache resizing */ ++ if (this_cpu->c_size_cache) ++ l2size = this_cpu->c_size_cache(c,l2size); ++ ++ /* Allow user to override all this if necessary. */ ++ if (cachesize_override != -1) ++ l2size = cachesize_override; ++ ++ if ( l2size == 0 ) ++ return; /* Again, no L2 cache is possible */ ++ ++ c->x86_cache_size = l2size; ++ ++ printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", ++ l2size, ecx & 0xFF); ++} ++ ++/* Naming convention should be: [()] */ ++/* This table only is used unless init_() below doesn't set it; */ ++/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ ++ ++/* Look up CPU names by table lookup. */ ++static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) ++{ ++ struct cpu_model_info *info; ++ ++ if ( c->x86_model >= 16 ) ++ return NULL; /* Range check */ ++ ++ if (!this_cpu) ++ return NULL; ++ ++ info = this_cpu->c_models; ++ ++ while (info && info->family) { ++ if (info->family == c->x86) ++ return info->model_names[c->x86_model]; ++ info++; ++ } ++ return NULL; /* Not found */ ++} ++ ++ ++static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) ++{ ++ char *v = c->x86_vendor_id; ++ int i; ++ static int printed; ++ ++ for (i = 0; i < X86_VENDOR_NUM; i++) { ++ if (cpu_devs[i]) { ++ if (!strcmp(v,cpu_devs[i]->c_ident[0]) || ++ (cpu_devs[i]->c_ident[1] && ++ !strcmp(v,cpu_devs[i]->c_ident[1]))) { ++ c->x86_vendor = i; ++ if (!early) ++ this_cpu = cpu_devs[i]; ++ return; ++ } ++ } ++ } ++ if (!printed) { ++ printed++; ++ printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); ++ printk(KERN_ERR "CPU: Your system may be unstable.\n"); ++ } ++ c->x86_vendor = X86_VENDOR_UNKNOWN; ++ this_cpu = &default_cpu; ++} ++ ++ ++static int __init x86_fxsr_setup(char * s) ++{ ++ disable_x86_fxsr = 1; ++ return 1; ++} ++__setup("nofxsr", x86_fxsr_setup); ++ ++ ++static int __init x86_sep_setup(char * s) ++{ ++ disable_x86_sep = 1; ++ return 1; ++} ++__setup("nosep", x86_sep_setup); ++ ++ ++/* Standard macro to see if a specific flag is changeable */ ++static inline int flag_is_changeable_p(u32 flag) ++{ ++ u32 f1, f2; ++ ++ asm("pushfl\n\t" ++ "pushfl\n\t" ++ "popl %0\n\t" ++ "movl %0,%1\n\t" ++ "xorl %2,%0\n\t" ++ "pushl %0\n\t" ++ "popfl\n\t" ++ "pushfl\n\t" ++ "popl %0\n\t" ++ "popfl\n\t" ++ : "=&r" (f1), "=&r" (f2) ++ : "ir" (flag)); ++ ++ return ((f1^f2) & flag) != 0; ++} ++ ++ ++/* Probe for the CPUID instruction */ ++static int __cpuinit have_cpuid_p(void) ++{ ++ return flag_is_changeable_p(X86_EFLAGS_ID); ++} ++ ++/* Do minimum CPU detection early. ++ Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. ++ The others are not touched to avoid unwanted side effects. ++ ++ WARNING: this function is only called on the BP. Don't add code here ++ that is supposed to run on all CPUs. */ ++static void __init early_cpu_detect(void) ++{ ++ struct cpuinfo_x86 *c = &boot_cpu_data; ++ ++ c->x86_cache_alignment = 32; ++ ++ if (!have_cpuid_p()) ++ return; ++ ++ /* Get vendor name */ ++ cpuid(0x00000000, &c->cpuid_level, ++ (int *)&c->x86_vendor_id[0], ++ (int *)&c->x86_vendor_id[8], ++ (int *)&c->x86_vendor_id[4]); ++ ++ get_cpu_vendor(c, 1); ++ ++ c->x86 = 4; ++ if (c->cpuid_level >= 0x00000001) { ++ u32 junk, tfms, cap0, misc; ++ cpuid(0x00000001, &tfms, &misc, &junk, &cap0); ++ c->x86 = (tfms >> 8) & 15; ++ c->x86_model = (tfms >> 4) & 15; ++ if (c->x86 == 0xf) ++ c->x86 += (tfms >> 20) & 0xff; ++ if (c->x86 >= 0x6) ++ c->x86_model += ((tfms >> 16) & 0xF) << 4; ++ c->x86_mask = tfms & 15; ++ if (cap0 & (1<<19)) ++ c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; ++ } ++} ++ ++void __cpuinit generic_identify(struct cpuinfo_x86 * c) ++{ ++ u32 tfms, xlvl; ++ int ebx; ++ ++ if (have_cpuid_p()) { ++ /* Get vendor name */ ++ cpuid(0x00000000, &c->cpuid_level, ++ (int *)&c->x86_vendor_id[0], ++ (int *)&c->x86_vendor_id[8], ++ (int *)&c->x86_vendor_id[4]); ++ ++ get_cpu_vendor(c, 0); ++ /* Initialize the standard set of capabilities */ ++ /* Note that the vendor-specific code below might override */ ++ ++ /* Intel-defined flags: level 0x00000001 */ ++ if ( c->cpuid_level >= 0x00000001 ) { ++ u32 capability, excap; ++ cpuid(0x00000001, &tfms, &ebx, &excap, &capability); ++ c->x86_capability[0] = capability; ++ c->x86_capability[4] = excap; ++ c->x86 = (tfms >> 8) & 15; ++ c->x86_model = (tfms >> 4) & 15; ++ if (c->x86 == 0xf) ++ c->x86 += (tfms >> 20) & 0xff; ++ if (c->x86 >= 0x6) ++ c->x86_model += ((tfms >> 16) & 0xF) << 4; ++ c->x86_mask = tfms & 15; ++#ifdef CONFIG_X86_HT ++ c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); ++#else ++ c->apicid = (ebx >> 24) & 0xFF; ++#endif ++ } else { ++ /* Have CPUID level 0 only - unheard of */ ++ c->x86 = 4; ++ } ++ ++ /* AMD-defined flags: level 0x80000001 */ ++ xlvl = cpuid_eax(0x80000000); ++ if ( (xlvl & 0xffff0000) == 0x80000000 ) { ++ if ( xlvl >= 0x80000001 ) { ++ c->x86_capability[1] = cpuid_edx(0x80000001); ++ c->x86_capability[6] = cpuid_ecx(0x80000001); ++ } ++ if ( xlvl >= 0x80000004 ) ++ get_model_name(c); /* Default name */ ++ } ++ } ++ ++ early_intel_workaround(c); ++ ++#ifdef CONFIG_X86_HT ++ c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; ++#endif ++} ++ ++static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) ++{ ++ if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { ++ /* Disable processor serial number */ ++ unsigned long lo,hi; ++ rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi); ++ lo |= 0x200000; ++ wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi); ++ printk(KERN_NOTICE "CPU serial number disabled.\n"); ++ clear_bit(X86_FEATURE_PN, c->x86_capability); ++ ++ /* Disabling the serial number may affect the cpuid level */ ++ c->cpuid_level = cpuid_eax(0); ++ } ++} ++ ++static int __init x86_serial_nr_setup(char *s) ++{ ++ disable_x86_serial_nr = 0; ++ return 1; ++} ++__setup("serialnumber", x86_serial_nr_setup); ++ ++ ++ ++/* ++ * This does the hard work of actually picking apart the CPU stuff... ++ */ ++void __cpuinit identify_cpu(struct cpuinfo_x86 *c) ++{ ++ int i; ++ ++ c->loops_per_jiffy = loops_per_jiffy; ++ c->x86_cache_size = -1; ++ c->x86_vendor = X86_VENDOR_UNKNOWN; ++ c->cpuid_level = -1; /* CPUID not detected */ ++ c->x86_model = c->x86_mask = 0; /* So far unknown... */ ++ c->x86_vendor_id[0] = '\0'; /* Unset */ ++ c->x86_model_id[0] = '\0'; /* Unset */ ++ c->x86_max_cores = 1; ++ memset(&c->x86_capability, 0, sizeof c->x86_capability); ++ ++ if (!have_cpuid_p()) { ++ /* First of all, decide if this is a 486 or higher */ ++ /* It's a 486 if we can modify the AC flag */ ++ if ( flag_is_changeable_p(X86_EFLAGS_AC) ) ++ c->x86 = 4; ++ else ++ c->x86 = 3; ++ } ++ ++ generic_identify(c); ++ ++ printk(KERN_DEBUG "CPU: After generic identify, caps:"); ++ for (i = 0; i < NCAPINTS; i++) ++ printk(" %08lx", c->x86_capability[i]); ++ printk("\n"); ++ ++ if (this_cpu->c_identify) { ++ this_cpu->c_identify(c); ++ ++ printk(KERN_DEBUG "CPU: After vendor identify, caps:"); ++ for (i = 0; i < NCAPINTS; i++) ++ printk(" %08lx", c->x86_capability[i]); ++ printk("\n"); ++ } ++ ++ /* ++ * Vendor-specific initialization. In this section we ++ * canonicalize the feature flags, meaning if there are ++ * features a certain CPU supports which CPUID doesn't ++ * tell us, CPUID claiming incorrect flags, or other bugs, ++ * we handle them here. ++ * ++ * At the end of this section, c->x86_capability better ++ * indicate the features this CPU genuinely supports! ++ */ ++ if (this_cpu->c_init) ++ this_cpu->c_init(c); ++ ++ /* Disable the PN if appropriate */ ++ squash_the_stupid_serial_number(c); ++ ++ /* ++ * The vendor-specific functions might have changed features. Now ++ * we do "generic changes." ++ */ ++ ++ /* TSC disabled? */ ++ if ( tsc_disable ) ++ clear_bit(X86_FEATURE_TSC, c->x86_capability); ++ ++ /* FXSR disabled? */ ++ if (disable_x86_fxsr) { ++ clear_bit(X86_FEATURE_FXSR, c->x86_capability); ++ clear_bit(X86_FEATURE_XMM, c->x86_capability); ++ } ++ ++ /* SEP disabled? */ ++ if (disable_x86_sep) ++ clear_bit(X86_FEATURE_SEP, c->x86_capability); ++ ++ if (disable_pse) ++ clear_bit(X86_FEATURE_PSE, c->x86_capability); ++ ++ /* If the model name is still unset, do table lookup. */ ++ if ( !c->x86_model_id[0] ) { ++ char *p; ++ p = table_lookup_model(c); ++ if ( p ) ++ strcpy(c->x86_model_id, p); ++ else ++ /* Last resort... */ ++ sprintf(c->x86_model_id, "%02x/%02x", ++ c->x86, c->x86_model); ++ } ++ ++ /* Now the feature flags better reflect actual CPU features! */ ++ ++ printk(KERN_DEBUG "CPU: After all inits, caps:"); ++ for (i = 0; i < NCAPINTS; i++) ++ printk(" %08lx", c->x86_capability[i]); ++ printk("\n"); ++ ++ /* ++ * On SMP, boot_cpu_data holds the common feature set between ++ * all CPUs; so make sure that we indicate which features are ++ * common between the CPUs. The first time this routine gets ++ * executed, c == &boot_cpu_data. ++ */ ++ if ( c != &boot_cpu_data ) { ++ /* AND the already accumulated flags with these */ ++ for ( i = 0 ; i < NCAPINTS ; i++ ) ++ boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; ++ } ++ ++ /* Init Machine Check Exception if available. */ ++ mcheck_init(c); ++ ++ if (c == &boot_cpu_data) ++ sysenter_setup(); ++ enable_sep_cpu(); ++ ++ if (c == &boot_cpu_data) ++ mtrr_bp_init(); ++ else ++ mtrr_ap_init(); ++} ++ ++#ifdef CONFIG_X86_HT ++void __cpuinit detect_ht(struct cpuinfo_x86 *c) ++{ ++ u32 eax, ebx, ecx, edx; ++ int index_msb, core_bits; ++ ++ cpuid(1, &eax, &ebx, &ecx, &edx); ++ ++ if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) ++ return; ++ ++ smp_num_siblings = (ebx & 0xff0000) >> 16; ++ ++ if (smp_num_siblings == 1) { ++ printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); ++ } else if (smp_num_siblings > 1 ) { ++ ++ if (smp_num_siblings > NR_CPUS) { ++ printk(KERN_WARNING "CPU: Unsupported number of the " ++ "siblings %d", smp_num_siblings); ++ smp_num_siblings = 1; ++ return; ++ } ++ ++ index_msb = get_count_order(smp_num_siblings); ++ c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); ++ ++ printk(KERN_INFO "CPU: Physical Processor ID: %d\n", ++ c->phys_proc_id); ++ ++ smp_num_siblings = smp_num_siblings / c->x86_max_cores; ++ ++ index_msb = get_count_order(smp_num_siblings) ; ++ ++ core_bits = get_count_order(c->x86_max_cores); ++ ++ c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & ++ ((1 << core_bits) - 1); ++ ++ if (c->x86_max_cores > 1) ++ printk(KERN_INFO "CPU: Processor Core ID: %d\n", ++ c->cpu_core_id); ++ } ++} ++#endif ++ ++void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) ++{ ++ char *vendor = NULL; ++ ++ if (c->x86_vendor < X86_VENDOR_NUM) ++ vendor = this_cpu->c_vendor; ++ else if (c->cpuid_level >= 0) ++ vendor = c->x86_vendor_id; ++ ++ if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) ++ printk("%s ", vendor); ++ ++ if (!c->x86_model_id[0]) ++ printk("%d86", c->x86); ++ else ++ printk("%s", c->x86_model_id); ++ ++ if (c->x86_mask || c->cpuid_level >= 0) ++ printk(" stepping %02x\n", c->x86_mask); ++ else ++ printk("\n"); ++} ++ ++cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; ++ ++/* This is hacky. :) ++ * We're emulating future behavior. ++ * In the future, the cpu-specific init functions will be called implicitly ++ * via the magic of initcalls. ++ * They will insert themselves into the cpu_devs structure. ++ * Then, when cpu_init() is called, we can just iterate over that array. ++ */ ++ ++extern int intel_cpu_init(void); ++extern int cyrix_init_cpu(void); ++extern int nsc_init_cpu(void); ++extern int amd_init_cpu(void); ++extern int centaur_init_cpu(void); ++extern int transmeta_init_cpu(void); ++extern int rise_init_cpu(void); ++extern int nexgen_init_cpu(void); ++extern int umc_init_cpu(void); ++ ++void __init early_cpu_init(void) ++{ ++ intel_cpu_init(); ++ cyrix_init_cpu(); ++ nsc_init_cpu(); ++ amd_init_cpu(); ++ centaur_init_cpu(); ++ transmeta_init_cpu(); ++ rise_init_cpu(); ++ nexgen_init_cpu(); ++ umc_init_cpu(); ++ early_cpu_detect(); ++ ++#ifdef CONFIG_DEBUG_PAGEALLOC ++ /* pse is not compatible with on-the-fly unmapping, ++ * disable it even if the cpus claim to support it. ++ */ ++ clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); ++ disable_pse = 1; ++#endif ++} ++ ++void __cpuinit cpu_gdt_init(struct Xgt_desc_struct *gdt_descr) ++{ ++ unsigned long frames[16]; ++ unsigned long va; ++ int f; ++ ++ for (va = gdt_descr->address, f = 0; ++ va < gdt_descr->address + gdt_descr->size; ++ va += PAGE_SIZE, f++) { ++ frames[f] = virt_to_mfn(va); ++ make_lowmem_page_readonly( ++ (void *)va, XENFEAT_writable_descriptor_tables); ++ } ++ if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8)) ++ BUG(); ++} ++ ++/* ++ * cpu_init() initializes state that is per-CPU. Some data is already ++ * initialized (naturally) in the bootstrap process, such as the GDT ++ * and IDT. We reload them nevertheless, this function acts as a ++ * 'CPU state barrier', nothing should get across. ++ */ ++void __cpuinit cpu_init(void) ++{ ++ int cpu = smp_processor_id(); ++#ifndef CONFIG_X86_NO_TSS ++ struct tss_struct * t = &per_cpu(init_tss, cpu); ++#endif ++ struct thread_struct *thread = ¤t->thread; ++ struct desc_struct *gdt; ++ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); ++ ++ if (cpu_test_and_set(cpu, cpu_initialized)) { ++ printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); ++ for (;;) local_irq_enable(); ++ } ++ printk(KERN_INFO "Initializing CPU#%d\n", cpu); ++ ++ if (cpu_has_vme || cpu_has_de) ++ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); ++ if (tsc_disable && cpu_has_tsc) { ++ printk(KERN_NOTICE "Disabling TSC...\n"); ++ /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ ++ clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); ++ set_in_cr4(X86_CR4_TSD); ++ } ++ ++#ifndef CONFIG_XEN ++ /* The CPU hotplug case */ ++ if (cpu_gdt_descr->address) { ++ gdt = (struct desc_struct *)cpu_gdt_descr->address; ++ memset(gdt, 0, PAGE_SIZE); ++ goto old_gdt; ++ } ++ /* ++ * This is a horrible hack to allocate the GDT. The problem ++ * is that cpu_init() is called really early for the boot CPU ++ * (and hence needs bootmem) but much later for the secondary ++ * CPUs, when bootmem will have gone away ++ */ ++ if (NODE_DATA(0)->bdata->node_bootmem_map) { ++ gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); ++ /* alloc_bootmem_pages panics on failure, so no check */ ++ memset(gdt, 0, PAGE_SIZE); ++ } else { ++ gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); ++ if (unlikely(!gdt)) { ++ printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); ++ for (;;) ++ local_irq_enable(); ++ } ++ } ++old_gdt: ++ /* ++ * Initialize the per-CPU GDT with the boot GDT, ++ * and set up the GDT descriptor: ++ */ ++ memcpy(gdt, cpu_gdt_table, GDT_SIZE); ++ ++ /* Set up GDT entry for 16bit stack */ ++ *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= ++ ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | ++ ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | ++ (CPU_16BIT_STACK_SIZE - 1); ++ ++ cpu_gdt_descr->size = GDT_SIZE - 1; ++ cpu_gdt_descr->address = (unsigned long)gdt; ++#else ++ if (cpu == 0 && cpu_gdt_descr->address == 0) { ++ gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); ++ /* alloc_bootmem_pages panics on failure, so no check */ ++ memset(gdt, 0, PAGE_SIZE); ++ ++ memcpy(gdt, cpu_gdt_table, GDT_SIZE); ++ ++ cpu_gdt_descr->size = GDT_SIZE; ++ cpu_gdt_descr->address = (unsigned long)gdt; ++ } ++#endif ++ ++ cpu_gdt_init(cpu_gdt_descr); ++ ++ /* ++ * Set up and load the per-CPU TSS and LDT ++ */ ++ atomic_inc(&init_mm.mm_count); ++ current->active_mm = &init_mm; ++ if (current->mm) ++ BUG(); ++ enter_lazy_tlb(&init_mm, current); ++ ++ load_esp0(t, thread); ++ ++ load_LDT(&init_mm.context); ++ ++#ifdef CONFIG_DOUBLEFAULT ++ /* Set up doublefault TSS pointer in the GDT */ ++ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); ++#endif ++ ++ /* Clear %fs and %gs. */ ++ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); ++ ++ /* Clear all 6 debug registers: */ ++ set_debugreg(0, 0); ++ set_debugreg(0, 1); ++ set_debugreg(0, 2); ++ set_debugreg(0, 3); ++ set_debugreg(0, 6); ++ set_debugreg(0, 7); ++ ++ /* ++ * Force FPU initialization: ++ */ ++ current_thread_info()->status = 0; ++ clear_used_math(); ++ mxcsr_feature_mask_init(); ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++void __cpuinit cpu_uninit(void) ++{ ++ int cpu = raw_smp_processor_id(); ++ cpu_clear(cpu, cpu_initialized); ++ ++ /* lazy TLB state */ ++ per_cpu(cpu_tlbstate, cpu).state = 0; ++ per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; ++} ++#endif +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/cpu/mtrr/main-xen.c 2007-08-27 14:02:01.000000000 -0400 +@@ -0,0 +1,197 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include "mtrr.h" ++ ++static DEFINE_MUTEX(mtrr_mutex); ++ ++void generic_get_mtrr(unsigned int reg, unsigned long *base, ++ unsigned int *size, mtrr_type * type) ++{ ++ struct xen_platform_op op; ++ ++ op.cmd = XENPF_read_memtype; ++ op.u.read_memtype.reg = reg; ++ (void)HYPERVISOR_platform_op(&op); ++ ++ *size = op.u.read_memtype.nr_mfns; ++ *base = op.u.read_memtype.mfn; ++ *type = op.u.read_memtype.type; ++} ++ ++struct mtrr_ops generic_mtrr_ops = { ++ .use_intel_if = 1, ++ .get = generic_get_mtrr, ++}; ++ ++struct mtrr_ops *mtrr_if = &generic_mtrr_ops; ++unsigned int num_var_ranges; ++unsigned int *usage_table; ++ ++static void __init set_num_var_ranges(void) ++{ ++ struct xen_platform_op op; ++ ++ for (num_var_ranges = 0; ; num_var_ranges++) { ++ op.cmd = XENPF_read_memtype; ++ op.u.read_memtype.reg = num_var_ranges; ++ if (HYPERVISOR_platform_op(&op) != 0) ++ break; ++ } ++} ++ ++static void __init init_table(void) ++{ ++ int i, max; ++ ++ max = num_var_ranges; ++ if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL)) ++ == NULL) { ++ printk(KERN_ERR "mtrr: could not allocate\n"); ++ return; ++ } ++ for (i = 0; i < max; i++) ++ usage_table[i] = 0; ++} ++ ++int mtrr_add_page(unsigned long base, unsigned long size, ++ unsigned int type, char increment) ++{ ++ int error; ++ struct xen_platform_op op; ++ ++ mutex_lock(&mtrr_mutex); ++ ++ op.cmd = XENPF_add_memtype; ++ op.u.add_memtype.mfn = base; ++ op.u.add_memtype.nr_mfns = size; ++ op.u.add_memtype.type = type; ++ error = HYPERVISOR_platform_op(&op); ++ if (error) { ++ mutex_unlock(&mtrr_mutex); ++ BUG_ON(error > 0); ++ return error; ++ } ++ ++ if (increment) ++ ++usage_table[op.u.add_memtype.reg]; ++ ++ mutex_unlock(&mtrr_mutex); ++ ++ return op.u.add_memtype.reg; ++} ++ ++static int mtrr_check(unsigned long base, unsigned long size) ++{ ++ if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { ++ printk(KERN_WARNING ++ "mtrr: size and base must be multiples of 4 kiB\n"); ++ printk(KERN_DEBUG ++ "mtrr: size: 0x%lx base: 0x%lx\n", size, base); ++ dump_stack(); ++ return -1; ++ } ++ return 0; ++} ++ ++int ++mtrr_add(unsigned long base, unsigned long size, unsigned int type, ++ char increment) ++{ ++ if (mtrr_check(base, size)) ++ return -EINVAL; ++ return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, ++ increment); ++} ++ ++int mtrr_del_page(int reg, unsigned long base, unsigned long size) ++{ ++ unsigned i; ++ mtrr_type ltype; ++ unsigned long lbase; ++ unsigned int lsize; ++ int error = -EINVAL; ++ struct xen_platform_op op; ++ ++ mutex_lock(&mtrr_mutex); ++ ++ if (reg < 0) { ++ /* Search for existing MTRR */ ++ for (i = 0; i < num_var_ranges; ++i) { ++ mtrr_if->get(i, &lbase, &lsize, <ype); ++ if (lbase == base && lsize == size) { ++ reg = i; ++ break; ++ } ++ } ++ if (reg < 0) { ++ printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base, ++ size); ++ goto out; ++ } ++ } ++ if (usage_table[reg] < 1) { ++ printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); ++ goto out; ++ } ++ if (--usage_table[reg] < 1) { ++ op.cmd = XENPF_del_memtype; ++ op.u.del_memtype.handle = 0; ++ op.u.del_memtype.reg = reg; ++ error = HYPERVISOR_platform_op(&op); ++ if (error) { ++ BUG_ON(error > 0); ++ goto out; ++ } ++ } ++ error = reg; ++ out: ++ mutex_unlock(&mtrr_mutex); ++ return error; ++} ++ ++int ++mtrr_del(int reg, unsigned long base, unsigned long size) ++{ ++ if (mtrr_check(base, size)) ++ return -EINVAL; ++ return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); ++} ++ ++EXPORT_SYMBOL(mtrr_add); ++EXPORT_SYMBOL(mtrr_del); ++ ++void __init mtrr_bp_init(void) ++{ ++} ++ ++void mtrr_ap_init(void) ++{ ++} ++ ++static int __init mtrr_init(void) ++{ ++ struct cpuinfo_x86 *c = &boot_cpu_data; ++ ++ if (!is_initial_xendomain()) ++ return -ENODEV; ++ ++ if ((!cpu_has(c, X86_FEATURE_MTRR)) && ++ (!cpu_has(c, X86_FEATURE_K6_MTRR)) && ++ (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) && ++ (!cpu_has(c, X86_FEATURE_CENTAUR_MCR))) ++ return -ENODEV; ++ ++ set_num_var_ranges(); ++ init_table(); ++ ++ return 0; ++} ++ ++subsys_initcall(mtrr_init); +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/early_printk-xen.c 2007-08-27 14:01:24.000000000 -0400 +@@ -0,0 +1,2 @@ ++ ++#include "../../x86_64/kernel/early_printk-xen.c" +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/entry-xen.S 2007-08-27 14:02:03.000000000 -0400 +@@ -0,0 +1,1216 @@ ++/* ++ * linux/arch/i386/entry.S ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ */ ++ ++/* ++ * entry.S contains the system-call and fault low-level handling routines. ++ * This also contains the timer-interrupt handler, as well as all interrupts ++ * and faults that can result in a task-switch. ++ * ++ * NOTE: This code handles signal-recognition, which happens every time ++ * after a timer-interrupt and after each system call. ++ * ++ * I changed all the .align's to 4 (16 byte alignment), as that's faster ++ * on a 486. ++ * ++ * Stack layout in 'ret_from_system_call': ++ * ptrace needs to have all regs on the stack. ++ * if the order here is changed, it needs to be ++ * updated in fork.c:copy_process, signal.c:do_signal, ++ * ptrace.c and ptrace.h ++ * ++ * 0(%esp) - %ebx ++ * 4(%esp) - %ecx ++ * 8(%esp) - %edx ++ * C(%esp) - %esi ++ * 10(%esp) - %edi ++ * 14(%esp) - %ebp ++ * 18(%esp) - %eax ++ * 1C(%esp) - %ds ++ * 20(%esp) - %es ++ * 24(%esp) - orig_eax ++ * 28(%esp) - %eip ++ * 2C(%esp) - %cs ++ * 30(%esp) - %eflags ++ * 34(%esp) - %oldesp ++ * 38(%esp) - %oldss ++ * ++ * "current" is in register %ebx during any slow entries. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "irq_vectors.h" ++#include ++ ++#define nr_syscalls ((syscall_table_size)/4) ++ ++EBX = 0x00 ++ECX = 0x04 ++EDX = 0x08 ++ESI = 0x0C ++EDI = 0x10 ++EBP = 0x14 ++EAX = 0x18 ++DS = 0x1C ++ES = 0x20 ++ORIG_EAX = 0x24 ++EIP = 0x28 ++CS = 0x2C ++EFLAGS = 0x30 ++OLDESP = 0x34 ++OLDSS = 0x38 ++ ++CF_MASK = 0x00000001 ++TF_MASK = 0x00000100 ++IF_MASK = 0x00000200 ++DF_MASK = 0x00000400 ++NT_MASK = 0x00004000 ++VM_MASK = 0x00020000 ++/* Pseudo-eflags. */ ++NMI_MASK = 0x80000000 ++ ++#ifndef CONFIG_XEN ++#define DISABLE_INTERRUPTS cli ++#define ENABLE_INTERRUPTS sti ++#else ++/* Offsets into shared_info_t. */ ++#define evtchn_upcall_pending /* 0 */ ++#define evtchn_upcall_mask 1 ++ ++#define sizeof_vcpu_shift 6 ++ ++#ifdef CONFIG_SMP ++#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \ ++ shl $sizeof_vcpu_shift,%esi ; \ ++ addl HYPERVISOR_shared_info,%esi ++#else ++#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi ++#endif ++ ++#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi) ++#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi) ++#define DISABLE_INTERRUPTS GET_VCPU_INFO ; \ ++ __DISABLE_INTERRUPTS ++#define ENABLE_INTERRUPTS GET_VCPU_INFO ; \ ++ __ENABLE_INTERRUPTS ++#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) ++#endif ++ ++#ifdef CONFIG_PREEMPT ++#define preempt_stop cli; TRACE_IRQS_OFF ++#else ++#define preempt_stop ++#define resume_kernel restore_nocheck ++#endif ++ ++.macro TRACE_IRQS_IRET ++#ifdef CONFIG_TRACE_IRQFLAGS ++ testl $IF_MASK,EFLAGS(%esp) # interrupts off? ++ jz 1f ++ TRACE_IRQS_ON ++1: ++#endif ++.endm ++ ++#ifdef CONFIG_VM86 ++#define resume_userspace_sig check_userspace ++#else ++#define resume_userspace_sig resume_userspace ++#endif ++ ++#define SAVE_ALL \ ++ cld; \ ++ pushl %es; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ /*CFI_REL_OFFSET es, 0;*/\ ++ pushl %ds; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ /*CFI_REL_OFFSET ds, 0;*/\ ++ pushl %eax; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET eax, 0;\ ++ pushl %ebp; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET ebp, 0;\ ++ pushl %edi; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET edi, 0;\ ++ pushl %esi; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET esi, 0;\ ++ pushl %edx; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET edx, 0;\ ++ pushl %ecx; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET ecx, 0;\ ++ pushl %ebx; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET ebx, 0;\ ++ movl $(__USER_DS), %edx; \ ++ movl %edx, %ds; \ ++ movl %edx, %es; ++ ++#define RESTORE_INT_REGS \ ++ popl %ebx; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE ebx;\ ++ popl %ecx; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE ecx;\ ++ popl %edx; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE edx;\ ++ popl %esi; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE esi;\ ++ popl %edi; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE edi;\ ++ popl %ebp; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE ebp;\ ++ popl %eax; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE eax ++ ++#define RESTORE_REGS \ ++ RESTORE_INT_REGS; \ ++1: popl %ds; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ /*CFI_RESTORE ds;*/\ ++2: popl %es; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ /*CFI_RESTORE es;*/\ ++.section .fixup,"ax"; \ ++3: movl $0,(%esp); \ ++ jmp 1b; \ ++4: movl $0,(%esp); \ ++ jmp 2b; \ ++.previous; \ ++.section __ex_table,"a";\ ++ .align 4; \ ++ .long 1b,3b; \ ++ .long 2b,4b; \ ++.previous ++ ++#define RING0_INT_FRAME \ ++ CFI_STARTPROC simple;\ ++ CFI_DEF_CFA esp, 3*4;\ ++ /*CFI_OFFSET cs, -2*4;*/\ ++ CFI_OFFSET eip, -3*4 ++ ++#define RING0_EC_FRAME \ ++ CFI_STARTPROC simple;\ ++ CFI_DEF_CFA esp, 4*4;\ ++ /*CFI_OFFSET cs, -2*4;*/\ ++ CFI_OFFSET eip, -3*4 ++ ++#define RING0_PTREGS_FRAME \ ++ CFI_STARTPROC simple;\ ++ CFI_DEF_CFA esp, OLDESP-EBX;\ ++ /*CFI_OFFSET cs, CS-OLDESP;*/\ ++ CFI_OFFSET eip, EIP-OLDESP;\ ++ /*CFI_OFFSET es, ES-OLDESP;*/\ ++ /*CFI_OFFSET ds, DS-OLDESP;*/\ ++ CFI_OFFSET eax, EAX-OLDESP;\ ++ CFI_OFFSET ebp, EBP-OLDESP;\ ++ CFI_OFFSET edi, EDI-OLDESP;\ ++ CFI_OFFSET esi, ESI-OLDESP;\ ++ CFI_OFFSET edx, EDX-OLDESP;\ ++ CFI_OFFSET ecx, ECX-OLDESP;\ ++ CFI_OFFSET ebx, EBX-OLDESP ++ ++ENTRY(ret_from_fork) ++ CFI_STARTPROC ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ call schedule_tail ++ GET_THREAD_INFO(%ebp) ++ popl %eax ++ CFI_ADJUST_CFA_OFFSET -4 ++ pushl $0x0202 # Reset kernel eflags ++ CFI_ADJUST_CFA_OFFSET 4 ++ popfl ++ CFI_ADJUST_CFA_OFFSET -4 ++ jmp syscall_exit ++ CFI_ENDPROC ++ ++/* ++ * Return to user mode is not as complex as all this looks, ++ * but we want the default path for a system call return to ++ * go as quickly as possible which is why some of this is ++ * less clear than it otherwise should be. ++ */ ++ ++ # userspace resumption stub bypassing syscall exit tracing ++ ALIGN ++ RING0_PTREGS_FRAME ++ret_from_exception: ++ preempt_stop ++ret_from_intr: ++ GET_THREAD_INFO(%ebp) ++check_userspace: ++ movl EFLAGS(%esp), %eax # mix EFLAGS and CS ++ movb CS(%esp), %al ++ testl $(VM_MASK | 2), %eax ++ jz resume_kernel ++ENTRY(resume_userspace) ++ DISABLE_INTERRUPTS # make sure we don't miss an interrupt ++ # setting need_resched or sigpending ++ # between sampling and the iret ++ movl TI_flags(%ebp), %ecx ++ andl $_TIF_WORK_MASK, %ecx # is there any work to be done on ++ # int/exception return? ++ jne work_pending ++ jmp restore_all ++ ++#ifdef CONFIG_PREEMPT ++ENTRY(resume_kernel) ++ cli ++ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? ++ jnz restore_nocheck ++need_resched: ++ movl TI_flags(%ebp), %ecx # need_resched set ? ++ testb $_TIF_NEED_RESCHED, %cl ++ jz restore_all ++ testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? ++ jz restore_all ++ call preempt_schedule_irq ++ jmp need_resched ++#endif ++ CFI_ENDPROC ++ ++/* SYSENTER_RETURN points to after the "sysenter" instruction in ++ the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ ++ ++ # sysenter call handler stub ++ENTRY(sysenter_entry) ++ CFI_STARTPROC simple ++ CFI_DEF_CFA esp, 0 ++ CFI_REGISTER esp, ebp ++ movl SYSENTER_stack_esp0(%esp),%esp ++sysenter_past_esp: ++ /* ++ * No need to follow this irqs on/off section: the syscall ++ * disabled irqs and here we enable it straight after entry: ++ */ ++ sti ++ pushl $(__USER_DS) ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET ss, 0*/ ++ pushl %ebp ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET esp, 0 ++ pushfl ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $(__USER_CS) ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET cs, 0*/ ++ /* ++ * Push current_thread_info()->sysenter_return to the stack. ++ * A tiny bit of offset fixup is necessary - 4*4 means the 4 words ++ * pushed above; +8 corresponds to copy_thread's esp0 setting. ++ */ ++ pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET eip, 0 ++ ++/* ++ * Load the potential sixth argument from user stack. ++ * Careful about security. ++ */ ++ cmpl $__PAGE_OFFSET-3,%ebp ++ jae syscall_fault ++1: movl (%ebp),%ebp ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,syscall_fault ++.previous ++ ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ GET_THREAD_INFO(%ebp) ++ ++ /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ ++ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) ++ jnz syscall_trace_entry ++ cmpl $(nr_syscalls), %eax ++ jae syscall_badsys ++ call *sys_call_table(,%eax,4) ++ movl %eax,EAX(%esp) ++ DISABLE_INTERRUPTS ++ TRACE_IRQS_OFF ++ movl TI_flags(%ebp), %ecx ++ testw $_TIF_ALLWORK_MASK, %cx ++ jne syscall_exit_work ++/* if something modifies registers it must also disable sysexit */ ++ movl EIP(%esp), %edx ++ movl OLDESP(%esp), %ecx ++ xorl %ebp,%ebp ++#ifdef CONFIG_XEN ++ TRACE_IRQS_ON ++ __ENABLE_INTERRUPTS ++sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ++ __TEST_PENDING ++ jnz 14f # process more events if necessary... ++ movl ESI(%esp), %esi ++ sysexit ++14: __DISABLE_INTERRUPTS ++ TRACE_IRQS_OFF ++sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ++ push %esp ++ call evtchn_do_upcall ++ add $4,%esp ++ jmp ret_from_intr ++#else ++ TRACE_IRQS_ON ++ sti ++ sysexit ++#endif /* !CONFIG_XEN */ ++ CFI_ENDPROC ++ ++ ++ # system call handler stub ++ENTRY(system_call) ++ RING0_INT_FRAME # can't unwind into user space anyway ++ pushl %eax # save orig_eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ GET_THREAD_INFO(%ebp) ++ testl $TF_MASK,EFLAGS(%esp) ++ jz no_singlestep ++ orl $_TIF_SINGLESTEP,TI_flags(%ebp) ++no_singlestep: ++ # system call tracing in operation / emulation ++ /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ ++ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) ++ jnz syscall_trace_entry ++ cmpl $(nr_syscalls), %eax ++ jae syscall_badsys ++syscall_call: ++ call *sys_call_table(,%eax,4) ++ movl %eax,EAX(%esp) # store the return value ++syscall_exit: ++ DISABLE_INTERRUPTS # make sure we don't miss an interrupt ++ # setting need_resched or sigpending ++ # between sampling and the iret ++ TRACE_IRQS_OFF ++ movl TI_flags(%ebp), %ecx ++ testw $_TIF_ALLWORK_MASK, %cx # current->work ++ jne syscall_exit_work ++ ++restore_all: ++#ifndef CONFIG_XEN ++ movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS ++ # Warning: OLDSS(%esp) contains the wrong/random values if we ++ # are returning to the kernel. ++ # See comments in process.c:copy_thread() for details. ++ movb OLDSS(%esp), %ah ++ movb CS(%esp), %al ++ andl $(VM_MASK | (4 << 8) | 3), %eax ++ cmpl $((4 << 8) | 3), %eax ++ CFI_REMEMBER_STATE ++ je ldt_ss # returning to user-space with LDT SS ++restore_nocheck: ++#else ++restore_nocheck: ++ movl EFLAGS(%esp), %eax ++ testl $(VM_MASK|NMI_MASK), %eax ++ CFI_REMEMBER_STATE ++ jnz hypervisor_iret ++ shr $9, %eax # EAX[0] == IRET_EFLAGS.IF ++ GET_VCPU_INFO ++ andb evtchn_upcall_mask(%esi),%al ++ andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask ++ CFI_REMEMBER_STATE ++ jnz restore_all_enable_events # != 0 => enable event delivery ++#endif ++ TRACE_IRQS_IRET ++restore_nocheck_notrace: ++ RESTORE_REGS ++ addl $4, %esp ++ CFI_ADJUST_CFA_OFFSET -4 ++1: iret ++.section .fixup,"ax" ++iret_exc: ++#ifndef CONFIG_XEN ++ TRACE_IRQS_ON ++ sti ++#endif ++ pushl $0 # no error code ++ pushl $do_iret_error ++ jmp error_code ++.previous ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,iret_exc ++.previous ++ ++ CFI_RESTORE_STATE ++#ifndef CONFIG_XEN ++ldt_ss: ++ larl OLDSS(%esp), %eax ++ jnz restore_nocheck ++ testl $0x00400000, %eax # returning to 32bit stack? ++ jnz restore_nocheck # allright, normal return ++ /* If returning to userspace with 16bit stack, ++ * try to fix the higher word of ESP, as the CPU ++ * won't restore it. ++ * This is an "official" bug of all the x86-compatible ++ * CPUs, which we can try to work around to make ++ * dosemu and wine happy. */ ++ subl $8, %esp # reserve space for switch16 pointer ++ CFI_ADJUST_CFA_OFFSET 8 ++ cli ++ TRACE_IRQS_OFF ++ movl %esp, %eax ++ /* Set up the 16bit stack frame with switch32 pointer on top, ++ * and a switch16 pointer on top of the current frame. */ ++ call setup_x86_bogus_stack ++ CFI_ADJUST_CFA_OFFSET -8 # frame has moved ++ TRACE_IRQS_IRET ++ RESTORE_REGS ++ lss 20+4(%esp), %esp # switch to 16bit stack ++1: iret ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,iret_exc ++.previous ++#else ++ ALIGN ++restore_all_enable_events: ++ TRACE_IRQS_ON ++ __ENABLE_INTERRUPTS ++scrit: /**** START OF CRITICAL REGION ****/ ++ __TEST_PENDING ++ jnz 14f # process more events if necessary... ++ RESTORE_REGS ++ addl $4, %esp ++ CFI_ADJUST_CFA_OFFSET -4 ++1: iret ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,iret_exc ++.previous ++14: __DISABLE_INTERRUPTS ++ TRACE_IRQS_OFF ++ jmp 11f ++ecrit: /**** END OF CRITICAL REGION ****/ ++ ++ CFI_RESTORE_STATE ++hypervisor_iret: ++ andl $~NMI_MASK, EFLAGS(%esp) ++ RESTORE_REGS ++ addl $4, %esp ++ CFI_ADJUST_CFA_OFFSET -4 ++ jmp hypercall_page + (__HYPERVISOR_iret * 32) ++#endif ++ CFI_ENDPROC ++ ++ # perform work that needs to be done immediately before resumption ++ ALIGN ++ RING0_PTREGS_FRAME # can't unwind into user space anyway ++work_pending: ++ testb $_TIF_NEED_RESCHED, %cl ++ jz work_notifysig ++work_resched: ++ call schedule ++ DISABLE_INTERRUPTS # make sure we don't miss an interrupt ++ # setting need_resched or sigpending ++ # between sampling and the iret ++ TRACE_IRQS_OFF ++ movl TI_flags(%ebp), %ecx ++ andl $_TIF_WORK_MASK, %ecx # is there any work to be done other ++ # than syscall tracing? ++ jz restore_all ++ testb $_TIF_NEED_RESCHED, %cl ++ jnz work_resched ++ ++work_notifysig: # deal with pending signals and ++ # notify-resume requests ++ testl $VM_MASK, EFLAGS(%esp) ++ movl %esp, %eax ++ jne work_notifysig_v86 # returning to kernel-space or ++ # vm86-space ++ xorl %edx, %edx ++ call do_notify_resume ++ jmp resume_userspace_sig ++ ++ ALIGN ++work_notifysig_v86: ++#ifdef CONFIG_VM86 ++ pushl %ecx # save ti_flags for do_notify_resume ++ CFI_ADJUST_CFA_OFFSET 4 ++ call save_v86_state # %eax contains pt_regs pointer ++ popl %ecx ++ CFI_ADJUST_CFA_OFFSET -4 ++ movl %eax, %esp ++ xorl %edx, %edx ++ call do_notify_resume ++ jmp resume_userspace_sig ++#endif ++ ++ # perform syscall exit tracing ++ ALIGN ++syscall_trace_entry: ++ movl $-ENOSYS,EAX(%esp) ++ movl %esp, %eax ++ xorl %edx,%edx ++ call do_syscall_trace ++ cmpl $0, %eax ++ jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, ++ # so must skip actual syscall ++ movl ORIG_EAX(%esp), %eax ++ cmpl $(nr_syscalls), %eax ++ jnae syscall_call ++ jmp syscall_exit ++ ++ # perform syscall exit tracing ++ ALIGN ++syscall_exit_work: ++ testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl ++ jz work_pending ++ TRACE_IRQS_ON ++ ENABLE_INTERRUPTS # could let do_syscall_trace() call ++ # schedule() instead ++ movl %esp, %eax ++ movl $1, %edx ++ call do_syscall_trace ++ jmp resume_userspace ++ CFI_ENDPROC ++ ++ RING0_INT_FRAME # can't unwind into user space anyway ++syscall_fault: ++ pushl %eax # save orig_eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ GET_THREAD_INFO(%ebp) ++ movl $-EFAULT,EAX(%esp) ++ jmp resume_userspace ++ ++syscall_badsys: ++ movl $-ENOSYS,EAX(%esp) ++ jmp resume_userspace ++ CFI_ENDPROC ++ ++#ifndef CONFIG_XEN ++#define FIXUP_ESPFIX_STACK \ ++ movl %esp, %eax; \ ++ /* switch to 32bit stack using the pointer on top of 16bit stack */ \ ++ lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \ ++ /* copy data from 16bit stack to 32bit stack */ \ ++ call fixup_x86_bogus_stack; \ ++ /* put ESP to the proper location */ \ ++ movl %eax, %esp; ++#define UNWIND_ESPFIX_STACK \ ++ pushl %eax; \ ++ CFI_ADJUST_CFA_OFFSET 4; \ ++ movl %ss, %eax; \ ++ /* see if on 16bit stack */ \ ++ cmpw $__ESPFIX_SS, %ax; \ ++ je 28f; \ ++27: popl %eax; \ ++ CFI_ADJUST_CFA_OFFSET -4; \ ++.section .fixup,"ax"; \ ++28: movl $__KERNEL_DS, %eax; \ ++ movl %eax, %ds; \ ++ movl %eax, %es; \ ++ /* switch to 32bit stack */ \ ++ FIXUP_ESPFIX_STACK; \ ++ jmp 27b; \ ++.previous ++ ++/* ++ * Build the entry stubs and pointer table with ++ * some assembler magic. ++ */ ++.data ++ENTRY(interrupt) ++.text ++ ++vector=0 ++ENTRY(irq_entries_start) ++ RING0_INT_FRAME ++.rept NR_IRQS ++ ALIGN ++ .if vector ++ CFI_ADJUST_CFA_OFFSET -4 ++ .endif ++1: pushl $~(vector) ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp common_interrupt ++.data ++ .long 1b ++.text ++vector=vector+1 ++.endr ++ ++/* ++ * the CPU automatically disables interrupts when executing an IRQ vector, ++ * so IRQ-flags tracing has to follow that: ++ */ ++ ALIGN ++common_interrupt: ++ SAVE_ALL ++ TRACE_IRQS_OFF ++ movl %esp,%eax ++ call do_IRQ ++ jmp ret_from_intr ++ CFI_ENDPROC ++ ++#define BUILD_INTERRUPT(name, nr) \ ++ENTRY(name) \ ++ RING0_INT_FRAME; \ ++ pushl $~(nr); \ ++ CFI_ADJUST_CFA_OFFSET 4; \ ++ SAVE_ALL; \ ++ TRACE_IRQS_OFF \ ++ movl %esp,%eax; \ ++ call smp_/**/name; \ ++ jmp ret_from_intr; \ ++ CFI_ENDPROC ++ ++/* The include is where all of the SMP etc. interrupts come from */ ++#include "entry_arch.h" ++#else ++#define UNWIND_ESPFIX_STACK ++#endif ++ ++ENTRY(divide_error) ++ RING0_INT_FRAME ++ pushl $0 # no error code ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_divide_error ++ CFI_ADJUST_CFA_OFFSET 4 ++ ALIGN ++error_code: ++ pushl %ds ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET ds, 0*/ ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET eax, 0 ++ xorl %eax, %eax ++ pushl %ebp ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ebp, 0 ++ pushl %edi ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET edi, 0 ++ pushl %esi ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET esi, 0 ++ pushl %edx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET edx, 0 ++ decl %eax # eax = -1 ++ pushl %ecx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ecx, 0 ++ pushl %ebx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ebx, 0 ++ cld ++ pushl %es ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET es, 0*/ ++ UNWIND_ESPFIX_STACK ++ popl %ecx ++ CFI_ADJUST_CFA_OFFSET -4 ++ /*CFI_REGISTER es, ecx*/ ++ movl ES(%esp), %edi # get the function address ++ movl ORIG_EAX(%esp), %edx # get the error code ++ movl %eax, ORIG_EAX(%esp) ++ movl %ecx, ES(%esp) ++ /*CFI_REL_OFFSET es, ES*/ ++ movl $(__USER_DS), %ecx ++ movl %ecx, %ds ++ movl %ecx, %es ++ movl %esp,%eax # pt_regs pointer ++ call *%edi ++ jmp ret_from_exception ++ CFI_ENDPROC ++ ++#ifdef CONFIG_XEN ++# A note on the "critical region" in our callback handler. ++# We want to avoid stacking callback handlers due to events occurring ++# during handling of the last event. To do this, we keep events disabled ++# until we've done all processing. HOWEVER, we must enable events before ++# popping the stack frame (can't be done atomically) and so it would still ++# be possible to get enough handler activations to overflow the stack. ++# Although unlikely, bugs of that kind are hard to track down, so we'd ++# like to avoid the possibility. ++# So, on entry to the handler we detect whether we interrupted an ++# existing activation in its critical region -- if so, we pop the current ++# activation and restart the handler using the previous one. ++# ++# The sysexit critical region is slightly different. sysexit ++# atomically removes the entire stack frame. If we interrupt in the ++# critical region we know that the entire frame is present and correct ++# so we can simply throw away the new one. ++ENTRY(hypervisor_callback) ++ RING0_INT_FRAME ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ movl EIP(%esp),%eax ++ cmpl $scrit,%eax ++ jb 11f ++ cmpl $ecrit,%eax ++ jb critical_region_fixup ++ cmpl $sysexit_scrit,%eax ++ jb 11f ++ cmpl $sysexit_ecrit,%eax ++ ja 11f ++ addl $OLDESP,%esp # Remove eflags...ebx from stack frame. ++11: push %esp ++ CFI_ADJUST_CFA_OFFSET 4 ++ call evtchn_do_upcall ++ add $4,%esp ++ CFI_ADJUST_CFA_OFFSET -4 ++ jmp ret_from_intr ++ CFI_ENDPROC ++ ++# [How we do the fixup]. We want to merge the current stack frame with the ++# just-interrupted frame. How we do this depends on where in the critical ++# region the interrupted handler was executing, and so how many saved ++# registers are in each frame. We do this quickly using the lookup table ++# 'critical_fixup_table'. For each byte offset in the critical region, it ++# provides the number of bytes which have already been popped from the ++# interrupted stack frame. ++critical_region_fixup: ++ movzbl critical_fixup_table-scrit(%eax),%ecx # %eax contains num bytes popped ++ cmpb $0xff,%cl # 0xff => vcpu_info critical region ++ jne 15f ++ xorl %ecx,%ecx ++15: leal (%esp,%ecx),%esi # %esi points at end of src region ++ leal OLDESP(%esp),%edi # %edi points at end of dst region ++ shrl $2,%ecx # convert words to bytes ++ je 17f # skip loop if nothing to copy ++16: subl $4,%esi # pre-decrementing copy loop ++ subl $4,%edi ++ movl (%esi),%eax ++ movl %eax,(%edi) ++ loop 16b ++17: movl %edi,%esp # final %edi is top of merged stack ++ jmp 11b ++ ++.section .rodata,"a" ++critical_fixup_table: ++ .byte 0xff,0xff,0xff # testb $0xff,(%esi) = __TEST_PENDING ++ .byte 0xff,0xff # jnz 14f ++ .byte 0x00 # pop %ebx ++ .byte 0x04 # pop %ecx ++ .byte 0x08 # pop %edx ++ .byte 0x0c # pop %esi ++ .byte 0x10 # pop %edi ++ .byte 0x14 # pop %ebp ++ .byte 0x18 # pop %eax ++ .byte 0x1c # pop %ds ++ .byte 0x20 # pop %es ++ .byte 0x24,0x24,0x24 # add $4,%esp ++ .byte 0x28 # iret ++ .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi) ++ .byte 0x00,0x00 # jmp 11b ++.previous ++ ++# Hypervisor uses this for application faults while it executes. ++# We get here for two reasons: ++# 1. Fault while reloading DS, ES, FS or GS ++# 2. Fault while executing IRET ++# Category 1 we fix up by reattempting the load, and zeroing the segment ++# register if the load fails. ++# Category 2 we fix up by jumping to do_iret_error. We cannot use the ++# normal Linux return path in this case because if we use the IRET hypercall ++# to pop the stack frame we end up in an infinite loop of failsafe callbacks. ++# We distinguish between categories by maintaining a status value in EAX. ++ENTRY(failsafe_callback) ++ pushl %eax ++ movl $1,%eax ++1: mov 4(%esp),%ds ++2: mov 8(%esp),%es ++3: mov 12(%esp),%fs ++4: mov 16(%esp),%gs ++ testl %eax,%eax ++ popl %eax ++ jz 5f ++ addl $16,%esp # EAX != 0 => Category 2 (Bad IRET) ++ jmp iret_exc ++5: addl $16,%esp # EAX == 0 => Category 1 (Bad segment) ++ RING0_INT_FRAME ++ pushl $0 ++ SAVE_ALL ++ jmp ret_from_exception ++.section .fixup,"ax"; \ ++6: xorl %eax,%eax; \ ++ movl %eax,4(%esp); \ ++ jmp 1b; \ ++7: xorl %eax,%eax; \ ++ movl %eax,8(%esp); \ ++ jmp 2b; \ ++8: xorl %eax,%eax; \ ++ movl %eax,12(%esp); \ ++ jmp 3b; \ ++9: xorl %eax,%eax; \ ++ movl %eax,16(%esp); \ ++ jmp 4b; \ ++.previous; \ ++.section __ex_table,"a"; \ ++ .align 4; \ ++ .long 1b,6b; \ ++ .long 2b,7b; \ ++ .long 3b,8b; \ ++ .long 4b,9b; \ ++.previous ++#endif ++ CFI_ENDPROC ++ ++ENTRY(coprocessor_error) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_coprocessor_error ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(simd_coprocessor_error) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_simd_coprocessor_error ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(device_not_available) ++ RING0_INT_FRAME ++ pushl $-1 # mark this as an int ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++#ifndef CONFIG_XEN ++ movl %cr0, %eax ++ testl $0x4, %eax # EM (math emulation bit) ++ je device_available_emulate ++ pushl $0 # temporary storage for ORIG_EIP ++ CFI_ADJUST_CFA_OFFSET 4 ++ call math_emulate ++ addl $4, %esp ++ CFI_ADJUST_CFA_OFFSET -4 ++ jmp ret_from_exception ++device_available_emulate: ++#endif ++ preempt_stop ++ call math_state_restore ++ jmp ret_from_exception ++ CFI_ENDPROC ++ ++#ifndef CONFIG_XEN ++/* ++ * Debug traps and NMI can happen at the one SYSENTER instruction ++ * that sets up the real kernel stack. Check here, since we can't ++ * allow the wrong stack to be used. ++ * ++ * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have ++ * already pushed 3 words if it hits on the sysenter instruction: ++ * eflags, cs and eip. ++ * ++ * We just load the right stack, and push the three (known) values ++ * by hand onto the new stack - while updating the return eip past ++ * the instruction that would have done it for sysenter. ++ */ ++#define FIX_STACK(offset, ok, label) \ ++ cmpw $__KERNEL_CS,4(%esp); \ ++ jne ok; \ ++label: \ ++ movl SYSENTER_stack_esp0+offset(%esp),%esp; \ ++ pushfl; \ ++ pushl $__KERNEL_CS; \ ++ pushl $sysenter_past_esp ++#endif /* CONFIG_XEN */ ++ ++KPROBE_ENTRY(debug) ++ RING0_INT_FRAME ++#ifndef CONFIG_XEN ++ cmpl $sysenter_entry,(%esp) ++ jne debug_stack_correct ++ FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) ++debug_stack_correct: ++#endif /* !CONFIG_XEN */ ++ pushl $-1 # mark this as an int ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ xorl %edx,%edx # error code 0 ++ movl %esp,%eax # pt_regs pointer ++ call do_debug ++ jmp ret_from_exception ++ CFI_ENDPROC ++ .previous .text ++#ifndef CONFIG_XEN ++/* ++ * NMI is doubly nasty. It can happen _while_ we're handling ++ * a debug fault, and the debug fault hasn't yet been able to ++ * clear up the stack. So we first check whether we got an ++ * NMI on the sysenter entry path, but after that we need to ++ * check whether we got an NMI on the debug path where the debug ++ * fault happened on the sysenter path. ++ */ ++ENTRY(nmi) ++ RING0_INT_FRAME ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ movl %ss, %eax ++ cmpw $__ESPFIX_SS, %ax ++ popl %eax ++ CFI_ADJUST_CFA_OFFSET -4 ++ je nmi_16bit_stack ++ cmpl $sysenter_entry,(%esp) ++ je nmi_stack_fixup ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ movl %esp,%eax ++ /* Do not access memory above the end of our stack page, ++ * it might not exist. ++ */ ++ andl $(THREAD_SIZE-1),%eax ++ cmpl $(THREAD_SIZE-20),%eax ++ popl %eax ++ CFI_ADJUST_CFA_OFFSET -4 ++ jae nmi_stack_correct ++ cmpl $sysenter_entry,12(%esp) ++ je nmi_debug_stack_check ++nmi_stack_correct: ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ xorl %edx,%edx # zero error code ++ movl %esp,%eax # pt_regs pointer ++ call do_nmi ++ jmp restore_nocheck_notrace ++ CFI_ENDPROC ++ ++nmi_stack_fixup: ++ FIX_STACK(12,nmi_stack_correct, 1) ++ jmp nmi_stack_correct ++nmi_debug_stack_check: ++ cmpw $__KERNEL_CS,16(%esp) ++ jne nmi_stack_correct ++ cmpl $debug,(%esp) ++ jb nmi_stack_correct ++ cmpl $debug_esp_fix_insn,(%esp) ++ ja nmi_stack_correct ++ FIX_STACK(24,nmi_stack_correct, 1) ++ jmp nmi_stack_correct ++ ++nmi_16bit_stack: ++ RING0_INT_FRAME ++ /* create the pointer to lss back */ ++ pushl %ss ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl %esp ++ CFI_ADJUST_CFA_OFFSET 4 ++ movzwl %sp, %esp ++ addw $4, (%esp) ++ /* copy the iret frame of 12 bytes */ ++ .rept 3 ++ pushl 16(%esp) ++ CFI_ADJUST_CFA_OFFSET 4 ++ .endr ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ FIXUP_ESPFIX_STACK # %eax == %esp ++ CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved ++ xorl %edx,%edx # zero error code ++ call do_nmi ++ RESTORE_REGS ++ lss 12+4(%esp), %esp # back to 16bit stack ++1: iret ++ CFI_ENDPROC ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,iret_exc ++.previous ++#else ++ENTRY(nmi) ++ RING0_INT_FRAME ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ xorl %edx,%edx # zero error code ++ movl %esp,%eax # pt_regs pointer ++ call do_nmi ++ orl $NMI_MASK, EFLAGS(%esp) ++ jmp restore_all ++ CFI_ENDPROC ++#endif ++ ++KPROBE_ENTRY(int3) ++ RING0_INT_FRAME ++ pushl $-1 # mark this as an int ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ xorl %edx,%edx # zero error code ++ movl %esp,%eax # pt_regs pointer ++ call do_int3 ++ jmp ret_from_exception ++ CFI_ENDPROC ++ .previous .text ++ ++ENTRY(overflow) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_overflow ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(bounds) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_bounds ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(invalid_op) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_invalid_op ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(coprocessor_segment_overrun) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_coprocessor_segment_overrun ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(invalid_TSS) ++ RING0_EC_FRAME ++ pushl $do_invalid_TSS ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(segment_not_present) ++ RING0_EC_FRAME ++ pushl $do_segment_not_present ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(stack_segment) ++ RING0_EC_FRAME ++ pushl $do_stack_segment ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++KPROBE_ENTRY(general_protection) ++ RING0_EC_FRAME ++ pushl $do_general_protection ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ .previous .text ++ ++ENTRY(alignment_check) ++ RING0_EC_FRAME ++ pushl $do_alignment_check ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++KPROBE_ENTRY(page_fault) ++ RING0_EC_FRAME ++ pushl $do_page_fault ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ .previous .text ++ ++#ifdef CONFIG_X86_MCE ++ENTRY(machine_check) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl machine_check_vector ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++#endif ++ ++#ifndef CONFIG_XEN ++ENTRY(spurious_interrupt_bug) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_spurious_interrupt_bug ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++#endif /* !CONFIG_XEN */ ++ ++#ifdef CONFIG_STACK_UNWIND ++ENTRY(arch_unwind_init_running) ++ CFI_STARTPROC ++ movl 4(%esp), %edx ++ movl (%esp), %ecx ++ leal 4(%esp), %eax ++ movl %ebx, EBX(%edx) ++ xorl %ebx, %ebx ++ movl %ebx, ECX(%edx) ++ movl %ebx, EDX(%edx) ++ movl %esi, ESI(%edx) ++ movl %edi, EDI(%edx) ++ movl %ebp, EBP(%edx) ++ movl %ebx, EAX(%edx) ++ movl $__USER_DS, DS(%edx) ++ movl $__USER_DS, ES(%edx) ++ movl %ebx, ORIG_EAX(%edx) ++ movl %ecx, EIP(%edx) ++ movl 12(%esp), %ecx ++ movl $__KERNEL_CS, CS(%edx) ++ movl %ebx, EFLAGS(%edx) ++ movl %eax, OLDESP(%edx) ++ movl 8(%esp), %eax ++ movl %ecx, 8(%esp) ++ movl EBX(%edx), %ebx ++ movl $__KERNEL_DS, OLDSS(%edx) ++ jmpl *%eax ++ CFI_ENDPROC ++ENDPROC(arch_unwind_init_running) ++#endif ++ ++ENTRY(fixup_4gb_segment) ++ RING0_EC_FRAME ++ pushl $do_fixup_4gb_segment ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++.section .rodata,"a" ++.align 4 ++#include "syscall_table.S" ++ ++syscall_table_size=(.-sys_call_table) +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/fixup.c 2007-08-27 14:01:24.000000000 -0400 +@@ -0,0 +1,88 @@ ++/****************************************************************************** ++ * fixup.c ++ * ++ * Binary-rewriting of certain IA32 instructions, on notification by Xen. ++ * Used to avoid repeated slow emulation of common instructions used by the ++ * user-space TLS (Thread-Local Storage) libraries. ++ * ++ * **** NOTE **** ++ * Issues with the binary rewriting have caused it to be removed. Instead ++ * we rely on Xen's emulator to boot the kernel, and then print a banner ++ * message recommending that the user disables /lib/tls. ++ * ++ * Copyright (c) 2004, K A Fraser ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DP(_f, _args...) printk(KERN_ALERT " " _f "\n" , ## _args ) ++ ++fastcall void do_fixup_4gb_segment(struct pt_regs *regs, long error_code) ++{ ++ static unsigned long printed = 0; ++ char info[100]; ++ int i; ++ ++ /* Ignore statically-linked init. */ ++ if (current->tgid == 1) ++ return; ++ ++ HYPERVISOR_vm_assist( ++ VMASST_CMD_disable, VMASST_TYPE_4gb_segments_notify); ++ ++ if (test_and_set_bit(0, &printed)) ++ return; ++ ++ sprintf(info, "%s (pid=%d)", current->comm, current->tgid); ++ ++ DP(""); ++ DP("***************************************************************"); ++ DP("***************************************************************"); ++ DP("** WARNING: Currently emulating unsupported memory accesses **"); ++ DP("** in /lib/tls glibc libraries. The emulation is **"); ++ DP("** slow. To ensure full performance you should **"); ++ DP("** install a 'xen-friendly' (nosegneg) version of **"); ++ DP("** the library, or disable tls support by executing **"); ++ DP("** the following as root: **"); ++ DP("** mv /lib/tls /lib/tls.disabled **"); ++ DP("** Offending process: %-38.38s **", info); ++ DP("***************************************************************"); ++ DP("***************************************************************"); ++ DP(""); ++ ++ for (i = 5; i > 0; i--) { ++ touch_softlockup_watchdog(); ++ printk("Pausing... %d", i); ++ mdelay(1000); ++ printk("\b\b\b\b\b\b\b\b\b\b\b\b"); ++ } ++ ++ printk("Continuing...\n\n"); ++} ++ ++static int __init fixup_init(void) ++{ ++ HYPERVISOR_vm_assist( ++ VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify); ++ return 0; ++} ++__initcall(fixup_init); +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/head-xen.S 2007-08-27 14:02:03.000000000 -0400 +@@ -0,0 +1,207 @@ ++ ++ ++.text ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * References to members of the new_cpu_data structure. ++ */ ++ ++#define X86 new_cpu_data+CPUINFO_x86 ++#define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor ++#define X86_MODEL new_cpu_data+CPUINFO_x86_model ++#define X86_MASK new_cpu_data+CPUINFO_x86_mask ++#define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math ++#define X86_CPUID new_cpu_data+CPUINFO_cpuid_level ++#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability ++#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id ++ ++#define VIRT_ENTRY_OFFSET 0x0 ++.org VIRT_ENTRY_OFFSET ++ENTRY(startup_32) ++ movl %esi,xen_start_info ++ cld ++ ++ /* Set up the stack pointer */ ++ movl $(init_thread_union+THREAD_SIZE),%esp ++ ++ /* get vendor info */ ++ xorl %eax,%eax # call CPUID with 0 -> return vendor ID ++ XEN_CPUID ++ movl %eax,X86_CPUID # save CPUID level ++ movl %ebx,X86_VENDOR_ID # lo 4 chars ++ movl %edx,X86_VENDOR_ID+4 # next 4 chars ++ movl %ecx,X86_VENDOR_ID+8 # last 4 chars ++ ++ movl $1,%eax # Use the CPUID instruction to get CPU type ++ XEN_CPUID ++ movb %al,%cl # save reg for future use ++ andb $0x0f,%ah # mask processor family ++ movb %ah,X86 ++ andb $0xf0,%al # mask model ++ shrb $4,%al ++ movb %al,X86_MODEL ++ andb $0x0f,%cl # mask mask revision ++ movb %cl,X86_MASK ++ movl %edx,X86_CAPABILITY ++ ++ movb $1,X86_HARD_MATH ++ ++ xorl %eax,%eax # Clear FS/GS and LDT ++ movl %eax,%fs ++ movl %eax,%gs ++ cld # gcc2 wants the direction flag cleared at all times ++ ++ pushl %eax # fake return address ++ jmp start_kernel ++ ++#define HYPERCALL_PAGE_OFFSET 0x1000 ++.org HYPERCALL_PAGE_OFFSET ++ENTRY(hypercall_page) ++ CFI_STARTPROC ++.skip 0x1000 ++ CFI_ENDPROC ++ ++/* ++ * Real beginning of normal "text" segment ++ */ ++ENTRY(stext) ++ENTRY(_stext) ++ ++/* ++ * BSS section ++ */ ++.section ".bss.page_aligned","w" ++ENTRY(empty_zero_page) ++ .fill 4096,1,0 ++ ++/* ++ * This starts the data section. ++ */ ++.data ++ ++/* ++ * The Global Descriptor Table contains 28 quadwords, per-CPU. ++ */ ++ .align L1_CACHE_BYTES ++ENTRY(cpu_gdt_table) ++ .quad 0x0000000000000000 /* NULL descriptor */ ++ .quad 0x0000000000000000 /* 0x0b reserved */ ++ .quad 0x0000000000000000 /* 0x13 reserved */ ++ .quad 0x0000000000000000 /* 0x1b reserved */ ++ .quad 0x0000000000000000 /* 0x20 unused */ ++ .quad 0x0000000000000000 /* 0x28 unused */ ++ .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ ++ .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ ++ .quad 0x0000000000000000 /* 0x43 TLS entry 3 */ ++ .quad 0x0000000000000000 /* 0x4b reserved */ ++ .quad 0x0000000000000000 /* 0x53 reserved */ ++ .quad 0x0000000000000000 /* 0x5b reserved */ ++ ++ .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ ++ .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ ++ .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */ ++ .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */ ++ ++ .quad 0x0000000000000000 /* 0x80 TSS descriptor */ ++ .quad 0x0000000000000000 /* 0x88 LDT descriptor */ ++ ++ /* ++ * Segments used for calling PnP BIOS have byte granularity. ++ * They code segments and data segments have fixed 64k limits, ++ * the transfer segment sizes are set at run time. ++ */ ++ .quad 0x0000000000000000 /* 0x90 32-bit code */ ++ .quad 0x0000000000000000 /* 0x98 16-bit code */ ++ .quad 0x0000000000000000 /* 0xa0 16-bit data */ ++ .quad 0x0000000000000000 /* 0xa8 16-bit data */ ++ .quad 0x0000000000000000 /* 0xb0 16-bit data */ ++ ++ /* ++ * The APM segments have byte granularity and their bases ++ * are set at run time. All have 64k limits. ++ */ ++ .quad 0x0000000000000000 /* 0xb8 APM CS code */ ++ .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */ ++ .quad 0x0000000000000000 /* 0xc8 APM DS data */ ++ ++ .quad 0x0000000000000000 /* 0xd0 - ESPFIX 16-bit SS */ ++ .quad 0x0000000000000000 /* 0xd8 - unused */ ++ .quad 0x0000000000000000 /* 0xe0 - unused */ ++ .quad 0x0000000000000000 /* 0xe8 - unused */ ++ .quad 0x0000000000000000 /* 0xf0 - unused */ ++ .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ ++ ++#if CONFIG_XEN_COMPAT <= 0x030002 ++/* ++ * __xen_guest information ++ */ ++.macro utoa value ++ .if (\value) < 0 || (\value) >= 0x10 ++ utoa (((\value)>>4)&0x0fffffff) ++ .endif ++ .if ((\value) & 0xf) < 10 ++ .byte '0' + ((\value) & 0xf) ++ .else ++ .byte 'A' + ((\value) & 0xf) - 10 ++ .endif ++.endm ++ ++.section __xen_guest ++ .ascii "GUEST_OS=linux,GUEST_VER=2.6" ++ .ascii ",XEN_VER=xen-3.0" ++ .ascii ",VIRT_BASE=0x" ++ utoa __PAGE_OFFSET ++ .ascii ",ELF_PADDR_OFFSET=0x" ++ utoa __PAGE_OFFSET ++ .ascii ",VIRT_ENTRY=0x" ++ utoa (__PAGE_OFFSET + __PHYSICAL_START + VIRT_ENTRY_OFFSET) ++ .ascii ",HYPERCALL_PAGE=0x" ++ utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT) ++ .ascii ",FEATURES=writable_page_tables" ++ .ascii "|writable_descriptor_tables" ++ .ascii "|auto_translated_physmap" ++ .ascii "|pae_pgdir_above_4gb" ++ .ascii "|supervisor_mode_kernel" ++#ifdef CONFIG_X86_PAE ++ .ascii ",PAE=yes[extended-cr3]" ++#else ++ .ascii ",PAE=no" ++#endif ++ .ascii ",LOADER=generic" ++ .byte 0 ++#endif /* CONFIG_XEN_COMPAT <= 0x030002 */ ++ ++ ++ ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "linux") ++ ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "2.6") ++ ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0") ++ ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long, __PAGE_OFFSET) ++#if CONFIG_XEN_COMPAT <= 0x030002 ++ ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, __PAGE_OFFSET) ++#else ++ ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, 0) ++#endif ++ ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, startup_32) ++ ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page) ++ ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, HYPERVISOR_VIRT_START) ++ ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel") ++#ifdef CONFIG_X86_PAE ++ ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes") ++ ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad, _PAGE_PRESENT,_PAGE_PRESENT) ++#else ++ ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "no") ++ ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, _PAGE_PRESENT,_PAGE_PRESENT) ++#endif ++ ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic") ++ ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 1) +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/init_task-xen.c 2007-08-27 14:01:24.000000000 -0400 +@@ -0,0 +1,51 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++static struct fs_struct init_fs = INIT_FS; ++static struct files_struct init_files = INIT_FILES; ++static struct signal_struct init_signals = INIT_SIGNALS(init_signals); ++static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); ++ ++#define swapper_pg_dir ((pgd_t *)NULL) ++struct mm_struct init_mm = INIT_MM(init_mm); ++#undef swapper_pg_dir ++ ++EXPORT_SYMBOL(init_mm); ++ ++/* ++ * Initial thread structure. ++ * ++ * We need to make sure that this is THREAD_SIZE aligned due to the ++ * way process stacks are handled. This is done by having a special ++ * "init_task" linker map entry.. ++ */ ++union thread_union init_thread_union ++ __attribute__((__section__(".data.init_task"))) = ++ { INIT_THREAD_INFO(init_task) }; ++ ++/* ++ * Initial task structure. ++ * ++ * All other task structs will be allocated on slabs in fork.c ++ */ ++struct task_struct init_task = INIT_TASK(init_task); ++ ++EXPORT_SYMBOL(init_task); ++ ++#ifndef CONFIG_X86_NO_TSS ++/* ++ * per-CPU TSS segments. Threads are completely 'soft' on Linux, ++ * no more per-task TSS's. ++ */ ++DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; ++#endif ++ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/io_apic-xen.c 2007-08-27 14:02:03.000000000 -0400 +@@ -0,0 +1,2777 @@ ++/* ++ * Intel IO-APIC support for multi-Pentium hosts. ++ * ++ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo ++ * ++ * Many thanks to Stig Venaas for trying out countless experimental ++ * patches and reporting/debugging problems patiently! ++ * ++ * (c) 1999, Multiple IO-APIC support, developed by ++ * Ken-ichi Yaku and ++ * Hidemi Kishimoto , ++ * further tested and cleaned up by Zach Brown ++ * and Ingo Molnar ++ * ++ * Fixes ++ * Maciej W. Rozycki : Bits for genuine 82489DX APICs; ++ * thanks to Eric Gilmore ++ * and Rolf G. Tews ++ * for testing these extensively ++ * Paul Diefenbaugh : Added full ACPI support ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "io_ports.h" ++ ++#ifdef CONFIG_XEN ++ ++#include ++#include ++ ++/* Fake i8259 */ ++#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq))) ++#define disable_8259A_irq(_irq) ((void)0) ++#define i8259A_irq_pending(_irq) (0) ++ ++unsigned long io_apic_irqs; ++ ++static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg) ++{ ++ struct physdev_apic apic_op; ++ int ret; ++ ++ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; ++ apic_op.reg = reg; ++ ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); ++ if (ret) ++ return ret; ++ return apic_op.value; ++} ++ ++static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) ++{ ++ struct physdev_apic apic_op; ++ ++ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; ++ apic_op.reg = reg; ++ apic_op.value = value; ++ HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op); ++} ++ ++#define io_apic_read(a,r) xen_io_apic_read(a,r) ++#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v) ++ ++#endif /* CONFIG_XEN */ ++ ++int (*ioapic_renumber_irq)(int ioapic, int irq); ++atomic_t irq_mis_count; ++ ++/* Where if anywhere is the i8259 connect in external int mode */ ++static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; ++ ++static DEFINE_SPINLOCK(ioapic_lock); ++static DEFINE_SPINLOCK(vector_lock); ++ ++int timer_over_8254 __initdata = 1; ++ ++/* ++ * Is the SiS APIC rmw bug present ? ++ * -1 = don't know, 0 = no, 1 = yes ++ */ ++int sis_apic_bug = -1; ++ ++/* ++ * # of IRQ routing registers ++ */ ++int nr_ioapic_registers[MAX_IO_APICS]; ++ ++int disable_timer_pin_1 __initdata; ++ ++/* ++ * Rough estimation of how many shared IRQs there are, can ++ * be changed anytime. ++ */ ++#define MAX_PLUS_SHARED_IRQS NR_IRQS ++#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) ++ ++/* ++ * This is performance-critical, we want to do it O(1) ++ * ++ * the indexing order of this array favors 1:1 mappings ++ * between pins and IRQs. ++ */ ++ ++static struct irq_pin_list { ++ int apic, pin, next; ++} irq_2_pin[PIN_MAP_SIZE]; ++ ++int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; ++#ifdef CONFIG_PCI_MSI ++#define vector_to_irq(vector) \ ++ (platform_legacy_irq(vector) ? vector : vector_irq[vector]) ++#else ++#define vector_to_irq(vector) (vector) ++#endif ++ ++/* ++ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are ++ * shared ISA-space IRQs, so we have to support them. We are super ++ * fast in the common case, and fast for shared ISA-space IRQs. ++ */ ++static void add_pin_to_irq(unsigned int irq, int apic, int pin) ++{ ++ static int first_free_entry = NR_IRQS; ++ struct irq_pin_list *entry = irq_2_pin + irq; ++ ++ while (entry->next) ++ entry = irq_2_pin + entry->next; ++ ++ if (entry->pin != -1) { ++ entry->next = first_free_entry; ++ entry = irq_2_pin + entry->next; ++ if (++first_free_entry >= PIN_MAP_SIZE) ++ panic("io_apic.c: whoops"); ++ } ++ entry->apic = apic; ++ entry->pin = pin; ++} ++ ++#ifdef CONFIG_XEN ++#define clear_IO_APIC() ((void)0) ++#else ++/* ++ * Reroute an IRQ to a different pin. ++ */ ++static void __init replace_pin_at_irq(unsigned int irq, ++ int oldapic, int oldpin, ++ int newapic, int newpin) ++{ ++ struct irq_pin_list *entry = irq_2_pin + irq; ++ ++ while (1) { ++ if (entry->apic == oldapic && entry->pin == oldpin) { ++ entry->apic = newapic; ++ entry->pin = newpin; ++ } ++ if (!entry->next) ++ break; ++ entry = irq_2_pin + entry->next; ++ } ++} ++ ++static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) ++{ ++ struct irq_pin_list *entry = irq_2_pin + irq; ++ unsigned int pin, reg; ++ ++ for (;;) { ++ pin = entry->pin; ++ if (pin == -1) ++ break; ++ reg = io_apic_read(entry->apic, 0x10 + pin*2); ++ reg &= ~disable; ++ reg |= enable; ++ io_apic_modify(entry->apic, 0x10 + pin*2, reg); ++ if (!entry->next) ++ break; ++ entry = irq_2_pin + entry->next; ++ } ++} ++ ++/* mask = 1 */ ++static void __mask_IO_APIC_irq (unsigned int irq) ++{ ++ __modify_IO_APIC_irq(irq, 0x00010000, 0); ++} ++ ++/* mask = 0 */ ++static void __unmask_IO_APIC_irq (unsigned int irq) ++{ ++ __modify_IO_APIC_irq(irq, 0, 0x00010000); ++} ++ ++/* mask = 1, trigger = 0 */ ++static void __mask_and_edge_IO_APIC_irq (unsigned int irq) ++{ ++ __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); ++} ++ ++/* mask = 0, trigger = 1 */ ++static void __unmask_and_level_IO_APIC_irq (unsigned int irq) ++{ ++ __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); ++} ++ ++static void mask_IO_APIC_irq (unsigned int irq) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __mask_IO_APIC_irq(irq); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void unmask_IO_APIC_irq (unsigned int irq) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __unmask_IO_APIC_irq(irq); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) ++{ ++ struct IO_APIC_route_entry entry; ++ unsigned long flags; ++ ++ /* Check delivery_mode to be sure we're not clearing an SMI pin */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); ++ *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ if (entry.delivery_mode == dest_SMI) ++ return; ++ ++ /* ++ * Disable it in the IO-APIC irq-routing table: ++ */ ++ memset(&entry, 0, sizeof(entry)); ++ entry.mask = 1; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); ++ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void clear_IO_APIC (void) ++{ ++ int apic, pin; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) ++ clear_IO_APIC_pin(apic, pin); ++} ++ ++#ifdef CONFIG_SMP ++static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) ++{ ++ unsigned long flags; ++ int pin; ++ struct irq_pin_list *entry = irq_2_pin + irq; ++ unsigned int apicid_value; ++ cpumask_t tmp; ++ ++ cpus_and(tmp, cpumask, cpu_online_map); ++ if (cpus_empty(tmp)) ++ tmp = TARGET_CPUS; ++ ++ cpus_and(cpumask, tmp, CPU_MASK_ALL); ++ ++ apicid_value = cpu_mask_to_apicid(cpumask); ++ /* Prepare to do the io_apic_write */ ++ apicid_value = apicid_value << 24; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ for (;;) { ++ pin = entry->pin; ++ if (pin == -1) ++ break; ++ io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); ++ if (!entry->next) ++ break; ++ entry = irq_2_pin + entry->next; ++ } ++ set_irq_info(irq, cpumask); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++#if defined(CONFIG_IRQBALANCE) ++# include /* kernel_thread() */ ++# include /* kstat */ ++# include /* kmalloc() */ ++# include /* time_after() */ ++ ++#ifdef CONFIG_BALANCED_IRQ_DEBUG ++# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0) ++# define Dprintk(x...) do { TDprintk(x); } while (0) ++# else ++# define TDprintk(x...) ++# define Dprintk(x...) ++# endif ++ ++#define IRQBALANCE_CHECK_ARCH -999 ++#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) ++#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) ++#define BALANCED_IRQ_MORE_DELTA (HZ/10) ++#define BALANCED_IRQ_LESS_DELTA (HZ) ++ ++static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH; ++static int physical_balance __read_mostly; ++static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; ++ ++static struct irq_cpu_info { ++ unsigned long * last_irq; ++ unsigned long * irq_delta; ++ unsigned long irq; ++} irq_cpu_data[NR_CPUS]; ++ ++#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) ++#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq]) ++#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) ++ ++#define IDLE_ENOUGH(cpu,now) \ ++ (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) ++ ++#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) ++ ++#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) ++ ++static cpumask_t balance_irq_affinity[NR_IRQS] = { ++ [0 ... NR_IRQS-1] = CPU_MASK_ALL ++}; ++ ++void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) ++{ ++ balance_irq_affinity[irq] = mask; ++} ++ ++static unsigned long move(int curr_cpu, cpumask_t allowed_mask, ++ unsigned long now, int direction) ++{ ++ int search_idle = 1; ++ int cpu = curr_cpu; ++ ++ goto inside; ++ ++ do { ++ if (unlikely(cpu == curr_cpu)) ++ search_idle = 0; ++inside: ++ if (direction == 1) { ++ cpu++; ++ if (cpu >= NR_CPUS) ++ cpu = 0; ++ } else { ++ cpu--; ++ if (cpu == -1) ++ cpu = NR_CPUS-1; ++ } ++ } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) || ++ (search_idle && !IDLE_ENOUGH(cpu,now))); ++ ++ return cpu; ++} ++ ++static inline void balance_irq(int cpu, int irq) ++{ ++ unsigned long now = jiffies; ++ cpumask_t allowed_mask; ++ unsigned int new_cpu; ++ ++ if (irqbalance_disabled) ++ return; ++ ++ cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); ++ new_cpu = move(cpu, allowed_mask, now, 1); ++ if (cpu != new_cpu) { ++ set_pending_irq(irq, cpumask_of_cpu(new_cpu)); ++ } ++} ++ ++static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) ++{ ++ int i, j; ++ Dprintk("Rotating IRQs among CPUs.\n"); ++ for_each_online_cpu(i) { ++ for (j = 0; j < NR_IRQS; j++) { ++ if (!irq_desc[j].action) ++ continue; ++ /* Is it a significant load ? */ ++ if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) < ++ useful_load_threshold) ++ continue; ++ balance_irq(i, j); ++ } ++ } ++ balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, ++ balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); ++ return; ++} ++ ++static void do_irq_balance(void) ++{ ++ int i, j; ++ unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); ++ unsigned long move_this_load = 0; ++ int max_loaded = 0, min_loaded = 0; ++ int load; ++ unsigned long useful_load_threshold = balanced_irq_interval + 10; ++ int selected_irq; ++ int tmp_loaded, first_attempt = 1; ++ unsigned long tmp_cpu_irq; ++ unsigned long imbalance = 0; ++ cpumask_t allowed_mask, target_cpu_mask, tmp; ++ ++ for_each_possible_cpu(i) { ++ int package_index; ++ CPU_IRQ(i) = 0; ++ if (!cpu_online(i)) ++ continue; ++ package_index = CPU_TO_PACKAGEINDEX(i); ++ for (j = 0; j < NR_IRQS; j++) { ++ unsigned long value_now, delta; ++ /* Is this an active IRQ? */ ++ if (!irq_desc[j].action) ++ continue; ++ if ( package_index == i ) ++ IRQ_DELTA(package_index,j) = 0; ++ /* Determine the total count per processor per IRQ */ ++ value_now = (unsigned long) kstat_cpu(i).irqs[j]; ++ ++ /* Determine the activity per processor per IRQ */ ++ delta = value_now - LAST_CPU_IRQ(i,j); ++ ++ /* Update last_cpu_irq[][] for the next time */ ++ LAST_CPU_IRQ(i,j) = value_now; ++ ++ /* Ignore IRQs whose rate is less than the clock */ ++ if (delta < useful_load_threshold) ++ continue; ++ /* update the load for the processor or package total */ ++ IRQ_DELTA(package_index,j) += delta; ++ ++ /* Keep track of the higher numbered sibling as well */ ++ if (i != package_index) ++ CPU_IRQ(i) += delta; ++ /* ++ * We have sibling A and sibling B in the package ++ * ++ * cpu_irq[A] = load for cpu A + load for cpu B ++ * cpu_irq[B] = load for cpu B ++ */ ++ CPU_IRQ(package_index) += delta; ++ } ++ } ++ /* Find the least loaded processor package */ ++ for_each_online_cpu(i) { ++ if (i != CPU_TO_PACKAGEINDEX(i)) ++ continue; ++ if (min_cpu_irq > CPU_IRQ(i)) { ++ min_cpu_irq = CPU_IRQ(i); ++ min_loaded = i; ++ } ++ } ++ max_cpu_irq = ULONG_MAX; ++ ++tryanothercpu: ++ /* Look for heaviest loaded processor. ++ * We may come back to get the next heaviest loaded processor. ++ * Skip processors with trivial loads. ++ */ ++ tmp_cpu_irq = 0; ++ tmp_loaded = -1; ++ for_each_online_cpu(i) { ++ if (i != CPU_TO_PACKAGEINDEX(i)) ++ continue; ++ if (max_cpu_irq <= CPU_IRQ(i)) ++ continue; ++ if (tmp_cpu_irq < CPU_IRQ(i)) { ++ tmp_cpu_irq = CPU_IRQ(i); ++ tmp_loaded = i; ++ } ++ } ++ ++ if (tmp_loaded == -1) { ++ /* In the case of small number of heavy interrupt sources, ++ * loading some of the cpus too much. We use Ingo's original ++ * approach to rotate them around. ++ */ ++ if (!first_attempt && imbalance >= useful_load_threshold) { ++ rotate_irqs_among_cpus(useful_load_threshold); ++ return; ++ } ++ goto not_worth_the_effort; ++ } ++ ++ first_attempt = 0; /* heaviest search */ ++ max_cpu_irq = tmp_cpu_irq; /* load */ ++ max_loaded = tmp_loaded; /* processor */ ++ imbalance = (max_cpu_irq - min_cpu_irq) / 2; ++ ++ Dprintk("max_loaded cpu = %d\n", max_loaded); ++ Dprintk("min_loaded cpu = %d\n", min_loaded); ++ Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq); ++ Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq); ++ Dprintk("load imbalance = %lu\n", imbalance); ++ ++ /* if imbalance is less than approx 10% of max load, then ++ * observe diminishing returns action. - quit ++ */ ++ if (imbalance < (max_cpu_irq >> 3)) { ++ Dprintk("Imbalance too trivial\n"); ++ goto not_worth_the_effort; ++ } ++ ++tryanotherirq: ++ /* if we select an IRQ to move that can't go where we want, then ++ * see if there is another one to try. ++ */ ++ move_this_load = 0; ++ selected_irq = -1; ++ for (j = 0; j < NR_IRQS; j++) { ++ /* Is this an active IRQ? */ ++ if (!irq_desc[j].action) ++ continue; ++ if (imbalance <= IRQ_DELTA(max_loaded,j)) ++ continue; ++ /* Try to find the IRQ that is closest to the imbalance ++ * without going over. ++ */ ++ if (move_this_load < IRQ_DELTA(max_loaded,j)) { ++ move_this_load = IRQ_DELTA(max_loaded,j); ++ selected_irq = j; ++ } ++ } ++ if (selected_irq == -1) { ++ goto tryanothercpu; ++ } ++ ++ imbalance = move_this_load; ++ ++ /* For physical_balance case, we accumlated both load ++ * values in the one of the siblings cpu_irq[], ++ * to use the same code for physical and logical processors ++ * as much as possible. ++ * ++ * NOTE: the cpu_irq[] array holds the sum of the load for ++ * sibling A and sibling B in the slot for the lowest numbered ++ * sibling (A), _AND_ the load for sibling B in the slot for ++ * the higher numbered sibling. ++ * ++ * We seek the least loaded sibling by making the comparison ++ * (A+B)/2 vs B ++ */ ++ load = CPU_IRQ(min_loaded) >> 1; ++ for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { ++ if (load > CPU_IRQ(j)) { ++ /* This won't change cpu_sibling_map[min_loaded] */ ++ load = CPU_IRQ(j); ++ min_loaded = j; ++ } ++ } ++ ++ cpus_and(allowed_mask, ++ cpu_online_map, ++ balance_irq_affinity[selected_irq]); ++ target_cpu_mask = cpumask_of_cpu(min_loaded); ++ cpus_and(tmp, target_cpu_mask, allowed_mask); ++ ++ if (!cpus_empty(tmp)) { ++ ++ Dprintk("irq = %d moved to cpu = %d\n", ++ selected_irq, min_loaded); ++ /* mark for change destination */ ++ set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); ++ ++ /* Since we made a change, come back sooner to ++ * check for more variation. ++ */ ++ balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, ++ balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); ++ return; ++ } ++ goto tryanotherirq; ++ ++not_worth_the_effort: ++ /* ++ * if we did not find an IRQ to move, then adjust the time interval ++ * upward ++ */ ++ balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, ++ balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); ++ Dprintk("IRQ worth rotating not found\n"); ++ return; ++} ++ ++static int balanced_irq(void *unused) ++{ ++ int i; ++ unsigned long prev_balance_time = jiffies; ++ long time_remaining = balanced_irq_interval; ++ ++ daemonize("kirqd"); ++ ++ /* push everything to CPU 0 to give us a starting point. */ ++ for (i = 0 ; i < NR_IRQS ; i++) { ++ irq_desc[i].pending_mask = cpumask_of_cpu(0); ++ set_pending_irq(i, cpumask_of_cpu(0)); ++ } ++ ++ for ( ; ; ) { ++ time_remaining = schedule_timeout_interruptible(time_remaining); ++ try_to_freeze(); ++ if (time_after(jiffies, ++ prev_balance_time+balanced_irq_interval)) { ++ preempt_disable(); ++ do_irq_balance(); ++ prev_balance_time = jiffies; ++ time_remaining = balanced_irq_interval; ++ preempt_enable(); ++ } ++ } ++ return 0; ++} ++ ++static int __init balanced_irq_init(void) ++{ ++ int i; ++ struct cpuinfo_x86 *c; ++ cpumask_t tmp; ++ ++ cpus_shift_right(tmp, cpu_online_map, 2); ++ c = &boot_cpu_data; ++ /* When not overwritten by the command line ask subarchitecture. */ ++ if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) ++ irqbalance_disabled = NO_BALANCE_IRQ; ++ if (irqbalance_disabled) ++ return 0; ++ ++ /* disable irqbalance completely if there is only one processor online */ ++ if (num_online_cpus() < 2) { ++ irqbalance_disabled = 1; ++ return 0; ++ } ++ /* ++ * Enable physical balance only if more than 1 physical processor ++ * is present ++ */ ++ if (smp_num_siblings > 1 && !cpus_empty(tmp)) ++ physical_balance = 1; ++ ++ for_each_online_cpu(i) { ++ irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); ++ irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); ++ if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { ++ printk(KERN_ERR "balanced_irq_init: out of memory"); ++ goto failed; ++ } ++ memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS); ++ memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS); ++ } ++ ++ printk(KERN_INFO "Starting balanced_irq\n"); ++ if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) ++ return 0; ++ else ++ printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); ++failed: ++ for_each_possible_cpu(i) { ++ kfree(irq_cpu_data[i].irq_delta); ++ irq_cpu_data[i].irq_delta = NULL; ++ kfree(irq_cpu_data[i].last_irq); ++ irq_cpu_data[i].last_irq = NULL; ++ } ++ return 0; ++} ++ ++int __init irqbalance_disable(char *str) ++{ ++ irqbalance_disabled = 1; ++ return 1; ++} ++ ++__setup("noirqbalance", irqbalance_disable); ++ ++late_initcall(balanced_irq_init); ++#endif /* CONFIG_IRQBALANCE */ ++#endif /* CONFIG_SMP */ ++#endif ++ ++#ifndef CONFIG_SMP ++void fastcall send_IPI_self(int vector) ++{ ++#ifndef CONFIG_XEN ++ unsigned int cfg; ++ ++ /* ++ * Wait for idle. ++ */ ++ apic_wait_icr_idle(); ++ cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; ++ /* ++ * Send the IPI. The write to APIC_ICR fires this off. ++ */ ++ apic_write_around(APIC_ICR, cfg); ++#endif ++} ++#endif /* !CONFIG_SMP */ ++ ++ ++/* ++ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to ++ * specific CPU-side IRQs. ++ */ ++ ++#define MAX_PIRQS 8 ++static int pirq_entries [MAX_PIRQS]; ++static int pirqs_enabled; ++int skip_ioapic_setup; ++ ++static int __init ioapic_setup(char *str) ++{ ++ skip_ioapic_setup = 1; ++ return 1; ++} ++ ++__setup("noapic", ioapic_setup); ++ ++static int __init ioapic_pirq_setup(char *str) ++{ ++ int i, max; ++ int ints[MAX_PIRQS+1]; ++ ++ get_options(str, ARRAY_SIZE(ints), ints); ++ ++ for (i = 0; i < MAX_PIRQS; i++) ++ pirq_entries[i] = -1; ++ ++ pirqs_enabled = 1; ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "PIRQ redirection, working around broken MP-BIOS.\n"); ++ max = MAX_PIRQS; ++ if (ints[0] < MAX_PIRQS) ++ max = ints[0]; ++ ++ for (i = 0; i < max; i++) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); ++ /* ++ * PIRQs are mapped upside down, usually. ++ */ ++ pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; ++ } ++ return 1; ++} ++ ++__setup("pirq=", ioapic_pirq_setup); ++ ++/* ++ * Find the IRQ entry number of a certain pin. ++ */ ++static int find_irq_entry(int apic, int pin, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) ++ if (mp_irqs[i].mpc_irqtype == type && ++ (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || ++ mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && ++ mp_irqs[i].mpc_dstirq == pin) ++ return i; ++ ++ return -1; ++} ++ ++/* ++ * Find the pin to which IRQ[irq] (ISA) is connected ++ */ ++static int __init find_isa_irq_pin(int irq, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].mpc_srcbus; ++ ++ if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || ++ mp_bus_id_to_type[lbus] == MP_BUS_EISA || ++ mp_bus_id_to_type[lbus] == MP_BUS_MCA || ++ mp_bus_id_to_type[lbus] == MP_BUS_NEC98 ++ ) && ++ (mp_irqs[i].mpc_irqtype == type) && ++ (mp_irqs[i].mpc_srcbusirq == irq)) ++ ++ return mp_irqs[i].mpc_dstirq; ++ } ++ return -1; ++} ++ ++static int __init find_isa_irq_apic(int irq, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].mpc_srcbus; ++ ++ if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || ++ mp_bus_id_to_type[lbus] == MP_BUS_EISA || ++ mp_bus_id_to_type[lbus] == MP_BUS_MCA || ++ mp_bus_id_to_type[lbus] == MP_BUS_NEC98 ++ ) && ++ (mp_irqs[i].mpc_irqtype == type) && ++ (mp_irqs[i].mpc_srcbusirq == irq)) ++ break; ++ } ++ if (i < mp_irq_entries) { ++ int apic; ++ for(apic = 0; apic < nr_ioapics; apic++) { ++ if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) ++ return apic; ++ } ++ } ++ ++ return -1; ++} ++ ++/* ++ * Find a specific PCI IRQ entry. ++ * Not an __init, possibly needed by modules ++ */ ++static int pin_2_irq(int idx, int apic, int pin); ++ ++int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) ++{ ++ int apic, i, best_guess = -1; ++ ++ apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " ++ "slot:%d, pin:%d.\n", bus, slot, pin); ++ if (mp_bus_id_to_pci_bus[bus] == -1) { ++ printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); ++ return -1; ++ } ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].mpc_srcbus; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) ++ if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || ++ mp_irqs[i].mpc_dstapic == MP_APIC_ALL) ++ break; ++ ++ if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) && ++ !mp_irqs[i].mpc_irqtype && ++ (bus == lbus) && ++ (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { ++ int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); ++ ++ if (!(apic || IO_APIC_IRQ(irq))) ++ continue; ++ ++ if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) ++ return irq; ++ /* ++ * Use the first all-but-pin matching entry as a ++ * best-guess fuzzy result for broken mptables. ++ */ ++ if (best_guess < 0) ++ best_guess = irq; ++ } ++ } ++ return best_guess; ++} ++EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); ++ ++/* ++ * This function currently is only a helper for the i386 smp boot process where ++ * we need to reprogram the ioredtbls to cater for the cpus which have come online ++ * so mask in all cases should simply be TARGET_CPUS ++ */ ++#ifdef CONFIG_SMP ++#ifndef CONFIG_XEN ++void __init setup_ioapic_dest(void) ++{ ++ int pin, ioapic, irq, irq_entry; ++ ++ if (skip_ioapic_setup == 1) ++ return; ++ ++ for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { ++ for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { ++ irq_entry = find_irq_entry(ioapic, pin, mp_INT); ++ if (irq_entry == -1) ++ continue; ++ irq = pin_2_irq(irq_entry, ioapic, pin); ++ set_ioapic_affinity_irq(irq, TARGET_CPUS); ++ } ++ ++ } ++} ++#endif /* !CONFIG_XEN */ ++#endif ++ ++/* ++ * EISA Edge/Level control register, ELCR ++ */ ++static int EISA_ELCR(unsigned int irq) ++{ ++ if (irq < 16) { ++ unsigned int port = 0x4d0 + (irq >> 3); ++ return (inb(port) >> (irq & 7)) & 1; ++ } ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "Broken MPtable reports ISA irq %d\n", irq); ++ return 0; ++} ++ ++/* EISA interrupts are always polarity zero and can be edge or level ++ * trigger depending on the ELCR value. If an interrupt is listed as ++ * EISA conforming in the MP table, that means its trigger type must ++ * be read in from the ELCR */ ++ ++#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) ++#define default_EISA_polarity(idx) (0) ++ ++/* ISA interrupts are always polarity zero edge triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_ISA_trigger(idx) (0) ++#define default_ISA_polarity(idx) (0) ++ ++/* PCI interrupts are always polarity one level triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_PCI_trigger(idx) (1) ++#define default_PCI_polarity(idx) (1) ++ ++/* MCA interrupts are always polarity zero level triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_MCA_trigger(idx) (1) ++#define default_MCA_polarity(idx) (0) ++ ++/* NEC98 interrupts are always polarity zero edge triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_NEC98_trigger(idx) (0) ++#define default_NEC98_polarity(idx) (0) ++ ++static int __init MPBIOS_polarity(int idx) ++{ ++ int bus = mp_irqs[idx].mpc_srcbus; ++ int polarity; ++ ++ /* ++ * Determine IRQ line polarity (high active or low active): ++ */ ++ switch (mp_irqs[idx].mpc_irqflag & 3) ++ { ++ case 0: /* conforms, ie. bus-type dependent polarity */ ++ { ++ switch (mp_bus_id_to_type[bus]) ++ { ++ case MP_BUS_ISA: /* ISA pin */ ++ { ++ polarity = default_ISA_polarity(idx); ++ break; ++ } ++ case MP_BUS_EISA: /* EISA pin */ ++ { ++ polarity = default_EISA_polarity(idx); ++ break; ++ } ++ case MP_BUS_PCI: /* PCI pin */ ++ { ++ polarity = default_PCI_polarity(idx); ++ break; ++ } ++ case MP_BUS_MCA: /* MCA pin */ ++ { ++ polarity = default_MCA_polarity(idx); ++ break; ++ } ++ case MP_BUS_NEC98: /* NEC 98 pin */ ++ { ++ polarity = default_NEC98_polarity(idx); ++ break; ++ } ++ default: ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ polarity = 1; ++ break; ++ } ++ } ++ break; ++ } ++ case 1: /* high active */ ++ { ++ polarity = 0; ++ break; ++ } ++ case 2: /* reserved */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ polarity = 1; ++ break; ++ } ++ case 3: /* low active */ ++ { ++ polarity = 1; ++ break; ++ } ++ default: /* invalid */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ polarity = 1; ++ break; ++ } ++ } ++ return polarity; ++} ++ ++static int MPBIOS_trigger(int idx) ++{ ++ int bus = mp_irqs[idx].mpc_srcbus; ++ int trigger; ++ ++ /* ++ * Determine IRQ trigger mode (edge or level sensitive): ++ */ ++ switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) ++ { ++ case 0: /* conforms, ie. bus-type dependent */ ++ { ++ switch (mp_bus_id_to_type[bus]) ++ { ++ case MP_BUS_ISA: /* ISA pin */ ++ { ++ trigger = default_ISA_trigger(idx); ++ break; ++ } ++ case MP_BUS_EISA: /* EISA pin */ ++ { ++ trigger = default_EISA_trigger(idx); ++ break; ++ } ++ case MP_BUS_PCI: /* PCI pin */ ++ { ++ trigger = default_PCI_trigger(idx); ++ break; ++ } ++ case MP_BUS_MCA: /* MCA pin */ ++ { ++ trigger = default_MCA_trigger(idx); ++ break; ++ } ++ case MP_BUS_NEC98: /* NEC 98 pin */ ++ { ++ trigger = default_NEC98_trigger(idx); ++ break; ++ } ++ default: ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ trigger = 1; ++ break; ++ } ++ } ++ break; ++ } ++ case 1: /* edge */ ++ { ++ trigger = 0; ++ break; ++ } ++ case 2: /* reserved */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ trigger = 1; ++ break; ++ } ++ case 3: /* level */ ++ { ++ trigger = 1; ++ break; ++ } ++ default: /* invalid */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ trigger = 0; ++ break; ++ } ++ } ++ return trigger; ++} ++ ++static inline int irq_polarity(int idx) ++{ ++ return MPBIOS_polarity(idx); ++} ++ ++static inline int irq_trigger(int idx) ++{ ++ return MPBIOS_trigger(idx); ++} ++ ++static int pin_2_irq(int idx, int apic, int pin) ++{ ++ int irq, i; ++ int bus = mp_irqs[idx].mpc_srcbus; ++ ++ /* ++ * Debugging check, we are in big trouble if this message pops up! ++ */ ++ if (mp_irqs[idx].mpc_dstirq != pin) ++ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); ++ ++ switch (mp_bus_id_to_type[bus]) ++ { ++ case MP_BUS_ISA: /* ISA pin */ ++ case MP_BUS_EISA: ++ case MP_BUS_MCA: ++ case MP_BUS_NEC98: ++ { ++ irq = mp_irqs[idx].mpc_srcbusirq; ++ break; ++ } ++ case MP_BUS_PCI: /* PCI pin */ ++ { ++ /* ++ * PCI IRQs are mapped in order ++ */ ++ i = irq = 0; ++ while (i < apic) ++ irq += nr_ioapic_registers[i++]; ++ irq += pin; ++ ++ /* ++ * For MPS mode, so far only needed by ES7000 platform ++ */ ++ if (ioapic_renumber_irq) ++ irq = ioapic_renumber_irq(apic, irq); ++ ++ break; ++ } ++ default: ++ { ++ printk(KERN_ERR "unknown bus type %d.\n",bus); ++ irq = 0; ++ break; ++ } ++ } ++ ++ /* ++ * PCI IRQ command line redirection. Yes, limits are hardcoded. ++ */ ++ if ((pin >= 16) && (pin <= 23)) { ++ if (pirq_entries[pin-16] != -1) { ++ if (!pirq_entries[pin-16]) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "disabling PIRQ%d\n", pin-16); ++ } else { ++ irq = pirq_entries[pin-16]; ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "using PIRQ%d -> IRQ %d\n", ++ pin-16, irq); ++ } ++ } ++ } ++ return irq; ++} ++ ++static inline int IO_APIC_irq_trigger(int irq) ++{ ++ int apic, idx, pin; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ idx = find_irq_entry(apic,pin,mp_INT); ++ if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) ++ return irq_trigger(idx); ++ } ++ } ++ /* ++ * nonexistent IRQs are edge default ++ */ ++ return 0; ++} ++ ++/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ ++u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */ ++ ++int assign_irq_vector(int irq) ++{ ++ unsigned long flags; ++ int vector; ++ struct physdev_irq irq_op; ++ ++ BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); ++ ++ spin_lock_irqsave(&vector_lock, flags); ++ ++ if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) { ++ spin_unlock_irqrestore(&vector_lock, flags); ++ return IO_APIC_VECTOR(irq); ++ } ++ ++ irq_op.irq = irq; ++ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { ++ spin_unlock_irqrestore(&vector_lock, flags); ++ return -ENOSPC; ++ } ++ ++ vector = irq_op.vector; ++ vector_irq[vector] = irq; ++ if (irq != AUTO_ASSIGN) ++ IO_APIC_VECTOR(irq) = vector; ++ ++ spin_unlock_irqrestore(&vector_lock, flags); ++ ++ return vector; ++} ++ ++#ifndef CONFIG_XEN ++static struct hw_interrupt_type ioapic_level_type; ++static struct hw_interrupt_type ioapic_edge_type; ++ ++#define IOAPIC_AUTO -1 ++#define IOAPIC_EDGE 0 ++#define IOAPIC_LEVEL 1 ++ ++static void ioapic_register_intr(int irq, int vector, unsigned long trigger) ++{ ++ unsigned idx; ++ ++ idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq; ++ ++ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || ++ trigger == IOAPIC_LEVEL) ++ irq_desc[idx].chip = &ioapic_level_type; ++ else ++ irq_desc[idx].chip = &ioapic_edge_type; ++ set_intr_gate(vector, interrupt[idx]); ++} ++#else ++#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0) ++#endif ++ ++static void __init setup_IO_APIC_irqs(void) ++{ ++ struct IO_APIC_route_entry entry; ++ int apic, pin, idx, irq, first_notcon = 1, vector; ++ unsigned long flags; ++ ++ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ ++ /* ++ * add it to the IO-APIC irq-routing table: ++ */ ++ memset(&entry,0,sizeof(entry)); ++ ++ entry.delivery_mode = INT_DELIVERY_MODE; ++ entry.dest_mode = INT_DEST_MODE; ++ entry.mask = 0; /* enable IRQ */ ++ entry.dest.logical.logical_dest = ++ cpu_mask_to_apicid(TARGET_CPUS); ++ ++ idx = find_irq_entry(apic,pin,mp_INT); ++ if (idx == -1) { ++ if (first_notcon) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ " IO-APIC (apicid-pin) %d-%d", ++ mp_ioapics[apic].mpc_apicid, ++ pin); ++ first_notcon = 0; ++ } else ++ apic_printk(APIC_VERBOSE, ", %d-%d", ++ mp_ioapics[apic].mpc_apicid, pin); ++ continue; ++ } ++ ++ entry.trigger = irq_trigger(idx); ++ entry.polarity = irq_polarity(idx); ++ ++ if (irq_trigger(idx)) { ++ entry.trigger = 1; ++ entry.mask = 1; ++ } ++ ++ irq = pin_2_irq(idx, apic, pin); ++ /* ++ * skip adding the timer int on secondary nodes, which causes ++ * a small but painful rift in the time-space continuum ++ */ ++ if (multi_timer_check(apic, irq)) ++ continue; ++ else ++ add_pin_to_irq(irq, apic, pin); ++ ++ if (/*!apic &&*/ !IO_APIC_IRQ(irq)) ++ continue; ++ ++ if (IO_APIC_IRQ(irq)) { ++ vector = assign_irq_vector(irq); ++ entry.vector = vector; ++ ioapic_register_intr(irq, vector, IOAPIC_AUTO); ++ ++ if (!apic && (irq < 16)) ++ disable_8259A_irq(irq); ++ } ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); ++ io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); ++ set_native_irq_info(irq, TARGET_CPUS); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ } ++ } ++ ++ if (!first_notcon) ++ apic_printk(APIC_VERBOSE, " not connected.\n"); ++} ++ ++/* ++ * Set up the 8259A-master output pin: ++ */ ++#ifndef CONFIG_XEN ++static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) ++{ ++ struct IO_APIC_route_entry entry; ++ unsigned long flags; ++ ++ memset(&entry,0,sizeof(entry)); ++ ++ disable_8259A_irq(0); ++ ++ /* mask LVT0 */ ++ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); ++ ++ /* ++ * We use logical delivery to get the timer IRQ ++ * to the first CPU. ++ */ ++ entry.dest_mode = INT_DEST_MODE; ++ entry.mask = 0; /* unmask IRQ now */ ++ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); ++ entry.delivery_mode = INT_DELIVERY_MODE; ++ entry.polarity = 0; ++ entry.trigger = 0; ++ entry.vector = vector; ++ ++ /* ++ * The timer IRQ doesn't have to know that behind the ++ * scene we have a 8259A-master in AEOI mode ... ++ */ ++ irq_desc[0].chip = &ioapic_edge_type; ++ ++ /* ++ * Add it to the IO-APIC irq-routing table: ++ */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); ++ io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ enable_8259A_irq(0); ++} ++ ++static inline void UNEXPECTED_IO_APIC(void) ++{ ++} ++ ++void __init print_IO_APIC(void) ++{ ++ int apic, i; ++ union IO_APIC_reg_00 reg_00; ++ union IO_APIC_reg_01 reg_01; ++ union IO_APIC_reg_02 reg_02; ++ union IO_APIC_reg_03 reg_03; ++ unsigned long flags; ++ ++ if (apic_verbosity == APIC_QUIET) ++ return; ++ ++ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); ++ for (i = 0; i < nr_ioapics; i++) ++ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", ++ mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); ++ ++ /* ++ * We are a bit conservative about what we expect. We have to ++ * know about every hardware change ASAP. ++ */ ++ printk(KERN_INFO "testing the IO APIC.......................\n"); ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(apic, 0); ++ reg_01.raw = io_apic_read(apic, 1); ++ if (reg_01.bits.version >= 0x10) ++ reg_02.raw = io_apic_read(apic, 2); ++ if (reg_01.bits.version >= 0x20) ++ reg_03.raw = io_apic_read(apic, 3); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); ++ printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); ++ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); ++ printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); ++ printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); ++ if (reg_00.bits.ID >= get_physical_broadcast()) ++ UNEXPECTED_IO_APIC(); ++ if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) ++ UNEXPECTED_IO_APIC(); ++ ++ printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); ++ printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); ++ if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */ ++ (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */ ++ (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */ ++ (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */ ++ (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */ ++ (reg_01.bits.entries != 0x2E) && ++ (reg_01.bits.entries != 0x3F) ++ ) ++ UNEXPECTED_IO_APIC(); ++ ++ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); ++ printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); ++ if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */ ++ (reg_01.bits.version != 0x10) && /* oldest IO-APICs */ ++ (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */ ++ (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */ ++ (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */ ++ ) ++ UNEXPECTED_IO_APIC(); ++ if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2) ++ UNEXPECTED_IO_APIC(); ++ ++ /* ++ * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, ++ * but the value of reg_02 is read as the previous read register ++ * value, so ignore it if reg_02 == reg_01. ++ */ ++ if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { ++ printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); ++ printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); ++ if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2) ++ UNEXPECTED_IO_APIC(); ++ } ++ ++ /* ++ * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 ++ * or reg_03, but the value of reg_0[23] is read as the previous read ++ * register value, so ignore it if reg_03 == reg_0[12]. ++ */ ++ if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && ++ reg_03.raw != reg_01.raw) { ++ printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); ++ printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); ++ if (reg_03.bits.__reserved_1) ++ UNEXPECTED_IO_APIC(); ++ } ++ ++ printk(KERN_DEBUG ".... IRQ redirection table:\n"); ++ ++ printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" ++ " Stat Dest Deli Vect: \n"); ++ ++ for (i = 0; i <= reg_01.bits.entries; i++) { ++ struct IO_APIC_route_entry entry; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); ++ *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ printk(KERN_DEBUG " %02x %03X %02X ", ++ i, ++ entry.dest.logical.logical_dest, ++ entry.dest.physical.physical_dest ++ ); ++ ++ printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", ++ entry.mask, ++ entry.trigger, ++ entry.irr, ++ entry.polarity, ++ entry.delivery_status, ++ entry.dest_mode, ++ entry.delivery_mode, ++ entry.vector ++ ); ++ } ++ } ++ if (use_pci_vector()) ++ printk(KERN_INFO "Using vector-based indexing\n"); ++ printk(KERN_DEBUG "IRQ to pin mappings:\n"); ++ for (i = 0; i < NR_IRQS; i++) { ++ struct irq_pin_list *entry = irq_2_pin + i; ++ if (entry->pin < 0) ++ continue; ++ if (use_pci_vector() && !platform_legacy_irq(i)) ++ printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i)); ++ else ++ printk(KERN_DEBUG "IRQ%d ", i); ++ for (;;) { ++ printk("-> %d:%d", entry->apic, entry->pin); ++ if (!entry->next) ++ break; ++ entry = irq_2_pin + entry->next; ++ } ++ printk("\n"); ++ } ++ ++ printk(KERN_INFO ".................................... done.\n"); ++ ++ return; ++} ++ ++#if 0 ++ ++static void print_APIC_bitfield (int base) ++{ ++ unsigned int v; ++ int i, j; ++ ++ if (apic_verbosity == APIC_QUIET) ++ return; ++ ++ printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); ++ for (i = 0; i < 8; i++) { ++ v = apic_read(base + i*0x10); ++ for (j = 0; j < 32; j++) { ++ if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ ++ apic_write(APIC_ESR, 0); ++ v = apic_read(APIC_ESR); ++ printk(KERN_DEBUG "... APIC ESR: %08x\n", v); ++ } ++ ++ v = apic_read(APIC_ICR); ++ printk(KERN_DEBUG "... APIC ICR: %08x\n", v); ++ v = apic_read(APIC_ICR2); ++ printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); ++ ++ v = apic_read(APIC_LVTT); ++ printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); ++ ++ if (maxlvt > 3) { /* PC is LVT#4. */ ++ v = apic_read(APIC_LVTPC); ++ printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); ++ } ++ v = apic_read(APIC_LVT0); ++ printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); ++ v = apic_read(APIC_LVT1); ++ printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); ++ ++ if (maxlvt > 2) { /* ERR is LVT#3. */ ++ v = apic_read(APIC_LVTERR); ++ printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); ++ } ++ ++ v = apic_read(APIC_TMICT); ++ printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); ++ v = apic_read(APIC_TMCCT); ++ printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); ++ v = apic_read(APIC_TDCR); ++ printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); ++ printk("\n"); ++} ++ ++void print_all_local_APICs (void) ++{ ++ on_each_cpu(print_local_APIC, NULL, 1, 1); ++} ++ ++void /*__init*/ print_PIC(void) ++{ ++ unsigned int v; ++ unsigned long flags; ++ ++ if (apic_verbosity == APIC_QUIET) ++ return; ++ ++ printk(KERN_DEBUG "\nprinting PIC contents\n"); ++ ++ spin_lock_irqsave(&i8259A_lock, flags); ++ ++ v = inb(0xa1) << 8 | inb(0x21); ++ printk(KERN_DEBUG "... PIC IMR: %04x\n", v); ++ ++ v = inb(0xa0) << 8 | inb(0x20); ++ printk(KERN_DEBUG "... PIC IRR: %04x\n", v); ++ ++ outb(0x0b,0xa0); ++ outb(0x0b,0x20); ++ v = inb(0xa0) << 8 | inb(0x20); ++ outb(0x0a,0xa0); ++ outb(0x0a,0x20); ++ ++ spin_unlock_irqrestore(&i8259A_lock, flags); ++ ++ printk(KERN_DEBUG "... PIC ISR: %04x\n", v); ++ ++ v = inb(0x4d1) << 8 | inb(0x4d0); ++ printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); ++} ++ ++#endif /* 0 */ ++ ++#else ++void __init print_IO_APIC(void) { } ++#endif /* !CONFIG_XEN */ ++ ++static void __init enable_IO_APIC(void) ++{ ++ union IO_APIC_reg_01 reg_01; ++ int i8259_apic, i8259_pin; ++ int i, apic; ++ unsigned long flags; ++ ++ for (i = 0; i < PIN_MAP_SIZE; i++) { ++ irq_2_pin[i].pin = -1; ++ irq_2_pin[i].next = 0; ++ } ++ if (!pirqs_enabled) ++ for (i = 0; i < MAX_PIRQS; i++) ++ pirq_entries[i] = -1; ++ ++ /* ++ * The number of IO-APIC IRQ registers (== #pins): ++ */ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(apic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ nr_ioapic_registers[apic] = reg_01.bits.entries+1; ++ } ++ for(apic = 0; apic < nr_ioapics; apic++) { ++ int pin; ++ /* See if any of the pins is in ExtINT mode */ ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ struct IO_APIC_route_entry entry; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); ++ *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ ++ /* If the interrupt line is enabled and in ExtInt mode ++ * I have found the pin where the i8259 is connected. ++ */ ++ if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { ++ ioapic_i8259.apic = apic; ++ ioapic_i8259.pin = pin; ++ goto found_i8259; ++ } ++ } ++ } ++ found_i8259: ++ /* Look to see what if the MP table has reported the ExtINT */ ++ /* If we could not find the appropriate pin by looking at the ioapic ++ * the i8259 probably is not connected the ioapic but give the ++ * mptable a chance anyway. ++ */ ++ i8259_pin = find_isa_irq_pin(0, mp_ExtINT); ++ i8259_apic = find_isa_irq_apic(0, mp_ExtINT); ++ /* Trust the MP table if nothing is setup in the hardware */ ++ if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { ++ printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); ++ ioapic_i8259.pin = i8259_pin; ++ ioapic_i8259.apic = i8259_apic; ++ } ++ /* Complain if the MP table and the hardware disagree */ ++ if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && ++ (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) ++ { ++ printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); ++ } ++ ++ /* ++ * Do not trust the IO-APIC being empty at bootup ++ */ ++ clear_IO_APIC(); ++} ++ ++/* ++ * Not an __init, needed by the reboot code ++ */ ++void disable_IO_APIC(void) ++{ ++ /* ++ * Clear the IO-APIC before rebooting: ++ */ ++ clear_IO_APIC(); ++ ++#ifndef CONFIG_XEN ++ /* ++ * If the i8259 is routed through an IOAPIC ++ * Put that IOAPIC in virtual wire mode ++ * so legacy interrupts can be delivered. ++ */ ++ if (ioapic_i8259.pin != -1) { ++ struct IO_APIC_route_entry entry; ++ unsigned long flags; ++ ++ memset(&entry, 0, sizeof(entry)); ++ entry.mask = 0; /* Enabled */ ++ entry.trigger = 0; /* Edge */ ++ entry.irr = 0; ++ entry.polarity = 0; /* High */ ++ entry.delivery_status = 0; ++ entry.dest_mode = 0; /* Physical */ ++ entry.delivery_mode = dest_ExtINT; /* ExtInt */ ++ entry.vector = 0; ++ entry.dest.physical.physical_dest = ++ GET_APIC_ID(apic_read(APIC_ID)); ++ ++ /* ++ * Add it to the IO-APIC irq-routing table: ++ */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin, ++ *(((int *)&entry)+1)); ++ io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin, ++ *(((int *)&entry)+0)); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ } ++ disconnect_bsp_APIC(ioapic_i8259.pin != -1); ++#endif ++} ++ ++/* ++ * function to set the IO-APIC physical IDs based on the ++ * values stored in the MPC table. ++ * ++ * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 ++ */ ++ ++#if !defined(CONFIG_XEN) && !defined(CONFIG_X86_NUMAQ) ++static void __init setup_ioapic_ids_from_mpc(void) ++{ ++ union IO_APIC_reg_00 reg_00; ++ physid_mask_t phys_id_present_map; ++ int apic; ++ int i; ++ unsigned char old_id; ++ unsigned long flags; ++ ++ /* ++ * Don't check I/O APIC IDs for xAPIC systems. They have ++ * no meaning without the serial APIC bus. ++ */ ++ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) ++ return; ++ /* ++ * This is broken; anything with a real cpu count has to ++ * circumvent this idiocy regardless. ++ */ ++ phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); ++ ++ /* ++ * Set the IOAPIC ID to the value stored in the MPC table. ++ */ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ ++ /* Read the register 0 value */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(apic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ old_id = mp_ioapics[apic].mpc_apicid; ++ ++ if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { ++ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", ++ apic, mp_ioapics[apic].mpc_apicid); ++ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", ++ reg_00.bits.ID); ++ mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; ++ } ++ ++ /* ++ * Sanity check, is the ID really free? Every APIC in a ++ * system must have a unique ID or we get lots of nice ++ * 'stuck on smp_invalidate_needed IPI wait' messages. ++ */ ++ if (check_apicid_used(phys_id_present_map, ++ mp_ioapics[apic].mpc_apicid)) { ++ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", ++ apic, mp_ioapics[apic].mpc_apicid); ++ for (i = 0; i < get_physical_broadcast(); i++) ++ if (!physid_isset(i, phys_id_present_map)) ++ break; ++ if (i >= get_physical_broadcast()) ++ panic("Max APIC ID exceeded!\n"); ++ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", ++ i); ++ physid_set(i, phys_id_present_map); ++ mp_ioapics[apic].mpc_apicid = i; ++ } else { ++ physid_mask_t tmp; ++ tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid); ++ apic_printk(APIC_VERBOSE, "Setting %d in the " ++ "phys_id_present_map\n", ++ mp_ioapics[apic].mpc_apicid); ++ physids_or(phys_id_present_map, phys_id_present_map, tmp); ++ } ++ ++ ++ /* ++ * We need to adjust the IRQ routing table ++ * if the ID changed. ++ */ ++ if (old_id != mp_ioapics[apic].mpc_apicid) ++ for (i = 0; i < mp_irq_entries; i++) ++ if (mp_irqs[i].mpc_dstapic == old_id) ++ mp_irqs[i].mpc_dstapic ++ = mp_ioapics[apic].mpc_apicid; ++ ++ /* ++ * Read the right value from the MPC table and ++ * write it into the ID register. ++ */ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "...changing IO-APIC physical APIC ID to %d ...", ++ mp_ioapics[apic].mpc_apicid); ++ ++ reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0, reg_00.raw); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ /* ++ * Sanity check ++ */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(apic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) ++ printk("could not set ID!\n"); ++ else ++ apic_printk(APIC_VERBOSE, " ok.\n"); ++ } ++} ++#else ++static void __init setup_ioapic_ids_from_mpc(void) { } ++#endif ++ ++#ifndef CONFIG_XEN ++/* ++ * There is a nasty bug in some older SMP boards, their mptable lies ++ * about the timer IRQ. We do the following to work around the situation: ++ * ++ * - timer IRQ defaults to IO-APIC IRQ ++ * - if this function detects that timer IRQs are defunct, then we fall ++ * back to ISA timer IRQs ++ */ ++static int __init timer_irq_works(void) ++{ ++ unsigned long t1 = jiffies; ++ ++ local_irq_enable(); ++ /* Let ten ticks pass... */ ++ mdelay((10 * 1000) / HZ); ++ ++ /* ++ * Expect a few ticks at least, to be sure some possible ++ * glue logic does not lock up after one or two first ++ * ticks in a non-ExtINT mode. Also the local APIC ++ * might have cached one ExtINT interrupt. Finally, at ++ * least one tick may be lost due to delays. ++ */ ++ if (jiffies - t1 > 4) ++ return 1; ++ ++ return 0; ++} ++ ++/* ++ * In the SMP+IOAPIC case it might happen that there are an unspecified ++ * number of pending IRQ events unhandled. These cases are very rare, ++ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much ++ * better to do it this way as thus we do not have to be aware of ++ * 'pending' interrupts in the IRQ path, except at this point. ++ */ ++/* ++ * Edge triggered needs to resend any interrupt ++ * that was delayed but this is now handled in the device ++ * independent code. ++ */ ++ ++/* ++ * Starting up a edge-triggered IO-APIC interrupt is ++ * nasty - we need to make sure that we get the edge. ++ * If it is already asserted for some reason, we need ++ * return 1 to indicate that is was pending. ++ * ++ * This is not complete - we should be able to fake ++ * an edge even if it isn't on the 8259A... ++ */ ++static unsigned int startup_edge_ioapic_irq(unsigned int irq) ++{ ++ int was_pending = 0; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ if (irq < 16) { ++ disable_8259A_irq(irq); ++ if (i8259A_irq_pending(irq)) ++ was_pending = 1; ++ } ++ __unmask_IO_APIC_irq(irq); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return was_pending; ++} ++ ++/* ++ * Once we have recorded IRQ_PENDING already, we can mask the ++ * interrupt for real. This prevents IRQ storms from unhandled ++ * devices. ++ */ ++static void ack_edge_ioapic_irq(unsigned int irq) ++{ ++ move_irq(irq); ++ if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) ++ == (IRQ_PENDING | IRQ_DISABLED)) ++ mask_IO_APIC_irq(irq); ++ ack_APIC_irq(); ++} ++ ++/* ++ * Level triggered interrupts can just be masked, ++ * and shutting down and starting up the interrupt ++ * is the same as enabling and disabling them -- except ++ * with a startup need to return a "was pending" value. ++ * ++ * Level triggered interrupts are special because we ++ * do not touch any IO-APIC register while handling ++ * them. We ack the APIC in the end-IRQ handler, not ++ * in the start-IRQ-handler. Protection against reentrance ++ * from the same interrupt is still provided, both by the ++ * generic IRQ layer and by the fact that an unacked local ++ * APIC does not accept IRQs. ++ */ ++static unsigned int startup_level_ioapic_irq (unsigned int irq) ++{ ++ unmask_IO_APIC_irq(irq); ++ ++ return 0; /* don't check for pending */ ++} ++ ++static void end_level_ioapic_irq (unsigned int irq) ++{ ++ unsigned long v; ++ int i; ++ ++ move_irq(irq); ++/* ++ * It appears there is an erratum which affects at least version 0x11 ++ * of I/O APIC (that's the 82093AA and cores integrated into various ++ * chipsets). Under certain conditions a level-triggered interrupt is ++ * erroneously delivered as edge-triggered one but the respective IRR ++ * bit gets set nevertheless. As a result the I/O unit expects an EOI ++ * message but it will never arrive and further interrupts are blocked ++ * from the source. The exact reason is so far unknown, but the ++ * phenomenon was observed when two consecutive interrupt requests ++ * from a given source get delivered to the same CPU and the source is ++ * temporarily disabled in between. ++ * ++ * A workaround is to simulate an EOI message manually. We achieve it ++ * by setting the trigger mode to edge and then to level when the edge ++ * trigger mode gets detected in the TMR of a local APIC for a ++ * level-triggered interrupt. We mask the source for the time of the ++ * operation to prevent an edge-triggered interrupt escaping meanwhile. ++ * The idea is from Manfred Spraul. --macro ++ */ ++ i = IO_APIC_VECTOR(irq); ++ ++ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); ++ ++ ack_APIC_irq(); ++ ++ if (!(v & (1 << (i & 0x1f)))) { ++ atomic_inc(&irq_mis_count); ++ spin_lock(&ioapic_lock); ++ __mask_and_edge_IO_APIC_irq(irq); ++ __unmask_and_level_IO_APIC_irq(irq); ++ spin_unlock(&ioapic_lock); ++ } ++} ++ ++#ifdef CONFIG_PCI_MSI ++static unsigned int startup_edge_ioapic_vector(unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ return startup_edge_ioapic_irq(irq); ++} ++ ++static void ack_edge_ioapic_vector(unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ move_native_irq(vector); ++ ack_edge_ioapic_irq(irq); ++} ++ ++static unsigned int startup_level_ioapic_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ return startup_level_ioapic_irq (irq); ++} ++ ++static void end_level_ioapic_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ move_native_irq(vector); ++ end_level_ioapic_irq(irq); ++} ++ ++static void mask_IO_APIC_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ mask_IO_APIC_irq(irq); ++} ++ ++static void unmask_IO_APIC_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ unmask_IO_APIC_irq(irq); ++} ++ ++#ifdef CONFIG_SMP ++static void set_ioapic_affinity_vector (unsigned int vector, ++ cpumask_t cpu_mask) ++{ ++ int irq = vector_to_irq(vector); ++ ++ set_native_irq_info(vector, cpu_mask); ++ set_ioapic_affinity_irq(irq, cpu_mask); ++} ++#endif ++#endif ++ ++static int ioapic_retrigger(unsigned int irq) ++{ ++ send_IPI_self(IO_APIC_VECTOR(irq)); ++ ++ return 1; ++} ++ ++/* ++ * Level and edge triggered IO-APIC interrupts need different handling, ++ * so we use two separate IRQ descriptors. Edge triggered IRQs can be ++ * handled with the level-triggered descriptor, but that one has slightly ++ * more overhead. Level-triggered interrupts cannot be handled with the ++ * edge-triggered handler, without risking IRQ storms and other ugly ++ * races. ++ */ ++static struct hw_interrupt_type ioapic_edge_type __read_mostly = { ++ .typename = "IO-APIC-edge", ++ .startup = startup_edge_ioapic, ++ .shutdown = shutdown_edge_ioapic, ++ .enable = enable_edge_ioapic, ++ .disable = disable_edge_ioapic, ++ .ack = ack_edge_ioapic, ++ .end = end_edge_ioapic, ++#ifdef CONFIG_SMP ++ .set_affinity = set_ioapic_affinity, ++#endif ++ .retrigger = ioapic_retrigger, ++}; ++ ++static struct hw_interrupt_type ioapic_level_type __read_mostly = { ++ .typename = "IO-APIC-level", ++ .startup = startup_level_ioapic, ++ .shutdown = shutdown_level_ioapic, ++ .enable = enable_level_ioapic, ++ .disable = disable_level_ioapic, ++ .ack = mask_and_ack_level_ioapic, ++ .end = end_level_ioapic, ++#ifdef CONFIG_SMP ++ .set_affinity = set_ioapic_affinity, ++#endif ++ .retrigger = ioapic_retrigger, ++}; ++#endif /* !CONFIG_XEN */ ++ ++static inline void init_IO_APIC_traps(void) ++{ ++ int irq; ++ ++ /* ++ * NOTE! The local APIC isn't very good at handling ++ * multiple interrupts at the same interrupt level. ++ * As the interrupt level is determined by taking the ++ * vector number and shifting that right by 4, we ++ * want to spread these out a bit so that they don't ++ * all fall in the same interrupt level. ++ * ++ * Also, we've got to be careful not to trash gate ++ * 0x80, because int 0x80 is hm, kind of importantish. ;) ++ */ ++ for (irq = 0; irq < NR_IRQS ; irq++) { ++ int tmp = irq; ++ if (use_pci_vector()) { ++ if (!platform_legacy_irq(tmp)) ++ if ((tmp = vector_to_irq(tmp)) == -1) ++ continue; ++ } ++ if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { ++ /* ++ * Hmm.. We don't have an entry for this, ++ * so default to an old-fashioned 8259 ++ * interrupt if we can.. ++ */ ++ if (irq < 16) ++ make_8259A_irq(irq); ++#ifndef CONFIG_XEN ++ else ++ /* Strange. Oh, well.. */ ++ irq_desc[irq].chip = &no_irq_type; ++#endif ++ } ++ } ++} ++ ++#ifndef CONFIG_XEN ++static void enable_lapic_irq (unsigned int irq) ++{ ++ unsigned long v; ++ ++ v = apic_read(APIC_LVT0); ++ apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); ++} ++ ++static void disable_lapic_irq (unsigned int irq) ++{ ++ unsigned long v; ++ ++ v = apic_read(APIC_LVT0); ++ apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); ++} ++ ++static void ack_lapic_irq (unsigned int irq) ++{ ++ ack_APIC_irq(); ++} ++ ++static void end_lapic_irq (unsigned int i) { /* nothing */ } ++ ++static struct hw_interrupt_type lapic_irq_type __read_mostly = { ++ .typename = "local-APIC-edge", ++ .startup = NULL, /* startup_irq() not used for IRQ0 */ ++ .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ ++ .enable = enable_lapic_irq, ++ .disable = disable_lapic_irq, ++ .ack = ack_lapic_irq, ++ .end = end_lapic_irq ++}; ++ ++static void setup_nmi (void) ++{ ++ /* ++ * Dirty trick to enable the NMI watchdog ... ++ * We put the 8259A master into AEOI mode and ++ * unmask on all local APICs LVT0 as NMI. ++ * ++ * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') ++ * is from Maciej W. Rozycki - so we do not have to EOI from ++ * the NMI handler or the timer interrupt. ++ */ ++ apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); ++ ++ on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1); ++ ++ apic_printk(APIC_VERBOSE, " done.\n"); ++} ++ ++/* ++ * This looks a bit hackish but it's about the only one way of sending ++ * a few INTA cycles to 8259As and any associated glue logic. ICR does ++ * not support the ExtINT mode, unfortunately. We need to send these ++ * cycles as some i82489DX-based boards have glue logic that keeps the ++ * 8259A interrupt line asserted until INTA. --macro ++ */ ++static inline void unlock_ExtINT_logic(void) ++{ ++ int apic, pin, i; ++ struct IO_APIC_route_entry entry0, entry1; ++ unsigned char save_control, save_freq_select; ++ unsigned long flags; ++ ++ pin = find_isa_irq_pin(8, mp_INT); ++ apic = find_isa_irq_apic(8, mp_INT); ++ if (pin == -1) ++ return; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin); ++ *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ clear_IO_APIC_pin(apic, pin); ++ ++ memset(&entry1, 0, sizeof(entry1)); ++ ++ entry1.dest_mode = 0; /* physical delivery */ ++ entry1.mask = 0; /* unmask IRQ now */ ++ entry1.dest.physical.physical_dest = hard_smp_processor_id(); ++ entry1.delivery_mode = dest_ExtINT; ++ entry1.polarity = entry0.polarity; ++ entry1.trigger = 0; ++ entry1.vector = 0; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); ++ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ save_control = CMOS_READ(RTC_CONTROL); ++ save_freq_select = CMOS_READ(RTC_FREQ_SELECT); ++ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, ++ RTC_FREQ_SELECT); ++ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); ++ ++ i = 100; ++ while (i-- > 0) { ++ mdelay(10); ++ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) ++ i -= 10; ++ } ++ ++ CMOS_WRITE(save_control, RTC_CONTROL); ++ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); ++ clear_IO_APIC_pin(apic, pin); ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); ++ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++int timer_uses_ioapic_pin_0; ++ ++/* ++ * This code may look a bit paranoid, but it's supposed to cooperate with ++ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ ++ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast ++ * fanatically on his truly buggy board. ++ */ ++static inline void check_timer(void) ++{ ++ int apic1, pin1, apic2, pin2; ++ int vector; ++ ++ /* ++ * get/set the timer IRQ vector: ++ */ ++ disable_8259A_irq(0); ++ vector = assign_irq_vector(0); ++ set_intr_gate(vector, interrupt[0]); ++ ++ /* ++ * Subtle, code in do_timer_interrupt() expects an AEOI ++ * mode for the 8259A whenever interrupts are routed ++ * through I/O APICs. Also IRQ0 has to be enabled in ++ * the 8259A which implies the virtual wire has to be ++ * disabled in the local APIC. ++ */ ++ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); ++ init_8259A(1); ++ timer_ack = 1; ++ if (timer_over_8254 > 0) ++ enable_8259A_irq(0); ++ ++ pin1 = find_isa_irq_pin(0, mp_INT); ++ apic1 = find_isa_irq_apic(0, mp_INT); ++ pin2 = ioapic_i8259.pin; ++ apic2 = ioapic_i8259.apic; ++ ++ if (pin1 == 0) ++ timer_uses_ioapic_pin_0 = 1; ++ ++ printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", ++ vector, apic1, pin1, apic2, pin2); ++ ++ if (pin1 != -1) { ++ /* ++ * Ok, does IRQ0 through the IOAPIC work? ++ */ ++ unmask_IO_APIC_irq(0); ++ if (timer_irq_works()) { ++ if (nmi_watchdog == NMI_IO_APIC) { ++ disable_8259A_irq(0); ++ setup_nmi(); ++ enable_8259A_irq(0); ++ } ++ if (disable_timer_pin_1 > 0) ++ clear_IO_APIC_pin(0, pin1); ++ return; ++ } ++ clear_IO_APIC_pin(apic1, pin1); ++ printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to " ++ "IO-APIC\n"); ++ } ++ ++ printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); ++ if (pin2 != -1) { ++ printk("\n..... (found pin %d) ...", pin2); ++ /* ++ * legacy devices should be connected to IO APIC #0 ++ */ ++ setup_ExtINT_IRQ0_pin(apic2, pin2, vector); ++ if (timer_irq_works()) { ++ printk("works.\n"); ++ if (pin1 != -1) ++ replace_pin_at_irq(0, apic1, pin1, apic2, pin2); ++ else ++ add_pin_to_irq(0, apic2, pin2); ++ if (nmi_watchdog == NMI_IO_APIC) { ++ setup_nmi(); ++ } ++ return; ++ } ++ /* ++ * Cleanup, just in case ... ++ */ ++ clear_IO_APIC_pin(apic2, pin2); ++ } ++ printk(" failed.\n"); ++ ++ if (nmi_watchdog == NMI_IO_APIC) { ++ printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); ++ nmi_watchdog = 0; ++ } ++ ++ printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); ++ ++ disable_8259A_irq(0); ++ irq_desc[0].chip = &lapic_irq_type; ++ apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ ++ enable_8259A_irq(0); ++ ++ if (timer_irq_works()) { ++ printk(" works.\n"); ++ return; ++ } ++ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); ++ printk(" failed.\n"); ++ ++ printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); ++ ++ timer_ack = 0; ++ init_8259A(0); ++ make_8259A_irq(0); ++ apic_write_around(APIC_LVT0, APIC_DM_EXTINT); ++ ++ unlock_ExtINT_logic(); ++ ++ if (timer_irq_works()) { ++ printk(" works.\n"); ++ return; ++ } ++ printk(" failed :(.\n"); ++ panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " ++ "report. Then try booting with the 'noapic' option"); ++} ++#else ++int timer_uses_ioapic_pin_0 = 0; ++#define check_timer() ((void)0) ++#endif ++ ++/* ++ * ++ * IRQ's that are handled by the PIC in the MPS IOAPIC case. ++ * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. ++ * Linux doesn't really care, as it's not actually used ++ * for any interrupt handling anyway. ++ */ ++#define PIC_IRQS (1 << PIC_CASCADE_IR) ++ ++void __init setup_IO_APIC(void) ++{ ++ enable_IO_APIC(); ++ ++ if (acpi_ioapic) ++ io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ ++ else ++ io_apic_irqs = ~PIC_IRQS; ++ ++ printk("ENABLING IO-APIC IRQs\n"); ++ ++ /* ++ * Set up IO-APIC IRQ routing. ++ */ ++ if (!acpi_ioapic) ++ setup_ioapic_ids_from_mpc(); ++#ifndef CONFIG_XEN ++ sync_Arb_IDs(); ++#endif ++ setup_IO_APIC_irqs(); ++ init_IO_APIC_traps(); ++ check_timer(); ++ if (!acpi_ioapic) ++ print_IO_APIC(); ++} ++ ++static int __init setup_disable_8254_timer(char *s) ++{ ++ timer_over_8254 = -1; ++ return 1; ++} ++static int __init setup_enable_8254_timer(char *s) ++{ ++ timer_over_8254 = 2; ++ return 1; ++} ++ ++__setup("disable_8254_timer", setup_disable_8254_timer); ++__setup("enable_8254_timer", setup_enable_8254_timer); ++ ++/* ++ * Called after all the initialization is done. If we didnt find any ++ * APIC bugs then we can allow the modify fast path ++ */ ++ ++static int __init io_apic_bug_finalize(void) ++{ ++ if(sis_apic_bug == -1) ++ sis_apic_bug = 0; ++ if (is_initial_xendomain()) { ++ struct xen_platform_op op = { .cmd = XENPF_platform_quirk }; ++ op.u.platform_quirk.quirk_id = sis_apic_bug ? ++ QUIRK_IOAPIC_BAD_REGSEL : QUIRK_IOAPIC_GOOD_REGSEL; ++ HYPERVISOR_platform_op(&op); ++ } ++ return 0; ++} ++ ++late_initcall(io_apic_bug_finalize); ++ ++struct sysfs_ioapic_data { ++ struct sys_device dev; ++ struct IO_APIC_route_entry entry[0]; ++}; ++static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; ++ ++static int ioapic_suspend(struct sys_device *dev, pm_message_t state) ++{ ++ struct IO_APIC_route_entry *entry; ++ struct sysfs_ioapic_data *data; ++ unsigned long flags; ++ int i; ++ ++ data = container_of(dev, struct sysfs_ioapic_data, dev); ++ entry = data->entry; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { ++ *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); ++ *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); ++ } ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return 0; ++} ++ ++static int ioapic_resume(struct sys_device *dev) ++{ ++ struct IO_APIC_route_entry *entry; ++ struct sysfs_ioapic_data *data; ++ unsigned long flags; ++ union IO_APIC_reg_00 reg_00; ++ int i; ++ ++ data = container_of(dev, struct sysfs_ioapic_data, dev); ++ entry = data->entry; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(dev->id, 0); ++ if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { ++ reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; ++ io_apic_write(dev->id, 0, reg_00.raw); ++ } ++ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { ++ io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); ++ io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); ++ } ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return 0; ++} ++ ++static struct sysdev_class ioapic_sysdev_class = { ++ set_kset_name("ioapic"), ++ .suspend = ioapic_suspend, ++ .resume = ioapic_resume, ++}; ++ ++static int __init ioapic_init_sysfs(void) ++{ ++ struct sys_device * dev; ++ int i, size, error = 0; ++ ++ error = sysdev_class_register(&ioapic_sysdev_class); ++ if (error) ++ return error; ++ ++ for (i = 0; i < nr_ioapics; i++ ) { ++ size = sizeof(struct sys_device) + nr_ioapic_registers[i] ++ * sizeof(struct IO_APIC_route_entry); ++ mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); ++ if (!mp_ioapic_data[i]) { ++ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); ++ continue; ++ } ++ memset(mp_ioapic_data[i], 0, size); ++ dev = &mp_ioapic_data[i]->dev; ++ dev->id = i; ++ dev->cls = &ioapic_sysdev_class; ++ error = sysdev_register(dev); ++ if (error) { ++ kfree(mp_ioapic_data[i]); ++ mp_ioapic_data[i] = NULL; ++ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); ++ continue; ++ } ++ } ++ ++ return 0; ++} ++ ++device_initcall(ioapic_init_sysfs); ++ ++/* -------------------------------------------------------------------------- ++ ACPI-based IOAPIC Configuration ++ -------------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_ACPI ++ ++int __init io_apic_get_unique_id (int ioapic, int apic_id) ++{ ++#ifndef CONFIG_XEN ++ union IO_APIC_reg_00 reg_00; ++ static physid_mask_t apic_id_map = PHYSID_MASK_NONE; ++ physid_mask_t tmp; ++ unsigned long flags; ++ int i = 0; ++ ++ /* ++ * The P4 platform supports up to 256 APIC IDs on two separate APIC ++ * buses (one for LAPICs, one for IOAPICs), where predecessors only ++ * supports up to 16 on one shared APIC bus. ++ * ++ * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full ++ * advantage of new APIC bus architecture. ++ */ ++ ++ if (physids_empty(apic_id_map)) ++ apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(ioapic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ if (apic_id >= get_physical_broadcast()) { ++ printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " ++ "%d\n", ioapic, apic_id, reg_00.bits.ID); ++ apic_id = reg_00.bits.ID; ++ } ++ ++ /* ++ * Every APIC in a system must have a unique ID or we get lots of nice ++ * 'stuck on smp_invalidate_needed IPI wait' messages. ++ */ ++ if (check_apicid_used(apic_id_map, apic_id)) { ++ ++ for (i = 0; i < get_physical_broadcast(); i++) { ++ if (!check_apicid_used(apic_id_map, i)) ++ break; ++ } ++ ++ if (i == get_physical_broadcast()) ++ panic("Max apic_id exceeded!\n"); ++ ++ printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " ++ "trying %d\n", ioapic, apic_id, i); ++ ++ apic_id = i; ++ } ++ ++ tmp = apicid_to_cpu_present(apic_id); ++ physids_or(apic_id_map, apic_id_map, tmp); ++ ++ if (reg_00.bits.ID != apic_id) { ++ reg_00.bits.ID = apic_id; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(ioapic, 0, reg_00.raw); ++ reg_00.raw = io_apic_read(ioapic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ /* Sanity check */ ++ if (reg_00.bits.ID != apic_id) { ++ printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); ++ return -1; ++ } ++ } ++ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); ++#endif /* !CONFIG_XEN */ ++ ++ return apic_id; ++} ++ ++ ++int __init io_apic_get_version (int ioapic) ++{ ++ union IO_APIC_reg_01 reg_01; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(ioapic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return reg_01.bits.version; ++} ++ ++ ++int __init io_apic_get_redir_entries (int ioapic) ++{ ++ union IO_APIC_reg_01 reg_01; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(ioapic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return reg_01.bits.entries; ++} ++ ++ ++int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) ++{ ++ struct IO_APIC_route_entry entry; ++ unsigned long flags; ++ ++ if (!IO_APIC_IRQ(irq)) { ++ printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", ++ ioapic); ++ return -EINVAL; ++ } ++ ++ /* ++ * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. ++ * Note that we mask (disable) IRQs now -- these get enabled when the ++ * corresponding device driver registers for this IRQ. ++ */ ++ ++ memset(&entry,0,sizeof(entry)); ++ ++ entry.delivery_mode = INT_DELIVERY_MODE; ++ entry.dest_mode = INT_DEST_MODE; ++ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); ++ entry.trigger = edge_level; ++ entry.polarity = active_high_low; ++ entry.mask = 1; ++ ++ /* ++ * IRQs < 16 are already in the irq_2_pin[] map ++ */ ++ if (irq >= 16) ++ add_pin_to_irq(irq, ioapic, pin); ++ ++ entry.vector = assign_irq_vector(irq); ++ ++ apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " ++ "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, ++ mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, ++ edge_level, active_high_low); ++ ++ ioapic_register_intr(irq, entry.vector, edge_level); ++ ++ if (!ioapic && (irq < 16)) ++ disable_8259A_irq(irq); ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); ++ io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); ++ set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return 0; ++} ++ ++#endif /* CONFIG_ACPI */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/ioport-xen.c 2007-08-27 14:01:58.000000000 -0400 +@@ -0,0 +1,122 @@ ++/* ++ * linux/arch/i386/kernel/ioport.c ++ * ++ * This contains the io-permission bitmap code - written by obz, with changes ++ * by Linus. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ ++static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) ++{ ++ unsigned long mask; ++ unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG); ++ unsigned int low_index = base & (BITS_PER_LONG-1); ++ int length = low_index + extent; ++ ++ if (low_index != 0) { ++ mask = (~0UL << low_index); ++ if (length < BITS_PER_LONG) ++ mask &= ~(~0UL << length); ++ if (new_value) ++ *bitmap_base++ |= mask; ++ else ++ *bitmap_base++ &= ~mask; ++ length -= BITS_PER_LONG; ++ } ++ ++ mask = (new_value ? ~0UL : 0UL); ++ while (length >= BITS_PER_LONG) { ++ *bitmap_base++ = mask; ++ length -= BITS_PER_LONG; ++ } ++ ++ if (length > 0) { ++ mask = ~(~0UL << length); ++ if (new_value) ++ *bitmap_base++ |= mask; ++ else ++ *bitmap_base++ &= ~mask; ++ } ++} ++ ++ ++/* ++ * this changes the io permissions bitmap in the current task. ++ */ ++asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) ++{ ++ struct thread_struct * t = ¤t->thread; ++ unsigned long *bitmap; ++ struct physdev_set_iobitmap set_iobitmap; ++ ++ if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) ++ return -EINVAL; ++ if (turn_on && !capable(CAP_SYS_RAWIO)) ++ return -EPERM; ++ ++ /* ++ * If it's the first ioperm() call in this thread's lifetime, set the ++ * IO bitmap up. ioperm() is much less timing critical than clone(), ++ * this is why we delay this operation until now: ++ */ ++ if (!t->io_bitmap_ptr) { ++ bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); ++ if (!bitmap) ++ return -ENOMEM; ++ ++ memset(bitmap, 0xff, IO_BITMAP_BYTES); ++ t->io_bitmap_ptr = bitmap; ++ set_thread_flag(TIF_IO_BITMAP); ++ ++ set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap); ++ set_iobitmap.nr_ports = IO_BITMAP_BITS; ++ HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &set_iobitmap); ++ } ++ ++ set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); ++ ++ return 0; ++} ++ ++/* ++ * sys_iopl has to be used when you want to access the IO ports ++ * beyond the 0x3ff range: to get the full 65536 ports bitmapped ++ * you'd need 8kB of bitmaps/process, which is a bit excessive. ++ * ++ * Here we just change the eflags value on the stack: we allow ++ * only the super-user to do it. This depends on the stack-layout ++ * on system-call entry - see also fork() and the signal handling ++ * code. ++ */ ++ ++asmlinkage long sys_iopl(unsigned long unused) ++{ ++ volatile struct pt_regs * regs = (struct pt_regs *) &unused; ++ unsigned int level = regs->ebx; ++ struct thread_struct *t = ¤t->thread; ++ unsigned int old = (t->iopl >> 12) & 3; ++ ++ if (level > 3) ++ return -EINVAL; ++ /* Trying to gain more privileges? */ ++ if (level > old) { ++ if (!capable(CAP_SYS_RAWIO)) ++ return -EPERM; ++ } ++ t->iopl = level << 12; ++ set_iopl_mask(t->iopl); ++ return 0; ++} +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/irq-xen.c 2007-08-27 14:02:03.000000000 -0400 +@@ -0,0 +1,324 @@ ++/* ++ * linux/arch/i386/kernel/irq.c ++ * ++ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar ++ * ++ * This file contains the lowest level x86-specific interrupt ++ * entry, irq-stacks and irq statistics code. All the remaining ++ * irq logic is done by the generic kernel/irq/ code and ++ * by the x86-specific irq controller code. (e.g. i8259.c and ++ * io_apic.c.) ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; ++EXPORT_PER_CPU_SYMBOL(irq_stat); ++ ++#ifndef CONFIG_X86_LOCAL_APIC ++/* ++ * 'what should we do if we get a hw irq event on an illegal vector'. ++ * each architecture has to answer this themselves. ++ */ ++void ack_bad_irq(unsigned int irq) ++{ ++ printk("unexpected IRQ trap at vector %02x\n", irq); ++} ++#endif ++ ++#ifdef CONFIG_4KSTACKS ++/* ++ * per-CPU IRQ handling contexts (thread information and stack) ++ */ ++union irq_ctx { ++ struct thread_info tinfo; ++ u32 stack[THREAD_SIZE/sizeof(u32)]; ++}; ++ ++static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; ++static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; ++#endif ++ ++/* ++ * do_IRQ handles all normal device IRQ's (the special ++ * SMP cross-CPU interrupts have their own specific ++ * handlers). ++ */ ++fastcall unsigned int do_IRQ(struct pt_regs *regs) ++{ ++ /* high bit used in ret_from_ code */ ++ int irq = ~regs->orig_eax; ++#ifdef CONFIG_4KSTACKS ++ union irq_ctx *curctx, *irqctx; ++ u32 *isp; ++#endif ++ ++ if (unlikely((unsigned)irq >= NR_IRQS)) { ++ printk(KERN_EMERG "%s: cannot handle IRQ %d\n", ++ __FUNCTION__, irq); ++ BUG(); ++ } ++ ++ irq_enter(); ++#ifdef CONFIG_DEBUG_STACKOVERFLOW ++ /* Debugging check for stack overflow: is there less than 1KB free? */ ++ { ++ long esp; ++ ++ __asm__ __volatile__("andl %%esp,%0" : ++ "=r" (esp) : "0" (THREAD_SIZE - 1)); ++ if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { ++ printk("do_IRQ: stack overflow: %ld\n", ++ esp - sizeof(struct thread_info)); ++ dump_stack(); ++ } ++ } ++#endif ++ ++#ifdef CONFIG_4KSTACKS ++ ++ curctx = (union irq_ctx *) current_thread_info(); ++ irqctx = hardirq_ctx[smp_processor_id()]; ++ ++ /* ++ * this is where we switch to the IRQ stack. However, if we are ++ * already using the IRQ stack (because we interrupted a hardirq ++ * handler) we can't do that and just have to keep using the ++ * current stack (which is the irq stack already after all) ++ */ ++ if (curctx != irqctx) { ++ int arg1, arg2, ebx; ++ ++ /* build the stack frame on the IRQ stack */ ++ isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); ++ irqctx->tinfo.task = curctx->tinfo.task; ++ irqctx->tinfo.previous_esp = current_stack_pointer; ++ ++ /* ++ * Copy the softirq bits in preempt_count so that the ++ * softirq checks work in the hardirq context. ++ */ ++ irqctx->tinfo.preempt_count = ++ (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | ++ (curctx->tinfo.preempt_count & SOFTIRQ_MASK); ++ ++ asm volatile( ++ " xchgl %%ebx,%%esp \n" ++ " call __do_IRQ \n" ++ " movl %%ebx,%%esp \n" ++ : "=a" (arg1), "=d" (arg2), "=b" (ebx) ++ : "0" (irq), "1" (regs), "2" (isp) ++ : "memory", "cc", "ecx" ++ ); ++ } else ++#endif ++ __do_IRQ(irq, regs); ++ ++ irq_exit(); ++ ++ return 1; ++} ++ ++#ifdef CONFIG_4KSTACKS ++ ++/* ++ * These should really be __section__(".bss.page_aligned") as well, but ++ * gcc's 3.0 and earlier don't handle that correctly. ++ */ ++static char softirq_stack[NR_CPUS * THREAD_SIZE] ++ __attribute__((__aligned__(THREAD_SIZE))); ++ ++static char hardirq_stack[NR_CPUS * THREAD_SIZE] ++ __attribute__((__aligned__(THREAD_SIZE))); ++ ++/* ++ * allocate per-cpu stacks for hardirq and for softirq processing ++ */ ++void irq_ctx_init(int cpu) ++{ ++ union irq_ctx *irqctx; ++ ++ if (hardirq_ctx[cpu]) ++ return; ++ ++ irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; ++ irqctx->tinfo.task = NULL; ++ irqctx->tinfo.exec_domain = NULL; ++ irqctx->tinfo.cpu = cpu; ++ irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; ++ irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); ++ ++ hardirq_ctx[cpu] = irqctx; ++ ++ irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE]; ++ irqctx->tinfo.task = NULL; ++ irqctx->tinfo.exec_domain = NULL; ++ irqctx->tinfo.cpu = cpu; ++ irqctx->tinfo.preempt_count = 0; ++ irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); ++ ++ softirq_ctx[cpu] = irqctx; ++ ++ printk("CPU %u irqstacks, hard=%p soft=%p\n", ++ cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); ++} ++ ++void irq_ctx_exit(int cpu) ++{ ++ hardirq_ctx[cpu] = NULL; ++} ++ ++extern asmlinkage void __do_softirq(void); ++ ++asmlinkage void do_softirq(void) ++{ ++ unsigned long flags; ++ struct thread_info *curctx; ++ union irq_ctx *irqctx; ++ u32 *isp; ++ ++ if (in_interrupt()) ++ return; ++ ++ local_irq_save(flags); ++ ++ if (local_softirq_pending()) { ++ curctx = current_thread_info(); ++ irqctx = softirq_ctx[smp_processor_id()]; ++ irqctx->tinfo.task = curctx->task; ++ irqctx->tinfo.previous_esp = current_stack_pointer; ++ ++ /* build the stack frame on the softirq stack */ ++ isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); ++ ++ asm volatile( ++ " xchgl %%ebx,%%esp \n" ++ " call __do_softirq \n" ++ " movl %%ebx,%%esp \n" ++ : "=b"(isp) ++ : "0"(isp) ++ : "memory", "cc", "edx", "ecx", "eax" ++ ); ++ /* ++ * Shouldnt happen, we returned above if in_interrupt(): ++ */ ++ WARN_ON_ONCE(softirq_count()); ++ } ++ ++ local_irq_restore(flags); ++} ++ ++EXPORT_SYMBOL(do_softirq); ++#endif ++ ++/* ++ * Interrupt statistics: ++ */ ++ ++atomic_t irq_err_count; ++ ++/* ++ * /proc/interrupts printing: ++ */ ++ ++int show_interrupts(struct seq_file *p, void *v) ++{ ++ int i = *(loff_t *) v, j; ++ struct irqaction * action; ++ unsigned long flags; ++ ++ if (i == 0) { ++ seq_printf(p, " "); ++ for_each_online_cpu(j) ++ seq_printf(p, "CPU%-8d",j); ++ seq_putc(p, '\n'); ++ } ++ ++ if (i < NR_IRQS) { ++ spin_lock_irqsave(&irq_desc[i].lock, flags); ++ action = irq_desc[i].action; ++ if (!action) ++ goto skip; ++ seq_printf(p, "%3d: ",i); ++#ifndef CONFIG_SMP ++ seq_printf(p, "%10u ", kstat_irqs(i)); ++#else ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); ++#endif ++ seq_printf(p, " %14s", irq_desc[i].chip->typename); ++ seq_printf(p, " %s", action->name); ++ ++ for (action=action->next; action; action = action->next) ++ seq_printf(p, ", %s", action->name); ++ ++ seq_putc(p, '\n'); ++skip: ++ spin_unlock_irqrestore(&irq_desc[i].lock, flags); ++ } else if (i == NR_IRQS) { ++ seq_printf(p, "NMI: "); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", nmi_count(j)); ++ seq_putc(p, '\n'); ++#ifdef CONFIG_X86_LOCAL_APIC ++ seq_printf(p, "LOC: "); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", ++ per_cpu(irq_stat,j).apic_timer_irqs); ++ seq_putc(p, '\n'); ++#endif ++ seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); ++#if defined(CONFIG_X86_IO_APIC) ++ seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); ++#endif ++ } ++ return 0; ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++ ++void fixup_irqs(cpumask_t map) ++{ ++ unsigned int irq; ++ static int warned; ++ ++ for (irq = 0; irq < NR_IRQS; irq++) { ++ cpumask_t mask; ++ if (irq == 2) ++ continue; ++ ++ cpus_and(mask, irq_desc[irq].affinity, map); ++ if (any_online_cpu(mask) == NR_CPUS) { ++ /*printk("Breaking affinity for irq %i\n", irq);*/ ++ mask = map; ++ } ++ if (irq_desc[irq].chip->set_affinity) ++ irq_desc[irq].chip->set_affinity(irq, mask); ++ else if (irq_desc[irq].action && !(warned++)) ++ printk("Cannot set affinity for irq %i\n", irq); ++ } ++ ++#if 0 ++ barrier(); ++ /* Ingo Molnar says: "after the IO-APIC masks have been redirected ++ [note the nop - the interrupt-enable boundary on x86 is two ++ instructions from sti] - to flush out pending hardirqs and ++ IPIs. After this point nothing is supposed to reach this CPU." */ ++ __asm__ __volatile__("sti; nop; cli"); ++ barrier(); ++#else ++ /* That doesn't seem sufficient. Give it 1ms. */ ++ local_irq_enable(); ++ mdelay(1); ++ local_irq_disable(); ++#endif ++} ++#endif ++ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/ldt-xen.c 2007-08-27 14:02:03.000000000 -0400 +@@ -0,0 +1,270 @@ ++/* ++ * linux/kernel/ldt.c ++ * ++ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds ++ * Copyright (C) 1999 Ingo Molnar ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ ++static void flush_ldt(void *null) ++{ ++ if (current->active_mm) ++ load_LDT(¤t->active_mm->context); ++} ++#endif ++ ++static int alloc_ldt(mm_context_t *pc, int mincount, int reload) ++{ ++ void *oldldt; ++ void *newldt; ++ int oldsize; ++ ++ if (mincount <= pc->size) ++ return 0; ++ oldsize = pc->size; ++ mincount = (mincount+511)&(~511); ++ if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) ++ newldt = vmalloc(mincount*LDT_ENTRY_SIZE); ++ else ++ newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); ++ ++ if (!newldt) ++ return -ENOMEM; ++ ++ if (oldsize) ++ memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); ++ oldldt = pc->ldt; ++ memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); ++ pc->ldt = newldt; ++ wmb(); ++ pc->size = mincount; ++ wmb(); ++ ++ if (reload) { ++#ifdef CONFIG_SMP ++ cpumask_t mask; ++ preempt_disable(); ++#endif ++ make_pages_readonly( ++ pc->ldt, ++ (pc->size * LDT_ENTRY_SIZE) / PAGE_SIZE, ++ XENFEAT_writable_descriptor_tables); ++ load_LDT(pc); ++#ifdef CONFIG_SMP ++ mask = cpumask_of_cpu(smp_processor_id()); ++ if (!cpus_equal(current->mm->cpu_vm_mask, mask)) ++ smp_call_function(flush_ldt, NULL, 1, 1); ++ preempt_enable(); ++#endif ++ } ++ if (oldsize) { ++ make_pages_writable( ++ oldldt, ++ (oldsize * LDT_ENTRY_SIZE) / PAGE_SIZE, ++ XENFEAT_writable_descriptor_tables); ++ if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) ++ vfree(oldldt); ++ else ++ kfree(oldldt); ++ } ++ return 0; ++} ++ ++static inline int copy_ldt(mm_context_t *new, mm_context_t *old) ++{ ++ int err = alloc_ldt(new, old->size, 0); ++ if (err < 0) ++ return err; ++ memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); ++ make_pages_readonly( ++ new->ldt, ++ (new->size * LDT_ENTRY_SIZE) / PAGE_SIZE, ++ XENFEAT_writable_descriptor_tables); ++ return 0; ++} ++ ++/* ++ * we do not have to muck with descriptors here, that is ++ * done in switch_mm() as needed. ++ */ ++int init_new_context(struct task_struct *tsk, struct mm_struct *mm) ++{ ++ struct mm_struct * old_mm; ++ int retval = 0; ++ ++ init_MUTEX(&mm->context.sem); ++ mm->context.size = 0; ++ mm->context.has_foreign_mappings = 0; ++ old_mm = current->mm; ++ if (old_mm && old_mm->context.size > 0) { ++ down(&old_mm->context.sem); ++ retval = copy_ldt(&mm->context, &old_mm->context); ++ up(&old_mm->context.sem); ++ } ++ return retval; ++} ++ ++/* ++ * No need to lock the MM as we are the last user ++ */ ++void destroy_context(struct mm_struct *mm) ++{ ++ if (mm->context.size) { ++ if (mm == current->active_mm) ++ clear_LDT(); ++ make_pages_writable( ++ mm->context.ldt, ++ (mm->context.size * LDT_ENTRY_SIZE) / PAGE_SIZE, ++ XENFEAT_writable_descriptor_tables); ++ if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) ++ vfree(mm->context.ldt); ++ else ++ kfree(mm->context.ldt); ++ mm->context.size = 0; ++ } ++} ++ ++static int read_ldt(void __user * ptr, unsigned long bytecount) ++{ ++ int err; ++ unsigned long size; ++ struct mm_struct * mm = current->mm; ++ ++ if (!mm->context.size) ++ return 0; ++ if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) ++ bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; ++ ++ down(&mm->context.sem); ++ size = mm->context.size*LDT_ENTRY_SIZE; ++ if (size > bytecount) ++ size = bytecount; ++ ++ err = 0; ++ if (copy_to_user(ptr, mm->context.ldt, size)) ++ err = -EFAULT; ++ up(&mm->context.sem); ++ if (err < 0) ++ goto error_return; ++ if (size != bytecount) { ++ /* zero-fill the rest */ ++ if (clear_user(ptr+size, bytecount-size) != 0) { ++ err = -EFAULT; ++ goto error_return; ++ } ++ } ++ return bytecount; ++error_return: ++ return err; ++} ++ ++static int read_default_ldt(void __user * ptr, unsigned long bytecount) ++{ ++ int err; ++ unsigned long size; ++ void *address; ++ ++ err = 0; ++ address = &default_ldt[0]; ++ size = 5*sizeof(struct desc_struct); ++ if (size > bytecount) ++ size = bytecount; ++ ++ err = size; ++ if (copy_to_user(ptr, address, size)) ++ err = -EFAULT; ++ ++ return err; ++} ++ ++static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) ++{ ++ struct mm_struct * mm = current->mm; ++ __u32 entry_1, entry_2; ++ int error; ++ struct user_desc ldt_info; ++ ++ error = -EINVAL; ++ if (bytecount != sizeof(ldt_info)) ++ goto out; ++ error = -EFAULT; ++ if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) ++ goto out; ++ ++ error = -EINVAL; ++ if (ldt_info.entry_number >= LDT_ENTRIES) ++ goto out; ++ if (ldt_info.contents == 3) { ++ if (oldmode) ++ goto out; ++ if (ldt_info.seg_not_present == 0) ++ goto out; ++ } ++ ++ down(&mm->context.sem); ++ if (ldt_info.entry_number >= mm->context.size) { ++ error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); ++ if (error < 0) ++ goto out_unlock; ++ } ++ ++ /* Allow LDTs to be cleared by the user. */ ++ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { ++ if (oldmode || LDT_empty(&ldt_info)) { ++ entry_1 = 0; ++ entry_2 = 0; ++ goto install; ++ } ++ } ++ ++ entry_1 = LDT_entry_a(&ldt_info); ++ entry_2 = LDT_entry_b(&ldt_info); ++ if (oldmode) ++ entry_2 &= ~(1 << 20); ++ ++ /* Install the new entry ... */ ++install: ++ error = write_ldt_entry(mm->context.ldt, ldt_info.entry_number, ++ entry_1, entry_2); ++ ++out_unlock: ++ up(&mm->context.sem); ++out: ++ return error; ++} ++ ++asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) ++{ ++ int ret = -ENOSYS; ++ ++ switch (func) { ++ case 0: ++ ret = read_ldt(ptr, bytecount); ++ break; ++ case 1: ++ ret = write_ldt(ptr, bytecount, 1); ++ break; ++ case 2: ++ ret = read_default_ldt(ptr, bytecount); ++ break; ++ case 0x11: ++ ret = write_ldt(ptr, bytecount, 0); ++ break; ++ } ++ return ret; ++} +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/microcode-xen.c 2007-08-27 14:02:03.000000000 -0400 +@@ -0,0 +1,144 @@ ++/* ++ * Intel CPU Microcode Update Driver for Linux ++ * ++ * Copyright (C) 2000-2004 Tigran Aivazian ++ * ++ * This driver allows to upgrade microcode on Intel processors ++ * belonging to IA-32 family - PentiumPro, Pentium II, ++ * Pentium III, Xeon, Pentium 4, etc. ++ * ++ * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual, ++ * Order Number 245472 or free download from: ++ * ++ * http://developer.intel.com/design/pentium4/manuals/245472.htm ++ * ++ * For more information, go to http://www.urbanmyth.org/microcode ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ */ ++ ++//#define DEBUG /* pr_debug */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); ++MODULE_AUTHOR("Tigran Aivazian "); ++MODULE_LICENSE("GPL"); ++ ++static int verbose; ++module_param(verbose, int, 0644); ++ ++#define MICROCODE_VERSION "1.14a-xen" ++ ++#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ ++#define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */ ++#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ ++ ++/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ ++static DEFINE_MUTEX(microcode_mutex); ++ ++static int microcode_open (struct inode *unused1, struct file *unused2) ++{ ++ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; ++} ++ ++ ++static int do_microcode_update (const void __user *ubuf, size_t len) ++{ ++ int err; ++ void *kbuf; ++ ++ kbuf = vmalloc(len); ++ if (!kbuf) ++ return -ENOMEM; ++ ++ if (copy_from_user(kbuf, ubuf, len) == 0) { ++ struct xen_platform_op op; ++ ++ op.cmd = XENPF_microcode_update; ++ set_xen_guest_handle(op.u.microcode.data, kbuf); ++ op.u.microcode.length = len; ++ err = HYPERVISOR_platform_op(&op); ++ } else ++ err = -EFAULT; ++ ++ vfree(kbuf); ++ ++ return err; ++} ++ ++static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) ++{ ++ ssize_t ret; ++ ++ if (len < MC_HEADER_SIZE) { ++ printk(KERN_ERR "microcode: not enough data\n"); ++ return -EINVAL; ++ } ++ ++ mutex_lock(µcode_mutex); ++ ++ ret = do_microcode_update(buf, len); ++ if (!ret) ++ ret = (ssize_t)len; ++ ++ mutex_unlock(µcode_mutex); ++ ++ return ret; ++} ++ ++static struct file_operations microcode_fops = { ++ .owner = THIS_MODULE, ++ .write = microcode_write, ++ .open = microcode_open, ++}; ++ ++static struct miscdevice microcode_dev = { ++ .minor = MICROCODE_MINOR, ++ .name = "microcode", ++ .fops = µcode_fops, ++}; ++ ++static int __init microcode_init (void) ++{ ++ int error; ++ ++ error = misc_register(µcode_dev); ++ if (error) { ++ printk(KERN_ERR ++ "microcode: can't misc_register on minor=%d\n", ++ MICROCODE_MINOR); ++ return error; ++ } ++ ++ printk(KERN_INFO ++ "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); ++ return 0; ++} ++ ++static void __exit microcode_exit (void) ++{ ++ misc_deregister(µcode_dev); ++} ++ ++module_init(microcode_init) ++module_exit(microcode_exit) ++MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/mpparse-xen.c 2007-08-27 14:02:03.000000000 -0400 +@@ -0,0 +1,1185 @@ ++/* ++ * Intel Multiprocessor Specification 1.1 and 1.4 ++ * compliant MP-table parsing routines. ++ * ++ * (c) 1995 Alan Cox, Building #3 ++ * (c) 1998, 1999, 2000 Ingo Molnar ++ * ++ * Fixes ++ * Erich Boleyn : MP v1.4 and additional changes. ++ * Alan Cox : Added EBDA scanning ++ * Ingo Molnar : various cleanups and rewrites ++ * Maciej W. Rozycki: Bits for default MP configurations ++ * Paul Diefenbaugh: Added full ACPI support ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++/* Have we found an MP table */ ++int smp_found_config; ++unsigned int __initdata maxcpus = NR_CPUS; ++ ++/* ++ * Various Linux-internal data structures created from the ++ * MP-table. ++ */ ++int apic_version [MAX_APICS]; ++int mp_bus_id_to_type [MAX_MP_BUSSES]; ++int mp_bus_id_to_node [MAX_MP_BUSSES]; ++int mp_bus_id_to_local [MAX_MP_BUSSES]; ++int quad_local_to_mp_bus_id [NR_CPUS/4][4]; ++int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; ++static int mp_current_pci_id; ++ ++/* I/O APIC entries */ ++struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; ++ ++/* # of MP IRQ source entries */ ++struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; ++ ++/* MP IRQ source entries */ ++int mp_irq_entries; ++ ++int nr_ioapics; ++ ++int pic_mode; ++unsigned long mp_lapic_addr; ++ ++unsigned int def_to_bigsmp = 0; ++ ++/* Processor that is doing the boot up */ ++unsigned int boot_cpu_physical_apicid = -1U; ++/* Internal processor count */ ++static unsigned int __devinitdata num_processors; ++ ++/* Bitmask of physically existing CPUs */ ++physid_mask_t phys_cpu_present_map; ++ ++u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; ++ ++/* ++ * Intel MP BIOS table parsing routines: ++ */ ++ ++ ++/* ++ * Checksum an MP configuration block. ++ */ ++ ++static int __init mpf_checksum(unsigned char *mp, int len) ++{ ++ int sum = 0; ++ ++ while (len--) ++ sum += *mp++; ++ ++ return sum & 0xFF; ++} ++ ++/* ++ * Have to match translation table entries to main table entries by counter ++ * hence the mpc_record variable .... can't see a less disgusting way of ++ * doing this .... ++ */ ++ ++static int mpc_record; ++static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata; ++ ++#ifndef CONFIG_XEN ++static void __devinit MP_processor_info (struct mpc_config_processor *m) ++{ ++ int ver, apicid; ++ physid_mask_t phys_cpu; ++ ++ if (!(m->mpc_cpuflag & CPU_ENABLED)) ++ return; ++ ++ apicid = mpc_apic_id(m, translation_table[mpc_record]); ++ ++ if (m->mpc_featureflag&(1<<0)) ++ Dprintk(" Floating point unit present.\n"); ++ if (m->mpc_featureflag&(1<<7)) ++ Dprintk(" Machine Exception supported.\n"); ++ if (m->mpc_featureflag&(1<<8)) ++ Dprintk(" 64 bit compare & exchange supported.\n"); ++ if (m->mpc_featureflag&(1<<9)) ++ Dprintk(" Internal APIC present.\n"); ++ if (m->mpc_featureflag&(1<<11)) ++ Dprintk(" SEP present.\n"); ++ if (m->mpc_featureflag&(1<<12)) ++ Dprintk(" MTRR present.\n"); ++ if (m->mpc_featureflag&(1<<13)) ++ Dprintk(" PGE present.\n"); ++ if (m->mpc_featureflag&(1<<14)) ++ Dprintk(" MCA present.\n"); ++ if (m->mpc_featureflag&(1<<15)) ++ Dprintk(" CMOV present.\n"); ++ if (m->mpc_featureflag&(1<<16)) ++ Dprintk(" PAT present.\n"); ++ if (m->mpc_featureflag&(1<<17)) ++ Dprintk(" PSE present.\n"); ++ if (m->mpc_featureflag&(1<<18)) ++ Dprintk(" PSN present.\n"); ++ if (m->mpc_featureflag&(1<<19)) ++ Dprintk(" Cache Line Flush Instruction present.\n"); ++ /* 20 Reserved */ ++ if (m->mpc_featureflag&(1<<21)) ++ Dprintk(" Debug Trace and EMON Store present.\n"); ++ if (m->mpc_featureflag&(1<<22)) ++ Dprintk(" ACPI Thermal Throttle Registers present.\n"); ++ if (m->mpc_featureflag&(1<<23)) ++ Dprintk(" MMX present.\n"); ++ if (m->mpc_featureflag&(1<<24)) ++ Dprintk(" FXSR present.\n"); ++ if (m->mpc_featureflag&(1<<25)) ++ Dprintk(" XMM present.\n"); ++ if (m->mpc_featureflag&(1<<26)) ++ Dprintk(" Willamette New Instructions present.\n"); ++ if (m->mpc_featureflag&(1<<27)) ++ Dprintk(" Self Snoop present.\n"); ++ if (m->mpc_featureflag&(1<<28)) ++ Dprintk(" HT present.\n"); ++ if (m->mpc_featureflag&(1<<29)) ++ Dprintk(" Thermal Monitor present.\n"); ++ /* 30, 31 Reserved */ ++ ++ ++ if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { ++ Dprintk(" Bootup CPU\n"); ++ boot_cpu_physical_apicid = m->mpc_apicid; ++ } ++ ++ ver = m->mpc_apicver; ++ ++ /* ++ * Validate version ++ */ ++ if (ver == 0x0) { ++ printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " ++ "fixing up to 0x10. (tell your hw vendor)\n", ++ m->mpc_apicid); ++ ver = 0x10; ++ } ++ apic_version[m->mpc_apicid] = ver; ++ ++ phys_cpu = apicid_to_cpu_present(apicid); ++ physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); ++ ++ if (num_processors >= NR_CPUS) { ++ printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." ++ " Processor ignored.\n", NR_CPUS); ++ return; ++ } ++ ++ if (num_processors >= maxcpus) { ++ printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." ++ " Processor ignored.\n", maxcpus); ++ return; ++ } ++ ++ cpu_set(num_processors, cpu_possible_map); ++ num_processors++; ++ ++ /* ++ * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y ++ * but we need to work other dependencies like SMP_SUSPEND etc ++ * before this can be done without some confusion. ++ * if (CPU_HOTPLUG_ENABLED || num_processors > 8) ++ * - Ashok Raj ++ */ ++ if (num_processors > 8) { ++ switch (boot_cpu_data.x86_vendor) { ++ case X86_VENDOR_INTEL: ++ if (!APIC_XAPIC(ver)) { ++ def_to_bigsmp = 0; ++ break; ++ } ++ /* If P4 and above fall through */ ++ case X86_VENDOR_AMD: ++ def_to_bigsmp = 1; ++ } ++ } ++ bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; ++} ++#else ++void __init MP_processor_info (struct mpc_config_processor *m) ++{ ++ num_processors++; ++} ++#endif /* CONFIG_XEN */ ++ ++static void __init MP_bus_info (struct mpc_config_bus *m) ++{ ++ char str[7]; ++ ++ memcpy(str, m->mpc_bustype, 6); ++ str[6] = 0; ++ ++ mpc_oem_bus_info(m, str, translation_table[mpc_record]); ++ ++ if (m->mpc_busid >= MAX_MP_BUSSES) { ++ printk(KERN_WARNING "MP table busid value (%d) for bustype %s " ++ " is too large, max. supported is %d\n", ++ m->mpc_busid, str, MAX_MP_BUSSES - 1); ++ return; ++ } ++ ++ if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { ++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; ++ } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { ++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; ++ } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { ++ mpc_oem_pci_bus(m, translation_table[mpc_record]); ++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; ++ mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; ++ mp_current_pci_id++; ++ } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { ++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; ++ } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) { ++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98; ++ } else { ++ printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); ++ } ++} ++ ++static void __init MP_ioapic_info (struct mpc_config_ioapic *m) ++{ ++ if (!(m->mpc_flags & MPC_APIC_USABLE)) ++ return; ++ ++ printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n", ++ m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); ++ if (nr_ioapics >= MAX_IO_APICS) { ++ printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n", ++ MAX_IO_APICS, nr_ioapics); ++ panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); ++ } ++ if (!m->mpc_apicaddr) { ++ printk(KERN_ERR "WARNING: bogus zero I/O APIC address" ++ " found in MP table, skipping!\n"); ++ return; ++ } ++ mp_ioapics[nr_ioapics] = *m; ++ nr_ioapics++; ++} ++ ++static void __init MP_intsrc_info (struct mpc_config_intsrc *m) ++{ ++ mp_irqs [mp_irq_entries] = *m; ++ Dprintk("Int: type %d, pol %d, trig %d, bus %d," ++ " IRQ %02x, APIC ID %x, APIC INT %02x\n", ++ m->mpc_irqtype, m->mpc_irqflag & 3, ++ (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, ++ m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); ++ if (++mp_irq_entries == MAX_IRQ_SOURCES) ++ panic("Max # of irq sources exceeded!!\n"); ++} ++ ++static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) ++{ ++ Dprintk("Lint: type %d, pol %d, trig %d, bus %d," ++ " IRQ %02x, APIC ID %x, APIC LINT %02x\n", ++ m->mpc_irqtype, m->mpc_irqflag & 3, ++ (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, ++ m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); ++ /* ++ * Well it seems all SMP boards in existence ++ * use ExtINT/LVT1 == LINT0 and ++ * NMI/LVT2 == LINT1 - the following check ++ * will show us if this assumptions is false. ++ * Until then we do not have to add baggage. ++ */ ++ if ((m->mpc_irqtype == mp_ExtINT) && ++ (m->mpc_destapiclint != 0)) ++ BUG(); ++ if ((m->mpc_irqtype == mp_NMI) && ++ (m->mpc_destapiclint != 1)) ++ BUG(); ++} ++ ++#ifdef CONFIG_X86_NUMAQ ++static void __init MP_translation_info (struct mpc_config_translation *m) ++{ ++ printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local); ++ ++ if (mpc_record >= MAX_MPC_ENTRY) ++ printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); ++ else ++ translation_table[mpc_record] = m; /* stash this for later */ ++ if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) ++ node_set_online(m->trans_quad); ++} ++ ++/* ++ * Read/parse the MPC oem tables ++ */ ++ ++static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \ ++ unsigned short oemsize) ++{ ++ int count = sizeof (*oemtable); /* the header size */ ++ unsigned char *oemptr = ((unsigned char *)oemtable)+count; ++ ++ mpc_record = 0; ++ printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable); ++ if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4)) ++ { ++ printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", ++ oemtable->oem_signature[0], ++ oemtable->oem_signature[1], ++ oemtable->oem_signature[2], ++ oemtable->oem_signature[3]); ++ return; ++ } ++ if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length)) ++ { ++ printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); ++ return; ++ } ++ while (count < oemtable->oem_length) { ++ switch (*oemptr) { ++ case MP_TRANSLATION: ++ { ++ struct mpc_config_translation *m= ++ (struct mpc_config_translation *)oemptr; ++ MP_translation_info(m); ++ oemptr += sizeof(*m); ++ count += sizeof(*m); ++ ++mpc_record; ++ break; ++ } ++ default: ++ { ++ printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr); ++ return; ++ } ++ } ++ } ++} ++ ++static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, ++ char *productid) ++{ ++ if (strncmp(oem, "IBM NUMA", 8)) ++ printk("Warning! May not be a NUMA-Q system!\n"); ++ if (mpc->mpc_oemptr) ++ smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr, ++ mpc->mpc_oemsize); ++} ++#endif /* CONFIG_X86_NUMAQ */ ++ ++/* ++ * Read/parse the MPC ++ */ ++ ++static int __init smp_read_mpc(struct mp_config_table *mpc) ++{ ++ char str[16]; ++ char oem[10]; ++ int count=sizeof(*mpc); ++ unsigned char *mpt=((unsigned char *)mpc)+count; ++ ++ if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { ++ printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n", ++ *(u32 *)mpc->mpc_signature); ++ return 0; ++ } ++ if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { ++ printk(KERN_ERR "SMP mptable: checksum error!\n"); ++ return 0; ++ } ++ if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { ++ printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n", ++ mpc->mpc_spec); ++ return 0; ++ } ++ if (!mpc->mpc_lapic) { ++ printk(KERN_ERR "SMP mptable: null local APIC address!\n"); ++ return 0; ++ } ++ memcpy(oem,mpc->mpc_oem,8); ++ oem[8]=0; ++ printk(KERN_INFO "OEM ID: %s ",oem); ++ ++ memcpy(str,mpc->mpc_productid,12); ++ str[12]=0; ++ printk("Product ID: %s ",str); ++ ++ mps_oem_check(mpc, oem, str); ++ ++ printk("APIC at: 0x%lX\n",mpc->mpc_lapic); ++ ++ /* ++ * Save the local APIC address (it might be non-default) -- but only ++ * if we're not using ACPI. ++ */ ++ if (!acpi_lapic) ++ mp_lapic_addr = mpc->mpc_lapic; ++ ++ /* ++ * Now process the configuration blocks. ++ */ ++ mpc_record = 0; ++ while (count < mpc->mpc_length) { ++ switch(*mpt) { ++ case MP_PROCESSOR: ++ { ++ struct mpc_config_processor *m= ++ (struct mpc_config_processor *)mpt; ++ /* ACPI may have already provided this data */ ++ if (!acpi_lapic) ++ MP_processor_info(m); ++ mpt += sizeof(*m); ++ count += sizeof(*m); ++ break; ++ } ++ case MP_BUS: ++ { ++ struct mpc_config_bus *m= ++ (struct mpc_config_bus *)mpt; ++ MP_bus_info(m); ++ mpt += sizeof(*m); ++ count += sizeof(*m); ++ break; ++ } ++ case MP_IOAPIC: ++ { ++ struct mpc_config_ioapic *m= ++ (struct mpc_config_ioapic *)mpt; ++ MP_ioapic_info(m); ++ mpt+=sizeof(*m); ++ count+=sizeof(*m); ++ break; ++ } ++ case MP_INTSRC: ++ { ++ struct mpc_config_intsrc *m= ++ (struct mpc_config_intsrc *)mpt; ++ ++ MP_intsrc_info(m); ++ mpt+=sizeof(*m); ++ count+=sizeof(*m); ++ break; ++ } ++ case MP_LINTSRC: ++ { ++ struct mpc_config_lintsrc *m= ++ (struct mpc_config_lintsrc *)mpt; ++ MP_lintsrc_info(m); ++ mpt+=sizeof(*m); ++ count+=sizeof(*m); ++ break; ++ } ++ default: ++ { ++ count = mpc->mpc_length; ++ break; ++ } ++ } ++ ++mpc_record; ++ } ++ clustered_apic_check(); ++ if (!num_processors) ++ printk(KERN_ERR "SMP mptable: no processors registered!\n"); ++ return num_processors; ++} ++ ++static int __init ELCR_trigger(unsigned int irq) ++{ ++ unsigned int port; ++ ++ port = 0x4d0 + (irq >> 3); ++ return (inb(port) >> (irq & 7)) & 1; ++} ++ ++static void __init construct_default_ioirq_mptable(int mpc_default_type) ++{ ++ struct mpc_config_intsrc intsrc; ++ int i; ++ int ELCR_fallback = 0; ++ ++ intsrc.mpc_type = MP_INTSRC; ++ intsrc.mpc_irqflag = 0; /* conforming */ ++ intsrc.mpc_srcbus = 0; ++ intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; ++ ++ intsrc.mpc_irqtype = mp_INT; ++ ++ /* ++ * If true, we have an ISA/PCI system with no IRQ entries ++ * in the MP table. To prevent the PCI interrupts from being set up ++ * incorrectly, we try to use the ELCR. The sanity check to see if ++ * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can ++ * never be level sensitive, so we simply see if the ELCR agrees. ++ * If it does, we assume it's valid. ++ */ ++ if (mpc_default_type == 5) { ++ printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); ++ ++ if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13)) ++ printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n"); ++ else { ++ printk(KERN_INFO "Using ELCR to identify PCI interrupts\n"); ++ ELCR_fallback = 1; ++ } ++ } ++ ++ for (i = 0; i < 16; i++) { ++ switch (mpc_default_type) { ++ case 2: ++ if (i == 0 || i == 13) ++ continue; /* IRQ0 & IRQ13 not connected */ ++ /* fall through */ ++ default: ++ if (i == 2) ++ continue; /* IRQ2 is never connected */ ++ } ++ ++ if (ELCR_fallback) { ++ /* ++ * If the ELCR indicates a level-sensitive interrupt, we ++ * copy that information over to the MP table in the ++ * irqflag field (level sensitive, active high polarity). ++ */ ++ if (ELCR_trigger(i)) ++ intsrc.mpc_irqflag = 13; ++ else ++ intsrc.mpc_irqflag = 0; ++ } ++ ++ intsrc.mpc_srcbusirq = i; ++ intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ ++ MP_intsrc_info(&intsrc); ++ } ++ ++ intsrc.mpc_irqtype = mp_ExtINT; ++ intsrc.mpc_srcbusirq = 0; ++ intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ ++ MP_intsrc_info(&intsrc); ++} ++ ++static inline void __init construct_default_ISA_mptable(int mpc_default_type) ++{ ++ struct mpc_config_processor processor; ++ struct mpc_config_bus bus; ++ struct mpc_config_ioapic ioapic; ++ struct mpc_config_lintsrc lintsrc; ++ int linttypes[2] = { mp_ExtINT, mp_NMI }; ++ int i; ++ ++ /* ++ * local APIC has default address ++ */ ++ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; ++ ++ /* ++ * 2 CPUs, numbered 0 & 1. ++ */ ++ processor.mpc_type = MP_PROCESSOR; ++ /* Either an integrated APIC or a discrete 82489DX. */ ++ processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; ++ processor.mpc_cpuflag = CPU_ENABLED; ++ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | ++ (boot_cpu_data.x86_model << 4) | ++ boot_cpu_data.x86_mask; ++ processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; ++ processor.mpc_reserved[0] = 0; ++ processor.mpc_reserved[1] = 0; ++ for (i = 0; i < 2; i++) { ++ processor.mpc_apicid = i; ++ MP_processor_info(&processor); ++ } ++ ++ bus.mpc_type = MP_BUS; ++ bus.mpc_busid = 0; ++ switch (mpc_default_type) { ++ default: ++ printk("???\n"); ++ printk(KERN_ERR "Unknown standard configuration %d\n", ++ mpc_default_type); ++ /* fall through */ ++ case 1: ++ case 5: ++ memcpy(bus.mpc_bustype, "ISA ", 6); ++ break; ++ case 2: ++ case 6: ++ case 3: ++ memcpy(bus.mpc_bustype, "EISA ", 6); ++ break; ++ case 4: ++ case 7: ++ memcpy(bus.mpc_bustype, "MCA ", 6); ++ } ++ MP_bus_info(&bus); ++ if (mpc_default_type > 4) { ++ bus.mpc_busid = 1; ++ memcpy(bus.mpc_bustype, "PCI ", 6); ++ MP_bus_info(&bus); ++ } ++ ++ ioapic.mpc_type = MP_IOAPIC; ++ ioapic.mpc_apicid = 2; ++ ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; ++ ioapic.mpc_flags = MPC_APIC_USABLE; ++ ioapic.mpc_apicaddr = 0xFEC00000; ++ MP_ioapic_info(&ioapic); ++ ++ /* ++ * We set up most of the low 16 IO-APIC pins according to MPS rules. ++ */ ++ construct_default_ioirq_mptable(mpc_default_type); ++ ++ lintsrc.mpc_type = MP_LINTSRC; ++ lintsrc.mpc_irqflag = 0; /* conforming */ ++ lintsrc.mpc_srcbusid = 0; ++ lintsrc.mpc_srcbusirq = 0; ++ lintsrc.mpc_destapic = MP_APIC_ALL; ++ for (i = 0; i < 2; i++) { ++ lintsrc.mpc_irqtype = linttypes[i]; ++ lintsrc.mpc_destapiclint = i; ++ MP_lintsrc_info(&lintsrc); ++ } ++} ++ ++static struct intel_mp_floating *mpf_found; ++ ++/* ++ * Scan the memory blocks for an SMP configuration block. ++ */ ++void __init get_smp_config (void) ++{ ++ struct intel_mp_floating *mpf = mpf_found; ++ ++ /* ++ * ACPI supports both logical (e.g. Hyper-Threading) and physical ++ * processors, where MPS only supports physical. ++ */ ++ if (acpi_lapic && acpi_ioapic) { ++ printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n"); ++ return; ++ } ++ else if (acpi_lapic) ++ printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n"); ++ ++ printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); ++ if (mpf->mpf_feature2 & (1<<7)) { ++ printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); ++ pic_mode = 1; ++ } else { ++ printk(KERN_INFO " Virtual Wire compatibility mode.\n"); ++ pic_mode = 0; ++ } ++ ++ /* ++ * Now see if we need to read further. ++ */ ++ if (mpf->mpf_feature1 != 0) { ++ ++ printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1); ++ construct_default_ISA_mptable(mpf->mpf_feature1); ++ ++ } else if (mpf->mpf_physptr) { ++ ++ /* ++ * Read the physical hardware table. Anything here will ++ * override the defaults. ++ */ ++ if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) { ++ smp_found_config = 0; ++ printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); ++ printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); ++ return; ++ } ++ /* ++ * If there are no explicit MP IRQ entries, then we are ++ * broken. We set up most of the low 16 IO-APIC pins to ++ * ISA defaults and hope it will work. ++ */ ++ if (!mp_irq_entries) { ++ struct mpc_config_bus bus; ++ ++ printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n"); ++ ++ bus.mpc_type = MP_BUS; ++ bus.mpc_busid = 0; ++ memcpy(bus.mpc_bustype, "ISA ", 6); ++ MP_bus_info(&bus); ++ ++ construct_default_ioirq_mptable(0); ++ } ++ ++ } else ++ BUG(); ++ ++ printk(KERN_INFO "Processors: %d\n", num_processors); ++ /* ++ * Only use the first configuration found. ++ */ ++} ++ ++static int __init smp_scan_config (unsigned long base, unsigned long length) ++{ ++ unsigned long *bp = isa_bus_to_virt(base); ++ struct intel_mp_floating *mpf; ++ ++ Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); ++ if (sizeof(*mpf) != 16) ++ printk("Error: MPF size\n"); ++ ++ while (length > 0) { ++ mpf = (struct intel_mp_floating *)bp; ++ if ((*bp == SMP_MAGIC_IDENT) && ++ (mpf->mpf_length == 1) && ++ !mpf_checksum((unsigned char *)bp, 16) && ++ ((mpf->mpf_specification == 1) ++ || (mpf->mpf_specification == 4)) ) { ++ ++ smp_found_config = 1; ++#ifndef CONFIG_XEN ++ printk(KERN_INFO "found SMP MP-table at %08lx\n", ++ virt_to_phys(mpf)); ++ reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); ++ if (mpf->mpf_physptr) { ++ /* ++ * We cannot access to MPC table to compute ++ * table size yet, as only few megabytes from ++ * the bottom is mapped now. ++ * PC-9800's MPC table places on the very last ++ * of physical memory; so that simply reserving ++ * PAGE_SIZE from mpg->mpf_physptr yields BUG() ++ * in reserve_bootmem. ++ */ ++ unsigned long size = PAGE_SIZE; ++ unsigned long end = max_low_pfn * PAGE_SIZE; ++ if (mpf->mpf_physptr + size > end) ++ size = end - mpf->mpf_physptr; ++ reserve_bootmem(mpf->mpf_physptr, size); ++ } ++#else ++ printk(KERN_INFO "found SMP MP-table at %08lx\n", ++ ((unsigned long)bp - (unsigned long)isa_bus_to_virt(base)) + base); ++#endif ++ ++ mpf_found = mpf; ++ return 1; ++ } ++ bp += 4; ++ length -= 16; ++ } ++ return 0; ++} ++ ++void __init find_smp_config (void) ++{ ++#ifndef CONFIG_XEN ++ unsigned int address; ++#endif ++ ++ /* ++ * FIXME: Linux assumes you have 640K of base ram.. ++ * this continues the error... ++ * ++ * 1) Scan the bottom 1K for a signature ++ * 2) Scan the top 1K of base RAM ++ * 3) Scan the 64K of bios ++ */ ++ if (smp_scan_config(0x0,0x400) || ++ smp_scan_config(639*0x400,0x400) || ++ smp_scan_config(0xF0000,0x10000)) ++ return; ++ /* ++ * If it is an SMP machine we should know now, unless the ++ * configuration is in an EISA/MCA bus machine with an ++ * extended bios data area. ++ * ++ * there is a real-mode segmented pointer pointing to the ++ * 4K EBDA area at 0x40E, calculate and scan it here. ++ * ++ * NOTE! There are Linux loaders that will corrupt the EBDA ++ * area, and as such this kind of SMP config may be less ++ * trustworthy, simply because the SMP table may have been ++ * stomped on during early boot. These loaders are buggy and ++ * should be fixed. ++ * ++ * MP1.4 SPEC states to only scan first 1K of 4K EBDA. ++ */ ++ ++#ifndef CONFIG_XEN ++ address = get_bios_ebda(); ++ if (address) ++ smp_scan_config(address, 0x400); ++#endif ++} ++ ++int es7000_plat; ++ ++/* -------------------------------------------------------------------------- ++ ACPI-based MP Configuration ++ -------------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_ACPI ++ ++void __init mp_register_lapic_address ( ++ u64 address) ++{ ++#ifndef CONFIG_XEN ++ mp_lapic_addr = (unsigned long) address; ++ ++ set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); ++ ++ if (boot_cpu_physical_apicid == -1U) ++ boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); ++ ++ Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); ++#endif ++} ++ ++ ++void __devinit mp_register_lapic ( ++ u8 id, ++ u8 enabled) ++{ ++ struct mpc_config_processor processor; ++ int boot_cpu = 0; ++ ++ if (MAX_APICS - id <= 0) { ++ printk(KERN_WARNING "Processor #%d invalid (max %d)\n", ++ id, MAX_APICS); ++ return; ++ } ++ ++ if (id == boot_cpu_physical_apicid) ++ boot_cpu = 1; ++ ++#ifndef CONFIG_XEN ++ processor.mpc_type = MP_PROCESSOR; ++ processor.mpc_apicid = id; ++ processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR)); ++ processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); ++ processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0); ++ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | ++ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; ++ processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; ++ processor.mpc_reserved[0] = 0; ++ processor.mpc_reserved[1] = 0; ++#endif ++ ++ MP_processor_info(&processor); ++} ++ ++#ifdef CONFIG_X86_IO_APIC ++ ++#define MP_ISA_BUS 0 ++#define MP_MAX_IOAPIC_PIN 127 ++ ++static struct mp_ioapic_routing { ++ int apic_id; ++ int gsi_base; ++ int gsi_end; ++ u32 pin_programmed[4]; ++} mp_ioapic_routing[MAX_IO_APICS]; ++ ++ ++static int mp_find_ioapic ( ++ int gsi) ++{ ++ int i = 0; ++ ++ /* Find the IOAPIC that manages this GSI. */ ++ for (i = 0; i < nr_ioapics; i++) { ++ if ((gsi >= mp_ioapic_routing[i].gsi_base) ++ && (gsi <= mp_ioapic_routing[i].gsi_end)) ++ return i; ++ } ++ ++ printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); ++ ++ return -1; ++} ++ ++ ++void __init mp_register_ioapic ( ++ u8 id, ++ u32 address, ++ u32 gsi_base) ++{ ++ int idx = 0; ++ int tmpid; ++ ++ if (nr_ioapics >= MAX_IO_APICS) { ++ printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " ++ "(found %d)\n", MAX_IO_APICS, nr_ioapics); ++ panic("Recompile kernel with bigger MAX_IO_APICS!\n"); ++ } ++ if (!address) { ++ printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" ++ " found in MADT table, skipping!\n"); ++ return; ++ } ++ ++ idx = nr_ioapics++; ++ ++ mp_ioapics[idx].mpc_type = MP_IOAPIC; ++ mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; ++ mp_ioapics[idx].mpc_apicaddr = address; ++ ++#ifndef CONFIG_XEN ++ set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); ++#endif ++ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) ++ tmpid = io_apic_get_unique_id(idx, id); ++ else ++ tmpid = id; ++ if (tmpid == -1) { ++ nr_ioapics--; ++ return; ++ } ++ mp_ioapics[idx].mpc_apicid = tmpid; ++ mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); ++ ++ /* ++ * Build basic GSI lookup table to facilitate gsi->io_apic lookups ++ * and to prevent reprogramming of IOAPIC pins (PCI GSIs). ++ */ ++ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; ++ mp_ioapic_routing[idx].gsi_base = gsi_base; ++ mp_ioapic_routing[idx].gsi_end = gsi_base + ++ io_apic_get_redir_entries(idx); ++ ++ printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " ++ "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, ++ mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, ++ mp_ioapic_routing[idx].gsi_base, ++ mp_ioapic_routing[idx].gsi_end); ++ ++ return; ++} ++ ++ ++void __init mp_override_legacy_irq ( ++ u8 bus_irq, ++ u8 polarity, ++ u8 trigger, ++ u32 gsi) ++{ ++ struct mpc_config_intsrc intsrc; ++ int ioapic = -1; ++ int pin = -1; ++ ++ /* ++ * Convert 'gsi' to 'ioapic.pin'. ++ */ ++ ioapic = mp_find_ioapic(gsi); ++ if (ioapic < 0) ++ return; ++ pin = gsi - mp_ioapic_routing[ioapic].gsi_base; ++ ++ /* ++ * TBD: This check is for faulty timer entries, where the override ++ * erroneously sets the trigger to level, resulting in a HUGE ++ * increase of timer interrupts! ++ */ ++ if ((bus_irq == 0) && (trigger == 3)) ++ trigger = 1; ++ ++ intsrc.mpc_type = MP_INTSRC; ++ intsrc.mpc_irqtype = mp_INT; ++ intsrc.mpc_irqflag = (trigger << 2) | polarity; ++ intsrc.mpc_srcbus = MP_ISA_BUS; ++ intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ ++ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ ++ intsrc.mpc_dstirq = pin; /* INTIN# */ ++ ++ Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", ++ intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, ++ (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, ++ intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); ++ ++ mp_irqs[mp_irq_entries] = intsrc; ++ if (++mp_irq_entries == MAX_IRQ_SOURCES) ++ panic("Max # of irq sources exceeded!\n"); ++ ++ return; ++} ++ ++void __init mp_config_acpi_legacy_irqs (void) ++{ ++ struct mpc_config_intsrc intsrc; ++ int i = 0; ++ int ioapic = -1; ++ ++ /* ++ * Fabricate the legacy ISA bus (bus #31). ++ */ ++ mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; ++ Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); ++ ++ /* ++ * Older generations of ES7000 have no legacy identity mappings ++ */ ++ if (es7000_plat == 1) ++ return; ++ ++ /* ++ * Locate the IOAPIC that manages the ISA IRQs (0-15). ++ */ ++ ioapic = mp_find_ioapic(0); ++ if (ioapic < 0) ++ return; ++ ++ intsrc.mpc_type = MP_INTSRC; ++ intsrc.mpc_irqflag = 0; /* Conforming */ ++ intsrc.mpc_srcbus = MP_ISA_BUS; ++ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; ++ ++ /* ++ * Use the default configuration for the IRQs 0-15. Unless ++ * overriden by (MADT) interrupt source override entries. ++ */ ++ for (i = 0; i < 16; i++) { ++ int idx; ++ ++ for (idx = 0; idx < mp_irq_entries; idx++) { ++ struct mpc_config_intsrc *irq = mp_irqs + idx; ++ ++ /* Do we already have a mapping for this ISA IRQ? */ ++ if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i) ++ break; ++ ++ /* Do we already have a mapping for this IOAPIC pin */ ++ if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && ++ (irq->mpc_dstirq == i)) ++ break; ++ } ++ ++ if (idx != mp_irq_entries) { ++ printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); ++ continue; /* IRQ already used */ ++ } ++ ++ intsrc.mpc_irqtype = mp_INT; ++ intsrc.mpc_srcbusirq = i; /* Identity mapped */ ++ intsrc.mpc_dstirq = i; ++ ++ Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " ++ "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, ++ (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, ++ intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, ++ intsrc.mpc_dstirq); ++ ++ mp_irqs[mp_irq_entries] = intsrc; ++ if (++mp_irq_entries == MAX_IRQ_SOURCES) ++ panic("Max # of irq sources exceeded!\n"); ++ } ++} ++ ++#define MAX_GSI_NUM 4096 ++ ++int mp_register_gsi (u32 gsi, int triggering, int polarity) ++{ ++ int ioapic = -1; ++ int ioapic_pin = 0; ++ int idx, bit = 0; ++ static int pci_irq = 16; ++ /* ++ * Mapping between Global System Interrups, which ++ * represent all possible interrupts, and IRQs ++ * assigned to actual devices. ++ */ ++ static int gsi_to_irq[MAX_GSI_NUM]; ++ ++ /* Don't set up the ACPI SCI because it's already set up */ ++ if (acpi_fadt.sci_int == gsi) ++ return gsi; ++ ++ ioapic = mp_find_ioapic(gsi); ++ if (ioapic < 0) { ++ printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); ++ return gsi; ++ } ++ ++ ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; ++ ++ if (ioapic_renumber_irq) ++ gsi = ioapic_renumber_irq(ioapic, gsi); ++ ++ /* ++ * Avoid pin reprogramming. PRTs typically include entries ++ * with redundant pin->gsi mappings (but unique PCI devices); ++ * we only program the IOAPIC on the first. ++ */ ++ bit = ioapic_pin % 32; ++ idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32); ++ if (idx > 3) { ++ printk(KERN_ERR "Invalid reference to IOAPIC pin " ++ "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, ++ ioapic_pin); ++ return gsi; ++ } ++ if ((1< 15), but ++ * avoid a problem where the 8254 timer (IRQ0) is setup ++ * via an override (so it's not on pin 0 of the ioapic), ++ * and at the same time, the pin 0 interrupt is a PCI ++ * type. The gsi > 15 test could cause these two pins ++ * to be shared as IRQ0, and they are not shareable. ++ * So test for this condition, and if necessary, avoid ++ * the pin collision. ++ */ ++ if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0)) ++ gsi = pci_irq++; ++ /* ++ * Don't assign IRQ used by ACPI SCI ++ */ ++ if (gsi == acpi_fadt.sci_int) ++ gsi = pci_irq++; ++ gsi_to_irq[irq] = gsi; ++ } else { ++ printk(KERN_ERR "GSI %u is too high\n", gsi); ++ return gsi; ++ } ++ } ++ ++ io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, ++ triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, ++ polarity == ACPI_ACTIVE_HIGH ? 0 : 1); ++ return gsi; ++} ++ ++#endif /* CONFIG_X86_IO_APIC */ ++#endif /* CONFIG_ACPI */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/pci-dma-xen.c 2007-08-27 14:02:10.000000000 -0400 +@@ -0,0 +1,366 @@ ++/* ++ * Dynamic DMA mapping support. ++ * ++ * On i386 there is no hardware dynamic DMA address translation, ++ * so consistent alloc/free are merely page allocation/freeing. ++ * The rest of the dynamic DMA mapping interface is implemented ++ * in asm/pci.h. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __x86_64__ ++#include ++ ++int iommu_merge __read_mostly = 0; ++EXPORT_SYMBOL(iommu_merge); ++ ++dma_addr_t bad_dma_address __read_mostly; ++EXPORT_SYMBOL(bad_dma_address); ++ ++/* This tells the BIO block layer to assume merging. Default to off ++ because we cannot guarantee merging later. */ ++int iommu_bio_merge __read_mostly = 0; ++EXPORT_SYMBOL(iommu_bio_merge); ++ ++int force_iommu __read_mostly= 0; ++ ++__init int iommu_setup(char *p) ++{ ++ return 1; ++} ++ ++void __init pci_iommu_alloc(void) ++{ ++#ifdef CONFIG_SWIOTLB ++ pci_swiotlb_init(); ++#endif ++} ++ ++static int __init pci_iommu_init(void) ++{ ++ no_iommu_init(); ++ return 0; ++} ++ ++/* Must execute after PCI subsystem */ ++fs_initcall(pci_iommu_init); ++#endif ++ ++struct dma_coherent_mem { ++ void *virt_base; ++ u32 device_base; ++ int size; ++ int flags; ++ unsigned long *bitmap; ++}; ++ ++#define IOMMU_BUG_ON(test) \ ++do { \ ++ if (unlikely(test)) { \ ++ printk(KERN_ALERT "Fatal DMA error! " \ ++ "Please use 'swiotlb=force'\n"); \ ++ BUG(); \ ++ } \ ++} while (0) ++ ++int ++dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, ++ enum dma_data_direction direction) ++{ ++ int i, rc; ++ ++ if (direction == DMA_NONE) ++ BUG(); ++ WARN_ON(nents == 0 || sg[0].length == 0); ++ ++ if (swiotlb) { ++ rc = swiotlb_map_sg(hwdev, sg, nents, direction); ++ } else { ++ for (i = 0; i < nents; i++ ) { ++ sg[i].dma_address = ++ page_to_bus(sg[i].page) + sg[i].offset; ++ sg[i].dma_length = sg[i].length; ++ BUG_ON(!sg[i].page); ++ IOMMU_BUG_ON(address_needs_mapping( ++ hwdev, sg[i].dma_address)); ++ } ++ rc = nents; ++ } ++ ++ flush_write_buffers(); ++ return rc; ++} ++EXPORT_SYMBOL(dma_map_sg); ++ ++void ++dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, ++ enum dma_data_direction direction) ++{ ++ BUG_ON(direction == DMA_NONE); ++ if (swiotlb) ++ swiotlb_unmap_sg(hwdev, sg, nents, direction); ++} ++EXPORT_SYMBOL(dma_unmap_sg); ++ ++#ifdef CONFIG_HIGHMEM ++dma_addr_t ++dma_map_page(struct device *dev, struct page *page, unsigned long offset, ++ size_t size, enum dma_data_direction direction) ++{ ++ dma_addr_t dma_addr; ++ ++ BUG_ON(direction == DMA_NONE); ++ ++ if (swiotlb) { ++ dma_addr = swiotlb_map_page( ++ dev, page, offset, size, direction); ++ } else { ++ dma_addr = page_to_bus(page) + offset; ++ IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr)); ++ } ++ ++ return dma_addr; ++} ++EXPORT_SYMBOL(dma_map_page); ++ ++void ++dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, ++ enum dma_data_direction direction) ++{ ++ BUG_ON(direction == DMA_NONE); ++ if (swiotlb) ++ swiotlb_unmap_page(dev, dma_address, size, direction); ++} ++EXPORT_SYMBOL(dma_unmap_page); ++#endif /* CONFIG_HIGHMEM */ ++ ++int ++dma_mapping_error(dma_addr_t dma_addr) ++{ ++ if (swiotlb) ++ return swiotlb_dma_mapping_error(dma_addr); ++ return 0; ++} ++EXPORT_SYMBOL(dma_mapping_error); ++ ++int ++dma_supported(struct device *dev, u64 mask) ++{ ++ if (swiotlb) ++ return swiotlb_dma_supported(dev, mask); ++ /* ++ * By default we'll BUG when an infeasible DMA is requested, and ++ * request swiotlb=force (see IOMMU_BUG_ON). ++ */ ++ return 1; ++} ++EXPORT_SYMBOL(dma_supported); ++ ++void *dma_alloc_coherent(struct device *dev, size_t size, ++ dma_addr_t *dma_handle, gfp_t gfp) ++{ ++ void *ret; ++ struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; ++ unsigned int order = get_order(size); ++ unsigned long vstart; ++ u64 mask; ++ ++ /* ignore region specifiers */ ++ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); ++ ++ if (mem) { ++ int page = bitmap_find_free_region(mem->bitmap, mem->size, ++ order); ++ if (page >= 0) { ++ *dma_handle = mem->device_base + (page << PAGE_SHIFT); ++ ret = mem->virt_base + (page << PAGE_SHIFT); ++ memset(ret, 0, size); ++ return ret; ++ } ++ if (mem->flags & DMA_MEMORY_EXCLUSIVE) ++ return NULL; ++ } ++ ++ if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) ++ gfp |= GFP_DMA; ++ ++ vstart = __get_free_pages(gfp, order); ++ ret = (void *)vstart; ++ ++ if (dev != NULL && dev->coherent_dma_mask) ++ mask = dev->coherent_dma_mask; ++ else ++ mask = 0xffffffff; ++ ++ if (ret != NULL) { ++ if (xen_create_contiguous_region(vstart, order, ++ fls64(mask)) != 0) { ++ free_pages(vstart, order); ++ return NULL; ++ } ++ memset(ret, 0, size); ++ *dma_handle = virt_to_bus(ret); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(dma_alloc_coherent); ++ ++void dma_free_coherent(struct device *dev, size_t size, ++ void *vaddr, dma_addr_t dma_handle) ++{ ++ struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; ++ int order = get_order(size); ++ ++ if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { ++ int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; ++ ++ bitmap_release_region(mem->bitmap, page, order); ++ } else { ++ xen_destroy_contiguous_region((unsigned long)vaddr, order); ++ free_pages((unsigned long)vaddr, order); ++ } ++} ++EXPORT_SYMBOL(dma_free_coherent); ++ ++#ifdef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY ++int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, ++ dma_addr_t device_addr, size_t size, int flags) ++{ ++ void __iomem *mem_base; ++ int pages = size >> PAGE_SHIFT; ++ int bitmap_size = (pages + 31)/32; ++ ++ if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) ++ goto out; ++ if (!size) ++ goto out; ++ if (dev->dma_mem) ++ goto out; ++ ++ /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ ++ ++ mem_base = ioremap(bus_addr, size); ++ if (!mem_base) ++ goto out; ++ ++ dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); ++ if (!dev->dma_mem) ++ goto out; ++ memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem)); ++ dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL); ++ if (!dev->dma_mem->bitmap) ++ goto free1_out; ++ memset(dev->dma_mem->bitmap, 0, bitmap_size); ++ ++ dev->dma_mem->virt_base = mem_base; ++ dev->dma_mem->device_base = device_addr; ++ dev->dma_mem->size = pages; ++ dev->dma_mem->flags = flags; ++ ++ if (flags & DMA_MEMORY_MAP) ++ return DMA_MEMORY_MAP; ++ ++ return DMA_MEMORY_IO; ++ ++ free1_out: ++ kfree(dev->dma_mem->bitmap); ++ out: ++ return 0; ++} ++EXPORT_SYMBOL(dma_declare_coherent_memory); ++ ++void dma_release_declared_memory(struct device *dev) ++{ ++ struct dma_coherent_mem *mem = dev->dma_mem; ++ ++ if(!mem) ++ return; ++ dev->dma_mem = NULL; ++ iounmap(mem->virt_base); ++ kfree(mem->bitmap); ++ kfree(mem); ++} ++EXPORT_SYMBOL(dma_release_declared_memory); ++ ++void *dma_mark_declared_memory_occupied(struct device *dev, ++ dma_addr_t device_addr, size_t size) ++{ ++ struct dma_coherent_mem *mem = dev->dma_mem; ++ int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ int pos, err; ++ ++ if (!mem) ++ return ERR_PTR(-EINVAL); ++ ++ pos = (device_addr - mem->device_base) >> PAGE_SHIFT; ++ err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); ++ if (err != 0) ++ return ERR_PTR(err); ++ return mem->virt_base + (pos << PAGE_SHIFT); ++} ++EXPORT_SYMBOL(dma_mark_declared_memory_occupied); ++#endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */ ++ ++dma_addr_t ++dma_map_single(struct device *dev, void *ptr, size_t size, ++ enum dma_data_direction direction) ++{ ++ dma_addr_t dma; ++ ++ if (direction == DMA_NONE) ++ BUG(); ++ WARN_ON(size == 0); ++ ++ if (swiotlb) { ++ dma = swiotlb_map_single(dev, ptr, size, direction); ++ } else { ++ dma = virt_to_bus(ptr); ++ IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size)); ++ IOMMU_BUG_ON(address_needs_mapping(dev, dma)); ++ } ++ ++ flush_write_buffers(); ++ return dma; ++} ++EXPORT_SYMBOL(dma_map_single); ++ ++void ++dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, ++ enum dma_data_direction direction) ++{ ++ if (direction == DMA_NONE) ++ BUG(); ++ if (swiotlb) ++ swiotlb_unmap_single(dev, dma_addr, size, direction); ++} ++EXPORT_SYMBOL(dma_unmap_single); ++ ++void ++dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, ++ enum dma_data_direction direction) ++{ ++ if (swiotlb) ++ swiotlb_sync_single_for_cpu(dev, dma_handle, size, direction); ++} ++EXPORT_SYMBOL(dma_sync_single_for_cpu); ++ ++void ++dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, ++ enum dma_data_direction direction) ++{ ++ if (swiotlb) ++ swiotlb_sync_single_for_device(dev, dma_handle, size, direction); ++} ++EXPORT_SYMBOL(dma_sync_single_for_device); +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/process-xen.c 2007-08-27 14:02:03.000000000 -0400 +@@ -0,0 +1,853 @@ ++/* ++ * linux/arch/i386/kernel/process.c ++ * ++ * Copyright (C) 1995 Linus Torvalds ++ * ++ * Pentium III FXSR, SSE support ++ * Gareth Hughes , May 2000 ++ */ ++ ++/* ++ * This file handles the architecture-dependent parts of process handling.. ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_MATH_EMULATION ++#include ++#endif ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++ ++asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); ++ ++static int hlt_counter; ++ ++unsigned long boot_option_idle_override = 0; ++EXPORT_SYMBOL(boot_option_idle_override); ++ ++/* ++ * Return saved PC of a blocked thread. ++ */ ++unsigned long thread_saved_pc(struct task_struct *tsk) ++{ ++ return ((unsigned long *)tsk->thread.esp)[3]; ++} ++ ++/* ++ * Powermanagement idle function, if any.. ++ */ ++void (*pm_idle)(void); ++EXPORT_SYMBOL(pm_idle); ++static DEFINE_PER_CPU(unsigned int, cpu_idle_state); ++ ++void disable_hlt(void) ++{ ++ hlt_counter++; ++} ++ ++EXPORT_SYMBOL(disable_hlt); ++ ++void enable_hlt(void) ++{ ++ hlt_counter--; ++} ++ ++EXPORT_SYMBOL(enable_hlt); ++ ++/* ++ * On SMP it's slightly faster (but much more power-consuming!) ++ * to poll the ->work.need_resched flag instead of waiting for the ++ * cross-CPU IPI to arrive. Use this option with caution. ++ */ ++static void poll_idle (void) ++{ ++ local_irq_enable(); ++ ++ asm volatile( ++ "2:" ++ "testl %0, %1;" ++ "rep; nop;" ++ "je 2b;" ++ : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); ++} ++ ++static void xen_idle(void) ++{ ++ local_irq_disable(); ++ ++ if (need_resched()) ++ local_irq_enable(); ++ else { ++ current_thread_info()->status &= ~TS_POLLING; ++ smp_mb__after_clear_bit(); ++ safe_halt(); ++ current_thread_info()->status |= TS_POLLING; ++ } ++} ++#ifdef CONFIG_APM_MODULE ++EXPORT_SYMBOL(default_idle); ++#endif ++ ++#ifdef CONFIG_HOTPLUG_CPU ++extern cpumask_t cpu_initialized; ++static inline void play_dead(void) ++{ ++ idle_task_exit(); ++ local_irq_disable(); ++ cpu_clear(smp_processor_id(), cpu_initialized); ++ preempt_enable_no_resched(); ++ HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); ++ cpu_bringup(); ++} ++#else ++static inline void play_dead(void) ++{ ++ BUG(); ++} ++#endif /* CONFIG_HOTPLUG_CPU */ ++ ++/* ++ * The idle thread. There's no useful work to be ++ * done, so just try to conserve power and have a ++ * low exit latency (ie sit in a loop waiting for ++ * somebody to say that they'd like to reschedule) ++ */ ++void cpu_idle(void) ++{ ++ int cpu = smp_processor_id(); ++ ++ current_thread_info()->status |= TS_POLLING; ++ ++ /* endless idle loop with no priority at all */ ++ while (1) { ++ while (!need_resched()) { ++ void (*idle)(void); ++ ++ if (__get_cpu_var(cpu_idle_state)) ++ __get_cpu_var(cpu_idle_state) = 0; ++ ++ rmb(); ++ idle = xen_idle; /* no alternatives */ ++ ++ if (cpu_is_offline(cpu)) ++ play_dead(); ++ ++ __get_cpu_var(irq_stat).idle_timestamp = jiffies; ++ idle(); ++ } ++ preempt_enable_no_resched(); ++ schedule(); ++ preempt_disable(); ++ } ++} ++ ++void cpu_idle_wait(void) ++{ ++ unsigned int cpu, this_cpu = get_cpu(); ++ cpumask_t map; ++ ++ set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); ++ put_cpu(); ++ ++ cpus_clear(map); ++ for_each_online_cpu(cpu) { ++ per_cpu(cpu_idle_state, cpu) = 1; ++ cpu_set(cpu, map); ++ } ++ ++ __get_cpu_var(cpu_idle_state) = 0; ++ ++ wmb(); ++ do { ++ ssleep(1); ++ for_each_online_cpu(cpu) { ++ if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) ++ cpu_clear(cpu, map); ++ } ++ cpus_and(map, map, cpu_online_map); ++ } while (!cpus_empty(map)); ++} ++EXPORT_SYMBOL_GPL(cpu_idle_wait); ++ ++void __devinit select_idle_routine(const struct cpuinfo_x86 *c) ++{ ++} ++ ++static int __init idle_setup (char *str) ++{ ++ if (!strncmp(str, "poll", 4)) { ++ printk("using polling idle threads.\n"); ++ pm_idle = poll_idle; ++ } ++ ++ boot_option_idle_override = 1; ++ return 1; ++} ++ ++__setup("idle=", idle_setup); ++ ++void show_regs(struct pt_regs * regs) ++{ ++ unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; ++ ++ printk("\n"); ++ printk("Pid: %d, comm: %20s\n", current->pid, current->comm); ++ printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); ++ print_symbol("EIP is at %s\n", regs->eip); ++ ++ if (user_mode_vm(regs)) ++ printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); ++ printk(" EFLAGS: %08lx %s (%s %.*s)\n", ++ regs->eflags, print_tainted(), system_utsname.release, ++ (int)strcspn(system_utsname.version, " "), ++ system_utsname.version); ++ printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", ++ regs->eax,regs->ebx,regs->ecx,regs->edx); ++ printk("ESI: %08lx EDI: %08lx EBP: %08lx", ++ regs->esi, regs->edi, regs->ebp); ++ printk(" DS: %04x ES: %04x\n", ++ 0xffff & regs->xds,0xffff & regs->xes); ++ ++ cr0 = read_cr0(); ++ cr2 = read_cr2(); ++ cr3 = read_cr3(); ++ cr4 = read_cr4_safe(); ++ printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); ++ show_trace(NULL, regs, ®s->esp); ++} ++ ++/* ++ * This gets run with %ebx containing the ++ * function to call, and %edx containing ++ * the "args". ++ */ ++extern void kernel_thread_helper(void); ++__asm__(".section .text\n" ++ ".align 4\n" ++ "kernel_thread_helper:\n\t" ++ "movl %edx,%eax\n\t" ++ "pushl %edx\n\t" ++ "call *%ebx\n\t" ++ "pushl %eax\n\t" ++ "call do_exit\n" ++ ".previous"); ++ ++/* ++ * Create a kernel thread ++ */ ++int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) ++{ ++ struct pt_regs regs; ++ ++ memset(®s, 0, sizeof(regs)); ++ ++ regs.ebx = (unsigned long) fn; ++ regs.edx = (unsigned long) arg; ++ ++ regs.xds = __USER_DS; ++ regs.xes = __USER_DS; ++ regs.orig_eax = -1; ++ regs.eip = (unsigned long) kernel_thread_helper; ++ regs.xcs = GET_KERNEL_CS(); ++ regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; ++ ++ /* Ok, create the new process.. */ ++ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); ++} ++EXPORT_SYMBOL(kernel_thread); ++ ++/* ++ * Free current thread data structures etc.. ++ */ ++void exit_thread(void) ++{ ++ /* The process may have allocated an io port bitmap... nuke it. */ ++ if (unlikely(test_thread_flag(TIF_IO_BITMAP))) { ++ struct task_struct *tsk = current; ++ struct thread_struct *t = &tsk->thread; ++ struct physdev_set_iobitmap set_iobitmap; ++ memset(&set_iobitmap, 0, sizeof(set_iobitmap)); ++ HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &set_iobitmap); ++ kfree(t->io_bitmap_ptr); ++ t->io_bitmap_ptr = NULL; ++ clear_thread_flag(TIF_IO_BITMAP); ++ } ++} ++ ++void flush_thread(void) ++{ ++ struct task_struct *tsk = current; ++ ++ memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); ++ memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); ++ clear_tsk_thread_flag(tsk, TIF_DEBUG); ++ /* ++ * Forget coprocessor state.. ++ */ ++ clear_fpu(tsk); ++ clear_used_math(); ++} ++ ++void release_thread(struct task_struct *dead_task) ++{ ++ BUG_ON(dead_task->mm); ++ release_vm86_irqs(dead_task); ++} ++ ++/* ++ * This gets called before we allocate a new thread and copy ++ * the current task into it. ++ */ ++void prepare_to_copy(struct task_struct *tsk) ++{ ++ unlazy_fpu(tsk); ++} ++ ++int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, ++ unsigned long unused, ++ struct task_struct * p, struct pt_regs * regs) ++{ ++ struct pt_regs * childregs; ++ struct task_struct *tsk; ++ int err; ++ ++ childregs = task_pt_regs(p); ++ *childregs = *regs; ++ childregs->eax = 0; ++ childregs->esp = esp; ++ ++ p->thread.esp = (unsigned long) childregs; ++ p->thread.esp0 = (unsigned long) (childregs+1); ++ ++ p->thread.eip = (unsigned long) ret_from_fork; ++ ++ savesegment(fs,p->thread.fs); ++ savesegment(gs,p->thread.gs); ++ ++ tsk = current; ++ if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { ++ p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); ++ if (!p->thread.io_bitmap_ptr) { ++ p->thread.io_bitmap_max = 0; ++ return -ENOMEM; ++ } ++ memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr, ++ IO_BITMAP_BYTES); ++ set_tsk_thread_flag(p, TIF_IO_BITMAP); ++ } ++ ++ /* ++ * Set a new TLS for the child thread? ++ */ ++ if (clone_flags & CLONE_SETTLS) { ++ struct desc_struct *desc; ++ struct user_desc info; ++ int idx; ++ ++ err = -EFAULT; ++ if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info))) ++ goto out; ++ err = -EINVAL; ++ if (LDT_empty(&info)) ++ goto out; ++ ++ idx = info.entry_number; ++ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) ++ goto out; ++ ++ desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; ++ desc->a = LDT_entry_a(&info); ++ desc->b = LDT_entry_b(&info); ++ } ++ ++ p->thread.iopl = current->thread.iopl; ++ ++ err = 0; ++ out: ++ if (err && p->thread.io_bitmap_ptr) { ++ kfree(p->thread.io_bitmap_ptr); ++ p->thread.io_bitmap_max = 0; ++ } ++ return err; ++} ++ ++/* ++ * fill in the user structure for a core dump.. ++ */ ++void dump_thread(struct pt_regs * regs, struct user * dump) ++{ ++ int i; ++ ++/* changed the size calculations - should hopefully work better. lbt */ ++ dump->magic = CMAGIC; ++ dump->start_code = 0; ++ dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); ++ dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; ++ dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; ++ dump->u_dsize -= dump->u_tsize; ++ dump->u_ssize = 0; ++ for (i = 0; i < 8; i++) ++ dump->u_debugreg[i] = current->thread.debugreg[i]; ++ ++ if (dump->start_stack < TASK_SIZE) ++ dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; ++ ++ dump->regs.ebx = regs->ebx; ++ dump->regs.ecx = regs->ecx; ++ dump->regs.edx = regs->edx; ++ dump->regs.esi = regs->esi; ++ dump->regs.edi = regs->edi; ++ dump->regs.ebp = regs->ebp; ++ dump->regs.eax = regs->eax; ++ dump->regs.ds = regs->xds; ++ dump->regs.es = regs->xes; ++ savesegment(fs,dump->regs.fs); ++ savesegment(gs,dump->regs.gs); ++ dump->regs.orig_eax = regs->orig_eax; ++ dump->regs.eip = regs->eip; ++ dump->regs.cs = regs->xcs; ++ dump->regs.eflags = regs->eflags; ++ dump->regs.esp = regs->esp; ++ dump->regs.ss = regs->xss; ++ ++ dump->u_fpvalid = dump_fpu (regs, &dump->i387); ++} ++EXPORT_SYMBOL(dump_thread); ++ ++/* ++ * Capture the user space registers if the task is not running (in user space) ++ */ ++int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) ++{ ++ struct pt_regs ptregs = *task_pt_regs(tsk); ++ ptregs.xcs &= 0xffff; ++ ptregs.xds &= 0xffff; ++ ptregs.xes &= 0xffff; ++ ptregs.xss &= 0xffff; ++ ++ elf_core_copy_regs(regs, &ptregs); ++ ++ return 1; ++} ++ ++static noinline void __switch_to_xtra(struct task_struct *next_p) ++{ ++ struct thread_struct *next; ++ ++ next = &next_p->thread; ++ ++ if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { ++ set_debugreg(next->debugreg[0], 0); ++ set_debugreg(next->debugreg[1], 1); ++ set_debugreg(next->debugreg[2], 2); ++ set_debugreg(next->debugreg[3], 3); ++ /* no 4 and 5 */ ++ set_debugreg(next->debugreg[6], 6); ++ set_debugreg(next->debugreg[7], 7); ++ } ++} ++ ++/* ++ * This function selects if the context switch from prev to next ++ * has to tweak the TSC disable bit in the cr4. ++ */ ++static inline void disable_tsc(struct task_struct *prev_p, ++ struct task_struct *next_p) ++{ ++ struct thread_info *prev, *next; ++ ++ /* ++ * gcc should eliminate the ->thread_info dereference if ++ * has_secure_computing returns 0 at compile time (SECCOMP=n). ++ */ ++ prev = task_thread_info(prev_p); ++ next = task_thread_info(next_p); ++ ++ if (has_secure_computing(prev) || has_secure_computing(next)) { ++ /* slow path here */ ++ if (has_secure_computing(prev) && ++ !has_secure_computing(next)) { ++ write_cr4(read_cr4() & ~X86_CR4_TSD); ++ } else if (!has_secure_computing(prev) && ++ has_secure_computing(next)) ++ write_cr4(read_cr4() | X86_CR4_TSD); ++ } ++} ++ ++/* ++ * switch_to(x,yn) should switch tasks from x to y. ++ * ++ * We fsave/fwait so that an exception goes off at the right time ++ * (as a call from the fsave or fwait in effect) rather than to ++ * the wrong process. Lazy FP saving no longer makes any sense ++ * with modern CPU's, and this simplifies a lot of things (SMP ++ * and UP become the same). ++ * ++ * NOTE! We used to use the x86 hardware context switching. The ++ * reason for not using it any more becomes apparent when you ++ * try to recover gracefully from saved state that is no longer ++ * valid (stale segment register values in particular). With the ++ * hardware task-switch, there is no way to fix up bad state in ++ * a reasonable manner. ++ * ++ * The fact that Intel documents the hardware task-switching to ++ * be slow is a fairly red herring - this code is not noticeably ++ * faster. However, there _is_ some room for improvement here, ++ * so the performance issues may eventually be a valid point. ++ * More important, however, is the fact that this allows us much ++ * more flexibility. ++ * ++ * The return value (in %eax) will be the "prev" task after ++ * the task-switch, and shows up in ret_from_fork in entry.S, ++ * for example. ++ */ ++struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ++{ ++ struct thread_struct *prev = &prev_p->thread, ++ *next = &next_p->thread; ++ int cpu = smp_processor_id(); ++#ifndef CONFIG_X86_NO_TSS ++ struct tss_struct *tss = &per_cpu(init_tss, cpu); ++#endif ++ struct physdev_set_iopl iopl_op; ++ struct physdev_set_iobitmap iobmp_op; ++ multicall_entry_t _mcl[8], *mcl = _mcl; ++ ++ /* XEN NOTE: FS/GS saved in switch_mm(), not here. */ ++ ++ /* ++ * This is basically '__unlazy_fpu', except that we queue a ++ * multicall to indicate FPU task switch, rather than ++ * synchronously trapping to Xen. ++ */ ++ if (prev_p->thread_info->status & TS_USEDFPU) { ++ __save_init_fpu(prev_p); /* _not_ save_init_fpu() */ ++ mcl->op = __HYPERVISOR_fpu_taskswitch; ++ mcl->args[0] = 1; ++ mcl++; ++ } ++#if 0 /* lazy fpu sanity check */ ++ else BUG_ON(!(read_cr0() & 8)); ++#endif ++ ++ /* ++ * Reload esp0. ++ * This is load_esp0(tss, next) with a multicall. ++ */ ++ mcl->op = __HYPERVISOR_stack_switch; ++ mcl->args[0] = __KERNEL_DS; ++ mcl->args[1] = next->esp0; ++ mcl++; ++ ++ /* ++ * Load the per-thread Thread-Local Storage descriptor. ++ * This is load_TLS(next, cpu) with multicalls. ++ */ ++#define C(i) do { \ ++ if (unlikely(next->tls_array[i].a != prev->tls_array[i].a || \ ++ next->tls_array[i].b != prev->tls_array[i].b)) { \ ++ mcl->op = __HYPERVISOR_update_descriptor; \ ++ *(u64 *)&mcl->args[0] = virt_to_machine( \ ++ &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\ ++ *(u64 *)&mcl->args[2] = *(u64 *)&next->tls_array[i]; \ ++ mcl++; \ ++ } \ ++} while (0) ++ C(0); C(1); C(2); ++#undef C ++ ++ if (unlikely(prev->iopl != next->iopl)) { ++ iopl_op.iopl = (next->iopl == 0) ? 1 : (next->iopl >> 12) & 3; ++ mcl->op = __HYPERVISOR_physdev_op; ++ mcl->args[0] = PHYSDEVOP_set_iopl; ++ mcl->args[1] = (unsigned long)&iopl_op; ++ mcl++; ++ } ++ ++ if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) { ++ set_xen_guest_handle(iobmp_op.bitmap, ++ (char *)next->io_bitmap_ptr); ++ iobmp_op.nr_ports = next->io_bitmap_ptr ? IO_BITMAP_BITS : 0; ++ mcl->op = __HYPERVISOR_physdev_op; ++ mcl->args[0] = PHYSDEVOP_set_iobitmap; ++ mcl->args[1] = (unsigned long)&iobmp_op; ++ mcl++; ++ } ++ ++ (void)HYPERVISOR_multicall(_mcl, mcl - _mcl); ++ ++ /* ++ * Restore %fs and %gs if needed. ++ * ++ * Glibc normally makes %fs be zero, and %gs is one of ++ * the TLS segments. ++ */ ++ if (unlikely(next->fs)) ++ loadsegment(fs, next->fs); ++ ++ if (next->gs) ++ loadsegment(gs, next->gs); ++ ++ /* ++ * Now maybe handle debug registers ++ */ ++ if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) ++ __switch_to_xtra(next_p); ++ ++ disable_tsc(prev_p, next_p); ++ ++ return prev_p; ++} ++ ++asmlinkage int sys_fork(struct pt_regs regs) ++{ ++ return do_fork(SIGCHLD, regs.esp, ®s, 0, NULL, NULL); ++} ++ ++asmlinkage int sys_clone(struct pt_regs regs) ++{ ++ unsigned long clone_flags; ++ unsigned long newsp; ++ int __user *parent_tidptr, *child_tidptr; ++ ++ clone_flags = regs.ebx; ++ newsp = regs.ecx; ++ parent_tidptr = (int __user *)regs.edx; ++ child_tidptr = (int __user *)regs.edi; ++ if (!newsp) ++ newsp = regs.esp; ++ return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); ++} ++ ++/* ++ * This is trivial, and on the face of it looks like it ++ * could equally well be done in user mode. ++ * ++ * Not so, for quite unobvious reasons - register pressure. ++ * In user mode vfork() cannot have a stack frame, and if ++ * done by calling the "clone()" system call directly, you ++ * do not have enough call-clobbered registers to hold all ++ * the information you need. ++ */ ++asmlinkage int sys_vfork(struct pt_regs regs) ++{ ++ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, ®s, 0, NULL, NULL); ++} ++ ++/* ++ * sys_execve() executes a new program. ++ */ ++asmlinkage int sys_execve(struct pt_regs regs) ++{ ++ int error; ++ char * filename; ++ ++ filename = getname((char __user *) regs.ebx); ++ error = PTR_ERR(filename); ++ if (IS_ERR(filename)) ++ goto out; ++ error = do_execve(filename, ++ (char __user * __user *) regs.ecx, ++ (char __user * __user *) regs.edx, ++ ®s); ++ if (error == 0) { ++ task_lock(current); ++ current->ptrace &= ~PT_DTRACE; ++ task_unlock(current); ++ /* Make sure we don't return using sysenter.. */ ++ set_thread_flag(TIF_IRET); ++ } ++ putname(filename); ++out: ++ return error; ++} ++ ++#define top_esp (THREAD_SIZE - sizeof(unsigned long)) ++#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) ++ ++unsigned long get_wchan(struct task_struct *p) ++{ ++ unsigned long ebp, esp, eip; ++ unsigned long stack_page; ++ int count = 0; ++ if (!p || p == current || p->state == TASK_RUNNING) ++ return 0; ++ stack_page = (unsigned long)task_stack_page(p); ++ esp = p->thread.esp; ++ if (!stack_page || esp < stack_page || esp > top_esp+stack_page) ++ return 0; ++ /* include/asm-i386/system.h:switch_to() pushes ebp last. */ ++ ebp = *(unsigned long *) esp; ++ do { ++ if (ebp < stack_page || ebp > top_ebp+stack_page) ++ return 0; ++ eip = *(unsigned long *) (ebp+4); ++ if (!in_sched_functions(eip)) ++ return eip; ++ ebp = *(unsigned long *) ebp; ++ } while (count++ < 16); ++ return 0; ++} ++ ++/* ++ * sys_alloc_thread_area: get a yet unused TLS descriptor index. ++ */ ++static int get_free_idx(void) ++{ ++ struct thread_struct *t = ¤t->thread; ++ int idx; ++ ++ for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) ++ if (desc_empty(t->tls_array + idx)) ++ return idx + GDT_ENTRY_TLS_MIN; ++ return -ESRCH; ++} ++ ++/* ++ * Set a given TLS descriptor: ++ */ ++asmlinkage int sys_set_thread_area(struct user_desc __user *u_info) ++{ ++ struct thread_struct *t = ¤t->thread; ++ struct user_desc info; ++ struct desc_struct *desc; ++ int cpu, idx; ++ ++ if (copy_from_user(&info, u_info, sizeof(info))) ++ return -EFAULT; ++ idx = info.entry_number; ++ ++ /* ++ * index -1 means the kernel should try to find and ++ * allocate an empty descriptor: ++ */ ++ if (idx == -1) { ++ idx = get_free_idx(); ++ if (idx < 0) ++ return idx; ++ if (put_user(idx, &u_info->entry_number)) ++ return -EFAULT; ++ } ++ ++ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) ++ return -EINVAL; ++ ++ desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN; ++ ++ /* ++ * We must not get preempted while modifying the TLS. ++ */ ++ cpu = get_cpu(); ++ ++ if (LDT_empty(&info)) { ++ desc->a = 0; ++ desc->b = 0; ++ } else { ++ desc->a = LDT_entry_a(&info); ++ desc->b = LDT_entry_b(&info); ++ } ++ load_TLS(t, cpu); ++ ++ put_cpu(); ++ ++ return 0; ++} ++ ++/* ++ * Get the current Thread-Local Storage area: ++ */ ++ ++#define GET_BASE(desc) ( \ ++ (((desc)->a >> 16) & 0x0000ffff) | \ ++ (((desc)->b << 16) & 0x00ff0000) | \ ++ ( (desc)->b & 0xff000000) ) ++ ++#define GET_LIMIT(desc) ( \ ++ ((desc)->a & 0x0ffff) | \ ++ ((desc)->b & 0xf0000) ) ++ ++#define GET_32BIT(desc) (((desc)->b >> 22) & 1) ++#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) ++#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) ++#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) ++#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) ++#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) ++ ++asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) ++{ ++ struct user_desc info; ++ struct desc_struct *desc; ++ int idx; ++ ++ if (get_user(idx, &u_info->entry_number)) ++ return -EFAULT; ++ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) ++ return -EINVAL; ++ ++ memset(&info, 0, sizeof(info)); ++ ++ desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; ++ ++ info.entry_number = idx; ++ info.base_addr = GET_BASE(desc); ++ info.limit = GET_LIMIT(desc); ++ info.seg_32bit = GET_32BIT(desc); ++ info.contents = GET_CONTENTS(desc); ++ info.read_exec_only = !GET_WRITABLE(desc); ++ info.limit_in_pages = GET_LIMIT_PAGES(desc); ++ info.seg_not_present = !GET_PRESENT(desc); ++ info.useable = GET_USEABLE(desc); ++ ++ if (copy_to_user(u_info, &info, sizeof(info))) ++ return -EFAULT; ++ return 0; ++} ++ ++unsigned long arch_align_stack(unsigned long sp) ++{ ++ if (randomize_va_space) ++ sp -= get_random_int() % 8192; ++ return sp & ~0xf; ++} +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/quirks-xen.c 2007-08-27 14:02:01.000000000 -0400 +@@ -0,0 +1,47 @@ ++/* ++ * This file contains work-arounds for x86 and x86_64 platform bugs. ++ */ ++#include ++#include ++ ++#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI) ++ ++static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) ++{ ++ u8 config, rev; ++ u32 word; ++ ++ /* BIOS may enable hardware IRQ balancing for ++ * E7520/E7320/E7525(revision ID 0x9 and below) ++ * based platforms. ++ * Disable SW irqbalance/affinity on those platforms. ++ */ ++ pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); ++ if (rev > 0x9) ++ return; ++ ++ printk(KERN_INFO "Intel E7520/7320/7525 detected."); ++ ++ /* enable access to config space*/ ++ pci_read_config_byte(dev, 0xf4, &config); ++ pci_write_config_byte(dev, 0xf4, config|0x2); ++ ++ /* read xTPR register */ ++ raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); ++ ++ if (!(word & (1 << 13))) { ++ struct xen_platform_op op; ++ printk(KERN_INFO "Disabling irq balancing and affinity\n"); ++ op.cmd = XENPF_platform_quirk; ++ op.u.platform_quirk.quirk_id = QUIRK_NOIRQBALANCING; ++ (void)HYPERVISOR_platform_op(&op); ++ } ++ ++ /* put back the original value for config space*/ ++ if (!(config & 0x2)) ++ pci_write_config_byte(dev, 0xf4, config); ++} ++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); ++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); ++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); ++#endif +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/arch/i386/kernel/setup-xen.c 2007-08-27 14:02:09.000000000 -0400 +@@ -0,0 +1,1871 @@ ++/* ++ * linux/arch/i386/kernel/setup.c ++ * ++ * Copyright (C) 1995 Linus Torvalds ++ * ++ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 ++ * ++ * Memory region support ++ * David Parsons , July-August 1999 ++ * ++ * Added E820 sanitization routine (removes overlapping memory regions); ++ * Brian Moyle , February 2001 ++ * ++ * Moved CPU detection code to cpu/${cpu}.c ++ * Patrick Mochel , March 2002 ++ * ++ * Provisions for empty E820 memory regions (reported by certain BIOSes). ++ * Alex Achenbach , December 2002. ++ * ++ */ ++ ++/* ++ * This file handles the architecture-dependent parts of initialization ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include