From: Christoph Lameter <christoph@lameter.com>

Defining irq_stat as an array means that all elements of the array are
located on a single NUMA node, so accessing irq_stat from any other node
requires internode traffic.  This patch makes irq_stat a per_cpu variable,
which allows most accesses to be local.

Signed-off-by: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Shai Fultheim <Shai@Scalex86.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/arch/i386/kernel/apic.c    |    2 +-
 25-akpm/arch/i386/kernel/io_apic.c |    2 +-
 25-akpm/arch/i386/kernel/irq.c     |    5 ++++-
 25-akpm/arch/i386/kernel/nmi.c     |    4 ++--
 25-akpm/arch/i386/kernel/process.c |    2 +-
 25-akpm/include/asm-i386/hardirq.h |    7 ++++++-
 6 files changed, 15 insertions(+), 7 deletions(-)

diff -puN arch/i386/kernel/apic.c~per-cpu-irq-stat arch/i386/kernel/apic.c
--- 25/arch/i386/kernel/apic.c~per-cpu-irq-stat	2005-03-14 22:46:34.000000000 -0800
+++ 25-akpm/arch/i386/kernel/apic.c	2005-03-14 22:46:34.000000000 -0800
@@ -1165,7 +1165,7 @@ fastcall void smp_apic_timer_interrupt(s
 	/*
 	 * the NMI deadlock-detector uses this.
 	 */
-	irq_stat[cpu].apic_timer_irqs++;
+	per_cpu(irq_stat, cpu).apic_timer_irqs++;
 
 	/*
 	 * NOTE! We'd better ACK the irq immediately,
diff -puN arch/i386/kernel/io_apic.c~per-cpu-irq-stat arch/i386/kernel/io_apic.c
--- 25/arch/i386/kernel/io_apic.c~per-cpu-irq-stat	2005-03-14 22:46:34.000000000 -0800
+++ 25-akpm/arch/i386/kernel/io_apic.c	2005-03-14 22:46:34.000000000 -0800
@@ -275,7 +275,7 @@ static struct irq_cpu_info {
 #define IRQ_DELTA(cpu,irq) 	(irq_cpu_data[cpu].irq_delta[irq])
 
 #define IDLE_ENOUGH(cpu,now) \
-		(idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
+	(idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
 
 #define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
 
diff -puN arch/i386/kernel/irq.c~per-cpu-irq-stat arch/i386/kernel/irq.c
--- 25/arch/i386/kernel/irq.c~per-cpu-irq-stat	2005-03-14 22:46:34.000000000 -0800
+++ 25-akpm/arch/i386/kernel/irq.c	2005-03-14 22:46:34.000000000 -0800
@@ -16,6 +16,9 @@
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 
+DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
+EXPORT_PER_CPU_SYMBOL(irq_stat);
+
 #ifndef CONFIG_X86_LOCAL_APIC
 /*
  * 'what should we do if we get a hw irq event on an illegal vector'.
@@ -246,7 +249,7 @@ skip:
 		for (j = 0; j < NR_CPUS; j++)
 			if (cpu_online(j))
 				seq_printf(p, "%10u ",
-					irq_stat[j].apic_timer_irqs);
+					per_cpu(irq_stat,j).apic_timer_irqs);
 		seq_putc(p, '\n');
 #endif
 		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
diff -puN arch/i386/kernel/nmi.c~per-cpu-irq-stat arch/i386/kernel/nmi.c
--- 25/arch/i386/kernel/nmi.c~per-cpu-irq-stat	2005-03-14 22:46:34.000000000 -0800
+++ 25-akpm/arch/i386/kernel/nmi.c	2005-03-14 22:46:34.000000000 -0800
@@ -110,7 +110,7 @@ int __init check_nmi_watchdog (void)
 	printk(KERN_INFO "testing NMI watchdog ... ");
 
 	for (cpu = 0; cpu < NR_CPUS; cpu++)
-		prev_nmi_count[cpu] = irq_stat[cpu].__nmi_count;
+		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
 	local_irq_enable();
 	mdelay((10*1000)/nmi_hz); // wait 10 ticks
 
@@ -488,7 +488,7 @@ void nmi_watchdog_tick (struct pt_regs *
 	 */
 	int sum, cpu = smp_processor_id();
 
-	sum = irq_stat[cpu].apic_timer_irqs;
+	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
 
 	if (last_irq_sums[cpu] == sum) {
 		/*
diff -puN arch/i386/kernel/process.c~per-cpu-irq-stat arch/i386/kernel/process.c
--- 25/arch/i386/kernel/process.c~per-cpu-irq-stat	2005-03-14 22:46:34.000000000 -0800
+++ 25-akpm/arch/i386/kernel/process.c	2005-03-14 22:46:34.000000000 -0800
@@ -162,7 +162,7 @@ void cpu_idle (void)
 			if (!idle)
 				idle = default_idle;
 
-			irq_stat[cpu].idle_timestamp = jiffies;
+			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
 			idle();
 		}
 		schedule();
diff -puN include/asm-i386/hardirq.h~per-cpu-irq-stat include/asm-i386/hardirq.h
--- 25/include/asm-i386/hardirq.h~per-cpu-irq-stat	2005-03-14 22:46:34.000000000 -0800
+++ 25-akpm/include/asm-i386/hardirq.h	2005-03-14 22:46:34.000000000 -0800
@@ -12,8 +12,13 @@ typedef struct {
 	unsigned int apic_timer_irqs;	/* arch dependent */
 } ____cacheline_aligned irq_cpustat_t;
 
-#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
+DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
+extern irq_cpustat_t irq_stat[];
+
+#define __ARCH_IRQ_STAT
+#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
 
 void ack_bad_irq(unsigned int irq);
+#include <linux/irq_cpustat.h>
 
 #endif /* __ASM_HARDIRQ_H */
_