diff -urN linux-2.4.19-pre7-ac2-rml/arch/i386/kernel/i8259.c linux/arch/i386/kernel/i8259.c
--- linux-2.4.19-pre7-ac2-rml/arch/i386/kernel/i8259.c	Sat Apr 20 17:55:10 2002
+++ linux/arch/i386/kernel/i8259.c	Sat Apr 20 18:29:10 2002
@@ -79,7 +79,6 @@
  * through the ICC by us (IPIs)
  */
 #ifdef CONFIG_SMP
-BUILD_SMP_INTERRUPT(task_migration_interrupt,TASK_MIGRATION_VECTOR)
 BUILD_SMP_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
 BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
 BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
@@ -474,9 +473,6 @@
 	 */
 	set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
-	/* IPI for task migration */
-	set_intr_gate(TASK_MIGRATION_VECTOR, task_migration_interrupt);
-
 	/* IPI for invalidation */
 	set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
 
diff -urN linux-2.4.19-pre7-ac2-rml/arch/i386/kernel/smp.c linux/arch/i386/kernel/smp.c
--- linux-2.4.19-pre7-ac2-rml/arch/i386/kernel/smp.c	Sat Apr 20 17:55:10 2002
+++ linux/arch/i386/kernel/smp.c	Sat Apr 20 18:29:10 2002
@@ -484,35 +484,6 @@
 	do_flush_tlb_all_local();
 }
 
-static spinlock_t migration_lock = SPIN_LOCK_UNLOCKED;
-static task_t *new_task;
-
-/*
- * This function sends a 'task migration' IPI to another CPU.
- * Must be called from syscall contexts, with interrupts *enabled*.
- */
-void smp_migrate_task(int cpu, task_t *p)
-{
-	/*
-	 * The target CPU will unlock the migration spinlock:
-	 */
-	spin_lock(&migration_lock);
-	new_task = p;
-	send_IPI_mask(1 << cpu, TASK_MIGRATION_VECTOR);
-}
-
-/*
- * Task migration callback.
- */
-asmlinkage void smp_task_migration_interrupt(void)
-{
-	task_t *p;
-
-	ack_APIC_irq();
-	p = new_task;
-	spin_unlock(&migration_lock);
-	sched_task_migrated(p);
-}
 /*
  * this function sends a 'reschedule' IPI to another CPU.
  * it goes straight through and wastes no time serializing
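The two functions removed above were the whole of the old migration path: one global spinlock plus a shared new_task pointer, with the actual move performed in the IPI handler on the destination CPU. A rough sketch of how the old set_cpus_allowed() (removed from kernel/sched.c further down) had to drive it; target_cpu is just an illustrative variable:

	current->state = TASK_UNINTERRUPTIBLE;
	smp_migrate_task(target_cpu, current);	/* raise the migration IPI */
	schedule();				/* destination CPU wakes us up there */

Only the current task could migrate itself, and every migration in the system serialized on migration_lock; the per-CPU migration threads added to kernel/sched.c below replace this handshake.
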
diff -urN linux-2.4.19-pre7-ac2-rml/include/asm-i386/hw_irq.h linux/include/asm-i386/hw_irq.h
--- linux-2.4.19-pre7-ac2-rml/include/asm-i386/hw_irq.h	Sat Apr 20 17:54:55 2002
+++ linux/include/asm-i386/hw_irq.h	Sat Apr 20 18:29:10 2002
@@ -41,8 +41,7 @@
 #define ERROR_APIC_VECTOR	0xfe
 #define INVALIDATE_TLB_VECTOR	0xfd
 #define RESCHEDULE_VECTOR	0xfc
-#define TASK_MIGRATION_VECTOR	0xfb
-#define CALL_FUNCTION_VECTOR	0xfa
+#define CALL_FUNCTION_VECTOR	0xfb
 
 /*
  * Local APIC timer IRQ vector is on a different priority level,
diff -urN linux-2.4.19-pre7-ac2-rml/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.4.19-pre7-ac2-rml/include/linux/sched.h	Sat Apr 20 18:24:11 2002
+++ linux/include/linux/sched.h	Sat Apr 20 18:29:10 2002
@@ -149,8 +149,7 @@
 extern void update_one_process(task_t *p, unsigned long user,
 			       unsigned long system, int cpu);
 extern void scheduler_tick(int user_tick, int system);
-extern void sched_task_migrated(task_t *p);
-extern void smp_migrate_task(int cpu, task_t *task);
+extern void migration_init(void);
 extern unsigned long cache_decay_ticks;
 extern int set_user(uid_t new_ruid, int dumpclear);
 
@@ -450,7 +449,12 @@
  */
 #define _STK_LIM	(8*1024*1024)
 
+#if CONFIG_SMP
 extern void set_cpus_allowed(task_t *p, unsigned long new_mask);
+#else
+#define set_cpus_allowed(p, new_mask)	do { } while (0)
+#endif
+
 extern void set_user_nice(task_t *p, long nice);
 extern int task_prio(task_t *p);
 extern int task_nice(task_t *p);
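The UP stub above uses a do { } while (0) body rather than an empty expansion so that set_cpus_allowed(p, mask); behaves like a single statement and still demands its trailing semicolon, for example in un-braced conditionals. A minimal sketch, with bind_to_boot_cpu and p as hypothetical names:

	if (bind_to_boot_cpu)
		set_cpus_allowed(p, 1UL << 0);	/* expands to do { } while (0); on UP */
	else
		set_cpus_allowed(p, ~0UL);

Note that on UP the arguments are not evaluated at all, so callers should not rely on side effects in the mask expression.
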
diff -urN linux-2.4.19-pre7-ac2-rml/init/main.c linux/init/main.c
--- linux-2.4.19-pre7-ac2-rml/init/main.c	Sat Apr 20 17:54:55 2002
+++ linux/init/main.c	Sat Apr 20 18:29:10 2002
@@ -458,6 +458,10 @@
  */
 static void __init do_basic_setup(void)
 {
+	/* Start the per-CPU migration threads */
+#if CONFIG_SMP
+	migration_init();
+#endif
 
 	/*
 	 * Tell the world that we're going to be the grim
diff -urN linux-2.4.19-pre7-ac2-rml/kernel/ksyms.c linux/kernel/ksyms.c
--- linux-2.4.19-pre7-ac2-rml/kernel/ksyms.c	Sat Apr 20 17:54:55 2002
+++ linux/kernel/ksyms.c	Sat Apr 20 18:29:10 2002
@@ -443,7 +443,9 @@
 EXPORT_SYMBOL(schedule_timeout);
 EXPORT_SYMBOL(sys_sched_yield);
 EXPORT_SYMBOL(set_user_nice);
-EXPORT_SYMBOL(set_cpus_allowed);
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL_GPL(set_cpus_allowed);
+#endif
 EXPORT_SYMBOL(jiffies);
 EXPORT_SYMBOL(xtime);
 EXPORT_SYMBOL(do_gettimeofday);
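With set_cpus_allowed now a GPL-only export (and exported only on SMP), a module calling it must carry a GPL-compatible license tag or the symbol will not resolve at load time; a one-line sketch of the relevant module declaration:

	MODULE_LICENSE("GPL");	/* needed to link against EXPORT_SYMBOL_GPL symbols */

UP modules pick up the empty macro from <linux/sched.h> instead, so no export is needed there.
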
diff -urN linux-2.4.19-pre7-ac2-rml/kernel/sched.c linux/kernel/sched.c
--- linux-2.4.19-pre7-ac2-rml/kernel/sched.c	Sat Apr 20 18:28:18 2002
+++ linux/kernel/sched.c	Sat Apr 20 18:29:10 2002
@@ -144,6 +144,8 @@
 	task_t *curr, *idle;
 	prio_array_t *active, *expired, arrays[2];
 	int prev_nr_running[NR_CPUS];
+	task_t *migration_thread;
+	list_t migration_queue;
 } ____cacheline_aligned;
 
 static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
@@ -284,20 +286,6 @@
 }
 
 /*
- * The SMP message passing code calls this function whenever
- * the new task has arrived at the target CPU. We move the
- * new task into the local runqueue.
- *
- * This function must be called with interrupts disabled.
- */
-void sched_task_migrated(task_t *new_task)
-{
-	wait_task_inactive(new_task);
-	new_task->cpu = smp_processor_id();
-	wake_up_process(new_task);
-}
-
-/*
  * Kick the remote CPU if the task is running currently,
  * this code is used by the signal code to signal tasks
  * which are in user-mode as quickly as possible.
@@ -962,34 +950,6 @@
 	return timeout;
 }
 
-/*
- * Change the current task's CPU affinity. Migrate the process to a
- * proper CPU and schedule away if the current CPU is removed from
- * the allowed bitmask.
- */
-void set_cpus_allowed(task_t *p, unsigned long new_mask)
-{
-	new_mask &= cpu_online_map;
-	if (!new_mask)
-		BUG();
-	if (p != current)
-		BUG();
-
-	p->cpus_allowed = new_mask;
-	/*
-	 * Can the task run on the current CPU? If not then
-	 * migrate the process off to a proper CPU.
-	 */
-	if (new_mask & (1UL << smp_processor_id()))
-		return;
-#if CONFIG_SMP
-	current->state = TASK_UNINTERRUPTIBLE;
-	smp_migrate_task(__ffs(new_mask), current);
-
-	schedule();
-#endif
-}
-
 void scheduling_functions_end_here(void) { }
 
 void set_user_nice(task_t *p, long nice)
@@ -1475,6 +1435,7 @@
 		rq->expired = rq->arrays + 1;
 		spin_lock_init(&rq->lock);
 		spin_lock_init(&rq->frozen);
+		INIT_LIST_HEAD(&rq->migration_queue);
 
 		for (j = 0; j < 2; j++) {
 			array = rq->arrays + j;
@@ -1506,3 +1467,216 @@
 	atomic_inc(&init_mm.mm_count);
 	enter_lazy_tlb(&init_mm, current, smp_processor_id());
 }
+
+#if CONFIG_SMP
+
+/*
+ * This is how migration works:
+ *
+ * 1) we queue a migration_req_t structure in the source CPU's
+ *    runqueue and wake up that CPU's migration thread.
+ * 2) we down() the locked semaphore => thread blocks.
+ * 3) migration thread wakes up (implicitly it forces the migrated
+ *    thread off the CPU)
+ * 4) it gets the migration request and checks whether the migrated
+ *    task is still in the wrong runqueue.
+ * 5) if it's in the wrong runqueue then the migration thread removes
+ *    it and puts it into the right queue.
+ * 6) migration thread up()s the semaphore.
+ * 7) we wake up and the migration is done.
+ */
+
+typedef struct {
+	list_t list;
+	task_t *task;
+	struct semaphore sem;
+} migration_req_t;
+
+/*
+ * Change a given task's CPU affinity. Migrate the process to a
+ * proper CPU and schedule it away if the CPU it's executing on
+ * is removed from the allowed bitmask.
+ *
+ * NOTE: the caller must have a valid reference to the task, the
+ * task must not exit() & deallocate itself prematurely.  No
+ * spinlocks can be held.
+ */
+void set_cpus_allowed(task_t *p, unsigned long new_mask)
+{
+	unsigned long flags;
+	migration_req_t req;
+	runqueue_t *rq;
+
+	new_mask &= cpu_online_map;
+	if (!new_mask)
+		BUG();
+
+	rq = task_rq_lock(p, &flags);
+	p->cpus_allowed = new_mask;
+	/*
+	 * Can the task run on the task's current CPU? If not then
+	 * migrate the process off to a proper CPU.
+	 */
+	if (new_mask & (1UL << p->cpu)) {
+		task_rq_unlock(rq, &flags);
+		return;
+	}
+
+	init_MUTEX_LOCKED(&req.sem);
+	req.task = p;
+	list_add(&req.list, &rq->migration_queue);
+	task_rq_unlock(rq, &flags);
+	wake_up_process(rq->migration_thread);
+
+	down(&req.sem);
+}
+
+/*
+ * Treat the bits of migration_mask as lock bits.
+ * If the bit corresponding to the cpu a migration_thread is
+ * running on is already set, that thread has failed to claim
+ * its cpu and must yield in order to find another.
+ */
+static volatile unsigned long migration_mask;
+static atomic_t migration_threads_seeking_cpu;
+static struct completion migration_complete
+			= COMPLETION_INITIALIZER(migration_complete);
+
+static int migration_thread(void * unused)
+{
+	struct sched_param param = { sched_priority: MAX_RT_PRIO - 1 };
+	runqueue_t *rq;
+	int ret;
+
+	daemonize();
+	sigfillset(&current->blocked);
+	set_fs(KERNEL_DS);
+	ret = setscheduler(0, SCHED_FIFO, &param);
+
+	/*
+	 * We have to migrate manually - there is no migration thread
+	 * to do this for us yet :-)
+	 *
+	 * We use the following property of the Linux scheduler. At
+	 * this point no other task is running, so by keeping all
+	 * migration threads running, the load-balancer will distribute
+	 * them across all CPUs equally. Once that has happened, each
+	 * migration thread binds itself to the cpu it is running on.
+	 */
+
+	/*
+	 * Enter the loop with preemption disabled so that
+	 * smp_processor_id() remains valid through the check. The
+	 * interior of the wait loop re-enables preemption in an
+	 * attempt to get scheduled off the current cpu. When the
+	 * loop is exited the lock bit in migration_mask is acquired
+	 * and preemption is disabled on the way out. This way the
+	 * cpu acquired remains valid when ->cpus_allowed is set.
+	 */
+	while (test_and_set_bit(smp_processor_id(), &migration_mask))
+		yield();
+
+	current->cpus_allowed = 1 << smp_processor_id();
+	rq = this_rq();
+	rq->migration_thread = current;
+
+	/*
+	 * Now that we've bound ourselves to a cpu, decrement
+	 * migration_threads_seeking_cpu and busy-wait for everyone else.
+	 * Preemption should remain disabled and the cpu should remain
+	 * in busywait. Yielding the cpu here would allow a livelock
+	 * where a timing pattern causes an idle task seeking a
+	 * migration_thread to always find the unbound migration_thread
+	 * running on the cpus it tries to steal tasks from.
+	 */
+	atomic_dec(&migration_threads_seeking_cpu);
+	while (atomic_read(&migration_threads_seeking_cpu))
+		cpu_relax();
+
+	sprintf(current->comm, "migration_CPU%d", smp_processor_id());
+
+	/*
+	 * Everyone's found their cpu, so now wake migration_init().
+	 * Multiple wakeups are harmless; removal from the waitqueue
+	 * has locking built-in, and waking an empty queue is valid.
+	 */
+	complete(&migration_complete);
+
+	/*
+	 * Initiate the event loop.
+	 */
+	for (;;) {
+		runqueue_t *rq_src, *rq_dest;
+		struct list_head *head;
+		int cpu_src, cpu_dest;
+		migration_req_t *req;
+		unsigned long flags;
+		task_t *p;
+
+		spin_lock_irqsave(&rq->lock, flags);
+		head = &rq->migration_queue;
+		current->state = TASK_INTERRUPTIBLE;
+		if (list_empty(head)) {
+			spin_unlock_irqrestore(&rq->lock, flags);
+			schedule();
+			continue;
+		}
+		req = list_entry(head->next, migration_req_t, list);
+		list_del_init(head->next);
+		spin_unlock_irqrestore(&rq->lock, flags);
+
+		p = req->task;
+		cpu_dest = __ffs(p->cpus_allowed);
+		rq_dest = cpu_rq(cpu_dest);
+repeat:
+		cpu_src = p->cpu;
+		rq_src = cpu_rq(cpu_src);
+
+		local_irq_save(flags);
+		double_rq_lock(rq_src, rq_dest);
+		if (p->cpu != cpu_src) {
+			double_rq_unlock(rq_src, rq_dest);
+			local_irq_restore(flags);
+			goto repeat;
+		}
+		if (rq_src == rq) {
+			p->cpu = cpu_dest;
+			if (p->array) {
+				deactivate_task(p, rq_src);
+				activate_task(p, rq_dest);
+			}
+		}
+		double_rq_unlock(rq_src, rq_dest);
+		local_irq_restore(flags);
+
+		up(&req->sem);
+	}
+}
+
+void __init migration_init(void)
+{
+	unsigned long orig_cache_decay_ticks;
+	int cpu;
+
+	atomic_set(&migration_threads_seeking_cpu, smp_num_cpus);
+
+	orig_cache_decay_ticks = cache_decay_ticks;
+	cache_decay_ticks = 0;
+
+	for (cpu = 0; cpu < smp_num_cpus; cpu++)
+		if (kernel_thread(migration_thread, NULL,
+				CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
+			BUG();
+
+	/*
+	 * We cannot have missed the wakeup: the migration_thread
+	 * bound for the cpu migration_init() is running on cannot
+	 * acquire this cpu until migration_init() has yielded it by
+	 * means of wait_for_completion().
+	 */
+	wait_for_completion(&migration_complete);
+
+	cache_decay_ticks = orig_cache_decay_ticks;
+}
+
+#endif /* CONFIG_SMP */
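
A usage sketch for the new interface (some_task and allowed_mask are hypothetical; the caller must already hold a reference to the task): the call can sleep in down() until the source CPU's migration thread has moved the task, so it has to be made from process context with no spinlocks held.

	/* Pin a task to CPU 0; if it is elsewhere this blocks until it has been moved. */
	set_cpus_allowed(some_task, 1UL << 0);

	/* If the task already runs on an allowed CPU the call returns at once. */
	set_cpus_allowed(current, allowed_mask);

As described in the "This is how migration works" comment above, the request is queued on the source CPU's runqueue and completed by up() from that CPU's migration thread.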
