/*
 * spu_switch.c
 *
 * (C) Copyright IBM Corp. 2005
 *
 * Author: Mark Nutter <mnutter@us.ibm.com>
 *
 * Host-side part of SPU context switch sequence outlined in
 * Synergistic Processor Element, Book IV.
 *
 * A fully preemptive switch of an SPE is very expensive in terms
 * of time and system resources.  SPE Book IV indicates that SPE
 * allocation should follow a "serially reusable device" model,
 * in which the SPE is assigned a task until it completes.  When
 * this is not possible, this sequence may be used to preemptively
 * save, and then later (optionally) restore the context of a
 * program executing on an SPE.
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/unistd.h>

#include <asm/io.h>
#include <asm/spu.h>
#include <asm/spu_priv1.h>
#include <asm/spu_csa.h>
#include <asm/mmu_context.h>

#include "spu_save_dump.h"
#include "spu_restore_dump.h"

#if 0
#define POLL_WHILE_TRUE(_c) {				\
    do {						\
    } while (_c);					\
  }
#else
#define RELAX_SPIN_COUNT				1000
#define POLL_WHILE_TRUE(_c) {				\
    do {						\
	int _i;						\
	for (_i=0; _i<RELAX_SPIN_COUNT && (_c); _i++) { \
	    cpu_relax();				\
	}						\
	if (unlikely(_c)) yield();			\
	else break;					\
    } while (_c);					\
  }
#endif				/* debug */

#define POLL_WHILE_FALSE(_c)	POLL_WHILE_TRUE(!(_c))
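
/*
 * The polling helpers above busy-wait on a hardware condition: the
 * loop spins with cpu_relax() for up to RELAX_SPIN_COUNT iterations,
 * calls yield() if the condition still holds, and repeats until the
 * condition clears.  Typical use in the steps below:
 *
 *     POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING);
 *
 * which waits until SPU_Status[R] reads back as 0.
 */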

static inline void acquire_spu_lock(struct spu *spu)
{
	/* Save, Step 1:
	 * Restore, Step 1:
	 *    Acquire SPU-specific mutual exclusion lock.
	 *    TBD.
	 */
}

static inline void release_spu_lock(struct spu *spu)
{
	/* Restore, Step 76:
	 *    Release SPU-specific mutual exclusion lock.
	 *    TBD.
	 */
}

static inline int check_spu_isolate(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;
	u32 isolate_state;

	/* Save, Step 2:
	 * Save, Step 6:
	 *     If any field of SPU_Status[E,L,IS] is '1', this
	 *     SPU is in isolate state and cannot be context
	 *     saved at this time.
	 */
	isolate_state = SPU_STATUS_ISOLATED_STATE |
	    SPU_STATUS_ISOLATED_LOAD_STATUS | SPU_STATUS_ISOLATED_EXIT_STATUS;
	return (in_be32(&prob->spu_status_R) & isolate_state) ? 1 : 0;
}

static inline void disable_interrupts(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 3:
	 * Restore, Step 2:
	 *     Save INT_Mask_class0 in CSA.
	 *     Write INT_MASK_class0 with value of 0.
	 *     Save INT_Mask_class1 in CSA.
	 *     Write INT_MASK_class1 with value of 0.
	 *     Save INT_Mask_class2 in CSA.
	 *     Write INT_MASK_class2 with value of 0.
	 */
	spin_lock_irq(&spu->register_lock);
	if (csa) {
		csa->priv1.int_mask_class0_RW = spu_int_mask_get(spu, 0);
		csa->priv1.int_mask_class1_RW = spu_int_mask_get(spu, 1);
		csa->priv1.int_mask_class2_RW = spu_int_mask_get(spu, 2);
	}
	spu_int_mask_set(spu, 0, 0ul);
	spu_int_mask_set(spu, 1, 0ul);
	spu_int_mask_set(spu, 2, 0ul);
	eieio();
	spin_unlock_irq(&spu->register_lock);
}

static inline void set_watchdog_timer(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 4:
	 * Restore, Step 25.
	 *    Set a software watchdog timer, which specifies the
	 *    maximum allowable time for a context save sequence.
	 *
	 *    For present, this implementation will not set a global
	 *    watchdog timer, as virtualization & variable system load
	 *    may cause unpredictable execution times.
	 */
}

static inline void inhibit_user_access(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 5:
	 * Restore, Step 3:
	 *     Inhibit user-space access (if provided) to this
	 *     SPU by unmapping the virtual pages assigned to
	 *     the SPU memory-mapped I/O (MMIO) for problem
	 *     state. TBD.
	 */
}

static inline void set_switch_pending(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 7:
	 * Restore, Step 5:
	 *     Set a software context switch pending flag.
	 */
	set_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags);
	mb();
}

static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Save, Step 8:
	 * Restore, Step 15:
	 *     Read MFC_CNTL[Ss].  Save residual suspended DMA
	 *     transfers to CSA.MFC_CNTL[Ss].
	 */
	switch (in_be64(&priv2->mfc_control_RW) &
	       MFC_CNTL_SUSPEND_DMA_STATUS_MASK) {
	case MFC_CNTL_SUSPEND_IN_PROGRESS:
		POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
				  MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
				 MFC_CNTL_SUSPEND_COMPLETE);
		/* fall through */
	case MFC_CNTL_SUSPEND_COMPLETE:
		if (csa) {
			csa->priv2.mfc_control_RW =
				in_be64(&priv2->mfc_control_RW) |
				MFC_CNTL_SUSPEND_DMA_QUEUE;
		}
		break;
	case MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION:
		out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
		POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
				  MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
				 MFC_CNTL_SUSPEND_COMPLETE);
		if (csa) {
			csa->priv2.mfc_control_RW =
				in_be64(&priv2->mfc_control_RW) &
				~MFC_CNTL_SUSPEND_DMA_QUEUE;
		}
		break;
	}
}

static inline void save_spu_runcntl(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;

	/* Save, Step 9:
	 *     Save SPU_Runcntl in the CSA.  This value contains
	 *     the "Application Desired State".
	 */
	csa->prob.spu_runcntl_RW = in_be32(&prob->spu_runcntl_RW);
}

static inline void save_mfc_sr1(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 10:
	 *     Save MFC_SR1 in the CSA.
	 */
	csa->priv1.mfc_sr1_RW = spu_mfc_sr1_get(spu);
}

static inline void save_spu_status(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;

	/* Save, Step 11:
	 *     Read SPU_Status[R], and save to CSA.
	 */
	if ((in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) == 0) {
		csa->prob.spu_status_R = in_be32(&prob->spu_status_R);
	} else {
		u32 stopped;

		out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
		eieio();
		POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
				SPU_STATUS_RUNNING);
		stopped =
		    SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP |
		    SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
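		/*
		 * If none of the architected stop conditions above is
		 * set, the SPU was stopped while running application
		 * code; record SPU_STATUS_RUNNING so that the saved
		 * state reflects that the program should be resumed
		 * on restore.
		 */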
		if ((in_be32(&prob->spu_status_R) & stopped) == 0)
			csa->prob.spu_status_R = SPU_STATUS_RUNNING;
		else
			csa->prob.spu_status_R = in_be32(&prob->spu_status_R);
	}
}

static inline void save_mfc_decr(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Save, Step 12:
	 *     Read MFC_CNTL[Ds].  Update saved copy of
	 *     CSA.MFC_CNTL[Ds].
	 */
	if (in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING) {
		csa->priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING;
		csa->suspend_time = get_cycles();
		out_be64(&priv2->spu_chnlcntptr_RW, 7ULL);
		eieio();
		csa->spu_chnldata_RW[7] = in_be64(&priv2->spu_chnldata_RW);
		eieio();
	} else {
		csa->priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
	}
}

static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Save, Step 13:
	 *     Write MFC_CNTL[Dh] set to a '1' to halt
	 *     the decrementer.
	 */
	out_be64(&priv2->mfc_control_RW,
		 MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK);
	eieio();
}

static inline void save_timebase(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 14:
	 *    Read PPE Timebase High and Timebase low registers
	 *    and save in CSA.  TBD.
	 */
	csa->suspend_time = get_cycles();
}

static inline void remove_other_spu_access(struct spu_state *csa,
					   struct spu *spu)
{
	/* Save, Step 15:
	 *     Remove other SPU access to this SPU by unmapping
	 *     this SPU's pages from their address space.  TBD.
	 */
}

static inline void do_mfc_mssync(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;

	/* Save, Step 16:
	 * Restore, Step 11.
	 *     Write SPU_MSSync register. Poll SPU_MSSync[P]
	 *     for a value of 0.
	 */
	out_be64(&prob->spc_mssync_RW, 1UL);
	POLL_WHILE_TRUE(in_be64(&prob->spc_mssync_RW) & MS_SYNC_PENDING);
}

static inline void issue_mfc_tlbie(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 17:
	 * Restore, Step 12.
	 * Restore, Step 48.
	 *     Write TLB_Invalidate_Entry[IS,VPN,L,Lp]=0 register.
	 *     Then issue a PPE sync instruction.
	 */
	spu_tlb_invalidate(spu);
	mb();
}

static inline void handle_pending_interrupts(struct spu_state *csa,
					     struct spu *spu)
{
	/* Save, Step 18:
	 *     Handle any pending interrupts from this SPU
	 *     here.  This is OS or hypervisor specific.  One
	 *     option is to re-enable interrupts to handle any
	 *     pending interrupts, with the interrupt handlers
	 *     recognizing the software Context Switch Pending
	 *     flag, to ensure the SPU execution or MFC command
	 *     queue is not restarted.  TBD.
	 */
}

static inline void save_mfc_queues(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;
	int i;

	/* Save, Step 19:
	 *     If MFC_Cntl[Se]=0 then save
	 *     MFC command queues.
	 */
	if ((in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DMA_QUEUES_EMPTY) == 0) {
		for (i = 0; i < 8; i++) {
			csa->priv2.puq[i].mfc_cq_data0_RW =
			    in_be64(&priv2->puq[i].mfc_cq_data0_RW);
			csa->priv2.puq[i].mfc_cq_data1_RW =
			    in_be64(&priv2->puq[i].mfc_cq_data1_RW);
			csa->priv2.puq[i].mfc_cq_data2_RW =
			    in_be64(&priv2->puq[i].mfc_cq_data2_RW);
			csa->priv2.puq[i].mfc_cq_data3_RW =
			    in_be64(&priv2->puq[i].mfc_cq_data3_RW);
		}
		for (i = 0; i < 16; i++) {
			csa->priv2.spuq[i].mfc_cq_data0_RW =
			    in_be64(&priv2->spuq[i].mfc_cq_data0_RW);
			csa->priv2.spuq[i].mfc_cq_data1_RW =
			    in_be64(&priv2->spuq[i].mfc_cq_data1_RW);
			csa->priv2.spuq[i].mfc_cq_data2_RW =
			    in_be64(&priv2->spuq[i].mfc_cq_data2_RW);
			csa->priv2.spuq[i].mfc_cq_data3_RW =
			    in_be64(&priv2->spuq[i].mfc_cq_data3_RW);
		}
	}
}

static inline void save_ppu_querymask(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;

	/* Save, Step 20:
	 *     Save the PPU_QueryMask register
	 *     in the CSA.
	 */
	csa->prob.dma_querymask_RW = in_be32(&prob->dma_querymask_RW);
}

static inline void save_ppu_querytype(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;

	/* Save, Step 21:
	 *     Save the PPU_QueryType register
	 *     in the CSA.
	 */
	csa->prob.dma_querytype_RW = in_be32(&prob->dma_querytype_RW);
}

static inline void save_ppu_tagstatus(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;

	/* Save the Prxy_TagStatus register in the CSA.
	 *
	 * It is unnecessary to restore dma_tagstatus_R, however,
	 * dma_tagstatus_R in the CSA is accessed via backing_ops, so
	 * we must save it.
	 */
	csa->prob.dma_tagstatus_R = in_be32(&prob->dma_tagstatus_R);
}

static inline void save_mfc_csr_tsq(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Save, Step 22:
	 *     Save the MFC_CSR_TSQ register
	 *     in the LSCSA.
	 */
	csa->priv2.spu_tag_status_query_RW =
	    in_be64(&priv2->spu_tag_status_query_RW);
}

static inline void save_mfc_csr_cmd(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Save, Step 23:
	 *     Save the MFC_CSR_CMD1 and MFC_CSR_CMD2
	 *     registers in the CSA.
	 */
	csa->priv2.spu_cmd_buf1_RW = in_be64(&priv2->spu_cmd_buf1_RW);
	csa->priv2.spu_cmd_buf2_RW = in_be64(&priv2->spu_cmd_buf2_RW);
}

static inline void save_mfc_csr_ato(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Save, Step 24:
	 *     Save the MFC_CSR_ATO register in
	 *     the CSA.
	 */
	csa->priv2.spu_atomic_status_RW = in_be64(&priv2->spu_atomic_status_RW);
}

static inline void save_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 25:
	 *     Save the MFC_TCLASS_ID register in
	 *     the CSA.
	 */
	csa->priv1.mfc_tclass_id_RW = spu_mfc_tclass_id_get(spu);
}

static inline void set_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 26:
	 * Restore, Step 23.
	 *     Write the MFC_TCLASS_ID register with
	 *     the value 0x10000000.
	 */
	spu_mfc_tclass_id_set(spu, 0x10000000);
	eieio();
}

static inline void purge_mfc_queue(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Save, Step 27:
	 * Restore, Step 14.
	 *     Write MFC_CNTL[Pc]=1 (purge queue).
	 */
	out_be64(&priv2->mfc_control_RW, MFC_CNTL_PURGE_DMA_REQUEST);
	eieio();
}

static inline void wait_purge_complete(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Save, Step 28:
	 *     Poll MFC_CNTL[Ps] until value '11' is read
	 *     (purge complete).
	 */
	POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
			 MFC_CNTL_PURGE_DMA_STATUS_MASK) ==
			 MFC_CNTL_PURGE_DMA_COMPLETE);
}

static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 30:
	 * Restore, Step 18:
	 *     Write MFC_SR1 with MFC_SR1[D=0,S=1] and
	 *     MFC_SR1[TL,R,Pr,T] set correctly for the
	 *     OS specific environment.
	 *
	 *     Implementation note: The SPU-side code
	 *     for save/restore is privileged, so the
	 *     MFC_SR1[Pr] bit is not set.
	 *
	 */
	spu_mfc_sr1_set(spu, (MFC_STATE1_MASTER_RUN_CONTROL_MASK |
			      MFC_STATE1_RELOCATE_MASK |
			      MFC_STATE1_BUS_TLBIE_MASK));
}

static inline void save_spu_npc(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;

	/* Save, Step 31:
	 *     Save SPU_NPC in the CSA.
	 */
	csa->prob.spu_npc_RW = in_be32(&prob->spu_npc_RW);
}

static inline void save_spu_privcntl(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Save, Step 32:
	 *     Save SPU_PrivCntl in the CSA.
	 */
	csa->priv2.spu_privcntl_RW = in_be64(&priv2->spu_privcntl_RW);
}

static inline void reset_spu_privcntl(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Save, Step 33:
	 * Restore, Step 16:
	 *     Write SPU_PrivCntl[S,Le,A] fields reset to 0.
	 */
	out_be64(&priv2->spu_privcntl_RW, 0UL);
	eieio();
}

static inline void save_spu_lslr(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Save, Step 34:
	 *     Save SPU_LSLR in the CSA.
	 */
	csa->priv2.spu_lslr_RW = in_be64(&priv2->spu_lslr_RW);
}

static inline void reset_spu_lslr(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Save, Step 35:
	 * Restore, Step 17.
	 *     Reset SPU_LSLR.
	 */
	out_be64(&priv2->spu_lslr_RW, LS_ADDR_MASK);
	eieio();
}

static inline void save_spu_cfg(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Save, Step 36:
	 *     Save SPU_Cfg in the CSA.
	 */
	csa->priv2.spu_cfg_RW = in_be64(&priv2->spu_cfg_RW);
}

static inline void save_pm_trace(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 37:
	 *     Save PM_Trace_Tag_Wait_Mask in the CSA.
	 *     Not performed by this implementation.
	 */
}

static inline void save_mfc_rag(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 38:
	 *     Save RA_GROUP_ID register and the
	 *     RA_ENABLE register in the CSA.
	 */
	csa->priv1.resource_allocation_groupID_RW =
		spu_resource_allocation_groupID_get(spu);
	csa->priv1.resource_allocation_enable_RW =
		spu_resource_allocation_enable_get(spu);
}

static inline void save_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;

	/* Save, Step 39:
	 *     Save MB_Stat register in the CSA.
	 */
	csa->prob.mb_stat_R = in_be32(&prob->mb_stat_R);
}

static inline void save_ppu_mb(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;

	/* Save, Step 40:
	 *     Save the PPU_MB register in the CSA.
	 */
	csa->prob.pu_mb_R = in_be32(&prob->pu_mb_R);
}

static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Save, Step 41:
	 *     Save the PPUINT_MB register in the CSA.
	 */
	csa->priv2.puint_mb_R = in_be64(&priv2->puint_mb_R);
}

static inline void save_ch_part1(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;
	u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
	int i;

	/* Save CH 1, without channel count */
	out_be64(&priv2->spu_chnlcntptr_RW, 1);
	csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW);

	/* Save the following CH: [0,3,4,24,25,27] */
	for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
		idx = ch_indices[i];
		out_be64(&priv2->spu_chnlcntptr_RW, idx);
		eieio();
		csa->spu_chnldata_RW[idx] = in_be64(&priv2->spu_chnldata_RW);
		csa->spu_chnlcnt_RW[idx] = in_be64(&priv2->spu_chnlcnt_RW);
		out_be64(&priv2->spu_chnldata_RW, 0UL);
		out_be64(&priv2->spu_chnlcnt_RW, 0UL);
		eieio();
	}
}

static inline void save_spu_mb(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;
	int i;

	/* Save, Step 43:
	 *     Save SPU Read Mailbox Channel.
	 */
	out_be64(&priv2->spu_chnlcntptr_RW, 29UL);
	eieio();
	csa->spu_chnlcnt_RW[29] = in_be64(&priv2->spu_chnlcnt_RW);
	for (i = 0; i < 4; i++) {
		csa->spu_mailbox_data[i] = in_be64(&priv2->spu_chnldata_RW);
	}
	out_be64(&priv2->spu_chnlcnt_RW, 0UL);
	eieio();
}

static inline void save_mfc_cmd(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Save, Step 44:
	 *     Save MFC_CMD Channel.
	 */
	out_be64(&priv2->spu_chnlcntptr_RW, 21UL);
	eieio();
	csa->spu_chnlcnt_RW[21] = in_be64(&priv2->spu_chnlcnt_RW);
	eieio();
}

static inline void reset_ch(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;
	u64 ch_indices[4] = { 21UL, 23UL, 28UL, 30UL };
	u64 ch_counts[4] = { 16UL, 1UL, 1UL, 1UL };
	u64 idx;
	int i;

	/* Save, Step 45:
	 *     Reset the following CH: [21, 23, 28, 30]
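	 *     by writing back their channel counts: 16 entries for
	 *     the MFC command channel (21) and one for each of the
	 *     others, as given in ch_counts[].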
	 */
	for (i = 0; i < 4; i++) {
		idx = ch_indices[i];
		out_be64(&priv2->spu_chnlcntptr_RW, idx);
		eieio();
		out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]);
		eieio();
	}
}

static inline void resume_mfc_queue(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Save, Step 46:
	 * Restore, Step 25.
	 *     Write MFC_CNTL[Sc]=0 (resume queue processing).
	 */
	out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE);
}
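
/*
 * SLB setup helpers used by setup_mfc_slbs() (Save Step 47 /
 * Restore Step 30): get_kernel_slb() builds the SLB_VSID/SLB_ESID
 * pair mapping a kernel effective address, and load_mfc_slb() writes
 * that pair into the MFC SLB at the given index, so that the
 * SPU-side save/restore code and the LSCSA are addressable by the
 * MFC.
 */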

static inline void get_kernel_slb(u64 ea, u64 slb[2])
{
	u64 llp;

	if (REGION_ID(ea) == KERNEL_REGION_ID)
		llp = mmu_psize_defs[mmu_linear_psize].sllp;
	else
		llp = mmu_psize_defs[mmu_virtual_psize].sllp;
	slb[0] = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) |
		SLB_VSID_KERNEL | llp;
	slb[1] = (ea & ESID_MASK) | SLB_ESID_V;
}

static inline void load_mfc_slb(struct spu *spu, u64 slb[2], int slbe)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	out_be64(&priv2->slb_index_W, slbe);
	eieio();
	out_be64(&priv2->slb_vsid_RW, slb[0]);
	out_be64(&priv2->slb_esid_RW, slb[1]);
	eieio();
}

static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu)
{
	u64 code_slb[2];
	u64 lscsa_slb[2];

	/* Save, Step 47:
	 * Restore, Step 30.
	 *     If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All
	 *     register, then initialize SLB_VSID and SLB_ESID
	 *     to provide access to SPU context save code and
	 *     LSCSA.
	 *
	 *     This implementation places both the context
	 *     switch code and LSCSA in kernel address space.
	 *
	 *     Further this implementation assumes that the
	 *     MFC_SR1[R]=1 (in other words, assume that
	 *     translation is desired by OS environment).
	 */
	spu_invalidate_slbs(spu);
	get_kernel_slb((unsigned long)&spu_save_code[0], code_slb);
	get_kernel_slb((unsigned long)csa->lscsa, lscsa_slb);
	load_mfc_slb(spu, code_slb, 0);
	if ((lscsa_slb[0] != code_slb[0]) || (lscsa_slb[1] != code_slb[1]))
		load_mfc_slb(spu, lscsa_slb, 1);
}

static inline void set_switch_active(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 48:
	 * Restore, Step 23.
	 *     Change the software context switch pending flag
	 *     to context switch active.
	 */
	set_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags);
	clear_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags);
	mb();
}

static inline void enable_interrupts(struct spu_state *csa, struct spu *spu)
{
	unsigned long class1_mask = CLASS1_ENABLE_SEGMENT_FAULT_INTR |
	    CLASS1_ENABLE_STORAGE_FAULT_INTR;

	/* Save, Step 49:
	 * Restore, Step 22:
	 *     Reset and then enable interrupts, as
	 *     needed by OS.
	 *
	 *     This implementation enables only class1
	 *     (translation) interrupts.
	 */
	spin_lock_irq(&spu->register_lock);
	spu_int_stat_clear(spu, 0, ~0ul);
	spu_int_stat_clear(spu, 1, ~0ul);
	spu_int_stat_clear(spu, 2, ~0ul);
	spu_int_mask_set(spu, 0, 0ul);
	spu_int_mask_set(spu, 1, class1_mask);
	spu_int_mask_set(spu, 2, 0ul);
	spin_unlock_irq(&spu->register_lock);
}
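
/*
 * Issue a DMA transfer through the MFC proxy command queue in
 * MFC_MAX_DMA_SIZE chunks.  For each chunk the local-store address,
 * effective address and the tag/size/class/command word are written,
 * and the command status is read back; the write sequence is retried
 * while the low status bits indicate the command was not accepted
 * (relaxing the CPU when bit 0x2 is set).
 */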

static inline int send_mfc_dma(struct spu *spu, unsigned long ea,
			       unsigned int ls_offset, unsigned int size,
			       unsigned int tag, unsigned int rclass,
			       unsigned int cmd)
{
	struct spu_problem __iomem *prob = spu->problem;
	union mfc_tag_size_class_cmd command;
	unsigned int transfer_size;
	volatile unsigned int status = 0x0;

	while (size > 0) {
		transfer_size =
		    (size > MFC_MAX_DMA_SIZE) ? MFC_MAX_DMA_SIZE : size;
		command.u.mfc_size = transfer_size;
		command.u.mfc_tag = tag;
		command.u.mfc_rclassid = rclass;
		command.u.mfc_cmd = cmd;
		do {
			out_be32(&prob->mfc_lsa_W, ls_offset);
			out_be64(&prob->mfc_ea_W, ea);
			out_be64(&prob->mfc_union_W.all64, command.all64);
			status =
			    in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32);
			if (unlikely(status & 0x2)) {
				cpu_relax();
			}
		} while (status & 0x3);
		size -= transfer_size;
		ea += transfer_size;
		ls_offset += transfer_size;
	}
	return 0;
}

static inline void save_ls_16kb(struct spu_state *csa, struct spu *spu)
{
	unsigned long addr = (unsigned long)&csa->lscsa->ls[0];
	unsigned int ls_offset = 0x0;
	unsigned int size = 16384;
	unsigned int tag = 0;
	unsigned int rclass = 0;
	unsigned int cmd = MFC_PUT_CMD;

	/* Save, Step 50:
	 *     Issue a DMA command to copy the first 16K bytes
	 *     of local storage to the CSA.
	 */
	send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
}

static inline void set_spu_npc(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;

	/* Save, Step 51:
	 * Restore, Step 31.
	 *     Write SPU_NPC[IE]=0 and SPU_NPC[LSA] to entry
	 *     point address of context save code in local
	 *     storage.
	 *
	 *     This implementation uses SPU-side save/restore
	 *     programs with entry points at LSA of 0.
	 */
	out_be32(&prob->spu_npc_RW, 0);
	eieio();
}

static inline void set_signot1(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;
	union {
		u64 ull;
		u32 ui[2];
	} addr64;

	/* Save, Step 52:
	 * Restore, Step 32:
	 *    Write SPU_Sig_Notify_1 register with upper 32-bits
	 *    of the CSA.LSCSA effective address.
	 */
	addr64.ull = (u64) csa->lscsa;
	out_be32(&prob->signal_notify1, addr64.ui[0]);
	eieio();
}

static inline void set_signot2(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;
	union {
		u64 ull;
		u32 ui[2];
	} addr64;

	/* Save, Step 53:
	 * Restore, Step 33:
	 *    Write SPU_Sig_Notify_2 register with lower 32-bits
	 *    of the CSA.LSCSA effective address.
	 */
	addr64.ull = (u64) csa->lscsa;
	out_be32(&prob->signal_notify2, addr64.ui[1]);
	eieio();
}

static inline void send_save_code(struct spu_state *csa, struct spu *spu)
{
	unsigned long addr = (unsigned long)&spu_save_code[0];
	unsigned int ls_offset = 0x0;
	unsigned int size = sizeof(spu_save_code);
	unsigned int tag = 0;
	unsigned int rclass = 0;
	unsigned int cmd = MFC_GETFS_CMD;

	/* Save, Step 54:
	 *     Issue a DMA command to copy context save code
	 *     to local storage and start SPU.
	 */
	send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
}

static inline void set_ppu_querymask(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;

	/* Save, Step 55:
	 * Restore, Step 38.
	 *     Write PPU_QueryMask=1 (enable Tag Group 0)
	 *     and issue eieio instruction.
	 */
	out_be32(&prob->dma_querymask_RW, MFC_TAGID_TO_TAGMASK(0));
	eieio();
}

static inline void wait_tag_complete(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;
	u32 mask = MFC_TAGID_TO_TAGMASK(0);
	unsigned long flags;

	/* Save, Step 56:
	 * Restore, Step 39.
	 * Restore, Step 46.
	 *     Poll PPU_TagStatus[gn] until 01 (Tag group 0 complete)
	 *     or write PPU_QueryType[TS]=01 and wait for Tag Group
	 *     Complete Interrupt.  Write INT_Stat_Class0 or
	 *     INT_Stat_Class2 with value of 'handled'.
	 */
	POLL_WHILE_FALSE(in_be32(&prob->dma_tagstatus_R) & mask);

	local_irq_save(flags);
	spu_int_stat_clear(spu, 0, ~(0ul));
	spu_int_stat_clear(spu, 2, ~(0ul));
	local_irq_restore(flags);
}

static inline void wait_spu_stopped(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;
	unsigned long flags;

	/* Save, Step 57:
	 * Restore, Step 40.
	 *     Poll until SPU_Status[R]=0 or wait for SPU Class 0
	 *     or SPU Class 2 interrupt.  Write INT_Stat_class0
	 *     or INT_Stat_class2 with value of handled.
	 */
	POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING);

	local_irq_save(flags);
	spu_int_stat_clear(spu, 0, ~(0ul));
	spu_int_stat_clear(spu, 2, ~(0ul));
	local_irq_restore(flags);
}

static inline int check_save_status(struct spu_state *csa, struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;
	u32 complete;

	/* Save, Step 54:
	 *     If SPU_Status[P]=1 and SPU_Status[SC] = "success",
	 *     context save succeeded, otherwise context save
	 *     failed.
	 */
	complete = ((SPU_SAVE_COMPLETE << SPU_STOP_STATUS_SHIFT) |
		    SPU_STATUS_STOPPED_BY_STOP);
	return (in_be32(&prob->spu_status_R) != complete) ? 1 : 0;
}

static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu)
{
	/* Restore, Step 4:
	 *    If required, notify the "using application" that
	 *    the SPU task has been terminated.  TBD.
	 */
}

static inline void suspend_mfc(struct spu_state *csa, struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	/* Restore, Step 7:
	 * Restore, Step 47.
	 *     Write MFC_Cntl[Dh,Sc]='1','1' to suspend
	 *     the queue and halt the decrementer.
	 */
	out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE |
		 MFC_CNTL_DECREMENTER_HALTED);
	eieio();
}