/*
    Copyright 2016-2019 StapleButter

    This file is part of melonDS.

    melonDS is free software: you can redistribute it and/or modify it under
    the terms of the GNU General Public License as published by the Free
    Software Foundation, either version 3 of the License, or (at your option)
    any later version.

    melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
    FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with melonDS. If not, see http://www.gnu.org/licenses/.
*/

#include <stdio.h>
#include <string.h>
#include "Config.h"
#include "NDS.h"
#include "ARM.h"
#include "NDSCart.h"
#include "DMA.h"
#include "FIFO.h"
#include "GPU.h"
#include "SPU.h"
#include "SPI.h"
#include "RTC.h"
#include "Wifi.h"
#include "Platform.h"
#include "melon_fopen.h"


namespace NDS
{

// Cycle counters used only when DEBUG_CHECK_DESYNC is defined.
// They are zeroed in Reset(), accumulated while running, and printed at the
// end of RunFrame() as differences against dbg_CyclesSys.
#ifdef DEBUG_CHECK_DESYNC
u64 dbg_CyclesSys;
u64 dbg_CyclesARM9;
u64 dbg_CyclesTimer9;
u64 dbg_CyclesARM7;
u64 dbg_CyclesTimer7;
#endif

// timing notes
//
// * this implementation is technically wrong for VRAM
//   each bank is considered a separate region
//   but this would only matter in specific VRAM->VRAM DMA transfers or
//   when running code in VRAM, which is way unlikely
//
// bus/basedelay/nspenalty
//
// bus types:
// * 0 / 32-bit: nothing special
// * 1 / 16-bit: 32-bit accesses split into two 16-bit accesses, second is always sequential
// * 2 / 8-bit/GBARAM: (presumably) split into multiple 8-bit accesses?
// * 3 / ARM9 internal: cache/TCM
//
// ARM9 always gets 3c nonseq penalty when using the bus (except for mainRAM where the penalty is 7c)
//
// ARM7 only gets nonseq penalty when accessing mainRAM (7c as for ARM9)
//
// timings for GBA slot and wifi are set up at runtime

// Per-page access timings. ARM9 entries are indexed by (address >> 14),
// ARM7 entries by (address >> 15). Each entry holds, in order:
// [0] 16-bit nonsequential, [1] 16-bit sequential,
// [2] 32-bit nonsequential, [3] 32-bit sequential.
u8 ARM9MemTimings[0x40000][4];
u8 ARM7MemTimings[0x20000][4];

// the two CPU cores; allocated in Init(), freed in DeInit()
ARMv5* ARM9;
ARMv4* ARM7;

// cycle budget of the current scheduler iteration (see CalcIterationCycles())
s32 CurIterationCycles;
// how far the ARM7 has run ahead of the ARM9 (see RunFrame())
s32 ARM7Offset;
// which CPU is currently executing: 1 = ARM9, 2 = ARM7, 0 = neither
int CurCPU;

// scheduler events; bit i of SchedListMask is set while SchedList[i] is pending
SchedEvent SchedList[Event_MAX];
u32 SchedListMask;

// CPU stop flags: bits 0-11 are set by StopCPU() for the ARM9, bits 16-27 for
// the ARM7, and bit 31 marks a GXFIFO stall
u32 CPUStop;

u8 ARM9BIOS[0x1000];
u8 ARM7BIOS[0x4000];

u8 MainRAM[MAIN_RAM_SIZE];

// shared WRAM and its current per-CPU mapping (set up by MapSharedWRAM());
// a NULL pointer means nothing is mapped for that CPU
u8 SharedWRAM[0x8000];
u8 WRAMCnt;
u8* SWRAM_ARM9;
u8* SWRAM_ARM7;
u32 SWRAM_ARM9Mask;
u32 SWRAM_ARM7Mask;

u8 ARM7WRAM[0x10000];

// one entry per CPU; bit 7 of ExMemCnt[0] selects whose entry drives the
// GBA slot timings (see SetGBASlotTimings())
u16 ExMemCnt[2];

u8 ROMSeed0[2*8];
u8 ROMSeed1[2*8];

// IO registers
// interrupt master enable / enable / request flags, index 0 = ARM9, 1 = ARM7
u32 IME[2];
u32 IE[2], IF[2];

u8 PostFlag9;
u8 PostFlag7;
u16 PowerControl9;
u16 PowerControl7;

u16 WifiWaitCnt;

u16 ARM7BIOSProt;

// timers 0-3 belong to the ARM9, 4-7 to the ARM7.
// TimerCheckMask[cpu]: bits 0-3 select the timers run by RunTightTimers(),
// bits 4-7 the timers run by RunLooseTimers()
Timer Timers[8];
u8 TimerCheckMask[2];

// DMA channels: [0..3] are created for CPU 0, [4..7] for CPU 1 (see Init())
DMA* DMAs[8];
u32 DMA9Fill[4];

u16 IPCSync9, IPCSync7;
u16 IPCFIFOCnt9, IPCFIFOCnt7;
FIFO<u32>* IPCFIFO9; // FIFO in which the ARM9 writes
FIFO<u32>* IPCFIFO7;

// divider unit; 64-bit values are stored as two 32-bit words
u16 DivCnt;
u32 DivNumerator[2];
u32 DivDenominator[2];
u32 DivQuotient[2];
u32 DivRemainder[2];

// square root unit
u16 SqrtCnt;
u32 SqrtVal[2];
u32 SqrtRes;

// key state; a cleared bit means the key is pressed (see PressKey()/ReleaseKey())
u32 KeyInput;
u16 KeyCnt;
u16 RCnt;

// false makes RunFrame() return immediately
bool Running;


// forward declarations for functions defined further down in this file
void DivDone(u32 param);
void SqrtDone(u32 param);
void RunTimer(u32 tid, s32 cycles);
void SetWifiWaitCnt(u16 val);
void SetGBASlotTimings();


bool Init()
{
    ARM9 = new ARMv5();
    ARM7 = new ARMv4();

    DMAs[0] = new DMA(0, 0);
    DMAs[1] = new DMA(0, 1);
    DMAs[2] = new DMA(0, 2);
    DMAs[3] = new DMA(0, 3);
    DMAs[4] = new DMA(1, 0);
    DMAs[5] = new DMA(1, 1);
    DMAs[6] = new DMA(1, 2);
    DMAs[7] = new DMA(1, 3);

    IPCFIFO9 = new FIFO<u32>(16);
    IPCFIFO7 = new FIFO<u32>(16);

    if (!NDSCart::Init()) return false;
    if (!GPU::Init()) return false;
    if (!SPU::Init()) return false;
    if (!SPI::Init()) return false;
    if (!RTC::Init()) return false;
    if (!Wifi::Init()) return false;

    return true;
}

// Frees everything Init() allocated and shuts the subsystems down.
void DeInit()
{
    delete ARM9;
    delete ARM7;

    int n = 0;
    while (n < 8)
    {
        delete DMAs[n];
        n++;
    }

    delete IPCFIFO9;
    delete IPCFIFO7;

    // same order as in Init()
    NDSCart::DeInit();
    GPU::DeInit();
    SPU::DeInit();
    SPI::DeInit();
    RTC::DeInit();
    Wifi::DeInit();
}


// Fills the ARM9 timing table for the address range [addrstart, addrend).
//
// buswidth: 16 makes 32-bit accesses cost one nonsequential plus one
//           sequential 16-bit access; any other value makes 32-bit accesses
//           cost the same as 16-bit ones
// nonseq/seq: cycle counts for a nonsequential / sequential access
//
// The ARM9 core is then told to refresh its own view of the range.
void SetARM9RegionTimings(u32 addrstart, u32 addrend, int buswidth, int nonseq, int seq)
{
    // the table has one entry per 16KB page
    u32 firstpage = addrstart >> 14;
    u32 endpage   = addrend >> 14;

    // an end address inside the very last page (e.g. 0xFFFFFFFF) means
    // "through the end of the address space"
    if (endpage == 0x3FFFF) endpage++;

    const bool halfwidth = (buswidth == 16);
    const int halfN = nonseq;
    const int halfS = seq;
    const int wordN = halfwidth ? (halfN + halfS) : halfN;
    const int wordS = halfwidth ? (halfS + halfS) : halfS;

    for (u32 page = firstpage; page < endpage; page++)
    {
        u8* entry = ARM9MemTimings[page];
        entry[0] = halfN;
        entry[1] = halfS;
        entry[2] = wordN;
        entry[3] = wordS;
    }

    ARM9->UpdateRegionTimings(firstpage<<14, endpage<<14);
}

// Fills the ARM7 timing table for the address range [addrstart, addrend).
//
// buswidth: 16 makes 32-bit accesses cost one nonsequential plus one
//           sequential 16-bit access; any other value makes 32-bit accesses
//           cost the same as 16-bit ones
// nonseq/seq: cycle counts for a nonsequential / sequential access
void SetARM7RegionTimings(u32 addrstart, u32 addrend, int buswidth, int nonseq, int seq)
{
    // the table has one entry per 32KB page
    u32 firstpage = addrstart >> 15;
    u32 endpage   = addrend >> 15;

    // an end address inside the very last page (e.g. 0xFFFFFFFF) means
    // "through the end of the address space"
    if (endpage == 0x1FFFF) endpage++;

    const bool halfwidth = (buswidth == 16);
    const int halfN = nonseq;
    const int halfS = seq;
    const int wordN = halfwidth ? (halfN + halfS) : halfN;
    const int wordS = halfwidth ? (halfS + halfS) : halfS;

    for (u32 page = firstpage; page < endpage; page++)
    {
        u8* entry = ARM7MemTimings[page];
        entry[0] = halfN;
        entry[1] = halfS;
        entry[2] = wordN;
        entry[3] = wordS;
    }
}

void InitTimings()
{
    // TODO, eventually:
    // VRAM is initially unmapped. The timings should be those of void regions.
    // Similarly for any unmapped VRAM area.
    // Need to check whether supporting these timing characteristics would impact performance
    // (especially wrt VRAM mirroring and overlapping and whatnot).

    // ARM9

    SetARM9RegionTimings(0x00000000, 0xFFFFFFFF, 32, 1 + 3, 1); // void

    SetARM9RegionTimings(0xFFFF0000, 0xFFFFFFFF, 32, 1 + 3, 1); // BIOS
    SetARM9RegionTimings(0x02000000, 0x03000000, 16, 8, 1);     // main RAM
    SetARM9RegionTimings(0x03000000, 0x04000000, 32, 1 + 3, 1); // ARM9/shared WRAM
    SetARM9RegionTimings(0x04000000, 0x05000000, 32, 1 + 3, 1); // IO
    SetARM9RegionTimings(0x05000000, 0x06000000, 16, 1 + 3, 1); // palette
    SetARM9RegionTimings(0x06000000, 0x07000000, 16, 1 + 3, 1); // VRAM
    SetARM9RegionTimings(0x07000000, 0x08000000, 32, 1 + 3, 1); // OAM

    // ARM7

    SetARM7RegionTimings(0x00000000, 0xFFFFFFFF, 32, 1, 1); // void

    SetARM7RegionTimings(0x00000000, 0x00010000, 32, 1, 1); // BIOS
    SetARM7RegionTimings(0x02000000, 0x03000000, 16, 8, 1); // main RAM
    SetARM7RegionTimings(0x03000000, 0x04000000, 32, 1, 1); // ARM7/shared WRAM
    SetARM7RegionTimings(0x04000000, 0x04800000, 32, 1, 1); // IO
    SetARM7RegionTimings(0x06000000, 0x07000000, 16, 1, 1); // ARM7 VRAM

    // handled later: GBA slot, wifi
}

// Prepares the system to start the loaded cart directly: copies the ARM9 and
// ARM7 binaries from the cart ROM into memory, writes a set of fixed values
// into main RAM, sets up CPU registers and jumps both CPUs to their entry
// points. The fixed values are presumably what the BIOS/firmware boot would
// normally leave behind -- TODO confirm.
// NOTE(review): offsets and sizes taken from the ROM header are used without
// bounds checks against the ROM size.
void SetupDirectBoot()
{
    // 8 words from ROM offset 0x20: offset/entry/RAM address/size for the
    // ARM9 binary, followed by the same four fields for the ARM7 binary
    u32 bootparams[8];
    memcpy(bootparams, &NDSCart::CartROM[0x20], 8*4);

    printf("ARM9: offset=%08X entry=%08X RAM=%08X size=%08X\n",
           bootparams[0], bootparams[1], bootparams[2], bootparams[3]);
    printf("ARM7: offset=%08X entry=%08X RAM=%08X size=%08X\n",
           bootparams[4], bootparams[5], bootparams[6], bootparams[7]);

    // mode 3 maps all of the shared WRAM to the ARM7
    MapSharedWRAM(3);

    // copy the ARM9 binary, one word at a time, through the ARM9 bus
    for (u32 i = 0; i < bootparams[3]; i+=4)
    {
        u32 tmp = *(u32*)&NDSCart::CartROM[bootparams[0]+i];
        ARM9Write32(bootparams[2]+i, tmp);
    }

    // copy the ARM7 binary through the ARM7 bus
    for (u32 i = 0; i < bootparams[7]; i+=4)
    {
        u32 tmp = *(u32*)&NDSCart::CartROM[bootparams[4]+i];
        ARM7Write32(bootparams[6]+i, tmp);
    }

    // copy the first 0x170 bytes of the ROM to 0x027FFE00
    for (u32 i = 0; i < 0x170; i+=4)
    {
        u32 tmp = *(u32*)&NDSCart::CartROM[i];
        ARM9Write32(0x027FFE00+i, tmp);
    }

    // cart ID (twice) and two halfwords taken from ROM offsets 0x15E and 0x6C
    ARM9Write32(0x027FF800, NDSCart::CartID);
    ARM9Write32(0x027FF804, NDSCart::CartID);
    ARM9Write16(0x027FF808, *(u16*)&NDSCart::CartROM[0x15E]);
    ARM9Write16(0x027FF80A, *(u16*)&NDSCart::CartROM[0x6C]);

    ARM9Write16(0x027FF850, 0x5835);

    // same set of values again at 0x027FFC00
    ARM9Write32(0x027FFC00, NDSCart::CartID);
    ARM9Write32(0x027FFC04, NDSCart::CartID);
    ARM9Write16(0x027FFC08, *(u16*)&NDSCart::CartROM[0x15E]);
    ARM9Write16(0x027FFC0A, *(u16*)&NDSCart::CartROM[0x6C]);

    ARM9Write16(0x027FFC10, 0x5835);
    ARM9Write16(0x027FFC30, 0xFFFF);
    ARM9Write16(0x027FFC40, 0x0001);

    // ARM9 coprocessor setup (presumably TCM regions and control register -- TODO confirm)
    ARM9->CP15Write(0x910, 0x0300000A);
    ARM9->CP15Write(0x911, 0x00000020);
    ARM9->CP15Write(0x100, 0x00050000);

    // R12 and R14 get the entry point, R13 and the IRQ/SVC banked registers
    // get fixed values (presumably the stack pointers)
    ARM9->R[12] = bootparams[1];
    ARM9->R[13] = 0x03002F7C;
    ARM9->R[14] = bootparams[1];
    ARM9->R_IRQ[0] = 0x03003F80;
    ARM9->R_SVC[0] = 0x03003FC0;

    ARM7->R[12] = bootparams[5];
    ARM7->R[13] = 0x0380FD80;
    ARM7->R[14] = bootparams[5];
    ARM7->R_IRQ[0] = 0x0380FF80;
    ARM7->R_SVC[0] = 0x0380FFC0;

    ARM9->JumpTo(bootparams[1]);
    ARM7->JumpTo(bootparams[5]);

    PostFlag9 = 0x01;
    PostFlag7 = 0x01;

    PowerControl9 = 0x820F;
    // NOTE(review): DoSavestate() passes PowerControl9>>15 here instead of the
    // whole register -- confirm which one DisplaySwap() expects
    GPU::DisplaySwap(PowerControl9);

    // checkme
    RCnt = 0x8000;

    NDSCart::SPICnt = 0x8000;

    SPU::SetBias(0x200);

    SetWifiWaitCnt(0x0030);

    ARM7BIOSProt = 0x1204;

    SPI_Firmware::SetupDirectBoot();
}

// Resets the whole system: reloads both BIOS images from disk, clears RAM,
// resets IO state, CPUs, timers, DMA channels and the scheduler, then resets
// the subsystems.
void Reset()
{
    FILE* f;
    u32 i;

#ifdef DEBUG_CHECK_DESYNC
        dbg_CyclesSys = 0;
        dbg_CyclesARM9 = 0;
        dbg_CyclesTimer9 = 0;
        dbg_CyclesARM7 = 0;
        dbg_CyclesTimer7 = 0;
#endif // DEBUG_CHECK_DESYNC

    f = melon_fopen_local("bios9.bin", "rb");
    if (!f)
    {
        printf("ARM9 BIOS not found\n");

        // no BIOS: fill the first 16 words with 0xE7FFDEFF
        // (presumably an undefined instruction -- TODO confirm).
        // The rest of the buffer keeps whatever it held before.
        for (i = 0; i < 16; i++)
            ((u32*)ARM9BIOS)[i] = 0xE7FFDEFF;
    }
    else
    {
        // NOTE(review): the fread() result is not checked; a short file
        // leaves part of the buffer unchanged
        fseek(f, 0, SEEK_SET);
        fread(ARM9BIOS, 0x1000, 1, f);

        printf("ARM9 BIOS loaded\n");
        fclose(f);
    }

    f = melon_fopen_local("bios7.bin", "rb");
    if (!f)
    {
        printf("ARM7 BIOS not found\n");

        // same fallback as for the ARM9 BIOS
        for (i = 0; i < 16; i++)
            ((u32*)ARM7BIOS)[i] = 0xE7FFDEFF;
    }
    else
    {
        // NOTE(review): fread() result unchecked here as well
        fseek(f, 0, SEEK_SET);
        fread(ARM7BIOS, 0x4000, 1, f);

        printf("ARM7 BIOS loaded\n");
        fclose(f);
    }

    ARM9->SetClockShift(1);
    ARM7->SetClockShift(0);

    // default timings first; GBA slot and wifi timings are applied below
    InitTimings();

    memset(MainRAM, 0, MAIN_RAM_SIZE);
    memset(SharedWRAM, 0, 0x8000);
    memset(ARM7WRAM, 0, 0x10000);

    // mode 0 maps all of the shared WRAM to the ARM9
    MapSharedWRAM(0);

    ExMemCnt[0] = 0;
    ExMemCnt[1] = 0;
    memset(ROMSeed0, 0, 2*8);
    memset(ROMSeed1, 0, 2*8);
    // must come after ExMemCnt is cleared, since the timings are derived from it
    SetGBASlotTimings();

    IME[0] = 0;
    IE[0] = 0;
    IF[0] = 0;
    IME[1] = 0;
    IE[1] = 0;
    IF[1] = 0;

    PostFlag9 = 0x00;
    PostFlag7 = 0x00;
    PowerControl9 = 0x0001;
    PowerControl7 = 0x0001;

    // SetWifiWaitCnt() returns early when the value is unchanged, so store a
    // different value first to force the timing table update
    WifiWaitCnt = 0xFFFF; // temp
    SetWifiWaitCnt(0);

    ARM7BIOSProt = 0;

    IPCSync9 = 0;
    IPCSync7 = 0;
    IPCFIFOCnt9 = 0;
    IPCFIFOCnt7 = 0;
    IPCFIFO9->Clear();
    IPCFIFO7->Clear();

    // NOTE(review): only the control registers of the divider and sqrt units
    // are cleared; their operand/result registers are left as they were
    DivCnt = 0;
    SqrtCnt = 0;

    ARM9->Reset();
    ARM7->Reset();

    CPUStop = 0;

    memset(Timers, 0, 8*sizeof(Timer));
    TimerCheckMask[0] = 0;
    TimerCheckMask[1] = 0;

    for (i = 0; i < 8; i++) DMAs[i]->Reset();
    memset(DMA9Fill, 0, 4*4);

    // no events pending
    memset(SchedList, 0, sizeof(SchedList));
    SchedListMask = 0;

    CurIterationCycles = 0;
    ARM7Offset = 0;

    // all key bits set; PressKey() clears a bit, so this is "nothing pressed"
    KeyInput = 0x007F03FF;
    KeyCnt = 0;
    RCnt = 0;

    NDSCart::Reset();
    GPU::Reset();
    SPU::Reset();
    SPI::Reset();
    RTC::Reset();
    Wifi::Reset();
}

// Stops emulation: clears the Running flag (which makes RunFrame() bail out),
// then notifies the platform layer, the GPU and the SPU, in that order.
void Stop()
{
    printf("Stopping: shutdown\n");

    Running = false;

    Platform::StopEmu();

    GPU::Stop();
    SPU::Stop();
}

// Saves or loads the scheduler event list.
//
// Event callbacks are function pointers, which cannot be stored as-is in a
// savestate. Each callback is stored as its index in the eventfuncs table
// below instead; an event with no callback is stored as (u32)-1.
// Any callback that can be scheduled MUST be listed here, and the order must
// not change or existing savestates will resolve to the wrong functions.
//
// Returns false if a callback is missing from the table (when saving) or a
// stored index is outside the table (when loading).
bool DoSavestate_Scheduler(Savestate* file)
{
    void (*eventfuncs[])(u32) =
    {
        GPU::StartScanline, GPU::StartHBlank, GPU::FinishFrame,
        SPU::Mix,
        Wifi::USTimer,

        GPU::DisplayFIFO,
        NDSCart::ROMPrepareData, NDSCart::ROMEndTransfer,
        NDSCart::SPITransferDone,
        SPI::TransferDone,
        DivDone,
        SqrtDone,

        NULL
    };

    // ID stored for events without a callback
    const u32 nofunc = (u32)-1;

    if (!file->Saving)
    {
        // number of real entries, i.e. index of the terminating NULL
        int numfuncs = 0;
        while (eventfuncs[numfuncs])
            numfuncs++;

        for (int i = 0; i < Event_MAX; i++)
        {
            SchedEvent* evt = &SchedList[i];

            u32 funcid;
            file->Var32(&funcid);

            if (funcid == nofunc)
            {
                evt->Func = NULL;
            }
            else if (funcid < (u32)numfuncs)
            {
                evt->Func = eventfuncs[funcid];
            }
            else
            {
                printf("savestate: VERY BAD!!!!!! EVENT FUNCTION POINTER ID %d IS OUT OF RANGE. HAX?????\n", numfuncs);
                return false;
            }

            file->Var32((u32*)&evt->WaitCycles);
            file->Var32(&evt->Param);
        }

        return true;
    }

    for (int i = 0; i < Event_MAX; i++)
    {
        SchedEvent* evt = &SchedList[i];

        u32 funcid = nofunc;
        if (evt->Func)
        {
            // look the callback up in the table
            for (int j = 0; eventfuncs[j] && funcid == nofunc; j++)
            {
                if (eventfuncs[j] == evt->Func)
                    funcid = j;
            }

            if (funcid == nofunc)
            {
                printf("savestate: VERY BAD!!!!! FUNCTION POINTER FOR EVENT %d NOT IN HACKY LIST. CANNOT SAVE. SMACK STAPLEBUTTER.\n", i);
                return false;
            }
        }

        file->Var32(&funcid);
        file->Var32((u32*)&evt->WaitCycles);
        file->Var32(&evt->Param);
    }

    return true;
}

// Saves or loads the state owned by this module (section "NDSG"), then lets
// the CPUs and the subsystems save/load their own state.
// The direction is given by file->Saving. Returns false if the scheduler
// state could not be processed.
//
// The order of the fields defines the savestate layout; do not reorder.
bool DoSavestate(Savestate* file)
{
    file->Section("NDSG");

    // sized from the buffers themselves so the stored length always matches
    // the arrays (MainRAM is MAIN_RAM_SIZE bytes, not a hard-coded constant)
    file->VarArray(MainRAM, sizeof(MainRAM));
    file->VarArray(SharedWRAM, sizeof(SharedWRAM));
    file->VarArray(ARM7WRAM, sizeof(ARM7WRAM));

    file->VarArray(ExMemCnt, 2*sizeof(u16));
    file->VarArray(ROMSeed0, 2*8);
    file->VarArray(ROMSeed1, 2*8);

    file->Var16(&WifiWaitCnt);

    file->VarArray(IME, 2*sizeof(u32));
    file->VarArray(IE, 2*sizeof(u32));
    file->VarArray(IF, 2*sizeof(u32));

    file->Var8(&PostFlag9);
    file->Var8(&PostFlag7);
    file->Var16(&PowerControl9);
    file->Var16(&PowerControl7);

    file->Var16(&ARM7BIOSProt);

    file->Var16(&IPCSync9);
    file->Var16(&IPCSync7);
    file->Var16(&IPCFIFOCnt9);
    file->Var16(&IPCFIFOCnt7);
    IPCFIFO9->DoSavestate(file);
    IPCFIFO7->DoSavestate(file);

    // NOTE(review): only the control registers of the divider and sqrt units
    // are stored; operands and results are not part of the savestate
    file->Var16(&DivCnt);
    file->Var16(&SqrtCnt);

    file->Var32(&CPUStop);

    for (int i = 0; i < 8; i++)
    {
        Timer* timer = &Timers[i];

        file->Var16(&timer->Reload);
        file->Var16(&timer->Cnt);
        file->Var32(&timer->Counter);
        file->Var32(&timer->CycleShift);
    }
    file->VarArray(TimerCheckMask, 2*sizeof(u8));

    file->VarArray(DMA9Fill, 4*sizeof(u32));

    if (!DoSavestate_Scheduler(file)) return false;
    file->Var32(&SchedListMask);
    file->Var32((u32*)&CurIterationCycles);
    file->Var32((u32*)&ARM7Offset);

    // TODO: save KeyInput????
    file->Var16(&KeyCnt);
    file->Var16(&RCnt);


    for (int i = 0; i < 8; i++)
        DMAs[i]->DoSavestate(file);

    file->Var8(&WRAMCnt);

    if (!file->Saving)
    {
        // state that is derived from the loaded registers has to be rebuilt

        // 'dept of redundancy dept'
        // but we do need to update the mappings
        MapSharedWRAM(WRAMCnt);

        GPU::DisplaySwap(PowerControl9>>15);

        InitTimings();
        SetGBASlotTimings();

        // SetWifiWaitCnt() ignores unchanged values, so change it first
        u16 tmp = WifiWaitCnt;
        WifiWaitCnt = 0xFFFF;
        SetWifiWaitCnt(tmp); // force timing table update
    }

    ARM9->DoSavestate(file);
    ARM7->DoSavestate(file);

    NDSCart::DoSavestate(file);
    GPU::DoSavestate(file);
    SPU::DoSavestate(file);
    SPI::DoSavestate(file);
    RTC::DoSavestate(file);
    Wifi::DoSavestate(file);

    return true;
}

// Loads a ROM through NDSCart::LoadROM(). On success the system is marked as
// running and true is returned; on failure a message is printed and false is
// returned, leaving Running untouched.
bool LoadROM(const char* path, const char* sram, bool direct)
{
    if (!NDSCart::LoadROM(path, sram, direct))
    {
        printf("Failed to load ROM %s\n", path);
        return false;
    }

    Running = true;
    return true;
}

// Starts the system without loading a ROM: resets everything and marks the
// system as running.
void LoadBIOS()
{
    Reset();

    Running = true;
}

// Logs the request and forwards it to NDSCart::RelocateSave().
void RelocateSave(const char* path, bool write)
{
    const char* writestr = write ? "true" : "false";
    printf("SRAM: relocating to %s (write=%s)\n", path, writestr);

    NDSCart::RelocateSave(path, write);
}


void CalcIterationCycles()
{
    CurIterationCycles = 16;

    for (int i = 0; i < Event_MAX; i++)
    {
        if (!(SchedListMask & (1<<i)))
            continue;

        if (SchedList[i].WaitCycles < CurIterationCycles)
            CurIterationCycles = SchedList[i].WaitCycles;
    }
}

// Advances every pending scheduler event by the given number of cycles and
// fires the ones whose wait time has run out.
void RunSystem(s32 cycles)
{
    for (int i = 0; i < Event_MAX; i++)
    {
        // the mask is re-read on every pass since callbacks may schedule or
        // cancel events
        const u32 bit = (1<<i);
        if ((SchedListMask & bit) == 0)
            continue;

        SchedEvent* evt = &SchedList[i];
        evt->WaitCycles -= cycles;

        if (evt->WaitCycles >= 1)
            continue;

        // mark the event as no longer pending before running its callback,
        // so that the callback is able to schedule it again
        SchedListMask &= ~bit;
        evt->Func(evt->Param);
    }
}

// Runs the system until the GPU reports a finished frame (TotalScanlines
// becomes nonzero) or Running is cleared.
// Returns the number of scanlines of the frame, or 263 when not running.
//
// Each loop iteration runs the ARM9 side (CPU, DMA, or nothing during a
// GXFIFO stall) for up to CurIterationCycles, then lets the ARM7 side catch
// up, then advances timers and scheduler events by the same amount.
u32 RunFrame()
{
    if (!Running) return 263; // dorp

    GPU::StartFrame();

    while (Running && GPU::TotalScanlines==0)
    {
        // TODO: give it some margin, so it can directly do 17 cycles instead of 16 then 1
        CalcIterationCycles();
        s32 arm9cycles;

        if (CPUStop & 0x80000000)
        {
            // GXFIFO stall
            // we just run the GPU and the timers.
            // the rest of the hardware is driven by the event scheduler.

            arm9cycles = GPU3D::CyclesToRunFor();
            arm9cycles = std::min(CurIterationCycles, arm9cycles);
            RunTightTimers(0, arm9cycles);

#ifdef DEBUG_CHECK_DESYNC
            dbg_CyclesARM9 += arm9cycles;
#endif // DEBUG_CHECK_DESYNC
        }
        else if (CPUStop & 0x0FFF)
        {
            // ARM9 is stopped by DMA: give the cycle budget to DMA channels
            // 0-3 in order. Each Run() returns the cycles left over; a GXFIFO
            // stall raised in the middle stops the remaining channels.
            s32 cycles = CurIterationCycles;

            cycles = DMAs[0]->Run(cycles);
            if (cycles > 0 && !(CPUStop & 0x80000000))
                cycles = DMAs[1]->Run(cycles);
            if (cycles > 0 && !(CPUStop & 0x80000000))
                cycles = DMAs[2]->Run(cycles);
            if (cycles > 0 && !(CPUStop & 0x80000000))
                cycles = DMAs[3]->Run(cycles);

            // cycles actually consumed
            // NOTE(review): tight timers are not run in this branch; presumably
            // DMA::Run() takes care of them -- confirm
            arm9cycles = CurIterationCycles - cycles;
        }
        else
        {
            // ARM9 cycle counts are doubled (see the shifts by 1)
            ARM9->CyclesToRun = CurIterationCycles << 1;
            CurCPU = 1; ARM9->Execute(); CurCPU = 0;
            arm9cycles = ARM9->Cycles >> 1;
            RunTightTimers(0, arm9cycles);
        }

        RunLooseTimers(0, arm9cycles);
        GPU3D::Run(arm9cycles);

        // the rest of the system follows what the ARM9 side actually ran
        s32 ndscyclestorun = arm9cycles;

        // ARM7Offset > ndscyclestorun means we are too far ahead of the ARM9
        if (ARM7Offset > ndscyclestorun)
        {
            ARM7Offset -= ndscyclestorun;
        }
        else
        if (CPUStop & 0x0FFF0000)
        {
            // ARM7 is stopped by DMA: run channels 4-7 with what is left
            s32 cycles = ndscyclestorun - ARM7Offset;

            cycles = DMAs[4]->Run(cycles);
            if (cycles > 0)
                cycles = DMAs[5]->Run(cycles);
            if (cycles > 0)
                cycles = DMAs[6]->Run(cycles);
            if (cycles > 0)
                cycles = DMAs[7]->Run(cycles);

            ARM7Offset = -cycles;
        }
        else
        {
            ARM7->CyclesToRun = ndscyclestorun - ARM7Offset;
            CurCPU = 2; ARM7->Execute(); CurCPU = 0;
            // remember by how much the ARM7 overshot its budget
            ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun;
            RunTightTimers(1, ARM7->Cycles);
        }

#ifdef DEBUG_CHECK_DESYNC
        dbg_CyclesSys += ndscyclestorun;
#endif // DEBUG_CHECK_DESYNC

        RunLooseTimers(1, ndscyclestorun);
        RunSystem(ndscyclestorun);
    }

#ifdef DEBUG_CHECK_DESYNC
    // NOTE(review): the differences below are u64 but printed with %ld
    printf("[%08X%08X] ARM9=%ld timer9=%ld, ARM7=%ld timer7=%ld\n",
           (u32)(dbg_CyclesSys>>32), (u32)dbg_CyclesSys,
           dbg_CyclesARM9-dbg_CyclesSys,
           dbg_CyclesTimer9-dbg_CyclesSys,
           dbg_CyclesARM7-dbg_CyclesSys,
           dbg_CyclesTimer7-dbg_CyclesSys);
#endif

    return GPU::TotalScanlines;
}

void Reschedule()
{
    s32 oldcycles = CurIterationCycles;
    CalcIterationCycles();

    if (CurIterationCycles >= oldcycles)
    {
        CurIterationCycles = oldcycles;
        return;
    }

    if (CurCPU == 0)
    {
        CurIterationCycles = oldcycles;
        return;
    }

    if      (CurCPU == 1) ARM9->CyclesToRun = CurIterationCycles << 1;
    else if (CurCPU == 2) ARM7->CyclesToRun = CurIterationCycles - ARM7Offset;
    // this is all. a reschedule shouldn't happen during DMA or GXFIFO stall.
}

// Schedules event 'id' to call func(param) after 'delay' cycles.
//
// periodic: the delay is added to the event's previous wait time, so that
//           the overshoot of the previous occurrence carries over
// otherwise: the delay counts from now; when called while a CPU is executing,
//            the cycles that CPU has already run in this iteration are added,
//            since RunSystem() will subtract them again later
//
// Scheduling an event that is already pending is refused with a message.
void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 param)
{
    const u32 bit = (1<<id);

    if (SchedListMask & bit)
    {
        printf("!! EVENT %d ALREADY SCHEDULED\n", id);
        return;
    }

    SchedEvent* evt = &SchedList[id];

    if (periodic)
    {
        evt->WaitCycles += delay;
    }
    else
    {
        s32 elapsed = 0;
        if      (CurCPU == 1) elapsed = ARM9->Cycles >> 1;
        else if (CurCPU == 2) elapsed = ARM7->Cycles;

        evt->WaitCycles = delay + elapsed;
    }

    evt->Func = func;
    evt->Param = param;

    SchedListMask |= bit;

    Reschedule();
}

// Marks event 'id' as no longer pending. Its list entry is left untouched.
void CancelEvent(u32 id)
{
    const u32 bit = (1<<id);
    SchedListMask &= ~bit;
}


// Marks a key as pressed by clearing its bit in KeyInput.
void PressKey(u32 key)
{
    const u32 bit = (1 << key);
    KeyInput &= ~bit;
}

// Marks a key as released by setting its bit in KeyInput.
void ReleaseKey(u32 key)
{
    const u32 bit = (1 << key);
    KeyInput |= bit;
}

// Forwards the touch position to the touchscreen controller.
void TouchScreen(u16 x, u16 y)
{
    SPI_TSC::SetTouchCoords(x, y);
}

void ReleaseScreen()
{
    SPI_TSC::SetTouchCoords(0x000, 0xFFF);
}


// Replaces the state of all twelve keys at once.
// Bits 0-9 of mask go to bits 0-9 of KeyInput, bits 10-11 go to bits 16-17.
// All other KeyInput bits are kept.
void SetKeyMask(u32 mask)
{
    const u32 lowkeys  = mask & 0x3FF;
    const u32 highkeys = ((mask >> 10) & 0x3) << 16;

    KeyInput = (KeyInput & 0xFFFCFC00) | lowkeys | highkeys;
}


// Logs the call and clears the Running flag, which ends the RunFrame() loop.
void Halt()
{
    printf("Halt()\n");

    Running = false;
}


// Stores the new WRAMCnt value and updates the shared WRAM mapping of both
// CPUs according to its low two bits:
//   0: ARM9 gets all 32KB,            ARM7 gets nothing
//   1: ARM9 gets the second 16KB,     ARM7 gets the first 16KB
//   2: ARM9 gets the first 16KB,      ARM7 gets the second 16KB
//   3: ARM9 gets nothing,             ARM7 gets all 32KB
// "Nothing" is expressed as a NULL pointer and a mask of 0.
void MapSharedWRAM(u8 val)
{
    WRAMCnt = val;

    const u32 mode = WRAMCnt & 0x3;

    // ARM9 side
    if (mode == 3)
    {
        SWRAM_ARM9 = NULL;
        SWRAM_ARM9Mask = 0;
    }
    else
    {
        SWRAM_ARM9 = &SharedWRAM[(mode == 1) ? 0x4000 : 0];
        SWRAM_ARM9Mask = (mode == 0) ? 0x7FFF : 0x3FFF;
    }

    // ARM7 side
    if (mode == 0)
    {
        SWRAM_ARM7 = NULL;
        SWRAM_ARM7Mask = 0;
    }
    else
    {
        SWRAM_ARM7 = &SharedWRAM[(mode == 2) ? 0x4000 : 0];
        SWRAM_ARM7Mask = (mode == 3) ? 0x7FFF : 0x3FFF;
    }
}


// Stores a new wifi wait control value and updates the ARM7 timings of the
// two wifi regions accordingly. Does nothing if the value is unchanged.
//
// bits 0-1: nonsequential timing of 0x04800000-0x04807FFF (table lookup)
// bit 2:    sequential timing of that region (set: 4, clear: 6)
// bits 3-4: nonsequential timing of 0x04808000-0x0480FFFF (table lookup)
// bit 5:    sequential timing of that region (set: 4, clear: 10)
void SetWifiWaitCnt(u16 val)
{
    if (val == WifiWaitCnt) return;

    WifiWaitCnt = val;

    const int ntimings[4] = {10, 8, 6, 18};

    const int region0N = ntimings[val & 0x3];
    const int region0S = (val & 0x4) ? 4 : 6;
    const int region1N = ntimings[(val>>3) & 0x3];
    const int region1S = (val & 0x20) ? 4 : 10;

    SetARM7RegionTimings(0x04800000, 0x04808000, 16, region0N, region0S);
    SetARM7RegionTimings(0x04808000, 0x04810000, 16, region1N, region1S);
}

void SetGBASlotTimings()
{
    int curcpu = (ExMemCnt[0] >> 7) & 0x1;

    const int ntimings[4] = {10, 8, 6, 18};

    u16 curcnt = ExMemCnt[curcpu];
    int ramN = ntimings[curcnt & 0x3];
    int romN = ntimings[(curcnt>>2) & 0x3];
    int romS = (curcnt & 0x10) ? 4 : 6;

    // TODO: PHI pin thing?

    if (curcpu == 0)
    {
        SetARM9RegionTimings(0x08000000, 0x0A000000, 16, romN + 3, romS);
        SetARM9RegionTimings(0x0A000000, 0x0B000000, 8, ramN + 3, ramN);

        SetARM7RegionTimings(0x08000000, 0x0A000000, 32, 1, 1);
        SetARM7RegionTimings(0x0A000000, 0x0B000000, 32, 1, 1);
    }
    else
    {
        SetARM9RegionTimings(0x08000000, 0x0A000000, 32, 1, 1);
        SetARM9RegionTimings(0x0A000000, 0x0B000000, 32, 1, 1);

        SetARM7RegionTimings(0x08000000, 0x0A000000, 16, romN, romS);
        SetARM7RegionTimings(0x0A000000, 0x0B000000, 8, ramN, ramN);
    }
}


// Raises interrupt request 'irq' for the given CPU (0 = ARM9, 1 = ARM7).
void SetIRQ(u32 cpu, u32 irq)
{
    const u32 bit = (1 << irq);
    IF[cpu] |= bit;
}

// Clears interrupt request 'irq' for the given CPU (0 = ARM9, 1 = ARM7).
void ClearIRQ(u32 cpu, u32 irq)
{
    const u32 bit = (1 << irq);
    IF[cpu] &= ~bit;
}

// Tells whether a halted CPU should wake up, i.e. whether an interrupt that
// is both requested and enabled is present.
// For the ARM9 (cpu 0) the master enable must be set as well; the ARM7 check
// ignores IME.
bool HaltInterrupted(u32 cpu)
{
    if (cpu == 0 && !(IME[0] & 0x1))
        return false;

    return (IF[cpu] & IE[cpu]) != 0;
}

// Sets stop flags for a CPU and halts it with Halt(2).
// ARM9 flags live in the low half of CPUStop, ARM7 flags are shifted up by 16.
void StopCPU(u32 cpu, u32 mask)
{
    if (cpu == 0)
    {
        CPUStop |= mask;
        ARM9->Halt(2);
        return;
    }

    CPUStop |= (mask << 16);
    ARM7->Halt(2);
}

// Clears stop flags previously set with StopCPU() for the same CPU and mask.
void ResumeCPU(u32 cpu, u32 mask)
{
    const u32 bits = cpu ? (mask << 16) : mask;
    CPUStop &= ~bits;
}

// Flags a GXFIFO stall (bit 31 of CPUStop). Does nothing if one is already
// flagged. If the ARM9 is the one currently executing it is halted; otherwise
// the ARM9 DMA channels are asked to stall if they are running.
void GXFIFOStall()
{
    if (CPUStop & 0x80000000) return;

    CPUStop |= 0x80000000;

    if (CurCPU == 1)
    {
        ARM9->Halt(2);
        return;
    }

    for (int ch = 0; ch < 4; ch++)
        DMAs[ch]->StallIfRunning();
}

// Ends a GXFIFO stall by clearing bit 31 of CPUStop.
void GXFIFOUnstall()
{
    CPUStop = CPUStop & ~0x80000000;
}

// Returns R15 of the requested CPU: the ARM9 for cpu 0, the ARM7 otherwise.
u32 GetPC(u32 cpu)
{
    if (cpu)
        return ARM7->R[15];

    return ARM9->R[15];
}



// Handles an overflow of timer 'tid': reloads it, raises its IRQ if enabled
// (bit 6 of Cnt), then ticks the following timers of the same CPU for as
// long as they are enabled in cascade mode (Cnt bits 7 and 2) and overflow
// in turn.
void HandleTimerOverflow(u32 tid)
{
    Timer* timer = &Timers[tid];
    //if ((timer->Cnt & 0x84) != 0x80) return;

    // the counter has already wrapped; adding the reload value keeps the
    // amount by which it overshot
    timer->Counter += timer->Reload << 16;
    if (timer->Cnt & (1<<6))
        SetIRQ(tid >> 2, IRQ_Timer0 + (tid & 0x3));

    //u32 delay = (0x10000 - timer->Reload) << (16 - timer->CycleShift);
    //delay -= (timer->Counter - timer->Reload) >> timer->CycleShift;
    //printf("timer%d IRQ: resched %d, reload=%04X cnt=%08X\n", tid, delay, timer->Reload, timer->Counter);
    //ScheduleEvent(Event_TimerIRQ_0 + tid, true, delay, HandleTimerOverflow, tid);

    // walk up the chain; the last timer of a CPU has nothing to cascade into
    while ((tid & 0x3) != 3)
    {
        tid++;
        Timer* next = &Timers[tid];

        // stop at the first timer that isn't an enabled cascade timer
        if ((next->Cnt & 0x84) != 0x84)
            return;

        // one tick; the visible counter is the upper 16 bits
        next->Counter += 0x10000;
        if ((next->Counter >> 16) != 0)
            return;

        // it wrapped to zero: reload and carry on to the one after it
        next->Counter = next->Reload << 16;
        if (next->Cnt & (1<<6))
            SetIRQ(tid >> 2, IRQ_Timer0 + (tid & 0x3));
    }
}

// Advances timer 'tid' by the given number of cycles, scaled by the timer's
// CycleShift, and handles the overflow if the 32-bit counter wrapped.
void RunTimer(u32 tid, s32 cycles)
{
    Timer* timer = &Timers[tid];
    //if ((timer->Cnt & 0x84) != 0x80)
    //    return;

    const u32 before = timer->Counter;
    const u32 after = before + (cycles << timer->CycleShift);
    timer->Counter = after;

    // a smaller value after adding means the counter wrapped around
    if (after < before)
        HandleTimerOverflow(tid);
}

// Advances the "tight" timers of a CPU (0 = ARM9, 1 = ARM7), i.e. those whose
// bit is set in the low nibble of TimerCheckMask[cpu], by the given number of
// cycles.
void RunTightTimers(u32 cpu, s32 cycles)
{
    // read once up front; 'register' was dropped here since that storage
    // class no longer exists as of C++17
    const u32 timermask = TimerCheckMask[cpu];

    for (u32 i = 0; i < 4; i++)
    {
        if (timermask & (0x1 << i))
            RunTimer((cpu<<2)+i, cycles);
    }

#ifdef DEBUG_CHECK_DESYNC
    if (cpu) dbg_CyclesTimer7 += cycles;
    else     dbg_CyclesTimer9 += cycles;
#endif // DEBUG_CHECK_DESYNC
}

// Advances the "loose" timers of a CPU (0 = ARM9, 1 = ARM7), i.e. those whose
// bit is set in the high nibble of TimerCheckMask[cpu], by the given number
// of cycles.
void RunLooseTimers(u32 cpu, s32 cycles)
{
    // read once up front; 'register' was dropped here since that storage
    // class no longer exists as of C++17
    const u32 timermask = TimerCheckMask[cpu];

    for (u32 i = 0; i < 4; i++)
    {
        if (timermask & (0x10 << i))
            RunTimer((cpu<<2)+i, cycles);
    }
}



// Returns true if any of the four DMA channels of the given CPU reports
// being in the given mode.
bool DMAsInMode(u32 cpu, u32 mode)
{
    const u32 first = cpu << 2;

    for (u32 ch = 0; ch < 4; ch++)
    {
        if (DMAs[first+ch]->IsInMode(mode))
            return true;
    }

    return false;
}

// Returns true if any of the four DMA channels of the given CPU is running.
bool DMAsRunning(u32 cpu)
{
    const u32 first = cpu << 2;

    for (u32 ch = 0; ch < 4; ch++)
    {
        if (DMAs[first+ch]->IsRunning())
            return true;
    }

    return false;
}

// Asks each of the four DMA channels of the given CPU, in channel order, to
// start if it is waiting for the given mode.
void CheckDMAs(u32 cpu, u32 mode)
{
    const u32 first = cpu << 2;

    for (u32 ch = 0; ch < 4; ch++)
        DMAs[first+ch]->StartIfNeeded(mode);
}

// Asks each of the four DMA channels of the given CPU, in channel order, to
// stop if it is set up for the given mode.
void StopDMAs(u32 cpu, u32 mode)
{
    const u32 first = cpu << 2;

    for (u32 ch = 0; ch < 4; ch++)
        DMAs[first+ch]->StopIfNeeded(mode);
}




// Prescaler shift for each value of timer control bits 0-1
// (i.e. divide by 1, 64, 256, 1024). TimerStart() turns it into the
// per-timer CycleShift as 16 - shift.
const s32 TimerPrescaler[4] = {0, 6, 8, 10};

// Returns the visible 16-bit value of a timer, which is kept in the upper
// half of its 32-bit internal counter.
u16 TimerGetCounter(u32 timer)
{
    return Timers[timer].Counter >> 16;
}

// Applies a new control value to timer 'id' (0-3: ARM9, 4-7: ARM7).
//
// - stores cnt and derives the cycle shift from the prescaler (bits 0-1)
// - reloads the counter when the enable bit (bit 7) goes from 0 to 1
// - updates TimerCheckMask so that the timer is advanced by RunTightTimers()
//   (prescaler 0) or RunLooseTimers() (any other prescaler) while it is
//   enabled and not in cascade mode (bit 2), and by neither otherwise
void TimerStart(u32 id, u16 cnt)
{
    Timer* timer = &Timers[id];
    u16 curstart = timer->Cnt & (1<<7);
    u16 newstart = cnt & (1<<7);

    timer->Cnt = cnt;
    timer->CycleShift = 16 - TimerPrescaler[cnt & 0x03];

    if ((!curstart) && newstart)
    {
        timer->Counter = timer->Reload << 16;

        /*if ((cnt & 0x84) == 0x80)
        {
            u32 delay = (0x10000 - timer->Reload) << TimerPrescaler[cnt & 0x03];
            printf("timer%d IRQ: start   %d, reload=%04X cnt=%08X\n", id, delay, timer->Reload, timer->Counter);
            CancelEvent(Event_TimerIRQ_0 + id);
            ScheduleEvent(Event_TimerIRQ_0 + id, false, delay, HandleTimerOverflow, id);
        }*/
    }

    // Always drop both the tight and the loose bit of this timer first.
    // Otherwise rewriting the control register of a running timer with a
    // different prescaler would leave the old bit set next to the new one,
    // and the timer would be advanced twice per iteration.
    u32 checkmask = TimerCheckMask[id>>2] & ~(0x11 << (id&0x3));

    if ((cnt & 0x84) == 0x80)
    {
        u32 tmask;
        if ((cnt & 0x03) == 0)
            tmask = 0x01 << (id&0x3);
        else
            tmask = 0x10 << (id&0x3);

        checkmask |= tmask;
    }

    TimerCheckMask[id>>2] = (u8)checkmask;
}



// Scheduler callback: completes a division started by StartDiv().
// Clears the busy and div-by-zero flags (bits 15 and 14 of DivCnt), computes
// quotient and remainder according to the mode in DivCnt bits 0-1, and sets
// the div-by-zero flag if the full 64-bit denominator is zero.
//
// mode 0:    32-bit / 32-bit
// mode 1, 3: 64-bit / 32-bit
// mode 2:    64-bit / 64-bit
//
// Special cases, handled explicitly since they can't be left to the host:
// - division by zero: the quotient is +1 for a negative numerator and -1
//   otherwise, the remainder is the numerator. In 32-bit mode the upper word
//   of the quotient is the inverse of the sign extension of the lower word
//   (0 for -1, 0xFFFFFFFF for +1).
// - most negative value divided by -1: the quotient is the unsigned
//   magnitude (not sign-extended in 32-bit mode) and the remainder is 0.
//
// param is unused.
void DivDone(u32 param)
{
    DivCnt &= ~0xC000;

    switch (DivCnt & 0x0003)
    {
    case 0x0000:
        {
            s32 num = (s32)DivNumerator[0];
            s32 den = (s32)DivDenominator[0];
            if (den == 0)
            {
                DivQuotient[0] = (num<0) ? 1:-1;
                // inverted sign extension: 0xFFFFFFFF above +1, 0 above -1
                DivQuotient[1] = (num<0) ? -1:0;
                *(s64*)&DivRemainder[0] = num;
            }
            else if (num == -0x80000000 && den == -1)
            {
                *(s64*)&DivQuotient[0] = 0x80000000;
                // the division is exact; don't leave a stale remainder behind
                *(s64*)&DivRemainder[0] = 0;
            }
            else
            {
                *(s64*)&DivQuotient[0] = (s64)(num / den);
                *(s64*)&DivRemainder[0] = (s64)(num % den);
            }
        }
        break;

    case 0x0001:
    case 0x0003:
        {
            s64 num = *(s64*)&DivNumerator[0];
            s32 den = (s32)DivDenominator[0];
            if (den == 0)
            {
                *(s64*)&DivQuotient[0] = (num<0) ? 1:-1;
                *(s64*)&DivRemainder[0] = num;
            }
            else if (num == -0x8000000000000000 && den == -1)
            {
                *(s64*)&DivQuotient[0] = 0x8000000000000000;
                // the division is exact; don't leave a stale remainder behind
                *(s64*)&DivRemainder[0] = 0;
            }
            else
            {
                *(s64*)&DivQuotient[0] = (s64)(num / den);
                *(s64*)&DivRemainder[0] = (s64)(num % den);
            }
        }
        break;

    case 0x0002:
        {
            s64 num = *(s64*)&DivNumerator[0];
            s64 den = *(s64*)&DivDenominator[0];
            if (den == 0)
            {
                *(s64*)&DivQuotient[0] = (num<0) ? 1:-1;
                *(s64*)&DivRemainder[0] = num;
            }
            else if (num == -0x8000000000000000 && den == -1)
            {
                *(s64*)&DivQuotient[0] = 0x8000000000000000;
                // the division is exact; don't leave a stale remainder behind
                *(s64*)&DivRemainder[0] = 0;
            }
            else
            {
                *(s64*)&DivQuotient[0] = (s64)(num / den);
                *(s64*)&DivRemainder[0] = (s64)(num % den);
            }
        }
        break;
    }

    // the flag looks at all 64 bits of the denominator, whatever the mode
    if ((DivDenominator[0] | DivDenominator[1]) == 0)
        DivCnt |= 0x4000;
}

void StartDiv()
{
    NDS::CancelEvent(NDS::Event_Div);
    DivCnt |= 0x8000;
    NDS::ScheduleEvent(NDS::Event_Div, false, ((DivCnt&0x3)==0) ? 18:34, DivDone, 0);
}

// http://stackoverflow.com/questions/1100090/looking-for-an-efficient-integer-square-root-algorithm-for-arm-thumb2
// Scheduler callback: completes a square root started by StartSqrt().
// Clears the busy flag (bit 15 of SqrtCnt) and stores the integer square
// root of the input in SqrtRes. Bit 0 of SqrtCnt selects a 64-bit input
// (both words of SqrtVal); otherwise only the low word is used.
//
// Digit-by-digit method: the input is consumed two bits at a time from the
// top, producing one result bit per step.
//
// param is unused.
void SqrtDone(u32 param)
{
    u64 val;
    u32 res = 0;
    u64 rem = 0;
    u32 nbits, topshift;

    SqrtCnt &= ~0x8000;

    if (SqrtCnt & 0x0001)
    {
        val = *(u64*)&SqrtVal[0];
        nbits = 32;
        topshift = 62;
    }
    else
    {
        val = (u64)SqrtVal[0]; // 32bit
        nbits = 16;
        topshift = 30;
    }

    for (u32 i = 0; i < nbits; i++)
    {
        rem = (rem << 2) + ((val >> topshift) & 0x3);
        val <<= 2;
        res <<= 1;

        // 2*res+1 needs 33 bits on the last step of a 64-bit root, so it must
        // be computed in 64 bits. With a 32-bit value the top bit was lost and
        // e.g. sqrt(0x4000000000000001) came out as 0x80000001.
        const u64 prod = ((u64)res << 1) + 1;
        if (rem >= prod)
        {
            rem -= prod;
            res++;
        }
    }

    SqrtRes = res;
}

// Starts a square root operation: drops any pending sqrt event, raises
// SqrtCnt bit 15 (cleared again by SqrtDone) and schedules SqrtDone.
void StartSqrt()
{
    // fixed delay passed to the scheduler, independent of the input width
    const int delay = 13;

    NDS::CancelEvent(NDS::Event_Sqrt);
    SqrtCnt |= 0x8000;
    NDS::ScheduleEvent(NDS::Event_Sqrt, false, delay, SqrtDone, 0);
}



// Diagnostic helper: prints a snapshot of both CPUs to stdout.
// For each CPU it prints R[15], R[14] and R_IRQ[1], followed by the
// IME/IE/IF values (index 0 is printed as ARM9, index 1 as ARM7).
// `param` is not used.
void debug(u32 param)
{
    printf("ARM9 PC=%08X LR=%08X %08X\n", ARM9->R[15], ARM9->R[14], ARM9->R_IRQ[1]);
    printf("ARM7 PC=%08X LR=%08X %08X\n", ARM7->R[15], ARM7->R[14], ARM7->R_IRQ[1]);

    printf("ARM9 IME=%08X IE=%08X IF=%08X\n", IME[0], IE[0], IF[0]);
    printf("ARM7 IME=%08X IE=%08X IF=%08X\n", IME[1], IE[1], IF[1]);

    // disabled: dump of the VRAM bank control bytes
    //for (int i = 0; i < 9; i++)
    //    printf("VRAM %c: %02X\n", 'A'+i, GPU::VRAMCNT[i]);

    // disabled: dumps 0x02000000-0x023FFFFF and 0x037F0000-0x0380FFFF,
    // as read through ARM7Read32, to debug/dldio.bin
    /*FILE*
    dump = fopen("debug/dldio.bin", "wb");
    for (u32 i = 0x02000000; i < 0x02400000; i+=4)
    {
        u32 val = ARM7Read32(i);
        fwrite(&val, 4, 1, dump);
    }
    for (u32 i = 0x037F0000; i < 0x03810000; i+=4)
    {
        u32 val = ARM7Read32(i);
        fwrite(&val, 4, 1, dump);
    }
    fclose(dump);*/
}



// 8-bit read from the ARM9 bus.
// The 4K BIOS page at 0xFFFF0000 is checked first; every other address is
// routed by its top byte. Addresses nothing claims are logged and read as 0.
u8 ARM9Read8(u32 addr)
{
    if ((addr & 0xFFFFF000) == 0xFFFF0000)
        return *(u8*)&ARM9BIOS[addr & 0xFFF];

    const u32 area = addr & 0xFF000000;

    if (area == 0x02000000)
        return *(u8*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)];

    if (area == 0x03000000)
    {
        // reads as 0 while SWRAM_ARM9 is null
        if (!SWRAM_ARM9) return 0;
        return *(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask];
    }

    if (area == 0x04000000)
        return ARM9IORead8(addr);

    if (area == 0x05000000)
        return *(u8*)&GPU::Palette[addr & 0x7FF];

    if (area == 0x06000000)
    {
        // address bits 21-23 pick the VRAM view
        const u32 view = addr & 0x00E00000;
        if (view == 0x00000000) return GPU::ReadVRAM_ABG<u8>(addr);
        if (view == 0x00200000) return GPU::ReadVRAM_BBG<u8>(addr);
        if (view == 0x00400000) return GPU::ReadVRAM_AOBJ<u8>(addr);
        if (view == 0x00600000) return GPU::ReadVRAM_BOBJ<u8>(addr);
        return GPU::ReadVRAM_LCDC<u8>(addr);
    }

    if (area == 0x07000000)
        return *(u8*)&GPU::OAM[addr & 0x7FF];

    if (area == 0x08000000 || area == 0x09000000)
    {
        // TODO: proper open bus
        if (ExMemCnt[0] & (1<<7))
            return 0xFF;

        //return *(u8*)&NDSCart::CartROM[addr & (NDSCart::CartROMSize-1)];
        //printf("GBA read8 %08X\n", addr);
        // TODO!!!
        return 0xFF;
    }

    printf("unknown arm9 read8 %08X\n", addr);
    return 0;
}

// 16-bit read from the ARM9 bus.
// Same routing as the 8-bit variant: BIOS page first, then dispatch on the
// top address byte. Unclaimed addresses silently read as 0.
u16 ARM9Read16(u32 addr)
{
    if ((addr & 0xFFFFF000) == 0xFFFF0000)
        return *(u16*)&ARM9BIOS[addr & 0xFFF];

    const u32 area = addr & 0xFF000000;

    if (area == 0x02000000)
        return *(u16*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)];

    if (area == 0x03000000)
    {
        // reads as 0 while SWRAM_ARM9 is null
        if (!SWRAM_ARM9) return 0;
        return *(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask];
    }

    if (area == 0x04000000)
        return ARM9IORead16(addr);

    if (area == 0x05000000)
        return *(u16*)&GPU::Palette[addr & 0x7FF];

    if (area == 0x06000000)
    {
        // address bits 21-23 pick the VRAM view
        const u32 view = addr & 0x00E00000;
        if (view == 0x00000000) return GPU::ReadVRAM_ABG<u16>(addr);
        if (view == 0x00200000) return GPU::ReadVRAM_BBG<u16>(addr);
        if (view == 0x00400000) return GPU::ReadVRAM_AOBJ<u16>(addr);
        if (view == 0x00600000) return GPU::ReadVRAM_BOBJ<u16>(addr);
        return GPU::ReadVRAM_LCDC<u16>(addr);
    }

    if (area == 0x07000000)
        return *(u16*)&GPU::OAM[addr & 0x7FF];

    if (area == 0x08000000 || area == 0x09000000)
    {
        // TODO: proper open bus
        if (ExMemCnt[0] & (1<<7))
            return 0xFFFF;

        //return *(u16*)&NDSCart::CartROM[addr & (NDSCart::CartROMSize-1)];
        //printf("GBA read16 %08X\n", addr);
        // TODO!!!
        return 0xFFFF;
    }

    //printf("unknown arm9 read16 %08X %08X\n", addr, ARM9->R[15]);
    return 0;
}

// 32-bit read from the ARM9 bus.
// BIOS page first, then dispatch on the top address byte. Unclaimed
// addresses are logged (with R15 and R12 of the ARM9) and read as 0.
u32 ARM9Read32(u32 addr)
{
    if ((addr & 0xFFFFF000) == 0xFFFF0000)
        return *(u32*)&ARM9BIOS[addr & 0xFFF];

    const u32 area = addr & 0xFF000000;

    if (area == 0x02000000)
        return *(u32*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)];

    if (area == 0x03000000)
    {
        // reads as 0 while SWRAM_ARM9 is null
        if (!SWRAM_ARM9) return 0;
        return *(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask];
    }

    if (area == 0x04000000)
        return ARM9IORead32(addr);

    if (area == 0x05000000)
        return *(u32*)&GPU::Palette[addr & 0x7FF];

    if (area == 0x06000000)
    {
        // address bits 21-23 pick the VRAM view
        const u32 view = addr & 0x00E00000;
        if (view == 0x00000000) return GPU::ReadVRAM_ABG<u32>(addr);
        if (view == 0x00200000) return GPU::ReadVRAM_BBG<u32>(addr);
        if (view == 0x00400000) return GPU::ReadVRAM_AOBJ<u32>(addr);
        if (view == 0x00600000) return GPU::ReadVRAM_BOBJ<u32>(addr);
        return GPU::ReadVRAM_LCDC<u32>(addr);
    }

    if (area == 0x07000000)
        return *(u32*)&GPU::OAM[addr & 0x7FF];

    if (area == 0x08000000 || area == 0x09000000)
    {
        // TODO: proper open bus
        if (ExMemCnt[0] & (1<<7))
            return 0xFFFFFFFF;

        //return *(u32*)&NDSCart::CartROM[addr & (NDSCart::CartROMSize-1)];
        //printf("GBA read32 %08X\n", addr);
        // TODO!!!
        return 0xFFFFFFFF;
    }

    printf("unknown arm9 read32 %08X | %08X %08X\n", addr, ARM9->R[15], ARM9->R[12]);
    return 0;
}

// 8-bit write on the ARM9 bus, routed by the top address byte.
// Byte writes to the 0x05/0x06/0x07 areas are dropped; writes nothing
// claims are logged.
void ARM9Write8(u32 addr, u8 val)
{
    const u32 area = addr & 0xFF000000;

    if (area == 0x02000000)
    {
        *(u8*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
    }
    else if (area == 0x03000000)
    {
        // dropped while SWRAM_ARM9 is null
        if (SWRAM_ARM9)
            *(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
    }
    else if (area == 0x04000000)
    {
        ARM9IOWrite8(addr, val);
    }
    else if (area == 0x05000000 || area == 0x06000000 || area == 0x07000000)
    {
        // checkme
    }
    else
    {
        printf("unknown arm9 write8 %08X %02X\n", addr, val);
    }
}

// 16-bit write on the ARM9 bus, routed by the top address byte.
// Writes nothing claims are silently ignored.
void ARM9Write16(u32 addr, u16 val)
{
    const u32 area = addr & 0xFF000000;

    if (area == 0x02000000)
    {
        *(u16*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
    }
    else if (area == 0x03000000)
    {
        // dropped while SWRAM_ARM9 is null
        if (SWRAM_ARM9)
            *(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
    }
    else if (area == 0x04000000)
    {
        ARM9IOWrite16(addr, val);
    }
    else if (area == 0x05000000)
    {
        *(u16*)&GPU::Palette[addr & 0x7FF] = val;
    }
    else if (area == 0x06000000)
    {
        // address bits 21-23 pick the VRAM view
        const u32 view = addr & 0x00E00000;
        if      (view == 0x00000000) GPU::WriteVRAM_ABG<u16>(addr, val);
        else if (view == 0x00200000) GPU::WriteVRAM_BBG<u16>(addr, val);
        else if (view == 0x00400000) GPU::WriteVRAM_AOBJ<u16>(addr, val);
        else if (view == 0x00600000) GPU::WriteVRAM_BOBJ<u16>(addr, val);
        else                         GPU::WriteVRAM_LCDC<u16>(addr, val);
    }
    else if (area == 0x07000000)
    {
        *(u16*)&GPU::OAM[addr & 0x7FF] = val;
    }

    //printf("unknown arm9 write16 %08X %04X\n", addr, val);
}

// 32-bit write on the ARM9 bus, routed by the top address byte.
// Writes nothing claims are logged together with the ARM9's R15.
void ARM9Write32(u32 addr, u32 val)
{
    const u32 area = addr & 0xFF000000;

    if (area == 0x02000000)
    {
        *(u32*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
    }
    else if (area == 0x03000000)
    {
        // dropped while SWRAM_ARM9 is null
        if (SWRAM_ARM9)
            *(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
    }
    else if (area == 0x04000000)
    {
        ARM9IOWrite32(addr, val);
    }
    else if (area == 0x05000000)
    {
        *(u32*)&GPU::Palette[addr & 0x7FF] = val;
    }
    else if (area == 0x06000000)
    {
        // address bits 21-23 pick the VRAM view
        const u32 view = addr & 0x00E00000;
        if      (view == 0x00000000) GPU::WriteVRAM_ABG<u32>(addr, val);
        else if (view == 0x00200000) GPU::WriteVRAM_BBG<u32>(addr, val);
        else if (view == 0x00400000) GPU::WriteVRAM_AOBJ<u32>(addr, val);
        else if (view == 0x00600000) GPU::WriteVRAM_BOBJ<u32>(addr, val);
        else                         GPU::WriteVRAM_LCDC<u32>(addr, val);
    }
    else if (area == 0x07000000)
    {
        *(u32*)&GPU::OAM[addr & 0x7FF] = val;
    }
    else
    {
        printf("unknown arm9 write32 %08X %08X | %08X\n", addr, val, ARM9->R[15]);
    }
}

// Describes the directly addressable memory block behind `addr` on the ARM9 side.
// On success fills region->Mem / region->Mask and returns true. Otherwise
// sets region->Mem to NULL and returns false (region->Mask is left untouched).
// The BIOS page is only reported for reads (`write` == false).
bool ARM9GetMemRegion(u32 addr, bool write, MemRegion* region)
{
    const u32 area = addr & 0xFF000000;

    if (area == 0x02000000)
    {
        region->Mem = MainRAM;
        region->Mask = MAIN_RAM_SIZE-1;
        return true;
    }

    if (area == 0x03000000 && SWRAM_ARM9)
    {
        region->Mem = SWRAM_ARM9;
        region->Mask = SWRAM_ARM9Mask;
        return true;
    }

    if (!write && (addr & 0xFFFFF000) == 0xFFFF0000)
    {
        region->Mem = ARM9BIOS;
        region->Mask = 0xFFF;
        return true;
    }

    region->Mem = NULL;
    return false;
}



// 8-bit read from the ARM7 bus.
// The BIOS (below 0x4000) is only readable while the ARM7's R15 is itself
// inside the BIOS, and addresses below ARM7BIOSProt additionally require R15
// to be below ARM7BIOSProt; blocked reads return all ones.
// Everything else is routed by the top 9 address bits; unclaimed addresses
// are logged and read as 0.
u8 ARM7Read8(u32 addr)
{
    if (addr < 0x00004000)
    {
        const bool blocked = (ARM7->R[15] >= 0x4000) ||
                             (addr < ARM7BIOSProt && ARM7->R[15] >= ARM7BIOSProt);
        if (blocked) return 0xFF;
        return *(u8*)&ARM7BIOS[addr];
    }

    const u32 area = addr & 0xFF800000;

    if (area == 0x02000000 || area == 0x02800000)
        return *(u8*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)];

    if (area == 0x03000000 && SWRAM_ARM7)
        return *(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask];

    // 0x03000000 falls back to ARM7WRAM while SWRAM_ARM7 is null
    if (area == 0x03000000 || area == 0x03800000)
        return *(u8*)&ARM7WRAM[addr & 0xFFFF];

    if (area == 0x04000000)
        return ARM7IORead8(addr);

    if (area == 0x06000000 || area == 0x06800000)
        return GPU::ReadVRAM_ARM7<u8>(addr);

    printf("unknown arm7 read8 %08X %08X %08X/%08X\n", addr, ARM7->R[15], ARM7->R[0], ARM7->R[1]);
    return 0;
}

// 16-bit read from the ARM7 bus.
// BIOS reads are gated on the ARM7's R15 as in the 8-bit variant (blocked
// reads return 0xFFFF). Addresses in 0x04800000-0x0480FFFF go to Wifi::Read.
// Unclaimed addresses are logged and read as 0.
u16 ARM7Read16(u32 addr)
{
    if (addr < 0x00004000)
    {
        const bool blocked = (ARM7->R[15] >= 0x4000) ||
                             (addr < ARM7BIOSProt && ARM7->R[15] >= ARM7BIOSProt);
        if (blocked) return 0xFFFF;
        return *(u16*)&ARM7BIOS[addr];
    }

    const u32 area = addr & 0xFF800000;

    if (area == 0x02000000 || area == 0x02800000)
        return *(u16*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)];

    if (area == 0x03000000 && SWRAM_ARM7)
        return *(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask];

    // 0x03000000 falls back to ARM7WRAM while SWRAM_ARM7 is null
    if (area == 0x03000000 || area == 0x03800000)
        return *(u16*)&ARM7WRAM[addr & 0xFFFF];

    if (area == 0x04000000)
        return ARM7IORead16(addr);

    if (area == 0x04800000 && addr < 0x04810000)
        return Wifi::Read(addr);

    if (area == 0x06000000 || area == 0x06800000)
        return GPU::ReadVRAM_ARM7<u16>(addr);

    printf("unknown arm7 read16 %08X %08X\n", addr, ARM7->R[15]);
    return 0;
}

// 32-bit read from the ARM7 bus.
// BIOS reads are gated on the ARM7's R15 as in the narrower variants (blocked
// reads return 0xFFFFFFFF). Addresses in 0x04800000-0x0480FFFF are assembled
// from two Wifi::Read calls, low half first. Unclaimed addresses are logged
// and read as 0.
u32 ARM7Read32(u32 addr)
{
    if (addr < 0x00004000)
    {
        const bool blocked = (ARM7->R[15] >= 0x4000) ||
                             (addr < ARM7BIOSProt && ARM7->R[15] >= ARM7BIOSProt);
        if (blocked) return 0xFFFFFFFF;
        return *(u32*)&ARM7BIOS[addr];
    }

    const u32 area = addr & 0xFF800000;

    if (area == 0x02000000 || area == 0x02800000)
        return *(u32*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)];

    if (area == 0x03000000 && SWRAM_ARM7)
        return *(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask];

    // 0x03000000 falls back to ARM7WRAM while SWRAM_ARM7 is null
    if (area == 0x03000000 || area == 0x03800000)
        return *(u32*)&ARM7WRAM[addr & 0xFFFF];

    if (area == 0x04000000)
        return ARM7IORead32(addr);

    if (area == 0x04800000 && addr < 0x04810000)
        return Wifi::Read(addr) | (Wifi::Read(addr+2) << 16);

    if (area == 0x06000000 || area == 0x06800000)
        return GPU::ReadVRAM_ARM7<u32>(addr);

    printf("unknown arm7 read32 %08X | %08X\n", addr, ARM7->R[15]);
    return 0;
}

// 8-bit write on the ARM7 bus, routed by the top 9 address bits.
// Writes nothing claims are logged together with the ARM7's R15.
void ARM7Write8(u32 addr, u8 val)
{
    const u32 area = addr & 0xFF800000;

    if (area == 0x02000000 || area == 0x02800000)
    {
        *(u8*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
    }
    else if (area == 0x03000000 && SWRAM_ARM7)
    {
        *(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
    }
    else if (area == 0x03000000 || area == 0x03800000)
    {
        // 0x03000000 falls back to ARM7WRAM while SWRAM_ARM7 is null
        *(u8*)&ARM7WRAM[addr & 0xFFFF] = val;
    }
    else if (area == 0x04000000)
    {
        ARM7IOWrite8(addr, val);
    }
    else if (area == 0x06000000 || area == 0x06800000)
    {
        GPU::WriteVRAM_ARM7<u8>(addr, val);
    }
    else
    {
        printf("unknown arm7 write8 %08X %02X @ %08X\n", addr, val, ARM7->R[15]);
    }
}

// 16-bit write on the ARM7 bus, routed by the top 9 address bits.
// Addresses in 0x04800000-0x0480FFFF go to Wifi::Write. Writes nothing
// claims are silently ignored.
void ARM7Write16(u32 addr, u16 val)
{
    const u32 area = addr & 0xFF800000;

    if (area == 0x02000000 || area == 0x02800000)
    {
        *(u16*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
    }
    else if (area == 0x03000000 && SWRAM_ARM7)
    {
        *(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
    }
    else if (area == 0x03000000 || area == 0x03800000)
    {
        // 0x03000000 falls back to ARM7WRAM while SWRAM_ARM7 is null
        *(u16*)&ARM7WRAM[addr & 0xFFFF] = val;
    }
    else if (area == 0x04000000)
    {
        ARM7IOWrite16(addr, val);
    }
    else if (area == 0x04800000 && addr < 0x04810000)
    {
        Wifi::Write(addr, val);
    }
    else if (area == 0x06000000 || area == 0x06800000)
    {
        GPU::WriteVRAM_ARM7<u16>(addr, val);
    }

    //printf("unknown arm7 write16 %08X %04X @ %08X\n", addr, val, ARM7->R[15]);
}

// 32-bit write on the ARM7 bus, routed by the top 9 address bits.
// Addresses in 0x04800000-0x0480FFFF are split into two Wifi::Write calls,
// low half first. Writes nothing claims are silently ignored.
void ARM7Write32(u32 addr, u32 val)
{
    const u32 area = addr & 0xFF800000;

    if (area == 0x02000000 || area == 0x02800000)
    {
        *(u32*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
    }
    else if (area == 0x03000000 && SWRAM_ARM7)
    {
        *(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
    }
    else if (area == 0x03000000 || area == 0x03800000)
    {
        // 0x03000000 falls back to ARM7WRAM while SWRAM_ARM7 is null
        *(u32*)&ARM7WRAM[addr & 0xFFFF] = val;
    }
    else if (area == 0x04000000)
    {
        ARM7IOWrite32(addr, val);
    }
    else if (area == 0x04800000 && addr < 0x04810000)
    {
        Wifi::Write(addr, val & 0xFFFF);
        Wifi::Write(addr+2, val >> 16);
    }
    else if (area == 0x06000000 || area == 0x06800000)
    {
        GPU::WriteVRAM_ARM7<u32>(addr, val);
    }

    //printf("unknown arm7 write32 %08X %08X @ %08X\n", addr, val, ARM7->R[15]);
}

// Describes the directly addressable memory block behind `addr` on the ARM7 side.
// On success fills region->Mem / region->Mask and returns true. Otherwise
// sets region->Mem to NULL and returns false (region->Mask is left untouched).
bool ARM7GetMemRegion(u32 addr, bool write, MemRegion* region)
{
    const u32 area = addr & 0xFF800000;

    if (area == 0x02000000 || area == 0x02800000)
    {
        region->Mem = MainRAM;
        region->Mask = MAIN_RAM_SIZE-1;
        return true;
    }

    // Why 0x03000000 is only covered while SWRAM_ARM7 is null:
    // games typically map all shared WRAM to the ARM7 and then access all the
    // WRAM as one contiguous block starting at 0x037F8000. Covering that case
    // would need a bit of a hack, and it's not really worth bothering anyway.
    const bool privateWRAM = (area == 0x03800000) ||
                             (area == 0x03000000 && !SWRAM_ARM7);
    if (privateWRAM)
    {
        region->Mem = ARM7WRAM;
        region->Mask = 0xFFFF;
        return true;
    }

    // BIOS: reads only, and the ARM7 PC has to be within range.
    const bool biosVisible = (addr < 0x00004000) && !write &&
                             (ARM7->R[15] < 0x4000) &&
                             (addr >= ARM7BIOSProt || ARM7->R[15] < ARM7BIOSProt);
    if (biosVisible)
    {
        region->Mem = ARM7BIOS;
        region->Mask = 0x3FFF;
        return true;
    }

    region->Mem = NULL;
    return false;
}




// Helpers for byte-wide reads of wider registers inside a `switch (addr)`.
// Each macro expands to one `case` label per byte of the register, starting
// at `addr` for the least significant byte, and returns that byte of `val`.
// `addr` must be an integral constant expression. It is fully parenthesized
// so that expression arguments (e.g. `BASE | 0x10`, `1 << 4`) still produce
// consecutive addresses.

// 16-bit register: bytes at addr and addr+1.
#define CASE_READ8_16BIT(addr, val) \
    case (addr): return (val) & 0xFF; \
    case ((addr)+1): return (val) >> 8;

// 32-bit register: bytes at addr .. addr+3.
#define CASE_READ8_32BIT(addr, val) \
    case (addr): return (val) & 0xFF; \
    case ((addr)+1): return ((val) >> 8) & 0xFF; \
    case ((addr)+2): return ((val) >> 16) & 0xFF; \
    case ((addr)+3): return (val) >> 24;

// 8-bit read from ARM9 I/O space.
// Registers handled here are served directly; the two 2D engine ranges and
// the 0x04000320-0x040006A3 range are forwarded to GPU2D_A / GPU2D_B / GPU3D.
// Anything else is logged with the ARM9's R15 and reads as 0.
u8 ARM9IORead8(u32 addr)
{
    // byte arrays laid out at consecutive addresses: index them directly
    if (addr >= 0x040001A8 && addr <= 0x040001AF)
        return NDSCart::ROMCommand[addr - 0x040001A8];
    if (addr >= 0x04000240 && addr <= 0x04000246)
        return GPU::VRAMCNT[addr - 0x04000240];

    switch (addr)
    {
    // key input / key control, low and high bytes
    case 0x04000130: return KeyInput & 0xFF;
    case 0x04000131: return (KeyInput >> 8) & 0xFF;
    case 0x04000132: return KeyCnt & 0xFF;
    case 0x04000133: return KeyCnt >> 8;

    case 0x040001A2: return NDSCart::ReadSPIData();

    case 0x04000208: return IME[0];

    // WRAMCnt sits between VRAMCNT[6] and VRAMCNT[7]
    case 0x04000247: return WRAMCnt;
    case 0x04000248: return GPU::VRAMCNT[7];
    case 0x04000249: return GPU::VRAMCNT[8];

    // divider registers
    CASE_READ8_16BIT(0x04000280, DivCnt)
    CASE_READ8_32BIT(0x04000290, DivNumerator[0])
    CASE_READ8_32BIT(0x04000294, DivNumerator[1])
    CASE_READ8_32BIT(0x04000298, DivDenominator[0])
    CASE_READ8_32BIT(0x0400029C, DivDenominator[1])
    CASE_READ8_32BIT(0x040002A0, DivQuotient[0])
    CASE_READ8_32BIT(0x040002A4, DivQuotient[1])
    CASE_READ8_32BIT(0x040002A8, DivRemainder[0])
    CASE_READ8_32BIT(0x040002AC, DivRemainder[1])

    // square root registers
    CASE_READ8_16BIT(0x040002B0, SqrtCnt)
    CASE_READ8_32BIT(0x040002B4, SqrtRes)
    CASE_READ8_32BIT(0x040002B8, SqrtVal[0])
    CASE_READ8_32BIT(0x040002BC, SqrtVal[1])

    case 0x04000300: return PostFlag9;
    }

    const bool engineA = (addr >= 0x04000000 && addr < 0x04000060);
    if (engineA)
        return GPU::GPU2D_A->Read8(addr);

    const bool engineB = (addr >= 0x04001000 && addr < 0x04001060);
    if (engineB)
        return GPU::GPU2D_B->Read8(addr);

    const bool gpu3D = (addr >= 0x04000320 && addr < 0x040006A4);
    if (gpu3D)
        return GPU3D::Read8(addr);

    printf("unknown ARM9 IO read8 %08X %08X\n", addr, ARM9->R[15]);
    return 0;
}

// 16-bit read from ARM9 I/O space.
// Addresses with a dedicated case are served from the switch; the remaining
// ranges are forwarded to the 2D engines or GPU3D. Anything else is logged
// with the ARM9's R15 and reads as 0.
// Note that some reads go through helper calls (TimerGetCounter,
// NDSCart::ReadSPIData) rather than returning a stored value.
u16 ARM9IORead16(u32 addr)
{
    switch (addr)
    {
    // display status / vertical counter
    case 0x04000004: return GPU::DispStat[0];
    case 0x04000006: return GPU::VCount;

    case 0x04000060: return GPU3D::Read16(addr);
    case 0x04000064:
    case 0x04000066: return GPU::GPU2D_A->Read16(addr);

    // DMA channel control words, low/high halves
    case 0x040000B8: return DMAs[0]->Cnt & 0xFFFF;
    case 0x040000BA: return DMAs[0]->Cnt >> 16;
    case 0x040000C4: return DMAs[1]->Cnt & 0xFFFF;
    case 0x040000C6: return DMAs[1]->Cnt >> 16;
    case 0x040000D0: return DMAs[2]->Cnt & 0xFFFF;
    case 0x040000D2: return DMAs[2]->Cnt >> 16;
    case 0x040000DC: return DMAs[3]->Cnt & 0xFFFF;
    case 0x040000DE: return DMAs[3]->Cnt >> 16;

    // DMA9Fill, viewed as an array of 16-bit halves
    case 0x040000E0: return ((u16*)DMA9Fill)[0];
    case 0x040000E2: return ((u16*)DMA9Fill)[1];
    case 0x040000E4: return ((u16*)DMA9Fill)[2];
    case 0x040000E6: return ((u16*)DMA9Fill)[3];
    case 0x040000E8: return ((u16*)DMA9Fill)[4];
    case 0x040000EA: return ((u16*)DMA9Fill)[5];
    case 0x040000EC: return ((u16*)DMA9Fill)[6];
    case 0x040000EE: return ((u16*)DMA9Fill)[7];

    // timers 0-3: counter value, then control
    case 0x04000100: return TimerGetCounter(0);
    case 0x04000102: return Timers[0].Cnt;
    case 0x04000104: return TimerGetCounter(1);
    case 0x04000106: return Timers[1].Cnt;
    case 0x04000108: return TimerGetCounter(2);
    case 0x0400010A: return Timers[2].Cnt;
    case 0x0400010C: return TimerGetCounter(3);
    case 0x0400010E: return Timers[3].Cnt;

    // key input / key control
    case 0x04000130: return KeyInput & 0xFFFF;
    case 0x04000132: return KeyCnt;

    case 0x04000180: return IPCSync9;
    case 0x04000184:
        {
            // IPC FIFO control: the status bits are computed on the fly.
            // bits 0/1 = IPCFIFO9 empty/full, bits 8/9 = IPCFIFO7 empty/full
            u16 val = IPCFIFOCnt9;
            if (IPCFIFO9->IsEmpty())     val |= 0x0001;
            else if (IPCFIFO9->IsFull()) val |= 0x0002;
            if (IPCFIFO7->IsEmpty())     val |= 0x0100;
            else if (IPCFIFO7->IsFull()) val |= 0x0200;
            return val;
        }

    // cart SPI control / data
    case 0x040001A0: return NDSCart::SPICnt;
    case 0x040001A2: return NDSCart::ReadSPIData();

    // cart ROM command bytes, two per read, lower address in the low byte
    case 0x040001A8: return NDSCart::ROMCommand[0] |
                           (NDSCart::ROMCommand[1] << 8);
    case 0x040001AA: return NDSCart::ROMCommand[2] |
                           (NDSCart::ROMCommand[3] << 8);
    case 0x040001AC: return NDSCart::ROMCommand[4] |
                           (NDSCart::ROMCommand[5] << 8);
    case 0x040001AE: return NDSCart::ROMCommand[6] |
                           (NDSCart::ROMCommand[7] << 8);

    // ExMemCnt and the ARM9 interrupt registers (index 0)
    case 0x04000204: return ExMemCnt[0];
    case 0x04000208: return IME[0];
    case 0x04000210: return IE[0] & 0xFFFF;
    case 0x04000212: return IE[0] >> 16;

    // VRAM bank control bytes; WRAMCnt sits between VRAMCNT[6] and VRAMCNT[7]
    case 0x04000240: return GPU::VRAMCNT[0] | (GPU::VRAMCNT[1] << 8);
    case 0x04000242: return GPU::VRAMCNT[2] | (GPU::VRAMCNT[3] << 8);
    case 0x04000244: return GPU::VRAMCNT[4] | (GPU::VRAMCNT[5] << 8);
    case 0x04000246: return GPU::VRAMCNT[6] | (WRAMCnt << 8);
    case 0x04000248: return GPU::VRAMCNT[7] | (GPU::VRAMCNT[8] << 8);

    // divider registers, low/high halves of each 32-bit word
    case 0x04000280: return DivCnt;
    case 0x04000290: return DivNumerator[0] & 0xFFFF;
    case 0x04000292: return DivNumerator[0] >> 16;
    case 0x04000294: return DivNumerator[1] & 0xFFFF;
    case 0x04000296: return DivNumerator[1] >> 16;
    case 0x04000298: return DivDenominator[0] & 0xFFFF;
    case 0x0400029A: return DivDenominator[0] >> 16;
    case 0x0400029C: return DivDenominator[1] & 0xFFFF;
    case 0x0400029E: return DivDenominator[1] >> 16;
    case 0x040002A0: return DivQuotient[0] & 0xFFFF;
    case 0x040002A2: return DivQuotient[0] >> 16;
    case 0x040002A4: return DivQuotient[1] & 0xFFFF;
    case 0x040002A6: return DivQuotient[1] >> 16;
    case 0x040002A8: return DivRemainder[0] & 0xFFFF;
    case 0x040002AA: return DivRemainder[0] >> 16;
    case 0x040002AC: return DivRemainder[1] & 0xFFFF;
    case 0x040002AE: return DivRemainder[1] >> 16;

    // square root registers
    case 0x040002B0: return SqrtCnt;
    case 0x040002B4: return SqrtRes & 0xFFFF;
    case 0x040002B6: return SqrtRes >> 16;
    case 0x040002B8: return SqrtVal[0] & 0xFFFF;
    case 0x040002BA: return SqrtVal[0] >> 16;
    case 0x040002BC: return SqrtVal[1] & 0xFFFF;
    case 0x040002BE: return SqrtVal[1] >> 16;

    case 0x04000300: return PostFlag9;
    case 0x04000304: return PowerControl9;
    }

    // everything not handled above: forward by address range
    if ((addr >= 0x04000000 && addr < 0x04000060) || (addr == 0x0400006C))
    {
        return GPU::GPU2D_A->Read16(addr);
    }
    if ((addr >= 0x04001000 && addr < 0x04001060) || (addr == 0x0400106C))
    {
        return GPU::GPU2D_B->Read16(addr);
    }
    if (addr >= 0x04000320 && addr < 0x040006A4)
    {
        return GPU3D::Read16(addr);
    }

    printf("unknown ARM9 IO read16 %08X %08X\n", addr, ARM9->R[15]);
    return 0;
}

// 32-bit read from ARM9 I/O space.
// Addresses with a dedicated case are served from the switch; the remaining
// ranges are forwarded to the 2D engines or GPU3D. Anything else is logged
// with the ARM9's R15 and reads as 0.
// Some reads have side effects: 0x04100000 may pop IPCFIFO7 and raise an IRQ,
// and 0x040001A0 / 0x04100010 go through NDSCart read helpers.
u32 ARM9IORead32(u32 addr)
{
    switch (addr)
    {
    // display status in the low half, vertical counter in the high half
    case 0x04000004: return GPU::DispStat[0] | (GPU::VCount << 16);

    case 0x04000060: return GPU3D::Read32(addr);
    case 0x04000064: return GPU::GPU2D_A->Read32(addr);

    // DMA channels 0-3: source address, destination address, control
    case 0x040000B0: return DMAs[0]->SrcAddr;
    case 0x040000B4: return DMAs[0]->DstAddr;
    case 0x040000B8: return DMAs[0]->Cnt;
    case 0x040000BC: return DMAs[1]->SrcAddr;
    case 0x040000C0: return DMAs[1]->DstAddr;
    case 0x040000C4: return DMAs[1]->Cnt;
    case 0x040000C8: return DMAs[2]->SrcAddr;
    case 0x040000CC: return DMAs[2]->DstAddr;
    case 0x040000D0: return DMAs[2]->Cnt;
    case 0x040000D4: return DMAs[3]->SrcAddr;
    case 0x040000D8: return DMAs[3]->DstAddr;
    case 0x040000DC: return DMAs[3]->Cnt;

    case 0x040000E0: return DMA9Fill[0];
    case 0x040000E4: return DMA9Fill[1];
    case 0x040000E8: return DMA9Fill[2];
    case 0x040000EC: return DMA9Fill[3];

    case 0x040000F4: return 0; // ???? Golden Sun Dark Dawn keeps reading this

    // timers 0-3: counter in the low half, control in the high half
    case 0x04000100: return TimerGetCounter(0) | (Timers[0].Cnt << 16);
    case 0x04000104: return TimerGetCounter(1) | (Timers[1].Cnt << 16);
    case 0x04000108: return TimerGetCounter(2) | (Timers[2].Cnt << 16);
    case 0x0400010C: return TimerGetCounter(3) | (Timers[3].Cnt << 16);

    case 0x04000130: return (KeyInput & 0xFFFF) | (KeyCnt << 16);

    case 0x04000180: return IPCSync9;

    // cart SPI control in the low half, SPI data in the high half
    case 0x040001A0: return NDSCart::SPICnt | (NDSCart::ReadSPIData() << 16);
    case 0x040001A4: return NDSCart::ROMCnt;

    // cart ROM command bytes, four per read, lowest address in the low byte
    case 0x040001A8: return NDSCart::ROMCommand[0] |
                           (NDSCart::ROMCommand[1] << 8) |
                           (NDSCart::ROMCommand[2] << 16) |
                           (NDSCart::ROMCommand[3] << 24);
    case 0x040001AC: return NDSCart::ROMCommand[4] |
                           (NDSCart::ROMCommand[5] << 8) |
                           (NDSCart::ROMCommand[6] << 16) |
                           (NDSCart::ROMCommand[7] << 24);

    // ARM9 interrupt registers (index 0)
    case 0x04000208: return IME[0];
    case 0x04000210: return IE[0];
    case 0x04000214: return IF[0];

    // VRAM bank control bytes; WRAMCnt sits between VRAMCNT[6] and VRAMCNT[7]
    case 0x04000240: return GPU::VRAMCNT[0] | (GPU::VRAMCNT[1] << 8) | (GPU::VRAMCNT[2] << 16) | (GPU::VRAMCNT[3] << 24);
    case 0x04000244: return GPU::VRAMCNT[4] | (GPU::VRAMCNT[5] << 8) | (GPU::VRAMCNT[6] << 16) | (WRAMCnt << 24);
    case 0x04000248: return GPU::VRAMCNT[7] | (GPU::VRAMCNT[8] << 8);

    // divider registers
    case 0x04000280: return DivCnt;
    case 0x04000290: return DivNumerator[0];
    case 0x04000294: return DivNumerator[1];
    case 0x04000298: return DivDenominator[0];
    case 0x0400029C: return DivDenominator[1];
    case 0x040002A0: return DivQuotient[0];
    case 0x040002A4: return DivQuotient[1];
    case 0x040002A8: return DivRemainder[0];
    case 0x040002AC: return DivRemainder[1];

    // square root registers
    case 0x040002B0: return SqrtCnt;
    case 0x040002B4: return SqrtRes;
    case 0x040002B8: return SqrtVal[0];
    case 0x040002BC: return SqrtVal[1];

    // IPC FIFO receive: reads from IPCFIFO7
    case 0x04100000:
        if (IPCFIFOCnt9 & 0x8000)
        {
            // IPCFIFOCnt9 bit 15 set: a read pops an entry
            u32 ret;
            if (IPCFIFO7->IsEmpty())
            {
                // reading while empty: flag it in bit 14 and return Peek()
                IPCFIFOCnt9 |= 0x4000;
                ret = IPCFIFO7->Peek();
            }
            else
            {
                ret = IPCFIFO7->Read();

                // this pop emptied the FIFO: raise IRQ_IPCSendDone on CPU
                // index 1 if IPCFIFOCnt7 bit 2 is set
                if (IPCFIFO7->IsEmpty() && (IPCFIFOCnt7 & 0x0004))
                    SetIRQ(1, IRQ_IPCSendDone);
            }
            return ret;
        }
        else
            return IPCFIFO7->Peek(); // bit 15 clear: peek without popping

    // cart ROM data: only fetched while ExMemCnt[0] bit 11 is clear
    case 0x04100010:
        if (!(ExMemCnt[0] & (1<<11))) return NDSCart::ReadROMData();
        return 0;
    }

    // everything not handled above: forward by address range
    if ((addr >= 0x04000000 && addr < 0x04000060) || (addr == 0x0400006C))
    {
        return GPU::GPU2D_A->Read32(addr);
    }
    if ((addr >= 0x04001000 && addr < 0x04001060) || (addr == 0x0400106C))
    {
        return GPU::GPU2D_B->Read32(addr);
    }
    if (addr >= 0x04000320 && addr < 0x040006A4)
    {
        return GPU3D::Read32(addr);
    }

    printf("unknown ARM9 IO read32 %08X %08X\n", addr, ARM9->R[15]);
    return 0;
}

// 8-bit write to ARM9 I/O space.
// Registers handled here are updated directly or through their helper; the
// two 2D engine ranges and the 0x04000320-0x040006A3 range are forwarded to
// GPU2D_A / GPU2D_B / GPU3D. Anything else is logged with the ARM9's R15.
void ARM9IOWrite8(u32 addr, u8 val)
{
    // cart ROM command bytes sit at consecutive addresses: index them directly
    if (addr >= 0x040001A8 && addr <= 0x040001AF)
    {
        NDSCart::ROMCommand[addr - 0x040001A8] = val;
        return;
    }

    switch (addr)
    {
    case 0x0400006C:
    case 0x0400006D:
        GPU::GPU2D_A->Write8(addr, val);
        return;
    case 0x0400106C:
    case 0x0400106D:
        GPU::GPU2D_B->Write8(addr, val);
        return;

    // key control, low and high byte
    case 0x04000132:
        KeyCnt = (KeyCnt & 0xFF00) | val;
        return;
    case 0x04000133:
        KeyCnt = (KeyCnt & 0x00FF) | (val << 8);
        return;

    // cart SPI control, low and high byte; ignored while ExMemCnt[0] bit 11 is set
    case 0x040001A0:
    case 0x040001A1:
        if (ExMemCnt[0] & (1<<11))
            return;
        if (addr == 0x040001A0)
            NDSCart::WriteSPICnt((NDSCart::SPICnt & 0xFF00) | val);
        else
            NDSCart::WriteSPICnt((NDSCart::SPICnt & 0x00FF) | (val << 8));
        return;
    case 0x040001A2:
        NDSCart::WriteSPIData(val);
        return;

    case 0x04000208:
        IME[0] = val & 0x1;
        return;

    // VRAM bank control; each bank group has its own mapping helper
    case 0x04000240: GPU::MapVRAM_AB(0, val); return;
    case 0x04000241: GPU::MapVRAM_AB(1, val); return;
    case 0x04000242: GPU::MapVRAM_CD(2, val); return;
    case 0x04000243: GPU::MapVRAM_CD(3, val); return;
    case 0x04000244: GPU::MapVRAM_E(4, val); return;
    case 0x04000245: GPU::MapVRAM_FG(5, val); return;
    case 0x04000246: GPU::MapVRAM_FG(6, val); return;
    case 0x04000247: MapSharedWRAM(val); return;
    case 0x04000248: GPU::MapVRAM_H(7, val); return;
    case 0x04000249: GPU::MapVRAM_I(8, val); return;

    case 0x04000300:
        // bit 0 is sticky: once set it stays set; only bits 0-1 are kept
        PostFlag9 = (val | (PostFlag9 & 0x01)) & 0x03;
        return;
    }

    const bool engineA = (addr >= 0x04000000 && addr < 0x04000060);
    const bool engineB = (addr >= 0x04001000 && addr < 0x04001060);
    const bool gpu3D   = (addr >= 0x04000320 && addr < 0x040006A4);

    if (engineA)
        GPU::GPU2D_A->Write8(addr, val);
    else if (engineB)
        GPU::GPU2D_B->Write8(addr, val);
    else if (gpu3D)
        GPU3D::Write8(addr, val);
    else
        printf("unknown ARM9 IO write8 %08X %02X %08X\n", addr, val, ARM9->R[15]);
}

// Handles a 16-bit write from the ARM9 to its I/O register space.
//
// addr: address of the register being written
// val:  halfword to store
//
// Registers with dedicated handling are dispatched by the switch. Anything
// else is tried against the 2D engine A, 2D engine B and 3D engine address
// windows. A write that matches nothing is logged and dropped.
void ARM9IOWrite16(u32 addr, u16 val)
{
    switch (addr)
    {
    case 0x04000004: GPU::SetDispStat(0, val); return;
    case 0x04000006: GPU::SetVCount(val); return;

    case 0x04000060: GPU3D::Write16(addr, val); return;

    case 0x04000068:
    case 0x0400006A: GPU::GPU2D_A->Write16(addr, val); return;

    case 0x0400006C: GPU::GPU2D_A->Write16(addr, val); return;
    case 0x0400106C: GPU::GPU2D_B->Write16(addr, val); return;

    // DMA channels 0-3: each halfword is merged into the current 32-bit
    // control value and the result is passed to WriteCnt()
    case 0x040000B8: DMAs[0]->WriteCnt((DMAs[0]->Cnt & 0xFFFF0000) | val); return;
    case 0x040000BA: DMAs[0]->WriteCnt((DMAs[0]->Cnt & 0x0000FFFF) | (val << 16)); return;
    case 0x040000C4: DMAs[1]->WriteCnt((DMAs[1]->Cnt & 0xFFFF0000) | val); return;
    case 0x040000C6: DMAs[1]->WriteCnt((DMAs[1]->Cnt & 0x0000FFFF) | (val << 16)); return;
    case 0x040000D0: DMAs[2]->WriteCnt((DMAs[2]->Cnt & 0xFFFF0000) | val); return;
    case 0x040000D2: DMAs[2]->WriteCnt((DMAs[2]->Cnt & 0x0000FFFF) | (val << 16)); return;
    case 0x040000DC: DMAs[3]->WriteCnt((DMAs[3]->Cnt & 0xFFFF0000) | val); return;
    case 0x040000DE: DMAs[3]->WriteCnt((DMAs[3]->Cnt & 0x0000FFFF) | (val << 16)); return;

    // DMA fill words, low/high halfwords
    case 0x040000E0: DMA9Fill[0] = (DMA9Fill[0] & 0xFFFF0000) | val; return;
    case 0x040000E2: DMA9Fill[0] = (DMA9Fill[0] & 0x0000FFFF) | (val << 16); return;
    case 0x040000E4: DMA9Fill[1] = (DMA9Fill[1] & 0xFFFF0000) | val; return;
    case 0x040000E6: DMA9Fill[1] = (DMA9Fill[1] & 0x0000FFFF) | (val << 16); return;
    case 0x040000E8: DMA9Fill[2] = (DMA9Fill[2] & 0xFFFF0000) | val; return;
    case 0x040000EA: DMA9Fill[2] = (DMA9Fill[2] & 0x0000FFFF) | (val << 16); return;
    case 0x040000EC: DMA9Fill[3] = (DMA9Fill[3] & 0xFFFF0000) | val; return;
    case 0x040000EE: DMA9Fill[3] = (DMA9Fill[3] & 0x0000FFFF) | (val << 16); return;

    // timers 0-3: even halfword sets the reload value, odd halfword goes to TimerStart()
    case 0x04000100: Timers[0].Reload = val; return;
    case 0x04000102: TimerStart(0, val); return;
    case 0x04000104: Timers[1].Reload = val; return;
    case 0x04000106: TimerStart(1, val); return;
    case 0x04000108: Timers[2].Reload = val; return;
    case 0x0400010A: TimerStart(2, val); return;
    case 0x0400010C: Timers[3].Reload = val; return;
    case 0x0400010E: TimerStart(3, val); return;

    case 0x04000132:
        KeyCnt = val;
        return;

    case 0x04000180:
        // bits 8-11 of val become the low nibble seen in IPCSync7;
        // IPCSync9 itself takes bits 8-11 and bit 14 of val
        IPCSync7 &= 0xFFF0;
        IPCSync7 |= ((val & 0x0F00) >> 8);
        IPCSync9 &= 0xB0FF;
        IPCSync9 |= (val & 0x4F00);
        // bit 13 of val raises the sync IRQ on the ARM7 if IPCSync7 has bit 14 set
        if ((val & 0x2000) && (IPCSync7 & 0x4000))
        {
            SetIRQ(1, IRQ_IPCSync);
        }
        return;

    case 0x04000184:
        // bit 3: flush the ARM9 send FIFO
        if (val & 0x0008)
            IPCFIFO9->Clear();
        // the IRQ checks below compare against the previous control value,
        // so they only fire when an enable bit goes from 0 to 1
        if ((val & 0x0004) && (!(IPCFIFOCnt9 & 0x0004)) && IPCFIFO9->IsEmpty())
            SetIRQ(0, IRQ_IPCSendDone);
        if ((val & 0x0400) && (!(IPCFIFOCnt9 & 0x0400)) && (!IPCFIFO7->IsEmpty()))
            SetIRQ(0, IRQ_IPCRecv);
        // bit 14 is the error flag: writing 1 acknowledges (clears) it
        if (val & 0x4000)
            IPCFIFOCnt9 &= ~0x4000;
        // store the writable bits while carrying over the error flag, so it
        // is only cleared by the explicit acknowledge above
        IPCFIFOCnt9 = (val & 0x8404) | (IPCFIFOCnt9 & 0x4000);
        return;

    case 0x040001A0:
        // only forwarded while bit 11 of ExMemCnt[0] is clear
        if (!(ExMemCnt[0] & (1<<11))) NDSCart::WriteSPICnt(val);
        return;
    case 0x040001A2:
        NDSCart::WriteSPIData(val & 0xFF);
        return;

    // cart command buffer, two bytes per halfword (low byte first)
    case 0x040001A8:
        NDSCart::ROMCommand[0] = val & 0xFF;
        NDSCart::ROMCommand[1] = val >> 8;
        return;
    case 0x040001AA:
        NDSCart::ROMCommand[2] = val & 0xFF;
        NDSCart::ROMCommand[3] = val >> 8;
        return;
    case 0x040001AC:
        NDSCart::ROMCommand[4] = val & 0xFF;
        NDSCart::ROMCommand[5] = val >> 8;
        return;
    case 0x040001AE:
        NDSCart::ROMCommand[6] = val & 0xFF;
        NDSCart::ROMCommand[7] = val >> 8;
        return;

    // fifth byte of each seed; only 7 bits are kept
    case 0x040001B8: ROMSeed0[4] = val & 0x7F; return;
    case 0x040001BA: ROMSeed1[4] = val & 0x7F; return;

    case 0x04000204:
        ExMemCnt[0] = val;
        // the ARM7 copy keeps its own low 7 bits and takes bits 7-15 from this write
        ExMemCnt[1] = (ExMemCnt[1] & 0x007F) | (val & 0xFF80);
        SetGBASlotTimings();
        return;

    case 0x04000208: IME[0] = val & 0x1; return;
    case 0x04000210: IE[0] = (IE[0] & 0xFFFF0000) | val; return;
    case 0x04000212: IE[0] = (IE[0] & 0x0000FFFF) | (val << 16); return;
    // TODO: what happens when writing to IF this way??

    // VRAM/WRAM bank control: each halfword carries two one-byte bank registers
    case 0x04000240:
        GPU::MapVRAM_AB(0, val & 0xFF);
        GPU::MapVRAM_AB(1, val >> 8);
        return;
    case 0x04000242:
        GPU::MapVRAM_CD(2, val & 0xFF);
        GPU::MapVRAM_CD(3, val >> 8);
        return;
    case 0x04000244:
        GPU::MapVRAM_E(4, val & 0xFF);
        GPU::MapVRAM_FG(5, val >> 8);
        return;
    case 0x04000246:
        GPU::MapVRAM_FG(6, val & 0xFF);
        MapSharedWRAM(val >> 8);
        return;
    case 0x04000248:
        GPU::MapVRAM_H(7, val & 0xFF);
        GPU::MapVRAM_I(8, val >> 8);
        return;

    case 0x04000280: DivCnt = val; StartDiv(); return;

    case 0x040002B0: SqrtCnt = val; StartSqrt(); return;

    case 0x04000300:
        // bit 0 is sticky: once set it stays set
        if (PostFlag9 & 0x01) val |= 0x01;
        PostFlag9 = val & 0x03;
        return;

    case 0x04000304:
        PowerControl9 = val;
        GPU::DisplaySwap(PowerControl9>>15);
        return;
    }

    if (addr >= 0x04000000 && addr < 0x04000060)
    {
        GPU::GPU2D_A->Write16(addr, val);
        return;
    }
    if (addr >= 0x04001000 && addr < 0x04001060)
    {
        GPU::GPU2D_B->Write16(addr, val);
        return;
    }
    if (addr >= 0x04000320 && addr < 0x040006A4)
    {
        GPU3D::Write16(addr, val);
        return;
    }

    printf("unknown ARM9 IO write16 %08X %04X %08X\n", addr, val, ARM9->R[15]);
}

// Handles a 32-bit write from the ARM9 to its I/O register space.
//
// addr: address of the register being written
// val:  word to store
//
// Registers with dedicated handling are dispatched by the switch. Anything
// else is tried against the 2D engine A, 2D engine B and 3D engine address
// windows. A write that matches nothing is logged and dropped.
void ARM9IOWrite32(u32 addr, u32 val)
{
    switch (addr)
    {
    case 0x04000060: GPU3D::Write32(addr, val); return;
    case 0x04000064:
    case 0x04000068: GPU::GPU2D_A->Write32(addr, val); return;

    // only the low halfword is forwarded for these two
    case 0x0400006C: GPU::GPU2D_A->Write16(addr, val&0xFFFF); return;
    case 0x0400106C: GPU::GPU2D_B->Write16(addr, val&0xFFFF); return;

    // DMA channels 0-3: source, destination, control
    case 0x040000B0: DMAs[0]->SrcAddr = val; return;
    case 0x040000B4: DMAs[0]->DstAddr = val; return;
    case 0x040000B8: DMAs[0]->WriteCnt(val); return;
    case 0x040000BC: DMAs[1]->SrcAddr = val; return;
    case 0x040000C0: DMAs[1]->DstAddr = val; return;
    case 0x040000C4: DMAs[1]->WriteCnt(val); return;
    case 0x040000C8: DMAs[2]->SrcAddr = val; return;
    case 0x040000CC: DMAs[2]->DstAddr = val; return;
    case 0x040000D0: DMAs[2]->WriteCnt(val); return;
    case 0x040000D4: DMAs[3]->SrcAddr = val; return;
    case 0x040000D8: DMAs[3]->DstAddr = val; return;
    case 0x040000DC: DMAs[3]->WriteCnt(val); return;

    case 0x040000E0: DMA9Fill[0] = val; return;
    case 0x040000E4: DMA9Fill[1] = val; return;
    case 0x040000E8: DMA9Fill[2] = val; return;
    case 0x040000EC: DMA9Fill[3] = val; return;

    // timers 0-3: low halfword is the reload value, high halfword goes to TimerStart()
    case 0x04000100:
        Timers[0].Reload = val & 0xFFFF;
        TimerStart(0, val>>16);
        return;
    case 0x04000104:
        Timers[1].Reload = val & 0xFFFF;
        TimerStart(1, val>>16);
        return;
    case 0x04000108:
        Timers[2].Reload = val & 0xFFFF;
        TimerStart(2, val>>16);
        return;
    case 0x0400010C:
        Timers[3].Reload = val & 0xFFFF;
        TimerStart(3, val>>16);
        return;

    case 0x04000130:
        // only the upper halfword is stored; the lower half is discarded
        KeyCnt = val >> 16;
        return;
    case 0x04000180:
        // handled as a halfword write; the upper 16 bits are dropped by the conversion
        ARM9IOWrite16(addr, val);
        return;

    case 0x04000188:
        // pushes are ignored unless bit 15 of the FIFO control is set
        if (IPCFIFOCnt9 & 0x8000)
        {
            if (IPCFIFO9->IsFull())
                IPCFIFOCnt9 |= 0x4000; // flag the failed push in bit 14
            else
            {
                bool wasempty = IPCFIFO9->IsEmpty();
                IPCFIFO9->Write(val);
                // notify the ARM7 when its receive IRQ is enabled and the FIFO just became non-empty
                if ((IPCFIFOCnt7 & 0x0400) && wasempty)
                    SetIRQ(1, IRQ_IPCRecv);
            }
        }
        return;

    case 0x040001A0:
        // only forwarded while bit 11 of ExMemCnt[0] is clear
        if (!(ExMemCnt[0] & (1<<11)))
        {
            NDSCart::WriteSPICnt(val & 0xFFFF);
            NDSCart::WriteSPIData((val >> 16) & 0xFF);
        }
        return;
    case 0x040001A4:
        if (!(ExMemCnt[0] & (1<<11))) NDSCart::WriteROMCnt(val);
        return;

    // cart command buffer, four bytes per word (low byte first)
    case 0x040001A8:
        NDSCart::ROMCommand[0] = val & 0xFF;
        NDSCart::ROMCommand[1] = (val >> 8) & 0xFF;
        NDSCart::ROMCommand[2] = (val >> 16) & 0xFF;
        NDSCart::ROMCommand[3] = val >> 24;
        return;
    case 0x040001AC:
        NDSCart::ROMCommand[4] = val & 0xFF;
        NDSCart::ROMCommand[5] = (val >> 8) & 0xFF;
        NDSCart::ROMCommand[6] = (val >> 16) & 0xFF;
        NDSCart::ROMCommand[7] = val >> 24;
        return;

    // first four bytes of each seed, stored in host byte order;
    // memcpy avoids writing a u32 through a pointer into a byte array
    case 0x040001B0: memcpy(&ROMSeed0[0], &val, sizeof(val)); return;
    case 0x040001B4: memcpy(&ROMSeed1[0], &val, sizeof(val)); return;

    case 0x04000208: IME[0] = val & 0x1; return;
    case 0x04000210: IE[0] = val; return;
    // writing 1 bits clears the matching pending flags
    case 0x04000214: IF[0] &= ~val; GPU3D::CheckFIFOIRQ(); return;

    // VRAM/WRAM bank control: each word carries up to four one-byte bank registers
    case 0x04000240:
        GPU::MapVRAM_AB(0, val & 0xFF);
        GPU::MapVRAM_AB(1, (val >> 8) & 0xFF);
        GPU::MapVRAM_CD(2, (val >> 16) & 0xFF);
        GPU::MapVRAM_CD(3, val >> 24);
        return;
    case 0x04000244:
        GPU::MapVRAM_E(4, val & 0xFF);
        GPU::MapVRAM_FG(5, (val >> 8) & 0xFF);
        GPU::MapVRAM_FG(6, (val >> 16) & 0xFF);
        MapSharedWRAM(val >> 24);
        return;
    case 0x04000248:
        GPU::MapVRAM_H(7, val & 0xFF);
        GPU::MapVRAM_I(8, (val >> 8) & 0xFF);
        return;

    case 0x04000280: DivCnt = val; StartDiv(); return;

    case 0x040002B0: SqrtCnt = val; StartSqrt(); return;

    // every operand write restarts the division
    case 0x04000290: DivNumerator[0] = val; StartDiv(); return;
    case 0x04000294: DivNumerator[1] = val; StartDiv(); return;
    case 0x04000298: DivDenominator[0] = val; StartDiv(); return;
    case 0x0400029C: DivDenominator[1] = val; StartDiv(); return;

    // every operand write restarts the square root
    case 0x040002B8: SqrtVal[0] = val; StartSqrt(); return;
    case 0x040002BC: SqrtVal[1] = val; StartSqrt(); return;

    case 0x04000304:
        PowerControl9 = val & 0xFFFF;
        GPU::DisplaySwap(PowerControl9>>15);
        return;
    }

    if (addr >= 0x04000000 && addr < 0x04000060)
    {
        GPU::GPU2D_A->Write32(addr, val);
        return;
    }
    if (addr >= 0x04001000 && addr < 0x04001060)
    {
        GPU::GPU2D_B->Write32(addr, val);
        return;
    }
    if (addr >= 0x04000320 && addr < 0x040006A4)
    {
        GPU3D::Write32(addr, val);
        return;
    }

    printf("unknown ARM9 IO write32 %08X %08X %08X\n", addr, val, ARM9->R[15]);
}


// Handles an 8-bit read by the ARM7 from its I/O register space.
//
// addr: address of the register being read
// Returns the byte at that register, or 0 (after logging) when the address
// is not handled.
u8 ARM7IORead8(u32 addr)
{
    // the sound unit window is checked up front; no switch case below lies inside it
    if ((addr >= 0x04000400) && (addr < 0x04000520))
        return SPU::Read8(addr);

    switch (addr)
    {
    // byte lanes: bit 0 of the address selects the low or high byte of each halfword
    case 0x04000130:
    case 0x04000131:
        return (KeyInput >> ((addr & 1) * 8)) & 0xFF;
    case 0x04000132:
    case 0x04000133:
        return (KeyCnt >> ((addr & 1) * 8)) & 0xFF;
    case 0x04000134:
    case 0x04000135:
        return (RCnt >> ((addr & 1) * 8)) & 0xFF;
    case 0x04000136:
    case 0x04000137:
        // upper halfword of KeyInput
        return (KeyInput >> (16 + ((addr & 1) * 8))) & 0xFF;

    case 0x04000138:
        return RTC::Read() & 0xFF;

    case 0x040001A2:
        return NDSCart::ReadSPIData();

    // cart command buffer, one byte per address
    case 0x040001A8:
    case 0x040001A9:
    case 0x040001AA:
    case 0x040001AB:
    case 0x040001AC:
    case 0x040001AD:
    case 0x040001AE:
    case 0x040001AF:
        return NDSCart::ROMCommand[addr - 0x040001A8];

    case 0x040001C2:
        return SPI::ReadData();

    case 0x04000208:
        return IME[1];

    case 0x04000240:
        return GPU::VRAMSTAT;
    case 0x04000241:
        return WRAMCnt;

    case 0x04000300:
        return PostFlag7;

    default:
        break;
    }

    printf("unknown ARM7 IO read8 %08X %08X\n", addr, ARM7->R[15]);
    return 0;
}

// Handles a 16-bit read by the ARM7 from its I/O register space.
//
// addr: address of the register being read
// Returns the halfword at that register, or 0 (after logging) when the
// address is not handled.
u16 ARM7IORead16(u32 addr)
{
    // the sound unit window is checked up front; no switch case below lies inside it
    if ((addr >= 0x04000400) && (addr < 0x04000520))
        return SPU::Read16(addr);

    switch (addr)
    {
    case 0x04000004:
        return GPU::DispStat[1];
    case 0x04000006:
        return GPU::VCount;

    // control word of DMA channels 4-7; the register groups sit 12 bytes apart
    case 0x040000B8:
    case 0x040000C4:
    case 0x040000D0:
    case 0x040000DC:
        return DMAs[4 + ((addr - 0x040000B8) / 12)]->Cnt & 0xFFFF;
    case 0x040000BA:
    case 0x040000C6:
    case 0x040000D2:
    case 0x040000DE:
        return DMAs[4 + ((addr - 0x040000BA) / 12)]->Cnt >> 16;

    // timers 4-7; the register pairs sit 4 bytes apart
    case 0x04000100:
    case 0x04000104:
    case 0x04000108:
    case 0x0400010C:
        return TimerGetCounter(4 + ((addr - 0x04000100) >> 2));
    case 0x04000102:
    case 0x04000106:
    case 0x0400010A:
    case 0x0400010E:
        return Timers[4 + ((addr - 0x04000102) >> 2)].Cnt;

    case 0x04000130:
        return KeyInput & 0xFFFF;
    case 0x04000132:
        return KeyCnt;
    case 0x04000134:
        return RCnt;
    case 0x04000136:
        return KeyInput >> 16;

    case 0x04000138:
        return RTC::Read();

    case 0x04000180:
        return IPCSync7;

    case 0x04000184:
        {
            // stored control bits plus live empty/full status of both FIFOs
            u16 status = IPCFIFOCnt7;
            status |= IPCFIFO7->IsEmpty() ? 0x0001 : (IPCFIFO7->IsFull() ? 0x0002 : 0x0000);
            status |= IPCFIFO9->IsEmpty() ? 0x0100 : (IPCFIFO9->IsFull() ? 0x0200 : 0x0000);
            return status;
        }

    case 0x040001A0:
        return NDSCart::SPICnt;
    case 0x040001A2:
        return NDSCart::ReadSPIData();

    // cart command buffer, two bytes per halfword (low byte first)
    case 0x040001A8:
    case 0x040001AA:
    case 0x040001AC:
    case 0x040001AE:
        {
            u32 first = addr - 0x040001A8;
            return NDSCart::ROMCommand[first] |
                  (NDSCart::ROMCommand[first + 1] << 8);
        }

    case 0x040001C0:
        return SPI::Cnt;
    case 0x040001C2:
        return SPI::ReadData();

    case 0x04000204:
        return ExMemCnt[1];
    case 0x04000206:
        return WifiWaitCnt;

    case 0x04000208:
        return IME[1];
    case 0x04000210:
        return IE[1] & 0xFFFF;
    case 0x04000212:
        return IE[1] >> 16;

    case 0x04000300:
        return PostFlag7;
    case 0x04000304:
        return PowerControl7;
    case 0x04000308:
        return ARM7BIOSProt;

    default:
        break;
    }

    printf("unknown ARM7 IO read16 %08X %08X\n", addr, ARM7->R[15]);
    return 0;
}

// Handles a 32-bit read by the ARM7 from its I/O register space.
//
// addr: address of the register being read
// Returns the word at that register, or 0 (after logging) when the address
// is not handled. Some reads have side effects (RTC, SPI, cart data and
// IPC FIFO reads go through their respective Read functions).
u32 ARM7IORead32(u32 addr)
{
    switch (addr)
    {
    case 0x04000004: return GPU::DispStat[1] | (GPU::VCount << 16);

    // DMA channels 4-7: source, destination, control
    case 0x040000B0: return DMAs[4]->SrcAddr;
    case 0x040000B4: return DMAs[4]->DstAddr;
    case 0x040000B8: return DMAs[4]->Cnt;
    case 0x040000BC: return DMAs[5]->SrcAddr;
    case 0x040000C0: return DMAs[5]->DstAddr;
    case 0x040000C4: return DMAs[5]->Cnt;
    case 0x040000C8: return DMAs[6]->SrcAddr;
    case 0x040000CC: return DMAs[6]->DstAddr;
    case 0x040000D0: return DMAs[6]->Cnt;
    case 0x040000D4: return DMAs[7]->SrcAddr;
    case 0x040000D8: return DMAs[7]->DstAddr;
    case 0x040000DC: return DMAs[7]->Cnt;

    // timers 4-7: counter in the low halfword, control in the high halfword
    case 0x04000100: return TimerGetCounter(4) | (Timers[4].Cnt << 16);
    case 0x04000104: return TimerGetCounter(5) | (Timers[5].Cnt << 16);
    case 0x04000108: return TimerGetCounter(6) | (Timers[6].Cnt << 16);
    case 0x0400010C: return TimerGetCounter(7) | (Timers[7].Cnt << 16);

    case 0x04000130: return (KeyInput & 0xFFFF) | (KeyCnt << 16);
    // the upper halfword mirrors what the 8/16-bit handlers return at
    // 0x04000136, i.e. the upper half of KeyInput (KeyCnt has no upper half)
    case 0x04000134: return RCnt | (KeyInput & 0xFFFF0000);
    case 0x04000138: return RTC::Read();

    case 0x04000180: return IPCSync7;

    case 0x040001A0: return NDSCart::SPICnt | (NDSCart::ReadSPIData() << 16);
    case 0x040001A4: return NDSCart::ROMCnt;

    // cart command buffer, four bytes per word (low byte first)
    case 0x040001A8: return NDSCart::ROMCommand[0] |
                           (NDSCart::ROMCommand[1] << 8) |
                           (NDSCart::ROMCommand[2] << 16) |
                           (NDSCart::ROMCommand[3] << 24);
    case 0x040001AC: return NDSCart::ROMCommand[4] |
                           (NDSCart::ROMCommand[5] << 8) |
                           (NDSCart::ROMCommand[6] << 16) |
                           (NDSCart::ROMCommand[7] << 24);

    case 0x040001C0:
        return SPI::Cnt | (SPI::ReadData() << 16);

    case 0x04000208: return IME[1];
    case 0x04000210: return IE[1];
    case 0x04000214: return IF[1];

    case 0x04000308: return ARM7BIOSProt;

    case 0x04100000:
        // receive side of the IPC FIFO: pops from the ARM9's send FIFO
        // when bit 15 of the FIFO control is set, otherwise only peeks
        if (IPCFIFOCnt7 & 0x8000)
        {
            u32 ret;
            if (IPCFIFO9->IsEmpty())
            {
                // reading an empty FIFO sets the error flag (bit 14)
                IPCFIFOCnt7 |= 0x4000;
                ret = IPCFIFO9->Peek();
            }
            else
            {
                ret = IPCFIFO9->Read();

                // notify the ARM9 when this read drained its send FIFO
                // and it has the send-done IRQ enabled
                if (IPCFIFO9->IsEmpty() && (IPCFIFOCnt9 & 0x0004))
                    SetIRQ(0, IRQ_IPCSendDone);
            }
            return ret;
        }
        else
            return IPCFIFO9->Peek();

    case 0x04100010:
        // cart data is only returned while bit 11 of ExMemCnt[0] is set
        if (ExMemCnt[0] & (1<<11)) return NDSCart::ReadROMData();
        return 0;
    }

    if (addr >= 0x04000400 && addr < 0x04000520)
    {
        return SPU::Read32(addr);
    }

    printf("unknown ARM7 IO read32 %08X %08X\n", addr, ARM7->R[15]);
    return 0;
}

// Handles an 8-bit write from the ARM7 to its I/O register space.
//
// addr: address of the register being written
// val:  byte to store
//
// Writes to addresses that are not handled are logged and dropped.
void ARM7IOWrite8(u32 addr, u8 val)
{
    // the sound unit window is checked up front; no switch case below lies inside it
    if ((addr >= 0x04000400) && (addr < 0x04000520))
    {
        SPU::Write8(addr, val);
        return;
    }

    switch (addr)
    {
    // byte-wise updates: clear the targeted byte, then merge the new one in
    case 0x04000132:
        KeyCnt &= 0xFF00;
        KeyCnt |= val;
        return;
    case 0x04000133:
        KeyCnt &= 0x00FF;
        KeyCnt |= (val << 8);
        return;
    case 0x04000134:
        RCnt &= 0xFF00;
        RCnt |= val;
        return;
    case 0x04000135:
        RCnt &= 0x00FF;
        RCnt |= (val << 8);
        return;

    case 0x04000138:
        RTC::Write(val, true);
        return;

    case 0x040001A0:
    case 0x040001A1:
        // only forwarded while bit 11 of ExMemCnt[0] is set
        if (ExMemCnt[0] & (1<<11))
        {
            if (addr & 1)
                NDSCart::WriteSPICnt((NDSCart::SPICnt & 0x00FF) | (val << 8));
            else
                NDSCart::WriteSPICnt((NDSCart::SPICnt & 0xFF00) | val);
        }
        return;
    case 0x040001A2:
        NDSCart::WriteSPIData(val);
        return;

    // cart command buffer, one byte per address
    case 0x040001A8:
    case 0x040001A9:
    case 0x040001AA:
    case 0x040001AB:
    case 0x040001AC:
    case 0x040001AD:
    case 0x040001AE:
    case 0x040001AF:
        NDSCart::ROMCommand[addr - 0x040001A8] = val;
        return;

    case 0x040001C2:
        SPI::WriteData(val);
        return;

    case 0x04000208:
        IME[1] = val & 0x1;
        return;

    case 0x04000300:
        // accepted only while the ARM7 PC is below 0x4000 and bit 0 is not yet set
        if ((ARM7->R[15] < 0x4000) && !(PostFlag7 & 0x01))
            PostFlag7 = val & 0x01;
        return;

    case 0x04000301:
        // only the value 0x80 is acted upon
        if (val == 0x80)
            ARM7->Halt(1);
        return;

    default:
        break;
    }

    printf("unknown ARM7 IO write8 %08X %02X %08X\n", addr, val, ARM7->R[15]);
}

// Handles a 16-bit write from the ARM7 to its I/O register space.
//
// addr: address of the register being written
// val:  halfword to store
//
// Registers with dedicated handling are dispatched by the switch; the sound
// unit window is tried afterwards. A write that matches nothing is logged
// and dropped.
void ARM7IOWrite16(u32 addr, u16 val)
{
    switch (addr)
    {
    case 0x04000004: GPU::SetDispStat(1, val); return;
    case 0x04000006: GPU::SetVCount(val); return;

    // DMA channels 4-7: each halfword is merged into the current 32-bit
    // control value and the result is passed to WriteCnt()
    case 0x040000B8: DMAs[4]->WriteCnt((DMAs[4]->Cnt & 0xFFFF0000) | val); return;
    case 0x040000BA: DMAs[4]->WriteCnt((DMAs[4]->Cnt & 0x0000FFFF) | (val << 16)); return;
    case 0x040000C4: DMAs[5]->WriteCnt((DMAs[5]->Cnt & 0xFFFF0000) | val); return;
    case 0x040000C6: DMAs[5]->WriteCnt((DMAs[5]->Cnt & 0x0000FFFF) | (val << 16)); return;
    case 0x040000D0: DMAs[6]->WriteCnt((DMAs[6]->Cnt & 0xFFFF0000) | val); return;
    case 0x040000D2: DMAs[6]->WriteCnt((DMAs[6]->Cnt & 0x0000FFFF) | (val << 16)); return;
    case 0x040000DC: DMAs[7]->WriteCnt((DMAs[7]->Cnt & 0xFFFF0000) | val); return;
    case 0x040000DE: DMAs[7]->WriteCnt((DMAs[7]->Cnt & 0x0000FFFF) | (val << 16)); return;

    // timers 4-7: even halfword sets the reload value, odd halfword goes to TimerStart()
    case 0x04000100: Timers[4].Reload = val; return;
    case 0x04000102: TimerStart(4, val); return;
    case 0x04000104: Timers[5].Reload = val; return;
    case 0x04000106: TimerStart(5, val); return;
    case 0x04000108: Timers[6].Reload = val; return;
    case 0x0400010A: TimerStart(6, val); return;
    case 0x0400010C: Timers[7].Reload = val; return;
    case 0x0400010E: TimerStart(7, val); return;

    case 0x04000132: KeyCnt = val; return;
    case 0x04000134: RCnt = val; return;

    case 0x04000138: RTC::Write(val, false); return;

    case 0x04000180:
        // bits 8-11 of val become the low nibble seen in IPCSync9;
        // IPCSync7 itself takes bits 8-11 and bit 14 of val
        IPCSync9 &= 0xFFF0;
        IPCSync9 |= ((val & 0x0F00) >> 8);
        IPCSync7 &= 0xB0FF;
        IPCSync7 |= (val & 0x4F00);
        // bit 13 of val raises the sync IRQ on the ARM9 if IPCSync9 has bit 14 set
        if ((val & 0x2000) && (IPCSync9 & 0x4000))
        {
            SetIRQ(0, IRQ_IPCSync);
        }
        return;

    case 0x04000184:
        // bit 3: flush the ARM7 send FIFO
        if (val & 0x0008)
            IPCFIFO7->Clear();
        // the IRQ checks below compare against the previous control value,
        // so they only fire when an enable bit goes from 0 to 1
        if ((val & 0x0004) && (!(IPCFIFOCnt7 & 0x0004)) && IPCFIFO7->IsEmpty())
            SetIRQ(1, IRQ_IPCSendDone);
        if ((val & 0x0400) && (!(IPCFIFOCnt7 & 0x0400)) && (!IPCFIFO9->IsEmpty()))
            SetIRQ(1, IRQ_IPCRecv);
        // bit 14 is the error flag: writing 1 acknowledges (clears) it
        if (val & 0x4000)
            IPCFIFOCnt7 &= ~0x4000;
        // store the writable bits while carrying over the error flag, so it
        // is only cleared by the explicit acknowledge above
        IPCFIFOCnt7 = (val & 0x8404) | (IPCFIFOCnt7 & 0x4000);
        return;

    case 0x040001A0:
        // only forwarded while bit 11 of ExMemCnt[0] is set
        if (ExMemCnt[0] & (1<<11))
            NDSCart::WriteSPICnt(val);
        return;
    case 0x040001A2:
        NDSCart::WriteSPIData(val & 0xFF);
        return;

    // cart command buffer, two bytes per halfword (low byte first)
    case 0x040001A8:
        NDSCart::ROMCommand[0] = val & 0xFF;
        NDSCart::ROMCommand[1] = val >> 8;
        return;
    case 0x040001AA:
        NDSCart::ROMCommand[2] = val & 0xFF;
        NDSCart::ROMCommand[3] = val >> 8;
        return;
    case 0x040001AC:
        NDSCart::ROMCommand[4] = val & 0xFF;
        NDSCart::ROMCommand[5] = val >> 8;
        return;
    case 0x040001AE:
        NDSCart::ROMCommand[6] = val & 0xFF;
        NDSCart::ROMCommand[7] = val >> 8;
        return;

    // the ARM7 writes land in the second 8-byte half of each seed buffer; only 7 bits are kept
    case 0x040001B8: ROMSeed0[12] = val & 0x7F; return;
    case 0x040001BA: ROMSeed1[12] = val & 0x7F; return;

    case 0x040001C0:
        SPI::WriteCnt(val);
        return;
    case 0x040001C2:
        SPI::WriteData(val & 0xFF);
        return;

    case 0x04000204:
        // the ARM7 may only change the low 7 bits; bits 7-15 are kept as they are
        ExMemCnt[1] = (ExMemCnt[1] & 0xFF80) | (val & 0x007F);
        SetGBASlotTimings();
        return;
    case 0x04000206:
        SetWifiWaitCnt(val);
        return;

    case 0x04000208: IME[1] = val & 0x1; return;
    case 0x04000210: IE[1] = (IE[1] & 0xFFFF0000) | val; return;
    case 0x04000212: IE[1] = (IE[1] & 0x0000FFFF) | (val << 16); return;
    // TODO: what happens when writing to IF this way??

    case 0x04000300:
        // ignored when the ARM7 PC is at or above 0x4000
        if (ARM7->R[15] >= 0x4000)
            return;
        // bit 0 can be set once and is never cleared here
        if (!(PostFlag7 & 0x01))
            PostFlag7 = val & 0x01;
        return;

    case 0x04000304: PowerControl7 = val; return;

    case 0x04000308:
        // write-once: accepted only while the stored value is still zero
        if (ARM7BIOSProt == 0)
            ARM7BIOSProt = val & 0xFFFE;
        return;
    }

    if (addr >= 0x04000400 && addr < 0x04000520)
    {
        SPU::Write16(addr, val);
        return;
    }

    printf("unknown ARM7 IO write16 %08X %04X %08X\n", addr, val, ARM7->R[15]);
}

// Handles a 32-bit write from the ARM7 to its I/O register space.
//
// addr: address of the register being written
// val:  word to store
//
// Registers with dedicated handling are dispatched by the switch; the sound
// unit window is tried afterwards. A write that matches nothing is logged
// and dropped.
void ARM7IOWrite32(u32 addr, u32 val)
{
    switch (addr)
    {
    // DMA channels 4-7: source, destination, control
    case 0x040000B0: DMAs[4]->SrcAddr = val; return;
    case 0x040000B4: DMAs[4]->DstAddr = val; return;
    case 0x040000B8: DMAs[4]->WriteCnt(val); return;
    case 0x040000BC: DMAs[5]->SrcAddr = val; return;
    case 0x040000C0: DMAs[5]->DstAddr = val; return;
    case 0x040000C4: DMAs[5]->WriteCnt(val); return;
    case 0x040000C8: DMAs[6]->SrcAddr = val; return;
    case 0x040000CC: DMAs[6]->DstAddr = val; return;
    case 0x040000D0: DMAs[6]->WriteCnt(val); return;
    case 0x040000D4: DMAs[7]->SrcAddr = val; return;
    case 0x040000D8: DMAs[7]->DstAddr = val; return;
    case 0x040000DC: DMAs[7]->WriteCnt(val); return;

    // timers 4-7: low halfword is the reload value, high halfword goes to TimerStart()
    case 0x04000100:
        Timers[4].Reload = val & 0xFFFF;
        TimerStart(4, val>>16);
        return;
    case 0x04000104:
        Timers[5].Reload = val & 0xFFFF;
        TimerStart(5, val>>16);
        return;
    case 0x04000108:
        Timers[6].Reload = val & 0xFFFF;
        TimerStart(6, val>>16);
        return;
    case 0x0400010C:
        Timers[7].Reload = val & 0xFFFF;
        TimerStart(7, val>>16);
        return;

    // only the upper halfword is stored; the lower half is discarded
    case 0x04000130: KeyCnt = val >> 16; return;
    case 0x04000134: RCnt = val & 0xFFFF; return;
    case 0x04000138: RTC::Write(val & 0xFFFF, false); return;

    case 0x04000180:
        // handled as a halfword write; the upper 16 bits are dropped by the conversion
        ARM7IOWrite16(addr, val);
        return;
    case 0x04000188:
        // pushes are ignored unless bit 15 of the FIFO control is set
        if (IPCFIFOCnt7 & 0x8000)
        {
            if (IPCFIFO7->IsFull())
                IPCFIFOCnt7 |= 0x4000; // flag the failed push in bit 14
            else
            {
                bool wasempty = IPCFIFO7->IsEmpty();
                IPCFIFO7->Write(val);
                // notify the ARM9 when its receive IRQ is enabled and the FIFO just became non-empty
                if ((IPCFIFOCnt9 & 0x0400) && wasempty)
                    SetIRQ(0, IRQ_IPCRecv);
            }
        }
        return;

    case 0x040001A0:
        // only forwarded while bit 11 of ExMemCnt[0] is set
        if (ExMemCnt[0] & (1<<11))
        {
            NDSCart::WriteSPICnt(val & 0xFFFF);
            NDSCart::WriteSPIData((val >> 16) & 0xFF);
        }
        return;
    case 0x040001A4:
        if (ExMemCnt[0] & (1<<11)) NDSCart::WriteROMCnt(val);
        return;

    // cart command buffer, four bytes per word (low byte first)
    case 0x040001A8:
        NDSCart::ROMCommand[0] = val & 0xFF;
        NDSCart::ROMCommand[1] = (val >> 8) & 0xFF;
        NDSCart::ROMCommand[2] = (val >> 16) & 0xFF;
        NDSCart::ROMCommand[3] = val >> 24;
        return;
    case 0x040001AC:
        NDSCart::ROMCommand[4] = val & 0xFF;
        NDSCart::ROMCommand[5] = (val >> 8) & 0xFF;
        NDSCart::ROMCommand[6] = (val >> 16) & 0xFF;
        NDSCart::ROMCommand[7] = val >> 24;
        return;

    // the ARM7 writes land in the second 8-byte half of each seed buffer,
    // stored in host byte order; memcpy avoids writing a u32 through a
    // pointer into a byte array
    case 0x040001B0: memcpy(&ROMSeed0[8], &val, sizeof(val)); return;
    case 0x040001B4: memcpy(&ROMSeed1[8], &val, sizeof(val)); return;

    case 0x04000208: IME[1] = val & 0x1; return;
    case 0x04000210: IE[1] = val; return;
    // writing 1 bits clears the matching pending flags
    case 0x04000214: IF[1] &= ~val; return;

    case 0x04000308:
        // write-once: accepted only while the stored value is still zero
        if (ARM7BIOSProt == 0)
            ARM7BIOSProt = val & 0xFFFE;
        return;
    }

    if (addr >= 0x04000400 && addr < 0x04000520)
    {
        SPU::Write32(addr, val);
        return;
    }

    printf("unknown ARM7 IO write32 %08X %08X %08X\n", addr, val, ARM7->R[15]);
}

}
