/**************************************************************************//**
 *
 * Copyright 1998-2012 NetBurner, Inc.  ALL RIGHTS RESERVED
 *   Permission is hereby granted to purchasers of NetBurner Hardware
 *   to use or modify this computer program for any use as long as the
 *   resultant program is only executed on NetBurner provided hardware.
 *
 *   No other rights to use this program or it's derivatives in part or
 *   in whole are granted.
 *
 *   It may be possible to license this or other NetBurner software for
 *   use on non-NetBurner Hardware.
 *   Please contact sales@Netburner.com for more information.
 *
 *   NetBurner makes no representation or warranties
 *   with respect to the performance of this computer program, and
 *   specifically disclaims any responsibility for any damages,
 *   special or consequential, connected with the use of this program.
 *
 *---------------------------------------------------------------------
 * NetBurner, Inc.
 * 5405 Morehouse Drive
 * San Diego, California 92121
 *
 * information available at:  http://www.netburner.com
 * E-Mail info@netburner.com
 *
 * Support is available: E-Mail support@netburner.com
 *
 *****************************************************************************/

#include "predef.h"
#include <basictypes.h>
#include "constants.h"
#include <system.h>
#include <ucos.h>
#include <ucosmcfc.h>
#include <cfinter.h>
#include <sim.h>
#include <dspi.h>
#include <debugtraps.h>

#include <sim5441x.h>
#include <intcdefs.h>

// Values for the driver context's dma.csState field.  In this file the field
// is reset to CS_STATE_DEASSERT when a transfer completes and is otherwise
// only tested for zero / non-zero.
// NOTE(review): setupDSPI_Dma stores `true` rather than CS_STATE_DMA, and
// CS_STATE_IRQ is not referenced by name in the code visible here.
#define CS_STATE_DEASSERT    0
#define CS_STATE_DMA         1
#define CS_STATE_IRQ         2

#include <stdio.h>
// Debug breadcrumb: prints the current line and file via iprintf.
// The expansion already ends in ';', so `LOGME;` yields an extra empty statement.
#define LOGME iprintf("We made it to line %d of file %s.\r\n", __LINE__, __FILE__);
// DBPRINT_DSPI(...) forwards to iprintf when __DEBUG_DSPI is defined and
// expands to nothing otherwise.
#ifdef __DEBUG_DSPI
#include <stdio.h>
#define DBPRINT_DSPI(args...) iprintf(args)
#else
#define DBPRINT_DSPI(args...)
#endif



// Defined elsewhere; not referenced in the portion of the file visible here.
extern unsigned long CPU_CLOCK;

// Dummy DMA endpoints.  fakeTXData is a known word of 0x00 used as the TX DMA
// source when no TX buffer is given; fakeRXData is the RX DMA destination
// used when no RX buffer is given (see setupTCDs_8Bit / setupTCDs_16Bit).
uint32_t fakeTXData FAST_SYS_VAR __attribute__ ((aligned (2))) = 0x00000000;
uint32_t fakeRXData FAST_SYS_VAR __attribute__ ((aligned (2))) = 0x00000000;

// NOTE(review): not referenced in the visible part of this file; presumably
// consumed by initialization code further down.
static bool needInit = true;

// Per-module driver state, indexed by DSPI module number.
// All current Netburner boards have 4 modules, but just in case...
dspiDriverStruct DSPIModule::driverCxt[DSPI_MODULE_COUNT] FAST_SYS_VAR;
// Per-module pointer to a DSPIModule object; the completion paths clear its
// m_inProgress flag.
DSPIModule * DSPIModule::lastCxts[DSPI_MODULE_COUNT] FAST_SYS_VAR;
// Zero-initialized (static storage) stand-in TX buffer used by the
// interrupt-driven ISRs when the caller supplied no TX buffer.
#define NULL_TX_BUF_SIZE 32
static uint8_t nullTxBuf[NULL_TX_BUF_SIZE];

// Bundle of baud-rate related settings.
// NOTE(review): not used in the visible part of this file; the field meanings
// below are inferred from their names only -- confirm against the code that
// fills this structure.
typedef struct {
    uint32_t prescale;          // presumably the baud-rate prescaler selection
    uint32_t baudrate_scaler;   // presumably the baud-rate scaler selection
    bool    doubleBaudRate;     // presumably whether the double-baud-rate option is on
    uint32_t baud;              // presumably the resulting baud rate
} baudSettingsStruct;

// Bundle of six 8-bit delay-related settings.
// NOTE(review): not used in the visible part of this file; the names look like
// the DSPI CTAR delay fields (PCSSCK/CSSCK, PDT/DT, PASC/ASC) -- confirm
// against the code that fills this structure.
typedef struct {
    uint8_t pcssck;
    uint8_t cssck;
    uint8_t pdt;
    uint8_t dt;
    uint8_t pasc;
    uint8_t asc;
} delaySettingsStruct;

// C-linkage declaration of putleds(); in the visible code it is referenced
// only from commented-out debugging lines.
extern "C" {
    void putleds(BYTE LV);
}

void DSPI_Isr_done( unsigned int moduleNum, volatile dspistruct &spi )
{
    // do not clear the EOQ flag until *AFTER* setting the halt bit
    // see MCF54418 manual section 40.4.1: Start and stop of DSPI transfers
    if ((DSPIModule::driverCxt[moduleNum].dma.csState)
            && DSPIModule::driverCxt[moduleNum].csReturnToInactive) {
        spi.mcr = DSPIModule::driverCxt[moduleNum].dma.savedDSPI.mcr;
        DSPIModule::driverCxt[moduleNum].dma.csState = CS_STATE_DEASSERT;
    }
    while (!(spi.sr & 0x80000000)) { asm("    nop;"); }
    spi.mcr |= MCR_HALT_BIT;
    spi.sr = SR_CLR_FLAGS;
    spi.sr = SR_EOQF_MASK;
    DSPIModule::driverCxt[moduleNum].DSPI_INT_STATUS = DSPI_OK;
    DSPIModule::lastCxts[moduleNum]->m_inProgress = false;
    if(DSPIModule::driverCxt[moduleNum].DSPI_Sem)
    {
        OSSemPost( DSPIModule::driverCxt[moduleNum].DSPI_Sem );
    }

    // Stop the module
    return;
}

void DSPI_Isr_cleanup( unsigned int moduleNum, volatile dspistruct &spi,
        DWORD regWordsToWrite, DWORD regLastWordsToWrite)
{
    if(DSPIModule::driverCxt[moduleNum].DSPIfinished)
    {
        DSPI_Isr_done( moduleNum, spi );
        return;
    }

    if(DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft == 0)
    {
        DSPIModule::driverCxt[moduleNum].DSPIfinished = TRUE;
    }
    DSPIModule::driverCxt[moduleNum].WordsToWrite = regWordsToWrite;
    DSPIModule::driverCxt[moduleNum].LastWordsToWrite = regWordsToWrite;

    // Writing to the EOQ bit clears it
    spi.sr |= SR_EOQF_MASK;
    // reenable the EOQF interrupt
    spi.rser = RSER_EOQF_IRQ_ONLY;
    spi.mcr &= ~MCR_HALT_BIT;
    return;
}

// ISR worker for frames of 8 bits or fewer (one buffer byte per frame).
//
// Each pass queues up to 16 frames into the TX FIFO, marking the last one with
// the EOQ bit, and then pops the frames received for the previous pass into
// the RX buffer (if there is one).  The updated buffer pointers are stored
// back into the driver context and DSPI_Isr_cleanup() finishes the pass.
//
//   moduleNum - index into DSPIModule::driverCxt
//   spiModule - register block of the module being serviced
inline void DSPI_Isr_8bit( unsigned int moduleNum, volatile dspistruct *spiModule )
{
    register DWORD i;
    register volatile dspistruct &spi = *spiModule;
    register volatile BYTE* Rxbuf = DSPIModule::driverCxt[moduleNum].pDSPIRxbuf;
    register volatile BYTE* Txbuf = DSPIModule::driverCxt[moduleNum].pDSPITxbuf;
    register DWORD regWordsToWrite = DSPIModule::driverCxt[moduleNum].WordsToWrite;
    register DWORD regLastWordsToWrite = DSPIModule::driverCxt[moduleNum].LastWordsToWrite;

    // EOQ flag not set: nothing is popped from the RX FIFO on this pass
    // (presumably this is the call that kicks off the transfer -- confirm).
    if(!(spi.sr & SR_EOQF_MASK))
    {
        DSPIModule::driverCxt[moduleNum].DSPI_INT_STATUS = DSPI_BUSY;
        regLastWordsToWrite = 0;
        DSPIModule::driverCxt[moduleNum].DSPIfinished = FALSE;
    }

    // the FIFOs have 16 slots, so we're reading/writing 16 frames each
    // time through here, and each frame uses 1 byte out of our queues
    if(DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft > 16)
    {
        DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft -= 16;
    }
    else
    {
        // Final (possibly partial) batch: queue whatever is left.
        regWordsToWrite = DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft;
        DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft = 0;
    }

    if (regWordsToWrite)
    {
        // No TX buffer: clock out bytes from the zero-filled nullTxBuf instead.
        bool nullTx = false;
        if (!Txbuf) {
            Txbuf = nullTxBuf;
            nullTx = true;
        }

        // Set Continuous CS enable, use CTAR[0], get CS bits from Command_Mask
        DWORD fifoCmd = 0x80000000 | ((uint32_t)DSPIModule::driverCxt[moduleNum].Command_Mask) << 16;
        if (DSPIModule::driverCxt[moduleNum].csReturnToInactive == DEASSERT_EVERY_TRANSFER) {
            // Drop the Continuous CS bit, and restore the saved MCR if a chip
            // select is currently being held.
            fifoCmd &= 0x7FFFFFFF;
            if (DSPIModule::driverCxt[moduleNum].dma.csState) {
                spi.mcr = DSPIModule::driverCxt[moduleNum].dma.savedDSPI.mcr;
            }
        }
        // If we're in a "DEASSERT_NEVER" block, the cs is inverted and we should not
        // assert during the actual transfers
        if (DSPIModule::driverCxt[moduleNum].dma.csState &&
                (DSPIModule::driverCxt[moduleNum].csReturnToInactive != DEASSERT_EVERY_TRANSFER)) {
            // Clears bit 31 and the CS bits (bits 16-23) of the command.
            fifoCmd &= 0x7F00FFFF;
        }
        // Queue every frame except the last; regWordsToWrite is non-zero here,
        // so the unsigned subtraction cannot wrap.
        for(i = 0; i < regWordsToWrite - 1; i++)
        {
            spi.pushr = fifoCmd | *Txbuf++;
        }
        // Set command EOQ bit
        fifoCmd |= PUSHR_EOQ_BIT;
        // if there's no more to transmit, clear the Continuous CS bit in the command mask
        if ((DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft == 0)
                && DSPIModule::driverCxt[moduleNum].csReturnToInactive)
        {
            fifoCmd &= 0x7FFFFFFF;
        }
        // If we're in a "DEASSERT_NEVER" block, the cs is inverted and we should not
        // assert during the actual transfers
        if (DSPIModule::driverCxt[moduleNum].dma.csState &&
                (DSPIModule::driverCxt[moduleNum].csReturnToInactive != DEASSERT_EVERY_TRANSFER)) {
            fifoCmd &= 0x7F00FFFF;
        }
        // Last frame of this batch carries the EOQ bit.
        spi.pushr = fifoCmd | *Txbuf++;

        // Keep the stored TX pointer NULL when the stand-in buffer was used.
        if (nullTx) {
            Txbuf = NULL;
        }
    }
    if (Rxbuf)
    {
        // Pop the frames received for the batch queued on the previous pass.
        for( i=0; i < regLastWordsToWrite; i++)
        {
            *Rxbuf++ = spi.popr;
        }
    }

    // Update the spiRecord buffer pointers
    DSPIModule::driverCxt[moduleNum].pDSPIRxbuf = Rxbuf;
    DSPIModule::driverCxt[moduleNum].pDSPITxbuf = Txbuf;
    DSPI_Isr_cleanup( moduleNum, spi, regWordsToWrite, regLastWordsToWrite );
    return;
}

// ISR worker that moves data in 16-bit units (two buffer bytes per FIFO
// entry).  DSPI_Isr() selects it for 9..16 bits per queue and for exactly 32.
//
// Each pass queues up to 16 entries (32 bytes) into the TX FIFO, marking the
// last one with the EOQ bit, and then pops the entries received for the
// previous pass into the RX buffer (if there is one).  The updated buffer
// pointers are stored back into the driver context and DSPI_Isr_cleanup()
// finishes the pass.
//
//   moduleNum - index into DSPIModule::driverCxt
//   spiModule - register block of the module being serviced
inline void DSPI_Isr_16bit( unsigned int moduleNum, volatile dspistruct *spiModule )
{
    register DWORD i;
    register volatile dspistruct &spi = *spiModule;
    register volatile BYTE* Rxbuf = DSPIModule::driverCxt[moduleNum].pDSPIRxbuf;
    register volatile BYTE* Txbuf = DSPIModule::driverCxt[moduleNum].pDSPITxbuf;
    register DWORD regWordsToWrite = DSPIModule::driverCxt[moduleNum].WordsToWrite;
    register DWORD regLastWordsToWrite = DSPIModule::driverCxt[moduleNum].LastWordsToWrite;

    // EOQ flag not set: nothing is popped from the RX FIFO on this pass
    // (presumably this is the call that kicks off the transfer -- confirm).
    if(!(spi.sr & SR_EOQF_MASK))
    {
        DSPIModule::driverCxt[moduleNum].DSPI_INT_STATUS = DSPI_BUSY;
        regLastWordsToWrite = 0;
        DSPIModule::driverCxt[moduleNum].DSPIfinished = FALSE;
    }

    // DSPI_SizeLeft is counted in bytes: a full batch consumes 32 of them.
    if(DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft > 32)
    {
        DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft -= 32;
    }
    else
    {
        // Final (possibly partial) batch; an odd trailing byte is dropped by
        // the integer division.
        regWordsToWrite = (DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft / 2);
        DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft = 0;
    }

    if(regWordsToWrite)
    {
        // No TX buffer: clock out data from the zero-filled nullTxBuf instead.
        bool nullTx = false;
        if (!Txbuf) {
            Txbuf = nullTxBuf;
            nullTx = true;
        }

        // Set Continuous CS enable, use CTAR[0], get CS bits from Command_Mask
        DWORD fifoCmd = 0x80000000 | ((uint32_t)DSPIModule::driverCxt[moduleNum].Command_Mask) << 16;
        if (DSPIModule::driverCxt[moduleNum].csReturnToInactive == DEASSERT_EVERY_TRANSFER) {
            // Drop the Continuous CS bit, and restore the saved MCR if a chip
            // select is currently being held.
            fifoCmd &= 0x7FFFFFFF;
            if (DSPIModule::driverCxt[moduleNum].dma.csState) {
                spi.mcr = DSPIModule::driverCxt[moduleNum].dma.savedDSPI.mcr;
            }
        }
        // If we're in a "DEASSERT_NEVER" block, the cs is inverted and we should not
        // assert during the actual transfers
        if (DSPIModule::driverCxt[moduleNum].dma.csState &&
                (DSPIModule::driverCxt[moduleNum].csReturnToInactive != DEASSERT_EVERY_TRANSFER)) {
            // Clears bit 31 and the CS bits (bits 16-23) of the command.
            fifoCmd &= 0x7F00FFFF;
        }
        // Queue every entry except the last; regWordsToWrite is non-zero here,
        // so the unsigned subtraction cannot wrap.
        for(i = 0; i < regWordsToWrite - 1; i++)
        {
            spi.pushr = fifoCmd | *(PWORD)Txbuf;
            Txbuf += 2;
        }
        // Set command EOQ bit
        fifoCmd |= PUSHR_EOQ_BIT;
        // if there's no more to transmit, clear the Continuous CS bit in the command mask
        if ((DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft == 0)
                && DSPIModule::driverCxt[moduleNum].csReturnToInactive)
        {
            fifoCmd &= 0x7FFFFFFF;
        }
        // If we're in a "DEASSERT_NEVER" block, the cs is inverted and we should not
        // assert during the actual transfers
        if (DSPIModule::driverCxt[moduleNum].dma.csState &&
                (DSPIModule::driverCxt[moduleNum].csReturnToInactive != DEASSERT_EVERY_TRANSFER)) {
            fifoCmd &= 0x7F00FFFF;
        }
        // Last entry of this batch carries the EOQ bit.
        spi.pushr = fifoCmd | *(PWORD)Txbuf;
        Txbuf += 2;

        // Keep the stored TX pointer NULL when the stand-in buffer was used.
        if (nullTx) {
            Txbuf = NULL;
        }
    }
    if(Rxbuf)
    {
        // read RX FIFO and move it to the RX buffer
        for( i=0; i < regLastWordsToWrite; i++)
        {
            *(PWORD)Rxbuf = spi.popr;
            Rxbuf += 2;
        }
    }

    // Update the spiRecord buffer pointers
    DSPIModule::driverCxt[moduleNum].pDSPIRxbuf = Rxbuf;
    DSPIModule::driverCxt[moduleNum].pDSPITxbuf = Txbuf;
    DSPI_Isr_cleanup( moduleNum, spi, regWordsToWrite, regLastWordsToWrite );
    return;
}

// ISR worker for 17..19 bits per queue (selected by DSPI_Isr()).
//
// Every logical frame occupies four buffer bytes and is sent as two FIFO
// entries: the value shifted right by 4 using CTAR[0], followed by the low
// 4 bits using CTAR[1].  Each pass queues up to 16 FIFO entries (32 bytes),
// marking the last one with the EOQ bit, and then pops the entries received
// for the previous pass, recombining each pair into one 32-bit value in the
// RX buffer (if there is one).  The updated buffer pointers are stored back
// into the driver context and DSPI_Isr_cleanup() finishes the pass.
//
//   moduleNum - index into DSPIModule::driverCxt
//   spiModule - register block of the module being serviced
inline void DSPI_Isr_19bit( unsigned int moduleNum, volatile dspistruct *spiModule )
{
    register DWORD i;
    register volatile dspistruct &spi = *spiModule;
    register volatile BYTE* Rxbuf = DSPIModule::driverCxt[moduleNum].pDSPIRxbuf;
    register volatile BYTE* Txbuf = DSPIModule::driverCxt[moduleNum].pDSPITxbuf;
    register DWORD regWordsToWrite = DSPIModule::driverCxt[moduleNum].WordsToWrite;
    register DWORD regLastWordsToWrite = DSPIModule::driverCxt[moduleNum].LastWordsToWrite;

    // EOQ flag not set: nothing is popped from the RX FIFO on this pass
    // (presumably this is the call that kicks off the transfer -- confirm).
    if(!(spi.sr & SR_EOQF_MASK))
    {
        DSPIModule::driverCxt[moduleNum].DSPI_INT_STATUS = DSPI_BUSY;
        regLastWordsToWrite = 0;
        DSPIModule::driverCxt[moduleNum].DSPIfinished = FALSE;
    }

    // DSPI_SizeLeft is counted in bytes: a full batch consumes 32 of them.
    if(DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft > 32)
    {
        DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft -= 32;
    }
    else
    {
        // Final (possibly partial) batch, expressed in FIFO entries.
        regWordsToWrite = (DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft / 2);
        DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft = 0;
    }

    if(regWordsToWrite)
    {
        // No TX buffer: clock out data from the zero-filled nullTxBuf instead.
        bool nullTx = false;
        if (!Txbuf) {
            Txbuf = nullTxBuf;
            nullTx = true;
        }

        // Set Continuous CS enable, use CTAR[0], get CS bits from Command_Mask
        DWORD fifoCmd = 0x80000000 | ((uint32_t)DSPIModule::driverCxt[moduleNum].Command_Mask) << 16;
        // Queue every frame (pair of FIFO entries) except the last one.
        // The bound is written as "i + 2 < regWordsToWrite" rather than
        // "i < regWordsToWrite - 2": regWordsToWrite is unsigned, so the
        // subtraction would wrap to a huge value when only one entry is left
        // (a 2- or 3-byte remainder) and the loop would run away.  For counts
        // of two or more the two forms are equivalent.
        for(i = 0; i + 2 < regWordsToWrite; i+=2)
        {
            // Queue a TX with the first (BitsPerQueue - 4) bits, using CTAR[0]
            spi.pushr = fifoCmd | (*(PDWORD)Txbuf >> 4);

            // Queue a TX with the final 4 bits, using CTAR[1]
            fifoCmd |= 0x10000000; // set CTAR[1] bit
            if (DSPIModule::driverCxt[moduleNum].csReturnToInactive == DEASSERT_EVERY_TRANSFER) {
                fifoCmd |= 0x80000000;
            }
            // If we're in a "DEASSERT_NEVER" block, the cs is inverted and we should not
            // assert during the actual transfers
            if (DSPIModule::driverCxt[moduleNum].dma.csState &&
                    (DSPIModule::driverCxt[moduleNum].csReturnToInactive != DEASSERT_EVERY_TRANSFER)) {
                fifoCmd &= 0x7F00FFFF;
            }
            spi.pushr = fifoCmd | (*(PDWORD)Txbuf & 0x0F);
            if (DSPIModule::driverCxt[moduleNum].csReturnToInactive == DEASSERT_EVERY_TRANSFER) {
                fifoCmd &= 0x7FFFFFFF;
            }
            fifoCmd &= ~0x10000000; // clear CTAR[1] bit, returning CTAR[0]
            // If we're in a "DEASSERT_NEVER" block, the cs is inverted and we should not
            // assert during the actual transfers
            if (DSPIModule::driverCxt[moduleNum].dma.csState &&
                    (DSPIModule::driverCxt[moduleNum].csReturnToInactive != DEASSERT_EVERY_TRANSFER)) {
                fifoCmd &= 0x7F00FFFF;
            }
            Txbuf += 4;
        }
        // Queue a TX with the first (BitsPerQueue - 4) bits, using CTAR[0]
        spi.pushr = fifoCmd | (*(PDWORD)Txbuf >> 4);

        // Queue a TX with the final 4 bits, using CTAR[1], adding the EOQ bit
        fifoCmd |= 0x18000000;
        // if there's no more to transmit, clear the Continuous CS bit in the command mask
        if ((DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft == 0)
                && DSPIModule::driverCxt[moduleNum].csReturnToInactive)
        {
            fifoCmd &= 0x7FFFFFFF;
        }
        // If we're in a "DEASSERT_NEVER" block, the cs is inverted and we should not
        // assert during the actual transfers
        if (DSPIModule::driverCxt[moduleNum].dma.csState &&
                (DSPIModule::driverCxt[moduleNum].csReturnToInactive != DEASSERT_EVERY_TRANSFER)) {
            fifoCmd &= 0x7F00FFFF;
        }
        spi.pushr = fifoCmd | (*(PDWORD)Txbuf & 0x0F);
        Txbuf += 4;

        // Keep the stored TX pointer NULL when the stand-in buffer was used.
        if (nullTx) {
            Txbuf = NULL;
        }
    }
    if(Rxbuf)
    {
        // Pop the previous batch two entries at a time and merge each pair
        // back into one 32-bit value: (first << 4) | second.
        for( i=0; i < regLastWordsToWrite; i+=2)
        {
            *(PDWORD)Rxbuf = (spi.popr << 4);
            *(PDWORD)Rxbuf |= spi.popr;
            Rxbuf += 4;
        }
    }

    // Update the spiRecord buffer pointers
    DSPIModule::driverCxt[moduleNum].pDSPIRxbuf = Rxbuf;
    DSPIModule::driverCxt[moduleNum].pDSPITxbuf = Txbuf;
    DSPI_Isr_cleanup( moduleNum, spi, regWordsToWrite, regLastWordsToWrite );
    return;
}

// ISR worker for 20..31 bits per queue (selected by DSPI_Isr()).
//
// Data is moved as 16-bit FIFO entries that alternate between CTAR[0] and
// CTAR[1].  Each pass queues up to 16 entries (32 bytes), marking the last
// one with the EOQ bit, and then pops the entries received for the previous
// pass into the RX buffer (if there is one).  The updated buffer pointers are
// stored back into the driver context and DSPI_Isr_cleanup() finishes the pass.
//
//   moduleNum - index into DSPIModule::driverCxt
//   spiModule - register block of the module being serviced
inline void DSPI_Isr_31bit( unsigned int moduleNum, volatile dspistruct *spiModule )
{
    register DWORD i;
    register volatile dspistruct &spi = *spiModule;
    register volatile BYTE* Rxbuf = DSPIModule::driverCxt[moduleNum].pDSPIRxbuf;
    register volatile BYTE* Txbuf = DSPIModule::driverCxt[moduleNum].pDSPITxbuf;
    register DWORD regWordsToWrite = DSPIModule::driverCxt[moduleNum].WordsToWrite;
    register DWORD regLastWordsToWrite = DSPIModule::driverCxt[moduleNum].LastWordsToWrite;

    // EOQ flag not set: nothing is popped from the RX FIFO on this pass
    // (presumably this is the call that kicks off the transfer -- confirm).
    if(!(spi.sr & SR_EOQF_MASK))
    {
        DSPIModule::driverCxt[moduleNum].DSPI_INT_STATUS = DSPI_BUSY;
        regLastWordsToWrite = 0;
        DSPIModule::driverCxt[moduleNum].DSPIfinished = FALSE;
    }

    // DSPI_SizeLeft is counted in bytes: a full batch consumes 32 of them.
    if(DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft > 32)
    {
        DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft -= 32;
    }
    else
    {
        // Final (possibly partial) batch, expressed in 16-bit FIFO entries.
        regWordsToWrite = (DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft / 2);
        DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft = 0;
    }

    if(regWordsToWrite)
    {
        // No TX buffer: clock out data from the zero-filled nullTxBuf instead.
        bool nullTx = false;
        if (!Txbuf) {
            Txbuf = nullTxBuf;
            nullTx = true;
        }

        // Set Continuous CS enable, use CTAR[0], get CS bits from Command_Mask
        DWORD fifoCmd = 0x80000000 | ((uint32_t)DSPIModule::driverCxt[moduleNum].Command_Mask) << 16;
        // Queue every entry except the last; regWordsToWrite is non-zero here,
        // so the unsigned subtraction cannot wrap.
        // NOTE(review): with DEASSERT_EVERY_TRANSFER the Continuous CS bit
        // cleared at the bottom of an iteration is set again at the top of
        // the next one, so the clear only reaches the final push below --
        // confirm this is the intended chip-select behavior.
        for(i = 0; i < regWordsToWrite - 1; i++)
        {
            if (DSPIModule::driverCxt[moduleNum].csReturnToInactive == DEASSERT_EVERY_TRANSFER) {
                fifoCmd |= 0x80000000;
            }
            // If we're in a "DEASSERT_NEVER" block, the cs is inverted and we should not
            // assert during the actual transfers
            if (DSPIModule::driverCxt[moduleNum].dma.csState &&
                    (DSPIModule::driverCxt[moduleNum].csReturnToInactive != DEASSERT_EVERY_TRANSFER)) {
                // Clears bit 31 and the CS bits (bits 16-23) of the command.
                fifoCmd &= 0x7F00FFFF;
            }
            // Queue a TX with the first (BitsPerQueue - 16) bits, using CTAR[0]
            spi.pushr = fifoCmd | *(PWORD)Txbuf;
            Txbuf += 2;

            // Set or clear CTAR[1] <-> CTAR[0]
            fifoCmd ^= 0x10000000;
            if (DSPIModule::driverCxt[moduleNum].csReturnToInactive == DEASSERT_EVERY_TRANSFER) {
                fifoCmd &= 0x7FFFFFFF;
            }
        }

        // Queue a TX with the final 16 bits, using CTAR[1], adding the EOQ bit
        fifoCmd |= 0x18000000;
        // if there's no more to transmit, clear the Continuous CS bit in the command mask
        if ((DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft == 0)
                && DSPIModule::driverCxt[moduleNum].csReturnToInactive)
        {
            fifoCmd &= 0x7FFFFFFF;
        }
        // If we're in a "DEASSERT_NEVER" block, the cs is inverted and we should not
        // assert during the actual transfers
        if (DSPIModule::driverCxt[moduleNum].dma.csState &&
                (DSPIModule::driverCxt[moduleNum].csReturnToInactive != DEASSERT_EVERY_TRANSFER)) {
            fifoCmd &= 0x7F00FFFF;
        }
        spi.pushr = fifoCmd | ((*(PWORD)Txbuf));
        Txbuf += 2;

        // Keep the stored TX pointer NULL when the stand-in buffer was used.
        if (nullTx) {
            Txbuf = NULL;
        }
    }
    if(Rxbuf)
    {
        // Pop the 16-bit entries received for the batch queued on the previous pass.
        for( i=0; i < regLastWordsToWrite; i++)
        {
            *(PWORD)Rxbuf = spi.popr;
            Rxbuf += 2;
        }
    }

    // Update the spiRecord buffer pointers
    DSPIModule::driverCxt[moduleNum].pDSPIRxbuf = Rxbuf;
    DSPIModule::driverCxt[moduleNum].pDSPITxbuf = Txbuf;
    DSPI_Isr_cleanup( moduleNum, spi, regWordsToWrite, regLastWordsToWrite );
    return;
}

/*-----------------------------------------------------------------------
DSPI interrupt service routine.
Called by hardware when DSPI event occurs
------------------------------------------------------------------------*/
void DSPI_Isr( unsigned int moduleNum, volatile dspistruct *spiModule )
{
//    register DWORD i;
//    register volatile dspistruct &spi = *spiModule;
//    register volatile BYTE* Rxbuf = driverCxt[moduleNum].pDSPIRxbuf;
//    register volatile BYTE* Txbuf = driverCxt[moduleNum].pDSPITxbuf;
//    register DWORD regWordsToWrite = driverCxt[moduleNum].WordsToWrite;
//    register DWORD regLastWordsToWrite = driverCxt[moduleNum].LastWordsToWrite;
//
//    if(!(spi.sr & SR_EOQF_MASK))
//    {
//        driverCxt[moduleNum].DSPI_INT_STATUS = DSPI_BUSY;
//        regLastWordsToWrite = 0;
//        driverCxt[moduleNum].DSPIfinished = FALSE;
//    }


    // Disable all interrupts for the dspi module
    spiModule->rser = 0x00000000;
    spiModule->mcr |= MCR_HALT_BIT;
    if(DSPIModule::driverCxt[moduleNum].BitsPerQueue <= 8)
    {
        DSPI_Isr_8bit( moduleNum, spiModule );
    }

    else if( ((DSPIModule::driverCxt[moduleNum].BitsPerQueue > 8)
                && (DSPIModule::driverCxt[moduleNum].BitsPerQueue < 17))
            || (DSPIModule::driverCxt[moduleNum].BitsPerQueue == 32) )
    {
        DSPI_Isr_16bit( moduleNum, spiModule );
    }

    else if((DSPIModule::driverCxt[moduleNum].BitsPerQueue > 16)
            && (DSPIModule::driverCxt[moduleNum].BitsPerQueue < 20))
    {
        DSPI_Isr_19bit( moduleNum, spiModule );
    }

    else if((DSPIModule::driverCxt[moduleNum].BitsPerQueue > 19)
            && (DSPIModule::driverCxt[moduleNum].BitsPerQueue < 32))
    {
        DSPI_Isr_31bit( moduleNum, spiModule );
    }

//    // Update the spiRecord buffer pointers
//    driverCxt[moduleNum].pDSPIRxbuf = Rxbuf;
//    driverCxt[moduleNum].pDSPITxbuf = Txbuf;
//
//    putleds(driverCxt[moduleNum].DSPIfinished);
//
//    if(driverCxt[moduleNum].DSPIfinished)
//    {
//        // do not clear the EOQ flag until *AFTER* setting the halt bit
//        // see MCF54418 manual section 40.4.1: Start and stop of DSPI transfers
//        spi.mcr |= MCR_HALT_BIT;
//        spi.sr &= SR_CLR_FLAGS;
//        driverCxt[moduleNum].DSPI_INT_STATUS = DSPI_OK;
//        if(driverCxt[moduleNum].DSPI_Sem)
//            OSSemPost( driverCxt[moduleNum].DSPI_Sem );
//
//        // Stop the module
//        return;
//    }
//
//    if(driverCxt[moduleNum].DSPI_SizeLeft == 0)
//        driverCxt[moduleNum].DSPIfinished = TRUE;
//    driverCxt[moduleNum].WordsToWrite = regWordsToWrite;
//    driverCxt[moduleNum].LastWordsToWrite = regWordsToWrite;
//    // Writing to the EOQ bit clears it, and starts the SPI transfers.
//    spi.mcr &= ~MCR_HALT_BIT;
//    spi.sr |= SR_EOQF_MASK;
    return;
}

static inline void setupTCDs_8Bit( int moduleNum, volatile dspistruct &spi,
        volatile edma_tcdstruct &txTcd, volatile edma_tcdstruct &rxTcd, bool txCanIRQ )
{
    // If we don't actually have a buffer to send, fake it
    if (!(DSPIModule::driverCxt[moduleNum].dma.txPresent))
    {
        txTcd.saddr = (vudword)&fakeTXData;
        txTcd.soff = TCD_XOFF_0BYTE;
    }
    else
    {
        txTcd.soff = TCD_XOFF_1BYTE;
    }
    txTcd.attr = TCD_ATTR_8BIT_TRANS;
    txTcd.nbytes = 0x0001; // write 1 byte per minor loop (request)
    txTcd.slast = 0x00000000; // don't move curr saddr when finishing the last major loop
    // write to the low byte of the data word of the pushr register
    txTcd.daddr = ((uint32_t)&spi.pushr) + 3;
    txTcd.doff = TCD_XOFF_0BYTE;
    txTcd.dlast_sga = 0x00000000; // don't move curr daddr when finishing the last major loop

    // set the D_REQ bit, but not the INT_MAJOR bit, unless we're not recieving
//    if (DSPIModule::driverCxt[moduleNum].dma.rxPresent) {
        txTcd.csr = TCD_CSR_DISABLE_REQ;
        rxTcd.csr = TCD_CSR_DREQ_INT_MAJOR; // set the D_REQ and INT_MAJOR bits
//    }
//    else {
//        txTcd.csr = TCD_CSR_DREQ_INT_MAJOR;
//        rxTcd.csr = TCD_CSR_DISABLE_REQ;
//    }

    rxTcd.saddr = ((uint32_t)&spi.popr) + 3;
    rxTcd.attr = TCD_ATTR_8BIT_TRANS;
    rxTcd.soff = TCD_XOFF_0BYTE;
    rxTcd.nbytes = 0x0001;
    rxTcd.slast = 0x00000000;
    if (!(DSPIModule::driverCxt[moduleNum].dma.rxPresent))
    {
        rxTcd.daddr = (vudword)&fakeRXData;
        rxTcd.doff = TCD_XOFF_0BYTE;
    }
    else
    {
        rxTcd.doff = TCD_XOFF_1BYTE;
    }
    rxTcd.dlast_sga = 0x00000000;
//    rxTcd.csr = TCD_CSR_DREQ_INT_MAJOR; // set the D_REQ and INT_MAJOR bits
}

static inline void setupTCDs_16Bit( int moduleNum, volatile dspistruct &spi,
        volatile edma_tcdstruct &txTcd, volatile edma_tcdstruct &rxTcd, bool txCanIRQ )
{
    // If we don't actually have a buffer to send, fake it
    if (!(DSPIModule::driverCxt[moduleNum].dma.txPresent))
    {
        txTcd.saddr = (vudword)&fakeTXData;
        txTcd.soff = TCD_XOFF_0BYTE;
    }
    else
    {
        txTcd.soff = TCD_XOFF_2BYTE;
    }
    txTcd.attr = TCD_ATTR_16BIT_TRANS;
    txTcd.nbytes = 0x0002; // write 2 bytes per minor loop (request)
    txTcd.slast = 0x00000000; // don't move curr saddr when finishing the last major loop
    txTcd.daddr = ((uint32_t)&spi.pushr) + 2; // write to the data word of the pushr register
    txTcd.doff = TCD_XOFF_0BYTE;
    txTcd.dlast_sga = 0x00000000; // don't move curr daddr when finishing the last major loop

    // set the D_REQ bit, but not the INT_MAJOR bit, unless we're not recieving
//    if (DSPIModule::driverCxt[moduleNum].dma.rxPresent) {
        txTcd.csr = TCD_CSR_DISABLE_REQ;
        rxTcd.csr = TCD_CSR_DREQ_INT_MAJOR; // set the D_REQ and INT_MAJOR bits
//    }
//    else {
//        txTcd.csr = TCD_CSR_DREQ_INT_MAJOR;
//        rxTcd.csr = TCD_CSR_DISABLE_REQ;
//    }

    rxTcd.saddr = ((uint32_t)&spi.popr) + 2;
    rxTcd.attr = TCD_ATTR_16BIT_TRANS;
    rxTcd.soff = TCD_XOFF_0BYTE;
    rxTcd.nbytes = 0x0002;
    rxTcd.slast = 0x00000000;
    if (!(DSPIModule::driverCxt[moduleNum].dma.rxPresent))
    {
        rxTcd.daddr = (vudword)&fakeRXData;
        rxTcd.doff = TCD_XOFF_0BYTE;
    }
    else
    {
        rxTcd.doff = TCD_XOFF_2BYTE;
    }
    rxTcd.dlast_sga = 0x00000000;
//    rxTcd.csr = TCD_CSR_DREQ_INT_MAJOR; // set the D_REQ and INT_MAJOR bits
}

/*
 * Shared body of the DSPI eDMA channel interrupt handlers.
 *
 * When no bytes remain, the transfer is finished: status flags are cleared,
 * the saved MCR is restored (and chip select marked deasserted) if the
 * context asks for chip select to return to inactive, the saved RSER is
 * restored, both channel descriptors are disarmed, the context is marked
 * finished / DSPI_OK, the owning object's m_inProgress flag is cleared and the
 * completion semaphore is posted if one is registered.
 *
 * Otherwise the next chunk is started: the remaining byte count is reduced by
 * the size of the chunk, the major-loop counters are reprogrammed when the
 * remainder fits in a single major loop, and both channels are re-armed.
 *
 *   moduleNum - index into DSPIModule::lastCxts
 *   spi       - register block of the DSPI module
 *   drvCxt    - driver context of that module
 *   rxChannel - eDMA channel number used for receive
 *   txChannel - eDMA channel number used for transmit
 */
static void dmaChannelISR( int moduleNum, volatile dspistruct *spi, dspiDriverStruct *drvCxt,
                    uint8_t rxChannel, uint8_t txChannel )
{
    if (drvCxt->DSPI_SizeLeft == 0)
    {
        spi->sr |= SR_CLR_FLAGS;
        if (drvCxt->csReturnToInactive) {
            spi->mcr = drvCxt->dma.savedDSPI.mcr;
            drvCxt->dma.csState = CS_STATE_DEASSERT;
        }
        spi->rser = drvCxt->dma.savedDSPI.rser;
        sim2.edma.tcd[rxChannel].csr = 0;
        sim2.edma.tcd[txChannel].csr = 0;
        drvCxt->DSPIfinished = TRUE;
        drvCxt->DSPI_INT_STATUS = DSPI_OK;
        DSPIModule::lastCxts[moduleNum]->m_inProgress = false;
        if (drvCxt->DSPI_Sem)
        {
            OSSemPost(drvCxt->DSPI_Sem);
        }
        return;
    }
    register uint8_t &bytecount = drvCxt->dma.byteCount;
    register int loopCnt = (drvCxt->DSPI_SizeLeft / bytecount);
    // set DMA loop counters
    if ( loopCnt > TCD_XITER_CNT_MASK )
    {
        // Another full-size chunk: the counters programmed at setup time are
        // left as they are, only the remaining size is updated.
        drvCxt->DSPI_SizeLeft -= TCD_XITER_CNT_MASK * bytecount;
    }
    else
    {
        volatile edma_tcdstruct &rxTcd = sim2.edma.tcd[rxChannel];
        volatile edma_tcdstruct &txTcd = sim2.edma.tcd[txChannel];
        // set the Major loop count, making sure to not set the E_LINK bit
        txTcd.citer = loopCnt;
        txTcd.biter = loopCnt;
        rxTcd.citer = loopCnt;
        rxTcd.biter = loopCnt;
        drvCxt->DSPI_SizeLeft -= loopCnt * bytecount;
    }

    // Enable DMA requests, in RX->TX order to prevent losing RX data.
    // The RX channel is re-armed unconditionally, exactly as setupDSPI_Dma()
    // arms it for the first chunk: the RX descriptor is the only one that
    // raises the major-loop interrupt (even without an RX buffer it drains
    // into fakeRXData), so skipping it for TX-only transfers would leave a
    // multi-chunk transfer without any further completion interrupt.
    sim2.edma.serq = rxChannel;
    sim2.edma.serq = txChannel;
}

void setupDSPI_Dma( int moduleNum, volatile dspistruct &spi )
{
    volatile edma_tcdstruct *rxTcd = NULL;
    volatile edma_tcdstruct *txTcd = NULL;
    uint8_t rxChannel = 0, txChannel = 0;
    switch (moduleNum)
    {
        case 0:
            rxChannel = DMA_CH_DSPI_0_RX;
            txChannel = DMA_CH_DSPI_0_TX;
            break;
        case 1:
            rxChannel = DMA_CH_DSPI_1_RX;
            txChannel = DMA_CH_DSPI_1_TX;
            break;
        case 2:
            rxChannel = DMA_CH_DSPI_2_RX;
            txChannel = DMA_CH_DSPI_2_TX;
            break;
        case 3:
            rxChannel = DMA_CH_DSPI_3_RX;
            txChannel = DMA_CH_DSPI_3_TX;
            break;
    }

    rxTcd = &sim2.edma.tcd[rxChannel];
    txTcd = &sim2.edma.tcd[txChannel];

    // Leave the group priorities alone, but clear all others
    sim2.edma.cr &= CR_CLEAR_NON_GRP_PRIO;

    // Start setting up the TCD
    txTcd->saddr = (vudword)DSPIModule::driverCxt[moduleNum].pDSPITxbuf;
    rxTcd->daddr = (vudword)DSPIModule::driverCxt[moduleNum].pDSPIRxbuf;

    if (DSPIModule::driverCxt[moduleNum].BitsPerQueue > 8)
    {
        DSPIModule::driverCxt[moduleNum].dma.byteCount = 2;
        setupTCDs_16Bit( moduleNum, spi, *txTcd, *rxTcd, DSPIModule::driverCxt[moduleNum].pDSPIRxbuf == NULL );
    }
    else
    {
        DSPIModule::driverCxt[moduleNum].dma.byteCount = 1;
        setupTCDs_8Bit( moduleNum, spi, *txTcd, *rxTcd, DSPIModule::driverCxt[moduleNum].pDSPIRxbuf == NULL );
    }

    register int loopCnt = (DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft / DSPIModule::driverCxt[moduleNum].dma.byteCount);
    // RX fifo not empty
    while ((spi.sr & 0x00020000)) {
        volatile uint16_t tmp = spi.popr;
        (void)tmp; // Stop the compiler complaining of unused variable.
    }
    if ( loopCnt > TCD_XITER_CNT_MASK )
    {
        txTcd->citer = TCD_XITER_CNT_MASK;
        txTcd->biter = TCD_XITER_CNT_MASK;
        rxTcd->citer = TCD_XITER_CNT_MASK;
        rxTcd->biter = TCD_XITER_CNT_MASK;
        DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft -= TCD_XITER_CNT_MASK * DSPIModule::driverCxt[moduleNum].dma.byteCount;
    }
    else
    {
        // set the Major loop count, making sure to not set the E_LINK bit
        txTcd->citer = loopCnt;
        txTcd->biter = loopCnt;
        rxTcd->citer = loopCnt;
        rxTcd->biter = loopCnt;
        DSPIModule::driverCxt[moduleNum].DSPI_SizeLeft -= loopCnt * DSPIModule::driverCxt[moduleNum].dma.byteCount;
    }


    // invert the inactive state for the CS pins that we want active during the transfer
    if (!(DSPIModule::driverCxt[moduleNum].dma.csState)) {
        spi.mcr ^= ((DSPIModule::driverCxt[moduleNum].Command_Mask << 16) & 0x00FF0000);
        DSPIModule::driverCxt[moduleNum].dma.csState = true;
    }

    // Disable the TX Fifo to prevent a RX Fifo overflow
    spi.mcr |= MCR_DIS_TXF;

    // Enable DMA requests, in RX->TX order to pevent losing RX data
    sim2.edma.serq = rxChannel;
//    if (DSPIModule::driverCxt[moduleNum].dma.rxPresent) sim2.edma.serq = rxChannel;
    sim2.edma.serq = txChannel;
    spi.mcr &= ~MCR_HALT_BIT;
}

/*************** Begin Interrupt Handlers ***************/

INTERRUPT( dspi0_rx_dma_int_routine, 0x2200 )
{
    // DSPI0 receive DMA channel: clear the channel's interrupt bit first,
    // then let the shared DMA service routine handle module 0.
    sim2.edma.cint = DMA_CH_DSPI_0_RX;
    dmaChannelISR( 0, &sim2.dspi0, &DSPIModule::driverCxt[0],
                   DMA_CH_DSPI_0_RX, DMA_CH_DSPI_0_TX );
}

INTERRUPT( dspi0_tx_dma_int_routine, 0x2200 )
{
    // DSPI0 transmit DMA channel: clear the channel's interrupt bit first,
    // then let the shared DMA service routine handle module 0.
    sim2.edma.cint = DMA_CH_DSPI_0_TX;
    dmaChannelISR( 0, &sim2.dspi0, &DSPIModule::driverCxt[0],
                   DMA_CH_DSPI_0_RX, DMA_CH_DSPI_0_TX );
}

INTERRUPT( dspi1_rx_dma_int_routine, 0x2200 )
{
    // DSPI1 receive DMA channel: clear the channel's interrupt bit first,
    // then let the shared DMA service routine handle module 1.
    sim2.edma.cint = DMA_CH_DSPI_1_RX;
    dmaChannelISR( 1, &sim2.dspi1, &DSPIModule::driverCxt[1],
                   DMA_CH_DSPI_1_RX, DMA_CH_DSPI_1_TX );
}

INTERRUPT( dspi1_tx_dma_int_routine, 0x2200 )
{
    // DSPI1 transmit DMA channel: clear the channel's interrupt bit first,
    // then let the shared DMA service routine handle module 1.
    sim2.edma.cint = DMA_CH_DSPI_1_TX;
    dmaChannelISR( 1, &sim2.dspi1, &DSPIModule::driverCxt[1],
                   DMA_CH_DSPI_1_RX, DMA_CH_DSPI_1_TX );
}

INTERRUPT( dspi2_rx_dma_int_routine, 0x2200 )
{
    // DSPI2 receive DMA channel: clear the channel's interrupt bit first,
    // then let the shared DMA service routine handle module 2.
    sim2.edma.cint = DMA_CH_DSPI_2_RX;
    dmaChannelISR( 2, &sim1.dspi2, &DSPIModule::driverCxt[2],
                   DMA_CH_DSPI_2_RX, DMA_CH_DSPI_2_TX );
}

INTERRUPT( dspi2_tx_dma_int_routine, 0x2200 )
{
    // DSPI2 transmit DMA channel: clear the channel's interrupt bit first,
    // then let the shared DMA service routine handle module 2.
    sim2.edma.cint = DMA_CH_DSPI_2_TX;
    dmaChannelISR( 2, &sim1.dspi2, &DSPIModule::driverCxt[2],
                   DMA_CH_DSPI_2_RX, DMA_CH_DSPI_2_TX );
}

INTERRUPT( dspi3_rx_dma_int_routine, 0x2200 )
{
    // DSPI3 receive DMA channel: clear the channel's interrupt bit first,
    // then let the shared DMA service routine handle module 3.
    sim2.edma.cint = DMA_CH_DSPI_3_RX;
    dmaChannelISR( 3, &sim1.dspi3, &DSPIModule::driverCxt[3],
                   DMA_CH_DSPI_3_RX, DMA_CH_DSPI_3_TX );
}
INTERRUPT( dspi3_tx_dma_int_routine, 0x2200 )
{
    // DSPI3 transmit DMA channel: clear the channel's interrupt bit first,
    // then let the shared DMA service routine handle module 3.
    sim2.edma.cint = DMA_CH_DSPI_3_TX;
    dmaChannelISR( 3, &sim1.dspi3, &DSPIModule::driverCxt[3],
                   DMA_CH_DSPI_3_RX, DMA_CH_DSPI_3_TX );
}

INTERRUPT( dspi0_int_routine, 0x2200 )
{
    // DSPI0 module interrupt: forward to the common handler together with
    // this module's register block.
    volatile dspistruct *regs = &sim2.dspi0;
    DSPI_Isr( 0, regs );
}

INTERRUPT( dspi1_int_routine, 0x2200 )
{
    // DSPI1 module interrupt: forward to the common handler together with
    // this module's register block.
    volatile dspistruct *regs = &sim2.dspi1;
    DSPI_Isr( 1, regs );
}

INTERRUPT( dspi2_int_routine, 0x2200 )
{
    // DSPI2 module interrupt: forward to the common handler together with
    // this module's register block.
    volatile dspistruct *regs = &sim1.dspi2;
    DSPI_Isr( 2, regs );
}

INTERRUPT( dspi3_int_routine, 0x2200 )
{
    // DSPI3 module interrupt: forward to the common handler together with
    // this module's register block.
    volatile dspistruct *regs = &sim1.dspi3;
    DSPI_Isr( 3, regs );
}

/*************** End Interrupt Handlers ***************/

// determine if we can use DMA:
// if any buffer is not 2 byte aligned in 16 bit mode, we can't use dma
int canDoDMA( BYTE SPIModule, PBYTE pTXBuf, volatile BYTE* pRXBuf )
{
    // Alignment only matters when the module is configured for frames wider
    // than 8 bits; otherwise DMA is always permitted.
    if (DSPIModule::driverCxt[SPIModule].BitsPerQueue <= 8)
    {
        return TRUE;
    }

    // A buffer that was not supplied (NULL) places no constraint on DMA.
    // Every supplied buffer must start on an even address.
    const bool rxMisaligned = (pRXBuf != NULL) && ((((DWORD)pRXBuf) & 0x01) != 0);
    const bool txMisaligned = (pTXBuf != NULL) && ((((DWORD)pTXBuf) & 0x01) != 0);

    return (rxMisaligned || txMisaligned) ? FALSE : TRUE;
}

/*----------------------------------------------------------------------------
BYTE DSPIStart( PBYTE pTxbuf, volatile BYTE* pRxbuf, DWORD num, BYTE SPIModule,
            OS_SEM* DSPI_Finished_Sem = NULL );

If configured for 8 bits per transfer then the data must be BYTE aligned
If configured for > than 8 bits per transfer then the data must be WORD aligned
If configured for > than 16 bits per transfer then the data must be DWORD aligned
If either RX or TX pointer is assigned 'null' then that comm direction will not occur
Sends 'num' BYTES  of data starting at PBYTE 'pTxbuf'
Reads and stores in RAM at PBYTE 'pRxbuf'
If DSPI_Finished points to a semaphore then DSPI will POST it when finished
This semaphore is not required but it is the most efficient way to determine when finished

SPIModule selects which dspi module you wish to use

Returns the current state of the DSPI bus
*///////////////////////////////////////////////////////////////////////////////////
BYTE DSPIModule::Start( PBYTE pTXBuf, volatile BYTE* pRXBuf, DWORD num,
            int csReturnToInactive)
{
    // Starts a transfer of 'num' bytes on this object's DSPI module.
    //   pTXBuf             - data to send, or NULL when nothing is transmitted
    //   pRXBuf             - where received data is stored, or NULL to discard it
    //   num                - number of bytes to transfer
    //   csReturnToInactive - chip select release policy (stored as csReturnType)
    // Returns DSPI_OK when the transfer was started (or num was 0), the
    // current non-OK driver status when the module is already in use, or
    // DSPI_BUSY when the module number does not map to a hardware module.
    bool enableDMA = m_enableDMA;
    if (m_moduleNum > DSPI_MODULE_COUNT - 1)
    {
        return DSPI_BUSY;
    }

    // Locate the register block before claiming the module, so an unmapped
    // module number can never leave the driver marked busy or dereference a
    // NULL register pointer.
    volatile dspistruct *spi = NULL;
    switch (m_moduleNum)
    {
        case 0:
            spi = &sim2.dspi0;
            break;
        case 1:
            spi = &sim2.dspi1;
            break;
        case 2:
            spi = &sim1.dspi2;
            break;
        case 3:
            spi = &sim1.dspi3;
            break;
        default:
            break;
    }
    if (!spi)
    {
        return DSPI_BUSY;
    }

    {
        OSLockObj lock;
        if(driverCxt[m_moduleNum].DSPI_INT_STATUS != DSPI_OK )
        {
            return driverCxt[m_moduleNum].DSPI_INT_STATUS;
        }

        // Only claim the module when there is real work to do. An empty
        // transfer never reaches the interrupt handlers, so nothing would
        // ever release a claim made here.
        if (num != 0)
        {
            driverCxt[m_moduleNum].DSPI_INT_STATUS = DSPI_BUSY;
            m_inProgress = true;
        }
    }

    if(num == 0)
    {
        // Nothing to transfer: report completion immediately.
        if(m_finishedSem)
            OSSemPost(m_finishedSem);
        return DSPI_OK;
    }

    // Publish the transfer parameters to the shared per-module driver context.
    driverCxt[m_moduleNum].pDSPIRxbuf = pRXBuf;
    driverCxt[m_moduleNum].pDSPITxbuf = pTXBuf;
    driverCxt[m_moduleNum].DSPI_SizeLeft = num;
    driverCxt[m_moduleNum].csReturnToInactive = (csReturnType)csReturnToInactive;
    driverCxt[m_moduleNum].DSPI_Sem = m_finishedSem;
    driverCxt[m_moduleNum].WordsToWrite = 16;
    driverCxt[m_moduleNum].DSPIfinished = FALSE;

    spi->mcr |= MCR_CLR_FIFOS;
    spi->tcr = 0;

    // Always reload this object's timing and chip select configuration;
    // presumably another DSPIModule object may have used the same hardware
    // module since this one last ran.
    spi->ctar[0] = m_ctar0;
    spi->ctar[1] = m_ctar1;
    driverCxt[m_moduleNum].Command_Mask = m_CommandMask;
    driverCxt[m_moduleNum].BitsPerQueue = m_BitsPerQueue;
    lastCxts[m_moduleNum] = this;

    // Transfers shorter than 32 bytes always use the interrupt driven path.
    if (num < 32) { enableDMA = false; }
    if (enableDMA)
    {
        enableDMA = canDoDMA( m_moduleNum, pTXBuf, pRXBuf );
    }

    if (driverCxt[m_moduleNum].dma.enabled && enableDMA)
    {
        // save the current MCR register, as we're going to screw with it
        // during the transfer and set it back at the end
        if (!driverCxt[m_moduleNum].dma.csState) {
            driverCxt[m_moduleNum].dma.savedDSPI = *const_cast<dspistruct *>(spi);
        }
        driverCxt[m_moduleNum].dma.rxPresent = (pRXBuf != NULL);
        driverCxt[m_moduleNum].dma.txPresent = (pTXBuf != NULL);

        spi->rser = RSER_DMA_IRQ_ONLY;
        spi->mcr |= MCR_HALT_BIT;
        spi->sr |= SR_EOQF_MASK;

        setupDSPI_Dma(m_moduleNum, *spi);
    }
    else
    {
        spi->rser = RSER_EOQF_IRQ_ONLY;

        if (driverCxt[m_moduleNum].csReturnToInactive == DEASSERT_NEVER) {
            if (!driverCxt[m_moduleNum].dma.csState) {
                // Chip selects stay asserted across transfers: remember the
                // register state and flip the selected CS inactive-state bits.
                driverCxt[m_moduleNum].dma.savedDSPI = *const_cast<dspistruct *>(spi);
                spi->mcr ^= ((driverCxt[m_moduleNum].Command_Mask << 16) & 0x00FF0000);
                driverCxt[m_moduleNum].dma.csState = CS_STATE_IRQ;
            }
        }
        // Kick off the interrupt driven transfer by running the handler once.
        DSPI_Isr(m_moduleNum, spi);
    }
    return DSPI_OK;
}
/*
 *******************************************************************************
 *
 *  static inline void calcBaudSettings( baudSettingsStruct &best, DWORD Baudrate )
 *
 *  This function finds the best settings for the timing register to best
 *  approximate the desired baudrate. It performs a brute force search and fills
 *  in a structure with the register setting values that produce the optimal
 *  baudrate, as well as the baudrate that the settings will create.
 *
 *      Parameters:
 *          -best receives the optimal baudrate settings
 *          -Baudrate is the desired baudrate to set
 *
 *      Returns:
 *          nothing; the result is written to 'best'
 *
 *******************************************************************************
 */
static inline void calcBaudSettings( baudSettingsStruct &best, DWORD Baudrate )
{
    // Brute force search for the fastest achievable rate that does not exceed
    // 'Baudrate' (and never exceeds CPU_CLOCK / 3). The result is written to
    // 'best'.
    //
    // Prescaler bits for baud rate generator
    // bits value:  0   1   2   3
    // Prescale:    2   3   5   7
    //
    // Baud Rate scaler bits
    // bits value:  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    // Scaler:      2   4   6   8   16  2^5 2^6 2^7 2^8 2^9 2^A 2^B 2^C 2^D 2^E 2^F
    //
    // Double Baud rate bit doubles the effective baudrate of the module.

    // Fallback when every achievable rate is above the request: use the
    // slowest combination instead of leaving 'best.baud' uninitialized and
    // the divider fields at their fastest setting.
    best.doubleBaudRate = false;
    best.prescale = 3;
    best.baudrate_scaler = 0xF;
    best.baud = CPU_CLOCK / (4 * 7 * (0x00000001 << 0xF));

    // Deviation of the best candidate from the request. Candidates are never
    // above the request, so deviations are <= 0 and the largest one wins.
    // Starting at the most negative value lets any valid candidate replace it;
    // reaching 0 (exact match) ends the whole search.
    int32_t devMin = 0x80000000;

    for (int dbr = 0; dbr < 2 && devMin < 0; dbr++)
    {
        for (int prescale = 0; prescale < 4 && devMin < 0; prescale++)
        {
            // the prescale values are slightly strange: 2, then 3, 5, 7
            const uint32_t prescaleValue = (prescale == 0) ? 2 : (1 + 2 * prescale);

            for (int scaler = 0; scaler <= 0xF && devMin < 0; scaler++)
            {
                // Scaler settings 0..2 divide by 2, 4, 6; from 3 upwards the
                // divider is 2^scaler.
                const uint32_t scalerValue =
                    (scaler < 3) ? 2 * (scaler + 1) : (0x00000001 << scaler);

                // Baud = CPU_CLOCK / 2 * (1 + doubleBaudRate)
                //          / ( 2 * prescale_value * baudrate_scaler_value)
                //  See 40.4.3.1 of Freescale reference manual (v4) for the MCF54418
                const DWORD Baud = CPU_CLOCK * (1 + dbr)
                        / (4 * prescaleValue * scalerValue);
                if (Baud > CPU_CLOCK / 3)
                {
                    continue;
                }

                const int32_t devCur = Baud - Baudrate;
                if (devCur > devMin && devCur <= 0)
                {
                    devMin = devCur;
                    best.prescale = prescale;
                    best.baudrate_scaler = scaler;
                    best.doubleBaudRate = (dbr != 0);
                    best.baud = Baud;
                }
            }
        }
    }
}

/*******************************************************************************
 *
 *  static inline
 *      delaySettingsStruct calcDelaySettings( DWORD QCD, DWORD DTL,
 *                                              const baudSettingsStruct &baudSet )
 *
 *  In short, this finds the hardware settings that best approximate the requested
 *  delay time.
 *
 *  This function does a brute force search on all possible delay times for the
 *  post CS/pre SCK delay and the After SCK/pre !CS delay, to get the minimum
 *  delay that is at least as large as the requested QCD and DTL values.
 *
 *      Parameters:
 *          -QCD is a value in the QDLYR register and will change the delay
 *          between the assertion of the chip select and the start of the DSPI
 *          clock.  Default setting of one half DSPI clk will be used if
 *          parameter is specified as 0x0 or not included.
 *          -DTL is a value in the QDLYR register and will change the delay
 *          following a transfer of a single WORD in the DSPI queue.  Default
 *          reset value of 17/(fsys/2) will be used if parameter is specified
 *          as 0x0 or not included.
 *          -baudSet is a baudSettings structure containing the baud settings for
 *          the DSPI_CTAR register that the delays are being calculated for.
 *
 *      Returns:
 *          delaySettingsStruct containing optimal delay setting values
 *
 *******************************************************************************
 */
static inline
    delaySettingsStruct calcDelaySettings( DWORD QCD, DWORD DTL, const baudSettingsStruct &baudSet )
{
    // Picks, for each of the two delays, the prescaler/scaler pair whose
    // cycle count is the smallest one that is still >= the requested value.
    // Fields keep their zero initial value when no pair is large enough.
    delaySettingsStruct best = {0, 0, 0, 0};

    // No QCD requested: derive a default from the baud rate, rounding up
    // whenever CPU_CLOCK is not an exact multiple of the baud rate.
    if (QCD == 0)
    {
        // Delay cycles = cpu frequency / transmit frequency / module clock scaling.
        const DWORD wholeCycles = CPU_CLOCK / baudSet.baud / 4;
        const bool inexact = (CPU_CLOCK % baudSet.baud) != 0;
        QCD = wholeCycles + (inexact ? 1 : 0);
    }

    // --- CS-to-SCK delay (PCSSCK prescaler 0..3, CSSCK scaler 0..7) ---
    // 'slack' is how far the best pair so far overshoots the request;
    // an exact hit (slack == 0) stops the search.
    int32_t slack = 0x7FFFFFFF;
    for (int pre = 0; pre < 4 && slack > 0; pre++)
    {
        for (int sc = 0; sc < 8 && slack > 0; sc++)
        {
            const int32_t cycles = (1 + 2 * pre) * (0x00000002 << sc);
            const int32_t over = cycles - QCD;
            if (over < 0 || over >= slack)
            {
                continue;   // too short, or no better than what we have
            }

            slack = over;
            best.pcssck = pre;
            best.cssck = sc;
            // The delay-after-transfer fields follow the CS-to-SCK choice,
            // using the next scaler step up.
            best.pdt = pre;
            best.dt = (sc < 0xF) ? sc + 1 : sc;
        }
    }

    // Attempting to keep same behavior as advertised by DSPI on the other modules.
    // Advertised as 17/f_sys/2 cycles delay
    if (DTL == 0)
    {
        DTL = 17;
    }

    // --- After-SCK delay (PASC prescaler 0..3, ASC scaler 0..15) ---
    slack = 0x7FFFFFFF;
    for (int pre = 0; pre < 4 && slack > 0; pre++)
    {
        for (int sc = 0; sc < 16 && slack > 0; sc++)
        {
            const int32_t cycles = (1 + 2 * pre) * (0x00000002 << sc);
            const int32_t over = cycles - DTL;
            if (over < 0 || over >= slack)
            {
                continue;
            }

            slack = over;
            best.pasc = pre;
            best.asc = sc;
        }
    }

    return best;
}

/*----------------------------------------------------------------------------
BYTE DSPIInit( BYTE SPIModule = DEFAULT_SPI_MODULE, DWORD Baudrate = 2000000,
               BYTE QueueBitSize = 0x8, BYTE CS = 0xF,
               BYTE CSPol = 0x1, BYTE ClkPolarity = 0x0, BYTE ClkPhase = 0x1,
               BOOL DoutHiz = TRUE, BYTE QCD = 0x0, BYTE DTL = 0x0 );

DSPI initialization routine
-SPIModule selects which dspi module you wish to use
-Baudrate which you wish to be the max transfer speed.  Will pick next lowest speed if not available
-QueueBitSize is the size of each queue to be transferred.  If the number of bits per
transfer is greater than 8 then the data must be word aligned.
-CS is a value between 0 and 15 and determines which DSPI chip selects are asserted
during a DSPI transfer.  An active chip select is represented with a 0 bit.
-CSPol is a value that states the polarity of a chip select when it is inactive.
-ClkPolarity should be set to either '0' or '1' and this will be the logic level
of the clock when inactive.
-ClkPhase should be set to either '0' if data is captured on the leading clock edge or '1' if
data is changed on the leading clock edge.
-DoutHiz should be true if the DOUT line needs to be in high impedance in between transfers.
-QCD is a value in the QDLYR register and will change the delay between the assertion of
the chip select and the start of the DSPI clock.  Default setting of one half DSPI clk
will be used if parameter is specified as 0x0 or not included.
-DTL is a value in the QDLYR register and will change the delay following a transfer of
a single WORD in the DSPI queue.  Default reset value of 17/(fsys/2) will be used if
parameter is specified as 0x0 or not included.

Returns the current state of the DSPI bus
*///////////////////////////////////////////////////////////////////////////////////
BYTE DSPIModule::Init( DWORD Baudrate, BYTE QueueBitSize, BYTE CS,
                BYTE CSPol, BYTE ClkPolarity, BYTE ClkPhase, BOOL DoutHiz, BYTE QCD, BYTE DTL )
{
    // Configures this object's DSPI module: baud rate, delays, frame size,
    // chip select polarity, pin slew rates and interrupt vectors.
    // Returns DSPI_BUSY for an invalid module number or an unsupported
    // QueueBitSize (valid: 4..32), otherwise the module's driver status.
    // NOTE: DoutHiz is accepted but not referenced by this implementation.

    // First call from any object: reset the shared context of every module.
    if (needInit)
    {
        for (int m = 0; m < DSPI_MODULE_COUNT; m++)
        {
            driverCxt[m].DSPI_INT_STATUS = DSPI_OK;
            driverCxt[m].DSPIfinished = TRUE;
        }
        needInit = false;
    }

    // Reject module numbers outside the driver context table.
    if (m_moduleNum > DSPI_MODULE_COUNT - 1)
    {
        return DSPI_BUSY;
    }

    volatile dspistruct *spi = NULL;

    switch (m_moduleNum)
    {
        case 0:
            spi = &sim2.dspi0;
            break;
        case 1:
            spi = &sim2.dspi1;
            break;
        case 2:
            spi = &sim1.dspi2;
            break;
        case 3:
            spi = &sim1.dspi3;
            break;
        default:
            break;
    }
    if (!spi)
    {
        return driverCxt[m_moduleNum].DSPI_INT_STATUS;
    }

    // Do not reconfigure a module while a transfer is using it.
    if(driverCxt[m_moduleNum].DSPI_INT_STATUS != DSPI_OK )
    {
        return driverCxt[m_moduleNum].DSPI_INT_STATUS;
    }

    // Work out the frame size fields before touching any member or register,
    // so an unsupported QueueBitSize leaves the existing configuration intact.
    // Frame size starts at bit 27 of the CTAR register.
    DWORD frameBits0 = 0;
    DWORD frameBits1 = 0;
    bool dmaCapable = false;
    if((QueueBitSize >= 4) && (QueueBitSize < 17))
    {
        // If the bit size is less than 17, we can send it in a single frame
        frameBits0 = ((DWORD)(QueueBitSize - 1)) << 27;
        dmaCapable = true;
    }
    else if((QueueBitSize > 16) && (QueueBitSize < 20))
    {
        // Minimum frame size is 4 bits, therefore if the bit size is less than 20
        // we need to calculate the length of the frames a little differently
        frameBits0 = ((DWORD)(QueueBitSize - 5)) << 27;
        frameBits1 = CTAR_FRAME_4BIT;
    }
    else if ((QueueBitSize > 19) && (QueueBitSize < 32))
    {
        // Transmit 16 bits in second chunk, and the variable amount in the first.
        frameBits0 = ((DWORD)(QueueBitSize - 17)) << 27;
        frameBits1 = CTAR_FRAME_16BIT;
    }
    else if(QueueBitSize == 32)
    {
        frameBits0 = CTAR_FRAME_16BIT;
        dmaCapable = true;
    }
    else
    {
        driverCxt[m_moduleNum].dma.enabled = false;
        return DSPI_BUSY;
    }

    m_BitsPerQueue = QueueBitSize;
    m_CommandMask = ((uint8_t)(CS ^ CSPol));

    DBPRINT_DSPI("BitsPerQueue: %d\r\n", m_BitsPerQueue);
    // The DSPI engine cannot handle continuous SCK
    // or modified timing at > CPU_CLOCK/3
    if (Baudrate > CPU_CLOCK/3) Baudrate = CPU_CLOCK/3 - CPU_CLOCK/30;

    baudSettingsStruct baudSettings;
    calcBaudSettings( baudSettings, Baudrate );
    m_actualBaudrate = baudSettings.baud;
    // see 23.2.3 of MCF5270 manual for explanation of DTL * 32
    delaySettingsStruct delaySettings = calcDelaySettings( QCD, (DWORD)DTL * 32, baudSettings );

    // If the baud we're going to transmit at is above certain values,
    // we need to adjust our slew rates to prevent signal integrity issues.
    // 'slew' is the 2-bit slew rate field value; 0 means leave the pins alone.
    BYTE slew = 0;
    if (baudSettings.baud > 20000000) // 20,000,000
    {
        slew = 0x02;
    }
    else if (baudSettings.baud > 4000000) //4,000,000
    {
        slew = 0x01;
    }

    if (slew != 0)
    {
        switch (m_moduleNum)
        {
            case 0:
                // DSPI0 slewrate is bits [1:0], onewire is bits [5:4]
                sim1.gpio.srcr_dspiow   &= ~0x03;
                sim1.gpio.srcr_dspiow   |= slew;
                break;
            case 1:
                // SDHC slewrate is bits [1:0], others unused
                sim1.gpio.srcr_sdhc     &= ~0x03;
                sim1.gpio.srcr_sdhc     |= slew;
                break;
            case 2:
                // DSPI2 pins are UART0, UART0 slewrate is bits [1:0]
                sim1.gpio.srcr_uart     &= ~0x03;
                sim1.gpio.srcr_uart     |= slew;
                break;
            case 3:
                // DSPI3 pins are UART1, UART1 slewrate is bits [3:2]
                sim1.gpio.srcr_uart     &= ~0x0C;
                sim1.gpio.srcr_uart     |= (slew << 2);
                break;
        }
    }

    DBPRINT_DSPI("Desired Baudrate: %lu\r\n", Baudrate);
    DBPRINT_DSPI("Baud Rate and settings:\r\n");
    DBPRINT_DSPI("          Baud:  %lu\r\n", baudSettings.baud);
    DBPRINT_DSPI("      prescale:  %lu\r\n", baudSettings.prescale);
    DBPRINT_DSPI("    BRprescale:  %lu\r\n", baudSettings.baudrate_scaler);
    DBPRINT_DSPI("      doubleBR:  %s\r\n\r\n", baudSettings.doubleBaudRate ? "TRUE" : "FALSE");

    DBPRINT_DSPI("Delay settings:\r\n");
    DBPRINT_DSPI("    pcssck: %u\r\n", delaySettings.pcssck);
    DBPRINT_DSPI("     cssck: %u\r\n", delaySettings.cssck);
    DBPRINT_DSPI("      pasc: %u\r\n", delaySettings.pasc);
    DBPRINT_DSPI("       asc: %u\r\n", delaySettings.asc);

    // Begin constructing the Baud rate and delay value mask for the timing registers
    uint32_t ctar = 0x00000000;
    ctar |= ( (DWORD) baudSettings.doubleBaudRate ) << 31;
    ctar |= baudSettings.prescale << 16;
    ctar |= baudSettings.baudrate_scaler;
    ctar |= ( (DWORD) delaySettings.pcssck )   << 22;
    ctar |= ( (DWORD) delaySettings.cssck )    << 12;
    ctar |= ( (DWORD) delaySettings.pasc )     << 20;
    ctar |= ( (DWORD) delaySettings.asc )      <<  8;
    ctar |= ( (DWORD) delaySettings.pdt )      << 18;
    ctar |= ( (DWORD) delaySettings.dt )       <<  4;
    if(ClkPolarity)
        ctar |= CTAR_CLOCK_POLARITY;
    if(ClkPhase)
        ctar |= CTAR_CLOCK_PHASE;

    // Set base values for timing registers (same write sequence as before:
    // base value first, complete value after the MCR and ISR setup).
    spi->ctar[0] = m_ctar0 = ctar;
    spi->ctar[1] = m_ctar1 = ctar;

    // Add the frame size fields selected above.
    m_ctar0 |= frameBits0;
    m_ctar1 |= frameBits1;
    driverCxt[m_moduleNum].dma.enabled = dmaCapable;

    spi->mcr = MCR_MASTER_INIT | (((DWORD) CSPol) << 16); // Chip Select inactive state bits start at bit 16

    // Hook up the module interrupt and its RX/TX DMA channel interrupts.
    switch (m_moduleNum)
    {
        case 0:
            SETUP_DSPI0_ISR(&dspi0_int_routine, 2);
            SETUP_DMA12_ISR(&dspi0_rx_dma_int_routine, 2);
            SETUP_DMA13_ISR(&dspi0_tx_dma_int_routine, 2);
            break;
        case 1:
            SETUP_DSPI1_ISR(&dspi1_int_routine, 2);
            SETUP_DMA14_ISR(&dspi1_rx_dma_int_routine, 2);
            SETUP_DMA15_ISR(&dspi1_tx_dma_int_routine, 2);
            break;
        case 2:
            SETUP_DSPI2_ISR(&dspi2_int_routine, 2);
            SETUP_DMA28_ISR(&dspi2_rx_dma_int_routine, 2);
            SETUP_DMA29_ISR(&dspi2_tx_dma_int_routine, 2);
            break;
        case 3:
            SETUP_DSPI3_ISR(&dspi3_int_routine, 2);
            SETUP_DMA44_ISR(&dspi3_rx_dma_int_routine, 2);
            SETUP_DMA45_ISR(&dspi3_tx_dma_int_routine, 2);
            break;
    }

    spi->ctar[0] = m_ctar0;
    spi->ctar[1] = m_ctar1;

    return driverCxt[m_moduleNum].DSPI_INT_STATUS;
}

/*----------------------------------------------------------------------------
BOOL DSPIdone();

returns TRUE if DSPI is finished
returns FALSE if DSPI is active
*///////////////////////////////////////////////////////////////////////////////////
BOOL DSPIModule::Done( BYTE SPIModule )
{
    // Map the module number to its register block; numbers without a
    // hardware module are always reported as done.
    volatile dspistruct *regs = NULL;
    switch (SPIModule)
    {
        case 0:  regs = &sim2.dspi0;  break;
        case 1:  regs = &sim2.dspi1;  break;
        case 2:  regs = &sim1.dspi2;  break;
        case 3:  regs = &sim1.dspi3;  break;
    }
    if (regs == NULL)
    {
        return TRUE;
    }

    // Done means the driver has flagged completion AND bit 0 of MCR is set
    // (presumably the HALT bit - confirm against MCR_HALT_BIT). The register
    // is only read once the driver flag is set.
    return driverCxt[SPIModule].DSPIfinished && (regs->mcr & 0x1);
}

DSPIModule::DSPIModule( BYTE SPIModule )
    : m_moduleNum(SPIModule),
      m_mcr(0),
      m_ctar0(0),
      m_ctar1(0),
      m_enableDMA(true),
      m_CommandMask(0),
      m_BitsPerQueue(8),
      m_finishedSem(NULL),
      m_actualBaudrate(0),
      m_inProgress(false)
{
    // Only records the module number and default settings; no hardware is
    // touched until Init() is called.
}

DSPIModule::DSPIModule( BYTE SPIModule, DWORD Baudrate, BYTE QueueBitSize, BYTE CS,
                BYTE CSPol, BYTE ClkPolarity, BYTE ClkPhase, BOOL DoutHiz, BYTE QCD, BYTE DTL )
    : m_moduleNum(SPIModule),
      m_mcr(0),
      m_ctar0(0),
      m_ctar1(0),
      m_enableDMA(true),
      m_CommandMask(0),
      m_BitsPerQueue(8),
      m_finishedSem(NULL),
      m_actualBaudrate(0),
      m_inProgress(false)
{
    // Same defaults as the single-argument constructor, followed by an
    // immediate Init() with the supplied settings. The status returned by
    // Init() is not reported to the caller.
    Init( Baudrate, QueueBitSize, CS, CSPol,
          ClkPolarity, ClkPhase, DoutHiz, QCD, DTL );
}

bool DSPIModule::EnableDMA( bool enableDMA )
{
    // Changes whether this object may use DMA for its transfers.
    // Returns false, changing nothing, while a transfer is in progress.
    OSLockObj lock;
    const bool idle = !m_inProgress;
    if (idle)
    {
        m_enableDMA = enableDMA;
    }
    return idle;
}

bool DSPIModule::SetSem( OS_SEM *finishedSem )
{
    // Changes the semaphore this object posts when a transfer finishes
    // (NULL for none). Returns false, changing nothing, while a transfer is
    // in progress.
    OSLockObj lock;
    const bool idle = !m_inProgress;
    if (idle)
    {
        m_finishedSem = finishedSem;
    }
    return idle;
}

// State behind the legacy C-style API (DSPIInit / DSPIStart): one DSPIModule
// object per hardware module, indexed by module number.
// The element type is spelled 'DSPIModule'; 'DSPIModule::DSPIModule' names
// the constructor rather than the class and is rejected by conforming
// compilers.
static DSPIModule legacyCxts[DSPI_MODULE_COUNT] =
{
    DSPIModule(0),
#if DSPI_MODULE_COUNT >= 2
    DSPIModule(1),
#endif
#if DSPI_MODULE_COUNT >= 3
    DSPIModule(2),
#endif
#if DSPI_MODULE_COUNT >= 4
    DSPIModule(3),
#endif
#if DSPI_MODULE_COUNT >= 5
    DSPIModule(4),
#endif
#if DSPI_MODULE_COUNT >= 6
    DSPIModule(5),
#endif
#if DSPI_MODULE_COUNT >= 7
    DSPIModule(6),
#endif
#if DSPI_MODULE_COUNT >= 8
    DSPIModule(7),
#endif
};
// One critical section per module, serializing the legacy entry points.
static OS_CRIT legacyCrits[DSPI_MODULE_COUNT];
// True until DSPIInit() has initialized legacyCrits.
static bool legacyFirstInit = true;

BYTE DSPIInit( BYTE SPIModule, DWORD Baudrate, BYTE QueueBitSize, BYTE CS,
                BYTE CSPol, BYTE ClkPolarity, BYTE ClkPhase, BOOL DoutHiz, BYTE QCD, BYTE DTL )
{
    // Legacy entry point: configures the shared per-module DSPIModule object.
    // The first call also initializes every module's critical section.
    if (legacyFirstInit)
    {
        for (int m = 0; m < DSPI_MODULE_COUNT; m++)
        {
            OSCritInit( &legacyCrits[m] );
        }
        legacyFirstInit = false;
    }

    // DSPI0 is not brought out on any board, so module 0 is rejected here
    // along with numbers past the last module.
    const bool moduleUsable = (SPIModule >= 1) && (SPIModule <= DSPI_MODULE_COUNT - 1);
    if (!moduleUsable)
    {
        return DSPI_BUSY;
    }

    // Hold the module's critical section for the duration of the Init call.
    OSCriticalSectionObj lock(legacyCrits[SPIModule]);
    DSPIModule &module = legacyCxts[SPIModule];
    return module.Init( Baudrate, QueueBitSize, CS, CSPol,
                        ClkPolarity, ClkPhase, DoutHiz, QCD, DTL );
}

BYTE DSPIStart( BYTE SPIModule, PBYTE transmitBufferPtr, volatile BYTE* receiveBufferPtr,
      DWORD byteCount, OS_SEM* finishedSem, BYTE enableDMA, int csReturnToInactive)
{
    // Legacy entry point: starts a transfer on the shared per-module
    // DSPIModule object. Returns DSPI_BUSY for an unusable module number or
    // when the previous transfer has not finished.

    // DSPI0 is not brought out on any board
    const bool moduleUsable = (SPIModule >= 1) && (SPIModule <= DSPI_MODULE_COUNT - 1);
    if (!moduleUsable)
    {
        return DSPI_BUSY;
    }

    OSCriticalSectionObj lock(legacyCrits[SPIModule]);
    DSPIModule &module = legacyCxts[SPIModule];
    if (!module.Done())
    {
        return DSPI_BUSY;
    }

    // Apply the per-call options, then hand the transfer to the object.
    module.EnableDMA(enableDMA);
    module.SetSem(finishedSem);

    return module.Start( transmitBufferPtr, receiveBufferPtr,
                         byteCount, csReturnToInactive );
}

BOOL DSPIdone( BYTE SPIModule )
{
    // Legacy wrapper around DSPIModule::Done() for the given module number.
    const BOOL finished = DSPIModule::Done( SPIModule );
    return finished;
}
