diff --git a/Marlin/src/HAL/HAL_DUE/HAL_spi_Due.cpp b/Marlin/src/HAL/HAL_DUE/HAL_spi_Due.cpp
index d23cb35aeaca559b42783a508106d66d62f326a9..30386661b7acdd425f64710431daed226faa511a 100644
--- a/Marlin/src/HAL/HAL_DUE/HAL_spi_Due.cpp
+++ b/Marlin/src/HAL/HAL_DUE/HAL_spi_Due.cpp
@@ -23,6 +23,10 @@
/**
* Software SPI functions originally from Arduino Sd2Card Library
* Copyright (C) 2009 by William Greiman
+ *
+ * Completely rewritten by Eduardo José Tagle in 2017/2018
+ * in ARM Thumb-2 inline assembler and tuned for maximum speed and performance,
+ * allowing SPI clocks of up to 12 MHz to increase SD card read/write performance
*/
/**
@@ -53,6 +57,9 @@
// software SPI
// --------------------------------------------------------------------------
+ // Set the optimization level explicitly so the Arduino build optimizes this file
+ #pragma GCC optimize (3)
+
/* ---------------- Delay Cycles routine -------------- */
/* https://blueprints.launchpad.net/gcc-arm-embedded/+spec/delay-cycles */
@@ -105,27 +112,171 @@
typedef uint8_t (*pfnSpiTransfer) (uint8_t b);
- // bitbanging transfer
- #define SWSPI_BIT_XFER(n) \
- WRITE(MOSI_PIN, bout & (1 << n)); \
- WRITE(SCK_PIN, HIGH); /* Sampling point */\
- /* (implicit by overhead) DELAY_NS(63); 5.3 cycles @ 84mhz */ \
- bin |= (READ(MISO_PIN) != 0) << n; \
- WRITE(SCK_PIN, LOW); /* Toggling point*/ \
- /* (implicit by overhead) DELAY_NS(63); 5.3 cycles @ 84mhz */
-
- // run at ~8 .. ~10Mhz
- static uint8_t spiTransfer0(uint8_t bout) { // using Mode 0
- volatile uint8_t bin = 0; /* volatile to disable deferred processing */
- SWSPI_BIT_XFER(7);
- SWSPI_BIT_XFER(6);
- SWSPI_BIT_XFER(5);
- SWSPI_BIT_XFER(4);
- SWSPI_BIT_XFER(3);
- SWSPI_BIT_XFER(2);
- SWSPI_BIT_XFER(1);
- SWSPI_BIT_XFER(0);
- return bin;
+ /* ---------------- Macros to access pin definitions (port address, bit mask, bit index) from inline asm */
+
+ #define _PORT(IO) DIO ## IO ## _WPORT /* pastes IO into the DIO<IO>_WPORT name */
+ #define _PIN_MASK(IO) MASK(DIO ## IO ## _PIN) /* bit mask built from DIO<IO>_PIN */
+ #define _PIN_SHIFT(IO) DIO ## IO ## _PIN /* bit index: DIO<IO>_PIN itself */
+ #define PORT(IO) _PORT(IO) /* two-level wrappers: IO (e.g. MOSI_PIN) is macro-expanded before ## pastes it */
+ #define PIN_MASK(IO) _PIN_MASK(IO)
+ #define PIN_SHIFT(IO) _PIN_SHIFT(IO)
+
+ // Rate 0 (~8 .. ~10MHz), Mode 0 - Tx version: shifts out one byte MSB first; MISO is never sampled (Rx data discarded)
+ static uint8_t spiTransferTx0(uint8_t bout) { // using Mode 0
+ register uint32_t MOSI_PORT_PLUS30 = ((uint32_t) PORT(MOSI_PIN)) + 0x30; /* SODR of port */
+ register uint32_t MOSI_MASK = PIN_MASK(MOSI_PIN);
+ register uint32_t SCK_PORT_PLUS30 = ((uint32_t) PORT(SCK_PIN)) + 0x30; /* SODR of port */
+ register uint32_t SCK_MASK = PIN_MASK(SCK_PIN);
+ register uint32_t idx; /* scratch: per-bit SODR/CODR selector, always written by the asm before it is read */
+
+ /* Negate bout: the asm uses each bit as a word index, 0 -> SODR (+0x0, set MOSI), 1 -> CODR (+0x4, clear MOSI) */
+ bout = ~bout;
+
+ /* The software SPI routine */
+ __asm__ __volatile__(
+ ".syntax unified" "\n\t" // is to prevent CM0,CM1 non-unified syntax
+
+ /* Bit 7 */
+ " ubfx %[idx],%[txval],#7,#1" "\n\t" /* Place bit 7 in bit 0 of idx*/
+
+ " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t" /* Access the proper SODR or CODR registers based on that bit */
+ " str %[sck_mask],[%[sck_port]]" "\n\t" /* SODR */
+ " ubfx %[idx],%[txval],#6,#1" "\n\t" /* Place bit 6 in bit 0 of idx*/
+ " str %[sck_mask],[%[sck_port],#0x4]" "\n\t" /* CODR */
+
+ /* Bit 6 */
+ " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t" /* Access the proper SODR or CODR registers based on that bit */
+ " str %[sck_mask],[%[sck_port]]" "\n\t" /* SODR */
+ " ubfx %[idx],%[txval],#5,#1" "\n\t" /* Place bit 5 in bit 0 of idx*/
+ " str %[sck_mask],[%[sck_port],#0x4]" "\n\t" /* CODR */
+
+ /* Bit 5 */
+ " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t" /* Access the proper SODR or CODR registers based on that bit */
+ " str %[sck_mask],[%[sck_port]]" "\n\t" /* SODR */
+ " ubfx %[idx],%[txval],#4,#1" "\n\t" /* Place bit 4 in bit 0 of idx*/
+ " str %[sck_mask],[%[sck_port],#0x4]" "\n\t" /* CODR */
+
+ /* Bit 4 */
+ " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t" /* Access the proper SODR or CODR registers based on that bit */
+ " str %[sck_mask],[%[sck_port]]" "\n\t" /* SODR */
+ " ubfx %[idx],%[txval],#3,#1" "\n\t" /* Place bit 3 in bit 0 of idx*/
+ " str %[sck_mask],[%[sck_port],#0x4]" "\n\t" /* CODR */
+
+ /* Bit 3 */
+ " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t" /* Access the proper SODR or CODR registers based on that bit */
+ " str %[sck_mask],[%[sck_port]]" "\n\t" /* SODR */
+ " ubfx %[idx],%[txval],#2,#1" "\n\t" /* Place bit 2 in bit 0 of idx*/
+ " str %[sck_mask],[%[sck_port],#0x4]" "\n\t" /* CODR */
+
+ /* Bit 2 */
+ " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t" /* Access the proper SODR or CODR registers based on that bit */
+ " str %[sck_mask],[%[sck_port]]" "\n\t" /* SODR */
+ " ubfx %[idx],%[txval],#1,#1" "\n\t" /* Place bit 1 in bit 0 of idx*/
+ " str %[sck_mask],[%[sck_port],#0x4]" "\n\t" /* CODR */
+
+ /* Bit 1 */
+ " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t" /* Access the proper SODR or CODR registers based on that bit */
+ " str %[sck_mask],[%[sck_port]]" "\n\t" /* SODR */
+ " ubfx %[idx],%[txval],#0,#1" "\n\t" /* Place bit 0 in bit 0 of idx*/
+ " str %[sck_mask],[%[sck_port],#0x4]" "\n\t" /* CODR */
+
+ /* Bit 0 */
+ " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t" /* Access the proper SODR or CODR registers based on that bit */
+ " str %[sck_mask],[%[sck_port]]" "\n\t" /* SODR */
+ " nop" "\n\t"
+ " str %[sck_mask],[%[sck_port],#0x4]" "\n\t" /* CODR */
+
+ : [mosi_mask]"+r"( MOSI_MASK ),
+ [mosi_port]"+r"( MOSI_PORT_PLUS30 ),
+ [sck_mask]"+r"( SCK_MASK ),
+ [sck_port]"+r"( SCK_PORT_PLUS30 ),
+ [idx]"=&r"( idx ), /* write-only scratch (early clobber): avoids reading the uninitialized variable */
+ [txval]"+r"( bout )
+ :
+ : "cc"
+ );
+
+ return 0; /* Tx only: there is no received byte to report */
+ }
+
+ // Rate 0 (~8 .. ~10MHz), Mode 0 - Rx version: clocks in one byte MSB first (Tx line not altered)
+ static uint8_t spiTransferRx0(uint8_t bout) { // using Mode 0
+ int bin = 0, work = 0; /* bin accumulates the received byte; work holds each raw PDSR read */
+ register uint32_t MISO_PORT_PLUS3C = ((uint32_t) PORT(MISO_PIN)) + 0x3C; /* PDSR of port */
+ register uint32_t SCK_PORT_PLUS30 = ((uint32_t) PORT(SCK_PIN)) + 0x30; /* SODR of port */
+ register uint32_t SCK_MASK = PIN_MASK(SCK_PIN);
+ UNUSED(bout); /* nothing is transmitted here; this routine never writes MOSI */
+
+ /* The software SPI routine */
+ __asm__ __volatile__(
+ ".syntax unified" "\n\t" // is to prevent CM0,CM1 non-unified syntax
+
+ /* bit 7 */
+ " str %[sck_mask],[%[sck_port]]" "\n\t" /* SODR */
+ " ldr %[work],[%[miso_port]]" "\n\t" /* PDSR */
+ " str %[sck_mask],[%[sck_port],#0x4]" "\n\t" /* CODR */
+ " lsrs %[work],%[work],%[miso_shift]" "\n\t" /* Isolate input into carry */
+ " adc %[bin],%[bin],%[bin]" "\n\t" /* Shift left result and add the carry */
+
+ /* bit 6 */
+ " str %[sck_mask],[%[sck_port]]" "\n\t" /* SODR */
+ " ldr %[work],[%[miso_port]]" "\n\t" /* PDSR */
+ " str %[sck_mask],[%[sck_port],#0x4]" "\n\t" /* CODR */
+ " lsrs %[work],%[work],%[miso_shift]" "\n\t" /* Isolate input into carry */
+ " adc %[bin],%[bin],%[bin]" "\n\t" /* Shift left result and add the carry */
+
+ /* bit 5 */
+ " str %[sck_mask],[%[sck_port]]" "\n\t" /* SODR */
+ " ldr %[work],[%[miso_port]]" "\n\t" /* PDSR */
+ " str %[sck_mask],[%[sck_port],#0x4]" "\n\t" /* CODR */
+ " lsrs %[work],%[work],%[miso_shift]" "\n\t" /* Isolate input into carry */
+ " adc %[bin],%[bin],%[bin]" "\n\t" /* Shift left result and add the carry */
+
+ /* bit 4 */
+ " str %[sck_mask],[%[sck_port]]" "\n\t" /* SODR */
+ " ldr %[work],[%[miso_port]]" "\n\t" /* PDSR */
+ " str %[sck_mask],[%[sck_port],#0x4]" "\n\t" /* CODR */
+ " lsrs %[work],%[work],%[miso_shift]" "\n\t" /* Isolate input into carry */
+ " adc %[bin],%[bin],%[bin]" "\n\t" /* Shift left result and add the carry */
+
+ /* bit 3 */
+ " str %[sck_mask],[%[sck_port]]" "\n\t" /* SODR */
+ " ldr %[work],[%[miso_port]]" "\n\t" /* PDSR */
+ " str %[sck_mask],[%[sck_port],#0x4]" "\n\t" /* CODR */
+ " lsrs %[work],%[work],%[miso_shift]" "\n\t" /* Isolate input into carry */
+ " adc %[bin],%[bin],%[bin]" "\n\t" /* Shift left result and add the carry */
+
+ /* bit 2 */
+ " str %[sck_mask],[%[sck_port]]" "\n\t" /* SODR */
+ " ldr %[work],[%[miso_port]]" "\n\t" /* PDSR */
+ " str %[sck_mask],[%[sck_port],#0x4]" "\n\t" /* CODR */
+ " lsrs %[work],%[work],%[miso_shift]" "\n\t" /* Isolate input into carry */
+ " adc %[bin],%[bin],%[bin]" "\n\t" /* Shift left result and add the carry */
+
+ /* bit 1 */
+ " str %[sck_mask],[%[sck_port]]" "\n\t" /* SODR */
+ " ldr %[work],[%[miso_port]]" "\n\t" /* PDSR */
+ " str %[sck_mask],[%[sck_port],#0x4]" "\n\t" /* CODR */
+ " lsrs %[work],%[work],%[miso_shift]" "\n\t" /* Isolate input into carry */
+ " adc %[bin],%[bin],%[bin]" "\n\t" /* Shift left result and add the carry */
+
+ /* bit 0 */
+ " str %[sck_mask],[%[sck_port]]" "\n\t" /* SODR */
+ " ldr %[work],[%[miso_port]]" "\n\t" /* PDSR */
+ " str %[sck_mask],[%[sck_port],#0x4]" "\n\t" /* CODR */
+ " lsrs %[work],%[work],%[miso_shift]" "\n\t" /* Isolate input into carry */
+ " adc %[bin],%[bin],%[bin]" "\n\t" /* Shift left result and add the carry */
+
+ : [miso_port]"+r"( MISO_PORT_PLUS3C ),
+ [sck_mask]"+r"( SCK_MASK ),
+ [sck_port]"+r"( SCK_PORT_PLUS30 ),
+ [bin]"+r"(bin),
+ [work]"+r"(work)
+ : [miso_shift]"M"( PIN_SHIFT(MISO_PIN) + 1 ) /* Shift by pin index + 1 so the MISO bit falls into the carry flag */
+ : "cc"
+ );
+
+ return (uint8_t)bin;
}
// run at ~4Mhz
@@ -133,15 +284,15 @@
int bits = 8;
do {
WRITE(MOSI_PIN, b & 0x80);
- b <<= 1; // little setup time
+ b <<= 1; // little setup time
WRITE(SCK_PIN, HIGH);
- DELAY_NS(125); // 10 cycles @ 84mhz
+ DELAY_NS(125); // 10 cycles @ 84mhz
b |= (READ(MISO_PIN) != 0);
WRITE(SCK_PIN, LOW);
- DELAY_NS(125); // 10 cycles @ 84mhz
+ DELAY_NS(125); // 10 cycles @ 84mhz
} while (--bits);
return b;
}
@@ -166,8 +317,9 @@
return b;
}
- // Use the generic one
- static pfnSpiTransfer spiTransfer = spiTransferX;
+ // Pointers to generic functions
+ static pfnSpiTransfer spiTransferTx = spiTransferX;
+ static pfnSpiTransfer spiTransferRx = spiTransferX;
void spiBegin() {
SET_OUTPUT(SS_PIN);
@@ -190,14 +342,17 @@
void spiInit(uint8_t spiRate) {
switch (spiRate) {
case 0:
- spiTransfer = spiTransfer0;
+ spiTransferTx = spiTransferTx0;
+ spiTransferRx = spiTransferRx0;
break;
case 1:
- spiTransfer = spiTransfer1;
+ spiTransferTx = spiTransfer1;
+ spiTransferRx = spiTransfer1;
break;
default:
spiDelayCyclesX4 = (F_CPU/1000000) >> (6 - spiRate);
- spiTransfer = spiTransferX;
+ spiTransferTx = spiTransferX;
+ spiTransferRx = spiTransferX;
break;
}
@@ -208,48 +363,36 @@
uint8_t spiRec() {
WRITE(SS_PIN, LOW);
- uint8_t b = spiTransfer(0xff);
+ WRITE(MOSI_PIN, 1); /* Hold MOSI high (all 1s) while receiving: the rate-0 Rx routine never drives it */
+ uint8_t b = spiTransferRx(0xFF);
WRITE(SS_PIN, HIGH);
return b;
}
- void spiRead(uint8_t*buf, uint16_t nbyte) {
+ void spiRead(uint8_t* buf, uint16_t nbyte) {
if (nbyte == 0) return;
WRITE(SS_PIN, LOW);
+ WRITE(MOSI_PIN, 1); /* Hold MOSI high (all 1s) while receiving: the rate-0 Rx routine never drives it */
for (int i = 0; i < nbyte; i++) {
- buf[i] = spiTransfer(0xff);
+ buf[i] = spiTransferRx(0xff);
}
WRITE(SS_PIN, HIGH);
}
void spiSend(uint8_t b) {
WRITE(SS_PIN, LOW);
- uint8_t response = spiTransfer(b);
- UNUSED(response);
- WRITE(SS_PIN, HIGH);
- }
-
- static void spiSend(const uint8_t* buf, size_t n) {
- uint8_t response;
- if (n == 0) return;
- WRITE(SS_PIN, LOW);
- for (uint16_t i = 0; i < n; i++) {
- response = spiTransfer(buf[i]);
- }
- UNUSED(response);
+ (void) spiTransferTx(b); /* send one byte; whatever the transfer routine returns is intentionally ignored */
WRITE(SS_PIN, HIGH);
}
void spiSendBlock(uint8_t token, const uint8_t* buf) {
- uint8_t response;
WRITE(SS_PIN, LOW);
- response = spiTransfer(token);
+ (void) spiTransferTx(token); /* token byte goes out first; return value intentionally ignored */
for (uint16_t i = 0; i < 512; i++) {
- response = spiTransfer(buf[i]);
+ (void) spiTransferTx(buf[i]); /* then the 512 data bytes from buf, in order */
}
- UNUSED(response);
WRITE(SS_PIN, HIGH);
}