diff --git a/Marlin/src/HAL/HAL_AVR/MarlinSerial.cpp b/Marlin/src/HAL/HAL_AVR/MarlinSerial.cpp
index ece24977663696a9319715a12b91b0ec907c29ef..32a6cc340bb3d5fc250f1ee875c90ed2ad8ca30b 100644
--- a/Marlin/src/HAL/HAL_AVR/MarlinSerial.cpp
+++ b/Marlin/src/HAL/HAL_AVR/MarlinSerial.cpp
@@ -29,6 +29,7 @@
  * Modified 14 February 2016 by Andreas Hardtung (added tx buffer)
  * Modified 01 October 2017 by Eduardo José Tagle (added XON/XOFF)
  * Modified 10 June 2018 by Eduardo José Tagle (See #10991)
+ * Templatized 01 October 2018 by Eduardo José Tagle to allow multiple instances
  */
 
 #ifdef __AVR__
@@ -42,62 +43,26 @@
   #include "MarlinSerial.h"
   #include "../../Marlin.h"
 
-  struct ring_buffer_r {
-    unsigned char buffer[RX_BUFFER_SIZE];
-    volatile ring_buffer_pos_t head, tail;
-  };
-
-  #if TX_BUFFER_SIZE > 0
-    struct ring_buffer_t {
-      unsigned char buffer[TX_BUFFER_SIZE];
-      volatile uint8_t head, tail;
-    };
-  #endif
-
-  #if UART_PRESENT(SERIAL_PORT)
-    ring_buffer_r rx_buffer = { { 0 }, 0, 0 };
-    #if TX_BUFFER_SIZE > 0
-      ring_buffer_t tx_buffer = { { 0 }, 0, 0 };
-    #endif
-    static bool _written;
-  #endif
-
-  #if ENABLED(SERIAL_XON_XOFF)
-    constexpr uint8_t XON_XOFF_CHAR_SENT = 0x80,  // XON / XOFF Character was sent
-                      XON_XOFF_CHAR_MASK = 0x1F;  // XON / XOFF character to send
-    // XON / XOFF character definitions
-    constexpr uint8_t XON_CHAR  = 17, XOFF_CHAR = 19;
-    uint8_t xon_xoff_state = XON_XOFF_CHAR_SENT | XON_CHAR;
-  #endif
-
-  #if ENABLED(SERIAL_STATS_DROPPED_RX)
-    uint8_t rx_dropped_bytes = 0;
-  #endif
-
-  #if ENABLED(SERIAL_STATS_RX_BUFFER_OVERRUNS)
-    uint8_t rx_buffer_overruns = 0;
-  #endif
-
-  #if ENABLED(SERIAL_STATS_RX_FRAMING_ERRORS)
-    uint8_t rx_framing_errors = 0;
-  #endif
-
-  #if ENABLED(SERIAL_STATS_MAX_RX_QUEUED)
-    ring_buffer_pos_t rx_max_enqueued = 0;
-  #endif
+  template<typename Cfg> typename MarlinSerial<Cfg>::ring_buffer_r MarlinSerial<Cfg>::rx_buffer = { 0 }; // RX ring buffer: filled by store_rxd_char(), drained by read()
+  template<typename Cfg> typename MarlinSerial<Cfg>::ring_buffer_t MarlinSerial<Cfg>::tx_buffer = { 0 }; // TX ring buffer: filled by write(), drained by _tx_udr_empty_irq()
+  template<typename Cfg> bool     MarlinSerial<Cfg>::_written = false; // Set by write(), reset by begin(); flushTX() returns early while it is still false
+  template<typename Cfg> uint8_t  MarlinSerial<Cfg>::xon_xoff_state = MarlinSerial<Cfg>::XON_XOFF_CHAR_SENT | MarlinSerial<Cfg>::XON_CHAR; // Initial value records XON as already sent
+  template<typename Cfg> uint8_t  MarlinSerial<Cfg>::rx_dropped_bytes = 0; // Saturating counter (sticks at 255): bumped on the DOR flag and when the RX buffer is full
+  template<typename Cfg> uint8_t  MarlinSerial<Cfg>::rx_buffer_overruns = 0; // Saturating counter: bumped when the DOR flag is seen in store_rxd_char()
+  template<typename Cfg> uint8_t  MarlinSerial<Cfg>::rx_framing_errors = 0; // Saturating counter: bumped when the FE flag is seen in store_rxd_char()
+  template<typename Cfg> typename MarlinSerial<Cfg>::ring_buffer_pos_t MarlinSerial<Cfg>::rx_max_enqueued = 0; // Largest number of bytes seen queued in the RX buffer
 
   // A SW memory barrier, to ensure GCC does not overoptimize loops
   #define sw_barrier() asm volatile("": : :"memory");
 
-  #if ENABLED(EMERGENCY_PARSER)
-    #include "../../feature/emergency_parser.h"
-  #endif
+  #include "../../feature/emergency_parser.h"
 
   // "Atomically" read the RX head index value without disabling interrupts:
   // This MUST be called with RX interrupts enabled, and CAN'T be called
   // from the RX ISR itself!
-  FORCE_INLINE ring_buffer_pos_t atomic_read_rx_head() {
-    #if RX_BUFFER_SIZE > 256
+  template<typename Cfg>
+  FORCE_INLINE typename MarlinSerial<Cfg>::ring_buffer_pos_t MarlinSerial<Cfg>::atomic_read_rx_head() {
+    if (Cfg::RX_SIZE > 256) {
       // Keep reading until 2 consecutive reads return the same value,
       // meaning there was no update in-between caused by an interrupt.
       // This works because serial RX interrupts happen at a slower rate
@@ -111,23 +76,25 @@
         sw_barrier();
       } while (vold != vnew);
       return vnew;
-    #else
+    }
+    else {
       // With an 8bit index, reads are always atomic. No need for special handling
       return rx_buffer.head;
-    #endif
+    }
   }
 
-  #if RX_BUFFER_SIZE > 256
-    static volatile bool rx_tail_value_not_stable = false;
-    static volatile uint16_t rx_tail_value_backup = 0;
-  #endif
+  template<typename Cfg>
+  volatile bool MarlinSerial<Cfg>::rx_tail_value_not_stable = false; // While true, atomic_read_rx_tail() returns the backup instead of rx_buffer.tail (only consulted when Cfg::RX_SIZE > 256)
+  template<typename Cfg>
+  volatile uint16_t MarlinSerial<Cfg>::rx_tail_value_backup = 0; // Copy of the new tail; atomic_set_rx_tail() stores it here first when Cfg::RX_SIZE > 256
 
   // Set RX tail index, taking into account the RX ISR could interrupt
   //  the write to this variable in the middle - So a backup strategy
   //  is used to ensure reads of the correct values.
   //    -Must NOT be called from the RX ISR -
-  FORCE_INLINE void atomic_set_rx_tail(ring_buffer_pos_t value) {
-    #if RX_BUFFER_SIZE > 256
+  template<typename Cfg>
+  FORCE_INLINE void MarlinSerial<Cfg>::atomic_set_rx_tail(typename MarlinSerial<Cfg>::ring_buffer_pos_t value) {
+    if (Cfg::RX_SIZE > 256) {
       // Store the new value in the backup
       rx_tail_value_backup = value;
       sw_barrier();
@@ -140,29 +107,29 @@
       // Signal the new value is completely stored into the value
       rx_tail_value_not_stable = false;
       sw_barrier();
-    #else
+    }
+    else
       rx_buffer.tail = value;
-    #endif
   }
 
   // Get the RX tail index, taking into account the read could be
   //  interrupting in the middle of the update of that index value
   //    -Called from the RX ISR -
-  FORCE_INLINE ring_buffer_pos_t atomic_read_rx_tail() {
-    #if RX_BUFFER_SIZE > 256
+  template<typename Cfg>
+  FORCE_INLINE typename MarlinSerial<Cfg>::ring_buffer_pos_t MarlinSerial<Cfg>::atomic_read_rx_tail() {
+    if (Cfg::RX_SIZE > 256) { // Same condition under which atomic_set_rx_tail() maintains the backup value and the not-stable flag
       // If the true index is being modified, return the backup value
       if (rx_tail_value_not_stable) return rx_tail_value_backup;
-    #endif
+    } // For Cfg::RX_SIZE <= 256 the tail is always read directly below
     // The true index is stable, return it
     return rx_buffer.tail;
   }
 
   // (called with RX interrupts disabled)
-  FORCE_INLINE void store_rxd_char() {
+  template<typename Cfg>
+  FORCE_INLINE void MarlinSerial<Cfg>::store_rxd_char() {
 
-    #if ENABLED(EMERGENCY_PARSER)
-      static EmergencyParser::State emergency_state; // = EP_RESET
-    #endif
+    static EmergencyParser::State emergency_state; // = EP_RESET
 
     // Get the tail - Nothing can alter its value while this ISR is executing, but there's
     // a chance that this ISR interrupted the main process while it was updating the index.
@@ -173,27 +140,17 @@
     ring_buffer_pos_t h = rx_buffer.head;
 
     // Get the next element
-    ring_buffer_pos_t i = (ring_buffer_pos_t)(h + 1) & (ring_buffer_pos_t)(RX_BUFFER_SIZE - 1);
+    ring_buffer_pos_t i = (ring_buffer_pos_t)(h + 1) & (ring_buffer_pos_t)(Cfg::RX_SIZE - 1);
 
-    // This must read the M_UCSRxA register before reading the received byte to detect error causes
-    #if ENABLED(SERIAL_STATS_DROPPED_RX)
-      if (TEST(M_UCSRxA, M_DORx) && !++rx_dropped_bytes) --rx_dropped_bytes;
-    #endif
-
-    #if ENABLED(SERIAL_STATS_RX_BUFFER_OVERRUNS)
-      if (TEST(M_UCSRxA, M_DORx) && !++rx_buffer_overruns) --rx_buffer_overruns;
-    #endif
-
-    #if ENABLED(SERIAL_STATS_RX_FRAMING_ERRORS)
-      if (TEST(M_UCSRxA, M_FEx) && !++rx_framing_errors) --rx_framing_errors;
-    #endif
+    // This must read the R_UCSRA register before reading the received byte to detect error causes
+    if (Cfg::DROPPED_RX && B_DOR && !++rx_dropped_bytes) --rx_dropped_bytes;
+    if (Cfg::RX_OVERRUNS && B_DOR && !++rx_buffer_overruns) --rx_buffer_overruns;
+    if (Cfg::RX_FRAMING_ERRORS && B_FE && !++rx_framing_errors) --rx_framing_errors;
 
     // Read the character from the USART
-    uint8_t c = M_UDRx;
+    uint8_t c = R_UDR;
 
-    #if ENABLED(EMERGENCY_PARSER)
-      emergency_parser.update(emergency_state, c);
-    #endif
+    if (Cfg::EMERGENCYPARSER) emergency_parser.update(emergency_state, c);
 
     // If the character is to be stored at the index just before the tail
     // (such that the head would advance to the current tail), the RX FIFO is
@@ -202,29 +159,28 @@
       rx_buffer.buffer[h] = c;
       h = i;
     }
-    #if ENABLED(SERIAL_STATS_DROPPED_RX)
-      else if (!++rx_dropped_bytes) --rx_dropped_bytes;
-    #endif
+    else if (Cfg::DROPPED_RX && !++rx_dropped_bytes)
+      --rx_dropped_bytes;
 
-    #if ENABLED(SERIAL_STATS_MAX_RX_QUEUED)
+    if (Cfg::MAX_RX_QUEUED) {
       // Calculate count of bytes stored into the RX buffer
-      const ring_buffer_pos_t rx_count = (ring_buffer_pos_t)(h - t) & (ring_buffer_pos_t)(RX_BUFFER_SIZE - 1);
+      const ring_buffer_pos_t rx_count = (ring_buffer_pos_t)(h - t) & (ring_buffer_pos_t)(Cfg::RX_SIZE - 1);
 
       // Keep track of the maximum count of enqueued bytes
       NOLESS(rx_max_enqueued, rx_count);
-    #endif
+    }
 
-    #if ENABLED(SERIAL_XON_XOFF)
+    if (Cfg::XONOFF) {
       // If the last char that was sent was an XON
       if ((xon_xoff_state & XON_XOFF_CHAR_MASK) == XON_CHAR) {
 
         // Bytes stored into the RX buffer
-        const ring_buffer_pos_t rx_count = (ring_buffer_pos_t)(h - t) & (ring_buffer_pos_t)(RX_BUFFER_SIZE - 1);
+        const ring_buffer_pos_t rx_count = (ring_buffer_pos_t)(h - t) & (ring_buffer_pos_t)(Cfg::RX_SIZE - 1);
 
         // If over 12.5% of RX buffer capacity, send XOFF before running out of
         // RX buffer space .. 325 bytes @ 250kbits/s needed to let the host react
         // and stop sending bytes. This translates to 13mS propagation time.
-        if (rx_count >= (RX_BUFFER_SIZE) / 8) {
+        if (rx_count >= (Cfg::RX_SIZE) / 8) {
 
           // At this point, definitely no TX interrupt was executing, since the TX ISR can't be preempted.
           // Don't enable the TX interrupt here as a means to trigger the XOFF char, because if it happens
@@ -238,19 +194,17 @@
           // Wait until the TX register becomes empty and send it - Here there could be a problem
           // - While waiting for the TX register to empty, the RX register could receive a new
           //   character. This must also handle that situation!
-          while (!TEST(M_UCSRxA, M_UDREx)) {
+          while (!B_UDRE) {
 
-            if (TEST(M_UCSRxA,M_RXCx)) {
+            if (B_RXC) {
               // A char arrived while waiting for the TX buffer to be empty - Receive and process it!
 
-              i = (ring_buffer_pos_t)(h + 1) & (ring_buffer_pos_t)(RX_BUFFER_SIZE - 1);
+              i = (ring_buffer_pos_t)(h + 1) & (ring_buffer_pos_t)(Cfg::RX_SIZE - 1);
 
               // Read the character from the USART
-              c = M_UDRx;
+              c = R_UDR;
 
-              #if ENABLED(EMERGENCY_PARSER)
-                emergency_parser.update(emergency_state, c);
-              #endif
+              if (Cfg::EMERGENCYPARSER) emergency_parser.update(emergency_state, c);
 
               // If the character is to be stored at the index just before the tail
               // (such that the head would advance to the current tail), the FIFO is
@@ -259,19 +213,18 @@
                 rx_buffer.buffer[h] = c;
                 h = i;
               }
-              #if ENABLED(SERIAL_STATS_DROPPED_RX)
-                else if (!++rx_dropped_bytes) --rx_dropped_bytes;
-              #endif
+              else if (Cfg::DROPPED_RX && !++rx_dropped_bytes)
+                --rx_dropped_bytes;
             }
             sw_barrier();
           }
 
-          M_UDRx = XOFF_CHAR;
+          R_UDR = XOFF_CHAR;
 
           // Clear the TXC bit -- "can be cleared by writing a one to its bit
           // location". This makes sure flush() won't return until the bytes
           // actually got written
-          SBI(M_UCSRxA, M_TXCx);
+          B_TXC = 1;
 
           // At this point there could be a race condition between the write() function
           // and this sending of the XOFF char. This interrupt could happen between the
@@ -280,19 +233,18 @@
           // sure the write() function will succeed is to wait for the XOFF char to be
           // completely sent. Since an extra character could be received during the wait
           // it must also be handled!
-          while (!TEST(M_UCSRxA, M_UDREx)) {
+          while (!B_UDRE) {
 
-            if (TEST(M_UCSRxA,M_RXCx)) {
+            if (B_RXC) {
               // A char arrived while waiting for the TX buffer to be empty - Receive and process it!
 
-              i = (ring_buffer_pos_t)(h + 1) & (ring_buffer_pos_t)(RX_BUFFER_SIZE - 1);
+              i = (ring_buffer_pos_t)(h + 1) & (ring_buffer_pos_t)(Cfg::RX_SIZE - 1);
 
               // Read the character from the USART
-              c = M_UDRx;
+              c = R_UDR;
 
-              #if ENABLED(EMERGENCY_PARSER)
+              if (Cfg::EMERGENCYPARSER)
                 emergency_parser.update(emergency_state, c);
-              #endif
 
               // If the character is to be stored at the index just before the tail
               // (such that the head would advance to the current tail), the FIFO is
@@ -301,9 +253,8 @@
                 rx_buffer.buffer[h] = c;
                 h = i;
               }
-              #if ENABLED(SERIAL_STATS_DROPPED_RX)
-                else if (!++rx_dropped_bytes) --rx_dropped_bytes;
-              #endif
+              else if (Cfg::DROPPED_RX && !++rx_dropped_bytes)
+                --rx_dropped_bytes;
             }
             sw_barrier();
           }
@@ -312,78 +263,68 @@
           // have any issues writing to the UART TX register if it needs to!
         }
       }
-    #endif // SERIAL_XON_XOFF
+    }
 
     // Store the new head value - The main loop will retry until the value is stable
     rx_buffer.head = h;
   }
 
-  #if TX_BUFFER_SIZE > 0
-
-    // (called with TX irqs disabled)
-    FORCE_INLINE void _tx_udr_empty_irq(void) {
-
+  // Sends a pending XON or the next queued TX byte; also called directly by write()/flushTX() when global interrupts are off. (called with TX irqs disabled)
+  template<typename Cfg>
+  FORCE_INLINE void MarlinSerial<Cfg>::_tx_udr_empty_irq(void) {
+    if (Cfg::TX_SIZE > 0) { // Does nothing when the instance is configured without a TX buffer
       // Read positions
       uint8_t t = tx_buffer.tail;
       const uint8_t h = tx_buffer.head;
 
-      #if ENABLED(SERIAL_XON_XOFF)
+      if (Cfg::XONOFF) { // Replaces the old SERIAL_XON_XOFF preprocessor guard
         // If an XON char is pending to be sent, do it now
         if (xon_xoff_state == XON_CHAR) {
 
           // Send the character
-          M_UDRx = XON_CHAR;
+          R_UDR = XON_CHAR;
 
           // clear the TXC bit -- "can be cleared by writing a one to its bit
           // location". This makes sure flush() won't return until the bytes
           // actually got written
-          SBI(M_UCSRxA, M_TXCx);
+          B_TXC = 1;
 
           // Remember we sent it.
           xon_xoff_state = XON_CHAR | XON_XOFF_CHAR_SENT;
 
           // If nothing else to transmit, just disable TX interrupts.
-          if (h == t) CBI(M_UCSRxB, M_UDRIEx); // (Non-atomic, could be reenabled by the main program, but eventually this will succeed)
+          if (h == t) B_UDRIE = 0; // (Non-atomic, could be reenabled by the main program, but eventually this will succeed)
 
           return;
         }
-      #endif
+      }
 
       // If nothing to transmit, just disable TX interrupts. This could
       // happen as the result of the non atomicity of the disabling of RX
       // interrupts that could end reenabling TX interrupts as a side effect.
       if (h == t) {
-        CBI(M_UCSRxB, M_UDRIEx); // (Non-atomic, could be reenabled by the main program, but eventually this will succeed)
+        B_UDRIE = 0; // (Non-atomic, could be reenabled by the main program, but eventually this will succeed)
         return;
       }
 
       // There is something to TX, Send the next byte
       const uint8_t c = tx_buffer.buffer[t];
-      t = (t + 1) & (TX_BUFFER_SIZE - 1);
-      M_UDRx = c;
+      t = (t + 1) & (Cfg::TX_SIZE - 1); // Advance the tail; the mask wraps it (assumes TX_SIZE is a power of two - TODO confirm)
+      R_UDR = c; // The byte goes to the data register before the new tail is stored
       tx_buffer.tail = t;
 
       // Clear the TXC bit (by writing a one to its bit location).
       // Ensures flush() won't return until the bytes are actually written/
-      SBI(M_UCSRxA, M_TXCx);
+      B_TXC = 1;
 
       // Disable interrupts if there is nothing to transmit following this byte
-      if (h == t) CBI(M_UCSRxB, M_UDRIEx); // (Non-atomic, could be reenabled by the main program, but eventually this will succeed)
+      if (h == t) B_UDRIE = 0; // (Non-atomic, could be reenabled by the main program, but eventually this will succeed)
     }
-
-    #ifdef M_USARTx_UDRE_vect
-      ISR(M_USARTx_UDRE_vect) { _tx_udr_empty_irq(); }
-    #endif
-
-  #endif // TX_BUFFER_SIZE
-
-  #ifdef M_USARTx_RX_vect
-    ISR(M_USARTx_RX_vect) { store_rxd_char(); }
-  #endif
+  }
 
   // Public Methods
-
-  void MarlinSerial::begin(const long baud) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::begin(const long baud) {
     uint16_t baud_setting;
     bool useU2X = true;
 
@@ -394,41 +335,41 @@
       if (baud == 57600) useU2X = false;
     #endif
 
+    R_UCSRA = 0;
     if (useU2X) {
-      M_UCSRxA = _BV(M_U2Xx);
+      B_U2X = 1;
       baud_setting = (F_CPU / 4 / baud - 1) / 2;
     }
-    else {
-      M_UCSRxA = 0;
+    else
       baud_setting = (F_CPU / 8 / baud - 1) / 2;
-    }
 
     // assign the baud_setting, a.k.a. ubbr (USART Baud Rate Register)
-    M_UBRRxH = baud_setting >> 8;
-    M_UBRRxL = baud_setting;
-
-    SBI(M_UCSRxB, M_RXENx);
-    SBI(M_UCSRxB, M_TXENx);
-    SBI(M_UCSRxB, M_RXCIEx);
-    #if TX_BUFFER_SIZE > 0
-      CBI(M_UCSRxB, M_UDRIEx);
-    #endif
+    R_UBRRH = baud_setting >> 8;
+    R_UBRRL = baud_setting;
+
+    B_RXEN = 1;
+    B_TXEN = 1;
+    B_RXCIE = 1;
+    if (Cfg::TX_SIZE > 0) B_UDRIE = 0;
     _written = false;
   }
 
-  void MarlinSerial::end() {
-    CBI(M_UCSRxB, M_RXENx);
-    CBI(M_UCSRxB, M_TXENx);
-    CBI(M_UCSRxB, M_RXCIEx);
-    CBI(M_UCSRxB, M_UDRIEx);
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::end() { // Clears the RXEN, TXEN and RXCIE bits that begin() sets, plus UDRIE
+    B_RXEN = 0;
+    B_TXEN = 0;
+    B_RXCIE = 0;
+    B_UDRIE = 0; // Cleared unconditionally here; begin() only touches it when Cfg::TX_SIZE > 0
   }
 
-  int MarlinSerial::peek(void) {
+  template<typename Cfg>
+  int MarlinSerial<Cfg>::peek(void) { // Returns the byte at the RX tail without advancing the tail, or -1 when head == tail
     const ring_buffer_pos_t h = atomic_read_rx_head(), t = rx_buffer.tail;
     return h == t ? -1 : rx_buffer.buffer[t];
   }
 
-  int MarlinSerial::read(void) {
+  template<typename Cfg>
+  int MarlinSerial<Cfg>::read(void) {
     const ring_buffer_pos_t h = atomic_read_rx_head();
 
     // Read the tail. Main thread owns it, so it is safe to directly read it
@@ -439,42 +380,45 @@
 
     // Get the next char
     const int v = rx_buffer.buffer[t];
-    t = (ring_buffer_pos_t)(t + 1) & (RX_BUFFER_SIZE - 1);
+    t = (ring_buffer_pos_t)(t + 1) & (Cfg::RX_SIZE - 1);
 
     // Advance tail - Making sure the RX ISR will always get an stable value, even
     // if it interrupts the writing of the value of that variable in the middle.
     atomic_set_rx_tail(t);
 
-    #if ENABLED(SERIAL_XON_XOFF)
+    if (Cfg::XONOFF) {
       // If the XOFF char was sent, or about to be sent...
       if ((xon_xoff_state & XON_XOFF_CHAR_MASK) == XOFF_CHAR) {
         // Get count of bytes in the RX buffer
-        const ring_buffer_pos_t rx_count = (ring_buffer_pos_t)(h - t) & (ring_buffer_pos_t)(RX_BUFFER_SIZE - 1);
-        if (rx_count < (RX_BUFFER_SIZE) / 10) {
-          #if TX_BUFFER_SIZE > 0
+        const ring_buffer_pos_t rx_count = (ring_buffer_pos_t)(h - t) & (ring_buffer_pos_t)(Cfg::RX_SIZE - 1);
+        if (rx_count < (Cfg::RX_SIZE) / 10) {
+          if (Cfg::TX_SIZE > 0) {
             // Signal we want an XON character to be sent.
             xon_xoff_state = XON_CHAR;
             // Enable TX ISR. Non atomic, but it will eventually enable them
-            SBI(M_UCSRxB, M_UDRIEx);
-          #else
+            B_UDRIE = 1;
+          }
+          else {
             // If not using TX interrupts, we must send the XON char now
             xon_xoff_state = XON_CHAR | XON_XOFF_CHAR_SENT;
-            while (!TEST(M_UCSRxA, M_UDREx)) sw_barrier();
-            M_UDRx = XON_CHAR;
-          #endif
+            while (!B_UDRE) sw_barrier();
+            R_UDR = XON_CHAR;
+          }
         }
       }
-    #endif
+    }
 
     return v;
   }
 
-  ring_buffer_pos_t MarlinSerial::available(void) {
+  template<typename Cfg>
+  typename MarlinSerial<Cfg>::ring_buffer_pos_t MarlinSerial<Cfg>::available(void) { // Number of received bytes currently queued between the RX tail and head
     const ring_buffer_pos_t h = atomic_read_rx_head(), t = rx_buffer.tail;
-    return (ring_buffer_pos_t)(RX_BUFFER_SIZE + h - t) & (RX_BUFFER_SIZE - 1);
+    return (ring_buffer_pos_t)(Cfg::RX_SIZE + h - t) & (Cfg::RX_SIZE - 1); // Tail-to-head distance, wrapped with the RX_SIZE - 1 mask
   }
 
-  void MarlinSerial::flush(void) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::flush(void) {
 
     // Set the tail to the head:
     //  - Read the RX head index in a safe way. (See atomic_read_rx_head.)
@@ -482,26 +426,36 @@
     //    if it interrupts the writing of the value of that variable in the middle.
     atomic_set_rx_tail(atomic_read_rx_head());
 
-    #if ENABLED(SERIAL_XON_XOFF)
+    if (Cfg::XONOFF) {
       // If the XOFF char was sent, or about to be sent...
       if ((xon_xoff_state & XON_XOFF_CHAR_MASK) == XOFF_CHAR) {
-        #if TX_BUFFER_SIZE > 0
+        if (Cfg::TX_SIZE > 0) {
           // Signal we want an XON character to be sent.
           xon_xoff_state = XON_CHAR;
           // Enable TX ISR. Non atomic, but it will eventually enable it.
-          SBI(M_UCSRxB, M_UDRIEx);
-        #else
+          B_UDRIE = 1;
+        }
+        else {
           // If not using TX interrupts, we must send the XON char now
           xon_xoff_state = XON_CHAR | XON_XOFF_CHAR_SENT;
-          while (!TEST(M_UCSRxA, M_UDREx)) sw_barrier();
-          M_UDRx = XON_CHAR;
-        #endif
+          while (!B_UDRE) sw_barrier();
+          R_UDR = XON_CHAR;
+        }
       }
-    #endif
+    }
   }
 
-  #if TX_BUFFER_SIZE > 0
-    void MarlinSerial::write(const uint8_t c) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::write(const uint8_t c) {
+    if (Cfg::TX_SIZE == 0) {
+
+      _written = true;
+      while (!B_UDRE) sw_barrier();
+      R_UDR = c;
+
+    }
+    else {
+
       _written = true;
 
       // If the TX interrupts are disabled and the data register
@@ -511,17 +465,17 @@
       // interrupt overhead becomes a slowdown.
       // Yes, there is a race condition between the sending of the
       // XOFF char at the RX ISR, but it is properly handled there
-      if (!TEST(M_UCSRxB, M_UDRIEx) && TEST(M_UCSRxA, M_UDREx)) {
-        M_UDRx = c;
+      if (!B_UDRIE && B_UDRE) {
+        R_UDR = c;
 
         // clear the TXC bit -- "can be cleared by writing a one to its bit
         // location". This makes sure flush() won't return until the bytes
         // actually got written
-        SBI(M_UCSRxA, M_TXCx);
+        B_TXC = 1;
         return;
       }
 
-      const uint8_t i = (tx_buffer.head + 1) & (TX_BUFFER_SIZE - 1);
+      const uint8_t i = (tx_buffer.head + 1) & (Cfg::TX_SIZE - 1);
 
       // If global interrupts are disabled (as the result of being called from an ISR)...
       if (!ISRS_ENABLED()) {
@@ -530,7 +484,7 @@
         while (i == tx_buffer.tail) {
 
           // If we can transmit another byte, do it.
-          if (TEST(M_UCSRxA, M_UDREx)) _tx_udr_empty_irq();
+          if (B_UDRE) _tx_udr_empty_irq();
 
           // Make sure compiler rereads tx_buffer.tail
           sw_barrier();
@@ -538,7 +492,7 @@
       }
       else {
         // Interrupts are enabled, just wait until there is space
-        while (i == tx_buffer.tail) { sw_barrier(); }
+        while (i == tx_buffer.tail) sw_barrier();
       }
 
       // Store new char. head is always safe to move
@@ -546,10 +500,27 @@
       tx_buffer.head = i;
 
       // Enable TX ISR - Non atomic, but it will eventually enable TX ISR
-      SBI(M_UCSRxB, M_UDRIEx);
+      B_UDRIE = 1;
     }
+  }
+
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::flushTX(void) {
+
+    if (Cfg::TX_SIZE == 0) {
+      // No bytes written, no need to flush. This special case is needed since there's
+      // no way to force the TXC (transmit complete) bit to 1 during initialization.
+      if (!_written) return;
+
+      // Wait until everything was transmitted
+      while (!B_TXC) sw_barrier();
+
+      // At this point nothing is queued anymore (UDRIE is disabled) and
+      // the hardware finished transmission (TXC is set).
+
+    }
+    else {
 
-    void MarlinSerial::flushTX(void) {
       // No bytes written, no need to flush. This special case is needed since there's
       // no way to force the TXC (transmit complete) bit to 1 during initialization.
       if (!_written) return;
@@ -558,11 +529,10 @@
       if (!ISRS_ENABLED()) {
 
         // Wait until everything was transmitted - We must do polling, as interrupts are disabled
-        while (tx_buffer.head != tx_buffer.tail || !TEST(M_UCSRxA, M_TXCx)) {
+        while (tx_buffer.head != tx_buffer.tail || !B_TXC) {
 
           // If there is more space, send an extra character
-          if (TEST(M_UCSRxA, M_UDREx))
-            _tx_udr_empty_irq();
+          if (B_UDRE) _tx_udr_empty_irq();
 
           sw_barrier();
         }
@@ -570,55 +540,40 @@
       }
       else {
         // Wait until everything was transmitted
-        while (tx_buffer.head != tx_buffer.tail || !TEST(M_UCSRxA, M_TXCx)) sw_barrier();
+        while (tx_buffer.head != tx_buffer.tail || !B_TXC) sw_barrier();
       }
 
       // At this point nothing is queued anymore (DRIE is disabled) and
       // the hardware finished transmission (TXC is set).
     }
-
-  #else // TX_BUFFER_SIZE == 0
-
-    void MarlinSerial::write(const uint8_t c) {
-      _written = true;
-      while (!TEST(M_UCSRxA, M_UDREx)) sw_barrier();
-      M_UDRx = c;
-    }
-
-    void MarlinSerial::flushTX(void) {
-      // No bytes written, no need to flush. This special case is needed since there's
-      // no way to force the TXC (transmit complete) bit to 1 during initialization.
-      if (!_written) return;
-
-      // Wait until everything was transmitted
-      while (!TEST(M_UCSRxA, M_TXCx)) sw_barrier();
-
-      // At this point nothing is queued anymore (DRIE is disabled) and
-      // the hardware finished transmission (TXC is set).
-    }
-  #endif // TX_BUFFER_SIZE == 0
+  }
 
   /**
    * Imports from print.h
    */
 
-  void MarlinSerial::print(char c, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::print(char c, int base) { // Casts to long and forwards to print(long, int)
     print((long)c, base);
   }
 
-  void MarlinSerial::print(unsigned char b, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::print(unsigned char b, int base) { // Casts to unsigned long and forwards to print(unsigned long, int)
     print((unsigned long)b, base);
   }
 
-  void MarlinSerial::print(int n, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::print(int n, int base) { // Casts to long and forwards to print(long, int)
     print((long)n, base);
   }
 
-  void MarlinSerial::print(unsigned int n, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::print(unsigned int n, int base) { // Casts to unsigned long and forwards to print(unsigned long, int)
     print((unsigned long)n, base);
   }
 
-  void MarlinSerial::print(long n, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::print(long n, int base) {
     if (base == 0) write(n);
     else if (base == 10) {
       if (n < 0) { print('-'); n = -n; }
@@ -628,68 +583,81 @@
       printNumber(n, base);
   }
 
-  void MarlinSerial::print(unsigned long n, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::print(unsigned long n, int base) { // base 0 passes n to write() (which takes a uint8_t); any other base goes through printNumber()
     if (base == 0) write(n);
     else printNumber(n, base);
   }
 
-  void MarlinSerial::print(double n, int digits) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::print(double n, int digits) { // Delegates to printFloat(), passing digits through unchanged
     printFloat(n, digits);
   }
 
-  void MarlinSerial::println(void) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(void) { // Emits the line ending: '\r' followed by '\n'
     print('\r');
     print('\n');
   }
 
-  void MarlinSerial::println(const String& s) {
+  template<typename Cfg> // println(value[, arg]) overloads: each one calls the matching print() and then println() for the line ending
+  void MarlinSerial<Cfg>::println(const String& s) {
     print(s);
     println();
   }
 
-  void MarlinSerial::println(const char c[]) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(const char c[]) {
     print(c);
     println();
   }
 
-  void MarlinSerial::println(char c, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(char c, int base) {
     print(c, base);
     println();
   }
 
-  void MarlinSerial::println(unsigned char b, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(unsigned char b, int base) {
     print(b, base);
     println();
   }
 
-  void MarlinSerial::println(int n, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(int n, int base) {
     print(n, base);
     println();
   }
 
-  void MarlinSerial::println(unsigned int n, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(unsigned int n, int base) {
     print(n, base);
     println();
   }
 
-  void MarlinSerial::println(long n, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(long n, int base) {
     print(n, base);
     println();
   }
 
-  void MarlinSerial::println(unsigned long n, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(unsigned long n, int base) {
     print(n, base);
     println();
   }
 
-  void MarlinSerial::println(double n, int digits) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(double n, int digits) {
     print(n, digits);
     println();
   }
 
   // Private Methods
 
-  void MarlinSerial::printNumber(unsigned long n, uint8_t base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::printNumber(unsigned long n, uint8_t base) {
     if (n) {
       unsigned char buf[8 * sizeof(long)]; // Enough space for base 2
       int8_t i = 0;
@@ -704,7 +672,8 @@
       print('0');
   }
 
-  void MarlinSerial::printFloat(double number, uint8_t digits) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::printFloat(double number, uint8_t digits) {
     // Handle negative numbers
     if (number < 0.0) {
       print('-');
@@ -713,9 +682,7 @@
 
     // Round correctly so that print(1.999, 2) prints as "2.00"
     double rounding = 0.5;
-    for (uint8_t i = 0; i < digits; ++i)
-      rounding *= 0.1;
-
+    for (uint8_t i = 0; i < digits; ++i) rounding *= 0.1;
     number += rounding;
 
     // Extract the integer part of the number and print it
@@ -736,8 +703,20 @@
     }
   }
 
+  // Hook up the USART RX and UDRE interrupt vectors of SERIAL_PORT to the MarlinSerial<MarlinSerialCfg> handlers
+  ISR(SERIAL_REGNAME(USART,SERIAL_PORT,_RX_vect)) { // NOTE(review): the old #ifdef M_USARTx_RX_vect guard is gone - confirm this vector name exists on every supported MCU
+    MarlinSerial<MarlinSerialCfg>::store_rxd_char();
+  }
+
+  ISR(SERIAL_REGNAME(USART,SERIAL_PORT,_UDRE_vect)) { // NOTE(review): formerly defined only when TX_BUFFER_SIZE > 0; the handler does nothing when Cfg::TX_SIZE == 0
+    MarlinSerial<MarlinSerialCfg>::_tx_udr_empty_irq();
+  }
+
   // Preinstantiate
-  MarlinSerial customizedSerial;
+  template class MarlinSerial<MarlinSerialCfg>; // Explicit instantiation of the class template for MarlinSerialCfg
+
+  // Define the customizedSerial object
+  MarlinSerial<MarlinSerialCfg> customizedSerial;
 
 #endif // !USBCON && (UBRRH || UBRR0H || UBRR1H || UBRR2H || UBRR3H)
 
diff --git a/Marlin/src/HAL/HAL_AVR/MarlinSerial.h b/Marlin/src/HAL/HAL_AVR/MarlinSerial.h
index 8c2b3f3186e534674be3f1012481887ab2dc61d7..b378567b6e3cb59c4be1481e2367c2595fef13c4 100644
--- a/Marlin/src/HAL/HAL_AVR/MarlinSerial.h
+++ b/Marlin/src/HAL/HAL_AVR/MarlinSerial.h
@@ -27,12 +27,13 @@
  * Modified 28 September 2010 by Mark Sproul
  * Modified 14 February 2016 by Andreas Hardtung (added tx buffer)
  * Modified 01 October 2017 by Eduardo José Tagle (added XON/XOFF)
+ * Templatized 01 October 2018 by Eduardo José Tagle to allow multiple instances
  */
 
 #ifndef _MARLINSERIAL_H_
 #define _MARLINSERIAL_H_
 
-#include "../../inc/MarlinConfigPre.h"
+#include "../shared/MarlinSerial.h"
 
 #include <WString.h>
 
@@ -40,73 +41,173 @@
   #define SERIAL_PORT 0
 #endif
 
-// The presence of the UBRRH register is used to detect a UART.
-#define UART_PRESENT(port) ((port == 0 && (defined(UBRRH) || defined(UBRR0H))) || \
-                            (port == 1 && defined(UBRR1H)) || (port == 2 && defined(UBRR2H)) || \
-                            (port == 3 && defined(UBRR3H)))
-
-// These are macros to build serial port register names for the selected SERIAL_PORT (C preprocessor
-// requires two levels of indirection to expand macro values properly)
-#define SERIAL_REGNAME(registerbase,number,suffix) SERIAL_REGNAME_INTERNAL(registerbase,number,suffix)
-#if SERIAL_PORT == 0 && (!defined(UBRR0H) || !defined(UDR0)) // use un-numbered registers if necessary
-  #define SERIAL_REGNAME_INTERNAL(registerbase,number,suffix) registerbase##suffix
-#else
-  #define SERIAL_REGNAME_INTERNAL(registerbase,number,suffix) registerbase##number##suffix
-#endif
+#ifndef USBCON
 
-// Registers used by MarlinSerial class (expanded depending on selected serial port)
-#define M_UCSRxA           SERIAL_REGNAME(UCSR,SERIAL_PORT,A) // defines M_UCSRxA to be UCSRnA where n is the serial port number
-#define M_UCSRxB           SERIAL_REGNAME(UCSR,SERIAL_PORT,B)
-#define M_RXENx            SERIAL_REGNAME(RXEN,SERIAL_PORT,)
-#define M_TXENx            SERIAL_REGNAME(TXEN,SERIAL_PORT,)
-#define M_TXCx             SERIAL_REGNAME(TXC,SERIAL_PORT,)
-#define M_RXCIEx           SERIAL_REGNAME(RXCIE,SERIAL_PORT,)
-#define M_UDREx            SERIAL_REGNAME(UDRE,SERIAL_PORT,)
-#define M_FEx              SERIAL_REGNAME(FE,SERIAL_PORT,)
-#define M_DORx             SERIAL_REGNAME(DOR,SERIAL_PORT,)
-#define M_UPEx             SERIAL_REGNAME(UPE,SERIAL_PORT,)
-#define M_UDRIEx           SERIAL_REGNAME(UDRIE,SERIAL_PORT,)
-#define M_UDRx             SERIAL_REGNAME(UDR,SERIAL_PORT,)
-#define M_UBRRxH           SERIAL_REGNAME(UBRR,SERIAL_PORT,H)
-#define M_UBRRxL           SERIAL_REGNAME(UBRR,SERIAL_PORT,L)
-#define M_RXCx             SERIAL_REGNAME(RXC,SERIAL_PORT,)
-#define M_USARTx_RX_vect   SERIAL_REGNAME(USART,SERIAL_PORT,_RX_vect)
-#define M_U2Xx             SERIAL_REGNAME(U2X,SERIAL_PORT,)
-#define M_USARTx_UDRE_vect SERIAL_REGNAME(USART,SERIAL_PORT,_UDRE_vect)
-
-#define DEC 10
-#define HEX 16
-#define OCT 8
-#define BIN 2
-#define BYTE 0
+  // The presence of the UBRRH register is used to detect a UART.
+  #define UART_PRESENT(port) ((port == 0 && (defined(UBRRH) || defined(UBRR0H))) || \
+                              (port == 1 && defined(UBRR1H)) || (port == 2 && defined(UBRR2H)) || \
+                              (port == 3 && defined(UBRR3H)))
 
-#ifndef USBCON
-  // We're using a ring buffer (I think), in which rx_buffer_head is the index of the
-  // location to which to write the next incoming character and rx_buffer_tail is the
-  // index of the location from which to read.
-  #if RX_BUFFER_SIZE > 256
-    typedef uint16_t ring_buffer_pos_t;
+  // These are macros to build serial port register names for the selected SERIAL_PORT (C preprocessor
+  // requires two levels of indirection to expand macro values properly)
+  #define SERIAL_REGNAME(registerbase,number,suffix) SERIAL_REGNAME_INTERNAL(registerbase,number,suffix)
+  #if SERIAL_PORT == 0 && (!defined(UBRR0H) || !defined(UDR0)) // use un-numbered registers if necessary
+    #define SERIAL_REGNAME_INTERNAL(registerbase,number,suffix) registerbase##suffix
   #else
-    typedef uint8_t ring_buffer_pos_t;
+    #define SERIAL_REGNAME_INTERNAL(registerbase,number,suffix) registerbase##number##suffix
   #endif
 
-  #if ENABLED(SERIAL_STATS_DROPPED_RX)
-    extern uint8_t rx_dropped_bytes;
-  #endif
+  // Registers used by MarlinSerial class (expanded depending on selected serial port)
 
-  #if ENABLED(SERIAL_STATS_RX_BUFFER_OVERRUNS)
-    extern uint8_t rx_buffer_overruns;
-  #endif
+  // Templated 8-bit register accessor (generic, empty primary template)
+  #define UART_REGISTER_DECL_BASE(registerbase, suffix) \
+    template<int portNr> struct R_##registerbase##x##suffix {}
 
-  #if ENABLED(SERIAL_STATS_RX_FRAMING_ERRORS)
-    extern uint8_t rx_framing_errors;
-  #endif
+  // Templated 8-bit register accessor (specialization for each port): assignment writes the register, conversion to uint8_t reads it
+  #define UART_REGISTER_DECL(port, registerbase, suffix) \
+    template<> struct R_##registerbase##x##suffix<port> { \
+      constexpr R_##registerbase##x##suffix(int) {} \
+      FORCE_INLINE void operator=(uint8_t newVal) const { SERIAL_REGNAME(registerbase,port,suffix) = newVal; } \
+      FORCE_INLINE operator uint8_t() const { return SERIAL_REGNAME(registerbase,port,suffix); } \
+    }
+
+  // Templated 1-bit register field accessor (generic, empty primary template)
+  #define UART_BIT_DECL_BASE(registerbase, suffix, bit) \
+    template<int portNr>struct B_##bit##x {}
+
+  // Templated 1-bit register field accessor (specialization for each port): assignment sets (SBI) or clears (CBI) the bit, conversion to bool tests it (TEST)
+  #define UART_BIT_DECL(port, registerbase, suffix, bit) \
+    template<> struct B_##bit##x<port> { \
+      constexpr B_##bit##x(int) {} \
+      FORCE_INLINE void operator=(int newVal) const { \
+        if (newVal) \
+          SBI(SERIAL_REGNAME(registerbase,port,suffix),SERIAL_REGNAME(bit,port,)); \
+        else \
+          CBI(SERIAL_REGNAME(registerbase,port,suffix),SERIAL_REGNAME(bit,port,)); \
+      } \
+      FORCE_INLINE operator bool() const { return TEST(SERIAL_REGNAME(registerbase,port,suffix),SERIAL_REGNAME(bit,port,)); } \
+    }
+
+  #define UART_DECL_BASE() \
+    UART_REGISTER_DECL_BASE(UCSR,A);\
+    UART_REGISTER_DECL_BASE(UDR,);\
+    UART_REGISTER_DECL_BASE(UBRR,H);\
+    UART_REGISTER_DECL_BASE(UBRR,L);\
+    UART_BIT_DECL_BASE(UCSR,B,RXEN);\
+    UART_BIT_DECL_BASE(UCSR,B,TXEN);\
+    UART_BIT_DECL_BASE(UCSR,A,TXC);\
+    UART_BIT_DECL_BASE(UCSR,B,RXCIE);\
+    UART_BIT_DECL_BASE(UCSR,A,UDRE);\
+    UART_BIT_DECL_BASE(UCSR,A,FE);\
+    UART_BIT_DECL_BASE(UCSR,A,DOR);\
+    UART_BIT_DECL_BASE(UCSR,B,UDRIE);\
+    UART_BIT_DECL_BASE(UCSR,A,RXC);\
+    UART_BIT_DECL_BASE(UCSR,A,U2X)
 
-  #if ENABLED(SERIAL_STATS_MAX_RX_QUEUED)
-    extern ring_buffer_pos_t rx_max_enqueued;
+  #define UART_DECL(port) \
+    UART_REGISTER_DECL(port,UCSR,A);\
+    UART_REGISTER_DECL(port,UDR,);\
+    UART_REGISTER_DECL(port,UBRR,H);\
+    UART_REGISTER_DECL(port,UBRR,L);\
+    UART_BIT_DECL(port,UCSR,B,RXEN);\
+    UART_BIT_DECL(port,UCSR,B,TXEN);\
+    UART_BIT_DECL(port,UCSR,A,TXC);\
+    UART_BIT_DECL(port,UCSR,B,RXCIE);\
+    UART_BIT_DECL(port,UCSR,A,UDRE);\
+    UART_BIT_DECL(port,UCSR,A,FE);\
+    UART_BIT_DECL(port,UCSR,A,DOR);\
+    UART_BIT_DECL(port,UCSR,B,UDRIE);\
+    UART_BIT_DECL(port,UCSR,A,RXC);\
+    UART_BIT_DECL(port,UCSR,A,U2X)
+
+  // Declare empty templates
+  UART_DECL_BASE();
+
+  // And all the specializations for each possible serial port
+  #if UART_PRESENT(0)
+    UART_DECL(0);
+  #endif
+  #if UART_PRESENT(1)
+    UART_DECL(1);
+  #endif
+  #if UART_PRESENT(2)
+    UART_DECL(2);
   #endif
+  #if UART_PRESENT(3)
+    UART_DECL(3);
+  #endif
+
+  #define DEC 10
+  #define HEX 16
+  #define OCT 8
+  #define BIN 2
+  #define BYTE 0
 
+  // Compile-time type selector: TypeSelector<b, T, F>::type is T when b is true, F when b is false
+  template<bool b, typename T, typename F> struct TypeSelector { typedef T type;} ;
+  template<typename T, typename F> struct TypeSelector<false, T, F> { typedef F type; };
+
+  template<typename Cfg>
   class MarlinSerial {
+  protected:
+    // Registers
+    static constexpr R_UCSRxA<Cfg::PORT> R_UCSRA = 0;
+    static constexpr R_UDRx<Cfg::PORT>   R_UDR   = 0;
+    static constexpr R_UBRRxH<Cfg::PORT> R_UBRRH = 0;
+    static constexpr R_UBRRxL<Cfg::PORT> R_UBRRL = 0;
+
+    // Bits
+    static constexpr B_RXENx<Cfg::PORT>  B_RXEN  = 0;
+    static constexpr B_TXENx<Cfg::PORT>  B_TXEN  = 0;
+    static constexpr B_TXCx<Cfg::PORT>   B_TXC   = 0;
+    static constexpr B_RXCIEx<Cfg::PORT> B_RXCIE = 0;
+    static constexpr B_UDREx<Cfg::PORT>  B_UDRE  = 0;
+    static constexpr B_FEx<Cfg::PORT>    B_FE    = 0;
+    static constexpr B_DORx<Cfg::PORT>   B_DOR   = 0;
+    static constexpr B_UDRIEx<Cfg::PORT> B_UDRIE = 0;
+    static constexpr B_RXCx<Cfg::PORT>   B_RXC   = 0;
+    static constexpr B_U2Xx<Cfg::PORT>   B_U2X   = 0;
+
+    // Choose the ring buffer index type from the RX buffer size (uint16_t above 256 bytes, otherwise uint8_t)
+    typedef typename TypeSelector<(Cfg::RX_SIZE>256), uint16_t, uint8_t>::type ring_buffer_pos_t;
+
+    struct ring_buffer_r {
+      volatile ring_buffer_pos_t head, tail;
+      unsigned char buffer[Cfg::RX_SIZE];
+    };
+
+    struct ring_buffer_t {
+      volatile uint8_t head, tail;
+      unsigned char buffer[Cfg::TX_SIZE];
+    };
+
+    static ring_buffer_r rx_buffer;
+    static ring_buffer_t tx_buffer;
+    static bool _written;
+
+    static constexpr uint8_t XON_XOFF_CHAR_SENT = 0x80,  // XON / XOFF Character was sent
+                             XON_XOFF_CHAR_MASK = 0x1F;  // XON / XOFF character to send
+
+    // XON / XOFF character definitions
+    static constexpr uint8_t XON_CHAR  = 17, XOFF_CHAR = 19;
+    static uint8_t xon_xoff_state,
+                   rx_dropped_bytes,
+                   rx_buffer_overruns,
+                   rx_framing_errors;
+    static ring_buffer_pos_t rx_max_enqueued;
+
+    static FORCE_INLINE ring_buffer_pos_t atomic_read_rx_head();
+
+    static volatile bool rx_tail_value_not_stable;
+    static volatile uint16_t rx_tail_value_backup;
+
+    static FORCE_INLINE void atomic_set_rx_tail(ring_buffer_pos_t value);
+    static FORCE_INLINE ring_buffer_pos_t atomic_read_rx_tail();
+
+    public:
+
+    FORCE_INLINE static void store_rxd_char();
+    FORCE_INLINE static void _tx_udr_empty_irq(void);
 
     public:
       MarlinSerial() {};
@@ -119,21 +220,10 @@
       static void write(const uint8_t c);
       static void flushTX(void);
 
-      #if ENABLED(SERIAL_STATS_DROPPED_RX)
-        FORCE_INLINE static uint32_t dropped() { return rx_dropped_bytes; }
-      #endif
-
-      #if ENABLED(SERIAL_STATS_RX_BUFFER_OVERRUNS)
-        FORCE_INLINE static uint32_t buffer_overruns() { return rx_buffer_overruns; }
-      #endif
-
-      #if ENABLED(SERIAL_STATS_RX_FRAMING_ERRORS)
-        FORCE_INLINE static uint32_t framing_errors() { return rx_framing_errors; }
-      #endif
-
-      #if ENABLED(SERIAL_STATS_MAX_RX_QUEUED)
-        FORCE_INLINE static ring_buffer_pos_t rxMaxEnqueued() { return rx_max_enqueued; }
-      #endif
+      FORCE_INLINE static uint8_t dropped() { return Cfg::DROPPED_RX ? rx_dropped_bytes : 0; }
+      FORCE_INLINE static uint8_t buffer_overruns() { return Cfg::RX_OVERRUNS ? rx_buffer_overruns : 0; }
+      FORCE_INLINE static uint8_t framing_errors() { return Cfg::RX_FRAMING_ERRORS ? rx_framing_errors : 0; }
+      FORCE_INLINE static ring_buffer_pos_t rxMaxEnqueued() { return Cfg::MAX_RX_QUEUED ? rx_max_enqueued : 0; }
 
       FORCE_INLINE static void write(const char* str) { while (*str) write(*str++); }
       FORCE_INLINE static void write(const uint8_t* buffer, size_t size) { while (size--) write(*buffer++); }
@@ -165,7 +255,20 @@
       static void printFloat(double, uint8_t);
   };
 
-  extern MarlinSerial customizedSerial;
+  // Serial port configuration: compile-time settings passed to MarlinSerial<Cfg> and read there as Cfg::NAME
+  struct MarlinSerialCfg {
+    static constexpr int PORT               = SERIAL_PORT;                      // UART number; selects the R_*x<PORT> / B_*x<PORT> register specializations
+    static constexpr unsigned int RX_SIZE   = RX_BUFFER_SIZE;                   // RX ring buffer size in bytes; above 256 the buffer index type becomes uint16_t
+    static constexpr unsigned int TX_SIZE   = TX_BUFFER_SIZE;                   // TX ring buffer size in bytes
+    static constexpr bool XONOFF            = bSERIAL_XON_XOFF;                 // presumably enables XON/XOFF flow control - TODO confirm in MarlinSerial.cpp
+    static constexpr bool EMERGENCYPARSER   = bEMERGENCY_PARSER;                // presumably feeds received bytes to the emergency parser - TODO confirm in MarlinSerial.cpp
+    static constexpr bool DROPPED_RX        = bSERIAL_STATS_DROPPED_RX;         // when false, dropped() always returns 0
+    static constexpr bool RX_OVERRUNS       = bSERIAL_STATS_RX_BUFFER_OVERRUNS; // when false, buffer_overruns() always returns 0
+    static constexpr bool RX_FRAMING_ERRORS = bSERIAL_STATS_RX_FRAMING_ERRORS;  // when false, framing_errors() always returns 0
+    static constexpr bool MAX_RX_QUEUED     = bSERIAL_STATS_MAX_RX_QUEUED;      // when false, rxMaxEnqueued() always returns 0
+  };
+
+  extern MarlinSerial<MarlinSerialCfg> customizedSerial;
 
 #endif // !USBCON
 
diff --git a/Marlin/src/HAL/HAL_DUE/MarlinSerialUSB_Due.h b/Marlin/src/HAL/HAL_DUE/MarlinSerialUSB_Due.h
index bfa2ccedc04048adcba6b9671c793560f39a6dcd..925c322cfdc20048e57b957b988a26d4069635df 100644
--- a/Marlin/src/HAL/HAL_DUE/MarlinSerialUSB_Due.h
+++ b/Marlin/src/HAL/HAL_DUE/MarlinSerialUSB_Due.h
@@ -52,11 +52,11 @@ public:
   static void write(const uint8_t c);
 
   #if ENABLED(SERIAL_STATS_DROPPED_RX)
-  FORCE_INLINE static uint32_t dropped() { return 0; }
+    FORCE_INLINE static uint32_t dropped() { return 0; }
   #endif
 
   #if ENABLED(SERIAL_STATS_MAX_RX_QUEUED)
-  FORCE_INLINE static int rxMaxEnqueued() { return 0; }
+    FORCE_INLINE static int rxMaxEnqueued() { return 0; }
   #endif
 
   FORCE_INLINE static void write(const char* str) { while (*str) write(*str++); }
diff --git a/Marlin/src/HAL/HAL_DUE/MarlinSerial_Due.cpp b/Marlin/src/HAL/HAL_DUE/MarlinSerial_Due.cpp
index 0ff98e62ffca67e2505aabb18529b672e59e9d45..7efa85e0500ef9b515322c807a22b8aae89c1a6e 100644
--- a/Marlin/src/HAL/HAL_DUE/MarlinSerial_Due.cpp
+++ b/Marlin/src/HAL/HAL_DUE/MarlinSerial_Due.cpp
@@ -29,100 +29,32 @@
 
 #include "../../inc/MarlinConfig.h"
 
-#include "MarlinSerial_Due.h"
-#include "InterruptVectors_Due.h"
-#include "../../Marlin.h"
-
 // If not using the USB port as serial port
 #if SERIAL_PORT >= 0
 
-  // Based on selected port, use the proper configuration
-  #if SERIAL_PORT == 0
-    #define HWUART UART
-    #define HWUART_IRQ UART_IRQn
-    #define HWUART_IRQ_ID ID_UART
-  #elif SERIAL_PORT == 1
-    #define HWUART ((Uart*)USART0)
-    #define HWUART_IRQ USART0_IRQn
-    #define HWUART_IRQ_ID ID_USART0
-  #elif SERIAL_PORT == 2
-    #define HWUART ((Uart*)USART1)
-    #define HWUART_IRQ USART1_IRQn
-    #define HWUART_IRQ_ID ID_USART1
-  #elif SERIAL_PORT == 3
-    #define HWUART ((Uart*)USART2)
-    #define HWUART_IRQ USART2_IRQn
-    #define HWUART_IRQ_ID ID_USART2
-  #elif SERIAL_PORT == 4
-    #define HWUART ((Uart*)USART3)
-    #define HWUART_IRQ USART3_IRQn
-    #define HWUART_IRQ_ID ID_USART3
-  #endif
-
-  struct ring_buffer_r {
-    unsigned char buffer[RX_BUFFER_SIZE];
-    volatile ring_buffer_pos_t head, tail;
-  };
-
-  #if TX_BUFFER_SIZE > 0
-    struct ring_buffer_t {
-      unsigned char buffer[TX_BUFFER_SIZE];
-      volatile uint8_t head, tail;
-    };
-  #endif
-
-  ring_buffer_r rx_buffer = { { 0 }, 0, 0 };
-  #if TX_BUFFER_SIZE > 0
-    ring_buffer_t tx_buffer = { { 0 }, 0, 0 };
-  #endif
-  static bool _written;
-
-  #if ENABLED(SERIAL_XON_XOFF)
-    constexpr uint8_t XON_XOFF_CHAR_SENT = 0x80,  // XON / XOFF Character was sent
-                      XON_XOFF_CHAR_MASK = 0x1F;  // XON / XOFF character to send
-    // XON / XOFF character definitions
-    constexpr uint8_t XON_CHAR  = 17, XOFF_CHAR = 19;
-    uint8_t xon_xoff_state = XON_XOFF_CHAR_SENT | XON_CHAR;
-
-    // Validate that RX buffer size is at least 4096 bytes- According to several experiments, on
-    // the original Arduino Due that uses a ATmega16U2 as USB to serial bridge, due to the introduced
-    // latencies, at least 2959 bytes of RX buffering (when transmitting at 250kbits/s) are required
-    // to avoid overflows.
-
-    #if RX_BUFFER_SIZE < 4096
-      #error Arduino DUE requires at least 4096 bytes of RX buffer to avoid buffer overflows when using XON/XOFF handshake
-    #endif
-  #endif
-
-  #if ENABLED(SERIAL_STATS_DROPPED_RX)
-    uint8_t rx_dropped_bytes = 0;
-  #endif
-
-  #if ENABLED(SERIAL_STATS_RX_BUFFER_OVERRUNS)
-    uint8_t rx_buffer_overruns = 0;
-  #endif
-
-  #if ENABLED(SERIAL_STATS_RX_FRAMING_ERRORS)
-    uint8_t rx_framing_errors = 0;
-  #endif
-
-  #if ENABLED(SERIAL_STATS_MAX_RX_QUEUED)
-    ring_buffer_pos_t rx_max_enqueued = 0;
-  #endif
+  #include "MarlinSerial_Due.h"
+  #include "InterruptVectors_Due.h"
+  #include "../../Marlin.h"
+
+  template<typename Cfg> typename MarlinSerial<Cfg>::ring_buffer_r MarlinSerial<Cfg>::rx_buffer = { 0 };
+  template<typename Cfg> typename MarlinSerial<Cfg>::ring_buffer_t MarlinSerial<Cfg>::tx_buffer = { 0 };
+  template<typename Cfg> bool     MarlinSerial<Cfg>::_written = false;
+  template<typename Cfg> uint8_t  MarlinSerial<Cfg>::xon_xoff_state = MarlinSerial<Cfg>::XON_XOFF_CHAR_SENT | MarlinSerial<Cfg>::XON_CHAR;
+  template<typename Cfg> uint8_t  MarlinSerial<Cfg>::rx_dropped_bytes = 0;
+  template<typename Cfg> uint8_t  MarlinSerial<Cfg>::rx_buffer_overruns = 0;
+  template<typename Cfg> uint8_t  MarlinSerial<Cfg>::rx_framing_errors = 0;
+  template<typename Cfg> typename MarlinSerial<Cfg>::ring_buffer_pos_t MarlinSerial<Cfg>::rx_max_enqueued = 0;
 
   // A SW memory barrier, to ensure GCC does not overoptimize loops
   #define sw_barrier() asm volatile("": : :"memory");
 
-  #if ENABLED(EMERGENCY_PARSER)
-    #include "../../feature/emergency_parser.h"
-  #endif
+  #include "../../feature/emergency_parser.h"
 
   // (called with RX interrupts disabled)
-  FORCE_INLINE void store_rxd_char() {
+  template<typename Cfg>
+  FORCE_INLINE void MarlinSerial<Cfg>::store_rxd_char() {
 
-    #if ENABLED(EMERGENCY_PARSER)
-      static EmergencyParser::State emergency_state; // = EP_RESET
-    #endif
+    static EmergencyParser::State emergency_state; // = EP_RESET
 
     // Get the tail - Nothing can alter its value while we are at this ISR
     const ring_buffer_pos_t t = rx_buffer.tail;
@@ -131,14 +63,12 @@
     ring_buffer_pos_t h = rx_buffer.head;
 
     // Get the next element
-    ring_buffer_pos_t i = (ring_buffer_pos_t)(h + 1) & (ring_buffer_pos_t)(RX_BUFFER_SIZE - 1);
+    ring_buffer_pos_t i = (ring_buffer_pos_t)(h + 1) & (ring_buffer_pos_t)(Cfg::RX_SIZE - 1);
 
     // Read the character from the USART
     uint8_t c = HWUART->UART_RHR;
 
-    #if ENABLED(EMERGENCY_PARSER)
-      emergency_parser.update(emergency_state, c);
-    #endif
+    if (Cfg::EMERGENCYPARSER) emergency_parser.update(emergency_state, c);
 
     // If the character is to be stored at the index just before the tail
     // (such that the head would advance to the current tail), the RX FIFO is
@@ -147,29 +77,26 @@
       rx_buffer.buffer[h] = c;
       h = i;
     }
-    #if ENABLED(SERIAL_STATS_DROPPED_RX)
-      else if (!++rx_dropped_bytes) --rx_dropped_bytes;
-    #endif
+    else if (Cfg::DROPPED_RX && !++rx_dropped_bytes)
+      --rx_dropped_bytes;
 
-    #if ENABLED(SERIAL_STATS_MAX_RX_QUEUED)
-      const ring_buffer_pos_t rx_count = (ring_buffer_pos_t)(h - t) & (ring_buffer_pos_t)(RX_BUFFER_SIZE - 1);
-      // Calculate count of bytes stored into the RX buffer
+    const ring_buffer_pos_t rx_count = (ring_buffer_pos_t)(h - t) & (ring_buffer_pos_t)(Cfg::RX_SIZE - 1);
+    // rx_count (computed above) is the number of bytes currently stored in the RX buffer
 
-      // Keep track of the maximum count of enqueued bytes
-      NOLESS(rx_max_enqueued, rx_count);
-    #endif
+    // Keep track of the maximum count of enqueued bytes
+    if (Cfg::MAX_RX_QUEUED) NOLESS(rx_max_enqueued, rx_count);
 
-    #if ENABLED(SERIAL_XON_XOFF)
+    if (Cfg::XONOFF) {
       // If the last char that was sent was an XON
       if ((xon_xoff_state & XON_XOFF_CHAR_MASK) == XON_CHAR) {
 
         // Bytes stored into the RX buffer
-        const ring_buffer_pos_t rx_count = (ring_buffer_pos_t)(h - t) & (ring_buffer_pos_t)(RX_BUFFER_SIZE - 1);
+        const ring_buffer_pos_t rx_count = (ring_buffer_pos_t)(h - t) & (ring_buffer_pos_t)(Cfg::RX_SIZE - 1);
 
         // If over 12.5% of RX buffer capacity, send XOFF before running out of
         // RX buffer space .. 325 bytes @ 250kbits/s needed to let the host react
         // and stop sending bytes. This translates to 13mS propagation time.
-        if (rx_count >= (RX_BUFFER_SIZE) / 8) {
+        if (rx_count >= (Cfg::RX_SIZE) / 8) {
 
           // At this point, definitely no TX interrupt was executing, since the TX isr can't be preempted.
           // Don't enable the TX interrupt here as a means to trigger the XOFF char, because if it happens
@@ -189,14 +116,12 @@
             if (status & UART_SR_RXRDY) {
               // We received a char while waiting for the TX buffer to be empty - Receive and process it!
 
-              i = (ring_buffer_pos_t)(h + 1) & (ring_buffer_pos_t)(RX_BUFFER_SIZE - 1);
+              i = (ring_buffer_pos_t)(h + 1) & (ring_buffer_pos_t)(Cfg::RX_SIZE - 1);
 
               // Read the character from the USART
               c = HWUART->UART_RHR;
 
-              #if ENABLED(EMERGENCY_PARSER)
-                emergency_parser.update(emergency_state, c);
-              #endif
+              if (Cfg::EMERGENCYPARSER) emergency_parser.update(emergency_state, c);
 
               // If the character is to be stored at the index just before the tail
               // (such that the head would advance to the current tail), the FIFO is
@@ -205,9 +130,8 @@
                 rx_buffer.buffer[h] = c;
                 h = i;
               }
-              #if ENABLED(SERIAL_STATS_DROPPED_RX)
-                else if (!++rx_dropped_bytes) --rx_dropped_bytes;
-              #endif
+              else if (Cfg::DROPPED_RX && !++rx_dropped_bytes)
+                --rx_dropped_bytes;
             }
             sw_barrier();
           }
@@ -226,14 +150,12 @@
             if (status & UART_SR_RXRDY) {
               // A char arrived while waiting for the TX buffer to be empty - Receive and process it!
 
-              i = (ring_buffer_pos_t)(h + 1) & (ring_buffer_pos_t)(RX_BUFFER_SIZE - 1);
+              i = (ring_buffer_pos_t)(h + 1) & (ring_buffer_pos_t)(Cfg::RX_SIZE - 1);
 
               // Read the character from the USART
               c = HWUART->UART_RHR;
 
-              #if ENABLED(EMERGENCY_PARSER)
-                emergency_parser.update(emergency_state, c);
-              #endif
+              if (Cfg::EMERGENCYPARSER) emergency_parser.update(emergency_state, c);
 
               // If the character is to be stored at the index just before the tail
               // (such that the head would advance to the current tail), the FIFO is
@@ -242,9 +164,8 @@
                 rx_buffer.buffer[h] = c;
                 h = i;
               }
-              #if ENABLED(SERIAL_STATS_DROPPED_RX)
-                else if (!++rx_dropped_bytes) --rx_dropped_bytes;
-              #endif
+              else if (Cfg::DROPPED_RX && !++rx_dropped_bytes)
+                --rx_dropped_bytes;
             }
             sw_barrier();
           }
@@ -253,20 +174,20 @@
           // have any issues writing to the UART TX register if it needs to!
         }
       }
-    #endif // SERIAL_XON_XOFF
+    }
 
     // Store the new head value
     rx_buffer.head = h;
   }
 
-  #if TX_BUFFER_SIZE > 0
-
-    FORCE_INLINE void _tx_thr_empty_irq(void) {
+  template<typename Cfg>
+  FORCE_INLINE void MarlinSerial<Cfg>::_tx_thr_empty_irq(void) {
+    if (Cfg::TX_SIZE > 0) {
       // Read positions
       uint8_t t = tx_buffer.tail;
       const uint8_t h = tx_buffer.head;
 
-      #if ENABLED(SERIAL_XON_XOFF)
+      if (Cfg::XONOFF) {
         // If an XON char is pending to be sent, do it now
         if (xon_xoff_state == XON_CHAR) {
 
@@ -281,7 +202,7 @@
 
           return;
         }
-      #endif
+      }
 
       // If nothing to transmit, just disable TX interrupts. This could
       // happen as the result of the non atomicity of the disabling of RX
@@ -293,41 +214,32 @@
 
       // There is something to TX, Send the next byte
       const uint8_t c = tx_buffer.buffer[t];
-      t = (t + 1) & (TX_BUFFER_SIZE - 1);
+      t = (t + 1) & (Cfg::TX_SIZE - 1);
       HWUART->UART_THR = c;
       tx_buffer.tail = t;
 
       // Disable interrupts if there is nothing to transmit following this byte
       if (h == t) HWUART->UART_IDR = UART_IDR_TXRDY;
     }
+  }
 
-  #endif // TX_BUFFER_SIZE > 0
-
-  static void UART_ISR(void) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::UART_ISR(void) {
     const uint32_t status = HWUART->UART_SR;
 
     // Data received?
     if (status & UART_SR_RXRDY) store_rxd_char();
 
-    #if TX_BUFFER_SIZE > 0
+    if (Cfg::TX_SIZE > 0) {
       // Something to send, and TX interrupts are enabled (meaning something to send)?
       if ((status & UART_SR_TXRDY) && (HWUART->UART_IMR & UART_IMR_TXRDY)) _tx_thr_empty_irq();
-    #endif
+    }
 
     // Acknowledge errors
     if ((status & UART_SR_OVRE) || (status & UART_SR_FRAME)) {
-
-      #if ENABLED(SERIAL_STATS_DROPPED_RX)
-        if (status & UART_SR_OVRE && !++rx_dropped_bytes) --rx_dropped_bytes;
-      #endif
-
-      #if ENABLED(SERIAL_STATS_RX_BUFFER_OVERRUNS)
-        if (status & UART_SR_OVRE && !++rx_buffer_overruns) --rx_buffer_overruns;
-      #endif
-
-      #if ENABLED(SERIAL_STATS_RX_FRAMING_ERRORS)
-        if (status & UART_SR_FRAME && !++rx_framing_errors) --rx_framing_errors;
-      #endif
+      if (Cfg::DROPPED_RX && (status & UART_SR_OVRE) && !++rx_dropped_bytes) --rx_dropped_bytes;
+      if (Cfg::RX_OVERRUNS && (status & UART_SR_OVRE) && !++rx_buffer_overruns) --rx_buffer_overruns;
+      if (Cfg::RX_FRAMING_ERRORS && (status & UART_SR_FRAME) && !++rx_framing_errors) --rx_framing_errors;
 
       // TODO: error reporting outside ISR
       HWUART->UART_CR = UART_CR_RSTSTA;
@@ -335,8 +247,8 @@
   }
 
   // Public Methods
-
-  void MarlinSerial::begin(const long baud_setting) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::begin(const long baud_setting) {
 
     // Disable UART interrupt in NVIC
     NVIC_DisableIRQ( HWUART_IRQ );
@@ -382,12 +294,11 @@
     // Enable receiver and transmitter
     HWUART->UART_CR = UART_CR_RXEN | UART_CR_TXEN;
 
-    #if TX_BUFFER_SIZE > 0
-      _written = false;
-    #endif
+    if (Cfg::TX_SIZE > 0) _written = false;
   }
 
-  void MarlinSerial::end() {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::end() {
     // Disable UART interrupt in NVIC
     NVIC_DisableIRQ( HWUART_IRQ );
 
@@ -399,12 +310,14 @@
     pmc_disable_periph_clk( HWUART_IRQ_ID );
   }
 
-  int MarlinSerial::peek(void) {
+  template<typename Cfg>
+  int MarlinSerial<Cfg>::peek(void) {
     const int v = rx_buffer.head == rx_buffer.tail ? -1 : rx_buffer.buffer[rx_buffer.tail];
     return v;
   }
 
-  int MarlinSerial::read(void) {
+  template<typename Cfg>
+  int MarlinSerial<Cfg>::read(void) {
 
     const ring_buffer_pos_t h = rx_buffer.head;
     ring_buffer_pos_t t = rx_buffer.tail;
@@ -412,64 +325,74 @@
     if (h == t) return -1;
 
     int v = rx_buffer.buffer[t];
-    t = (ring_buffer_pos_t)(t + 1) & (RX_BUFFER_SIZE - 1);
+    t = (ring_buffer_pos_t)(t + 1) & (Cfg::RX_SIZE - 1);
 
     // Advance tail
     rx_buffer.tail = t;
 
-    #if ENABLED(SERIAL_XON_XOFF)
+    if (Cfg::XONOFF) {
       // If the XOFF char was sent, or about to be sent...
       if ((xon_xoff_state & XON_XOFF_CHAR_MASK) == XOFF_CHAR) {
         // Get count of bytes in the RX buffer
-        const ring_buffer_pos_t rx_count = (ring_buffer_pos_t)(h - t) & (ring_buffer_pos_t)(RX_BUFFER_SIZE - 1);
+        const ring_buffer_pos_t rx_count = (ring_buffer_pos_t)(h - t) & (ring_buffer_pos_t)(Cfg::RX_SIZE - 1);
         // When below 10% of RX buffer capacity, send XON before running out of RX buffer bytes
-        if (rx_count < (RX_BUFFER_SIZE) / 10) {
-          #if TX_BUFFER_SIZE > 0
+        if (rx_count < (Cfg::RX_SIZE) / 10) {
+          if (Cfg::TX_SIZE > 0) {
             // Signal we want an XON character to be sent.
             xon_xoff_state = XON_CHAR;
             // Enable TX isr.
             HWUART->UART_IER = UART_IER_TXRDY;
-          #else
+          }
+          else {
             // If not using TX interrupts, we must send the XON char now
             xon_xoff_state = XON_CHAR | XON_XOFF_CHAR_SENT;
             while (!(HWUART->UART_SR & UART_SR_TXRDY)) sw_barrier();
             HWUART->UART_THR = XON_CHAR;
-          #endif
+          }
         }
       }
-    #endif
+    }
 
     return v;
   }
 
-  ring_buffer_pos_t MarlinSerial::available(void) {
+  template<typename Cfg>
+  typename MarlinSerial<Cfg>::ring_buffer_pos_t MarlinSerial<Cfg>::available(void) {
     const ring_buffer_pos_t h = rx_buffer.head, t = rx_buffer.tail;
-    return (ring_buffer_pos_t)(RX_BUFFER_SIZE + h - t) & (RX_BUFFER_SIZE - 1);
+    return (ring_buffer_pos_t)(Cfg::RX_SIZE + h - t) & (Cfg::RX_SIZE - 1);
   }
 
-  void MarlinSerial::flush(void) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::flush(void) {
     rx_buffer.tail = rx_buffer.head;
 
-    #if ENABLED(SERIAL_XON_XOFF)
+    if (Cfg::XONOFF) {
       if ((xon_xoff_state & XON_XOFF_CHAR_MASK) == XOFF_CHAR) {
-        #if TX_BUFFER_SIZE > 0
+        if (Cfg::TX_SIZE > 0) {
           // Signal we want an XON character to be sent.
           xon_xoff_state = XON_CHAR;
           // Enable TX isr.
           HWUART->UART_IER = UART_IER_TXRDY;
-        #else
+        }
+        else {
           // If not using TX interrupts, we must send the XON char now
           xon_xoff_state = XON_CHAR | XON_XOFF_CHAR_SENT;
           while (!(HWUART->UART_SR & UART_SR_TXRDY)) sw_barrier();
           HWUART->UART_THR = XON_CHAR;
-        #endif
+        }
       }
-    #endif
+    }
   }
 
-  #if TX_BUFFER_SIZE > 0
-    void MarlinSerial::write(const uint8_t c) {
-      _written = true;
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::write(const uint8_t c) {
+    _written = true;
+
+    if (Cfg::TX_SIZE == 0) {
+      while (!(HWUART->UART_SR & UART_SR_TXRDY)) sw_barrier();
+      HWUART->UART_THR = c;
+    }
+    else {
 
       // If the TX interrupts are disabled and the data register
       // is empty, just write the byte to the data register and
@@ -483,7 +406,7 @@
         return;
       }
 
-      const uint8_t i = (tx_buffer.head + 1) & (TX_BUFFER_SIZE - 1);
+      const uint8_t i = (tx_buffer.head + 1) & (Cfg::TX_SIZE - 1);
 
       // If global interrupts are disabled (as the result of being called from an ISR)...
       if (!ISRS_ENABLED()) {
@@ -508,10 +431,25 @@
       // Enable TX isr - Non atomic, but it will eventually enable TX isr
       HWUART->UART_IER = UART_IER_TXRDY;
     }
+  }
 
-    void MarlinSerial::flushTX(void) {
-      // TX
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::flushTX(void) {
+    // TX
 
+    if (Cfg::TX_SIZE == 0) {
+      // No bytes written, no need to flush. This special case is needed since there's
+      // no way to force the TXC (transmit complete) bit to 1 during initialization.
+      if (!_written) return;
+
+      // Wait until everything was transmitted
+      while (!(HWUART->UART_SR & UART_SR_TXEMPTY)) sw_barrier();
+
+      // At this point nothing is queued anymore (writes are unbuffered here) and
+      // the hardware has finished transmission (UART_SR_TXEMPTY is set).
+
+    }
+    else {
       // If we have never written a byte, no need to flush. This special
       // case is needed since there is no way to force the TXC (transmit
       // complete) bit to 1 during initialization
@@ -536,51 +474,34 @@
       // At this point nothing is queued anymore (DRIE is disabled) and
       // the hardware finished transmission (TXC is set).
     }
-
-  #else // TX_BUFFER_SIZE == 0
-
-    void MarlinSerial::write(const uint8_t c) {
-      _written = true;
-      while (!(HWUART->UART_SR & UART_SR_TXRDY)) sw_barrier();
-      HWUART->UART_THR = c;
-    }
-
-    void MarlinSerial::flushTX(void) {
-      // TX
-
-      // No bytes written, no need to flush. This special case is needed since there's
-      // no way to force the TXC (transmit complete) bit to 1 during initialization.
-      if (!_written) return;
-
-      // Wait until everything was transmitted
-      while (!(HWUART->UART_SR & UART_SR_TXEMPTY)) sw_barrier();
-
-      // At this point nothing is queued anymore (DRIE is disabled) and
-      // the hardware finished transmission (TXC is set).
-    }
-  #endif // TX_BUFFER_SIZE == 0
+  }
 
   /**
    * Imports from print.h
    */
 
-  void MarlinSerial::print(char c, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::print(char c, int base) {
     print((long)c, base);
   }
 
-  void MarlinSerial::print(unsigned char b, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::print(unsigned char b, int base) {
     print((unsigned long)b, base);
   }
 
-  void MarlinSerial::print(int n, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::print(int n, int base) {
     print((long)n, base);
   }
 
-  void MarlinSerial::print(unsigned int n, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::print(unsigned int n, int base) {
     print((unsigned long)n, base);
   }
 
-  void MarlinSerial::print(long n, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::print(long n, int base) {
     if (base == 0) write(n);
     else if (base == 10) {
       if (n < 0) { print('-'); n = -n; }
@@ -590,68 +511,80 @@
       printNumber(n, base);
   }
 
-  void MarlinSerial::print(unsigned long n, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::print(unsigned long n, int base) {
     if (base == 0) write(n);
     else printNumber(n, base);
   }
 
-  void MarlinSerial::print(double n, int digits) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::print(double n, int digits) {
     printFloat(n, digits);
   }
 
-  void MarlinSerial::println(void) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(void) {
     print('\r');
     print('\n');
   }
 
-  void MarlinSerial::println(const String& s) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(const String& s) {
     print(s);
     println();
   }
 
-  void MarlinSerial::println(const char c[]) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(const char c[]) {
     print(c);
     println();
   }
 
-  void MarlinSerial::println(char c, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(char c, int base) {
     print(c, base);
     println();
   }
 
-  void MarlinSerial::println(unsigned char b, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(unsigned char b, int base) {
     print(b, base);
     println();
   }
 
-  void MarlinSerial::println(int n, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(int n, int base) {
     print(n, base);
     println();
   }
 
-  void MarlinSerial::println(unsigned int n, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(unsigned int n, int base) {
     print(n, base);
     println();
   }
 
-  void MarlinSerial::println(long n, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(long n, int base) {
     print(n, base);
     println();
   }
 
-  void MarlinSerial::println(unsigned long n, int base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(unsigned long n, int base) {
     print(n, base);
     println();
   }
 
-  void MarlinSerial::println(double n, int digits) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::println(double n, int digits) {
     print(n, digits);
     println();
   }
 
   // Private Methods
-
-  void MarlinSerial::printNumber(unsigned long n, uint8_t base) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::printNumber(unsigned long n, uint8_t base) {
     if (n) {
       unsigned char buf[8 * sizeof(long)]; // Enough space for base 2
       int8_t i = 0;
@@ -666,7 +599,8 @@
       print('0');
   }
 
-  void MarlinSerial::printFloat(double number, uint8_t digits) {
+  template<typename Cfg>
+  void MarlinSerial<Cfg>::printFloat(double number, uint8_t digits) {
     // Handle negative numbers
     if (number < 0.0) {
       print('-');
@@ -697,7 +631,11 @@
   }
 
   // Preinstantiate
-  MarlinSerial customizedSerial;
+  template class MarlinSerial<MarlinSerialCfg>;
+
+  // Define the global customizedSerial object (declared extern in the header)
+  MarlinSerial<MarlinSerialCfg> customizedSerial;
+
 #endif
 
 #endif // ARDUINO_ARCH_SAM
diff --git a/Marlin/src/HAL/HAL_DUE/MarlinSerial_Due.h b/Marlin/src/HAL/HAL_DUE/MarlinSerial_Due.h
index 2f5a07f515e48a38b09c1d2699dd2feeb2856133..7e20596b4c2fcd7d2097f3f830e2eed8485350fd 100644
--- a/Marlin/src/HAL/HAL_DUE/MarlinSerial_Due.h
+++ b/Marlin/src/HAL/HAL_DUE/MarlinSerial_Due.h
@@ -29,7 +29,7 @@
 #ifndef MARLINSERIAL_DUE_H
 #define MARLINSERIAL_DUE_H
 
-#include "../../inc/MarlinConfig.h"
+#include "../shared/MarlinSerial.h"
 
 #if SERIAL_PORT >= 0
 
@@ -60,29 +60,60 @@
 //  #error "TX_BUFFER_SIZE must be 0, a power of 2 greater than 1, and no greater than 256."
 //#endif
 
-#if RX_BUFFER_SIZE > 256
-  typedef uint16_t ring_buffer_pos_t;
-#else
-  typedef uint8_t ring_buffer_pos_t;
-#endif
-
-#if ENABLED(SERIAL_STATS_DROPPED_RX)
-  extern uint8_t rx_dropped_bytes;
-#endif
-
-#if ENABLED(SERIAL_STATS_RX_BUFFER_OVERRUNS)
-  extern uint8_t rx_buffer_overruns;
-#endif
-
-#if ENABLED(SERIAL_STATS_RX_FRAMING_ERRORS)
-  extern uint8_t rx_framing_errors;
-#endif
+// Compile-time type selector: TypeSelector<b, T, F>::type is T when b is true and F when b is false
+template<bool b, typename T, typename F> struct TypeSelector { typedef T type;} ; // Primary template: yields T
+template<typename T, typename F> struct TypeSelector<false, T, F> { typedef F type; }; // Specialization for b == false: yields F
 
-#if ENABLED(SERIAL_STATS_MAX_RX_QUEUED)
-  extern ring_buffer_pos_t rx_max_enqueued;
-#endif
+// Templated structure wrapper: exposes the fixed address 'addr' as a pointer to S through operator->
+template<typename S, unsigned int addr> struct StructWrapper {
+  constexpr StructWrapper(int) {}                          // The int argument is ignored; it allows initialization written as '= 0'
+  FORCE_INLINE S* operator->() const { return (S*)addr; }  // Cast the compile-time address to S*
+};
 
+template<typename Cfg>
 class MarlinSerial {
+protected:
+  // Information for all supported UARTs
+  static constexpr uint32_t BASES[] = {0x400E0800U, 0x40098000U, 0x4009C000U, 0x400A0000U, 0x400A4000U};
+  static constexpr IRQn_Type IRQS[] = {  UART_IRQn, USART0_IRQn, USART1_IRQn, USART2_IRQn, USART3_IRQn};
+  static constexpr int    IRQ_IDS[] = {    ID_UART,   ID_USART0,   ID_USART1,   ID_USART2,   ID_USART3};
+
+  // Aliases for the table entries selected by Cfg::PORT, for shorter code
+  static constexpr StructWrapper<Uart,BASES[Cfg::PORT]> HWUART = 0;
+  static constexpr IRQn_Type HWUART_IRQ = IRQS[Cfg::PORT];
+  static constexpr int HWUART_IRQ_ID = IRQ_IDS[Cfg::PORT];
+
+  // Choose the ring buffer index type from the RX buffer size: uint16_t if RX_SIZE > 256, else uint8_t
+  typedef typename TypeSelector<(Cfg::RX_SIZE>256), uint16_t, uint8_t>::type ring_buffer_pos_t;
+
+  struct ring_buffer_r {                    // Type of rx_buffer
+    volatile ring_buffer_pos_t head, tail;  // Head and tail positions; their width follows Cfg::RX_SIZE
+    unsigned char buffer[Cfg::RX_SIZE];     // Storage, Cfg::RX_SIZE elements
+  };
+
+  struct ring_buffer_t {                    // Type of tx_buffer
+    volatile uint8_t head, tail;            // Head and tail positions; always 8-bit, independent of Cfg::TX_SIZE
+    unsigned char buffer[Cfg::TX_SIZE];     // Storage, Cfg::TX_SIZE elements
+  };
+
+  static ring_buffer_r rx_buffer;
+  static ring_buffer_t tx_buffer;
+  static bool _written;
+
+  static constexpr uint8_t XON_XOFF_CHAR_SENT = 0x80,  // XON / XOFF Character was sent
+                           XON_XOFF_CHAR_MASK = 0x1F;  // XON / XOFF character to send
+
+  // XON / XOFF character definitions
+  static constexpr uint8_t XON_CHAR  = 17, XOFF_CHAR = 19;
+  static uint8_t xon_xoff_state,
+                 rx_dropped_bytes,
+                 rx_buffer_overruns,
+                 rx_framing_errors;
+  static ring_buffer_pos_t rx_max_enqueued;
+
+  FORCE_INLINE static void store_rxd_char();
+  FORCE_INLINE static void _tx_thr_empty_irq(void);
+  static void UART_ISR(void);
 
 public:
   MarlinSerial() {};
@@ -95,21 +126,10 @@ public:
   static void write(const uint8_t c);
   static void flushTX(void);
 
-  #if ENABLED(SERIAL_STATS_DROPPED_RX)
-    FORCE_INLINE static uint32_t dropped() { return rx_dropped_bytes; }
-  #endif
-
-  #if ENABLED(SERIAL_STATS_RX_BUFFER_OVERRUNS)
-    FORCE_INLINE static uint32_t buffer_overruns() { return rx_buffer_overruns; }
-  #endif
-
-  #if ENABLED(SERIAL_STATS_RX_FRAMING_ERRORS)
-    FORCE_INLINE static uint32_t framing_errors() { return rx_framing_errors; }
-  #endif
-
-  #if ENABLED(SERIAL_STATS_MAX_RX_QUEUED)
-    FORCE_INLINE static ring_buffer_pos_t rxMaxEnqueued() { return rx_max_enqueued; }
-  #endif
+  FORCE_INLINE static uint8_t dropped() { return Cfg::DROPPED_RX ? rx_dropped_bytes : 0; }
+  FORCE_INLINE static uint8_t buffer_overruns() { return Cfg::RX_OVERRUNS ? rx_buffer_overruns : 0; }
+  FORCE_INLINE static uint8_t framing_errors() { return Cfg::RX_FRAMING_ERRORS ? rx_framing_errors : 0; }
+  FORCE_INLINE static ring_buffer_pos_t rxMaxEnqueued() { return Cfg::MAX_RX_QUEUED ? rx_max_enqueued : 0; }
 
   FORCE_INLINE static void write(const char* str) { while (*str) write(*str++); }
   FORCE_INLINE static void write(const uint8_t* buffer, size_t size) { while (size--) write(*buffer++); }
@@ -141,7 +161,20 @@ private:
   static void printFloat(double, uint8_t);
 };
 
-extern MarlinSerial customizedSerial;
+// Serial port configuration: compile-time settings passed to MarlinSerial<> as its Cfg parameter
+struct MarlinSerialCfg {
+  static constexpr int PORT               = SERIAL_PORT;                      // Index into the BASES / IRQS / IRQ_IDS tables
+  static constexpr unsigned int RX_SIZE   = RX_BUFFER_SIZE;                   // Length of rx_buffer.buffer; also selects ring_buffer_pos_t
+  static constexpr unsigned int TX_SIZE   = TX_BUFFER_SIZE;                   // Length of tx_buffer.buffer
+  static constexpr bool XONOFF            = bSERIAL_XON_XOFF;                 // Mirrors the SERIAL_XON_XOFF option
+  static constexpr bool EMERGENCYPARSER   = bEMERGENCY_PARSER;                // Mirrors the EMERGENCY_PARSER option
+  static constexpr bool DROPPED_RX        = bSERIAL_STATS_DROPPED_RX;         // When false, dropped() returns 0
+  static constexpr bool RX_OVERRUNS       = bSERIAL_STATS_RX_BUFFER_OVERRUNS; // When false, buffer_overruns() returns 0
+  static constexpr bool RX_FRAMING_ERRORS = bSERIAL_STATS_RX_FRAMING_ERRORS;  // When false, framing_errors() returns 0
+  static constexpr bool MAX_RX_QUEUED     = bSERIAL_STATS_MAX_RX_QUEUED;      // When false, rxMaxEnqueued() returns 0
+};
+
+extern MarlinSerial<MarlinSerialCfg> customizedSerial;
 
 #endif // SERIAL_PORT >= 0
 
diff --git a/Marlin/src/HAL/shared/MarlinSerial.h b/Marlin/src/HAL/shared/MarlinSerial.h
new file mode 100644
index 0000000000000000000000000000000000000000..3efd877023842df229b1b62cc963e3d0ccb07f6b
--- /dev/null
+++ b/Marlin/src/HAL/shared/MarlinSerial.h
@@ -0,0 +1,61 @@
+/**
+ * Marlin 3D Printer Firmware
+ * Copyright (C) 2016 MarlinFirmware [https://github.com/MarlinFirmware/Marlin]
+ *
+ * Based on Sprinter and grbl.
+ * Copyright (C) 2011 Camiel Gubbels / Erik van der Zalm
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#pragma once
+
+/**
+ * HAL/shared/MarlinSerial.h
+ */
+
+#include "../../inc/MarlinConfigPre.h"
+
+constexpr bool // Each flag below starts as 'false'; when its ENABLED(...) test passes, '|| true' is appended and it becomes true
+  bSERIAL_XON_XOFF = (false
+    #if ENABLED(SERIAL_XON_XOFF)
+      || true
+    #endif
+  ),
+  bEMERGENCY_PARSER = (false
+    #if ENABLED(EMERGENCY_PARSER)
+      || true
+    #endif
+  ),
+  bSERIAL_STATS_DROPPED_RX = (false
+    #if ENABLED(SERIAL_STATS_DROPPED_RX)
+      || true
+    #endif
+  ),
+  bSERIAL_STATS_RX_BUFFER_OVERRUNS = (false
+    #if ENABLED(SERIAL_STATS_RX_BUFFER_OVERRUNS)
+      || true
+    #endif
+  ),
+  bSERIAL_STATS_RX_FRAMING_ERRORS = (false
+    #if ENABLED(SERIAL_STATS_RX_FRAMING_ERRORS)
+      || true
+    #endif
+  ),
+  bSERIAL_STATS_MAX_RX_QUEUED = (false
+    #if ENABLED(SERIAL_STATS_MAX_RX_QUEUED)
+      || true
+    #endif
+  );
+