diff --git a/Marlin/Configuration_adv.h b/Marlin/Configuration_adv.h
index 93ca69391887b87edb33b51226bdc183bdc0c79a..fdeeba22277d18bc102ea9e2fdf0ca742b5c130b 100644
--- a/Marlin/Configuration_adv.h
+++ b/Marlin/Configuration_adv.h
@@ -520,6 +520,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 4
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/MarlinSerial.cpp b/Marlin/MarlinSerial.cpp
index c87f31b315d5acc788596265f09eb15b9b5f7071..114ec1105aa1d06b6302e66f2e72bbf978a6679c 100644
--- a/Marlin/MarlinSerial.cpp
+++ b/Marlin/MarlinSerial.cpp
@@ -21,11 +21,12 @@
  */
 
 /**
-  HardwareSerial.cpp - Hardware serial library for Wiring
+  MarlinSerial.cpp - Hardware serial library for Wiring
   Copyright (c) 2006 Nicholas Zambetti.  All right reserved.
 
   Modified 23 November 2006 by David A. Mellis
   Modified 28 September 2010 by Mark Sproul
+  Modified 14 February 2016 by Andreas Hardtung (added tx buffer)
 */
 
 #include "Marlin.h"
@@ -38,9 +39,14 @@
 #if defined(UBRRH) || defined(UBRR0H) || defined(UBRR1H) || defined(UBRR2H) || defined(UBRR3H)
 
 #if UART_PRESENT(SERIAL_PORT)
-  ring_buffer rx_buffer  =  { { 0 }, 0, 0 };
+  ring_buffer_r rx_buffer  =  { { 0 }, 0, 0 };
+  #if TX_BUFFER_SIZE > 0
+    ring_buffer_t tx_buffer  =  { { 0 }, 0, 0 };
+    static bool _written;
+  #endif
 #endif
 
+
 FORCE_INLINE void store_char(unsigned char c) {
   CRITICAL_SECTION_START;
     uint8_t h = rx_buffer.head;
@@ -61,12 +67,38 @@ FORCE_INLINE void store_char(unsigned char c) {
   #endif
 }
 
+#if TX_BUFFER_SIZE > 0
+  FORCE_INLINE void _tx_udr_empty_irq(void)
+  {
+    // If interrupts are enabled, there must be more data in the output
+    // buffer. Send the next byte
+    uint8_t t = tx_buffer.tail;
+    uint8_t c = tx_buffer.buffer[t];
+    tx_buffer.tail = (t + 1) & (TX_BUFFER_SIZE - 1);
+
+    M_UDRx = c;
+
+    // clear the TXC bit -- "can be cleared by writing a one to its bit
+    // location". This makes sure flush() won't return until the bytes
+    // actually got written
+    SBI(M_UCSRxA, M_TXCx);
+
+    if (tx_buffer.head == tx_buffer.tail) {
+      // Buffer empty, so disable interrupts
+      CBI(M_UCSRxB, M_UDRIEx);
+    }
+  }
+
+  #if defined(M_USARTx_UDRE_vect)
+    ISR(M_USARTx_UDRE_vect) {
+      _tx_udr_empty_irq();
+    }
+  #endif
+
+#endif
 
-//#elif defined(SIG_USART_RECV)
 #if defined(M_USARTx_RX_vect)
-  // fixed by Mark Sproul this is on the 644/644p
-  //SIGNAL(SIG_USART_RECV)
-  SIGNAL(M_USARTx_RX_vect) {
+  ISR(M_USARTx_RX_vect) {
     unsigned char c  =  M_UDRx;
     store_char(c);
   }
@@ -107,14 +139,25 @@ void MarlinSerial::begin(long baud) {
   SBI(M_UCSRxB, M_RXENx);
   SBI(M_UCSRxB, M_TXENx);
   SBI(M_UCSRxB, M_RXCIEx);
+  #if TX_BUFFER_SIZE > 0
+    CBI(M_UCSRxB, M_UDRIEx);
+    _written = false;
+  #endif
 }
 
 void MarlinSerial::end() {
   CBI(M_UCSRxB, M_RXENx);
   CBI(M_UCSRxB, M_TXENx);
   CBI(M_UCSRxB, M_RXCIEx);
+  CBI(M_UCSRxB, M_UDRIEx);
 }
 
+void MarlinSerial::checkRx(void) {
+  if (TEST(M_UCSRxA, M_RXCx)) {
+    uint8_t c  =  M_UDRx;
+    store_char(c);
+  }
+}
 
 int MarlinSerial::peek(void) {
   int v;
@@ -145,7 +188,16 @@ int MarlinSerial::read(void) {
   return v;
 }
 
-void MarlinSerial::flush() {
+uint8_t MarlinSerial::available(void) {
+  CRITICAL_SECTION_START;
+    uint8_t h = rx_buffer.head;
+    uint8_t t = rx_buffer.tail;
+  CRITICAL_SECTION_END;
+  return (uint8_t)(RX_BUFFER_SIZE + h - t) & (RX_BUFFER_SIZE - 1);
+}
+
+void MarlinSerial::flush(void) {
+  // RX
   // don't reverse this or there may be problems if the RX interrupt
   // occurs after reading the value of rx_buffer_head but before writing
   // the value to rx_buffer_tail; the previous value of rx_buffer_head
@@ -156,6 +208,86 @@ void MarlinSerial::flush() {
   CRITICAL_SECTION_END;
 }
 
+#if TX_BUFFER_SIZE > 0
+  uint8_t MarlinSerial::availableForWrite(void) {
+    CRITICAL_SECTION_START;
+      uint8_t h = tx_buffer.head;
+      uint8_t t = tx_buffer.tail;
+    CRITICAL_SECTION_END;
+    return (uint8_t)(TX_BUFFER_SIZE + h - t) & (TX_BUFFER_SIZE - 1);
+  }
+
+  void MarlinSerial::write(uint8_t c) {
+    _written = true;
+    CRITICAL_SECTION_START;
+      bool emty = (tx_buffer.head == tx_buffer.tail);
+    CRITICAL_SECTION_END;
+    // If the buffer and the data register is empty, just write the byte
+    // to the data register and be done. This shortcut helps
+    // significantly improve the effective datarate at high (>
+    // 500kbit/s) bitrates, where interrupt overhead becomes a slowdown.
+    if (emty && TEST(M_UCSRxA, M_UDREx)) {
+      CRITICAL_SECTION_START;
+        M_UDRx = c;
+        SBI(M_UCSRxA, M_TXCx);
+      CRITICAL_SECTION_END;
+      return;
+    }
+    uint8_t i = (tx_buffer.head + 1) & (TX_BUFFER_SIZE - 1);
+
+    // If the output buffer is full, there's nothing for it other than to
+    // wait for the interrupt handler to empty it a bit
+    while (i == tx_buffer.tail) {
+      if (!TEST(SREG, SREG_I)) {
+        // Interrupts are disabled, so we'll have to poll the data
+        // register empty flag ourselves. If it is set, pretend an
+        // interrupt has happened and call the handler to free up
+        // space for us.
+        if(TEST(M_UCSRxA, M_UDREx))
+       _tx_udr_empty_irq();
+      } else {
+        // nop, the interrupt handler will free up space for us
+      }
+    }
+
+    tx_buffer.buffer[tx_buffer.head] = c;
+    { CRITICAL_SECTION_START;
+        tx_buffer.head = i;
+        SBI(M_UCSRxB, M_UDRIEx);
+      CRITICAL_SECTION_END;
+    }
+    return;
+  }
+
+  void MarlinSerial::flushTX(void) {
+    // TX
+    // If we have never written a byte, no need to flush. This special
+    // case is needed since there is no way to force the TXC (transmit
+    // complete) bit to 1 during initialization
+    if (!_written)
+      return;
+
+    while (TEST(M_UCSRxB, M_UDRIEx) || !TEST(M_UCSRxA, M_TXCx)) {
+      if (!TEST(SREG, SREG_I) && TEST(M_UCSRxB, M_UDRIEx))
+        // Interrupts are globally disabled, but the DR empty
+        // interrupt should be enabled, so poll the DR empty flag to
+        // prevent deadlock
+        if (TEST(M_UCSRxA, M_UDREx))
+          _tx_udr_empty_irq();
+    }
+    // If we get here, nothing is queued anymore (DRIE is disabled) and
+    // the hardware finished tranmission (TXC is set).
+}
+
+#else
+  void MarlinSerial::write(uint8_t c) {
+    while (!TEST(M_UCSRxA, M_UDREx))
+      ;
+    M_UDRx = c;
+  }
+#endif
+
+// end NEW
 
 /// imports from print.h
 
@@ -321,7 +453,7 @@ MarlinSerial customizedSerial;
   // Currently looking for: M108, M112, M410
   // If you alter the parser please don't forget to update the capabilities in Conditionals.h
 
-  void emergency_parser(unsigned char c) {
+  FORCE_INLINE void emergency_parser(unsigned char c) {
 
     enum e_parser_state {
       state_RESET,
diff --git a/Marlin/MarlinSerial.h b/Marlin/MarlinSerial.h
index b27b98169ac7bc9c10eef393262753a2d6a0bc5c..a5e2ee71dbf49762cada02ef9670b7543e32d92f 100644
--- a/Marlin/MarlinSerial.h
+++ b/Marlin/MarlinSerial.h
@@ -21,10 +21,12 @@
  */
 
 /**
-  HardwareSerial.h - Hardware serial library for Wiring
+  MarlinSerial.h - Hardware serial library for Wiring
   Copyright (c) 2006 Nicholas Zambetti.  All right reserved.
 
   Modified 28 September 2010 by Mark Sproul
+  Modified 14 February 2016 by Andreas Hardtung (added tx buffer)
+
 */
 
 #ifndef MarlinSerial_h
@@ -61,14 +63,17 @@
 #define M_UCSRxB SERIAL_REGNAME(UCSR,SERIAL_PORT,B)
 #define M_RXENx SERIAL_REGNAME(RXEN,SERIAL_PORT,)
 #define M_TXENx SERIAL_REGNAME(TXEN,SERIAL_PORT,)
+#define M_TXCx SERIAL_REGNAME(TXC,SERIAL_PORT,)
 #define M_RXCIEx SERIAL_REGNAME(RXCIE,SERIAL_PORT,)
 #define M_UDREx SERIAL_REGNAME(UDRE,SERIAL_PORT,)
+#define M_UDRIEx SERIAL_REGNAME(UDRIE,SERIAL_PORT,)
 #define M_UDRx SERIAL_REGNAME(UDR,SERIAL_PORT,)
 #define M_UBRRxH SERIAL_REGNAME(UBRR,SERIAL_PORT,H)
 #define M_UBRRxL SERIAL_REGNAME(UBRR,SERIAL_PORT,L)
 #define M_RXCx SERIAL_REGNAME(RXC,SERIAL_PORT,)
 #define M_USARTx_RX_vect SERIAL_REGNAME(USART,SERIAL_PORT,_RX_vect)
 #define M_U2Xx SERIAL_REGNAME(U2X,SERIAL_PORT,)
+#define M_USARTx_UDRE_vect SERIAL_REGNAME(USART,SERIAL_PORT,_UDRE_vect)
 
 
 #define DEC 10
@@ -87,18 +92,35 @@
 #ifndef RX_BUFFER_SIZE
   #define RX_BUFFER_SIZE 128
 #endif
+#ifndef TX_BUFFER_SIZE
+  #define TX_BUFFER_SIZE 32
+#endif
 #if !((RX_BUFFER_SIZE == 256) ||(RX_BUFFER_SIZE == 128) ||(RX_BUFFER_SIZE == 64) ||(RX_BUFFER_SIZE == 32) ||(RX_BUFFER_SIZE == 16) ||(RX_BUFFER_SIZE == 8) ||(RX_BUFFER_SIZE == 4) ||(RX_BUFFER_SIZE == 2))
   #error "RX_BUFFER_SIZE has to be a power of 2 and >= 2"
 #endif
+#if !((TX_BUFFER_SIZE == 256) ||(TX_BUFFER_SIZE == 128) ||(TX_BUFFER_SIZE == 64) ||(TX_BUFFER_SIZE == 32) ||(TX_BUFFER_SIZE == 16) ||(TX_BUFFER_SIZE == 8) ||(TX_BUFFER_SIZE == 4) ||(TX_BUFFER_SIZE == 2) ||(TX_BUFFER_SIZE == 0))
+  #error TX_BUFFER_SIZE has to be a power of 2 or 0
+#endif
 
-struct ring_buffer {
+struct ring_buffer_r {
   unsigned char buffer[RX_BUFFER_SIZE];
   volatile uint8_t head;
   volatile uint8_t tail;
 };
 
+#if TX_BUFFER_SIZE > 0
+  struct ring_buffer_t {
+    unsigned char buffer[TX_BUFFER_SIZE];
+    volatile uint8_t head;
+    volatile uint8_t tail;
+  };
+#endif
+
 #if UART_PRESENT(SERIAL_PORT)
-  extern ring_buffer rx_buffer;
+  extern ring_buffer_r rx_buffer;
+  #if TX_BUFFER_SIZE > 0
+    extern ring_buffer_t tx_buffer;
+  #endif
 #endif
 
 #if ENABLED(EMERGENCY_PARSER)
@@ -115,43 +137,13 @@ class MarlinSerial { //: public Stream
     int peek(void);
     int read(void);
     void flush(void);
-
-    FORCE_INLINE uint8_t available(void) {
-      CRITICAL_SECTION_START;
-        uint8_t h = rx_buffer.head;
-        uint8_t t = rx_buffer.tail;
-      CRITICAL_SECTION_END;
-      return (uint8_t)(RX_BUFFER_SIZE + h - t) & (RX_BUFFER_SIZE - 1);
-    }
-
-    FORCE_INLINE void write(uint8_t c) {
-      while (!TEST(M_UCSRxA, M_UDREx))
-        ;
-      M_UDRx = c;
-    }
-
-    FORCE_INLINE void checkRx(void) {
-      if (TEST(M_UCSRxA, M_RXCx)) {
-        unsigned char c  =  M_UDRx;
-        CRITICAL_SECTION_START;
-          uint8_t h = rx_buffer.head;
-          uint8_t i = (uint8_t)(h + 1) & (RX_BUFFER_SIZE - 1);
-
-          // if we should be storing the received character into the location
-          // just before the tail (meaning that the head would advance to the
-          // current location of the tail), we're about to overflow the buffer
-          // and so we don't write the character or advance the head.
-          if (i != rx_buffer.tail) {
-            rx_buffer.buffer[h] = c;
-            rx_buffer.head = i;
-          }
-        CRITICAL_SECTION_END;
-
-        #if ENABLED(EMERGENCY_PARSER)
-          emergency_parser(c);
-        #endif
-      }
-    }
+    uint8_t available(void);
+    void checkRx(void);
+    void write(uint8_t c);
+    #if TX_BUFFER_SIZE > 0
+      uint8_t availableForWrite(void);
+      void flushTX(void);
+    #endif
 
   private:
     void printNumber(unsigned long, uint8_t);
diff --git a/Marlin/example_configurations/Cartesio/Configuration_adv.h b/Marlin/example_configurations/Cartesio/Configuration_adv.h
index e1848a3ed88003aab9507684b2c487bc10588898..c538b3d84c7b8bdcc6e2a14fb8b6052939a980ab 100644
--- a/Marlin/example_configurations/Cartesio/Configuration_adv.h
+++ b/Marlin/example_configurations/Cartesio/Configuration_adv.h
@@ -520,6 +520,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 4
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/example_configurations/Felix/Configuration_adv.h b/Marlin/example_configurations/Felix/Configuration_adv.h
index b46c497787df107b7073b307d93669333c7c5a05..fddd7c4084254601b2677132c8f52de6fb11e24a 100644
--- a/Marlin/example_configurations/Felix/Configuration_adv.h
+++ b/Marlin/example_configurations/Felix/Configuration_adv.h
@@ -520,6 +520,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 4
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/example_configurations/Hephestos/Configuration_adv.h b/Marlin/example_configurations/Hephestos/Configuration_adv.h
index 48c0c4b5a46291517b70d063e690b7df394a7a70..ab000aa4ceca69eadd037cb35b59960739277a37 100644
--- a/Marlin/example_configurations/Hephestos/Configuration_adv.h
+++ b/Marlin/example_configurations/Hephestos/Configuration_adv.h
@@ -520,6 +520,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 4
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/example_configurations/Hephestos_2/Configuration_adv.h b/Marlin/example_configurations/Hephestos_2/Configuration_adv.h
index bfd8b9a921fa2e467196f16e44ea9a5dca2bc15a..6c54735602c87e08aa180cb69594e84a87835062 100644
--- a/Marlin/example_configurations/Hephestos_2/Configuration_adv.h
+++ b/Marlin/example_configurations/Hephestos_2/Configuration_adv.h
@@ -520,6 +520,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 4
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/example_configurations/K8200/Configuration_adv.h b/Marlin/example_configurations/K8200/Configuration_adv.h
index 9476e5b3bd4d5689c9b956104cf168594f3d82d2..8bc16833da5e5a8af8f0a4cb50f85c7360594bab 100644
--- a/Marlin/example_configurations/K8200/Configuration_adv.h
+++ b/Marlin/example_configurations/K8200/Configuration_adv.h
@@ -526,6 +526,12 @@ const unsigned int dropsegments = 2; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 4
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/example_configurations/K8400/Configuration_adv.h b/Marlin/example_configurations/K8400/Configuration_adv.h
index 1c73ae074ce77d9262bf4fa0777a54c80e1b7d40..f5056cc05eb430aacb073d0a39169e816cc2e12b 100644
--- a/Marlin/example_configurations/K8400/Configuration_adv.h
+++ b/Marlin/example_configurations/K8400/Configuration_adv.h
@@ -520,6 +520,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 26
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/example_configurations/RigidBot/Configuration_adv.h b/Marlin/example_configurations/RigidBot/Configuration_adv.h
index bf798bb8f17d4ceecb2003ede46ab2a6ffebb29b..6adeb2b3e817fb9896d1426d8683e25e7f16d27f 100644
--- a/Marlin/example_configurations/RigidBot/Configuration_adv.h
+++ b/Marlin/example_configurations/RigidBot/Configuration_adv.h
@@ -520,6 +520,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 8
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/example_configurations/SCARA/Configuration_adv.h b/Marlin/example_configurations/SCARA/Configuration_adv.h
index f743b660896e4641d1e41504f102f36d45b1ebdf..7dd0ce5bf130f803656dcb86496ec318572e3f8d 100644
--- a/Marlin/example_configurations/SCARA/Configuration_adv.h
+++ b/Marlin/example_configurations/SCARA/Configuration_adv.h
@@ -520,6 +520,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 4
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/example_configurations/TAZ4/Configuration_adv.h b/Marlin/example_configurations/TAZ4/Configuration_adv.h
index a4309db767a74a8ed7c25d54f4d684d9b2f62ddf..18994554be8786d1d3d0f844f5b607e3faad435d 100644
--- a/Marlin/example_configurations/TAZ4/Configuration_adv.h
+++ b/Marlin/example_configurations/TAZ4/Configuration_adv.h
@@ -528,6 +528,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 4
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/example_configurations/WITBOX/Configuration_adv.h b/Marlin/example_configurations/WITBOX/Configuration_adv.h
index 48c0c4b5a46291517b70d063e690b7df394a7a70..ab000aa4ceca69eadd037cb35b59960739277a37 100644
--- a/Marlin/example_configurations/WITBOX/Configuration_adv.h
+++ b/Marlin/example_configurations/WITBOX/Configuration_adv.h
@@ -520,6 +520,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 4
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/example_configurations/delta/biv2.5/Configuration_adv.h b/Marlin/example_configurations/delta/biv2.5/Configuration_adv.h
index a19e75beda9df9927163938e28d5c9a626a370f3..525a33a27c6a5472cb1ac54c61ae6dd3f987f706 100644
--- a/Marlin/example_configurations/delta/biv2.5/Configuration_adv.h
+++ b/Marlin/example_configurations/delta/biv2.5/Configuration_adv.h
@@ -522,6 +522,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 4
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/example_configurations/delta/generic/Configuration_adv.h b/Marlin/example_configurations/delta/generic/Configuration_adv.h
index fcd0d9f1cc6d86d775fd8869bd85383aae5aa8cd..01f15087cc54ba03498ab8bdd2ebab0db8fec8d2 100644
--- a/Marlin/example_configurations/delta/generic/Configuration_adv.h
+++ b/Marlin/example_configurations/delta/generic/Configuration_adv.h
@@ -522,6 +522,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 4
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/example_configurations/delta/kossel_mini/Configuration_adv.h b/Marlin/example_configurations/delta/kossel_mini/Configuration_adv.h
index d634ce5ab1ed586d07a7969a1400fdfffefbf3a9..995b6a876e65bb5f4c6f38ba76f3ef754856434d 100644
--- a/Marlin/example_configurations/delta/kossel_mini/Configuration_adv.h
+++ b/Marlin/example_configurations/delta/kossel_mini/Configuration_adv.h
@@ -521,6 +521,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 4
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/example_configurations/delta/kossel_pro/Configuration_adv.h b/Marlin/example_configurations/delta/kossel_pro/Configuration_adv.h
index f508233ef95c330b5a5ac3ef10a5baba12f627e4..ed1a7b04a3fd1c9de9f4368b0b908d035d901845 100644
--- a/Marlin/example_configurations/delta/kossel_pro/Configuration_adv.h
+++ b/Marlin/example_configurations/delta/kossel_pro/Configuration_adv.h
@@ -526,6 +526,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 4
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/example_configurations/delta/kossel_xl/Configuration_adv.h b/Marlin/example_configurations/delta/kossel_xl/Configuration_adv.h
index 796966872536c0b0a9865924d2005b10db52e489..92e5616ad7ed1e161b637c78d3a2534d5639966d 100644
--- a/Marlin/example_configurations/delta/kossel_xl/Configuration_adv.h
+++ b/Marlin/example_configurations/delta/kossel_xl/Configuration_adv.h
@@ -522,6 +522,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 4
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/example_configurations/makibox/Configuration_adv.h b/Marlin/example_configurations/makibox/Configuration_adv.h
index f266d270ee656616f6441d69460cf964ac38599e..c95c2ec845279e95378e2bfdec3905b0f9e31f91 100644
--- a/Marlin/example_configurations/makibox/Configuration_adv.h
+++ b/Marlin/example_configurations/makibox/Configuration_adv.h
@@ -520,6 +520,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 4
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410
diff --git a/Marlin/example_configurations/tvrrug/Round2/Configuration_adv.h b/Marlin/example_configurations/tvrrug/Round2/Configuration_adv.h
index ade737a739c579606d4fa9410ea02ec2aff0063d..fe78f340765c720e6bb9a87d90dd6d41a7edbec7 100644
--- a/Marlin/example_configurations/tvrrug/Round2/Configuration_adv.h
+++ b/Marlin/example_configurations/tvrrug/Round2/Configuration_adv.h
@@ -520,6 +520,12 @@ const unsigned int dropsegments = 5; //everything with less than this number of
 #define MAX_CMD_SIZE 96
 #define BUFSIZE 4
 
+// Set Transfer-Buffer-Size by uncommenting the next define. Default size is 32byte.
+// :[0,2,4,8,16,32,64,128,256]. To save 386byte of PROGMEM and (3 + TX_BUFFER_SIZE) bytes of RAM set TX_BUFFER_SIZE to 0
+// To buffer a simple "ok" you need 4 byte, for ADVANCED_OK/M105 you need 32 and for debug-echo: 128 byte to get the optimal speed.
+// Any other output does not need to be that speedy.
+#define TX_BUFFER_SIZE 0
+
 // Enable an emergency-command parser to intercept certain commands as they
 // enter the serial receive buffer, so they cannot be blocked.
 // Currently handles M108, M112, M410