diff --git a/Marlin/ultralcd_st7920_u8glib_rrd.h b/Marlin/ultralcd_st7920_u8glib_rrd.h
index cc88b5256d7d7f96237ef5c98b68b583d175a31d..a87c19e8ee43d6008b58bd062c7c5fd80d5f8452 100644
--- a/Marlin/ultralcd_st7920_u8glib_rrd.h
+++ b/Marlin/ultralcd_st7920_u8glib_rrd.h
@@ -27,9 +27,6 @@
 
 #if ENABLED(U8GLIB_ST7920)
 
-//set optimization so ARDUINO optimizes this file
-#pragma GCC optimize (3)
-
 #define ST7920_CLK_PIN  LCD_PINS_D4
 #define ST7920_DAT_PIN  LCD_PINS_ENABLE
 #define ST7920_CS_PIN   LCD_PINS_RS
@@ -43,20 +40,70 @@
 
 #include <U8glib.h>
 
+//set optimization so ARDUINO optimizes this file
+#pragma GCC optimize (3)
+
+#define DELAY_0_NOP  NOOP
+#define DELAY_1_NOP  __asm__("nop\n\t")
+#define DELAY_2_NOP  __asm__("nop\n\t" "nop\n\t")
+#define DELAY_3_NOP  __asm__("nop\n\t" "nop\n\t" "nop\n\t")
+#define DELAY_4_NOP  __asm__("nop\n\t" "nop\n\t" "nop\n\t" "nop\n\t")
+
+
+// If you want you can define your own set of delays in Configuration.h
+//#define ST7920_DELAY_1 DELAY_0_NOP
+//#define ST7920_DELAY_2 DELAY_0_NOP
+//#define ST7920_DELAY_3 DELAY_0_NOP
+
+#if F_CPU >= 20000000
+  #define CPU_ST7920_DELAY_1 DELAY_0_NOP
+  #define CPU_ST7920_DELAY_2 DELAY_0_NOP
+  #define CPU_ST7920_DELAY_3 DELAY_1_NOP
+#elif (MOTHERBOARD == BOARD_3DRAG) || (MOTHERBOARD == BOARD_K8200)
+  #define CPU_ST7920_DELAY_1 DELAY_0_NOP
+  #define CPU_ST7920_DELAY_2 DELAY_3_NOP
+  #define CPU_ST7920_DELAY_3 DELAY_0_NOP
+#elif (MOTHERBOARD == BOARD_MINIRAMBO)
+  #define CPU_ST7920_DELAY_1 DELAY_0_NOP
+  #define CPU_ST7920_DELAY_2 DELAY_4_NOP
+  #define CPU_ST7920_DELAY_3 DELAY_0_NOP
+#elif (MOTHERBOARD == BOARD_RAMBO)
+  #define CPU_ST7920_DELAY_1 DELAY_0_NOP
+  #define CPU_ST7920_DELAY_2 DELAY_0_NOP
+  #define CPU_ST7920_DELAY_3 DELAY_0_NOP
+#elif F_CPU == 16000000
+  #define CPU_ST7920_DELAY_1 DELAY_0_NOP
+  #define CPU_ST7920_DELAY_2 DELAY_0_NOP
+  #define CPU_ST7920_DELAY_3 DELAY_1_NOP
+#else
+  #error "No valid condition for delays in 'ultralcd_st7920_u8glib_rrd.h'"
+#endif
+
+#ifndef ST7920_DELAY_1
+  #define ST7920_DELAY_1 CPU_ST7920_DELAY_1
+#endif
+#ifndef ST7920_DELAY_2
+  #define ST7920_DELAY_2 CPU_ST7920_DELAY_2
+#endif
+#ifndef ST7920_DELAY_3
+  #define ST7920_DELAY_3 CPU_ST7920_DELAY_3
+#endif
+
+#define ST7920_SND_BIT \
+  WRITE(ST7920_CLK_PIN, LOW);        ST7920_DELAY_1; \
+  WRITE(ST7920_DAT_PIN, val & 0x80); ST7920_DELAY_2; \
+  WRITE(ST7920_CLK_PIN, HIGH);       ST7920_DELAY_3; \
+  val <<= 1
+
 static void ST7920_SWSPI_SND_8BIT(uint8_t val) {
-  uint8_t i;
-  for (i = 0; i < 8; i++) {
-    WRITE(ST7920_CLK_PIN,0);
-    #if F_CPU == 20000000
-      __asm__("nop\n\t");
-    #endif
-    WRITE(ST7920_DAT_PIN,val&0x80);
-    val<<=1;
-    WRITE(ST7920_CLK_PIN,1);
-    #if F_CPU == 20000000
-      __asm__("nop\n\t""nop\n\t");
-    #endif
-  }
+  ST7920_SND_BIT; // 1
+  ST7920_SND_BIT; // 2
+  ST7920_SND_BIT; // 3
+  ST7920_SND_BIT; // 4
+  ST7920_SND_BIT; // 5
+  ST7920_SND_BIT; // 6
+  ST7920_SND_BIT; // 7
+  ST7920_SND_BIT; // 8
 }
 
 #define ST7920_CS()              {WRITE(ST7920_CS_PIN,1);u8g_10MicroDelay();}
@@ -138,6 +185,7 @@ class U8GLIB_ST7920_128X64_RRD : public U8GLIB {
   U8GLIB_ST7920_128X64_RRD(uint8_t dummy) : U8GLIB(&u8g_dev_st7920_128x64_rrd_sw_spi) { UNUSED(dummy); }
 };
 
+#pragma GCC reset_options
 
 #endif //U8GLIB_ST7920
 #endif //ULCDST7920_H