From 8f26c3a6d37ca7284f11112c9f9c12595218176e Mon Sep 17 00:00:00 2001
From: Scott Lahteine <github@thinkyhead.com>
Date: Sun, 20 May 2018 08:19:11 -0500
Subject: [PATCH] Refactor and optimize Stepper/Planner

---
 Marlin/Marlin_main.cpp    |  45 +--
 Marlin/cardreader.cpp     |   2 +-
 Marlin/endstops.cpp       |  12 +-
 Marlin/planner.cpp        | 597 +++++++++++++++++++++++------------
 Marlin/planner.h          | 232 +++++++++-----
 Marlin/planner_bezier.cpp |  10 +-
 Marlin/stepper.cpp        | 642 +++++++++++++++++++-------------------
 Marlin/stepper.h          | 148 +++------
 Marlin/temperature.cpp    |  20 +-
 Marlin/ubl_motion.cpp     |  13 +-
 Marlin/ultralcd.cpp       |   6 +-
 11 files changed, 963 insertions(+), 764 deletions(-)

diff --git a/Marlin/Marlin_main.cpp b/Marlin/Marlin_main.cpp
index cdaf65376d..054ffa00bb 100644
--- a/Marlin/Marlin_main.cpp
+++ b/Marlin/Marlin_main.cpp
@@ -8474,7 +8474,7 @@ inline void gcode_M111() {
  */
 inline void gcode_M81() {
   thermalManager.disable_all_heaters();
-  stepper.finish_and_disable();
+  planner.finish_and_disable();
 
   #if FAN_COUNT > 0
     for (uint8_t i = 0; i < FAN_COUNT; i++) fanSpeeds[i] = 0;
@@ -8517,7 +8517,7 @@ inline void gcode_M18_M84() {
   else {
     bool all_axis = !(parser.seen('X') || parser.seen('Y') || parser.seen('Z') || parser.seen('E'));
     if (all_axis) {
-      stepper.finish_and_disable();
+      planner.finish_and_disable();
     }
     else {
       planner.synchronize();
@@ -9963,7 +9963,7 @@ inline void gcode_M400() { planner.synchronize(); }
 #endif // FILAMENT_WIDTH_SENSOR
 
 void quickstop_stepper() {
-  stepper.quick_stop();
+  planner.quick_stop();
   planner.synchronize();
   set_current_from_steppers_for_axis(ALL_AXES);
   SYNC_PLAN_POSITION_KINEMATIC();
@@ -10342,7 +10342,7 @@ inline void gcode_M502() {
    * M540: Set whether SD card print should abort on endstop hit (M540 S<0|1>)
    */
   inline void gcode_M540() {
-    if (parser.seen('S')) stepper.abort_on_endstop_hit = parser.value_bool();
+    if (parser.seen('S')) planner.abort_on_endstop_hit = parser.value_bool();
   }
 
 #endif // ABORT_ON_ENDSTOP_HIT_FEATURE_ENABLED
@@ -12995,7 +12995,8 @@ void set_current_from_steppers_for_axis(const AxisEnum axis) {
         idle();
       }
       LOOP_XYZE(i) raw[i] += segment_distance[i];
-      planner.buffer_line_kinematic(raw, fr_mm_s, active_extruder, cartesian_segment_mm);
+      if (!planner.buffer_line_kinematic(raw, fr_mm_s, active_extruder, cartesian_segment_mm))
+        break;
     }
 
     // Since segment_distance is only approximate,
@@ -13281,7 +13282,8 @@ void set_current_from_steppers_for_axis(const AxisEnum axis) {
       #if ENABLED(SCARA_FEEDRATE_SCALING)
         // For SCARA scale the feed rate from mm/s to degrees/s
         // i.e., Complete the angular vector in the given time.
-        planner.buffer_segment(delta[A_AXIS], delta[B_AXIS], raw[Z_AXIS], raw[E_AXIS], HYPOT(delta[A_AXIS] - oldA, delta[B_AXIS] - oldB) * inverse_secs, active_extruder);
+        if (!planner.buffer_segment(delta[A_AXIS], delta[B_AXIS], raw[Z_AXIS], raw[E_AXIS], HYPOT(delta[A_AXIS] - oldA, delta[B_AXIS] - oldB) * inverse_secs, active_extruder))
+          break;
         /*
         SERIAL_ECHO(segments);
         SERIAL_ECHOPAIR(": X=", raw[X_AXIS]); SERIAL_ECHOPAIR(" Y=", raw[Y_AXIS]);
@@ -13291,7 +13293,8 @@ void set_current_from_steppers_for_axis(const AxisEnum axis) {
         //*/
         oldA = delta[A_AXIS]; oldB = delta[B_AXIS];
       #else
-        planner.buffer_line(delta[A_AXIS], delta[B_AXIS], delta[C_AXIS], raw[E_AXIS], _feedrate_mm_s, active_extruder, cartesian_segment_mm);
+        if (!planner.buffer_line(delta[A_AXIS], delta[B_AXIS], delta[C_AXIS], raw[E_AXIS], _feedrate_mm_s, active_extruder, cartesian_segment_mm))
+          break;
       #endif
     }
 
@@ -13385,14 +13388,14 @@ void set_current_from_steppers_for_axis(const AxisEnum axis) {
           }
           // unpark extruder: 1) raise, 2) move into starting XY position, 3) lower
           for (uint8_t i = 0; i < 3; i++)
-            planner.buffer_line(
+            if (!planner.buffer_line(
               i == 0 ? raised_parked_position[X_AXIS] : current_position[X_AXIS],
               i == 0 ? raised_parked_position[Y_AXIS] : current_position[Y_AXIS],
               i == 2 ? current_position[Z_AXIS] : raised_parked_position[Z_AXIS],
               current_position[E_AXIS],
               i == 1 ? PLANNER_XY_FEEDRATE() : planner.max_feedrate_mm_s[Z_AXIS],
-              active_extruder
-            );
+              active_extruder)
+            ) break;
           delayed_move_time = 0;
           active_extruder_parked = false;
           #if ENABLED(DEBUG_LEVELING_FEATURE)
@@ -13409,17 +13412,12 @@ void set_current_from_steppers_for_axis(const AxisEnum axis) {
               }
             #endif
             // move duplicate extruder into correct duplication position.
-            planner.set_position_mm(
-              inactive_extruder_x_pos,
-              current_position[Y_AXIS],
-              current_position[Z_AXIS],
-              current_position[E_AXIS]
-            );
-            planner.buffer_line(
+            planner.set_position_mm(inactive_extruder_x_pos, current_position[Y_AXIS], current_position[Z_AXIS], current_position[E_AXIS]);
+            if (!planner.buffer_line(
               current_position[X_AXIS] + duplicate_extruder_x_offset,
               current_position[Y_AXIS], current_position[Z_AXIS], current_position[E_AXIS],
-              planner.max_feedrate_mm_s[X_AXIS], 1
-            );
+              planner.max_feedrate_mm_s[X_AXIS], 1)
+            ) break;
             planner.synchronize();
             SYNC_PLAN_POSITION_KINEMATIC();
             extruder_duplication_enabled = true;
@@ -13652,14 +13650,17 @@ void prepare_move_to_destination() {
         // i.e., Complete the angular vector in the given time.
         inverse_kinematics(raw);
         ADJUST_DELTA(raw);
-        planner.buffer_segment(delta[A_AXIS], delta[B_AXIS], raw[Z_AXIS], raw[E_AXIS], HYPOT(delta[A_AXIS] - oldA, delta[B_AXIS] - oldB) * inverse_secs, active_extruder);
+        if (!planner.buffer_segment(delta[A_AXIS], delta[B_AXIS], raw[Z_AXIS], raw[E_AXIS], HYPOT(delta[A_AXIS] - oldA, delta[B_AXIS] - oldB) * inverse_secs, active_extruder))
+          break;
         oldA = delta[A_AXIS]; oldB = delta[B_AXIS];
       #elif HAS_UBL_AND_CURVES
         float pos[XYZ] = { raw[X_AXIS], raw[Y_AXIS], raw[Z_AXIS] };
         planner.apply_leveling(pos);
-        planner.buffer_segment(pos[X_AXIS], pos[Y_AXIS], pos[Z_AXIS], raw[E_AXIS], fr_mm_s, active_extruder);
+        if (!planner.buffer_segment(pos[X_AXIS], pos[Y_AXIS], pos[Z_AXIS], raw[E_AXIS], fr_mm_s, active_extruder))
+          break;
       #else
-        planner.buffer_line_kinematic(raw, fr_mm_s, active_extruder);
+        if (!planner.buffer_line_kinematic(raw, fr_mm_s, active_extruder))
+          break;
       #endif
     }
 
diff --git a/Marlin/cardreader.cpp b/Marlin/cardreader.cpp
index 109ab428bf..28fdf11523 100644
--- a/Marlin/cardreader.cpp
+++ b/Marlin/cardreader.cpp
@@ -941,7 +941,7 @@ void CardReader::printingHasFinished() {
     #endif
 
     #if ENABLED(SD_FINISHED_STEPPERRELEASE) && defined(SD_FINISHED_RELEASECOMMAND)
-      stepper.cleaning_buffer_counter = 1; // The command will fire from the Stepper ISR
+      planner.finish_and_disable();
     #endif
     print_job_timer.stop();
     if (print_job_timer.duration() > 60)
diff --git a/Marlin/endstops.cpp b/Marlin/endstops.cpp
index 93fbd9a5a8..15b601c52e 100644
--- a/Marlin/endstops.cpp
+++ b/Marlin/endstops.cpp
@@ -181,7 +181,7 @@ void Endstops::report_state() {
     #endif
 
     #define _ENDSTOP_HIT_ECHO(A,C) do{ \
-      SERIAL_ECHOPAIR(" " STRINGIFY(A) ":", stepper.triggered_position_mm(_AXIS(A))); \
+      SERIAL_ECHOPAIR(" " STRINGIFY(A) ":", planner.triggered_position_mm(_AXIS(A))); \
       _SET_STOP_CHAR(A,C); }while(0)
 
     #define _ENDSTOP_HIT_TEST(A,C) \
@@ -211,7 +211,7 @@ void Endstops::report_state() {
     hit_on_purpose();
 
     #if ENABLED(ABORT_ON_ENDSTOP_HIT_FEATURE_ENABLED) && ENABLED(SDSUPPORT)
-      if (stepper.abort_on_endstop_hit) {
+      if (planner.abort_on_endstop_hit) {
         card.sdprinting = false;
         card.closefile();
         quickstop_stepper();
@@ -322,7 +322,7 @@ void Endstops::update() {
       UPDATE_ENDSTOP_BIT(AXIS, MINMAX); \
       if (TEST_ENDSTOP(_ENDSTOP(AXIS, MINMAX))) { \
         _ENDSTOP_HIT(AXIS, MINMAX); \
-        stepper.endstop_triggered(_AXIS(AXIS)); \
+        planner.endstop_triggered(_AXIS(AXIS)); \
       } \
     }while(0)
 
@@ -331,9 +331,9 @@ void Endstops::update() {
     if (G38_move) {
       UPDATE_ENDSTOP_BIT(Z, MIN_PROBE);
       if (TEST_ENDSTOP(_ENDSTOP(Z, MIN_PROBE))) {
-        if      (stepper.current_block->steps[_AXIS(X)] > 0) { _ENDSTOP_HIT(X, MIN); stepper.endstop_triggered(_AXIS(X)); }
-        else if (stepper.current_block->steps[_AXIS(Y)] > 0) { _ENDSTOP_HIT(Y, MIN); stepper.endstop_triggered(_AXIS(Y)); }
-        else if (stepper.current_block->steps[_AXIS(Z)] > 0) { _ENDSTOP_HIT(Z, MIN); stepper.endstop_triggered(_AXIS(Z)); }
+        if      (stepper.current_block->steps[_AXIS(X)] > 0) { _ENDSTOP_HIT(X, MIN); planner.endstop_triggered(_AXIS(X)); }
+        else if (stepper.current_block->steps[_AXIS(Y)] > 0) { _ENDSTOP_HIT(Y, MIN); planner.endstop_triggered(_AXIS(Y)); }
+        else if (stepper.current_block->steps[_AXIS(Z)] > 0) { _ENDSTOP_HIT(Z, MIN); planner.endstop_triggered(_AXIS(Z)); }
         G38_endstop_hit = true;
       }
     }
diff --git a/Marlin/planner.cpp b/Marlin/planner.cpp
index 4d4b92e9dc..3818570b42 100644
--- a/Marlin/planner.cpp
+++ b/Marlin/planner.cpp
@@ -56,6 +56,10 @@
  *
  * IntersectionDistance[s1_, s2_, a_, d_] := (2 a d - s1^2 + s2^2)/(4 a)
  *
+ * --
+ *
+ * The fast inverse function needed for Bézier interpolation for AVR
+ * was designed, written and tested by Eduardo José Tagle on April/2018
  */
 
 #include "planner.h"
@@ -85,13 +89,18 @@ Planner planner;
  * A ring buffer of moves described in steps
  */
 block_t Planner::block_buffer[BLOCK_BUFFER_SIZE];
-volatile uint8_t Planner::block_buffer_head, // Index of the next block to be pushed
-                 Planner::block_buffer_tail;
+volatile uint8_t Planner::block_buffer_head,  // Index of the next block to be pushed
+                 Planner::block_buffer_tail;  // Index of the busy block, if any
+uint16_t Planner::cleaning_buffer_counter;    // A counter to disable queuing of blocks
 
-float Planner::max_feedrate_mm_s[XYZE_N], // Max speeds in mm per second
+float Planner::max_feedrate_mm_s[XYZE_N],   // Max speeds in mm per second
       Planner::axis_steps_per_mm[XYZE_N],
       Planner::steps_to_mm[XYZE_N];
 
+#if ENABLED(ABORT_ON_ENDSTOP_HIT_FEATURE_ENABLED)
+  bool Planner::abort_on_endstop_hit = false;
+#endif
+
 #if ENABLED(DISTINCT_E_FACTORS)
   uint8_t Planner::last_extruder = 0;     // Respond to extruder change
 #endif
@@ -160,7 +169,7 @@ int32_t Planner::position[NUM_AXIS] = { 0 };
 uint32_t Planner::cutoff_long;
 
 float Planner::previous_speed[NUM_AXIS],
-      Planner::previous_nominal_speed;
+      Planner::previous_nominal_speed_sqr;
 
 #if ENABLED(DISABLE_INACTIVE_EXTRUDER)
   uint8_t Planner::g_uc_extruder_last_move[EXTRUDERS] = { 0 };
@@ -197,7 +206,7 @@ void Planner::init() {
     ZERO(position_float);
   #endif
   ZERO(previous_speed);
-  previous_nominal_speed = 0.0;
+  previous_nominal_speed_sqr = 0.0;
   #if ABL_PLANAR
     bed_level_matrix.set_to_identity();
   #endif
@@ -347,7 +356,7 @@ void Planner::init() {
   //
   static uint32_t get_period_inverse(uint32_t d) {
 
-     static const uint8_t inv_tab[256] PROGMEM = {
+    static const uint8_t inv_tab[256] PROGMEM = {
       255,253,252,250,248,246,244,242,240,238,236,234,233,231,229,227,
       225,224,222,220,218,217,215,213,212,210,208,207,205,203,202,200,
       199,197,195,194,192,191,189,188,186,185,183,182,180,179,178,176,
@@ -520,7 +529,7 @@ void Planner::init() {
       A("rjmp 6f")                      // No, skip it
       A("mov %14,%15")
       A("clr %15")
-      L("6")                            // %16:%15:%14 = initial estimation of 0x1000000 / d)
+      L("6")                            // %16:%15:%14 = initial estimation of 0x1000000 / d
 
       // Now, we must refine the estimation present on %16:%15:%14 using 1 iteration
       // of Newton-Raphson. As it has a quadratic convergence, 1 iteration is enough
@@ -709,7 +718,6 @@ void Planner::init() {
     // Return the result
     return r11 | (uint16_t(r12) << 8) | (uint32_t(r13) << 16);
   }
-
 #endif // BEZIER_JERK_CONTROL
 
 #define MINIMAL_STEP_RATE 120
@@ -719,12 +727,13 @@ void Planner::init() {
  * by the provided factors.
  */
 void Planner::calculate_trapezoid_for_block(block_t* const block, const float &entry_factor, const float &exit_factor) {
+
   uint32_t initial_rate = CEIL(block->nominal_rate * entry_factor),
            final_rate = CEIL(block->nominal_rate * exit_factor); // (steps per second)
 
   // Limit minimal step rate (Otherwise the timer will overflow.)
-  NOLESS(initial_rate, MINIMAL_STEP_RATE);
-  NOLESS(final_rate, MINIMAL_STEP_RATE);
+  NOLESS(initial_rate, uint32_t(MINIMAL_STEP_RATE));
+  NOLESS(final_rate, uint32_t(MINIMAL_STEP_RATE));
 
   #if ENABLED(BEZIER_JERK_CONTROL)
     uint32_t cruise_rate = initial_rate;
@@ -733,19 +742,18 @@ void Planner::calculate_trapezoid_for_block(block_t* const block, const float &e
   const int32_t accel = block->acceleration_steps_per_s2;
 
           // Steps required for acceleration, deceleration to/from nominal rate
-  int32_t accelerate_steps = CEIL(estimate_acceleration_distance(initial_rate, block->nominal_rate, accel)),
-          decelerate_steps = FLOOR(estimate_acceleration_distance(block->nominal_rate, final_rate, -accel)),
+  uint32_t accelerate_steps = CEIL(estimate_acceleration_distance(initial_rate, block->nominal_rate, accel)),
+           decelerate_steps = FLOOR(estimate_acceleration_distance(block->nominal_rate, final_rate, -accel));
           // Steps between acceleration and deceleration, if any
-          plateau_steps = block->step_event_count - accelerate_steps - decelerate_steps;
+  int32_t plateau_steps = block->step_event_count - accelerate_steps - decelerate_steps;
 
   // Does accelerate_steps + decelerate_steps exceed step_event_count?
   // Then we can't possibly reach the nominal rate, there will be no cruising.
   // Use intersection_distance() to calculate accel / braking time in order to
   // reach the final_rate exactly at the end of this block.
   if (plateau_steps < 0) {
-    accelerate_steps = CEIL(intersection_distance(initial_rate, final_rate, accel, block->step_event_count));
-    NOLESS(accelerate_steps, 0); // Check limits due to numerical round-off
-    accelerate_steps = MIN((uint32_t)accelerate_steps, block->step_event_count);//(We can cast here to unsigned, because the above line ensures that we are above zero)
+    const float accelerate_steps_float = CEIL(intersection_distance(initial_rate, final_rate, accel, block->step_event_count));
+    accelerate_steps = MIN(uint32_t(MAX(accelerate_steps_float, 0)), block->step_event_count);
     plateau_steps = 0;
 
     #if ENABLED(BEZIER_JERK_CONTROL)
@@ -772,8 +780,12 @@ void Planner::calculate_trapezoid_for_block(block_t* const block, const float &e
 
   #endif
 
-  CRITICAL_SECTION_START;  // Fill variables used by the stepper in a critical section
-  if (!TEST(block->flag, BLOCK_BIT_BUSY)) { // Don't update variables if block is busy.
+  // Fill variables used by the stepper in a critical section
+  const bool was_enabled = STEPPER_ISR_ENABLED();
+  if (was_enabled) DISABLE_STEPPER_DRIVER_INTERRUPT();
+
+  // Don't update variables if block is busy: It is being interpreted by the planner
+  if (!TEST(block->flag, BLOCK_BIT_BUSY)) {
     block->accelerate_until = accelerate_steps;
     block->decelerate_after = accelerate_steps + plateau_steps;
     block->initial_rate = initial_rate;
@@ -786,32 +798,35 @@ void Planner::calculate_trapezoid_for_block(block_t* const block, const float &e
     #endif
     block->final_rate = final_rate;
   }
-  CRITICAL_SECTION_END;
+  if (was_enabled) ENABLE_STEPPER_DRIVER_INTERRUPT();
 }
 
-// "Junction jerk" in this context is the immediate change in speed at the junction of two blocks.
-// This method will calculate the junction jerk as the euclidean distance between the nominal
-// velocities of the respective blocks.
-//inline float junction_jerk(block_t *before, block_t *after) {
-//  return SQRT(
-//    POW((before->speed_x-after->speed_x), 2)+POW((before->speed_y-after->speed_y), 2));
-//}
-
 // The kernel called by recalculate() when scanning the plan from last to first entry.
-void Planner::reverse_pass_kernel(block_t* const current, const block_t* const next) {
-  if (current && next) {
-    // If entry speed is already at the maximum entry speed, no need to recheck. Block is cruising.
-    // If not, block in state of acceleration or deceleration. Reset entry speed to maximum and
-    // check for maximum allowable speed reductions to ensure maximum possible planned speed.
-    const float max_entry_speed = current->max_entry_speed;
-    if (current->entry_speed != max_entry_speed || TEST(next->flag, BLOCK_BIT_RECALCULATE)) {
-      // If nominal length true, max junction speed is guaranteed to be reached. Only compute
-      // for max allowable speed if block is decelerating and nominal length is false.
-      const float new_entry_speed = (TEST(current->flag, BLOCK_BIT_NOMINAL_LENGTH) || max_entry_speed <= next->entry_speed)
-        ? max_entry_speed
-        : MIN(max_entry_speed, max_allowable_speed(-current->acceleration, next->entry_speed, current->millimeters));
-      if (new_entry_speed != current->entry_speed) {
-        current->entry_speed = new_entry_speed;
+void Planner::reverse_pass_kernel(block_t* const current, const block_t * const next) {
+  if (current) {
+    // If entry speed is already at the maximum entry speed, and there was no change of speed
+    // in the next block, there is no need to recheck. Block is cruising and there is no need to
+    // compute anything for this block.
+    // If not, block entry speed needs to be recalculated to ensure maximum possible planned speed.
+    const float max_entry_speed_sqr = current->max_entry_speed_sqr;
+
+    // Compute maximum entry speed decelerating over the current block from its exit speed.
+    // If not at the maximum entry speed, or the next block's entry speed changed
+    if (current->entry_speed_sqr != max_entry_speed_sqr || (next && TEST(next->flag, BLOCK_BIT_RECALCULATE))) {
+
+      // If nominal length true, max junction speed is guaranteed to be reached.
+      // If a block can de/ac-celerate from nominal speed to zero within the length of the block, then
+      // the current block and next block junction speeds are guaranteed to always be at their maximum
+      // junction speeds in deceleration and acceleration, respectively. This is due to how the current
+      // block nominal speed limits both the current and next maximum junction speeds. Hence, in both
+      // the reverse and forward planners, the corresponding block junction speed will always be at the
+      // maximum junction speed and may always be ignored for any speed reduction checks.
+
+      const float new_entry_speed_sqr = TEST(current->flag, BLOCK_BIT_NOMINAL_LENGTH)
+        ? max_entry_speed_sqr
+        : MIN(max_entry_speed_sqr, max_allowable_speed_sqr(-current->acceleration, next ? next->entry_speed_sqr : sq(MINIMUM_PLANNER_SPEED), current->millimeters));
+      if (current->entry_speed_sqr != new_entry_speed_sqr) {
+        current->entry_speed_sqr = new_entry_speed_sqr;
         SBI(current->flag, BLOCK_BIT_RECALCULATE);
       }
     }
@@ -826,44 +841,37 @@ void Planner::reverse_pass() {
   if (movesplanned() > 2) {
     const uint8_t endnr = next_block_index(block_buffer_tail); // tail is running. tail+1 shouldn't be altered because it's connected to the running block.
     uint8_t blocknr = prev_block_index(block_buffer_head);
-    block_t* current = &block_buffer[blocknr];
-
-    // Last/newest block in buffer:
-    const float max_entry_speed = current->max_entry_speed;
-    if (current->entry_speed != max_entry_speed) {
-      // If nominal length true, max junction speed is guaranteed to be reached. Only compute
-      // for max allowable speed if block is decelerating and nominal length is false.
-      const float new_entry_speed = TEST(current->flag, BLOCK_BIT_NOMINAL_LENGTH)
-        ? max_entry_speed
-        : MIN(max_entry_speed, max_allowable_speed(-current->acceleration, MINIMUM_PLANNER_SPEED, current->millimeters));
-      if (current->entry_speed != new_entry_speed) {
-        current->entry_speed = new_entry_speed;
-        SBI(current->flag, BLOCK_BIT_RECALCULATE);
-      }
-    }
 
-    do {
-      const block_t * const next = current;
-      blocknr = prev_block_index(blocknr);
+    // Perform the reverse pass
+    block_t *current, *next = NULL;
+    while (blocknr != endnr) {
+      // Perform the reverse pass - Only consider non sync blocks
       current = &block_buffer[blocknr];
-      reverse_pass_kernel(current, next);
-    } while (blocknr != endnr);
+      if (!TEST(current->flag, BLOCK_BIT_SYNC_POSITION)) {
+        reverse_pass_kernel(current, next);
+        next = current;
+      }
+      // Step back to the previous block
+      blocknr = prev_block_index(blocknr);
+    }
   }
 }
 
 // The kernel called by recalculate() when scanning the plan from first to last entry.
-void Planner::forward_pass_kernel(const block_t* const previous, block_t* const current) {
+void Planner::forward_pass_kernel(const block_t * const previous, block_t* const current) {
   if (previous) {
     // If the previous block is an acceleration block, too short to complete the full speed
     // change, adjust the entry speed accordingly. Entry speeds have already been reset,
     // maximized, and reverse-planned. If nominal length is set, max junction speed is
     // guaranteed to be reached. No need to recheck.
     if (!TEST(previous->flag, BLOCK_BIT_NOMINAL_LENGTH)) {
-      if (previous->entry_speed < current->entry_speed) {
-        const float new_entry_speed = MIN(current->entry_speed, max_allowable_speed(-previous->acceleration, previous->entry_speed, previous->millimeters));
-        // Check for junction speed change
-        if (current->entry_speed != new_entry_speed) {
-          current->entry_speed = new_entry_speed;
+      if (previous->entry_speed_sqr < current->entry_speed_sqr) {
+        // Compute the maximum allowable speed
+        const float new_entry_speed_sqr = max_allowable_speed_sqr(-previous->acceleration, previous->entry_speed_sqr, previous->millimeters);
+        // If true, current block is full-acceleration
+        if (current->entry_speed_sqr > new_entry_speed_sqr) {
+          // Always <= max_entry_speed_sqr. Backward pass sets this.
+          current->entry_speed_sqr = new_entry_speed_sqr;
           SBI(current->flag, BLOCK_BIT_RECALCULATE);
         }
       }
@@ -876,15 +884,21 @@ void Planner::forward_pass_kernel(const block_t* const previous, block_t* const
  * Once in reverse and once forward. This implements the forward pass.
  */
 void Planner::forward_pass() {
-  block_t* block[3] = { NULL, NULL, NULL };
-
-  for (uint8_t b = block_buffer_tail; b != block_buffer_head; b = next_block_index(b)) {
-    block[0] = block[1];
-    block[1] = block[2];
-    block[2] = &block_buffer[b];
-    forward_pass_kernel(block[0], block[1]);
+  const uint8_t endnr = block_buffer_head;
+  uint8_t blocknr = block_buffer_tail;
+
+  // Perform the forward pass
+  block_t *current, *previous = NULL;
+  while (blocknr != endnr) {
+    // Perform the forward pass - Only consider non-sync blocks
+    current = &block_buffer[blocknr];
+    if (!TEST(current->flag, BLOCK_BIT_SYNC_POSITION)) {
+      forward_pass_kernel(previous, current);
+      previous = current;
+    }
+    // Advance to the next block
+    blocknr = next_block_index(blocknr);
   }
-  forward_pass_kernel(block[1], block[2]);
 }
 
 /**
@@ -893,38 +907,72 @@ void Planner::forward_pass() {
  * recalculate() after updating the blocks.
  */
 void Planner::recalculate_trapezoids() {
-  int8_t block_index = block_buffer_tail;
-  block_t *current, *next = NULL;
+  uint8_t block_index = block_buffer_tail;
+
+  // As there could be a sync block in the head of the queue, and the next loop must not
+  // recalculate the head block (as it needs to be specially handled), scan backwards until
+  // we find the first non SYNC block
+  uint8_t head_block_index = block_buffer_head;
+  while (head_block_index != block_index) {
+
+    // Go back (head always points to the first free block)
+    uint8_t prev_index = prev_block_index(head_block_index);
+
+    // Get the pointer to the block
+    block_t *prev = &block_buffer[prev_index];
+
+    // If not dealing with a sync block, we are done. The last block is not a SYNC block
+    if (!TEST(prev->flag, BLOCK_BIT_SYNC_POSITION)) break;
+
+    // Examine the previous block. This and all following are SYNC blocks
+    head_block_index = prev_index;
+  };
+
+  // Go from the tail (currently executed block) to the first block, without including it
+  block_t *current = NULL, *next = NULL;
+  float current_entry_speed = 0.0, next_entry_speed = 0.0;
+  while (block_index != head_block_index) {
 
-  while (block_index != block_buffer_head) {
-    current = next;
     next = &block_buffer[block_index];
-    if (current) {
-      // Recalculate if current block entry or exit junction speed has changed.
-      if (TEST(current->flag, BLOCK_BIT_RECALCULATE) || TEST(next->flag, BLOCK_BIT_RECALCULATE)) {
-        // NOTE: Entry and exit factors always > 0 by all previous logic operations.
-        const float nomr = 1.0 / current->nominal_speed;
-        calculate_trapezoid_for_block(current, current->entry_speed * nomr, next->entry_speed * nomr);
-        #if ENABLED(LIN_ADVANCE)
-          if (current->use_advance_lead) {
-            const float comp = current->e_D_ratio * extruder_advance_K * axis_steps_per_mm[E_AXIS];
-            current->max_adv_steps = current->nominal_speed * comp;
-            current->final_adv_steps = next->entry_speed * comp;
-          }
-        #endif
-        CBI(current->flag, BLOCK_BIT_RECALCULATE); // Reset current only to ensure next trapezoid is computed
+
+    // Skip sync blocks
+    if (!TEST(next->flag, BLOCK_BIT_SYNC_POSITION)) {
+      next_entry_speed = SQRT(next->entry_speed_sqr);
+
+      if (current) {
+        // Recalculate if current block entry or exit junction speed has changed.
+        if (TEST(current->flag, BLOCK_BIT_RECALCULATE) || TEST(next->flag, BLOCK_BIT_RECALCULATE)) {
+          // NOTE: Entry and exit factors always > 0 by all previous logic operations.
+          const float current_nominal_speed = SQRT(current->nominal_speed_sqr),
+                      nomr = 1.0 / current_nominal_speed;
+          calculate_trapezoid_for_block(current, current_entry_speed * nomr, next_entry_speed * nomr);
+          #if ENABLED(LIN_ADVANCE)
+            if (current->use_advance_lead) {
+              const float comp = current->e_D_ratio * extruder_advance_K * axis_steps_per_mm[E_AXIS];
+              current->max_adv_steps = current_nominal_speed * comp;
+              current->final_adv_steps = next_entry_speed * comp;
+            }
+          #endif
+          CBI(current->flag, BLOCK_BIT_RECALCULATE); // Reset current only to ensure next trapezoid is computed
+        }
       }
+
+      current = next;
+      current_entry_speed = next_entry_speed;
     }
+
     block_index = next_block_index(block_index);
   }
+
   // Last/newest block in buffer. Exit speed is set with MINIMUM_PLANNER_SPEED. Always recalculated.
   if (next) {
-    const float nomr = 1.0 / next->nominal_speed;
-    calculate_trapezoid_for_block(next, next->entry_speed * nomr, (MINIMUM_PLANNER_SPEED) * nomr);
+    const float next_nominal_speed = SQRT(next->nominal_speed_sqr),
+                nomr = 1.0 / next_nominal_speed;
+    calculate_trapezoid_for_block(next, next_entry_speed * nomr, (MINIMUM_PLANNER_SPEED) * nomr);
     #if ENABLED(LIN_ADVANCE)
       if (next->use_advance_lead) {
         const float comp = next->e_D_ratio * extruder_advance_K * axis_steps_per_mm[E_AXIS];
-        next->max_adv_steps = next->nominal_speed * comp;
+        next->max_adv_steps = next_nominal_speed * comp;
         next->final_adv_steps = (MINIMUM_PLANNER_SPEED) * comp;
       }
     #endif
@@ -974,7 +1022,7 @@ void Planner::recalculate() {
     for (uint8_t b = block_buffer_tail; b != block_buffer_head; b = next_block_index(b)) {
       block_t* block = &block_buffer[b];
       if (block->steps[X_AXIS] || block->steps[Y_AXIS] || block->steps[Z_AXIS]) {
-        float se = (float)block->steps[E_AXIS] / block->step_event_count * block->nominal_speed; // mm/sec;
+        const float se = (float)block->steps[E_AXIS] / block->step_event_count * SQRT(block->nominal_speed_sqr); // mm/sec;
         NOLESS(high, se);
       }
     }
@@ -1275,6 +1323,59 @@ void Planner::check_axes_activity() {
 
 #endif // PLANNER_LEVELING
 
+void Planner::quick_stop() {
+  // Remove all the queued blocks. Note that this function is NOT
+  // called from the Stepper ISR, so we must consider tail as readonly!
+  // That is why we set head to tail!
+  block_buffer_head = block_buffer_tail;
+
+  #if ENABLED(ULTRA_LCD)
+    // Clear the accumulated runtime
+    clear_block_buffer_runtime();
+  #endif
+
+  // Make sure to drop any attempt of queuing moves for at least 1 second
+  cleaning_buffer_counter = 1000;
+
+  // And stop the stepper ISR
+  stepper.quick_stop();
+}
+
+void Planner::endstop_triggered(const AxisEnum axis) {
+
+  /*NB: This will be called via endstops.update()
+    and endstops.update() can be called from the temperature
+    ISR, so Stepper interrupts may be enabled at this point */
+
+  // Disable stepper ISR
+  bool stepper_isr_enabled = STEPPER_ISR_ENABLED();
+  DISABLE_STEPPER_DRIVER_INTERRUPT();
+
+  // Record stepper position
+  stepper.endstop_triggered(axis);
+
+  // Discard the active block that led to the trigger
+  discard_current_block();
+
+  // Discard the CONTINUED block, if any. Note the planner can only queue 1 continued
+  // block after a previous non continued block, as the condition to queue them
+  // is that there are no queued blocks at the time a new block is queued.
+  const bool discard = has_blocks_queued() && TEST(block_buffer[block_buffer_tail].flag, BLOCK_BIT_CONTINUED);
+  if (discard) discard_current_block();
+
+  // Reenable stepper ISR if it was enabled
+  if (stepper_isr_enabled) ENABLE_STEPPER_DRIVER_INTERRUPT();
+}
+
+float Planner::triggered_position_mm(const AxisEnum axis) {
+  return stepper.triggered_position(axis) * steps_to_mm[axis];
+}
+
+void Planner::finish_and_disable() {
+  while (has_blocks_queued() || cleaning_buffer_counter) idle();
+  disable_all_steppers();
+}
+
 /**
  * Get an axis position according to stepper position(s)
  * For CORE machines apply translation from ABC to XYZ.
@@ -1287,7 +1388,7 @@ float Planner::get_axis_position_mm(const AxisEnum axis) {
 
       // Protect the access to the position.
       const bool was_enabled = STEPPER_ISR_ENABLED();
-      DISABLE_STEPPER_DRIVER_INTERRUPT();
+      if (was_enabled) DISABLE_STEPPER_DRIVER_INTERRUPT();
 
       // ((a1+a2)+(a1-a2))/2 -> (a1+a2+a1-a2)/2 -> (a1+a1)/2 -> a1
       // ((a1+a2)-(a1-a2))/2 -> (a1+a2-a1+a2)/2 -> (a2+a2)/2 -> a2
@@ -1309,18 +1410,69 @@ float Planner::get_axis_position_mm(const AxisEnum axis) {
 /**
  * Block until all buffered steps are executed / cleaned
  */
-void Planner::synchronize() { while (has_blocks_queued() || stepper.cleaning_buffer_counter) idle(); }
+void Planner::synchronize() { while (has_blocks_queued() || cleaning_buffer_counter) idle(); }
 
 /**
  * Planner::_buffer_steps
  *
- * Add a new linear movement to the buffer (in terms of steps).
+ * Add a new linear movement to the planner queue (in terms of steps).
+ *
+ *  target      - target position in steps units
+ *  fr_mm_s     - (target) speed of the move
+ *  extruder    - target extruder
+ *  millimeters - the length of the movement, if known
+ *
+ * Returns true if the movement was queued or discarded by _populate_block, false if refused while cleaning
+ */
+bool Planner::_buffer_steps(const int32_t (&target)[XYZE]
+  #if HAS_POSITION_FLOAT
+    , const float (&target_float)[XYZE]
+  #endif
+  , float fr_mm_s, const uint8_t extruder, const float &millimeters
+) {
+
+  // While cleaning_buffer_counter is non-zero, refuse to queue movements
+  if (cleaning_buffer_counter) return false;
+
+  // Wait for the next available block; next_buffer_head receives the head index to publish
+  uint8_t next_buffer_head;
+  block_t * const block = get_next_free_block(next_buffer_head);
+
+  // Fill the block with the specified movement (false = not part of a split move)
+  if (!_populate_block(block, false, target
+  #if HAS_POSITION_FLOAT
+    , target_float
+  #endif
+    , fr_mm_s, extruder, millimeters
+  )) {
+    // Movement was not queued, probably because it was too short. The head is not
+    //  advanced, so the block is not published. Accept that as movement queued and done
+    return true;
+  }
+
+  // Move buffer head, making the filled block part of the queue
+  block_buffer_head = next_buffer_head;
+
+  // Recalculate and optimize trapezoidal speed profiles
+  recalculate();
+
+  // Movement successfully queued!
+  return true;
+}
+
+/**
+ * Planner::_populate_block
+ *
+ * Fills a new linear movement in the block (in terms of steps).
  *
  *  target      - target position in steps units
  *  fr_mm_s     - (target) speed of the move
  *  extruder    - target extruder
+ *
+ * Returns true if movement is acceptable, false otherwise
  */
-void Planner::_buffer_steps(const int32_t (&target)[XYZE]
+bool Planner::_populate_block(block_t * const block, bool split_move,
+  const int32_t (&target)[XYZE]
   #if HAS_POSITION_FLOAT
     , const float (&target_float)[XYZE]
   #endif
@@ -1334,7 +1486,7 @@ void Planner::_buffer_steps(const int32_t (&target)[XYZE]
   int32_t de = target[E_AXIS] - position[E_AXIS];
 
   /* <-- add a slash to enable
-    SERIAL_ECHOPAIR("  _buffer_steps FR:", fr_mm_s);
+    SERIAL_ECHOPAIR("  _populate_block FR:", fr_mm_s);
     SERIAL_ECHOPAIR(" A:", target[A_AXIS]);
     SERIAL_ECHOPAIR(" (", da);
     SERIAL_ECHOPAIR(" steps) B:", target[B_AXIS]);
@@ -1401,11 +1553,7 @@ void Planner::_buffer_steps(const int32_t (&target)[XYZE]
   if (de < 0) SBI(dm, E_AXIS);
 
   const float esteps_float = de * e_factor[extruder];
-  const int32_t esteps = ABS(esteps_float) + 0.5;
-
-  // Wait for the next available block
-  uint8_t next_buffer_head;
-  block_t * const block = get_next_free_block(next_buffer_head);
+  const uint32_t esteps = ABS(esteps_float) + 0.5;
 
   // Clear all flags, including the "busy" bit
   block->flag = 0x00;
@@ -1442,7 +1590,7 @@ void Planner::_buffer_steps(const int32_t (&target)[XYZE]
   block->step_event_count = MAX4(block->steps[A_AXIS], block->steps[B_AXIS], block->steps[C_AXIS], esteps);
 
   // Bail if this is a zero-length block
-  if (block->step_event_count < MIN_STEPS_PER_SEGMENT) return;
+  if (block->step_event_count < MIN_STEPS_PER_SEGMENT) return false;
 
   // For a mixing extruder, get a magnified step_event_count for each
   #if ENABLED(MIXING_EXTRUDER)
@@ -1682,12 +1830,16 @@ void Planner::_buffer_steps(const int32_t (&target)[XYZE]
   #endif
 
   #if ENABLED(ULTRA_LCD)
-    CRITICAL_SECTION_START
-      block_buffer_runtime_us += segment_time_us;
-    CRITICAL_SECTION_END
+    // Protect the update of block_buffer_runtime_us from the stepper ISR.
+    const bool was_enabled = STEPPER_ISR_ENABLED();
+    if (was_enabled) DISABLE_STEPPER_DRIVER_INTERRUPT();
+
+    block_buffer_runtime_us += segment_time_us;
+
+    if (was_enabled) ENABLE_STEPPER_DRIVER_INTERRUPT();
   #endif
 
-  block->nominal_speed = block->millimeters * inverse_secs;           //   (mm/sec) Always > 0
+  block->nominal_speed_sqr = sq(block->millimeters * inverse_secs);   //   (mm/sec)^2 Always > 0
   block->nominal_rate = CEIL(block->step_event_count * inverse_secs); // (step/sec) Always > 0
 
   #if ENABLED(FILAMENT_WIDTH_SENSOR)
@@ -1775,8 +1927,8 @@ void Planner::_buffer_steps(const int32_t (&target)[XYZE]
   // Correct the speed
   if (speed_factor < 1.0) {
     LOOP_XYZE(i) current_speed[i] *= speed_factor;
-    block->nominal_speed *= speed_factor;
     block->nominal_rate *= speed_factor;
+    block->nominal_speed_sqr = block->nominal_speed_sqr * sq(speed_factor);
   }
 
   // Compute and limit the acceleration rate for the trapezoid generator.
@@ -1871,13 +2023,13 @@ void Planner::_buffer_steps(const int32_t (&target)[XYZE]
   block->acceleration_steps_per_s2 = accel;
   block->acceleration = accel / steps_per_mm;
   #if DISABLED(BEZIER_JERK_CONTROL)
-    block->acceleration_rate = (long)(accel * (4096.0 * 4096.0 / (HAL_STEPPER_TIMER_RATE))); // * 8.388608
+    block->acceleration_rate = (uint32_t)(accel * (4096.0 * 4096.0 / (HAL_STEPPER_TIMER_RATE)));
   #endif
   #if ENABLED(LIN_ADVANCE)
     if (block->use_advance_lead) {
       block->advance_speed = (HAL_STEPPER_TIMER_RATE) / (extruder_advance_K * block->e_D_ratio * block->acceleration * axis_steps_per_mm[E_AXIS_N]);
       #if ENABLED(LA_DEBUG)
-        if (extruder_advance_K * block->e_D_ratio * block->acceleration * 2 < block->nominal_speed * block->e_D_ratio)
+        if (extruder_advance_K * block->e_D_ratio * block->acceleration * 2 < SQRT(block->nominal_speed_sqr) * block->e_D_ratio)
           SERIAL_ECHOLNPGM("More than 2 steps per eISR loop executed.");
         if (block->advance_speed < 200)
           SERIAL_ECHOLNPGM("eISR running at > 10kHz.");
@@ -1885,7 +2037,7 @@ void Planner::_buffer_steps(const int32_t (&target)[XYZE]
     }
   #endif
 
-  float vmax_junction; // Initial limit on the segment entry velocity
+  float vmax_junction_sqr; // Initial limit on the segment entry velocity (mm/s)^2
 
   #if ENABLED(JUNCTION_DEVIATION)
 
@@ -1911,7 +2063,17 @@ void Planner::_buffer_steps(const int32_t (&target)[XYZE]
      * changed dynamically during operation nor can the line move geometry. This must be kept in
      * memory in the event of a feedrate override changing the nominal speeds of blocks, which can
      * change the overall maximum entry speed conditions of all blocks.
-     */
+     *
+     * #######
+     * https://github.com/MarlinFirmware/Marlin/issues/10341#issuecomment-388191754
+     *
+     * hoffbaked: on May 10 2018 tuned and improved the GRBL algorithm for Marlin:
+          Okay! It seems to be working good. I somewhat arbitrarily cut it off at 1mm
+          on then on anything with less sides than an octagon. With this, and the
+          reverse pass actually recalculating things, a corner acceleration value
+          of 1000 junction deviation of .05 are pretty reasonable. If the cycles
+          can be spared, a better acos could be used. For all I know, it may be
+          already calculated in a different place. */
 
     // Unit vector of previous path line segment
     static float previous_unit_vec[
@@ -1932,7 +2094,7 @@ void Planner::_buffer_steps(const int32_t (&target)[XYZE]
     };
 
     // Skip first block or when previous_nominal_speed is used as a flag for homing and offset cycles.
-    if (moves_queued && !UNEAR_ZERO(previous_nominal_speed)) {
+    if (moves_queued && !UNEAR_ZERO(previous_nominal_speed_sqr)) {
       // Compute cosine of angle between previous and current path. (prev_unit_vec is negative)
       // NOTE: Max junction velocity is computed without sin() or acos() by trig half angle identity.
       float junction_cos_theta = -previous_unit_vec[X_AXIS] * unit_vec[X_AXIS]
@@ -1946,21 +2108,33 @@ void Planner::_buffer_steps(const int32_t (&target)[XYZE]
       // NOTE: Computed without any expensive trig, sin() or acos(), by trig half angle identity of cos(theta).
       if (junction_cos_theta > 0.999999) {
         // For a 0 degree acute junction, just set minimum junction speed.
-        vmax_junction = MINIMUM_PLANNER_SPEED;
+        vmax_junction_sqr = sq(MINIMUM_PLANNER_SPEED);
       }
       else {
-        junction_cos_theta = MAX(junction_cos_theta, -0.999999); // Check for numerical round-off to avoid divide by zero.
+        NOLESS(junction_cos_theta, -0.999999); // Check for numerical round-off to avoid divide by zero.
         const float sin_theta_d2 = SQRT(0.5 * (1.0 - junction_cos_theta)); // Trig half angle identity. Always positive.
 
         // TODO: Technically, the acceleration used in calculation needs to be limited by the minimum of the
         // two junctions. However, this shouldn't be a significant problem except in extreme circumstances.
-        vmax_junction = SQRT((block->acceleration * JUNCTION_DEVIATION_FACTOR * sin_theta_d2) / (1.0 - sin_theta_d2));
+        vmax_junction_sqr = (JUNCTION_ACCELERATION_FACTOR * JUNCTION_DEVIATION_FACTOR * sin_theta_d2) / (1.0 - sin_theta_d2);
+        if (block->millimeters < 1.0) {
+
+          // Fast acos approximation, minus the error bar to be safe
+          const float junction_theta = (RADIANS(-40) * sq(junction_cos_theta) - RADIANS(50)) * junction_cos_theta + RADIANS(90) - 0.18;
+
+          // If angle is greater than 135 degrees (octagon), find speed for approximate arc
+          if (junction_theta > RADIANS(135)) {
+            const float limit_sqr = block->millimeters / (RADIANS(180) - junction_theta) * JUNCTION_ACCELERATION_FACTOR;
+            NOMORE(vmax_junction_sqr, limit_sqr);
+          }
+        }
       }
 
-      vmax_junction = MIN3(vmax_junction, block->nominal_speed, previous_nominal_speed);
+      // Get the lowest speed
+      vmax_junction_sqr = MIN3(vmax_junction_sqr, block->nominal_speed_sqr, previous_nominal_speed_sqr);
     }
     else // Init entry speed to zero. Assume it starts from rest. Planner will correct this later.
-      vmax_junction = 0.0;
+      vmax_junction_sqr = 0.0;
 
     COPY(previous_unit_vec, unit_vec);
 
@@ -1976,13 +2150,15 @@ void Planner::_buffer_steps(const int32_t (&target)[XYZE]
     // Exit speed limited by a jerk to full halt of a previous last segment
     static float previous_safe_speed;
 
-    float safe_speed = block->nominal_speed;
+    const float nominal_speed = SQRT(block->nominal_speed_sqr);
+    float safe_speed = nominal_speed;
+
     uint8_t limited = 0;
     LOOP_XYZE(i) {
       const float jerk = ABS(current_speed[i]), maxj = max_jerk[i];
       if (jerk > maxj) {
         if (limited) {
-          const float mjerk = maxj * block->nominal_speed;
+          const float mjerk = maxj * nominal_speed;
           if (jerk * safe_speed > mjerk) safe_speed = mjerk / jerk;
         }
         else {
@@ -1992,19 +2168,21 @@ void Planner::_buffer_steps(const int32_t (&target)[XYZE]
       }
     }
 
-    if (moves_queued && !UNEAR_ZERO(previous_nominal_speed)) {
+    float vmax_junction;
+    if (moves_queued && !UNEAR_ZERO(previous_nominal_speed_sqr)) {
       // Estimate a maximum velocity allowed at a joint of two successive segments.
       // If this maximum velocity allowed is lower than the minimum of the entry / exit safe velocities,
       // then the machine is not coasting anymore and the safe entry / exit velocities shall be used.
 
-      // The junction velocity will be shared between successive segments. Limit the junction velocity to their minimum.
-      // Pick the smaller of the nominal speeds. Higher speed shall not be achieved at the junction during coasting.
-      vmax_junction = MIN(block->nominal_speed, previous_nominal_speed);
-
       // Factor to multiply the previous / current nominal velocities to get componentwise limited velocities.
       float v_factor = 1;
       limited = 0;
 
+      // The junction velocity will be shared between successive segments. Limit the junction velocity to their minimum.
+      // Pick the smaller of the nominal speeds. Higher speed shall not be achieved at the junction during coasting.
+      const float previous_nominal_speed = SQRT(previous_nominal_speed_sqr);
+      vmax_junction = MIN(nominal_speed, previous_nominal_speed);
+
       // Now limit the jerk in all axes.
       const float smaller_speed_factor = vmax_junction / previous_nominal_speed;
       LOOP_XYZE(axis) {
@@ -2039,16 +2217,19 @@ void Planner::_buffer_steps(const int32_t (&target)[XYZE]
       vmax_junction = safe_speed;
 
     previous_safe_speed = safe_speed;
+    vmax_junction_sqr = sq(vmax_junction);
+
   #endif // Classic Jerk Limiting
 
   // Max entry speed of this block equals the max exit speed of the previous block.
-  block->max_entry_speed = vmax_junction;
+  block->max_entry_speed_sqr = vmax_junction_sqr;
 
   // Initialize block entry speed. Compute based on deceleration to user-defined MINIMUM_PLANNER_SPEED.
-  const float v_allowable = max_allowable_speed(-block->acceleration, MINIMUM_PLANNER_SPEED, block->millimeters);
-  // If stepper ISR is disabled, this indicates buffer_segment wants to add a split block.
-  // In this case start with the max. allowed speed to avoid an interrupted first move.
-  block->entry_speed = STEPPER_ISR_ENABLED() ? MINIMUM_PLANNER_SPEED : MIN(vmax_junction, v_allowable);
+  const float v_allowable_sqr = max_allowable_speed_sqr(-block->acceleration, sq(MINIMUM_PLANNER_SPEED), block->millimeters);
+
+  // If we are trying to add a split block, start with the
+  // max. allowed speed to avoid an interrupted first move.
+  block->entry_speed_sqr = !split_move ? sq(MINIMUM_PLANNER_SPEED) : MIN(vmax_junction_sqr, v_allowable_sqr);
 
   // Initialize planner efficiency flags
   // Set flag if block will always reach maximum junction speed regardless of entry/exit speeds.
@@ -2058,25 +2239,22 @@ void Planner::_buffer_steps(const int32_t (&target)[XYZE]
   // block nominal speed limits both the current and next maximum junction speeds. Hence, in both
   // the reverse and forward planners, the corresponding block junction speed will always be at the
   // the maximum junction speed and may always be ignored for any speed reduction checks.
-  block->flag |= block->nominal_speed <= v_allowable ? BLOCK_FLAG_RECALCULATE | BLOCK_FLAG_NOMINAL_LENGTH : BLOCK_FLAG_RECALCULATE;
+  block->flag |= block->nominal_speed_sqr <= v_allowable_sqr ? BLOCK_FLAG_RECALCULATE | BLOCK_FLAG_NOMINAL_LENGTH : BLOCK_FLAG_RECALCULATE;
 
   // Update previous path unit_vector and nominal speed
   COPY(previous_speed, current_speed);
-  previous_nominal_speed = block->nominal_speed;
-
-  // Move buffer head
-  block_buffer_head = next_buffer_head;
+  previous_nominal_speed_sqr = block->nominal_speed_sqr;
 
-  // Update the position (only when a move was queued)
+  // Update the position
   static_assert(COUNT(target) > 1, "Parameter to _buffer_steps must be (&target)[XYZE]!");
   COPY(position, target);
   #if HAS_POSITION_FLOAT
     COPY(position_float, target_float);
   #endif
 
-  recalculate();
-
-} // _buffer_steps()
+  // Movement was accepted
+  return true;
+} // _populate_block()
 
 /**
  * Planner::buffer_sync_block
@@ -2087,29 +2265,15 @@ void Planner::buffer_sync_block() {
   uint8_t next_buffer_head;
   block_t * const block = get_next_free_block(next_buffer_head);
 
-  block->flag = BLOCK_FLAG_SYNC_POSITION;
+  // Clear block
+  memset(block, 0, sizeof(block_t));
 
-  block->steps[A_AXIS] = position[A_AXIS];
-  block->steps[B_AXIS] = position[B_AXIS];
-  block->steps[C_AXIS] = position[C_AXIS];
-  block->steps[E_AXIS] = position[E_AXIS];
-
-  #if ENABLED(LIN_ADVANCE)
-    block->use_advance_lead = false;
-  #endif
-
-  block->nominal_speed   =
-  block->entry_speed     =
-  block->max_entry_speed =
-  block->millimeters     =
-  block->acceleration    = 0;
+  block->flag = BLOCK_FLAG_SYNC_POSITION;
 
-  block->step_event_count          =
-  block->nominal_rate              =
-  block->initial_rate              =
-  block->final_rate                =
-  block->acceleration_steps_per_s2 =
-  block->segment_time_us           = 0;
+  block->position[A_AXIS] = position[A_AXIS];
+  block->position[B_AXIS] = position[B_AXIS];
+  block->position[C_AXIS] = position[C_AXIS];
+  block->position[E_AXIS] = position[E_AXIS];
 
   block_buffer_head = next_buffer_head;
   stepper.wake_up();
@@ -2127,7 +2291,11 @@ void Planner::buffer_sync_block() {
  *  extruder    - target extruder
  *  millimeters - the length of the movement, if known
  */
-void Planner::buffer_segment(const float &a, const float &b, const float &c, const float &e, const float &fr_mm_s, const uint8_t extruder, const float &millimeters/*=0.0*/) {
+bool Planner::buffer_segment(const float &a, const float &b, const float &c, const float &e, const float &fr_mm_s, const uint8_t extruder, const float &millimeters/*=0.0*/) {
+
+  // If we are cleaning, do not accept queuing of movements
+  if (cleaning_buffer_counter) return false;
+
   // When changing extruders recalculate steps corresponding to the E position
   #if ENABLED(DISTINCT_E_FACTORS)
     if (last_extruder != extruder && axis_steps_per_mm[E_AXIS_N] != axis_steps_per_mm[E_AXIS + last_extruder]) {
@@ -2196,37 +2364,80 @@ void Planner::buffer_segment(const float &a, const float &b, const float &c, con
       const float between_float[ABCE] = { _BETWEEN_F(A), _BETWEEN_F(B), _BETWEEN_F(C), _BETWEEN_F(E) };
     #endif
 
-    DISABLE_STEPPER_DRIVER_INTERRUPT();
+    // The new head value is not assigned yet
+    uint8_t buffer_head = 0;
+    bool added = false;
 
-    _buffer_steps(between
-      #if HAS_POSITION_FLOAT
-        , between_float
-      #endif
-      , fr_mm_s, extruder, millimeters * 0.5
-    );
+    uint8_t next_buffer_head;
+    block_t *block = get_next_free_block(next_buffer_head, 2);
 
-    const uint8_t next = block_buffer_head;
+    // Fill the block with the specified movement
+    if (
+      _populate_block(block, true, between
+        #if HAS_POSITION_FLOAT
+          , between_float
+        #endif
+        , fr_mm_s, extruder, millimeters * 0.5
+      )
+    ) {
+      // Movement accepted - Point to the next reserved block
+      block = &block_buffer[next_buffer_head];
+
+      // Store into the new to be stored head
+      buffer_head = next_buffer_head;
+      added = true;
+
+      // And advance the pointer to the next unused slot
+      next_buffer_head = next_block_index(next_buffer_head);
+    }
+
+    // Fill the second part of the block with the 2nd part of the movement
+    if (
+      _populate_block(block, true, target
+        #if HAS_POSITION_FLOAT
+          , target_float
+        #endif
+        , fr_mm_s, extruder, millimeters * 0.5
+      )
+    ) {
+      // Movement accepted - If this block is a continuation
+      // of the previous one, mark it as such
+      if (added) SBI(block->flag, BLOCK_BIT_CONTINUED);
+
+      // Store into the new to be stored head
+      buffer_head = next_buffer_head;
+      added = true;
+    }
 
-    _buffer_steps(target
+    // If any of the movements was added
+    if (added) {
+
+      // Move buffer head and add all the blocks that were filled
+      // successfully to the movement queue.
+      block_buffer_head = buffer_head;
+
+      // Update the position (only when a move was queued)
+      static_assert(COUNT(target) > 1, "Parameter to _buffer_steps must be (&target)[XYZE]!");
+      COPY(position, target);
       #if HAS_POSITION_FLOAT
-        , target_float
+        COPY(position_float, target_float);
       #endif
-      , fr_mm_s, extruder, millimeters * 0.5
-    );
 
-    SBI(block_buffer[next].flag, BLOCK_BIT_CONTINUED);
-    ENABLE_STEPPER_DRIVER_INTERRUPT();
+      // Recalculate and optimize trapezoidal speed profiles
+      recalculate();
+    }
   }
-  else
-    _buffer_steps(target
+  else if (
+    !_buffer_steps(target
       #if HAS_POSITION_FLOAT
         , target_float
       #endif
       , fr_mm_s, extruder, millimeters
-    );
+    )
+  ) return false;
 
   stepper.wake_up();
-
+  return true;
 } // buffer_segment()
 
 /**
@@ -2253,7 +2464,7 @@ void Planner::_set_position_mm(const float &a, const float &b, const float &c, c
     position_float[C_AXIS] = c;
     position_float[E_AXIS] = e;
   #endif
-  previous_nominal_speed = 0.0; // Resets planner junction speeds. Assumes start from rest.
+  previous_nominal_speed_sqr = 0.0; // Resets planner junction speeds. Assumes start from rest.
   ZERO(previous_speed);
   buffer_sync_block();
 }
@@ -2273,22 +2484,6 @@ void Planner::set_position_mm_kinematic(const float (&cart)[XYZE]) {
   #endif
 }
 
-/**
- * Sync from the stepper positions. (e.g., after an interrupted move)
- */
-void Planner::sync_from_steppers() {
-  LOOP_XYZE(i) {
-    position[i] = stepper.position((AxisEnum)i);
-    #if HAS_POSITION_FLOAT
-      position_float[i] = position[i] * steps_to_mm[i
-        #if ENABLED(DISTINCT_E_FACTORS)
-          + (i == E_AXIS ? active_extruder : 0)
-        #endif
-      ];
-    #endif
-  }
-}
-
 /**
  * Setters for planner position (also setting stepper position).
  */
diff --git a/Marlin/planner.h b/Marlin/planner.h
index 35ce6c2807..d022eca088 100644
--- a/Marlin/planner.h
+++ b/Marlin/planner.h
@@ -49,7 +49,7 @@ enum BlockFlagBit : char {
   // from a safe speed (in consideration of jerking from zero speed).
   BLOCK_BIT_NOMINAL_LENGTH,
 
-  // The block is busy
+  // The block is busy, being interpreted by the stepper ISR
   BLOCK_BIT_BUSY,
 
   // The block is segment 2+ of a longer move
@@ -80,24 +80,35 @@ typedef struct {
 
   uint8_t flag;                             // Block flags (See BlockFlag enum above)
 
-  unsigned char active_extruder;            // The extruder to move (if E move)
+  // Fields used by the motion planner to manage acceleration
+  float nominal_speed_sqr,                  // The nominal speed for this block in (mm/sec)^2
+        entry_speed_sqr,                    // Entry speed at previous-current junction in (mm/sec)^2
+        max_entry_speed_sqr,                // Maximum allowable junction entry speed in (mm/sec)^2
+        millimeters,                        // The total travel of this block in mm
+        acceleration;                       // acceleration mm/sec^2
 
-  // Fields used by the Bresenham algorithm for tracing the line
-  int32_t steps[NUM_AXIS];                  // Step count along each axis
+  union {
+    // Data used by all move blocks
+    struct {
+      // Fields used by the Bresenham algorithm for tracing the line
+      uint32_t steps[NUM_AXIS];             // Step count along each axis
+    };
+    // Data used by all sync blocks
+    struct {
+      int32_t position[NUM_AXIS];           // New position to force when this sync block is executed
+    };
+  };
   uint32_t step_event_count;                // The number of step events required to complete this block
 
+  uint8_t active_extruder;                  // The extruder to move (if E move)
+
   #if ENABLED(MIXING_EXTRUDER)
     uint32_t mix_event_count[MIXING_STEPPERS]; // Scaled step_event_count for the mixing steppers
   #endif
 
   // Settings for the trapezoid generator
-  int32_t accelerate_until,                 // The index of the step event on which to stop acceleration
-          decelerate_after;                 // The index of the step event on which to start decelerating
-
-  uint32_t nominal_rate,                    // The nominal step rate for this block in step_events/sec
-           initial_rate,                    // The jerk-adjusted step rate at start of block
-           final_rate,                      // The minimal rate at exit
-           acceleration_steps_per_s2;       // acceleration steps/sec^2
+  uint32_t accelerate_until,                // The index of the step event on which to stop acceleration
+           decelerate_after;                // The index of the step event on which to start decelerating
 
   #if ENABLED(BEZIER_JERK_CONTROL)
     uint32_t cruise_rate;                   // The actual cruise rate to use, between end of the acceleration phase and start of deceleration phase
@@ -106,7 +117,7 @@ typedef struct {
     uint32_t acceleration_time_inverse,     // Inverse of acceleration and deceleration periods, expressed as integer. Scale depends on CPU being used
              deceleration_time_inverse;
   #else
-    int32_t acceleration_rate;              // The acceleration rate used for acceleration calculation
+    uint32_t acceleration_rate;             // The acceleration rate used for acceleration calculation
   #endif
 
   uint8_t direction_bits;                   // The direction bit set for this block (refers to *_DIRECTION_BIT in config.h)
@@ -120,12 +131,10 @@ typedef struct {
     float e_D_ratio;
   #endif
 
-  // Fields used by the motion planner to manage acceleration
-  float nominal_speed,                      // The nominal speed for this block in mm/sec
-        entry_speed,                        // Entry speed at previous-current junction in mm/sec
-        max_entry_speed,                    // Maximum allowable junction entry speed in mm/sec
-        millimeters,                        // The total travel of this block in mm
-        acceleration;                       // acceleration mm/sec^2
+  uint32_t nominal_rate,                    // The nominal step rate for this block in step_events/sec
+           initial_rate,                    // The jerk-adjusted step rate at start of block
+           final_rate,                      // The minimal rate at exit
+           acceleration_steps_per_s2;       // acceleration steps/sec^2
 
   #if FAN_COUNT > 0
     uint16_t fan_speed[FAN_COUNT];
@@ -162,6 +171,7 @@ class Planner {
     static block_t block_buffer[BLOCK_BUFFER_SIZE];
     static volatile uint8_t block_buffer_head,      // Index of the next block to be pushed
                             block_buffer_tail;      // Index of the busy block, if any
+    static uint16_t cleaning_buffer_counter;        // A counter to disable queuing of blocks
 
     #if ENABLED(DISTINCT_E_FACTORS)
       static uint8_t last_extruder;                 // Respond to extruder change
@@ -229,6 +239,10 @@ class Planner {
       #endif
     #endif
 
+    #if ENABLED(ABORT_ON_ENDSTOP_HIT_FEATURE_ENABLED)
+      static bool abort_on_endstop_hit;
+    #endif
+
   private:
 
     /**
@@ -243,9 +257,9 @@ class Planner {
     static float previous_speed[NUM_AXIS];
 
     /**
-     * Nominal speed of previous path line segment
+     * Nominal speed of previous path line segment (mm/s)^2
      */
-    static float previous_nominal_speed;
+    static float previous_nominal_speed_sqr;
 
     /**
      * Limit where 64bit math is necessary for acceleration calculation
@@ -304,15 +318,6 @@ class Planner {
     // Manage fans, paste pressure, etc.
     static void check_axes_activity();
 
-    /**
-     * Number of moves currently in the planner
-     */
-    FORCE_INLINE static uint8_t movesplanned() { return BLOCK_MOD(block_buffer_head - block_buffer_tail + BLOCK_BUFFER_SIZE); }
-
-    FORCE_INLINE static void clear_block_buffer() { block_buffer_head = block_buffer_tail = 0; }
-
-    FORCE_INLINE static bool is_full() { return block_buffer_tail == next_block_index(block_buffer_head); }
-
     // Update multipliers based on new diameter measurements
     static void calculate_volumetric_multipliers();
 
@@ -420,16 +425,32 @@ class Planner {
       #define ARG_Z const float &rz
     #endif
 
+    // Number of moves currently in the planner (head minus tail, wrapped by BLOCK_MOD)
+    FORCE_INLINE static uint8_t movesplanned() { return BLOCK_MOD(block_buffer_head - block_buffer_tail); }
+
+    // Remove all blocks from the buffer by resetting both head and tail indices to 0
+    FORCE_INLINE static void clear_block_buffer() { block_buffer_head = block_buffer_tail = 0; }
+
+    // Check if movement queue is full (the index following the head equals the tail)
+    FORCE_INLINE static bool is_full() { return block_buffer_tail == next_block_index(block_buffer_head); }
+
+    // Get count of movement slots free (counted against a capacity of BLOCK_BUFFER_SIZE - 1 moves)
+    FORCE_INLINE static uint8_t moves_free() { return BLOCK_BUFFER_SIZE - 1 - movesplanned(); }
+
     /**
      * Planner::get_next_free_block
      *
-     * - Get the next head index (passed by reference)
-     * - Wait for a space to open up in the planner
-     * - Return the head block
+     * - Get the next head indices (passed by reference)
+     * - Wait for the number of spaces to open up in the planner
+     * - Return the first head block
      */
-    FORCE_INLINE static block_t* get_next_free_block(uint8_t &next_buffer_head) {
+    FORCE_INLINE static block_t* get_next_free_block(uint8_t &next_buffer_head, const uint8_t count=1) {
+
+      // Wait until there are enough slots free
+      while (moves_free() < count) { idle(); }
+
+      // Return the first available block
       next_buffer_head = next_block_index(block_buffer_head);
-      while (block_buffer_tail == next_buffer_head) idle(); // while (is_full)
       return &block_buffer[block_buffer_head];
     }
 
@@ -442,8 +463,30 @@ class Planner {
      *  fr_mm_s     - (target) speed of the move
      *  extruder    - target extruder
      *  millimeters - the length of the movement, if known
+     *
+     * Returns true if the movement was queued or discarded by _populate_block, false if refused while cleaning
+     */
+    static bool _buffer_steps(const int32_t (&target)[XYZE]
+      #if HAS_POSITION_FLOAT
+        , const float (&target_float)[XYZE]
+      #endif
+      , float fr_mm_s, const uint8_t extruder, const float &millimeters=0.0
+    );
+
+    /**
+     * Planner::_populate_block
+     *
+     * Fills a new linear movement in the block (in terms of steps).
+     *
+     *  target      - target position in steps units
+     *  fr_mm_s     - (target) speed of the move
+     *  extruder    - target extruder
+     *  millimeters - the length of the movement, if known
+     *
+     * Returns true if movement is acceptable, false otherwise
      */
-    static void _buffer_steps(const int32_t (&target)[XYZE]
+    static bool _populate_block(block_t * const block, bool split_move,
+        const int32_t (&target)[XYZE]
       #if HAS_POSITION_FLOAT
         , const float (&target_float)[XYZE]
       #endif
@@ -468,7 +511,7 @@ class Planner {
      *  extruder    - target extruder
      *  millimeters - the length of the movement, if known
      */
-    static void buffer_segment(const float &a, const float &b, const float &c, const float &e, const float &fr_mm_s, const uint8_t extruder, const float &millimeters=0.0);
+    static bool buffer_segment(const float &a, const float &b, const float &c, const float &e, const float &fr_mm_s, const uint8_t extruder, const float &millimeters=0.0);
 
     static void _set_position_mm(const float &a, const float &b, const float &c, const float &e);
 
@@ -485,11 +528,11 @@ class Planner {
      *  extruder     - target extruder
      *  millimeters  - the length of the movement, if known
      */
-    FORCE_INLINE static void buffer_line(ARG_X, ARG_Y, ARG_Z, const float &e, const float &fr_mm_s, const uint8_t extruder, const float millimeters = 0.0) {
+    FORCE_INLINE static bool buffer_line(ARG_X, ARG_Y, ARG_Z, const float &e, const float &fr_mm_s, const uint8_t extruder, const float millimeters = 0.0) {
       #if PLANNER_LEVELING && IS_CARTESIAN
         apply_leveling(rx, ry, rz);
       #endif
-      buffer_segment(rx, ry, rz, e, fr_mm_s, extruder, millimeters);
+      return buffer_segment(rx, ry, rz, e, fr_mm_s, extruder, millimeters);
     }
 
     /**
@@ -502,7 +545,7 @@ class Planner {
      *  extruder     - target extruder
      *  millimeters  - the length of the movement, if known
      */
-    FORCE_INLINE static void buffer_line_kinematic(const float (&cart)[XYZE], const float &fr_mm_s, const uint8_t extruder, const float millimeters = 0.0) {
+    FORCE_INLINE static bool buffer_line_kinematic(const float (&cart)[XYZE], const float &fr_mm_s, const uint8_t extruder, const float millimeters = 0.0) {
       #if PLANNER_LEVELING
         float raw[XYZ] = { cart[X_AXIS], cart[Y_AXIS], cart[Z_AXIS] };
         apply_leveling(raw);
@@ -511,9 +554,9 @@ class Planner {
       #endif
       #if IS_KINEMATIC
         inverse_kinematics(raw);
-        buffer_segment(delta[A_AXIS], delta[B_AXIS], delta[C_AXIS], cart[E_AXIS], fr_mm_s, extruder, millimeters);
+        return buffer_segment(delta[A_AXIS], delta[B_AXIS], delta[C_AXIS], cart[E_AXIS], fr_mm_s, extruder, millimeters);
       #else
-        buffer_segment(raw[X_AXIS], raw[Y_AXIS], raw[Z_AXIS], cart[E_AXIS], fr_mm_s, extruder, millimeters);
+        return buffer_segment(raw[X_AXIS], raw[Y_AXIS], raw[Z_AXIS], cart[E_AXIS], fr_mm_s, extruder, millimeters);
       #endif
     }
 
@@ -537,11 +580,6 @@ class Planner {
     FORCE_INLINE static void set_z_position_mm(const float &z) { set_position_mm(Z_AXIS, z); }
     FORCE_INLINE static void set_e_position_mm(const float &e) { set_position_mm(E_AXIS, e); }
 
-    /**
-     * Sync from the stepper positions. (e.g., after an interrupted move)
-     */
-    static void sync_from_steppers();
-
     /**
      * Get an axis position according to stepper position(s)
      * For CORE machines apply translation from ABC to XYZ.
@@ -553,34 +591,37 @@ class Planner {
       FORCE_INLINE static float get_axis_position_degrees(const AxisEnum axis) { return get_axis_position_mm(axis); }
     #endif
 
-    /**
-     * Does the buffer have any blocks queued?
-     */
-    FORCE_INLINE static bool has_blocks_queued() { return (block_buffer_head != block_buffer_tail); }
+    // Called to force a quick stop of the machine (for example, when an emergency
+    // stop is required, or when endstops are hit)
+    static void quick_stop();
+
+    // Called when an endstop is triggered. Causes the machine to stop immediately
+    static void endstop_triggered(const AxisEnum axis);
+
+    // Triggered position of an axis in mm (not core-savvy)
+    static float triggered_position_mm(const AxisEnum axis);
 
-    //
-    // Block until all buffered steps are executed
-    //
+    // Block until all buffered steps are executed / cleaned
     static void synchronize();
 
-    /**
-     * "Discard" the block and "release" the memory.
-     * Called when the current block is no longer needed.
-     */
-    FORCE_INLINE static void discard_current_block() {
-      if (has_blocks_queued())
-        block_buffer_tail = BLOCK_MOD(block_buffer_tail + 1);
+    // Wait for moves to finish and disable all steppers
+    static void finish_and_disable();
+
+    // Periodic tick to handle cleaning timeouts
+    // Called from the Temperature ISR at ~1kHz
+    static void tick() {
+      if (cleaning_buffer_counter) {
+        --cleaning_buffer_counter;
+        #if ENABLED(SD_FINISHED_STEPPERRELEASE) && defined(SD_FINISHED_RELEASECOMMAND)
+          if (!cleaning_buffer_counter) enqueue_and_echo_commands_P(PSTR(SD_FINISHED_RELEASECOMMAND));
+        #endif
+      }
     }
 
     /**
-     * "Discard" the next block if it's continued.
-     * Called after an interrupted move to throw away the rest of the move.
+     * Does the buffer have any blocks queued?
      */
-    FORCE_INLINE static bool discard_continued_block() {
-      const bool discard = has_blocks_queued() && TEST(block_buffer[block_buffer_tail].flag, BLOCK_BIT_CONTINUED);
-      if (discard) discard_current_block();
-      return discard;
-    }
+    FORCE_INLINE static bool has_blocks_queued() { return (block_buffer_head != block_buffer_tail); }
 
     /**
      * The current block. NULL if the buffer is empty.
@@ -603,23 +644,42 @@ class Planner {
         #if ENABLED(ULTRA_LCD)
           block_buffer_runtime_us -= block->segment_time_us; // We can't be sure how long an active block will take, so don't count it.
         #endif
+
+        // Mark the block as busy, so the planner does not attempt to replan it
         SBI(block->flag, BLOCK_BIT_BUSY);
         return block;
       }
-      else {
-        #if ENABLED(ULTRA_LCD)
-          clear_block_buffer_runtime(); // paranoia. Buffer is empty now - so reset accumulated time to zero.
-        #endif
-        return NULL;
-      }
+
+      // The queue became empty
+      #if ENABLED(ULTRA_LCD)
+        clear_block_buffer_runtime(); // paranoia. Buffer is empty now - so reset accumulated time to zero.
+      #endif
+
+      return NULL;
+    }
+
+    /**
+     * "Discard" the block and "release" the memory.
+     * Called when the current block is no longer needed.
+     * NB: There MUST be a current block to call this function!!
+     */
+    FORCE_INLINE static void discard_current_block() {
+      block_buffer_tail = BLOCK_MOD(block_buffer_tail + 1);
     }
 
     #if ENABLED(ULTRA_LCD)
 
       static uint16_t block_buffer_runtime() {
-        CRITICAL_SECTION_START
-          millis_t bbru = block_buffer_runtime_us;
-        CRITICAL_SECTION_END
+        // Protect the access to the variable. Only required for AVR, as
+        //  any 32bit CPU offers atomic access to 32bit variables
+        bool was_enabled = STEPPER_ISR_ENABLED();
+        if (was_enabled) DISABLE_STEPPER_DRIVER_INTERRUPT();
+
+        millis_t bbru = block_buffer_runtime_us;
+
+        // Reenable Stepper ISR
+        if (was_enabled) ENABLE_STEPPER_DRIVER_INTERRUPT();
+
         // To translate µs to ms a division by 1000 would be required.
         // We introduce 2.4% error here by dividing by 1024.
         // Doesn't matter because block_buffer_runtime_us is already too small an estimation.
@@ -630,9 +690,15 @@ class Planner {
       }
 
       static void clear_block_buffer_runtime() {
-        CRITICAL_SECTION_START
-          block_buffer_runtime_us = 0;
-        CRITICAL_SECTION_END
+        // Protect the access to the variable. Only required for AVR, as
+        //  any 32bit CPU offers atomic access to 32bit variables
+        bool was_enabled = STEPPER_ISR_ENABLED();
+        if (was_enabled) DISABLE_STEPPER_DRIVER_INTERRUPT();
+
+        block_buffer_runtime_us = 0;
+
+        // Reenable Stepper ISR
+        if (was_enabled) ENABLE_STEPPER_DRIVER_INTERRUPT();
       }
 
     #endif
@@ -675,12 +741,12 @@ class Planner {
     }
 
     /**
-     * Calculate the maximum allowable speed at this point, in order
-     * to reach 'target_velocity' using 'acceleration' within a given
+     * Calculate the maximum allowable speed squared at this point, in order
+     * to reach 'target_velocity_sqr' using 'acceleration' within a given
      * 'distance'.
      */
-    static float max_allowable_speed(const float &accel, const float &target_velocity, const float &distance) {
-      return SQRT(sq(target_velocity) - 2 * accel * distance);
+    static float max_allowable_speed_sqr(const float &accel, const float &target_velocity_sqr, const float &distance) {
+      return target_velocity_sqr - 2 * accel * distance;
     }
 
     #if ENABLED(BEZIER_JERK_CONTROL)
diff --git a/Marlin/planner_bezier.cpp b/Marlin/planner_bezier.cpp
index 6fc80c9ad4..5ed7c043ce 100644
--- a/Marlin/planner_bezier.cpp
+++ b/Marlin/planner_bezier.cpp
@@ -41,8 +41,7 @@
 #define MAX_STEP 0.1
 #define SIGMA 0.1
 
-/* Compute the linear interpolation between to real numbers.
-*/
+// Compute the linear interpolation between two real numbers.
 inline static float interp(float a, float b, float t) { return (1.0 - t) * a + t * b; }
 
 /**
@@ -188,12 +187,15 @@ void cubic_b_spline(const float position[NUM_AXIS], const float target[NUM_AXIS]
     bez_target[Z_AXIS] = interp(position[Z_AXIS], target[Z_AXIS], t);
     bez_target[E_AXIS] = interp(position[E_AXIS], target[E_AXIS], t);
     clamp_to_software_endstops(bez_target);
+
     #if HAS_UBL_AND_CURVES
       float pos[XYZ] = { bez_target[X_AXIS], bez_target[Y_AXIS], bez_target[Z_AXIS] };
       planner.apply_leveling(pos);
-      planner.buffer_segment(pos[X_AXIS], pos[Y_AXIS], pos[Z_AXIS], bez_target[E_AXIS], fr_mm_s, active_extruder);
+      if (!planner.buffer_segment(pos[X_AXIS], pos[Y_AXIS], pos[Z_AXIS], bez_target[E_AXIS], fr_mm_s, active_extruder))
+        break;
     #else
-      planner.buffer_line_kinematic(bez_target, fr_mm_s, extruder);
+      if (!planner.buffer_line_kinematic(bez_target, fr_mm_s, extruder))
+        break;
     #endif
   }
 }
diff --git a/Marlin/stepper.cpp b/Marlin/stepper.cpp
index 1a2eefe3bb..63bbdd1325 100644
--- a/Marlin/stepper.cpp
+++ b/Marlin/stepper.cpp
@@ -75,10 +75,6 @@ Stepper stepper; // Singleton
 
 block_t* Stepper::current_block = NULL;  // A pointer to the block currently being traced
 
-#if ENABLED(ABORT_ON_ENDSTOP_HIT_FEATURE_ENABLED)
-  bool Stepper::abort_on_endstop_hit = false;
-#endif
-
 #if ENABLED(X_DUAL_ENDSTOPS) || ENABLED(Y_DUAL_ENDSTOPS) || ENABLED(Z_DUAL_ENDSTOPS)
   bool Stepper::performing_homing = false;
 #endif
@@ -90,7 +86,6 @@ block_t* Stepper::current_block = NULL;  // A pointer to the block currently bei
 // private:
 
 uint8_t Stepper::last_direction_bits = 0;        // The next stepping-bits to be output
-int16_t Stepper::cleaning_buffer_counter = 0;
 
 #if ENABLED(X_DUAL_ENDSTOPS)
   bool Stepper::locked_x_motor = false, Stepper::locked_x2_motor = false;
@@ -107,7 +102,7 @@ int32_t Stepper::counter_X = 0,
         Stepper::counter_Z = 0,
         Stepper::counter_E = 0;
 
-volatile uint32_t Stepper::step_events_completed = 0; // The number of step events executed in the current block
+uint32_t Stepper::step_events_completed = 0; // The number of step events executed in the current block
 
 #if ENABLED(BEZIER_JERK_CONTROL)
   int32_t __attribute__((used)) Stepper::bezier_A __asm__("bezier_A");    // A coefficient in Bézier speed curve with alias for assembler
@@ -119,16 +114,17 @@ volatile uint32_t Stepper::step_events_completed = 0; // The number of step even
   bool Stepper::bezier_2nd_half;    // =false If Bézier curve has been initialized or not
 #endif
 
+uint32_t Stepper::nextMainISR = 0;
+bool Stepper::all_steps_done = false;
+
 #if ENABLED(LIN_ADVANCE)
 
   uint32_t Stepper::LA_decelerate_after;
 
-  constexpr uint16_t ADV_NEVER = 65535;
-
-  uint16_t Stepper::nextMainISR = 0,
-           Stepper::nextAdvanceISR = ADV_NEVER,
-           Stepper::eISR_Rate = ADV_NEVER,
-           Stepper::current_adv_steps = 0,
+  constexpr uint32_t ADV_NEVER = 0xFFFFFFFF;
+  uint32_t Stepper::nextAdvanceISR = ADV_NEVER,
+           Stepper::eISR_Rate = ADV_NEVER;
+  uint16_t Stepper::current_adv_steps = 0,
            Stepper::final_adv_steps,
            Stepper::max_adv_steps;
 
@@ -144,7 +140,7 @@ volatile uint32_t Stepper::step_events_completed = 0; // The number of step even
 
 #endif // LIN_ADVANCE
 
-int32_t Stepper::acceleration_time, Stepper::deceleration_time;
+uint32_t Stepper::acceleration_time, Stepper::deceleration_time;
 
 volatile int32_t Stepper::count_position[NUM_AXIS] = { 0 };
 volatile signed char Stepper::count_direction[NUM_AXIS] = { 1, 1, 1, 1 };
@@ -153,11 +149,11 @@ volatile signed char Stepper::count_direction[NUM_AXIS] = { 1, 1, 1, 1 };
   int32_t Stepper::counter_m[MIXING_STEPPERS];
 #endif
 
+uint32_t Stepper::ticks_nominal;
 uint8_t Stepper::step_loops, Stepper::step_loops_nominal;
 
-uint16_t Stepper::OCR1A_nominal;
 #if DISABLED(BEZIER_JERK_CONTROL)
-  uint16_t Stepper::acc_step_rate; // needed for deceleration start point
+  uint32_t Stepper::acc_step_rate; // needed for deceleration start point
 #endif
 
 volatile int32_t Stepper::endstops_trigsteps[XYZ];
@@ -1112,201 +1108,95 @@ void Stepper::set_directions() {
  *  2000     1 KHz - sleep rate
  *  4000   500  Hz - init rate
  */
-ISR(TIMER1_COMPA_vect) {
-  /**
-   * On AVR there is no hardware prioritization and preemption of
-   * interrupts, so this emulates it. The UART has first priority
-   * (otherwise, characters will be lost due to UART overflow).
-   * Then: Stepper, Endstops, Temperature, and -finally- all others.
-   *
-   * This ISR needs to run with as little preemption as possible, so
-   * the Temperature ISR is disabled here. Now only the UART, Endstops,
-   * and Arduino-defined interrupts can preempt.
-   */
-  const bool temp_isr_was_enabled = TEMPERATURE_ISR_ENABLED();
-  DISABLE_TEMPERATURE_INTERRUPT();
-  DISABLE_STEPPER_DRIVER_INTERRUPT();
-  sei();
-
-  #if ENABLED(LIN_ADVANCE)
-    Stepper::advance_isr_scheduler();
-  #else
-    Stepper::isr();
-  #endif
-
-  // Disable global interrupts and reenable this ISR
-  cli();
-  ENABLE_STEPPER_DRIVER_INTERRUPT();
-  // Reenable the temperature ISR (if it was enabled)
-  if (temp_isr_was_enabled) ENABLE_TEMPERATURE_INTERRUPT();
-}
-
-void Stepper::isr() {
-
-  uint16_t ocr_val;
 
-  #define ENDSTOP_NOMINAL_OCR_VAL 3000 // Check endstops every 1.5ms to guarantee two stepper ISRs within 5ms for BLTouch
-  #define OCR_VAL_TOLERANCE       1000 // First max delay is 2.0ms, last min delay is 0.5ms, all others 1.5ms
+HAL_STEP_TIMER_ISR {
+  HAL_timer_isr_prologue(STEP_TIMER_NUM);
 
-  #define _SPLIT(L) (ocr_val = (uint16_t)L)
-  #if ENABLED(ENDSTOP_INTERRUPTS_FEATURE)
+  // Program timer compare for the maximum period, so it does NOT
+  // flag an interrupt while this ISR is running - So changes from small
+  // periods to big periods are respected and the timer does not reset to 0
+  HAL_timer_set_compare(STEP_TIMER_NUM, HAL_TIMER_TYPE_MAX);
 
-    #define SPLIT(L) _SPLIT(L)
-
-  #else // !ENDSTOP_INTERRUPTS_FEATURE : Sample endstops between stepping ISRs
-
-    static uint32_t step_remaining = 0;
-
-    #define SPLIT(L) do { \
-      _SPLIT(L); \
-      if (ENDSTOPS_ENABLED && L > ENDSTOP_NOMINAL_OCR_VAL) { \
-        const uint16_t remainder = (uint16_t)L % (ENDSTOP_NOMINAL_OCR_VAL); \
-        ocr_val = (remainder < OCR_VAL_TOLERANCE) ? ENDSTOP_NOMINAL_OCR_VAL + remainder : ENDSTOP_NOMINAL_OCR_VAL; \
-        step_remaining = (uint16_t)L - ocr_val; \
-      } \
-    }while(0)
-
-    if (step_remaining && ENDSTOPS_ENABLED) {   // Just check endstops - not yet time for a step
-      endstops.update();
-
-      // Next ISR either for endstops or stepping
-      ocr_val = step_remaining <= ENDSTOP_NOMINAL_OCR_VAL ? step_remaining : ENDSTOP_NOMINAL_OCR_VAL;
-      step_remaining -= ocr_val;
-      _NEXT_ISR(ocr_val);
-      NOLESS(OCR1A, TCNT1 + 16);
-      return;
-    }
-
-  #endif // !ENDSTOP_INTERRUPTS_FEATURE
+  // Call the ISR scheduler
+  hal_timer_t ticks = Stepper::isr_scheduler();
 
+  // Now 'ticks' contains the period to the next Stepper ISR.
+  // Potential problem: Since the timer continues to run, the requested
+  // compare value may already have passed.
   //
-  // When cleaning, discard the current block and run fast
-  //
-  if (cleaning_buffer_counter) {
-    if (cleaning_buffer_counter < 0) {          // Count up for endstop hit
-      if (current_block) planner.discard_current_block(); // Discard the active block that led to the trigger
-      if (!planner.discard_continued_block())   // Discard next CONTINUED block
-        cleaning_buffer_counter = 0;            // Keep discarding until non-CONTINUED
-    }
-    else {
-      planner.discard_current_block();
-      --cleaning_buffer_counter;                // Count down for abort print
-      #if ENABLED(SD_FINISHED_STEPPERRELEASE) && defined(SD_FINISHED_RELEASECOMMAND)
-        if (!cleaning_buffer_counter) enqueue_and_echo_commands_P(PSTR(SD_FINISHED_RELEASECOMMAND));
-      #endif
-    }
-    current_block = NULL;                       // Prep to get a new block after cleaning
-    _NEXT_ISR(200);                             // Run at max speed - 10 KHz
-    return;
-  }
-
-  // If there is no current block, attempt to pop one from the buffer
-  if (!current_block) {
+  // Assuming at least 6µs between calls to this ISR...
+  // On AVR the ISR epilogue is estimated at 40 instructions - close to 2.5µs.
+  // On ARM the ISR epilogue is estimated at 10 instructions - close to 200ns.
+  // In either case leave at least 4µs for other tasks to execute.
+  const hal_timer_t minticks = HAL_timer_get_count(STEP_TIMER_NUM) + hal_timer_t((HAL_TICKS_PER_US) * 4); // ISR never takes more than 1ms, so this shouldn't cause trouble
+  NOLESS(ticks, MAX(minticks, hal_timer_t((STEP_TIMER_MIN_INTERVAL) * (HAL_TICKS_PER_US))));
 
-    // Anything in the buffer?
-    if ((current_block = planner.get_current_block())) {
+  // Set the next ISR to fire at the proper time
+  HAL_timer_set_compare(STEP_TIMER_NUM, ticks);
 
-      // Sync block? Sync the stepper counts and return
-      while (TEST(current_block->flag, BLOCK_BIT_SYNC_POSITION)) {
-        _set_position(
-          current_block->steps[A_AXIS], current_block->steps[B_AXIS],
-          current_block->steps[C_AXIS], current_block->steps[E_AXIS]
-        );
-        planner.discard_current_block();
-        if (!(current_block = planner.get_current_block())) return;
-      }
+  HAL_timer_isr_epilogue(STEP_TIMER_NUM);
+}
 
-      // Initialize the trapezoid generator from the current block.
-      static int8_t last_extruder = -1;
+#define STEP_MULTIPLY(A,B) MultiU24X32toH16(A, B)
 
-      #if ENABLED(LIN_ADVANCE)
-        #if E_STEPPERS > 1
-          if (current_block->active_extruder != last_extruder) {
-            current_adv_steps = 0; // If the now active extruder wasn't in use during the last move, its pressure is most likely gone.
-            LA_active_extruder = current_block->active_extruder;
-          }
-        #endif
+hal_timer_t Stepper::isr_scheduler() {
+  uint32_t interval;
 
-        if ((use_advance_lead = current_block->use_advance_lead)) {
-          LA_decelerate_after = current_block->decelerate_after;
-          final_adv_steps = current_block->final_adv_steps;
-          max_adv_steps = current_block->max_adv_steps;
-        }
-      #endif
+  // Run main stepping pulse phase ISR if we have to
+  if (!nextMainISR) Stepper::stepper_pulse_phase_isr();
 
-      if (current_block->direction_bits != last_direction_bits || current_block->active_extruder != last_extruder) {
-        last_direction_bits = current_block->direction_bits;
-        last_extruder = current_block->active_extruder;
-        set_directions();
-      }
-
-      // No acceleration / deceleration time elapsed so far
-      acceleration_time = deceleration_time = 0;
+  #if ENABLED(LIN_ADVANCE)
+    // Run linear advance stepper ISR if we have to
+    if (!nextAdvanceISR) nextAdvanceISR = Stepper::advance_isr();
+  #endif
 
-      // No step events completed so far
-      step_events_completed = 0;
+  // ^== Time critical. NOTHING besides pulse generation should be above here!!!
 
-      // step_rate to timer interval
-      OCR1A_nominal = calc_timer_interval(current_block->nominal_rate);
+  // Run main stepping block processing ISR if we have to
+  if (!nextMainISR) nextMainISR = Stepper::stepper_block_phase_isr();
 
-      // make a note of the number of step loops required at nominal speed
-      step_loops_nominal = step_loops;
+  #if ENABLED(LIN_ADVANCE)
+    // Select the closest interval in time
+    interval = (nextAdvanceISR <= nextMainISR)
+      ? nextAdvanceISR
+      : nextMainISR;
 
-      #if DISABLED(BEZIER_JERK_CONTROL)
-        // Set as deceleration point the initial rate of the block
-        acc_step_rate = current_block->initial_rate;
-      #endif
+  #else // !ENABLED(LIN_ADVANCE)
 
-      #if ENABLED(BEZIER_JERK_CONTROL)
-        // Initialize the Bézier speed curve
-        _calc_bezier_curve_coeffs(current_block->initial_rate, current_block->cruise_rate, current_block->acceleration_time_inverse);
+    // The interval is just the remaining time to the stepper ISR
+    interval = nextMainISR;
+  #endif
 
-        // We have not started the 2nd half of the trapezoid
-        bezier_2nd_half = false;
-      #endif
+  // Limit the value to the maximum possible value of the timer
+  if (interval > HAL_TIMER_TYPE_MAX)
+    interval = HAL_TIMER_TYPE_MAX;
 
-      // Initialize Bresenham counters to 1/2 the ceiling
-      counter_X = counter_Y = counter_Z = counter_E = -(current_block->step_event_count >> 1);
-      #if ENABLED(MIXING_EXTRUDER)
-        MIXING_STEPPERS_LOOP(i)
-          counter_m[i] = -(current_block->mix_event_count[i] >> 1);
-      #endif
+  // Compute the time remaining for the main isr
+  nextMainISR -= interval;
 
-      // No step events completed so far
-      step_events_completed = 0;
+  #if ENABLED(LIN_ADVANCE)
+    // Compute the time remaining for the advance isr
+    if (nextAdvanceISR != ADV_NEVER)
+      nextAdvanceISR -= interval;
+  #endif
 
-      #if ENABLED(ENDSTOP_INTERRUPTS_FEATURE)
-        e_hit = 2; // Needed for the case an endstop is already triggered before the new move begins.
-                   // No 'change' can be detected.
-      #endif
+  return (hal_timer_t)interval;
+}
 
-      #if ENABLED(Z_LATE_ENABLE)
-        // If delayed Z enable, postpone move for 1mS
-        if (current_block->steps[Z_AXIS] > 0) {
-          enable_Z();
-          _NEXT_ISR(2000); // Run at slow speed - 1 KHz
-          return;
-        }
-      #endif
-    }
-    else {
-      _NEXT_ISR(2000); // Run at slow speed - 1 KHz
-      return;
-    }
-  }
+// This part of the ISR should ONLY create the pulses for the steppers
+// -- Nothing more, nothing less -- We want to avoid jitter from where
+// the pulses should be generated (when the interrupt triggers) to the
+// time pulses are actually created. So, PLEASE DO NOT PLACE ANY CODE
+// above this line that can conditionally change that time (we are trying
+// to keep the delay between the interrupt triggering and pulse generation
+// as constant as possible)!!!
+void Stepper::stepper_pulse_phase_isr() {
 
-  // Update endstops state, if enabled
-  #if ENABLED(ENDSTOP_INTERRUPTS_FEATURE)
-    if (e_hit && ENDSTOPS_ENABLED) {
-      endstops.update();
-      e_hit--;
-    }
-  #else
-    if (ENDSTOPS_ENABLED) endstops.update();
-  #endif
+  // If there is no current block, do nothing
+  if (!current_block) return;
 
   // Take multiple steps per interrupt (For high speed moves)
-  bool all_steps_done = false;
+  all_steps_done = false;
   for (uint8_t i = step_loops; i--;) {
 
     #define _COUNTER(AXIS) counter_## AXIS
@@ -1501,114 +1391,208 @@ void Stepper::isr() {
     #endif
 
   } // steps_loop
+}
 
-  // Calculate new timer value
-  if (step_events_completed <= (uint32_t)current_block->accelerate_until) {
+// This is the last half of the stepper interrupt: This one processes and
+// properly schedules blocks from the planner. This is executed after creating
+// the step pulses, so it is not time critical, as pulses are already done.
 
-    #if ENABLED(BEZIER_JERK_CONTROL)
-      // Get the next speed to use (Jerk limited!)
-      uint16_t acc_step_rate =
-        acceleration_time < current_block->acceleration_time
-          ? _eval_bezier_curve(acceleration_time)
-          : current_block->cruise_rate;
-    #else
-      acc_step_rate = MultiU24X32toH16(acceleration_time, current_block->acceleration_rate) + current_block->initial_rate;
-      NOMORE(acc_step_rate, current_block->nominal_rate);
-    #endif
+uint32_t Stepper::stepper_block_phase_isr() {
 
-    // step_rate to timer interval
-    const uint16_t interval = calc_timer_interval(acc_step_rate);
+  // If no queued movements, just wait 1ms for the next move
+  uint32_t interval = (HAL_STEPPER_TIMER_RATE / 1000);
 
-    SPLIT(interval);  // split step into multiple ISRs if larger than ENDSTOP_NOMINAL_OCR_VAL
-    _NEXT_ISR(ocr_val);
+  // If there is a current block
+  if (current_block) {
 
-    acceleration_time += interval;
+    // Calculate new timer value
+    if (step_events_completed <= current_block->accelerate_until) {
 
-    #if ENABLED(LIN_ADVANCE)
-      if (current_block->use_advance_lead) {
-        if (step_events_completed == step_loops || (e_steps && eISR_Rate != current_block->advance_speed)) {
-          nextAdvanceISR = 0; // Wake up eISR on first acceleration loop and fire ISR if final adv_rate is reached
-          eISR_Rate = current_block->advance_speed;
+      #if ENABLED(BEZIER_JERK_CONTROL)
+        // Get the next speed to use (Jerk limited!)
+        uint32_t acc_step_rate =
+          acceleration_time < current_block->acceleration_time
+            ? _eval_bezier_curve(acceleration_time)
+            : current_block->cruise_rate;
+      #else
+        acc_step_rate = STEP_MULTIPLY(acceleration_time, current_block->acceleration_rate) + current_block->initial_rate;
+        NOMORE(acc_step_rate, current_block->nominal_rate);
+      #endif
+
+      // step_rate to timer interval
+      interval = calc_timer_interval(acc_step_rate);
+      acceleration_time += interval;
+
+      #if ENABLED(LIN_ADVANCE)
+        if (current_block->use_advance_lead) {
+          if (step_events_completed == step_loops || (e_steps && eISR_Rate != current_block->advance_speed)) {
+            nextAdvanceISR = 0; // Wake up eISR on first acceleration loop and fire ISR if final adv_rate is reached
+            eISR_Rate = current_block->advance_speed;
+          }
         }
-      }
-      else {
-        eISR_Rate = ADV_NEVER;
-        if (e_steps) nextAdvanceISR = 0;
-      }
-    #endif // LIN_ADVANCE
+        else {
+          eISR_Rate = ADV_NEVER;
+          if (e_steps) nextAdvanceISR = 0;
+        }
+      #endif // LIN_ADVANCE
+    }
+    else if (step_events_completed > current_block->decelerate_after) {
+      uint32_t step_rate;
+
+      #if ENABLED(BEZIER_JERK_CONTROL)
+        // If this is the 1st time we process the 2nd half of the trapezoid...
+        if (!bezier_2nd_half) {
+          // Initialize the Bézier speed curve
+          _calc_bezier_curve_coeffs(current_block->cruise_rate, current_block->final_rate, current_block->deceleration_time_inverse);
+          bezier_2nd_half = true;
+        }
+
+        // Calculate the next speed to use
+        step_rate = deceleration_time < current_block->deceleration_time
+          ? _eval_bezier_curve(deceleration_time)
+          : current_block->final_rate;
+      #else
+
+        // Using the old trapezoidal control
+        step_rate = STEP_MULTIPLY(deceleration_time, current_block->acceleration_rate);
+        if (step_rate < acc_step_rate) { // Still decelerating?
+          step_rate = acc_step_rate - step_rate;
+          NOLESS(step_rate, current_block->final_rate);
+        }
+        else
+          step_rate = current_block->final_rate;
+      #endif
+
+      // step_rate to timer interval
+      interval = calc_timer_interval(step_rate);
+      deceleration_time += interval;
+
+      #if ENABLED(LIN_ADVANCE)
+        if (current_block->use_advance_lead) {
+          if (step_events_completed <= current_block->decelerate_after + step_loops || (e_steps && eISR_Rate != current_block->advance_speed)) {
+            nextAdvanceISR = 0; // Wake up eISR on first deceleration loop
+            eISR_Rate = current_block->advance_speed;
+          }
+        }
+        else {
+          eISR_Rate = ADV_NEVER;
+          if (e_steps) nextAdvanceISR = 0;
+        }
+      #endif // LIN_ADVANCE
+    }
+    else {
+
+      #if ENABLED(LIN_ADVANCE)
+        // If there are any esteps, fire the next advance_isr "now"
+        if (e_steps && eISR_Rate != current_block->advance_speed) nextAdvanceISR = 0;
+      #endif
+
+      // The timer interval is just the nominal value for the nominal speed
+      interval = ticks_nominal;
+
+      // Ensure this runs at the correct step rate, even if it just came off an acceleration
+      step_loops = step_loops_nominal;
+    }
+
+    // If current block is finished, reset pointer
+    if (all_steps_done) {
+      current_block = NULL;
+      planner.discard_current_block();
+    }
   }
-  else if (step_events_completed > (uint32_t)current_block->decelerate_after) {
-    uint16_t step_rate;
 
-    #if ENABLED(BEZIER_JERK_CONTROL)
-      // If this is the 1st time we process the 2nd half of the trapezoid...
-      if (!bezier_2nd_half) {
-        // Initialize the Bézier speed curve
-        _calc_bezier_curve_coeffs(current_block->cruise_rate, current_block->final_rate, current_block->deceleration_time_inverse);
-        bezier_2nd_half = true;
+  // If there is no current block at this point, attempt to pop one from the buffer
+  // and prepare its movement
+  if (!current_block) {
+
+    // Anything in the buffer?
+    if ((current_block = planner.get_current_block())) {
+
+      // Sync block? Sync the stepper counts, discard it, and move on to the next block
+      while (TEST(current_block->flag, BLOCK_BIT_SYNC_POSITION)) {
+        _set_position(
+          current_block->position[A_AXIS], current_block->position[B_AXIS],
+          current_block->position[C_AXIS], current_block->position[E_AXIS]
+        );
+        planner.discard_current_block();
+
+        // Try to get a new block
+        if (!(current_block = planner.get_current_block()))
+          return interval; // No more queued movements!
       }
 
-      // Calculate the next speed to use
-      step_rate = deceleration_time < current_block->deceleration_time
-        ? _eval_bezier_curve(deceleration_time)
-        : current_block->final_rate;
-    #else
+      // Initialize the trapezoid generator from the current block.
+      static int8_t last_extruder = -1;
+
+      #if ENABLED(LIN_ADVANCE)
+        #if E_STEPPERS > 1
+          if (current_block->active_extruder != last_extruder) {
+            current_adv_steps = 0; // If the now active extruder wasn't in use during the last move, its pressure is most likely gone.
+            LA_active_extruder = current_block->active_extruder;
+          }
+        #endif
+
+        if ((use_advance_lead = current_block->use_advance_lead)) {
+          LA_decelerate_after = current_block->decelerate_after;
+          final_adv_steps = current_block->final_adv_steps;
+          max_adv_steps = current_block->max_adv_steps;
+        }
+      #endif
 
-      // Using the old trapezoidal control
-      step_rate = MultiU24X32toH16(deceleration_time, current_block->acceleration_rate);
-      if (step_rate < acc_step_rate) { // Still decelerating?
-        step_rate = acc_step_rate - step_rate;
-        NOLESS(step_rate, current_block->final_rate);
+      if (current_block->direction_bits != last_direction_bits || current_block->active_extruder != last_extruder) {
+        last_direction_bits = current_block->direction_bits;
+        last_extruder = current_block->active_extruder;
+        set_directions();
       }
-      else
-        step_rate = current_block->final_rate;
 
-    #endif
+      // No acceleration / deceleration time elapsed so far
+      acceleration_time = deceleration_time = 0;
 
-    // step_rate to timer interval
-    const uint16_t interval = calc_timer_interval(step_rate);
+      // No step events completed so far
+      step_events_completed = 0;
 
-    SPLIT(interval);  // split step into multiple ISRs if larger than ENDSTOP_NOMINAL_OCR_VAL
-    _NEXT_ISR(ocr_val);
+      // step_rate to timer interval for the nominal speed
+      ticks_nominal = calc_timer_interval(current_block->nominal_rate);
 
-    deceleration_time += interval;
+      // make a note of the number of step loops required at nominal speed
+      step_loops_nominal = step_loops;
 
-    #if ENABLED(LIN_ADVANCE)
-      if (current_block->use_advance_lead) {
-        if (step_events_completed <= (uint32_t)current_block->decelerate_after + step_loops || (e_steps && eISR_Rate != current_block->advance_speed)) {
-          nextAdvanceISR = 0; // Wake up eISR on first deceleration loop
-          eISR_Rate = current_block->advance_speed;
-        }
-      }
-      else {
-        eISR_Rate = ADV_NEVER;
-        if (e_steps) nextAdvanceISR = 0;
-      }
-    #endif // LIN_ADVANCE
-  }
-  else {
+      #if DISABLED(BEZIER_JERK_CONTROL)
+        // Set as deceleration point the initial rate of the block
+        acc_step_rate = current_block->initial_rate;
+      #endif
 
-    #if ENABLED(LIN_ADVANCE)
-      // If we have esteps to execute, fire the next advance_isr "now"
-      if (e_steps && eISR_Rate != current_block->advance_speed) nextAdvanceISR = 0;
-    #endif
+      #if ENABLED(BEZIER_JERK_CONTROL)
+        // Initialize the Bézier speed curve
+        _calc_bezier_curve_coeffs(current_block->initial_rate, current_block->cruise_rate, current_block->acceleration_time_inverse);
 
-    SPLIT(OCR1A_nominal);  // split step into multiple ISRs if larger than ENDSTOP_NOMINAL_OCR_VAL
-    _NEXT_ISR(ocr_val);
+        // We have not started the 2nd half of the trapezoid
+        bezier_2nd_half = false;
+      #endif
 
-    // ensure we're running at the correct step rate, even if we just came off an acceleration
-    step_loops = step_loops_nominal;
-  }
+      // Initialize Bresenham counters to 1/2 the ceiling
+      counter_X = counter_Y = counter_Z = counter_E = -((int32_t)(current_block->step_event_count >> 1));
+      #if ENABLED(MIXING_EXTRUDER)
+        MIXING_STEPPERS_LOOP(i)
+          counter_m[i] = -(current_block->mix_event_count[i] >> 1);
+      #endif
 
-  #if DISABLED(LIN_ADVANCE)
-    NOLESS(OCR1A, TCNT1 + 16);
-  #endif
+      #if ENABLED(ENDSTOP_INTERRUPTS_FEATURE)
+        e_hit = 2; // Needed for the case an endstop is already triggered before the new move begins.
+                   // No 'change' can be detected.
+      #endif
 
-  // If current block is finished, reset pointer
-  if (all_steps_done) {
-    current_block = NULL;
-    planner.discard_current_block();
+      #if ENABLED(Z_LATE_ENABLE)
+        // If delayed Z enable, enable it now. This option will severely interfere with
+        //  timing between pulses when chaining motion between blocks, and it could lead
+        //  to lost steps on both the X and Y axes, so avoid using it unless strictly necessary!!
+        if (current_block->steps[Z_AXIS]) enable_Z();
+      #endif
+    }
   }
+
+  // Return the interval to wait
+  return interval;
 }
 
 #if ENABLED(LIN_ADVANCE)
@@ -1617,8 +1601,8 @@ void Stepper::isr() {
   #define EXTRA_CYCLES_E (STEP_PULSE_CYCLES - (CYCLES_EATEN_E))
 
   // Timer interrupt for E. e_steps is set in the main routine;
-
-  void Stepper::advance_isr() {
+  uint32_t Stepper::advance_isr() {
+    uint32_t interval;
 
     #if ENABLED(MK2_MULTIPLEXER) // For SNMM even-numbered steppers are reversed
       #define SET_E_STEP_DIR(INDEX) do{ if (e_steps) E0_DIR_WRITE(e_steps < 0 ? !INVERT_E## INDEX ##_DIR ^ TEST(INDEX, 0) : INVERT_E## INDEX ##_DIR ^ TEST(INDEX, 0)); }while(0)
@@ -1679,21 +1663,21 @@ void Stepper::isr() {
       if (step_events_completed > LA_decelerate_after && current_adv_steps > final_adv_steps) {
         e_steps--;
         current_adv_steps--;
-        nextAdvanceISR = eISR_Rate;
+        interval = eISR_Rate;
       }
       else if (step_events_completed < LA_decelerate_after && current_adv_steps < max_adv_steps) {
              //step_events_completed <= (uint32_t)current_block->accelerate_until) {
         e_steps++;
         current_adv_steps++;
-        nextAdvanceISR = eISR_Rate;
+        interval = eISR_Rate;
       }
       else {
-        nextAdvanceISR = ADV_NEVER;
+        interval = ADV_NEVER;
         eISR_Rate = ADV_NEVER;
       }
     }
     else
-      nextAdvanceISR = ADV_NEVER;
+      interval = ADV_NEVER;
 
     switch (LA_active_extruder) {
       case 0: SET_E_STEP_DIR(0); break;
@@ -1715,7 +1699,7 @@ void Stepper::isr() {
     while (e_steps) {
 
       #if EXTRA_CYCLES_E > 20
-        uint32_t pulse_start = TCNT0;
+        hal_timer_t pulse_start = HAL_timer_get_count(PULSE_TIMER_NUM);
       #endif
 
       switch (LA_active_extruder) {
@@ -1766,39 +1750,9 @@ void Stepper::isr() {
       #endif
 
     } // e_steps
-  }
-
-  void Stepper::advance_isr_scheduler() {
-
-    // Run main stepping ISR if flagged
-    if (!nextMainISR) isr();
 
-    // Run Advance stepping ISR if flagged
-    if (!nextAdvanceISR) advance_isr();
-
-    // Is the next advance ISR scheduled before the next main ISR?
-    if (nextAdvanceISR <= nextMainISR) {
-      // Set up the next interrupt
-      OCR1A = nextAdvanceISR;
-      // New interval for the next main ISR
-      if (nextMainISR) nextMainISR -= nextAdvanceISR;
-      // Will call Stepper::advance_isr on the next interrupt
-      nextAdvanceISR = 0;
-    }
-    else {
-      // The next main ISR comes first
-      OCR1A = nextMainISR;
-      // New interval for the next advance ISR, if any
-      if (nextAdvanceISR && nextAdvanceISR != ADV_NEVER)
-        nextAdvanceISR -= nextMainISR;
-      // Will call Stepper::isr on the next interrupt
-      nextMainISR = 0;
-    }
-
-    // Don't run the ISR faster than possible
-    NOLESS(OCR1A, TCNT1 + 16);
+    return interval;
   }
-
 #endif // LIN_ADVANCE
 
 void Stepper::init() {
@@ -2013,30 +1967,43 @@ void Stepper::_set_position(const int32_t &a, const int32_t &b, const int32_t &c
  * Get a stepper's position in steps.
  */
 int32_t Stepper::position(const AxisEnum axis) {
-  CRITICAL_SECTION_START;
-  const int32_t count_pos = count_position[axis];
-  CRITICAL_SECTION_END;
-  return count_pos;
-}
+  // Guard the read against the stepper ISR. Only required on AVR, since
+  //  any 32-bit CPU offers atomic access to 32-bit variables.
+  const bool was_enabled = STEPPER_ISR_ENABLED(); // Remember the ISR state to restore it below
+  if (was_enabled) DISABLE_STEPPER_DRIVER_INTERRUPT();
+
+  const int32_t v = count_position[axis]; // Read while the stepper ISR is disabled
 
-void Stepper::finish_and_disable() {
-  planner.synchronize();
-  disable_all_steppers();
+  // Re-enable the stepper ISR, but only if it was enabled on entry
+  if (was_enabled) ENABLE_STEPPER_DRIVER_INTERRUPT();
+  return v;
 }
 
 void Stepper::quick_stop() {
+  const bool was_enabled = STEPPER_ISR_ENABLED(); // Remember the ISR state to restore it on exit
   DISABLE_STEPPER_DRIVER_INTERRUPT();
-  kill_current_block();
-  current_block = NULL;
-  cleaning_buffer_counter = 5000;
-  planner.clear_block_buffer();
-  ENABLE_STEPPER_DRIVER_INTERRUPT();
-  #if ENABLED(ULTRA_LCD)
-    planner.clear_block_buffer_runtime();
-  #endif
+
+  if (current_block) { // Only when a block is being executed
+    step_events_completed = current_block->step_event_count; // Count every step event of the block as executed
+    current_block = NULL; // No block is current from here on
+  }
+
+  if (was_enabled) ENABLE_STEPPER_DRIVER_INTERRUPT(); // Leave the ISR off if it was off on entry
+}
+
+void Stepper::kill_current_block() { // Mark the block in progress, if any, as fully stepped
+  const bool was_enabled = STEPPER_ISR_ENABLED(); // Remember the ISR state to restore it on exit
+  DISABLE_STEPPER_DRIVER_INTERRUPT(); // Disabled unconditionally; restored conditionally below
+
+  if (current_block) // Nothing to do when no block is being executed
+    step_events_completed = current_block->step_event_count; // Count every step event of the block as executed
+
+  if (was_enabled) ENABLE_STEPPER_DRIVER_INTERRUPT(); // Leave the ISR off if it was off on entry
 }
 
 void Stepper::endstop_triggered(const AxisEnum axis) {
+  const bool was_enabled = STEPPER_ISR_ENABLED();
+  if (was_enabled) DISABLE_STEPPER_DRIVER_INTERRUPT();
 
   #if IS_CORE
 
@@ -2051,8 +2018,31 @@ void Stepper::endstop_triggered(const AxisEnum axis) {
 
   #endif // !COREXY && !COREXZ && !COREYZ
 
-  kill_current_block();
-  cleaning_buffer_counter = -1; // Discard the rest of the move
+  // Discard the rest of the move if there is a current block
+  if (current_block) {
+
+    // Kill the current block being executed
+    step_events_completed = current_block->step_event_count;
+
+    // Prep to get a new block after cleaning
+    current_block = NULL;
+  }
+
+  if (was_enabled) ENABLE_STEPPER_DRIVER_INTERRUPT();
+}
+
+int32_t Stepper::triggered_position(const AxisEnum axis) { // Returns endstops_trigsteps[axis], in steps
+  // Guard the read against the stepper ISR. Only required on AVR, since
+  //  any 32-bit CPU offers atomic access to 32-bit variables.
+  const bool was_enabled = STEPPER_ISR_ENABLED(); // Remember the ISR state to restore it below
+  if (was_enabled) DISABLE_STEPPER_DRIVER_INTERRUPT();
+
+  const int32_t v = endstops_trigsteps[axis]; // Read while the stepper ISR is disabled
+
+  // Re-enable the stepper ISR, but only if it was enabled on entry
+  if (was_enabled) ENABLE_STEPPER_DRIVER_INTERRUPT();
+
+  return v;
 }
 
 void Stepper::report_positions() {
diff --git a/Marlin/stepper.h b/Marlin/stepper.h
index 198da72dd7..df640a0580 100644
--- a/Marlin/stepper.h
+++ b/Marlin/stepper.h
@@ -85,10 +85,6 @@ class Stepper {
 
     static block_t* current_block;  // A pointer to the block currently being traced
 
-    #if ENABLED(ABORT_ON_ENDSTOP_HIT_FEATURE_ENABLED)
-      static bool abort_on_endstop_hit;
-    #endif
-
     #if ENABLED(X_DUAL_ENDSTOPS) || ENABLED(Y_DUAL_ENDSTOPS) || ENABLED(Z_DUAL_ENDSTOPS)
       static bool performing_homing;
     #endif
@@ -100,8 +96,6 @@ class Stepper {
       static uint32_t motor_current_setting[3];
     #endif
 
-    static int16_t cleaning_buffer_counter;
-
   private:
 
     static uint8_t last_direction_bits;        // The next stepping-bits to be output
@@ -118,7 +112,7 @@ class Stepper {
 
     // Counter variables for the Bresenham line tracer
     static int32_t counter_X, counter_Y, counter_Z, counter_E;
-    static volatile uint32_t step_events_completed; // The number of step events executed in the current block
+    static uint32_t step_events_completed; // The number of step events executed in the current block
 
     #if ENABLED(BEZIER_JERK_CONTROL)
       static int32_t bezier_A,     // A coefficient in Bézier speed curve
@@ -130,12 +124,14 @@ class Stepper {
                   bezier_2nd_half; // If Bézier curve has been initialized or not
     #endif
 
+    static uint32_t nextMainISR;   // time remaining for the next Step ISR
+    static bool all_steps_done;    // all steps done
+
     #if ENABLED(LIN_ADVANCE)
 
       static uint32_t LA_decelerate_after; // Copy from current executed block. Needed because current_block is set to NULL "too early".
-      static uint16_t nextMainISR, nextAdvanceISR, eISR_Rate, current_adv_steps,
-                      final_adv_steps, max_adv_steps; // Copy from current executed block. Needed because current_block is set to NULL "too early".
-      #define _NEXT_ISR(T) nextMainISR = T
+      static uint32_t nextAdvanceISR, eISR_Rate;
+      static uint16_t current_adv_steps, final_adv_steps, max_adv_steps; // Copy from current executed block. Needed because current_block is set to NULL "too early".
       static int8_t e_steps;
       static bool use_advance_lead;
       #if E_STEPPERS > 1
@@ -144,18 +140,14 @@ class Stepper {
         static constexpr int8_t LA_active_extruder = 0;
       #endif
 
-    #else // !LIN_ADVANCE
-
-      #define _NEXT_ISR(T) OCR1A = T
+    #endif // LIN_ADVANCE
 
-    #endif // !LIN_ADVANCE
-
-    static int32_t acceleration_time, deceleration_time;
+    static uint32_t acceleration_time, deceleration_time;
     static uint8_t step_loops, step_loops_nominal;
 
-    static uint16_t OCR1A_nominal;
+    static uint32_t ticks_nominal;
     #if DISABLED(BEZIER_JERK_CONTROL)
-      static uint16_t acc_step_rate; // needed for deceleration start point
+      static uint32_t acc_step_rate; // needed for deceleration start point
     #endif
 
     static volatile int32_t endstops_trigsteps[XYZ];
@@ -188,88 +180,50 @@ class Stepper {
     //
     Stepper() { };
 
-    //
     // Initialize stepper hardware
-    //
     static void init();
 
-    //
     // Interrupt Service Routines
-    //
-
-    static void isr();
 
-    #if ENABLED(LIN_ADVANCE)
-      static void advance_isr();
-      static void advance_isr_scheduler();
-    #endif
+    // The ISR scheduler
+    static hal_timer_t isr_scheduler();
 
-    //
-    // Set the current position in steps
-    //
-    static void _set_position(const int32_t &a, const int32_t &b, const int32_t &c, const int32_t &e);
+    // The stepper pulse phase ISR
+    static void stepper_pulse_phase_isr();
 
-    FORCE_INLINE static void _set_position(const AxisEnum a, const int32_t &v) { count_position[a] = v; }
+    // The stepper block processing phase ISR
+    static uint32_t stepper_block_phase_isr();
 
-    FORCE_INLINE static void set_position(const int32_t &a, const int32_t &b, const int32_t &c, const int32_t &e) {
-      planner.synchronize();
-      CRITICAL_SECTION_START;
-      _set_position(a, b, c, e);
-      CRITICAL_SECTION_END;
-    }
-
-    static void set_position(const AxisEnum a, const int32_t &v) {
-      planner.synchronize();
-      CRITICAL_SECTION_START;
-      count_position[a] = v;
-      CRITICAL_SECTION_END;
-    }
-
-    FORCE_INLINE static void _set_e_position(const int32_t &e) { count_position[E_AXIS] = e; }
-
-    static void set_e_position(const int32_t &e) {
-      planner.synchronize();
-      CRITICAL_SECTION_START;
-      count_position[E_AXIS] = e;
-      CRITICAL_SECTION_END;
-    }
-
-    //
-    // Set direction bits for all steppers
-    //
-    static void set_directions();
+    #if ENABLED(LIN_ADVANCE)
+      // The Linear advance stepper ISR
+      static uint32_t advance_isr();
+    #endif
 
-    //
     // Get the position of a stepper, in steps
-    //
     static int32_t position(const AxisEnum axis);
 
-    //
     // Report the positions of the steppers, in steps
-    //
     static void report_positions();
 
-    //
     // The stepper subsystem goes to sleep when it runs out of things to execute. Call this
     // to notify the subsystem that it is time to go to work.
-    //
     static void wake_up();
 
-    //
-    // Wait for moves to finish and disable all steppers
-    //
-    static void finish_and_disable();
-
-    //
-    // Quickly stop all steppers and clear the blocks queue
-    //
+    // Quickly stop all steppers
     static void quick_stop();
 
-    //
     // The direction of a single motor
-    //
     FORCE_INLINE static bool motor_direction(const AxisEnum axis) { return TEST(last_direction_bits, axis); }
 
+    // Kill current block
+    static void kill_current_block();
+
+    // Handle a triggered endstop
+    static void endstop_triggered(const AxisEnum axis);
+
+    // Triggered position of an axis in steps
+    static int32_t triggered_position(const AxisEnum axis);
+
     #if HAS_DIGIPOTSS || HAS_MOTOR_CURRENT_PWM
       static void digitalPotWrite(const int16_t address, const int16_t value);
       static void digipot_current(const uint8_t driver, const int16_t current);
@@ -301,32 +255,22 @@ class Stepper {
       static void babystep(const AxisEnum axis, const bool direction); // perform a short step with a single stepper motor, outside of any convention
     #endif
 
-    static inline void kill_current_block() {
-      step_events_completed = current_block->step_event_count;
-    }
-
-    //
-    // Handle a triggered endstop
-    //
-    static void endstop_triggered(const AxisEnum axis);
-
-    //
-    // Triggered position of an axis in mm (not core-savvy)
-    //
-    FORCE_INLINE static float triggered_position_mm(const AxisEnum axis) {
-      return endstops_trigsteps[axis] * planner.steps_to_mm[axis];
-    }
-
     #if HAS_MOTOR_CURRENT_PWM
       static void refresh_motor_power();
     #endif
 
   private:
 
-    FORCE_INLINE static uint16_t calc_timer_interval(uint16_t step_rate) {
-      uint16_t timer;
+    // Set the current position in steps
+    static void _set_position(const int32_t &a, const int32_t &b, const int32_t &c, const int32_t &e);
+
+    // Set direction bits for all steppers
+    static void set_directions();
 
-      NOMORE(step_rate, MAX_STEP_FREQUENCY);
+    FORCE_INLINE static uint32_t calc_timer_interval(uint32_t step_rate) {
+      uint32_t timer;
+
+      NOMORE(step_rate, uint32_t(MAX_STEP_FREQUENCY));
 
       if (step_rate > 20000) { // If steprate > 20kHz >> step 4 times
         step_rate >>= 2;
@@ -340,12 +284,14 @@ class Stepper {
         step_loops = 1;
       }
 
-      NOLESS(step_rate, F_CPU / 500000);
+      NOLESS(step_rate, uint32_t(F_CPU / 500000U));
       step_rate -= F_CPU / 500000; // Correct for minimal speed
       if (step_rate >= (8 * 256)) { // higher step rate
-        uint16_t table_address = (uint16_t)&speed_lookuptable_fast[(uint8_t)(step_rate >> 8)][0],
-                 gain = (uint16_t)pgm_read_word_near(table_address + 2);
-        timer = (uint16_t)pgm_read_word_near(table_address) - MultiU16X8toH16(step_rate & 0x00FF, gain);
+        const uint8_t tmp_step_rate = (step_rate & 0x00FF);
+        const uint16_t table_address = (uint16_t)&speed_lookuptable_fast[(uint8_t)(step_rate >> 8)][0],
+                       gain = (uint16_t)pgm_read_word_near(table_address + 2);
+        timer = MultiU16X8toH16(tmp_step_rate, gain);
+        timer = (uint16_t)pgm_read_word_near(table_address) - timer;
       }
       else { // lower step rates
         uint16_t table_address = (uint16_t)&speed_lookuptable_slow[0][0];
@@ -355,9 +301,9 @@ class Stepper {
       }
       if (timer < 100) { // (20kHz - this should never happen)
         timer = 100;
-        SERIAL_PROTOCOL(MSG_STEPPER_TOO_HIGH);
-        SERIAL_PROTOCOLLN(step_rate);
+        SERIAL_ECHOLNPAIR(MSG_STEPPER_TOO_HIGH, step_rate);
       }
+
       return timer;
     }
 
diff --git a/Marlin/temperature.cpp b/Marlin/temperature.cpp
index ca344aab18..38fdb00bc0 100644
--- a/Marlin/temperature.cpp
+++ b/Marlin/temperature.cpp
@@ -1780,6 +1780,7 @@ void Temperature::set_current_temp_raw() {
  *  - Step the babysteps value for each axis towards 0
  *  - For PINS_DEBUGGING, monitor and report endstop pins
  *  - For ENDSTOP_INTERRUPTS_FEATURE check endstops if flagged
+ *  - Call planner.tick to count down its "ignore" time
  */
 HAL_TEMP_TIMER_ISR {
   HAL_timer_isr_prologue(TEMP_TIMER_NUM);
@@ -2301,25 +2302,22 @@ void Temperature::isr() {
   #endif // BABYSTEPPING
 
   #if ENABLED(PINS_DEBUGGING)
-    extern bool endstop_monitor_flag;
-    // run the endstop monitor at 15Hz
-    static uint8_t endstop_monitor_count = 16;  // offset this check from the others
-    if (endstop_monitor_flag) {
-      endstop_monitor_count += _BV(1);  //  15 Hz
-      endstop_monitor_count &= 0x7F;
-      if (!endstop_monitor_count) endstop_monitor();  // report changes in endstop status
-    }
+    endstops.run_monitor();  // report changes in endstop status
   #endif
 
+  // Update endstops state, if enabled
   #if ENABLED(ENDSTOP_INTERRUPTS_FEATURE)
-
     extern volatile uint8_t e_hit;
-
     if (e_hit && ENDSTOPS_ENABLED) {
-      endstops.update();  // call endstop update routine
+      endstops.update();
       e_hit--;
     }
+  #else
+    if (ENDSTOPS_ENABLED) endstops.update();
   #endif
+
+  // Periodically call the planner timer
+  planner.tick();
 }
 
 #if HAS_TEMP_SENSOR
diff --git a/Marlin/ubl_motion.cpp b/Marlin/ubl_motion.cpp
index 9ffdff3bb6..0e8e7b9092 100644
--- a/Marlin/ubl_motion.cpp
+++ b/Marlin/ubl_motion.cpp
@@ -257,7 +257,8 @@
               z_position = end[Z_AXIS];
             }
 
-            planner.buffer_segment(rx, ry, z_position + z0, e_position, feed_rate, extruder);
+            if (!planner.buffer_segment(rx, ry, z_position + z0, e_position, feed_rate, extruder))
+              break;
           } //else printf("FIRST MOVE PRUNED  ");
         }
 
@@ -314,7 +315,8 @@
             e_position = end[E_AXIS];
             z_position = end[Z_AXIS];
           }
-          planner.buffer_segment(rx, next_mesh_line_y, z_position + z0, e_position, feed_rate, extruder);
+          if (!planner.buffer_segment(rx, next_mesh_line_y, z_position + z0, e_position, feed_rate, extruder))
+            break;
           current_yi += dyi;
           yi_cnt--;
         }
@@ -337,7 +339,8 @@
             z_position = end[Z_AXIS];
           }
 
-          planner.buffer_segment(next_mesh_line_x, ry, z_position + z0, e_position, feed_rate, extruder);
+          if (!planner.buffer_segment(next_mesh_line_x, ry, z_position + z0, e_position, feed_rate, extruder))
+            break;
           current_xi += dxi;
           xi_cnt--;
         }
@@ -366,7 +369,7 @@
     inline void _O2 ubl_buffer_segment_raw(const float (&in_raw)[XYZE], const float &fr) {
 
       #if ENABLED(SKEW_CORRECTION)
-        float raw[XYZE] = { in_raw[X_AXIS], in_raw[Y_AXIS], in_raw[Z_AXIS], in_raw[E_AXIS] };
+        float raw[XYZE] = { in_raw[X_AXIS], in_raw[Y_AXIS], in_raw[Z_AXIS] };
         planner.skew(raw[X_AXIS], raw[Y_AXIS], raw[Z_AXIS]);
       #else
         const float (&raw)[XYZE] = in_raw;
@@ -438,7 +441,7 @@
         uint16_t segments = lroundf(cartesian_xy_mm * (1.0 / (DELTA_SEGMENT_MIN_LENGTH))); // cartesian fixed segment length
       #endif
 
-      NOLESS(segments, 1);                        // must have at least one segment
+      NOLESS(segments, 1U);                        // must have at least one segment
       const float inv_segments = 1.0 / segments;  // divide once, multiply thereafter
 
       #if IS_SCARA // scale the feed rate from mm/s to degrees/s
diff --git a/Marlin/ultralcd.cpp b/Marlin/ultralcd.cpp
index 95bef8e11a..2940cbc0de 100644
--- a/Marlin/ultralcd.cpp
+++ b/Marlin/ultralcd.cpp
@@ -2393,12 +2393,10 @@ void lcd_quick_feedback(const bool clear_buttons) {
 
     void _lcd_do_nothing() {}
     void _lcd_hard_stop() {
-      stepper.quick_stop();
       const screenFunc_t old_screen = currentScreen;
       currentScreen = _lcd_do_nothing;
-      while (planner.movesplanned()) idle();
+      planner.quick_stop(); // Runs while currentScreen is parked on _lcd_do_nothing
       currentScreen = old_screen;
-      stepper.cleaning_buffer_counter = 0;
       set_current_from_steppers_for_axis(ALL_AXES);
       sync_plan_position();
     }
@@ -3806,7 +3804,7 @@ void lcd_quick_feedback(const bool clear_buttons) {
 
     // M540 S - Abort on endstop hit when SD printing
     #if ENABLED(ABORT_ON_ENDSTOP_HIT_FEATURE_ENABLED)
-      MENU_ITEM_EDIT(bool, MSG_ENDSTOP_ABORT, &stepper.abort_on_endstop_hit);
+      MENU_ITEM_EDIT(bool, MSG_ENDSTOP_ABORT, &planner.abort_on_endstop_hit);
     #endif
 
     END_MENU();
-- 
GitLab