diff --git a/.github/workflows/lha_bot.yml b/.github/workflows/lha_bot.yml
index 690548ef1..92df8153c 100644
--- a/.github/workflows/lha_bot.yml
+++ b/.github/workflows/lha_bot.yml
@@ -4,7 +4,7 @@ name: LHA Benchmarks
 on:
   push:
     branches-ignore:
-      - "*"
+      - "**"
     tags:
   pull_request:
     types:
diff --git a/.github/workflows/lha_bot_rust.yml b/.github/workflows/lha_bot_rust.yml
index bdc330d9a..b4ba3aba2 100644
--- a/.github/workflows/lha_bot_rust.yml
+++ b/.github/workflows/lha_bot_rust.yml
@@ -4,7 +4,7 @@ name: LHA Benchmarks (Rust)
 on:
   push:
     branches-ignore:
-      - "*"
+      - "**"
     tags:
   pull_request:
     types:
diff --git a/crates/eko/src/lib.rs b/crates/eko/src/lib.rs
index 242f404b5..531a88674 100644
--- a/crates/eko/src/lib.rs
+++ b/crates/eko/src/lib.rs
@@ -1,5 +1,6 @@
 //! Interface to the eko Python package.
 
+use ekore::constants::{MAX_ORDER_QCD, MAX_ORDER_QED};
 use ekore::harmonics::cache::Cache;
 use num::Complex;
 use std::ffi::c_void;
@@ -7,71 +8,72 @@ use std::ffi::c_void;
 pub mod bib;
 pub mod mellin;
 
-/// Wrapper to pass arguments back to Python.
-struct RawCmplx {
-    re: Vec<f64>,
-    im: Vec<f64>,
-}
-
-/// Map tensor with shape (o,d,d) to c-ordered list.
+/// Map tensor with shape `(order_QCD, d, d)` into pre-allocated output buffers.
 ///
-/// This is needed for the QCD singlet.
-fn unravel<const DIM: usize>(res: Vec<[[Complex<f64>; DIM]; DIM]>, order_qcd: usize) -> RawCmplx {
-    let mut target = RawCmplx {
-        re: Vec::<f64>::new(),
-        im: Vec::<f64>::new(),
-    };
+/// Only the first `order_qcd` entries along the outer axis are written;
+/// remaining buffer slots are left undefined.
+fn unravel_qcd<const DIM: usize>(
+    res: &[[[Complex<f64>; DIM]; DIM]; MAX_ORDER_QCD],
+    order_qcd: usize,
+    out_re: &mut [f64],
+    out_im: &mut [f64],
+) {
+    let mut i = 0;
     for obj in res.iter().take(order_qcd) {
-        for col in obj.iter().take(DIM) {
-            for el in col.iter().take(DIM) {
-                target.re.push(el.re);
-                target.im.push(el.im);
+        for col in obj.iter() {
+            for el in col.iter() {
+                out_re[i] = el.re;
+                out_im[i] = el.im;
+                i += 1;
             }
         }
     }
-    target
 }
 
-/// Map tensor with shape (o,o',d,d) to c-ordered list.
+/// Map tensor with shape `(order_QCD+1, order_QED+1, d, d)` into pre-allocated output buffers.
 ///
-/// This is needed for the QED singlet and valence.
+/// The first `order_qcd + 1` entries along the QCD axis and `order_qed + 1` along the QED axis
+/// are written; remaining buffer slots are left undefined.
 fn unravel_qed<const DIM: usize>(
-    res: Vec<Vec<[[Complex<f64>; DIM]; DIM]>>,
+    res: &[[[[Complex<f64>; DIM]; DIM]; MAX_ORDER_QED + 1]; MAX_ORDER_QCD + 1],
     order_qcd: usize,
     order_qed: usize,
-) -> RawCmplx {
-    let mut target = RawCmplx {
-        re: Vec::<f64>::new(),
-        im: Vec::<f64>::new(),
-    };
+    out_re: &mut [f64],
+    out_im: &mut [f64],
+) {
+    let mut i = 0;
     for obj_ in res.iter().take(order_qcd + 1) {
         for obj in obj_.iter().take(order_qed + 1) {
-            for col in obj.iter().take(DIM) {
-                for el in col.iter().take(DIM) {
-                    target.re.push(el.re);
-                    target.im.push(el.im);
+            for col in obj.iter() {
+                for el in col.iter() {
+                    out_re[i] = el.re;
+                    out_im[i] = el.im;
+                    i += 1;
                 }
             }
         }
     }
-    target
 }
 
-/// Map tensor with shape (o,o',d) to c-ordered list.
+/// Map tensor with shape `(order_QCD+1, order_QED+1)` into pre-allocated output buffers.
 ///
-/// This is needed for the QED non-singlet.
-fn unravel_qed_ns(res: Vec<Vec<Complex<f64>>>, order_qcd: usize, order_qed: usize) -> RawCmplx {
-    let mut target = RawCmplx {
-        re: Vec::<f64>::new(),
-        im: Vec::<f64>::new(),
-    };
+/// The first `order_qcd + 1` entries along the QCD axis and `order_qed + 1` along the QED axis
+/// are written; remaining buffer slots are left undefined.
+fn unravel_qed_ns(
+    res: &[[Complex<f64>; MAX_ORDER_QED + 1]; MAX_ORDER_QCD + 1],
+    order_qcd: usize,
+    order_qed: usize,
+    out_re: &mut [f64],
+    out_im: &mut [f64],
+) {
+    let mut i = 0;
     for col in res.iter().take(order_qcd + 1) {
         for el in col.iter().take(order_qed + 1) {
-            target.re.push(el.re);
-            target.im.push(el.im);
+            out_re[i] = el.re;
+            out_im[i] = el.im;
+            i += 1;
         }
     }
-    target
 }
 
 /// Integration kernel inside quad.
@@ -93,42 +95,46 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 {
     let path = mellin::TalbotPath::new(u, args.logx, is_singlet);
     let jac = path.jac() * path.prefactor();
     let mut c = Cache::new(path.n());
-    let mut raw = RawCmplx {
-        re: Vec::<f64>::new(),
-        im: Vec::<f64>::new(),
-    };
-    let n3lo_ad_variation = std::slice::from_raw_parts(args.n3lo_ad_variation, 7)
+    let n3lo_ad_variation: [u8; 7] = std::slice::from_raw_parts(args.n3lo_ad_variation, 7)
         .try_into()
         .unwrap();
 
+    let max_buffer_size = (args.order_qcd + 1) * (args.order_qed + 1) * 16;
+    let out_re = std::slice::from_raw_parts_mut(args.re_gamma, max_buffer_size);
+    let out_im = std::slice::from_raw_parts_mut(args.im_gamma, max_buffer_size);
+
     if args.is_ome {
         if is_singlet {
-            raw = unravel(
-                ekore::operator_matrix_elements::unpolarized::spacelike::A_singlet(
+            unravel_qcd(
+                &ekore::operator_matrix_elements::unpolarized::spacelike::A_singlet(
                     args.order_qcd,
                     &mut c,
                     args.nf,
                     args.L,
                 ),
                 args.order_qcd,
+                out_re,
+                out_im,
             );
         } else {
-            raw = unravel(
-                ekore::operator_matrix_elements::unpolarized::spacelike::A_non_singlet(
+            unravel_qcd(
+                &ekore::operator_matrix_elements::unpolarized::spacelike::A_non_singlet(
                     args.order_qcd,
                     &mut c,
                     args.nf,
                     args.L,
                 ),
                 args.order_qcd,
+                out_re,
+                out_im,
             );
         }
     } else if is_singlet {
         if args.order_qed > 0 {
             let gamma_singlet_qed =
                 ekore::anomalous_dimensions::unpolarized::spacelike::gamma_singlet_qed;
-            raw = unravel_qed(
-                gamma_singlet_qed(
+            unravel_qed(
+                &gamma_singlet_qed(
                     args.order_qcd,
                     args.order_qed,
                     &mut c,
@@ -137,28 +143,32 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 {
                 ),
                 args.order_qcd,
                 args.order_qed,
+                out_re,
+                out_im,
             );
         } else {
             let gamma_singlet_qcd = match args.is_polarized {
                 true => ekore::anomalous_dimensions::polarized::spacelike::gamma_singlet_qcd,
                 false => ekore::anomalous_dimensions::unpolarized::spacelike::gamma_singlet_qcd,
             };
-            raw = unravel(
-                gamma_singlet_qcd(
+            unravel_qcd(
+                &gamma_singlet_qcd(
                     args.order_qcd,
                     &mut c,
                     args.nf,
                     n3lo_ad_variation[0..4].try_into().unwrap(),
                 ),
                 args.order_qcd,
+                out_re,
+                out_im,
             );
         }
     } else if args.order_qed > 0 {
         if is_qed_valence {
             let gamma_valence_qed =
                 ekore::anomalous_dimensions::unpolarized::spacelike::gamma_valence_qed;
-            raw = unravel_qed(
-                gamma_valence_qed(
+            unravel_qed(
+                &gamma_valence_qed(
                     args.order_qcd,
                     args.order_qed,
                     &mut c,
@@ -167,11 +177,13 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 {
                 ),
                 args.order_qcd,
                 args.order_qed,
+                out_re,
+                out_im,
             );
         } else {
             let gamma_ns_qed = ekore::anomalous_dimensions::unpolarized::spacelike::gamma_ns_qed;
-            raw = unravel_qed_ns(
-                gamma_ns_qed(
+            unravel_qed_ns(
+                &gamma_ns_qed(
                     args.order_qcd,
                     args.order_qed,
                     args.mode0,
@@ -181,10 +193,11 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 {
                 ),
                 args.order_qcd,
                 args.order_qed,
+                out_re,
+                out_im,
             );
         }
     } else {
-        // we can not do 1D
         let gamma_ns_qcd = match args.is_polarized {
             true => ekore::anomalous_dimensions::polarized::spacelike::gamma_ns_qcd,
             false => ekore::anomalous_dimensions::unpolarized::spacelike::gamma_ns_qcd,
@@ -196,16 +209,16 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 {
             args.nf,
             n3lo_ad_variation[4..7].try_into().unwrap(),
         );
-        for el in res.iter().take(args.order_qcd) {
-            raw.re.push(el.re);
-            raw.im.push(el.im);
+        for (i, el) in res.iter().take(args.order_qcd).enumerate() {
+            out_re[i] = el.re;
+            out_im[i] = el.im;
         }
     }
 
     // pass on
     (args.py)(
-        raw.re.as_ptr(),
-        raw.im.as_ptr(),
+        args.re_gamma as *const f64,
+        args.im_gamma as *const f64,
         c.n().re,
         c.n().im,
         jac.re,
@@ -317,6 +330,9 @@ pub struct QuadArgs {
     pub alphaem_running: bool,
     // additional param required for N3LO
     pub n3lo_ad_variation: *const u8,
+    // pre-allocated gamma output buffers (owned by Python, written by Rust)
+    pub re_gamma: *mut f64,
+    pub im_gamma: *mut f64,
 }
 
 /// Empty placeholder function for python callback.
@@ -403,5 +419,7 @@ pub unsafe extern "C" fn empty_args() -> QuadArgs {
         a_half_y: 0,
         alphaem_running: false,
         n3lo_ad_variation: [0, 0, 0, 0, 0, 0, 0].as_ptr(),
+        re_gamma: std::ptr::null_mut(),
+        im_gamma: std::ptr::null_mut(),
     }
 }
diff --git a/crates/ekore/src/anomalous_dimensions/polarized/spacelike.rs b/crates/ekore/src/anomalous_dimensions/polarized/spacelike.rs
index dd636b0ad..178cb7eb9 100644
--- a/crates/ekore/src/anomalous_dimensions/polarized/spacelike.rs
+++ b/crates/ekore/src/anomalous_dimensions/polarized/spacelike.rs
@@ -1,6 +1,6 @@
 //! The polarized, space-like anomalous dimensions at various couplings power.
 
-use crate::constants::{PID_NSM, PID_NSP, PID_NSV};
+use crate::constants::{MAX_ORDER_QCD, PID_NSM, PID_NSP, PID_NSV};
 use crate::harmonics::cache::Cache;
 use num::complex::Complex;
 use num::Zero;
@@ -9,14 +9,17 @@ pub mod as2;
 // pub mod as3;
 
 /// Compute the tower of the non-singlet anomalous dimensions.
+///
+/// Returns an array of shape `(MAX_ORDER_QCD,)`. Only the first `order_qcd` entries
+/// are filled; remaining slots are zero.
 pub fn gamma_ns_qcd(
     order_qcd: usize,
     mode: u16,
     c: &mut Cache,
     nf: u8,
     _n3lo_variation: [u8; 3],
-) -> Vec<Complex<f64>> {
-    let mut gamma_ns = vec![Complex::<f64>::zero(); order_qcd];
+) -> [Complex<f64>; MAX_ORDER_QCD] {
+    let mut gamma_ns = [Complex::<f64>::zero(); MAX_ORDER_QCD];
     gamma_ns[0] = as1::gamma_ns(c, nf);
     // NLO and beyond
     if order_qcd >= 2 {
@@ -42,19 +45,16 @@ pub fn gamma_ns_qcd(
 }
 
 /// Compute the tower of the singlet anomalous dimension matrices.
+///
+/// Returns an array of shape `(MAX_ORDER_QCD, d, d)`. Only the first `order_qcd`
+/// entries along the outer axis are filled; remaining slots are zero.
 pub fn gamma_singlet_qcd(
     order_qcd: usize,
     c: &mut Cache,
     nf: u8,
     _n3lo_variation: [u8; 4],
-) -> Vec<[[Complex<f64>; 2]; 2]> {
-    let mut gamma_S = vec![
-        [
-            [Complex::<f64>::zero(), Complex::<f64>::zero()],
-            [Complex::<f64>::zero(), Complex::<f64>::zero()]
-        ];
-        order_qcd
-    ];
+) -> [[[Complex<f64>; 2]; 2]; MAX_ORDER_QCD] {
+    let mut gamma_S = [[[Complex::<f64>::zero(); 2]; 2]; MAX_ORDER_QCD];
     gamma_S[0] = as1::gamma_singlet(c, nf);
     // NLO and beyond
     if order_qcd >= 2 {
diff --git a/crates/ekore/src/anomalous_dimensions/unpolarized/spacelike.rs b/crates/ekore/src/anomalous_dimensions/unpolarized/spacelike.rs
index 4589b9278..b8984a25d 100644
--- a/crates/ekore/src/anomalous_dimensions/unpolarized/spacelike.rs
+++ b/crates/ekore/src/anomalous_dimensions/unpolarized/spacelike.rs
@@ -1,7 +1,8 @@
 //! The unpolarized, space-like anomalous dimensions at various couplings power.
 
 use crate::constants::{
-    ED2, EU2, PID_NSM, PID_NSM_D, PID_NSM_U, PID_NSP, PID_NSP_D, PID_NSP_U, PID_NSV,
+    ED2, EU2, MAX_ORDER_QCD, MAX_ORDER_QED, PID_NSM, PID_NSM_D, PID_NSM_U, PID_NSP, PID_NSP_D,
+    PID_NSP_U, PID_NSV,
 };
 use crate::harmonics::cache::Cache;
 use num::complex::Complex;
@@ -19,14 +20,17 @@ pub mod as4;
 /// `n3lo_variation = (ns_p, ns_m, ns_v)` is a list indicating which variation should
 /// be used. `variation = 1,2` is the upper/lower bound, while any other value
 /// returns the central (averaged) value.
+///
+/// Returns an array of shape `(MAX_ORDER_QCD,)`. Only the first `order_qcd` entries
+/// are filled; remaining slots are zero.
 pub fn gamma_ns_qcd(
     order_qcd: usize,
     mode: u16,
     c: &mut Cache,
     nf: u8,
     n3lo_variation: [u8; 3],
-) -> Vec<Complex<f64>> {
-    let mut gamma_ns = vec![Complex::<f64>::zero(); order_qcd];
+) -> [Complex<f64>; MAX_ORDER_QCD] {
+    let mut gamma_ns = [Complex::<f64>::zero(); MAX_ORDER_QCD];
     gamma_ns[0] = as1::gamma_ns(c, nf);
     // NLO and beyond
     if order_qcd >= 2 {
@@ -66,19 +70,16 @@ pub fn gamma_ns_qcd(
 /// `n3lo_variation = (gg, gq, qg, qq)` is a list indicating which variation should
 /// be used. `variation = 1,2` is the upper/lower bound, while any other value
 /// returns the central (averaged) value.
+///
+/// Returns an array of shape `(MAX_ORDER_QCD, d, d)`. Only the first `order_qcd`
+/// entries along the outer axis are filled; remaining slots are zero.
 pub fn gamma_singlet_qcd(
     order_qcd: usize,
     c: &mut Cache,
     nf: u8,
     n3lo_variation: [u8; 4],
-) -> Vec<[[Complex<f64>; 2]; 2]> {
-    let mut gamma_S = vec![
-        [
-            [Complex::<f64>::zero(), Complex::<f64>::zero()],
-            [Complex::<f64>::zero(), Complex::<f64>::zero()]
-        ];
-        order_qcd
-    ];
+) -> [[[Complex<f64>; 2]; 2]; MAX_ORDER_QCD] {
+    let mut gamma_S = [[[Complex::<f64>::zero(); 2]; 2]; MAX_ORDER_QCD];
     gamma_S[0] = as1::gamma_singlet(c, nf);
     // NLO and beyond
     if order_qcd >= 2 {
@@ -100,6 +101,10 @@ pub fn gamma_singlet_qcd(
 /// `n3lo_variation = (ns_p, ns_m, ns_v)` is a list indicating which variation should
 /// be used. `variation = 1,2` is the upper/lower bound, while any other value
 /// returns the central (averaged) value.
+///
+/// Returns an array of shape `(MAX_ORDER_QCD+1, MAX_ORDER_QED+1)`. The first
+/// `order_qcd + 1` entries along the QCD axis and `order_qed + 1` along the QED axis
+/// are filled; remaining slots are zero.
 pub fn gamma_ns_qed(
     order_qcd: usize,
     order_qed: usize,
@@ -107,9 +112,8 @@ pub fn gamma_ns_qed(
     c: &mut Cache,
     nf: u8,
     n3lo_variation: [u8; 3],
-) -> Vec<Vec<Complex<f64>>> {
-    let col = vec![Complex::<f64>::zero(); order_qed + 1];
-    let mut gamma_ns = vec![col; order_qcd + 1];
+) -> [[Complex<f64>; MAX_ORDER_QED + 1]; MAX_ORDER_QCD + 1] {
+    let mut gamma_ns = [[Complex::<f64>::zero(); MAX_ORDER_QED + 1]; MAX_ORDER_QCD + 1];
 
     // QCD corrections
     let qcd_mode = match mode {
@@ -152,23 +156,18 @@ pub fn gamma_ns_qed(
 /// `n3lo_variation = (gg, gq, qg, qq, ns_p, ns_m, ns_v)` is a list indicating which variation should
 /// be used. `variation = 1,2` is the upper/lower bound, while any other value
 /// returns the central (averaged) value.
+///
+/// Returns an array of shape `(MAX_ORDER_QCD+1, MAX_ORDER_QED+1, d, d)`. The first
+/// `order_qcd + 1` entries along the QCD axis and `order_qed + 1` along the QED axis
+/// are filled; remaining slots are zero.
 pub fn gamma_singlet_qed(
     order_qcd: usize,
     order_qed: usize,
     c: &mut Cache,
     nf: u8,
     n3lo_variation: [u8; 7],
-) -> Vec<Vec<[[Complex<f64>; 4]; 4]>> {
-    let col = vec![
-        [[
-            Complex::<f64>::zero(),
-            Complex::<f64>::zero(),
-            Complex::<f64>::zero(),
-            Complex::<f64>::zero()
-        ]; 4];
-        order_qed + 1
-    ];
-    let mut gamma_s = vec![col; order_qcd + 1];
+) -> [[[[Complex<f64>; 4]; 4]; MAX_ORDER_QED + 1]; MAX_ORDER_QCD + 1] {
+    let mut gamma_s = [[[[Complex::<f64>::zero(); 4]; 4]; MAX_ORDER_QED + 1]; MAX_ORDER_QCD + 1];
 
     // QCD corrections
     let gamma_qcd_s = gamma_singlet_qcd(order_qcd, c, nf, n3lo_variation[0..4].try_into().unwrap());
@@ -223,15 +222,18 @@ pub fn gamma_singlet_qed(
 /// `n3lo_variation = (ns_p, ns_m, ns_v)` is a list indicating which variation should
 /// be used. `variation = 1,2` is the upper/lower bound, while any other value
 /// returns the central (averaged) value.
+///
+/// Returns an array of shape `(MAX_ORDER_QCD+1, MAX_ORDER_QED+1, d, d)`. The first
+/// `order_qcd + 1` entries along the QCD axis and `order_qed + 1` along the QED axis
+/// are filled; remaining slots are zero.
 pub fn gamma_valence_qed(
     order_qcd: usize,
     order_qed: usize,
     c: &mut Cache,
     nf: u8,
     n3lo_variation: [u8; 3],
-) -> Vec<Vec<[[Complex<f64>; 2]; 2]>> {
-    let col = vec![[[Complex::<f64>::zero(), Complex::<f64>::zero(),]; 2]; order_qed + 1];
-    let mut gamma_v = vec![col; order_qcd + 1];
+) -> [[[[Complex<f64>; 2]; 2]; MAX_ORDER_QED + 1]; MAX_ORDER_QCD + 1] {
+    let mut gamma_v = [[[[Complex::<f64>::zero(); 2]; 2]; MAX_ORDER_QED + 1]; MAX_ORDER_QCD + 1];
 
     // QCD corrections
     let gamma_qcd_nsv = gamma_ns_qcd(order_qcd, PID_NSV, c, nf, n3lo_variation);
diff --git a/crates/ekore/src/constants.rs b/crates/ekore/src/constants.rs
index a66cf71be..a8e1ad923 100644
--- a/crates/ekore/src/constants.rs
+++ b/crates/ekore/src/constants.rs
@@ -67,6 +67,12 @@ pub const PID_NSM_U: u16 = 10202;
 /// valence-like non-singlet down-sector |PID|.
 pub const PID_NSM_D: u16 = 10203;
 
+/// Maximum QCD coupling power implemented.
+pub const MAX_ORDER_QCD: usize = 4;
+
+/// Maximum QED coupling power implemented.
+pub const MAX_ORDER_QED: usize = 2;
+
 /// |QED| electric charge combinations.
 pub struct ChargeCombinations {
     pub nf: u8,
diff --git a/crates/ekore/src/harmonics/cache.rs b/crates/ekore/src/harmonics/cache.rs
index 595834017..3c1b2049f 100644
--- a/crates/ekore/src/harmonics/cache.rs
+++ b/crates/ekore/src/harmonics/cache.rs
@@ -1,12 +1,11 @@
 //! Cache harmonic sums for given Mellin N.
 
 use num::{complex::Complex, Zero};
-use std::collections::HashMap;
 
 use crate::harmonics::{g_functions, w1, w2, w3, w4, w5};
 
 /// List of available elements.
-#[derive(Debug, PartialEq, Eq, Hash)]
+#[derive(Debug, PartialEq, Eq)]
 pub enum K {
     /// $S_1(N)$
     S1,
@@ -50,12 +49,41 @@ pub enum K {
     Sm21o,
 }
 
+impl K {
+    fn idx(&self) -> usize {
+        match self {
+            K::S1 => 0,
+            K::S2 => 1,
+            K::S3 => 2,
+            K::S4 => 3,
+            K::S5 => 4,
+            K::S1h => 5,
+            K::S2h => 6,
+            K::S3h => 7,
+            K::S1mh => 8,
+            K::S2mh => 9,
+            K::S3mh => 10,
+            K::G3 => 11,
+            K::Sm1e => 12,
+            K::Sm1o => 13,
+            K::Sm2e => 14,
+            K::Sm2o => 15,
+            K::Sm3e => 16,
+            K::Sm3o => 17,
+            K::Sm21e => 18,
+            K::Sm21o => 19,
+        }
+    }
+}
+
+const CACHE_SIZE: usize = 20;
+
 /// Hold all cached values.
 pub struct Cache {
     /// Mellin N
     n: Complex<f64>,
-    /// Mapping
-    m: HashMap<K, Complex<f64>>,
+    /// Flat lookup table indexed by K::idx()
+    m: [Option<Complex<f64>>; CACHE_SIZE],
 }
 
 impl Cache {
@@ -63,7 +91,7 @@ impl Cache {
     pub fn new(n: Complex<f64>) -> Self {
         Self {
             n,
-            m: HashMap::new(),
+            m: [None; CACHE_SIZE],
         }
     }
 
@@ -74,10 +102,9 @@ impl Cache {
 
     /// Retrieve an element.
     pub fn get(&mut self, k: K) -> Complex<f64> {
-        let val = self.m.get(&k);
-        // already there?
-        if let Some(value) = val {
-            return *value;
+        let idx = k.idx();
+        if let Some(val) = self.m[idx] {
+            return val;
         }
         // compute new
         let val = match k {
@@ -103,7 +130,7 @@ impl Cache {
             K::Sm21o => w3::Sm21o(self),
         };
         // insert
-        self.m.insert(k, val);
+        self.m[idx] = Some(val);
         val
     }
 }
@@ -141,6 +168,47 @@ mod tests {
         assert_approx_eq_cmplx!(f64, m, cmplx!(2., 0.));
     }
 
+    #[test]
+    fn test_cache_idx_mapping() {
+        let all_variants = [
+            K::S1,
+            K::S2,
+            K::S3,
+            K::S4,
+            K::S5,
+            K::S1h,
+            K::S2h,
+            K::S3h,
+            K::S1mh,
+            K::S2mh,
+            K::S3mh,
+            K::G3,
+            K::Sm1e,
+            K::Sm1o,
+            K::Sm2e,
+            K::Sm2o,
+            K::Sm3e,
+            K::Sm3o,
+            K::Sm21e,
+            K::Sm21o,
+        ];
+        // size: number of variants matches CACHE_SIZE
+        assert_eq!(all_variants.len(), CACHE_SIZE);
+
+        let mut c = Cache::new(cmplx!(2., 0.));
+        let mut seen = [false; CACHE_SIZE];
+        for v in all_variants {
+            let idx = v.idx();
+            // mapping is continuous: no duplicate indices
+            assert!(!seen[idx], "duplicate index {idx}");
+            seen[idx] = true;
+            // exercises Cache::get(), panics if idx() >= CACHE_SIZE (size check)
+            c.get(v);
+        }
+        // every slot 0..CACHE_SIZE was covered (no holes)
+        assert!(seen.iter().all(|&b| b), "index mapping has holes");
+    }
+
     #[test]
     fn test_recursive_harmonic_sum() {
         const SX: [fn(Complex<f64>) -> Complex<f64>; 5] = [w1::S1, w2::S2, w3::S3, w4::S4, w5::S5];
diff --git a/crates/ekore/src/lib.rs b/crates/ekore/src/lib.rs
index db937583d..6505e6957 100644
--- a/crates/ekore/src/lib.rs
+++ b/crates/ekore/src/lib.rs
@@ -5,7 +5,7 @@
 
 pub mod anomalous_dimensions;
 pub mod bib;
-mod constants;
+pub mod constants;
 pub mod harmonics;
 pub mod operator_matrix_elements;
 pub mod util;
diff --git a/crates/ekore/src/operator_matrix_elements/unpolarized/spacelike.rs b/crates/ekore/src/operator_matrix_elements/unpolarized/spacelike.rs
index 3c57d33ef..fa87d0441 100644
--- a/crates/ekore/src/operator_matrix_elements/unpolarized/spacelike.rs
+++ b/crates/ekore/src/operator_matrix_elements/unpolarized/spacelike.rs
@@ -1,4 +1,5 @@
 //! The unpolarized, space-like |OME| at various couplings power.
+use crate::constants::MAX_ORDER_QCD;
 use crate::harmonics::cache::Cache;
 use num::complex::Complex;
 use num::Zero;
@@ -6,32 +7,16 @@ pub mod as1;
 pub mod as2;
 
 /// Compute the tower of the singlet |OME|.
+///
+/// Returns an array of shape `(MAX_ORDER_QCD, d, d)`. Only the first `matching_order_qcd`
+/// entries along the outer axis are filled; remaining slots are zero.
 pub fn A_singlet(
     matching_order_qcd: usize,
     c: &mut Cache,
     nf: u8,
     L: f64,
-) -> Vec<[[Complex<f64>; 3]; 3]> {
-    let mut A_s = vec![
-        [
-            [
-                Complex::<f64>::zero(),
-                Complex::<f64>::zero(),
-                Complex::<f64>::zero()
-            ],
-            [
-                Complex::<f64>::zero(),
-                Complex::<f64>::zero(),
-                Complex::<f64>::zero()
-            ],
-            [
-                Complex::<f64>::zero(),
-                Complex::<f64>::zero(),
-                Complex::<f64>::zero()
-            ]
-        ];
-        matching_order_qcd
-    ];
+) -> [[[Complex<f64>; 3]; 3]; MAX_ORDER_QCD] {
+    let mut A_s = [[[Complex::<f64>::zero(); 3]; 3]; MAX_ORDER_QCD];
     if matching_order_qcd >= 1 {
         A_s[0] = as1::A_singlet(c, nf, L);
     }
@@ -43,19 +28,16 @@ pub fn A_singlet(
 }
 
 /// Compute the tower of the non-singlet |OME|.
+///
+/// Returns an array of shape `(MAX_ORDER_QCD, d, d)`. Only the first `matching_order_qcd`
+/// entries along the outer axis are filled; remaining slots are zero.
 pub fn A_non_singlet(
     matching_order_qcd: usize,
     c: &mut Cache,
     nf: u8,
     L: f64,
-) -> Vec<[[Complex<f64>; 2]; 2]> {
-    let mut A_ns = vec![
-        [
-            [Complex::<f64>::zero(), Complex::<f64>::zero()],
-            [Complex::<f64>::zero(), Complex::<f64>::zero()]
-        ];
-        matching_order_qcd
-    ];
+) -> [[[Complex<f64>; 2]; 2]; MAX_ORDER_QCD] {
+    let mut A_ns = [[[Complex::<f64>::zero(); 2]; 2]; MAX_ORDER_QCD];
     if matching_order_qcd >= 1 {
         A_ns[0] = as1::A_ns(c, nf, L);
     }
diff --git a/extras/gsoc/performance.md b/extras/gsoc/performance.md
index f07d3af9c..8b0e4b8e7 100644
--- a/extras/gsoc/performance.md
+++ b/extras/gsoc/performance.md
@@ -158,3 +158,76 @@ This adds a fixed overhead of several milliseconds on every `integrate.quad` nod
 ### Decision
 
 The `nb → rs → nb` architecture introduced in [526](https://github.com/NNPDF/eko/pull/526) was not viable, hence the PR was closed. The preferred short-term path remains `scipy → Rust → Numba → Rust → scipy`. See [architecture.md §5](./architecture.md#5-scipyintegratequad) for the full discussion.
+
+---
+
+## 542: Remove mallocs, single shared buffer
+
+After [542](https://github.com/NNPDF/eko/pull/542), all mallocs were removed from `eko` and `ekore`, the gamma output is now written directly into a single Python-owned `f64` buffer that is pre-allocated once and passed through `QuadArgs`, minor python loops were removed.
+
+### Full-suite timing (`/usr/bin/time -v`)
+
+```text
+====================== 3 passed, 12 deselected, 1 warning in 219.57s (0:03:39) ======================
+        Command being timed: "poe lha -m nnlo and sv"
+        User time (seconds): 342.81
+        System time (seconds): 2.75
+        Percent of CPU this job got: 101%
+        Elapsed (wall clock) time (h:mm:ss or m:ss): 5:40.23
+        Average shared text size (kbytes): 0
+        Average unshared data size (kbytes): 0
+        Average stack size (kbytes): 0
+        Average total size (kbytes): 0
+        Maximum resident set size (kbytes): 931916
+        Average resident set size (kbytes): 0
+        Major (requiring I/O) page faults: 1681
+        Minor (reclaiming a frame) page faults: 679694
+        Voluntary context switches: 3758
+        Involuntary context switches: 3360
+        Swaps: 0
+        File system inputs: 366568
+        File system outputs: 179328
+        Socket messages sent: 0
+        Socket messages received: 0
+        Signals delivered: 0
+        Page size (bytes): 4096
+        Exit status: 0
+```
+
+### Comparison with master
+
+| | Wall clock | pytest time |
+| --- | ---------- | ----------- |
+| [master](https://github.com/NNPDF/eko/tree/5aebe0abdea3e75ed7761b5ad4ce31d7fd227cd4) | 6:33 | 4:16 |
+| PR #542 | 5:40 | 3:39 |
+| **improvement** | | **−14.5%** |
+
+### Rust/Python time split and memory analysis
+
+Some debugging revealed where time is actually spent per integration:
+
+```text
+Evolution: Rust/Python split over 891240 quad evaluations — Rust (anomalous dims): 1.573 s (9.5%), Python (numba callback): 15.015 s (90.5%)
+Matching:  Rust/Python split over 1847076 quad evaluations — Rust (anomalous dims): 2.654 s (11.4%), Python (numba callback): 20.665 s (88.6%)
+```
+
+~90% of integration time is spent in the Python/Numba callback. This sets a hard ceiling: even eliminating the Rust side entirely could only improve total runtime by ~10%.
+
+Regarding the peak RSS (~910 MB): this is not a maturin issue. Switching the compile step to `pip install -e . crates/eko` produces nearly identical RSS. The process sits at ~800 MB for the entire run; the peak is caused by a transient spike at the end when computed results are compared against reference values. The 800 MB baseline itself is dominated by Numba `cfunc` compilation at import time:
+
+```text
+[MEM] eko.__init__: START                                      RSS=   58.1 MB  (baseline reset)
+[MEM] interpolation: loaded (all @njit functions)              RSS=  147.3 MB  delta=+89.2 MB
+[MEM] beta: loaded (all @njit functions)                       RSS=  186.4 MB  delta=+38.8 MB
+[MEM] quad_ker: cb_quad_ker_qcd @cfunc loaded                  RSS=  539.9 MB  delta=+353.0 MB
+[MEM] quad_ker: cb_quad_ker_ome @cfunc loaded                  RSS=  557.1 MB  delta=+17.1 MB
+[MEM] quad_ker: cb_quad_ker_qed @cfunc loaded (all cfuncs done)  RSS=  722.9 MB  delta=+165.9 MB
+[MEM] eko.__init__: END (all submodules loaded)                RSS=  722.9 MB
+```
+
+Of the 722.9 MB loaded at startup, only ~58 MB (~8%) is non-Numba code; the three `cfunc`s alone account for ~74%. The long-term fix is to port them to Rust (see [#383](https://github.com/NNPDF/eko/issues/383)), at which point the memory footprint will drop substantially.
+
+### Further optimisation attempts and conclusions
+
+- **quad_vec**: tried passing multiple integrand points at once; failed because `scipy.LowLevelCallable` cannot be used with `quad_vec`, causing a regression to 4:19.
+- **SIMD / FMA**: passing batches of points to `rust_quad_ker` could allow LLVM to emit SIMD instructions, but this requires significant changes for a gain capped at ~5–10% on the Rust share (i.e. ≤1% overall).
diff --git a/src/eko/evolution_operator/__init__.py.patch b/src/eko/evolution_operator/__init__.py.patch
index 1bbb13439..7c0698604 100644
--- a/src/eko/evolution_operator/__init__.py.patch
+++ b/src/eko/evolution_operator/__init__.py.patch
@@ -1,5 +1,5 @@
 diff --git a/src/eko/evolution_operator/__init__.py b/src/eko/evolution_operator/__init__.py
-index f3c234f0..3980359e 100644
+index d25e6e4e..6c93b180 100644
 --- a/src/eko/evolution_operator/__init__.py
 +++ b/src/eko/evolution_operator/__init__.py
 @@ -3,7 +3,6 @@ r"""Contains the central operator classes.
@@ -90,13 +90,13 @@ index f3c234f0..3980359e 100644
 +        cfg.as1 = self.as_list[1]
 +        cfg.as0 = self.as_list[0]
 +        as_list_len = self.as_list.shape[0]
-+        for j, a in enumerate(self.as_list.tolist()):
-+            cfg.as_list[j] = a
++        as_list = np.ascontiguousarray(self.as_list, dtype=np.float64)
++        ekors.ffi.memmove(cfg.as_list, as_list, as_list.nbytes)
 +        cfg.as_list_len = as_list_len
 +        a_half_x = self.a_half_list.shape[0]
 +        a_half_y = self.a_half_list.shape[1]
-+        for j, a in enumerate(self.a_half_list.flatten().tolist()):
-+            cfg.a_half[j] = a
++        a_half = np.ascontiguousarray(self.a_half_list, dtype=np.float64)
++        ekors.ffi.memmove(cfg.a_half, a_half, a_half.nbytes)
 +        cfg.a_half_x = a_half_x
 +        cfg.a_half_y = a_half_y
 +
@@ -109,7 +109,7 @@ index f3c234f0..3980359e 100644
      def run_op_integration(
          self,
          log_grid,
-@@ -312,18 +291,62 @@ class Operator(sv.ScaleVariationModeMixin):
+@@ -312,18 +291,65 @@ class Operator(sv.ScaleVariationModeMixin):
          """
          column = []
          k, logx = log_grid
@@ -141,8 +141,15 @@ index f3c234f0..3980359e 100644
 +        max_area_len = max_areas_shape[0] * max_areas_shape[1]
 +        areas_ffi = ekors.ffi.new("double[]", max_area_len)
 +        cfg.areas = areas_ffi
++        gamma_len = (self.order[0] + 1) * (self.order[1] + 1) * 16
++        re_gamma_ffi = ekors.ffi.new("double[]", gamma_len)
++        im_gamma_ffi = ekors.ffi.new("double[]", gamma_len)
++        cfg.re_gamma = re_gamma_ffi
++        cfg.im_gamma = im_gamma_ffi
 +
 +        self.update_cfg(cfg)
++
++        func = LowLevelCallable(ekors.lib.rust_quad_ker, ekors.ffi.addressof(cfg))
 +
          # iterate basis functions
          for j, bf in enumerate(self.int_disp):
@@ -155,8 +162,8 @@ index f3c234f0..3980359e 100644
              temp_dict = {}
 +            # prepare areas for C
 +            curareas = bf.areas_representation
-+            for j, x in enumerate(curareas.flatten().tolist()):
-+                cfg.areas[j] = x
++            areas = np.ascontiguousarray(curareas, dtype=np.float64)
++            ekors.ffi.memmove(cfg.areas, areas, areas.nbytes)
 +            cfg.areas_x = curareas.shape[0]
 +            cfg.areas_y = curareas.shape[1]
              # iterate sectors
@@ -164,10 +171,6 @@ index f3c234f0..3980359e 100644
 +            for label in labels:
 +                cfg.mode0 = label[0]
 +                cfg.mode1 = label[1]
-+                # construct the low level object
-+                func = LowLevelCallable(
-+                    ekors.lib.rust_quad_ker, ekors.ffi.addressof(cfg)
-+                )
                  res = integrate.quad(
 -                    self.quad_ker(
 -                        label=label, logx=logx, areas=bf.areas_representation