From da770fca3cb59c6729bdeec5fc406e6d8afffb71 Mon Sep 17 00:00:00 2001 From: AkshatRai07 Date: Wed, 24 Jun 2026 19:45:58 +0530 Subject: [PATCH 1/9] Removed mallocs in ekore --- crates/eko/src/lib.rs | 18 +++---- .../polarized/spacelike.rs | 14 ++---- .../unpolarized/spacelike.rs | 37 ++++----------- crates/ekore/src/harmonics/cache.rs | 47 +++++++++++++++---- .../unpolarized/spacelike.rs | 33 ++----------- 5 files changed, 64 insertions(+), 85 deletions(-) diff --git a/crates/eko/src/lib.rs b/crates/eko/src/lib.rs index 242f404b5..f6edcf2c8 100644 --- a/crates/eko/src/lib.rs +++ b/crates/eko/src/lib.rs @@ -16,7 +16,7 @@ struct RawCmplx { /// Map tensor with shape (o,d,d) to c-ordered list. /// /// This is needed for the QCD singlet. -fn unravel(res: Vec<[[Complex; DIM]; DIM]>, order_qcd: usize) -> RawCmplx { +fn unravel(res: &[[[Complex; DIM]; DIM]], order_qcd: usize) -> RawCmplx { let mut target = RawCmplx { re: Vec::::new(), im: Vec::::new(), @@ -36,7 +36,7 @@ fn unravel(res: Vec<[[Complex; DIM]; DIM]>, order_qcd: us /// /// This is needed for the QED singlet and valence. fn unravel_qed( - res: Vec; DIM]; DIM]>>, + res: &[[[[Complex; DIM]; DIM]; 3]], order_qcd: usize, order_qed: usize, ) -> RawCmplx { @@ -60,7 +60,7 @@ fn unravel_qed( /// Map tensor with shape (o,o',d) to c-ordered list. /// /// This is needed for the QED non-singlet. -fn unravel_qed_ns(res: Vec>>, order_qcd: usize, order_qed: usize) -> RawCmplx { +fn unravel_qed_ns(res: &[[Complex; 3]], order_qcd: usize, order_qed: usize) -> RawCmplx { let mut target = RawCmplx { re: Vec::::new(), im: Vec::::new(), @@ -104,7 +104,7 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { if args.is_ome { if is_singlet { raw = unravel( - ekore::operator_matrix_elements::unpolarized::spacelike::A_singlet( + &ekore::operator_matrix_elements::unpolarized::spacelike::A_singlet( args.order_qcd, &mut c, args.nf, @@ -114,7 +114,7 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { ); } else { raw = unravel( - ekore::operator_matrix_elements::unpolarized::spacelike::A_non_singlet( + &ekore::operator_matrix_elements::unpolarized::spacelike::A_non_singlet( args.order_qcd, &mut c, args.nf, @@ -128,7 +128,7 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { let gamma_singlet_qed = ekore::anomalous_dimensions::unpolarized::spacelike::gamma_singlet_qed; raw = unravel_qed( - gamma_singlet_qed( + &gamma_singlet_qed( args.order_qcd, args.order_qed, &mut c, @@ -144,7 +144,7 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { false => ekore::anomalous_dimensions::unpolarized::spacelike::gamma_singlet_qcd, }; raw = unravel( - gamma_singlet_qcd( + &gamma_singlet_qcd( args.order_qcd, &mut c, args.nf, @@ -158,7 +158,7 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { let gamma_valence_qed = ekore::anomalous_dimensions::unpolarized::spacelike::gamma_valence_qed; raw = unravel_qed( - gamma_valence_qed( + &gamma_valence_qed( args.order_qcd, args.order_qed, &mut c, @@ -171,7 +171,7 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { } else { let gamma_ns_qed = ekore::anomalous_dimensions::unpolarized::spacelike::gamma_ns_qed; raw = unravel_qed_ns( - gamma_ns_qed( + &gamma_ns_qed( args.order_qcd, args.order_qed, args.mode0, diff --git a/crates/ekore/src/anomalous_dimensions/polarized/spacelike.rs b/crates/ekore/src/anomalous_dimensions/polarized/spacelike.rs index dd636b0ad..f7210fda2 100644 --- a/crates/ekore/src/anomalous_dimensions/polarized/spacelike.rs +++ b/crates/ekore/src/anomalous_dimensions/polarized/spacelike.rs @@ -15,8 +15,8 @@ pub fn gamma_ns_qcd( c: &mut Cache, nf: u8, _n3lo_variation: [u8; 3], -) -> Vec> { - let mut gamma_ns = vec![Complex::::zero(); order_qcd]; +) -> [Complex; 4] { + let mut gamma_ns = [Complex::::zero(); 4]; gamma_ns[0] = as1::gamma_ns(c, nf); // NLO and beyond if order_qcd >= 2 { @@ -47,14 +47,8 @@ pub fn gamma_singlet_qcd( c: &mut Cache, nf: u8, _n3lo_variation: [u8; 4], -) -> Vec<[[Complex; 2]; 2]> { - let mut gamma_S = vec![ - [ - [Complex::::zero(), Complex::::zero()], - [Complex::::zero(), Complex::::zero()] - ]; - order_qcd - ]; +) -> [[[Complex; 2]; 2]; 4] { + let mut gamma_S = [[[Complex::::zero(); 2]; 2]; 4]; gamma_S[0] = as1::gamma_singlet(c, nf); // NLO and beyond if order_qcd >= 2 { diff --git a/crates/ekore/src/anomalous_dimensions/unpolarized/spacelike.rs b/crates/ekore/src/anomalous_dimensions/unpolarized/spacelike.rs index 4589b9278..c1dadd157 100644 --- a/crates/ekore/src/anomalous_dimensions/unpolarized/spacelike.rs +++ b/crates/ekore/src/anomalous_dimensions/unpolarized/spacelike.rs @@ -25,8 +25,8 @@ pub fn gamma_ns_qcd( c: &mut Cache, nf: u8, n3lo_variation: [u8; 3], -) -> Vec> { - let mut gamma_ns = vec![Complex::::zero(); order_qcd]; +) -> [Complex; 4] { + let mut gamma_ns = [Complex::::zero(); 4]; gamma_ns[0] = as1::gamma_ns(c, nf); // NLO and beyond if order_qcd >= 2 { @@ -71,14 +71,8 @@ pub fn gamma_singlet_qcd( c: &mut Cache, nf: u8, n3lo_variation: [u8; 4], -) -> Vec<[[Complex; 2]; 2]> { - let mut gamma_S = vec![ - [ - [Complex::::zero(), Complex::::zero()], - [Complex::::zero(), Complex::::zero()] - ]; - order_qcd - ]; +) -> [[[Complex; 2]; 2]; 4] { + let mut gamma_S = [[[Complex::::zero(); 2]; 2]; 4]; gamma_S[0] = as1::gamma_singlet(c, nf); // NLO and beyond if order_qcd >= 2 { @@ -107,9 +101,8 @@ pub fn gamma_ns_qed( c: &mut Cache, nf: u8, n3lo_variation: [u8; 3], -) -> Vec>> { - let col = vec![Complex::::zero(); order_qed + 1]; - let mut gamma_ns = vec![col; order_qcd + 1]; +) -> [[Complex; 3]; 5] { + let mut gamma_ns = [[Complex::::zero(); 3]; 5]; // QCD corrections let qcd_mode = match mode { @@ -158,17 +151,8 @@ pub fn gamma_singlet_qed( c: &mut Cache, nf: u8, n3lo_variation: [u8; 7], -) -> Vec; 4]; 4]>> { - let col = vec![ - [[ - Complex::::zero(), - Complex::::zero(), - Complex::::zero(), - Complex::::zero() - ]; 4]; - order_qed + 1 - ]; - let mut gamma_s = vec![col; order_qcd + 1]; +) -> [[[[Complex; 4]; 4]; 3]; 5] { + let mut gamma_s = [[[[Complex::::zero(); 4]; 4]; 3]; 5]; // QCD corrections let gamma_qcd_s = gamma_singlet_qcd(order_qcd, c, nf, n3lo_variation[0..4].try_into().unwrap()); @@ -229,9 +213,8 @@ pub fn gamma_valence_qed( c: &mut Cache, nf: u8, n3lo_variation: [u8; 3], -) -> Vec; 2]; 2]>> { - let col = vec![[[Complex::::zero(), Complex::::zero(),]; 2]; order_qed + 1]; - let mut gamma_v = vec![col; order_qcd + 1]; +) -> [[[[Complex; 2]; 2]; 3]; 5] { + let mut gamma_v = [[[[Complex::::zero(); 2]; 2]; 3]; 5]; // QCD corrections let gamma_qcd_nsv = gamma_ns_qcd(order_qcd, PID_NSV, c, nf, n3lo_variation); diff --git a/crates/ekore/src/harmonics/cache.rs b/crates/ekore/src/harmonics/cache.rs index 595834017..71a8fd9dc 100644 --- a/crates/ekore/src/harmonics/cache.rs +++ b/crates/ekore/src/harmonics/cache.rs @@ -1,12 +1,11 @@ //! Cache harmonic sums for given Mellin N. use num::{complex::Complex, Zero}; -use std::collections::HashMap; use crate::harmonics::{g_functions, w1, w2, w3, w4, w5}; /// List of available elements. -#[derive(Debug, PartialEq, Eq, Hash)] +#[derive(Debug, PartialEq, Eq)] pub enum K { /// $S_1(N)$ S1, @@ -50,12 +49,41 @@ pub enum K { Sm21o, } +impl K { + fn idx(&self) -> usize { + match self { + K::S1 => 0, + K::S2 => 1, + K::S3 => 2, + K::S4 => 3, + K::S5 => 4, + K::S1h => 5, + K::S2h => 6, + K::S3h => 7, + K::S1mh => 8, + K::S2mh => 9, + K::S3mh => 10, + K::G3 => 11, + K::Sm1e => 12, + K::Sm1o => 13, + K::Sm2e => 14, + K::Sm2o => 15, + K::Sm3e => 16, + K::Sm3o => 17, + K::Sm21e => 18, + K::Sm21o => 19, + } + } +} + +const N_CACHE: usize = 20; + /// Hold all cached values. pub struct Cache { /// Mellin N n: Complex, - /// Mapping - m: HashMap>, + /// Flat lookup table indexed by K::idx() + m: [Option>; N_CACHE], } impl Cache { @@ -63,7 +91,7 @@ impl Cache { pub fn new(n: Complex) -> Self { Self { n, - m: HashMap::new(), + m: [None; N_CACHE], } } @@ -74,10 +102,9 @@ impl Cache { /// Retrieve an element. pub fn get(&mut self, k: K) -> Complex { - let val = self.m.get(&k); - // already there? - if let Some(value) = val { - return *value; + let idx = k.idx(); + if let Some(val) = self.m[idx] { + return val; } // compute new let val = match k { @@ -103,7 +130,7 @@ impl Cache { K::Sm21o => w3::Sm21o(self), }; // insert - self.m.insert(k, val); + self.m[idx] = Some(val); val } } diff --git a/crates/ekore/src/operator_matrix_elements/unpolarized/spacelike.rs b/crates/ekore/src/operator_matrix_elements/unpolarized/spacelike.rs index 3c57d33ef..3130fbc23 100644 --- a/crates/ekore/src/operator_matrix_elements/unpolarized/spacelike.rs +++ b/crates/ekore/src/operator_matrix_elements/unpolarized/spacelike.rs @@ -11,27 +11,8 @@ pub fn A_singlet( c: &mut Cache, nf: u8, L: f64, -) -> Vec<[[Complex; 3]; 3]> { - let mut A_s = vec![ - [ - [ - Complex::::zero(), - Complex::::zero(), - Complex::::zero() - ], - [ - Complex::::zero(), - Complex::::zero(), - Complex::::zero() - ], - [ - Complex::::zero(), - Complex::::zero(), - Complex::::zero() - ] - ]; - matching_order_qcd - ]; +) -> [[[Complex; 3]; 3]; 4] { + let mut A_s = [[[Complex::::zero(); 3]; 3]; 4]; if matching_order_qcd >= 1 { A_s[0] = as1::A_singlet(c, nf, L); } @@ -48,14 +29,8 @@ pub fn A_non_singlet( c: &mut Cache, nf: u8, L: f64, -) -> Vec<[[Complex; 2]; 2]> { - let mut A_ns = vec![ - [ - [Complex::::zero(), Complex::::zero()], - [Complex::::zero(), Complex::::zero()] - ]; - matching_order_qcd - ]; +) -> [[[Complex; 2]; 2]; 4] { + let mut A_ns = [[[Complex::::zero(); 2]; 2]; 4]; if matching_order_qcd >= 1 { A_ns[0] = as1::A_ns(c, nf, L); } From 325576258824e36a3a1be37665c65408ee1f3423 Mon Sep 17 00:00:00 2001 From: AkshatRai07 Date: Wed, 24 Jun 2026 20:13:06 +0530 Subject: [PATCH 2/9] Single buffer for integration and removed python malloc loops --- crates/eko/src/lib.rs | 122 +++++++++++-------- src/eko/evolution_operator/__init__.py.patch | 27 ++-- 2 files changed, 83 insertions(+), 66 deletions(-) diff --git a/crates/eko/src/lib.rs b/crates/eko/src/lib.rs index f6edcf2c8..c2f6283d1 100644 --- a/crates/eko/src/lib.rs +++ b/crates/eko/src/lib.rs @@ -7,71 +7,69 @@ use std::ffi::c_void; pub mod bib; pub mod mellin; -/// Wrapper to pass arguments back to Python. -struct RawCmplx { - re: Vec, - im: Vec, -} - -/// Map tensor with shape (o,d,d) to c-ordered list. +/// Map tensor with shape (o,d,d) into pre-allocated output buffers. /// /// This is needed for the QCD singlet. -fn unravel(res: &[[[Complex; DIM]; DIM]], order_qcd: usize) -> RawCmplx { - let mut target = RawCmplx { - re: Vec::::new(), - im: Vec::::new(), - }; +fn unravel( + res: &[[[Complex; DIM]; DIM]], + order_qcd: usize, + out_re: &mut [f64], + out_im: &mut [f64], +) { + let mut i = 0; for obj in res.iter().take(order_qcd) { - for col in obj.iter().take(DIM) { - for el in col.iter().take(DIM) { - target.re.push(el.re); - target.im.push(el.im); + for col in obj.iter() { + for el in col.iter() { + out_re[i] = el.re; + out_im[i] = el.im; + i += 1; } } } - target } -/// Map tensor with shape (o,o',d,d) to c-ordered list. +/// Map tensor with shape (o,o',d,d) into pre-allocated output buffers. /// /// This is needed for the QED singlet and valence. fn unravel_qed( res: &[[[[Complex; DIM]; DIM]; 3]], order_qcd: usize, order_qed: usize, -) -> RawCmplx { - let mut target = RawCmplx { - re: Vec::::new(), - im: Vec::::new(), - }; + out_re: &mut [f64], + out_im: &mut [f64], +) { + let mut i = 0; for obj_ in res.iter().take(order_qcd + 1) { for obj in obj_.iter().take(order_qed + 1) { - for col in obj.iter().take(DIM) { - for el in col.iter().take(DIM) { - target.re.push(el.re); - target.im.push(el.im); + for col in obj.iter() { + for el in col.iter() { + out_re[i] = el.re; + out_im[i] = el.im; + i += 1; } } } } - target } -/// Map tensor with shape (o,o',d) to c-ordered list. +/// Map tensor with shape (o,o',d) into pre-allocated output buffers. /// /// This is needed for the QED non-singlet. -fn unravel_qed_ns(res: &[[Complex; 3]], order_qcd: usize, order_qed: usize) -> RawCmplx { - let mut target = RawCmplx { - re: Vec::::new(), - im: Vec::::new(), - }; +fn unravel_qed_ns( + res: &[[Complex; 3]], + order_qcd: usize, + order_qed: usize, + out_re: &mut [f64], + out_im: &mut [f64], +) { + let mut i = 0; for col in res.iter().take(order_qcd + 1) { for el in col.iter().take(order_qed + 1) { - target.re.push(el.re); - target.im.push(el.im); + out_re[i] = el.re; + out_im[i] = el.im; + i += 1; } } - target } /// Integration kernel inside quad. @@ -93,17 +91,17 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { let path = mellin::TalbotPath::new(u, args.logx, is_singlet); let jac = path.jac() * path.prefactor(); let mut c = Cache::new(path.n()); - let mut raw = RawCmplx { - re: Vec::::new(), - im: Vec::::new(), - }; - let n3lo_ad_variation = std::slice::from_raw_parts(args.n3lo_ad_variation, 7) + let n3lo_ad_variation: [u8; 7] = std::slice::from_raw_parts(args.n3lo_ad_variation, 7) .try_into() .unwrap(); + let n_max = (args.order_qcd + 1) * (args.order_qed + 1) * 16; + let out_re = std::slice::from_raw_parts_mut(args.re_gamma, n_max); + let out_im = std::slice::from_raw_parts_mut(args.im_gamma, n_max); + if args.is_ome { if is_singlet { - raw = unravel( + unravel( &ekore::operator_matrix_elements::unpolarized::spacelike::A_singlet( args.order_qcd, &mut c, @@ -111,9 +109,11 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { args.L, ), args.order_qcd, + out_re, + out_im, ); } else { - raw = unravel( + unravel( &ekore::operator_matrix_elements::unpolarized::spacelike::A_non_singlet( args.order_qcd, &mut c, @@ -121,13 +121,15 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { args.L, ), args.order_qcd, + out_re, + out_im, ); } } else if is_singlet { if args.order_qed > 0 { let gamma_singlet_qed = ekore::anomalous_dimensions::unpolarized::spacelike::gamma_singlet_qed; - raw = unravel_qed( + unravel_qed( &gamma_singlet_qed( args.order_qcd, args.order_qed, @@ -137,13 +139,15 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { ), args.order_qcd, args.order_qed, + out_re, + out_im, ); } else { let gamma_singlet_qcd = match args.is_polarized { true => ekore::anomalous_dimensions::polarized::spacelike::gamma_singlet_qcd, false => ekore::anomalous_dimensions::unpolarized::spacelike::gamma_singlet_qcd, }; - raw = unravel( + unravel( &gamma_singlet_qcd( args.order_qcd, &mut c, @@ -151,13 +155,15 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { n3lo_ad_variation[0..4].try_into().unwrap(), ), args.order_qcd, + out_re, + out_im, ); } } else if args.order_qed > 0 { if is_qed_valence { let gamma_valence_qed = ekore::anomalous_dimensions::unpolarized::spacelike::gamma_valence_qed; - raw = unravel_qed( + unravel_qed( &gamma_valence_qed( args.order_qcd, args.order_qed, @@ -167,10 +173,12 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { ), args.order_qcd, args.order_qed, + out_re, + out_im, ); } else { let gamma_ns_qed = ekore::anomalous_dimensions::unpolarized::spacelike::gamma_ns_qed; - raw = unravel_qed_ns( + unravel_qed_ns( &gamma_ns_qed( args.order_qcd, args.order_qed, @@ -181,10 +189,11 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { ), args.order_qcd, args.order_qed, + out_re, + out_im, ); } } else { - // we can not do 1D let gamma_ns_qcd = match args.is_polarized { true => ekore::anomalous_dimensions::polarized::spacelike::gamma_ns_qcd, false => ekore::anomalous_dimensions::unpolarized::spacelike::gamma_ns_qcd, @@ -196,16 +205,16 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { args.nf, n3lo_ad_variation[4..7].try_into().unwrap(), ); - for el in res.iter().take(args.order_qcd) { - raw.re.push(el.re); - raw.im.push(el.im); + for (i, el) in res.iter().take(args.order_qcd).enumerate() { + out_re[i] = el.re; + out_im[i] = el.im; } } // pass on (args.py)( - raw.re.as_ptr(), - raw.im.as_ptr(), + args.re_gamma as *const f64, + args.im_gamma as *const f64, c.n().re, c.n().im, jac.re, @@ -317,6 +326,9 @@ pub struct QuadArgs { pub alphaem_running: bool, // additional param required for N3LO pub n3lo_ad_variation: *const u8, + // pre-allocated gamma output buffers (owned by Python, written by Rust) + pub re_gamma: *mut f64, + pub im_gamma: *mut f64, } /// Empty placeholder function for python callback. @@ -403,5 +415,7 @@ pub unsafe extern "C" fn empty_args() -> QuadArgs { a_half_y: 0, alphaem_running: false, n3lo_ad_variation: [0, 0, 0, 0, 0, 0, 0].as_ptr(), + re_gamma: std::ptr::null_mut(), + im_gamma: std::ptr::null_mut(), } } diff --git a/src/eko/evolution_operator/__init__.py.patch b/src/eko/evolution_operator/__init__.py.patch index 1bbb13439..7c0698604 100644 --- a/src/eko/evolution_operator/__init__.py.patch +++ b/src/eko/evolution_operator/__init__.py.patch @@ -1,5 +1,5 @@ diff --git a/src/eko/evolution_operator/__init__.py b/src/eko/evolution_operator/__init__.py -index f3c234f0..3980359e 100644 +index d25e6e4e..6c93b180 100644 --- a/src/eko/evolution_operator/__init__.py +++ b/src/eko/evolution_operator/__init__.py @@ -3,7 +3,6 @@ r"""Contains the central operator classes. @@ -90,13 +90,13 @@ index f3c234f0..3980359e 100644 + cfg.as1 = self.as_list[1] + cfg.as0 = self.as_list[0] + as_list_len = self.as_list.shape[0] -+ for j, a in enumerate(self.as_list.tolist()): -+ cfg.as_list[j] = a ++ as_list = np.ascontiguousarray(self.as_list, dtype=np.float64) ++ ekors.ffi.memmove(cfg.as_list, as_list, as_list.nbytes) + cfg.as_list_len = as_list_len + a_half_x = self.a_half_list.shape[0] + a_half_y = self.a_half_list.shape[1] -+ for j, a in enumerate(self.a_half_list.flatten().tolist()): -+ cfg.a_half[j] = a ++ a_half = np.ascontiguousarray(self.a_half_list, dtype=np.float64) ++ ekors.ffi.memmove(cfg.a_half, a_half, a_half.nbytes) + cfg.a_half_x = a_half_x + cfg.a_half_y = a_half_y + @@ -109,7 +109,7 @@ index f3c234f0..3980359e 100644 def run_op_integration( self, log_grid, -@@ -312,18 +291,62 @@ class Operator(sv.ScaleVariationModeMixin): +@@ -312,18 +291,65 @@ class Operator(sv.ScaleVariationModeMixin): """ column = [] k, logx = log_grid @@ -141,8 +141,15 @@ index f3c234f0..3980359e 100644 + max_area_len = max_areas_shape[0] * max_areas_shape[1] + areas_ffi = ekors.ffi.new("double[]", max_area_len) + cfg.areas = areas_ffi ++ gamma_len = (self.order[0] + 1) * (self.order[1] + 1) * 16 ++ re_gamma_ffi = ekors.ffi.new("double[]", gamma_len) ++ im_gamma_ffi = ekors.ffi.new("double[]", gamma_len) ++ cfg.re_gamma = re_gamma_ffi ++ cfg.im_gamma = im_gamma_ffi + + self.update_cfg(cfg) ++ ++ func = LowLevelCallable(ekors.lib.rust_quad_ker, ekors.ffi.addressof(cfg)) + # iterate basis functions for j, bf in enumerate(self.int_disp): @@ -155,8 +162,8 @@ index f3c234f0..3980359e 100644 temp_dict = {} + # prepare areas for C + curareas = bf.areas_representation -+ for j, x in enumerate(curareas.flatten().tolist()): -+ cfg.areas[j] = x ++ areas = np.ascontiguousarray(curareas, dtype=np.float64) ++ ekors.ffi.memmove(cfg.areas, areas, areas.nbytes) + cfg.areas_x = curareas.shape[0] + cfg.areas_y = curareas.shape[1] # iterate sectors @@ -164,10 +171,6 @@ index f3c234f0..3980359e 100644 + for label in labels: + cfg.mode0 = label[0] + cfg.mode1 = label[1] -+ # construct the low level object -+ func = LowLevelCallable( -+ ekors.lib.rust_quad_ker, ekors.ffi.addressof(cfg) -+ ) res = integrate.quad( - self.quad_ker( - label=label, logx=logx, areas=bf.areas_representation From 5083adc7101a08f4aa8cedbda51c8b6afcffd2da Mon Sep 17 00:00:00 2001 From: AkshatRai07 Date: Thu, 25 Jun 2026 19:27:18 +0530 Subject: [PATCH 3/9] Changed cache size const name and added related tests --- crates/ekore/src/harmonics/cache.rs | 47 +++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/crates/ekore/src/harmonics/cache.rs b/crates/ekore/src/harmonics/cache.rs index 71a8fd9dc..3c1b2049f 100644 --- a/crates/ekore/src/harmonics/cache.rs +++ b/crates/ekore/src/harmonics/cache.rs @@ -76,14 +76,14 @@ impl K { } } -const N_CACHE: usize = 20; +const CACHE_SIZE: usize = 20; /// Hold all cached values. pub struct Cache { /// Mellin N n: Complex, /// Flat lookup table indexed by K::idx() - m: [Option>; N_CACHE], + m: [Option>; CACHE_SIZE], } impl Cache { @@ -91,7 +91,7 @@ impl Cache { pub fn new(n: Complex) -> Self { Self { n, - m: [None; N_CACHE], + m: [None; CACHE_SIZE], } } @@ -168,6 +168,47 @@ mod tests { assert_approx_eq_cmplx!(f64, m, cmplx!(2., 0.)); } + #[test] + fn test_cache_idx_mapping() { + let all_variants = [ + K::S1, + K::S2, + K::S3, + K::S4, + K::S5, + K::S1h, + K::S2h, + K::S3h, + K::S1mh, + K::S2mh, + K::S3mh, + K::G3, + K::Sm1e, + K::Sm1o, + K::Sm2e, + K::Sm2o, + K::Sm3e, + K::Sm3o, + K::Sm21e, + K::Sm21o, + ]; + // size: number of variants matches CACHE_SIZE + assert_eq!(all_variants.len(), CACHE_SIZE); + + let mut c = Cache::new(cmplx!(2., 0.)); + let mut seen = [false; CACHE_SIZE]; + for v in all_variants { + let idx = v.idx(); + // mapping is continuous: no duplicate indices + assert!(!seen[idx], "duplicate index {idx}"); + seen[idx] = true; + // exercises Cache::get(), panics if idx() >= CACHE_SIZE (size check) + c.get(v); + } + // every slot 0..CACHE_SIZE was covered (no holes) + assert!(seen.iter().all(|&b| b), "index mapping has holes"); + } + #[test] fn test_recursive_harmonic_sum() { const SX: [fn(Complex) -> Complex; 5] = [w1::S1, w2::S2, w3::S3, w4::S4, w5::S5]; From ec36567e4d5659559c5b925eeaaf1bd65294c526 Mon Sep 17 00:00:00 2001 From: AkshatRai07 Date: Thu, 25 Jun 2026 19:45:43 +0530 Subject: [PATCH 4/9] Add MAX_ORDER_QED/QCD and rm magic numbers --- .../polarized/spacelike.rs | 10 ++++---- .../unpolarized/spacelike.rs | 23 ++++++++++--------- crates/ekore/src/constants.rs | 6 +++++ .../unpolarized/spacelike.rs | 9 ++++---- 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/crates/ekore/src/anomalous_dimensions/polarized/spacelike.rs b/crates/ekore/src/anomalous_dimensions/polarized/spacelike.rs index f7210fda2..6b99aea7e 100644 --- a/crates/ekore/src/anomalous_dimensions/polarized/spacelike.rs +++ b/crates/ekore/src/anomalous_dimensions/polarized/spacelike.rs @@ -1,6 +1,6 @@ //! The polarized, space-like anomalous dimensions at various couplings power. -use crate::constants::{PID_NSM, PID_NSP, PID_NSV}; +use crate::constants::{MAX_ORDER_QCD, PID_NSM, PID_NSP, PID_NSV}; use crate::harmonics::cache::Cache; use num::complex::Complex; use num::Zero; @@ -15,8 +15,8 @@ pub fn gamma_ns_qcd( c: &mut Cache, nf: u8, _n3lo_variation: [u8; 3], -) -> [Complex; 4] { - let mut gamma_ns = [Complex::::zero(); 4]; +) -> [Complex; MAX_ORDER_QCD] { + let mut gamma_ns = [Complex::::zero(); MAX_ORDER_QCD]; gamma_ns[0] = as1::gamma_ns(c, nf); // NLO and beyond if order_qcd >= 2 { @@ -47,8 +47,8 @@ pub fn gamma_singlet_qcd( c: &mut Cache, nf: u8, _n3lo_variation: [u8; 4], -) -> [[[Complex; 2]; 2]; 4] { - let mut gamma_S = [[[Complex::::zero(); 2]; 2]; 4]; +) -> [[[Complex; 2]; 2]; MAX_ORDER_QCD] { + let mut gamma_S = [[[Complex::::zero(); 2]; 2]; MAX_ORDER_QCD]; gamma_S[0] = as1::gamma_singlet(c, nf); // NLO and beyond if order_qcd >= 2 { diff --git a/crates/ekore/src/anomalous_dimensions/unpolarized/spacelike.rs b/crates/ekore/src/anomalous_dimensions/unpolarized/spacelike.rs index c1dadd157..cfe07862e 100644 --- a/crates/ekore/src/anomalous_dimensions/unpolarized/spacelike.rs +++ b/crates/ekore/src/anomalous_dimensions/unpolarized/spacelike.rs @@ -1,7 +1,8 @@ //! The unpolarized, space-like anomalous dimensions at various couplings power. use crate::constants::{ - ED2, EU2, PID_NSM, PID_NSM_D, PID_NSM_U, PID_NSP, PID_NSP_D, PID_NSP_U, PID_NSV, + ED2, EU2, MAX_ORDER_QCD, MAX_ORDER_QED, PID_NSM, PID_NSM_D, PID_NSM_U, PID_NSP, PID_NSP_D, + PID_NSP_U, PID_NSV, }; use crate::harmonics::cache::Cache; use num::complex::Complex; @@ -25,8 +26,8 @@ pub fn gamma_ns_qcd( c: &mut Cache, nf: u8, n3lo_variation: [u8; 3], -) -> [Complex; 4] { - let mut gamma_ns = [Complex::::zero(); 4]; +) -> [Complex; MAX_ORDER_QCD] { + let mut gamma_ns = [Complex::::zero(); MAX_ORDER_QCD]; gamma_ns[0] = as1::gamma_ns(c, nf); // NLO and beyond if order_qcd >= 2 { @@ -71,8 +72,8 @@ pub fn gamma_singlet_qcd( c: &mut Cache, nf: u8, n3lo_variation: [u8; 4], -) -> [[[Complex; 2]; 2]; 4] { - let mut gamma_S = [[[Complex::::zero(); 2]; 2]; 4]; +) -> [[[Complex; 2]; 2]; MAX_ORDER_QCD] { + let mut gamma_S = [[[Complex::::zero(); 2]; 2]; MAX_ORDER_QCD]; gamma_S[0] = as1::gamma_singlet(c, nf); // NLO and beyond if order_qcd >= 2 { @@ -101,8 +102,8 @@ pub fn gamma_ns_qed( c: &mut Cache, nf: u8, n3lo_variation: [u8; 3], -) -> [[Complex; 3]; 5] { - let mut gamma_ns = [[Complex::::zero(); 3]; 5]; +) -> [[Complex; MAX_ORDER_QED + 1]; MAX_ORDER_QCD + 1] { + let mut gamma_ns = [[Complex::::zero(); MAX_ORDER_QED + 1]; MAX_ORDER_QCD + 1]; // QCD corrections let qcd_mode = match mode { @@ -151,8 +152,8 @@ pub fn gamma_singlet_qed( c: &mut Cache, nf: u8, n3lo_variation: [u8; 7], -) -> [[[[Complex; 4]; 4]; 3]; 5] { - let mut gamma_s = [[[[Complex::::zero(); 4]; 4]; 3]; 5]; +) -> [[[[Complex; 4]; 4]; MAX_ORDER_QED + 1]; MAX_ORDER_QCD + 1] { + let mut gamma_s = [[[[Complex::::zero(); 4]; 4]; MAX_ORDER_QED + 1]; MAX_ORDER_QCD + 1]; // QCD corrections let gamma_qcd_s = gamma_singlet_qcd(order_qcd, c, nf, n3lo_variation[0..4].try_into().unwrap()); @@ -213,8 +214,8 @@ pub fn gamma_valence_qed( c: &mut Cache, nf: u8, n3lo_variation: [u8; 3], -) -> [[[[Complex; 2]; 2]; 3]; 5] { - let mut gamma_v = [[[[Complex::::zero(); 2]; 2]; 3]; 5]; +) -> [[[[Complex; 2]; 2]; MAX_ORDER_QED + 1]; MAX_ORDER_QCD + 1] { + let mut gamma_v = [[[[Complex::::zero(); 2]; 2]; MAX_ORDER_QED + 1]; MAX_ORDER_QCD + 1]; // QCD corrections let gamma_qcd_nsv = gamma_ns_qcd(order_qcd, PID_NSV, c, nf, n3lo_variation); diff --git a/crates/ekore/src/constants.rs b/crates/ekore/src/constants.rs index a66cf71be..a8e1ad923 100644 --- a/crates/ekore/src/constants.rs +++ b/crates/ekore/src/constants.rs @@ -67,6 +67,12 @@ pub const PID_NSM_U: u16 = 10202; /// valence-like non-singlet down-sector |PID|. pub const PID_NSM_D: u16 = 10203; +/// Maximum QCD coupling power implemented. +pub const MAX_ORDER_QCD: usize = 4; + +/// Maximum QED coupling power implemented. +pub const MAX_ORDER_QED: usize = 2; + /// |QED| electric charge combinations. pub struct ChargeCombinations { pub nf: u8, diff --git a/crates/ekore/src/operator_matrix_elements/unpolarized/spacelike.rs b/crates/ekore/src/operator_matrix_elements/unpolarized/spacelike.rs index 3130fbc23..ad1bb3765 100644 --- a/crates/ekore/src/operator_matrix_elements/unpolarized/spacelike.rs +++ b/crates/ekore/src/operator_matrix_elements/unpolarized/spacelike.rs @@ -1,4 +1,5 @@ //! The unpolarized, space-like |OME| at various couplings power. +use crate::constants::MAX_ORDER_QCD; use crate::harmonics::cache::Cache; use num::complex::Complex; use num::Zero; @@ -11,8 +12,8 @@ pub fn A_singlet( c: &mut Cache, nf: u8, L: f64, -) -> [[[Complex; 3]; 3]; 4] { - let mut A_s = [[[Complex::::zero(); 3]; 3]; 4]; +) -> [[[Complex; 3]; 3]; MAX_ORDER_QCD] { + let mut A_s = [[[Complex::::zero(); 3]; 3]; MAX_ORDER_QCD]; if matching_order_qcd >= 1 { A_s[0] = as1::A_singlet(c, nf, L); } @@ -29,8 +30,8 @@ pub fn A_non_singlet( c: &mut Cache, nf: u8, L: f64, -) -> [[[Complex; 2]; 2]; 4] { - let mut A_ns = [[[Complex::::zero(); 2]; 2]; 4]; +) -> [[[Complex; 2]; 2]; MAX_ORDER_QCD] { + let mut A_ns = [[[Complex::::zero(); 2]; 2]; MAX_ORDER_QCD]; if matching_order_qcd >= 1 { A_ns[0] = as1::A_ns(c, nf, L); } From d48e8197e368454aca656d0b2b9f2d5da932654d Mon Sep 17 00:00:00 2001 From: AkshatRai07 Date: Thu, 25 Jun 2026 20:37:12 +0530 Subject: [PATCH 5/9] Changed dimensions in docstring and res array --- crates/eko/src/lib.rs | 30 +++++++++++++++++------------- crates/ekore/src/lib.rs | 2 +- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/crates/eko/src/lib.rs b/crates/eko/src/lib.rs index c2f6283d1..f52a42e98 100644 --- a/crates/eko/src/lib.rs +++ b/crates/eko/src/lib.rs @@ -1,5 +1,6 @@ //! Interface to the eko Python package. +use ekore::constants::{MAX_ORDER_QCD, MAX_ORDER_QED}; use ekore::harmonics::cache::Cache; use num::Complex; use std::ffi::c_void; @@ -7,11 +8,12 @@ use std::ffi::c_void; pub mod bib; pub mod mellin; -/// Map tensor with shape (o,d,d) into pre-allocated output buffers. +/// Map tensor with shape `(order_QCD, d, d)` into pre-allocated output buffers. /// -/// This is needed for the QCD singlet. -fn unravel( - res: &[[[Complex; DIM]; DIM]], +/// Only the first `order_qcd` entries along the outer axis are written; +/// remaining buffer slots are left undefined. +fn unravel_qcd( + res: &[[[Complex; DIM]; DIM]; MAX_ORDER_QCD], order_qcd: usize, out_re: &mut [f64], out_im: &mut [f64], @@ -28,11 +30,12 @@ fn unravel( } } -/// Map tensor with shape (o,o',d,d) into pre-allocated output buffers. +/// Map tensor with shape `(order_QCD+1, order_QED+1, d, d)` into pre-allocated output buffers. /// -/// This is needed for the QED singlet and valence. +/// The first `order_qcd + 1` entries along the QCD axis and `order_qed + 1` along the QED axis +/// are written; remaining buffer slots are left undefined. fn unravel_qed( - res: &[[[[Complex; DIM]; DIM]; 3]], + res: &[[[[Complex; DIM]; DIM]; MAX_ORDER_QED + 1]; MAX_ORDER_QCD + 1], order_qcd: usize, order_qed: usize, out_re: &mut [f64], @@ -52,11 +55,12 @@ fn unravel_qed( } } -/// Map tensor with shape (o,o',d) into pre-allocated output buffers. +/// Map tensor with shape `(order_QCD+1, order_QED+1)` into pre-allocated output buffers. /// -/// This is needed for the QED non-singlet. +/// The first `order_qcd + 1` entries along the QCD axis and `order_qed + 1` along the QED axis +/// are written; remaining buffer slots are left undefined. fn unravel_qed_ns( - res: &[[Complex; 3]], + res: &[[Complex; MAX_ORDER_QED + 1]; MAX_ORDER_QCD + 1], order_qcd: usize, order_qed: usize, out_re: &mut [f64], @@ -101,7 +105,7 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { if args.is_ome { if is_singlet { - unravel( + unravel_qcd( &ekore::operator_matrix_elements::unpolarized::spacelike::A_singlet( args.order_qcd, &mut c, @@ -113,7 +117,7 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { out_im, ); } else { - unravel( + unravel_qcd( &ekore::operator_matrix_elements::unpolarized::spacelike::A_non_singlet( args.order_qcd, &mut c, @@ -147,7 +151,7 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { true => ekore::anomalous_dimensions::polarized::spacelike::gamma_singlet_qcd, false => ekore::anomalous_dimensions::unpolarized::spacelike::gamma_singlet_qcd, }; - unravel( + unravel_qcd( &gamma_singlet_qcd( args.order_qcd, &mut c, diff --git a/crates/ekore/src/lib.rs b/crates/ekore/src/lib.rs index db937583d..6505e6957 100644 --- a/crates/ekore/src/lib.rs +++ b/crates/ekore/src/lib.rs @@ -5,7 +5,7 @@ pub mod anomalous_dimensions; pub mod bib; -mod constants; +pub mod constants; pub mod harmonics; pub mod operator_matrix_elements; pub mod util; From dc43f3aba71478774f602ac8fe4256f5917a35d7 Mon Sep 17 00:00:00 2001 From: AkshatRai07 Date: Fri, 26 Jun 2026 13:19:10 +0530 Subject: [PATCH 6/9] Add return dimensions to docs in ekore --- .../polarized/spacelike.rs | 6 ++++++ .../unpolarized/spacelike.rs | 18 ++++++++++++++++++ .../unpolarized/spacelike.rs | 6 ++++++ 3 files changed, 30 insertions(+) diff --git a/crates/ekore/src/anomalous_dimensions/polarized/spacelike.rs b/crates/ekore/src/anomalous_dimensions/polarized/spacelike.rs index 6b99aea7e..178cb7eb9 100644 --- a/crates/ekore/src/anomalous_dimensions/polarized/spacelike.rs +++ b/crates/ekore/src/anomalous_dimensions/polarized/spacelike.rs @@ -9,6 +9,9 @@ pub mod as2; // pub mod as3; /// Compute the tower of the non-singlet anomalous dimensions. +/// +/// Returns an array of shape `(MAX_ORDER_QCD,)`. Only the first `order_qcd` entries +/// are filled; remaining slots are zero. pub fn gamma_ns_qcd( order_qcd: usize, mode: u16, @@ -42,6 +45,9 @@ pub fn gamma_ns_qcd( } /// Compute the tower of the singlet anomalous dimension matrices. +/// +/// Returns an array of shape `(MAX_ORDER_QCD, d, d)`. Only the first `order_qcd` +/// entries along the outer axis are filled; remaining slots are zero. pub fn gamma_singlet_qcd( order_qcd: usize, c: &mut Cache, diff --git a/crates/ekore/src/anomalous_dimensions/unpolarized/spacelike.rs b/crates/ekore/src/anomalous_dimensions/unpolarized/spacelike.rs index cfe07862e..b8984a25d 100644 --- a/crates/ekore/src/anomalous_dimensions/unpolarized/spacelike.rs +++ b/crates/ekore/src/anomalous_dimensions/unpolarized/spacelike.rs @@ -20,6 +20,9 @@ pub mod as4; /// `n3lo_variation = (ns_p, ns_m, ns_v)` is a list indicating which variation should /// be used. `variation = 1,2` is the upper/lower bound, while any other value /// returns the central (averaged) value. +/// +/// Returns an array of shape `(MAX_ORDER_QCD,)`. Only the first `order_qcd` entries +/// are filled; remaining slots are zero. pub fn gamma_ns_qcd( order_qcd: usize, mode: u16, @@ -67,6 +70,9 @@ pub fn gamma_ns_qcd( /// `n3lo_variation = (gg, gq, qg, qq)` is a list indicating which variation should /// be used. `variation = 1,2` is the upper/lower bound, while any other value /// returns the central (averaged) value. +/// +/// Returns an array of shape `(MAX_ORDER_QCD, d, d)`. Only the first `order_qcd` +/// entries along the outer axis are filled; remaining slots are zero. pub fn gamma_singlet_qcd( order_qcd: usize, c: &mut Cache, @@ -95,6 +101,10 @@ pub fn gamma_singlet_qcd( /// `n3lo_variation = (ns_p, ns_m, ns_v)` is a list indicating which variation should /// be used. `variation = 1,2` is the upper/lower bound, while any other value /// returns the central (averaged) value. +/// +/// Returns an array of shape `(MAX_ORDER_QCD+1, MAX_ORDER_QED+1)`. The first +/// `order_qcd + 1` entries along the QCD axis and `order_qed + 1` along the QED axis +/// are filled; remaining slots are zero. pub fn gamma_ns_qed( order_qcd: usize, order_qed: usize, @@ -146,6 +156,10 @@ pub fn gamma_ns_qed( /// `n3lo_variation = (gg, gq, qg, qq, ns_p, ns_m, ns_v)` is a list indicating which variation should /// be used. `variation = 1,2` is the upper/lower bound, while any other value /// returns the central (averaged) value. +/// +/// Returns an array of shape `(MAX_ORDER_QCD+1, MAX_ORDER_QED+1, d, d)`. The first +/// `order_qcd + 1` entries along the QCD axis and `order_qed + 1` along the QED axis +/// are filled; remaining slots are zero. pub fn gamma_singlet_qed( order_qcd: usize, order_qed: usize, @@ -208,6 +222,10 @@ pub fn gamma_singlet_qed( /// `n3lo_variation = (ns_p, ns_m, ns_v)` is a list indicating which variation should /// be used. `variation = 1,2` is the upper/lower bound, while any other value /// returns the central (averaged) value. +/// +/// Returns an array of shape `(MAX_ORDER_QCD+1, MAX_ORDER_QED+1, d, d)`. The first +/// `order_qcd + 1` entries along the QCD axis and `order_qed + 1` along the QED axis +/// are filled; remaining slots are zero. pub fn gamma_valence_qed( order_qcd: usize, order_qed: usize, diff --git a/crates/ekore/src/operator_matrix_elements/unpolarized/spacelike.rs b/crates/ekore/src/operator_matrix_elements/unpolarized/spacelike.rs index ad1bb3765..fa87d0441 100644 --- a/crates/ekore/src/operator_matrix_elements/unpolarized/spacelike.rs +++ b/crates/ekore/src/operator_matrix_elements/unpolarized/spacelike.rs @@ -7,6 +7,9 @@ pub mod as1; pub mod as2; /// Compute the tower of the singlet |OME|. +/// +/// Returns an array of shape `(MAX_ORDER_QCD, d, d)`. Only the first `matching_order_qcd` +/// entries along the outer axis are filled; remaining slots are zero. pub fn A_singlet( matching_order_qcd: usize, c: &mut Cache, @@ -25,6 +28,9 @@ pub fn A_singlet( } /// Compute the tower of the non-singlet |OME|. +/// +/// Returns an array of shape `(MAX_ORDER_QCD, d, d)`. Only the first `matching_order_qcd` +/// entries along the outer axis are filled; remaining slots are zero. pub fn A_non_singlet( matching_order_qcd: usize, c: &mut Cache, From 2f8656e1113c4bb5b319db723dbba575df6faeeb Mon Sep 17 00:00:00 2001 From: Felix Hekhorn Date: Fri, 26 Jun 2026 12:29:53 +0300 Subject: [PATCH 7/9] Rename `n_max` --- crates/eko/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/eko/src/lib.rs b/crates/eko/src/lib.rs index f52a42e98..531a88674 100644 --- a/crates/eko/src/lib.rs +++ b/crates/eko/src/lib.rs @@ -99,9 +99,9 @@ pub unsafe extern "C" fn rust_quad_ker(u: f64, rargs: *mut c_void) -> f64 { .try_into() .unwrap(); - let n_max = (args.order_qcd + 1) * (args.order_qed + 1) * 16; - let out_re = std::slice::from_raw_parts_mut(args.re_gamma, n_max); - let out_im = std::slice::from_raw_parts_mut(args.im_gamma, n_max); + let max_buffer_size = (args.order_qcd + 1) * (args.order_qed + 1) * 16; + let out_re = std::slice::from_raw_parts_mut(args.re_gamma, max_buffer_size); + let out_im = std::slice::from_raw_parts_mut(args.im_gamma, max_buffer_size); if args.is_ome { if is_singlet { From 02d23f1d39daa9a83c0673f70532029d1c936d55 Mon Sep 17 00:00:00 2001 From: AkshatRai07 Date: Fri, 26 Jun 2026 16:05:24 +0530 Subject: [PATCH 8/9] Removed all branches from LHA Bench --- .github/workflows/lha_bot.yml | 2 +- .github/workflows/lha_bot_rust.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lha_bot.yml b/.github/workflows/lha_bot.yml index 690548ef1..92df8153c 100644 --- a/.github/workflows/lha_bot.yml +++ b/.github/workflows/lha_bot.yml @@ -4,7 +4,7 @@ name: LHA Benchmarks on: push: branches-ignore: - - "*" + - "**" tags: pull_request: types: diff --git a/.github/workflows/lha_bot_rust.yml b/.github/workflows/lha_bot_rust.yml index bdc330d9a..b4ba3aba2 100644 --- a/.github/workflows/lha_bot_rust.yml +++ b/.github/workflows/lha_bot_rust.yml @@ -4,7 +4,7 @@ name: LHA Benchmarks (Rust) on: push: branches-ignore: - - "*" + - "**" tags: pull_request: types: From 9bfbbc9fd31046e5ef1daa59003aa2d3f542483c Mon Sep 17 00:00:00 2001 From: AkshatRai07 Date: Fri, 26 Jun 2026 16:33:41 +0530 Subject: [PATCH 9/9] Update performance.md with #542 --- extras/gsoc/performance.md | 73 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/extras/gsoc/performance.md b/extras/gsoc/performance.md index f07d3af9c..8b0e4b8e7 100644 --- a/extras/gsoc/performance.md +++ b/extras/gsoc/performance.md @@ -158,3 +158,76 @@ This adds a fixed overhead of several milliseconds on every `integrate.quad` nod ### Decision The `nb → rs → nb` architecture introduced in [526](https://github.com/NNPDF/eko/pull/526) was not viable, hence the PR was closed. The preferred short-term path remains `scipy → Rust → Numba → Rust → scipy`. See [architecture.md §5](./architecture.md#5-scipyintegratequad) for the full discussion. + +--- + +## 542: Remove mallocs, single shared buffer + +After [542](https://github.com/NNPDF/eko/pull/542), all mallocs were removed from `eko` and `ekore`, the gamma output is now written directly into a single Python-owned `f64` buffer that is pre-allocated once and passed through `QuadArgs`, minor python loops were removed. + +### Full-suite timing (`/usr/bin/time -v`) + +```text +====================== 3 passed, 12 deselected, 1 warning in 219.57s (0:03:39) ====================== + Command being timed: "poe lha -m nnlo and sv" + User time (seconds): 342.81 + System time (seconds): 2.75 + Percent of CPU this job got: 101% + Elapsed (wall clock) time (h:mm:ss or m:ss): 5:40.23 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 931916 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 1681 + Minor (reclaiming a frame) page faults: 679694 + Voluntary context switches: 3758 + Involuntary context switches: 3360 + Swaps: 0 + File system inputs: 366568 + File system outputs: 179328 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 +``` + +### Comparison with master + +| | Wall clock | pytest time | +| --- | ---------- | ----------- | +| [master](https://github.com/NNPDF/eko/tree/5aebe0abdea3e75ed7761b5ad4ce31d7fd227cd4) | 6:33 | 4:16 | +| PR #542 | 5:40 | 3:39 | +| **improvement** | | **−14.5%** | + +### Rust/Python time split and memory analysis + +Some debugging revealed where time is actually spent per integration: + +```text +Evolution: Rust/Python split over 891240 quad evaluations — Rust (anomalous dims): 1.573 s (9.5%), Python (numba callback): 15.015 s (90.5%) +Matching: Rust/Python split over 1847076 quad evaluations — Rust (anomalous dims): 2.654 s (11.4%), Python (numba callback): 20.665 s (88.6%) +``` + +~90% of integration time is spent in the Python/Numba callback. This sets a hard ceiling: even eliminating the Rust side entirely could only improve total runtime by ~10%. + +Regarding the peak RSS (~910 MB): this is not a maturin issue. Switching the compile step to `pip install -e . crates/eko` produces nearly identical RSS. The process sits at ~800 MB for the entire run; the peak is caused by a transient spike at the end when computed results are compared against reference values. The 800 MB baseline itself is dominated by Numba `cfunc` compilation at import time: + +```text +[MEM] eko.__init__: START RSS= 58.1 MB (baseline reset) +[MEM] interpolation: loaded (all @njit functions) RSS= 147.3 MB delta=+89.2 MB +[MEM] beta: loaded (all @njit functions) RSS= 186.4 MB delta=+38.8 MB +[MEM] quad_ker: cb_quad_ker_qcd @cfunc loaded RSS= 539.9 MB delta=+353.0 MB +[MEM] quad_ker: cb_quad_ker_ome @cfunc loaded RSS= 557.1 MB delta=+17.1 MB +[MEM] quad_ker: cb_quad_ker_qed @cfunc loaded (all cfuncs done) RSS= 722.9 MB delta=+165.9 MB +[MEM] eko.__init__: END (all submodules loaded) RSS= 722.9 MB +``` + +Of the 722.9 MB loaded at startup, only ~58 MB (~8%) is non-Numba code; the three `cfunc`s alone account for ~74%. The long-term fix is to port them to Rust (see [#383](https://github.com/NNPDF/eko/issues/383)), at which point the memory footprint will drop substantially. + +### Further optimisation attempts and conclusions + +- **quad_vec**: tried passing multiple integrand points at once; failed because `scipy.LowLevelCallable` cannot be used with `quad_vec`, causing a regression to 4:19. +- **SIMD / FMA**: passing batches of points to `rust_quad_ker` could allow LLVM to emit SIMD instructions, but this requires significant changes for a gain capped at ~5–10% on the Rust share (i.e. ≤1% overall).