From cdffbbd0cac7dfe7f4c668ca4a136a4bd48b10f6 Mon Sep 17 00:00:00 2001 From: Robrecht Blancquaert Date: Thu, 27 Oct 2022 14:08:55 +0200 Subject: [PATCH 01/14] Added ARM neon backend support Co-authored-by: pinkforest <36498018+pinkforest@users.noreply.github.com> Co-authored-by: Robrecht Blancquaert --- curve25519-dalek/src/backend/vector/mod.rs | 3 + .../src/backend/vector/neon/constants.rs | 3117 +++++++++++++++++ .../src/backend/vector/neon/edwards.rs | 547 +++ .../src/backend/vector/neon/field.rs | 784 +++++ .../src/backend/vector/neon/mod.rs | 16 + 5 files changed, 4467 insertions(+) create mode 100644 curve25519-dalek/src/backend/vector/neon/constants.rs create mode 100644 curve25519-dalek/src/backend/vector/neon/edwards.rs create mode 100644 curve25519-dalek/src/backend/vector/neon/field.rs create mode 100644 curve25519-dalek/src/backend/vector/neon/mod.rs diff --git a/curve25519-dalek/src/backend/vector/mod.rs b/curve25519-dalek/src/backend/vector/mod.rs index 2839dca45..54ed077d8 100644 --- a/curve25519-dalek/src/backend/vector/mod.rs +++ b/curve25519-dalek/src/backend/vector/mod.rs @@ -19,4 +19,7 @@ pub mod avx2; #[cfg(nightly)] pub mod ifma; +#[cfg(nightly)] +pub mod neon; + pub mod scalar_mul; diff --git a/curve25519-dalek/src/backend/vector/neon/constants.rs b/curve25519-dalek/src/backend/vector/neon/constants.rs new file mode 100644 index 000000000..ab380c4ba --- /dev/null +++ b/curve25519-dalek/src/backend/vector/neon/constants.rs @@ -0,0 +1,3117 @@ +// -*- mode: rust; -*- +// +// This file is part of curve25519-dalek. +// Copyright (c) 2016-2019 Isis Lovecruft, Henry de Valence +// 2021-2022 Robrecht Blancquaert +// See LICENSE for licensing information. +// +// Authors: +// - Isis Agora Lovecruft +// - Henry de Valence +// - Robrecht Blancquaert + +//! This module contains constants used by the NEON backend. 
+ +use packed_simd::u32x4; + +use crate::backend::vector::neon::edwards::{CachedPoint, ExtendedPoint}; +use crate::backend::vector::neon::field::FieldElement2625x4; +use crate::window::NafLookupTable8; + +/// The identity element as an `ExtendedPoint`. +pub(crate) static EXTENDEDPOINT_IDENTITY: ExtendedPoint = ExtendedPoint(FieldElement2625x4([ + (u32x4::new(0, 1, 0, 0), u32x4::new(1, 0, 0, 0)), + (u32x4::splat(0), u32x4::splat(0)), + (u32x4::splat(0), u32x4::splat(0)), + (u32x4::splat(0), u32x4::splat(0)), + (u32x4::splat(0), u32x4::splat(0)) +])); + +/// The identity element as a `CachedPoint`. +pub(crate) static CACHEDPOINT_IDENTITY: CachedPoint = CachedPoint(FieldElement2625x4([ + (u32x4::new(121647, 121666, 0, 0), u32x4::new(243332, 67108845, 0, 33554431)), + (u32x4::new(67108864, 0, 33554431, 0), u32x4::new(0, 67108863, 0, 33554431)), + (u32x4::new(67108863, 0, 33554431, 0), u32x4::new(0, 67108863, 0, 33554431)), + (u32x4::new(67108863, 0, 33554431, 0), u32x4::new(0, 67108863, 0, 33554431)), + (u32x4::new(67108863, 0, 33554431, 0), u32x4::new(0, 67108863, 0, 33554431)) +])); + +/// The low limbs of (2p, 2p, 2p, 2p), so that +/// ```ascii,no_run +/// (2p, 2p, 2p, 2p) = [P_TIMES_2_LO, P_TIMES_2_HI, P_TIMES_2_HI, P_TIMES_2_HI, P_TIMES_2_HI] +/// ``` +pub(crate) static P_TIMES_2_LO: (u32x4, u32x4) = ( + u32x4::new( + 67108845 << 1, + 67108845 << 1, + 33554431 << 1, + 33554431 << 1), + u32x4::new( + 67108845 << 1, + 67108845 << 1, + 33554431 << 1, + 33554431 << 1) +); + +/// The high limbs of (2p, 2p, 2p, 2p), so that +/// ```ascii,no_run +/// (2p, 2p, 2p, 2p) = [P_TIMES_2_LO, P_TIMES_2_HI, P_TIMES_2_HI, P_TIMES_2_HI, P_TIMES_2_HI] +/// ``` +pub(crate) static P_TIMES_2_HI: (u32x4, u32x4) = ( + u32x4::new( + 67108863 << 1, + 67108863 << 1, + 33554431 << 1, + 33554431 << 1), + u32x4::new( + 67108863 << 1, + 67108863 << 1, + 33554431 << 1, + 33554431 << 1) +); + +/// The low limbs of (16p, 16p, 16p, 16p), so that +/// ```ascii,no_run +/// (16p, 16p, 16p, 16p) = 
[P_TIMES_16_LO, P_TIMES_16_HI, P_TIMES_16_HI, P_TIMES_16_HI, P_TIMES_16_HI] +/// ``` +pub(crate) static P_TIMES_16_LO: (u32x4, u32x4) = ( + u32x4::new( + 67108845 << 4, + 67108845 << 4, + 33554431 << 4, + 33554431 << 4), + u32x4::new( + 67108845 << 4, + 67108845 << 4, + 33554431 << 4, + 33554431 << 4) + ); + +/// The high limbs of (16p, 16p, 16p, 16p), so that +/// ```ascii,no_run +/// (16p, 16p, 16p, 16p) = [P_TIMES_16_LO, P_TIMES_16_HI, P_TIMES_16_HI, P_TIMES_16_HI, P_TIMES_16_HI] +/// ``` +pub(crate) static P_TIMES_16_HI: (u32x4, u32x4) = ( + u32x4::new( + 67108863 << 4, + 67108863 << 4, + 33554431 << 4, + 33554431 << 4), + u32x4::new( + 67108863 << 4, + 67108863 << 4, + 33554431 << 4, + 33554431 << 4) +); + +/// Odd multiples of the Ed25519 basepoint: +pub(crate) static BASEPOINT_ODD_LOOKUP_TABLE: NafLookupTable8 = NafLookupTable8([ + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 3571425, + 10045002, + 19036563, + 1096096,), u32x4::new( + 243332, + 65897020, + 0, + 28963681,)), + (u32x4::new( + 30896895, + 63055514, + 1614915, + 5095970,), u32x4::new( + 0, + 53791688, + 0, + 31258312,)), + (u32x4::new( + 13347627, + 40339464, + 2236269, + 11185503,), u32x4::new( + 0, + 22520087, + 0, + 8659512,)), + (u32x4::new( + 11125413, + 29139905, + 32037254, + 28360723,), u32x4::new( + 0, + 64556417, + 0, + 9635759,)), + (u32x4::new( + 33268144, + 47262491, + 4336918, + 15795740,), u32x4::new( + 0, + 22027545, + 0, + 4846528,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 47099681, + 31447946, + 29365447, + 24740513,), u32x4::new( + 42991046, + 18317844, + 16051644, + 21404226,)), + (u32x4::new( + 31708133, + 28909527, + 2366091, + 13703791,), u32x4::new( + 469246, + 54159622, + 2601402, + 32988002,)), + (u32x4::new( + 63432457, + 30251794, + 15163516, + 18491340,), u32x4::new( + 28144087, + 35605455, + 13682295, + 18474872,)), + (u32x4::new( + 12221607, + 4967598, + 26061980, + 26008006,), u32x4::new( + 20226147, + 9726961, + 17410, + 18051083,)), + 
(u32x4::new( + 60569645, + 62487085, + 11911242, + 21920922,), u32x4::new( + 4092105, + 38186967, + 22431483, + 31366585,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 18147205, + 62587998, + 2554617, + 536692,), u32x4::new( + 11924528, + 26674131, + 17645433, + 24341419,)), + (u32x4::new( + 11573357, + 27579485, + 31491870, + 29000885,), u32x4::new( + 10800976, + 51902791, + 28076395, + 20464029,)), + (u32x4::new( + 56031649, + 10856669, + 11791193, + 26769430,), u32x4::new( + 25306956, + 5922200, + 6630685, + 9385098,)), + (u32x4::new( + 31319348, + 23906711, + 16290213, + 32142166,), u32x4::new( + 61106354, + 17181823, + 3548308, + 12022566,)), + (u32x4::new( + 5904298, + 50218605, + 11826440, + 5492249,), u32x4::new( + 10379071, + 3472255, + 172742, + 31948344,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 10625852, + 15193821, + 22918394, + 23676410,), u32x4::new( + 53695416, + 54987793, + 10067515, + 11747680,)), + (u32x4::new( + 65013325, + 1309652, + 29616320, + 28922974,), u32x4::new( + 60360891, + 19621771, + 9938982, + 30406429,)), + (u32x4::new( + 54967954, + 65931918, + 5595602, + 25719523,), u32x4::new( + 64909864, + 30566415, + 15945272, + 8495317,)), + (u32x4::new( + 1167157, + 55265018, + 11507029, + 31641054,), u32x4::new( + 43497904, + 2367338, + 12937761, + 27517066,)), + (u32x4::new( + 656704, + 2544994, + 13006713, + 480979,), u32x4::new( + 38471594, + 62541240, + 25353597, + 11531760,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 22176662, + 3984313, + 27495285, + 4110608,), u32x4::new( + 2909584, + 30594106, + 15677919, + 2549183,)), + (u32x4::new( + 33979105, + 62269905, + 2071511, + 6894756,), u32x4::new( + 53189950, + 47232857, + 6408191, + 6123225,)), + (u32x4::new( + 32553873, + 63948030, + 12612401, + 3633166,), u32x4::new( + 24054373, + 37626618, + 14481327, + 8520484,)), + (u32x4::new( + 56552486, + 10749438, + 12034813, + 28811946,), u32x4::new( + 1445640, + 36755601, + 12104575, + 
10257833,)), + (u32x4::new( + 22795808, + 48761311, + 1136056, + 9380768,), u32x4::new( + 1411523, + 5341811, + 27318329, + 9686767,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 21157200, + 39156966, + 20473176, + 4934657,), u32x4::new( + 61478183, + 45121537, + 5429856, + 13035023,)), + (u32x4::new( + 7954529, + 58789246, + 31440083, + 7054221,), u32x4::new( + 38438565, + 36856107, + 1364112, + 14548122,)), + (u32x4::new( + 26120083, + 36321360, + 4919997, + 31687496,), u32x4::new( + 33757765, + 36237559, + 15243054, + 32163861,)), + (u32x4::new( + 25878307, + 46544824, + 19455951, + 2414935,), u32x4::new( + 16844726, + 56521560, + 32680554, + 26660660,)), + (u32x4::new( + 48360220, + 43407178, + 12187042, + 24925816,), u32x4::new( + 7423722, + 25746484, + 12814654, + 17395963,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 63153652, + 32195955, + 4087908, + 8431689,), u32x4::new( + 30392384, + 47203165, + 8986649, + 9053039,)), + (u32x4::new( + 63659241, + 47988767, + 2931872, + 19953600,), u32x4::new( + 11747107, + 51610101, + 20952181, + 13364887,)), + (u32x4::new( + 3659197, + 58790649, + 5930099, + 2605312,), u32x4::new( + 28477896, + 580728, + 20579735, + 2610622,)), + (u32x4::new( + 41781607, + 17161358, + 10690531, + 24368015,), u32x4::new( + 47027031, + 36742339, + 5414694, + 13156365,)), + (u32x4::new( + 13237853, + 51182423, + 8954802, + 29006542,), u32x4::new( + 22643989, + 56896541, + 22830593, + 10289708,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 1401265, + 58846825, + 30911620, + 32239180,), u32x4::new( + 15391552, + 15200821, + 6339309, + 16403588,)), + (u32x4::new( + 55913797, + 29541724, + 1664461, + 21709410,), u32x4::new( + 38470488, + 47097092, + 17674945, + 32666066,)), + (u32x4::new( + 22844482, + 10797709, + 27548106, + 31638735,), u32x4::new( + 34500968, + 26611503, + 19727211, + 13160873,)), + (u32x4::new( + 31485204, + 14496164, + 13981208, + 10276888,), u32x4::new( + 5748808, + 
35024436, + 2740987, + 7479021,)), + (u32x4::new( + 58541207, + 14866135, + 32344041, + 545930,), u32x4::new( + 62661488, + 6941250, + 27940205, + 11976112,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 39849808, + 44781685, + 15697329, + 24387845,), u32x4::new( + 12501486, + 50260092, + 23199481, + 31929024,)), + (u32x4::new( + 24823070, + 27956017, + 27034296, + 10316465,), u32x4::new( + 47664045, + 11152446, + 15719183, + 30181617,)), + (u32x4::new( + 20771189, + 19969144, + 31433937, + 19185213,), u32x4::new( + 27565920, + 10384445, + 2893359, + 9255362,)), + (u32x4::new( + 42894974, + 11925545, + 32134441, + 32738810,), u32x4::new( + 55916336, + 32479272, + 19563550, + 5511385,)), + (u32x4::new( + 17857161, + 47809169, + 14564114, + 27997751,), u32x4::new( + 33024640, + 38669671, + 31956536, + 27313245,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 58237774, + 15917425, + 18872208, + 19394230,), u32x4::new( + 17374297, + 6101419, + 4839741, + 6596900,)), + (u32x4::new( + 66947393, + 15744215, + 18368993, + 17750160,), u32x4::new( + 41006525, + 9205497, + 2629667, + 32170865,)), + (u32x4::new( + 66481381, + 1919414, + 28338762, + 7372967,), u32x4::new( + 33819153, + 4156199, + 27126309, + 12739816,)), + (u32x4::new( + 44117158, + 58545296, + 22521371, + 11809712,), u32x4::new( + 28998792, + 50731010, + 30215699, + 25748377,)), + (u32x4::new( + 23561284, + 4160244, + 9035405, + 24895184,), u32x4::new( + 39761639, + 59253416, + 8684759, + 22487864,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 12671134, + 56419053, + 16092401, + 30038207,), u32x4::new( + 4002647, + 47822606, + 7151311, + 28430768,)), + (u32x4::new( + 61041684, + 35765374, + 30598048, + 19666539,), u32x4::new( + 44150175, + 40140037, + 290469, + 28442674,)), + (u32x4::new( + 18847796, + 1371617, + 33316881, + 13199936,), u32x4::new( + 43646578, + 17068881, + 12074900, + 1537415,)), + (u32x4::new( + 10052225, + 38316070, + 27469797, + 5297537,), 
u32x4::new( + 50725570, + 20435349, + 10339121, + 2779737,)), + (u32x4::new( + 18372189, + 15466385, + 24762130, + 22217964,), u32x4::new( + 23503887, + 47844464, + 10415034, + 2606889,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 55082775, + 45300503, + 16032654, + 5964396,), u32x4::new( + 17743504, + 24634761, + 19493066, + 5184611,)), + (u32x4::new( + 50172633, + 35093294, + 10040575, + 23616256,), u32x4::new( + 4543900, + 61852191, + 4049821, + 7423669,)), + (u32x4::new( + 20295398, + 40009376, + 10487190, + 15670429,), u32x4::new( + 51972856, + 58649552, + 20436392, + 3432497,)), + (u32x4::new( + 35189420, + 54117751, + 12825868, + 6283038,), u32x4::new( + 27540739, + 30648758, + 22658912, + 9466689,)), + (u32x4::new( + 51737549, + 40725785, + 17409814, + 25201086,), u32x4::new( + 21156239, + 34176168, + 26814520, + 5956424,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 8211442, + 8014184, + 6260823, + 22108096,), u32x4::new( + 32182620, + 51844847, + 2466270, + 28582231,)), + (u32x4::new( + 27199739, + 3848333, + 31738017, + 10892045,), u32x4::new( + 4963982, + 65391770, + 32551997, + 28906469,)), + (u32x4::new( + 16606846, + 32207068, + 26404535, + 7614129,), u32x4::new( + 45416902, + 65584718, + 13821785, + 2646060,)), + (u32x4::new( + 36090634, + 57981287, + 32247670, + 22837502,), u32x4::new( + 31003861, + 55448117, + 6062915, + 20369975,)), + (u32x4::new( + 27381403, + 50578107, + 522631, + 29521058,), u32x4::new( + 31137497, + 40220737, + 27628049, + 1824195,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 59402443, + 17056879, + 29262689, + 6131785,), u32x4::new( + 52551472, + 43367471, + 29423199, + 18899208,)), + (u32x4::new( + 5749414, + 43514612, + 11365899, + 21514624,), u32x4::new( + 65591890, + 60945892, + 19841732, + 5628567,)), + (u32x4::new( + 19334369, + 52500268, + 12307673, + 5267367,), u32x4::new( + 3212103, + 9035822, + 29142161, + 30520954,)), + (u32x4::new( + 57261330, + 6819646, + 
22089161, + 9800373,), u32x4::new( + 55155453, + 62250856, + 13766735, + 25244545,)), + (u32x4::new( + 54370226, + 61888301, + 24496089, + 2540581,), u32x4::new( + 65637506, + 60274355, + 18154273, + 11687259,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 12521903, + 26014045, + 13995625, + 33360175,), u32x4::new( + 23605474, + 7376434, + 27229267, + 17195036,)), + (u32x4::new( + 59482891, + 10074423, + 574357, + 3857753,), u32x4::new( + 61377787, + 50306685, + 5241065, + 20234396,)), + (u32x4::new( + 23674717, + 6997172, + 20771841, + 16858511,), u32x4::new( + 40565304, + 29973136, + 7049812, + 14585010,)), + (u32x4::new( + 1427477, + 13295732, + 31762066, + 31499740,), u32x4::new( + 60419925, + 54666164, + 22009424, + 8089609,)), + (u32x4::new( + 58154031, + 41593020, + 15342328, + 957047,), u32x4::new( + 38937260, + 37037498, + 24871992, + 32973409,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 30654745, + 51286025, + 21206982, + 2433562,), u32x4::new( + 12780105, + 31732574, + 33087964, + 33081189,)), + (u32x4::new( + 66640017, + 42720009, + 16567620, + 15300745,), u32x4::new( + 1530367, + 33001123, + 20930247, + 21042661,)), + (u32x4::new( + 15003356, + 5294119, + 22985605, + 18928772,), u32x4::new( + 32628461, + 18230172, + 14773298, + 27193722,)), + (u32x4::new( + 27555, + 65346287, + 17017174, + 7837720,), u32x4::new( + 21499787, + 42855613, + 22474984, + 13675085,)), + (u32x4::new( + 24164369, + 50130116, + 5973149, + 24152073,), u32x4::new( + 1577334, + 25400030, + 18648484, + 32228854,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 49518649, + 59119280, + 31670678, + 20396561,), u32x4::new( + 61728330, + 651402, + 176032, + 9529498,)), + (u32x4::new( + 61765532, + 9082232, + 32794568, + 15526956,), u32x4::new( + 48543100, + 32614212, + 19001206, + 25680229,)), + (u32x4::new( + 32086091, + 10373081, + 8996131, + 31822823,), u32x4::new( + 35788988, + 49973190, + 30542040, + 17858455,)), + (u32x4::new( + 
48130197, + 58121889, + 27753291, + 29923268,), u32x4::new( + 54448075, + 43300790, + 9336565, + 15770022,)), + (u32x4::new( + 57725546, + 20557498, + 9366233, + 16023566,), u32x4::new( + 16189031, + 2837363, + 24315301, + 27003505,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 28286608, + 10767548, + 18220739, + 5413236,), u32x4::new( + 48253387, + 58255702, + 11864864, + 28527159,)), + (u32x4::new( + 45038176, + 58655197, + 25648758, + 10951484,), u32x4::new( + 42564382, + 34542843, + 23146954, + 22234334,)), + (u32x4::new( + 14858710, + 24978793, + 15040559, + 4379220,), u32x4::new( + 47621477, + 40271440, + 15650420, + 1998736,)), + (u32x4::new( + 24106391, + 9626149, + 344505, + 25253814,), u32x4::new( + 34579800, + 59687089, + 25718289, + 25904133,)), + (u32x4::new( + 1981195, + 37751302, + 26132048, + 1764722,), u32x4::new( + 13288231, + 28808622, + 12531301, + 18292949,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 13869851, + 31448904, + 14963539, + 7581293,), u32x4::new( + 20536485, + 35021083, + 21257574, + 33356609,)), + (u32x4::new( + 36903364, + 18429241, + 11097857, + 5943856,), u32x4::new( + 60583077, + 40015815, + 30509523, + 31915271,)), + (u32x4::new( + 49161801, + 40681915, + 67892, + 25454357,), u32x4::new( + 22779677, + 25798439, + 15964829, + 5863227,)), + (u32x4::new( + 60810637, + 4496471, + 5217137, + 14095116,), u32x4::new( + 50942411, + 50712663, + 2507380, + 26844507,)), + (u32x4::new( + 34579752, + 53519385, + 10859797, + 18816024,), u32x4::new( + 42552864, + 39478521, + 6783896, + 17277037,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 43287109, + 27900723, + 33182187, + 2766754,), u32x4::new( + 17041989, + 1018260, + 33392790, + 4830032,)), + (u32x4::new( + 60194178, + 30788903, + 24728888, + 14513195,), u32x4::new( + 20897010, + 28843233, + 20111980, + 17475240,)), + (u32x4::new( + 46042274, + 19257042, + 4628173, + 31649727,), u32x4::new( + 27388316, + 66631493, + 11541886, + 
6408028,)), + (u32x4::new( + 57024680, + 49536568, + 32050358, + 31321917,), u32x4::new( + 17437691, + 49672356, + 2884755, + 20493991,)), + (u32x4::new( + 59553007, + 46782643, + 29001173, + 1814088,), u32x4::new( + 21930692, + 51319706, + 14965872, + 30748046,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 16441817, + 36111849, + 6900424, + 602234,), u32x4::new( + 46522199, + 16441484, + 8135070, + 21726541,)), + (u32x4::new( + 37711225, + 32701959, + 11679112, + 13125533,), u32x4::new( + 32154135, + 9407918, + 26554289, + 620848,)), + (u32x4::new( + 19233407, + 30086864, + 14679568, + 2797374,), u32x4::new( + 4892806, + 7993077, + 247658, + 5632804,)), + (u32x4::new( + 37427262, + 26675495, + 27125659, + 13496131,), u32x4::new( + 50718473, + 40115609, + 28505351, + 27837393,)), + (u32x4::new( + 196819, + 18410429, + 7070012, + 21691388,), u32x4::new( + 29763371, + 24754123, + 9727048, + 10930179,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 28319289, + 40734650, + 16225680, + 24739184,), u32x4::new( + 64272368, + 35356897, + 7866648, + 13635853,)), + (u32x4::new( + 34165295, + 48328447, + 27041670, + 23643655,), u32x4::new( + 48949950, + 52963288, + 30411133, + 6045174,)), + (u32x4::new( + 18583559, + 41649834, + 9813585, + 26098520,), u32x4::new( + 25682734, + 26733526, + 19276490, + 10654728,)), + (u32x4::new( + 34867476, + 52715968, + 5694571, + 13380978,), u32x4::new( + 15134994, + 1831255, + 8608001, + 17266401,)), + (u32x4::new( + 59925903, + 44282172, + 27802465, + 1855069,), u32x4::new( + 14234749, + 36635487, + 11302294, + 10938429,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 8373273, + 49064494, + 4932071, + 32997499,), u32x4::new( + 38472880, + 29335908, + 14504412, + 22460029,)), + (u32x4::new( + 31795930, + 50785923, + 25835990, + 25790073,), u32x4::new( + 65669841, + 11360450, + 9969157, + 9008164,)), + (u32x4::new( + 50262498, + 45869261, + 16124434, + 15336007,), u32x4::new( + 882762, + 42522623, 
+ 11277198, + 26296377,)), + (u32x4::new( + 42332732, + 59129236, + 14452816, + 567985,), u32x4::new( + 208061, + 34722729, + 32008143, + 14828749,)), + (u32x4::new( + 17937794, + 36846032, + 32102665, + 4442466,), u32x4::new( + 19745435, + 31633451, + 7146411, + 15812027,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 30741269, + 38648744, + 12562645, + 30092623,), u32x4::new( + 25073992, + 28730659, + 27911745, + 30000958,)), + (u32x4::new( + 2859794, + 25991700, + 17776078, + 27091930,), u32x4::new( + 2328322, + 60061146, + 18581824, + 18039008,)), + (u32x4::new( + 58206333, + 17917354, + 1972306, + 11853766,), u32x4::new( + 2655376, + 60543390, + 18416710, + 13287440,)), + (u32x4::new( + 62746330, + 61423885, + 21246577, + 2266675,), u32x4::new( + 60099139, + 14804707, + 14772234, + 20679434,)), + (u32x4::new( + 26987698, + 15488817, + 715616, + 2339565,), u32x4::new( + 51980752, + 17333865, + 21965103, + 10839820,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 18672548, + 57660959, + 16042910, + 19519287,), u32x4::new( + 62865851, + 17580961, + 26628347, + 23774759,)), + (u32x4::new( + 368070, + 3464471, + 25888304, + 30370559,), u32x4::new( + 52396053, + 45426828, + 28745251, + 9246829,)), + (u32x4::new( + 29090099, + 57950037, + 23104657, + 4903923,), u32x4::new( + 10987778, + 56163684, + 23621539, + 10332760,)), + (u32x4::new( + 53338235, + 44851161, + 21606845, + 31069622,), u32x4::new( + 4243630, + 34464392, + 11286454, + 5802022,)), + (u32x4::new( + 46710757, + 63389067, + 11642865, + 1980986,), u32x4::new( + 12967337, + 28162061, + 3854192, + 30432268,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 12179834, + 41005450, + 12809619, + 33525228,), u32x4::new( + 4624405, + 46957889, + 16968743, + 11827816,)), + (u32x4::new( + 51521162, + 12466775, + 31791271, + 15303651,), u32x4::new( + 49798465, + 62714504, + 6509600, + 12918560,)), + (u32x4::new( + 20445559, + 1756449, + 28848701, + 7920171,), u32x4::new( + 
9835040, + 5900071, + 28757409, + 12376688,)), + (u32x4::new( + 18259496, + 14281012, + 21767026, + 10232236,), u32x4::new( + 20000226, + 12400540, + 4104902, + 23570543,)), + (u32x4::new( + 3687440, + 26546648, + 13328821, + 26841081,), u32x4::new( + 49822734, + 22334054, + 244496, + 24862543,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 59523541, + 62195428, + 3853227, + 13954801,), u32x4::new( + 12387708, + 47627615, + 27221350, + 17899572,)), + (u32x4::new( + 63193587, + 36343307, + 14595132, + 6880795,), u32x4::new( + 1364792, + 37648434, + 3259017, + 20536046,)), + (u32x4::new( + 30362834, + 10440372, + 9574624, + 11729232,), u32x4::new( + 63861613, + 21748389, + 5530846, + 2721586,)), + (u32x4::new( + 18339760, + 1550632, + 17170271, + 25732971,), u32x4::new( + 28459263, + 63142237, + 21642345, + 31557672,)), + (u32x4::new( + 10611282, + 5204623, + 18049257, + 214175,), u32x4::new( + 19432723, + 49809070, + 26010406, + 27449522,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 19770733, + 26478685, + 9464541, + 29158041,), u32x4::new( + 28604307, + 45196604, + 7586524, + 6641859,)), + (u32x4::new( + 65654484, + 52230498, + 30886612, + 19112823,), u32x4::new( + 47271809, + 38942611, + 16020035, + 10773481,)), + (u32x4::new( + 27464323, + 54451016, + 20646645, + 17732915,), u32x4::new( + 23008717, + 53626684, + 3253189, + 15614410,)), + (u32x4::new( + 52381752, + 40693008, + 7063024, + 28469981,), u32x4::new( + 51159478, + 44543211, + 19941777, + 5985451,)), + (u32x4::new( + 13553668, + 35524849, + 14788737, + 1883845,), u32x4::new( + 12385775, + 47958835, + 29135466, + 1776722,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 36719806, + 20827965, + 23175373, + 32996806,), u32x4::new( + 42041892, + 65708790, + 5467143, + 20884008,)), + (u32x4::new( + 43256281, + 40770646, + 17244063, + 31959819,), u32x4::new( + 64366384, + 43544617, + 25057754, + 12628720,)), + (u32x4::new( + 17337782, + 58472057, + 27906934, + 
15305274,), u32x4::new( + 30292418, + 39284317, + 16946773, + 24806712,)), + (u32x4::new( + 6485126, + 32447403, + 16261486, + 13561940,), u32x4::new( + 49439635, + 10738368, + 16419889, + 8897231,)), + (u32x4::new( + 44812203, + 40122262, + 25496058, + 2759794,), u32x4::new( + 25295304, + 52178368, + 24154195, + 29334408,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 42307254, + 57217102, + 1088936, + 3832827,), u32x4::new( + 33905401, + 23130334, + 6958056, + 12622851,)), + (u32x4::new( + 3881189, + 14870059, + 19712830, + 6071598,), u32x4::new( + 38147944, + 60776394, + 3427938, + 13765703,)), + (u32x4::new( + 7666911, + 24227591, + 17077136, + 22967588,), u32x4::new( + 6874639, + 30915523, + 11451695, + 24292224,)), + (u32x4::new( + 13659529, + 31984463, + 28764736, + 20506164,), u32x4::new( + 64729627, + 49321636, + 28284636, + 25472371,)), + (u32x4::new( + 39360308, + 42281399, + 9446504, + 868960,), u32x4::new( + 49227724, + 21351115, + 30561851, + 11292096,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 7071115, + 46444090, + 5387916, + 15432877,), u32x4::new( + 27226682, + 41506862, + 2398278, + 3978240,)), + (u32x4::new( + 51009614, + 54216973, + 24368938, + 31392616,), u32x4::new( + 38456150, + 62313644, + 6729154, + 99724,)), + (u32x4::new( + 17474332, + 62857913, + 2619930, + 30659308,), u32x4::new( + 18268181, + 32809239, + 22826292, + 24561895,)), + (u32x4::new( + 38187020, + 67003092, + 14118280, + 16500577,), u32x4::new( + 18808560, + 64983716, + 25712929, + 32518261,)), + (u32x4::new( + 25735813, + 62284262, + 10824872, + 20558596,), u32x4::new( + 48149681, + 31162667, + 22608274, + 26285185,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 963440, + 63742255, + 10230323, + 25515008,), u32x4::new( + 32506414, + 6105697, + 25980317, + 24645129,)), + (u32x4::new( + 7162189, + 8101249, + 14679265, + 33443386,), u32x4::new( + 2002396, + 8541405, + 19442276, + 4795881,)), + (u32x4::new( + 8116694, + 51463069, 
+ 4415528, + 25599140,), u32x4::new( + 55805721, + 39582709, + 6719436, + 30033839,)), + (u32x4::new( + 14468202, + 42181869, + 25188826, + 9639755,), u32x4::new( + 47546189, + 62711146, + 32762447, + 18338064,)), + (u32x4::new( + 33880058, + 32810909, + 8969931, + 13095238,), u32x4::new( + 38360605, + 40138517, + 9246134, + 4928058,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 63655588, + 17883670, + 9410246, + 26162761,), u32x4::new( + 5000571, + 7349225, + 23785252, + 32751089,)), + (u32x4::new( + 28568737, + 10733123, + 9342397, + 21570673,), u32x4::new( + 54096560, + 32467591, + 20494687, + 21511513,)), + (u32x4::new( + 47675157, + 47932807, + 29250946, + 15672208,), u32x4::new( + 59760469, + 9945465, + 14939287, + 18437405,)), + (u32x4::new( + 37985267, + 8609815, + 31573002, + 3373596,), u32x4::new( + 47828883, + 20834216, + 13248616, + 24154292,)), + (u32x4::new( + 5543543, + 29553242, + 3386453, + 30501150,), u32x4::new( + 25058089, + 15236571, + 8814395, + 32462955,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 39158670, + 15322548, + 20495103, + 3312736,), u32x4::new( + 14557171, + 12985179, + 8044741, + 3176899,)), + (u32x4::new( + 24673290, + 29693310, + 21412266, + 18324699,), u32x4::new( + 2154518, + 40329021, + 17500543, + 3954277,)), + (u32x4::new( + 36758685, + 38738957, + 165513, + 14691866,), u32x4::new( + 3070475, + 10424235, + 17096536, + 16896898,)), + (u32x4::new( + 59790459, + 43094586, + 8720681, + 10423589,), u32x4::new( + 1122030, + 31545615, + 4463786, + 31811293,)), + (u32x4::new( + 49778992, + 60881044, + 20509974, + 5832494,), u32x4::new( + 64155961, + 31483358, + 4511231, + 20307815,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 2863373, + 40876242, + 26865913, + 24067353,), u32x4::new( + 15726407, + 40919070, + 12953902, + 9931535,)), + (u32x4::new( + 60934877, + 42512204, + 21649141, + 21945190,), u32x4::new( + 52211954, + 60984193, + 7046207, + 5363493,)), + (u32x4::new( + 4205971, 
+ 64068464, + 18197273, + 7327176,), u32x4::new( + 51527794, + 21166920, + 20669933, + 11828242,)), + (u32x4::new( + 59782815, + 49617225, + 15379924, + 457923,), u32x4::new( + 9320508, + 21498914, + 3242540, + 31563182,)), + (u32x4::new( + 27714753, + 8664670, + 3366162, + 26338598,), u32x4::new( + 56775518, + 25796006, + 13129151, + 21388876,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 59276548, + 49972346, + 16795002, + 33455915,), u32x4::new( + 48430097, + 53857205, + 18627071, + 32474471,)), + (u32x4::new( + 42160315, + 50705892, + 13530540, + 28012698,), u32x4::new( + 19833221, + 55886870, + 20191784, + 9644313,)), + (u32x4::new( + 20372416, + 28414713, + 24084234, + 31804096,), u32x4::new( + 33815377, + 36131001, + 17251241, + 18291088,)), + (u32x4::new( + 56234667, + 14920441, + 2033267, + 29572003,), u32x4::new( + 1724043, + 45519699, + 17873735, + 501988,)), + (u32x4::new( + 50031659, + 31517850, + 15697583, + 1016845,), u32x4::new( + 43104661, + 54769582, + 8008601, + 27257051,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 52951491, + 66542164, + 14853573, + 30444631,), u32x4::new( + 12045973, + 24321813, + 16545674, + 18160646,)), + (u32x4::new( + 60107911, + 1126003, + 5947677, + 19486116,), u32x4::new( + 41119984, + 30860440, + 7935395, + 13354438,)), + (u32x4::new( + 17841328, + 11063269, + 1664538, + 26687568,), u32x4::new( + 6268968, + 22280371, + 17275484, + 4523163,)), + (u32x4::new( + 15886041, + 56799482, + 15446552, + 21712778,), u32x4::new( + 1005290, + 17827215, + 4978741, + 6854882,)), + (u32x4::new( + 34319277, + 47731002, + 20321804, + 28544575,), u32x4::new( + 29591814, + 63376351, + 24754545, + 26001714,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 66783087, + 5234346, + 46102, + 8566476,), u32x4::new( + 19947339, + 20180418, + 25398238, + 3726678,)), + (u32x4::new( + 63890180, + 46380965, + 20674069, + 5366544,), u32x4::new( + 59661487, + 48406612, + 31533614, + 7071217,)), + 
(u32x4::new( + 13104676, + 1406631, + 24326736, + 19854367,), u32x4::new( + 61039528, + 11019904, + 31967425, + 19219275,)), + (u32x4::new( + 39003597, + 30143957, + 15351834, + 8639435,), u32x4::new( + 57309582, + 61436794, + 15830475, + 10090318,)), + (u32x4::new( + 45923044, + 6700175, + 99413, + 21263025,), u32x4::new( + 23762647, + 53905481, + 6063914, + 10065424,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 42822326, + 57678669, + 4052879, + 25452667,), u32x4::new( + 54049411, + 2373092, + 22337016, + 7701046,)), + (u32x4::new( + 44382355, + 43307377, + 16761537, + 30373573,), u32x4::new( + 49790216, + 23230748, + 25655306, + 10519391,)), + (u32x4::new( + 919475, + 59371245, + 1273450, + 25558666,), u32x4::new( + 9724711, + 8556709, + 25755845, + 10887647,)), + (u32x4::new( + 25465699, + 44651158, + 17658392, + 11257418,), u32x4::new( + 29735193, + 22885150, + 7094716, + 26828565,)), + (u32x4::new( + 48237389, + 47661599, + 27054393, + 7328070,), u32x4::new( + 27280193, + 65616691, + 23062005, + 4170709,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 26535281, + 60238317, + 30343788, + 25790743,), u32x4::new( + 37993933, + 24614372, + 9523840, + 10401918,)), + (u32x4::new( + 2783987, + 29468958, + 4697011, + 19804475,), u32x4::new( + 37246678, + 46797720, + 10261254, + 18942252,)), + (u32x4::new( + 58135580, + 60247753, + 25301938, + 6844561,), u32x4::new( + 20949454, + 39844754, + 4552026, + 919057,)), + (u32x4::new( + 6694071, + 44126261, + 32285330, + 31370180,), u32x4::new( + 24603698, + 53328179, + 13971149, + 5325636,)), + (u32x4::new( + 64879487, + 582094, + 17982081, + 19190425,), u32x4::new( + 24951286, + 26923842, + 29077174, + 33286062,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 54863941, + 67016431, + 1224043, + 23371240,), u32x4::new( + 62940074, + 52101083, + 13523637, + 30366406,)), + (u32x4::new( + 36324581, + 25407485, + 18258623, + 4698602,), u32x4::new( + 50300544, + 2658516, + 26300935, + 
2611030,)), + (u32x4::new( + 27183975, + 21791014, + 18105064, + 9875199,), u32x4::new( + 58118912, + 54198635, + 6400311, + 14767984,)), + (u32x4::new( + 33918318, + 42937962, + 14809334, + 22136592,), u32x4::new( + 10636588, + 29082337, + 29829692, + 28549776,)), + (u32x4::new( + 61080905, + 854212, + 12202487, + 20004503,), u32x4::new( + 9256495, + 6903981, + 20567109, + 347423,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 41391822, + 34336880, + 22362564, + 14247996,), u32x4::new( + 12115604, + 41583344, + 7639288, + 28910945,)), + (u32x4::new( + 62066617, + 59758859, + 26665947, + 11614812,), u32x4::new( + 65737664, + 45704543, + 30324810, + 12868376,)), + (u32x4::new( + 17491771, + 43589814, + 9454919, + 26047850,), u32x4::new( + 52629282, + 39304244, + 3868968, + 19296062,)), + (u32x4::new( + 17826638, + 30413590, + 32534225, + 32741469,), u32x4::new( + 15012391, + 14365713, + 33039233, + 14791399,)), + (u32x4::new( + 64115596, + 59197067, + 32739005, + 23275744,), u32x4::new( + 32954320, + 22241406, + 20788442, + 4942942,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 31956192, + 59570132, + 2784352, + 4237732,), u32x4::new( + 47222312, + 4860927, + 18658867, + 15279314,)), + (u32x4::new( + 63240583, + 28160478, + 23524941, + 13390861,), u32x4::new( + 66437406, + 57718120, + 33345312, + 28896298,)), + (u32x4::new( + 39026193, + 46239965, + 21440243, + 25070488,), u32x4::new( + 64012383, + 60999016, + 16517060, + 29565907,)), + (u32x4::new( + 18118181, + 60161496, + 4212092, + 23976240,), u32x4::new( + 36277753, + 62363144, + 5816868, + 16964362,)), + (u32x4::new( + 18196138, + 62490693, + 281468, + 7934713,), u32x4::new( + 56027312, + 62015725, + 4837237, + 32932252,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 29885826, + 51028067, + 30418143, + 33438769,), u32x4::new( + 62542283, + 39442528, + 31535876, + 143299,)), + (u32x4::new( + 17143063, + 56709783, + 14451852, + 15782104,), u32x4::new( + 32762665, + 
14047066, + 26295037, + 5432487,)), + (u32x4::new( + 75151, + 533606, + 7539077, + 30926189,), u32x4::new( + 38410914, + 23771680, + 4872443, + 29199566,)), + (u32x4::new( + 61522396, + 48934708, + 16223126, + 207380,), u32x4::new( + 11171993, + 47975147, + 14164574, + 352966,)), + (u32x4::new( + 15449006, + 56530757, + 26796528, + 12045834,), u32x4::new( + 63738697, + 40667227, + 33001582, + 9101885,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 43331297, + 18431341, + 25801195, + 17267698,), u32x4::new( + 19365485, + 57295202, + 22218985, + 21284590,)), + (u32x4::new( + 2429849, + 19152559, + 10762172, + 22564684,), u32x4::new( + 21880390, + 66866426, + 20357935, + 22641906,)), + (u32x4::new( + 19771185, + 31652693, + 3666117, + 28136958,), u32x4::new( + 23624283, + 55101502, + 6313920, + 6783662,)), + (u32x4::new( + 3487137, + 7092443, + 11001876, + 26196524,), u32x4::new( + 47319246, + 44542068, + 17594073, + 15027760,)), + (u32x4::new( + 49563607, + 32191113, + 4991283, + 25400512,), u32x4::new( + 46539152, + 4155103, + 32368171, + 201203,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 20548943, + 14334571, + 4073874, + 6368588,), u32x4::new( + 53208883, + 56484515, + 15970071, + 25561889,)), + (u32x4::new( + 49915097, + 44030795, + 11202344, + 29284344,), u32x4::new( + 60258023, + 66225712, + 8075764, + 12383512,)), + (u32x4::new( + 45248912, + 4933668, + 9592153, + 5819559,), u32x4::new( + 31030983, + 38174071, + 32435814, + 7442522,)), + (u32x4::new( + 62688129, + 48218381, + 22089545, + 12897361,), u32x4::new( + 21050881, + 34278889, + 7569163, + 3225449,)), + (u32x4::new( + 19050183, + 51089071, + 32935757, + 22640195,), u32x4::new( + 66122318, + 47144608, + 18743677, + 25177079,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 41186817, + 46681702, + 31819867, + 32997133,), u32x4::new( + 38559207, + 27147015, + 30293819, + 16762988,)), + (u32x4::new( + 24154689, + 51762873, + 23883879, + 13510519,), u32x4::new( 
+ 55338250, + 61224161, + 11663149, + 30803960,)), + (u32x4::new( + 18104238, + 14117824, + 11724021, + 21362053,), u32x4::new( + 65704761, + 35530242, + 13498058, + 33522849,)), + (u32x4::new( + 63812888, + 23995539, + 28920539, + 24005193,), u32x4::new( + 26412223, + 36582218, + 4251418, + 26160309,)), + (u32x4::new( + 16822053, + 66064082, + 3482145, + 31979593,), u32x4::new( + 45937188, + 54475379, + 612917, + 7976478,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 46509314, + 55327128, + 8944536, + 274914,), u32x4::new( + 26432930, + 53829300, + 21192572, + 3569894,)), + (u32x4::new( + 20919764, + 64356651, + 30642344, + 17215170,), u32x4::new( + 20335124, + 11203745, + 18663316, + 19024174,)), + (u32x4::new( + 59297055, + 53842463, + 3680204, + 9806710,), u32x4::new( + 54004169, + 51484914, + 29807998, + 20134199,)), + (u32x4::new( + 14781592, + 22628010, + 26877930, + 25880359,), u32x4::new( + 30434803, + 190607, + 30184292, + 8991040,)), + (u32x4::new( + 64400983, + 64591751, + 854562, + 28216111,), u32x4::new( + 20010398, + 50414793, + 9803872, + 22687008,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 15091184, + 32550863, + 8818643, + 4244752,), u32x4::new( + 43123513, + 64565526, + 408838, + 13206998,)), + (u32x4::new( + 16405061, + 60379639, + 31489017, + 20949281,), u32x4::new( + 27568751, + 38734986, + 8364264, + 12451020,)), + (u32x4::new( + 16005217, + 58008076, + 1406778, + 26546927,), u32x4::new( + 39571784, + 56365493, + 31274296, + 8918790,)), + (u32x4::new( + 23271122, + 19453469, + 27718201, + 32742670,), u32x4::new( + 234332, + 36785342, + 22601675, + 14331046,)), + (u32x4::new( + 40636025, + 22442705, + 22115403, + 23745859,), u32x4::new( + 41164945, + 61012, + 12499614, + 542137,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 62776018, + 32835413, + 17373246, + 17187309,), u32x4::new( + 54469193, + 21770290, + 15923753, + 28996575,)), + (u32x4::new( + 59385210, + 63082298, + 12568449, + 
8509004,), u32x4::new( + 9483342, + 16105238, + 5756054, + 26890758,)), + (u32x4::new( + 53987996, + 38201748, + 5521661, + 19060159,), u32x4::new( + 18663191, + 9093637, + 27786835, + 31189196,)), + (u32x4::new( + 65872678, + 43635130, + 27903055, + 25020300,), u32x4::new( + 65772737, + 38110437, + 5213502, + 21909342,)), + (u32x4::new( + 4438979, + 9680838, + 10212446, + 4764184,), u32x4::new( + 13235684, + 58245995, + 20264570, + 21024049,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 60835961, + 48209103, + 31049052, + 4688268,), u32x4::new( + 12426713, + 59829045, + 22302488, + 29008521,)), + (u32x4::new( + 50401667, + 29716596, + 23531224, + 7581281,), u32x4::new( + 49071895, + 6952617, + 14934683, + 8218256,)), + (u32x4::new( + 1601446, + 36631413, + 31774811, + 29625330,), u32x4::new( + 56786114, + 8331539, + 23129509, + 19783344,)), + (u32x4::new( + 59514327, + 64513110, + 1772300, + 5701338,), u32x4::new( + 5737511, + 16147555, + 9461515, + 5703271,)), + (u32x4::new( + 33072974, + 54300426, + 11940114, + 1308663,), u32x4::new( + 15627555, + 4931627, + 28443714, + 20924342,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 18135013, + 20358426, + 4922557, + 10015355,), u32x4::new( + 65729669, + 34786528, + 26248549, + 29194359,)), + (u32x4::new( + 797666, + 34997544, + 24316856, + 25107230,), u32x4::new( + 24612576, + 4761401, + 15307321, + 32404252,)), + (u32x4::new( + 16501152, + 60565831, + 9487105, + 9316022,), u32x4::new( + 24986054, + 31917592, + 3962024, + 2501883,)), + (u32x4::new( + 63356796, + 50432342, + 18044926, + 30566881,), u32x4::new( + 42032028, + 31415202, + 13524600, + 16119907,)), + (u32x4::new( + 3927286, + 57022374, + 9265437, + 21620772,), u32x4::new( + 19481940, + 3806938, + 24836192, + 14572399,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 10785787, + 46564798, + 368445, + 33181384,), u32x4::new( + 5319843, + 52687136, + 30347110, + 29837357,)), + (u32x4::new( + 56436732, + 47859251, + 
24141084, + 22250712,), u32x4::new( + 59046084, + 4963427, + 33463413, + 17168859,)), + (u32x4::new( + 15512044, + 6366740, + 4737504, + 27644548,), u32x4::new( + 30307977, + 25037929, + 14593903, + 12836490,)), + (u32x4::new( + 63878897, + 34013023, + 5860752, + 7244096,), u32x4::new( + 3689461, + 57012135, + 18389096, + 11589351,)), + (u32x4::new( + 4682110, + 36302830, + 653422, + 22316819,), u32x4::new( + 14081831, + 5657024, + 11088376, + 24110612,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 39907267, + 45940262, + 24887471, + 18342609,), u32x4::new( + 878445, + 40456159, + 12019082, + 345107,)), + (u32x4::new( + 12794982, + 28893944, + 9447505, + 11387200,), u32x4::new( + 16961963, + 13916996, + 10893728, + 25898006,)), + (u32x4::new( + 44934162, + 53465865, + 3583620, + 1102334,), u32x4::new( + 53917811, + 63478576, + 2426066, + 10389549,)), + (u32x4::new( + 45096036, + 37595344, + 19367718, + 20257175,), u32x4::new( + 10280866, + 41653449, + 27665642, + 375926,)), + (u32x4::new( + 45847901, + 24064074, + 32494820, + 32204556,), u32x4::new( + 10720704, + 51079060, + 1297436, + 29853825,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 66303987, + 36060363, + 16494578, + 24962147,), u32x4::new( + 11971403, + 49538586, + 25060560, + 1964341,)), + (u32x4::new( + 25988481, + 27641502, + 24909517, + 27237087,), u32x4::new( + 66646363, + 52777626, + 16360849, + 10459972,)), + (u32x4::new( + 43930529, + 34374176, + 31225968, + 8807030,), u32x4::new( + 10394758, + 35904854, + 25325589, + 19335583,)), + (u32x4::new( + 25094697, + 34380951, + 20051185, + 32287161,), u32x4::new( + 11739332, + 53887441, + 30517319, + 26601892,)), + (u32x4::new( + 8868546, + 35635502, + 32513071, + 28248087,), u32x4::new( + 51946989, + 14222744, + 19198839, + 23261841,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 51218008, + 5070126, + 11046681, + 5320810,), u32x4::new( + 61212079, + 34104447, + 23895089, + 6460727,)), + (u32x4::new( + 
39843528, + 46278671, + 10426120, + 25624792,), u32x4::new( + 66658766, + 37140083, + 28933107, + 12969597,)), + (u32x4::new( + 59635793, + 40220191, + 5751421, + 173680,), u32x4::new( + 58321825, + 740337, + 1412847, + 7682623,)), + (u32x4::new( + 975962, + 56440763, + 20812276, + 22631115,), u32x4::new( + 49095824, + 19883130, + 2419746, + 31043648,)), + (u32x4::new( + 66208703, + 39669328, + 22525915, + 3748897,), u32x4::new( + 65994776, + 34533552, + 8126286, + 18326047,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 64176557, + 3912400, + 19351673, + 30068471,), u32x4::new( + 31190055, + 24221683, + 33142424, + 28698542,)), + (u32x4::new( + 34784792, + 4109933, + 3867193, + 19557314,), u32x4::new( + 2112512, + 32715890, + 24550117, + 16595976,)), + (u32x4::new( + 35542761, + 48024875, + 10925431, + 31526577,), u32x4::new( + 66577735, + 23189821, + 13375709, + 1735095,)), + (u32x4::new( + 59699254, + 43854093, + 29783239, + 24777271,), u32x4::new( + 19600372, + 39924461, + 2896720, + 1472185,)), + (u32x4::new( + 56389656, + 35980854, + 33172342, + 1370336,), u32x4::new( + 23707480, + 57654949, + 7850973, + 12655016,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 38372660, + 57101970, + 7044964, + 12732710,), u32x4::new( + 57535705, + 6043201, + 30858914, + 10946592,)), + (u32x4::new( + 21023468, + 6946992, + 26403324, + 23901823,), u32x4::new( + 35695559, + 23440687, + 4763891, + 6514074,)), + (u32x4::new( + 28662273, + 30933699, + 9352242, + 26354829,), u32x4::new( + 37402243, + 3145176, + 8770289, + 525937,)), + (u32x4::new( + 54933102, + 36695832, + 3281859, + 4755022,), u32x4::new( + 23043294, + 32794379, + 15618886, + 23602412,)), + (u32x4::new( + 9931565, + 29897140, + 2480737, + 24193701,), u32x4::new( + 7833615, + 2284939, + 893926, + 13421882,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 22917795, + 22088359, + 28978099, + 19794863,), u32x4::new( + 60542318, + 29878494, + 31053731, + 9080720,)), + 
(u32x4::new( + 23679072, + 52547035, + 28424916, + 20647332,), u32x4::new( + 4008761, + 28267029, + 12961289, + 1589095,)), + (u32x4::new( + 55616194, + 26678929, + 14998265, + 23274397,), u32x4::new( + 54625466, + 46244264, + 28627706, + 33030665,)), + (u32x4::new( + 11527330, + 6449415, + 26531607, + 3472938,), u32x4::new( + 41541592, + 62607682, + 19862690, + 20564723,)), + (u32x4::new( + 32843805, + 49066843, + 28425824, + 19521495,), u32x4::new( + 48792073, + 48242878, + 27392443, + 13175986,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 16185025, + 61537525, + 2961305, + 1492442,), u32x4::new( + 25123147, + 3095034, + 31896958, + 33089615,)), + (u32x4::new( + 64748157, + 18336595, + 16522231, + 25426312,), u32x4::new( + 65718949, + 35485695, + 30554083, + 10205918,)), + (u32x4::new( + 39626934, + 39271045, + 16420458, + 9826240,), u32x4::new( + 56483981, + 27128085, + 3783403, + 13360006,)), + (u32x4::new( + 30793778, + 66771960, + 17241420, + 6564573,), u32x4::new( + 61102581, + 29974476, + 32385512, + 9011754,)), + (u32x4::new( + 28068166, + 11862220, + 14323567, + 12380617,), u32x4::new( + 52090465, + 16029056, + 24495309, + 21409233,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 59411973, + 57437124, + 11695483, + 17586857,), u32x4::new( + 16108987, + 43449109, + 31098002, + 6248476,)), + (u32x4::new( + 42258047, + 61595931, + 29308533, + 11742653,), u32x4::new( + 43042345, + 27373650, + 30165249, + 21929989,)), + (u32x4::new( + 49907221, + 9620337, + 21888081, + 20981082,), u32x4::new( + 56288861, + 61562203, + 33223566, + 3582446,)), + (u32x4::new( + 57535017, + 41003416, + 22080416, + 14463796,), u32x4::new( + 65518565, + 18127889, + 24370863, + 33332664,)), + (u32x4::new( + 66655380, + 6430175, + 471782, + 11947673,), u32x4::new( + 30596400, + 18898659, + 15930721, + 4211851,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 6757410, + 65455566, + 13584784, + 11362173,), u32x4::new( + 10797127, + 24451471, + 
19541370, + 29309435,)), + (u32x4::new( + 40360156, + 17685025, + 18326181, + 3846903,), u32x4::new( + 13693365, + 63049479, + 31900359, + 23385063,)), + (u32x4::new( + 52455038, + 57513503, + 22163311, + 27095042,), u32x4::new( + 48610726, + 66454160, + 12085341, + 26357004,)), + (u32x4::new( + 22097042, + 14063840, + 6705778, + 14342902,), u32x4::new( + 66139825, + 20702105, + 31279090, + 7495745,)), + (u32x4::new( + 27360710, + 49314837, + 18774847, + 7146436,), u32x4::new( + 37066216, + 42004961, + 22409916, + 10524446,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 1497507, + 33054449, + 11839906, + 2960428,), u32x4::new( + 40538463, + 18884538, + 25018820, + 4073970,)), + (u32x4::new( + 54484385, + 43640735, + 2808257, + 20710708,), u32x4::new( + 39840730, + 27222424, + 21783544, + 11848522,)), + (u32x4::new( + 45765237, + 48200555, + 9299019, + 9393151,), u32x4::new( + 34818188, + 56098995, + 13575233, + 21012731,)), + (u32x4::new( + 4265428, + 49627650, + 24960282, + 9425650,), u32x4::new( + 47883651, + 2797524, + 11853190, + 22877329,)), + (u32x4::new( + 25008173, + 64199503, + 380047, + 12107343,), u32x4::new( + 12329448, + 11914399, + 764281, + 29687002,)), + ])), + CachedPoint(FieldElement2625x4([ + (u32x4::new( + 35889734, + 23047226, + 4022841, + 7017445,), u32x4::new( + 7274086, + 53316179, + 25100176, + 15310676,)), + (u32x4::new( + 42409427, + 30270106, + 6823853, + 31551384,), u32x4::new( + 40645017, + 66489807, + 18021817, + 32669351,)), + (u32x4::new( + 39827134, + 43680850, + 28297996, + 20258133,), u32x4::new( + 26058742, + 52643238, + 22238331, + 21690533,)), + (u32x4::new( + 60808002, + 17499995, + 30042246, + 29310584,), u32x4::new( + 48219954, + 29389518, + 8680514, + 17844709,)), + (u32x4::new( + 6452896, + 50116553, + 9532047, + 26821214,), u32x4::new( + 44524351, + 50428429, + 21904953, + 12608048,)), + ])) +]); diff --git a/curve25519-dalek/src/backend/vector/neon/edwards.rs 
b/curve25519-dalek/src/backend/vector/neon/edwards.rs new file mode 100644 index 000000000..8808c0ebe --- /dev/null +++ b/curve25519-dalek/src/backend/vector/neon/edwards.rs @@ -0,0 +1,547 @@ +// -*- mode: rust; -*- +// +// This file is part of curve25519-dalek. +// Copyright (c) 2016-2019 Isis Lovecruft, Henry de Valence +// 2021-2022 Robrecht Blancquaert +// See LICENSE for licensing information. +// +// Authors: +// - Isis Agora Lovecruft +// - Henry de Valence +// - Robrecht Blancquaert + +//! Parallel Edwards Arithmetic for Curve25519. +//! +//! This module currently has two point types: +//! +//! * `ExtendedPoint`: a point stored in vector-friendly format, with +//! vectorized doubling and addition; +//! +//! * `CachedPoint`: used for readdition. +//! +//! Details on the formulas can be found in the documentation for the +//! `avx2` module. +//! +//! Similar to field, code for this was mostly copied from avx2 module. + +#![allow(non_snake_case)] + +use core::convert::From; +use core::ops::{Add, Neg, Sub}; + +use subtle::Choice; +use subtle::ConditionallySelectable; + +use crate::edwards; +use crate::window::{LookupTable, NafLookupTable5, NafLookupTable8}; + +use crate::traits::Identity; + +use super::constants; +use super::field::{FieldElement2625x4, Lanes, Shuffle}; + +/// A point on Curve25519, using parallel Edwards formulas for curve +/// operations. +/// +/// # Invariant +/// +/// The coefficients of an `ExtendedPoint` are bounded with +/// \\( b < 0.007 \\). 
+#[derive(Copy, Clone, Debug)]
+pub struct ExtendedPoint(pub(super) FieldElement2625x4);
+
+/// Pack the `X`, `Y`, `Z`, `T` coordinates of an `EdwardsPoint` into a single
+/// `FieldElement2625x4`, in that order.
+impl From<edwards::EdwardsPoint> for ExtendedPoint {
+    fn from(P: edwards::EdwardsPoint) -> ExtendedPoint {
+        ExtendedPoint(FieldElement2625x4::new(&P.X, &P.Y, &P.Z, &P.T))
+    }
+}
+
+/// Split the vectorized representation back into the four serial
+/// coordinates `X`, `Y`, `Z`, `T` of an `EdwardsPoint`.
+impl From<ExtendedPoint> for edwards::EdwardsPoint {
+    fn from(P: ExtendedPoint) -> edwards::EdwardsPoint {
+        let tmp = P.0.split();
+        edwards::EdwardsPoint {
+            X: tmp[0],
+            Y: tmp[1],
+            Z: tmp[2],
+            T: tmp[3],
+        }
+    }
+}
+
+/// Selection and assignment are delegated to the wrapped
+/// `FieldElement2625x4`.
+impl ConditionallySelectable for ExtendedPoint {
+    fn conditional_select(a: &Self, b: &Self, choice: Choice) -> Self {
+        ExtendedPoint(FieldElement2625x4::conditional_select(&a.0, &b.0, choice))
+    }
+
+    fn conditional_assign(&mut self, other: &Self, choice: Choice) {
+        self.0.conditional_assign(&other.0, choice);
+    }
+}
+
+/// The default `ExtendedPoint` is the identity element.
+impl Default for ExtendedPoint {
+    fn default() -> ExtendedPoint {
+        ExtendedPoint::identity()
+    }
+}
+
+impl Identity for ExtendedPoint {
+    /// Return the precomputed identity constant of this backend.
+    fn identity() -> ExtendedPoint {
+        constants::EXTENDEDPOINT_IDENTITY
+    }
+}
+
+impl ExtendedPoint {
+    /// Compute the double of this point.
+    pub fn double(&self) -> ExtendedPoint {
+        // Want to compute (X1 Y1 Z1 X1+Y1).
+        // Not sure how to do this less expensively than computing
+        // (X1 Y1 Z1 T1) --(Shuffle::ABAB)--> (X1 Y1 X1 Y1)
+        // (X1 Y1 X1 Y1) --(Shuffle::BADC)--> (Y1 X1 Y1 X1)
+        // and then adding.
+
+        // Set tmp0 = (X1 Y1 X1 Y1)
+        let mut tmp0 = self.0.shuffle(Shuffle::ABAB);
+
+        // Set tmp1 = (Y1 X1 Y1 X1)
+        let mut tmp1 = tmp0.shuffle(Shuffle::BADC);
+
+        // Set tmp0 = (X1 Y1 Z1 X1+Y1)
+        tmp0 = self.0.blend(tmp0 + tmp1, Lanes::D);
+
+        // Set tmp1 = tmp0^2, negating the D values
+        tmp1 = tmp0.square_and_negate_D();
+        // Now tmp1 = (S1 S2 S3 -S4) with b < 0.007
+
+        // See discussion of bounds in the module-level documentation.
+        // We want to compute
+        //
+        //    + | S1 | S1 | S1 | S1 |
+        //    + | S2 |    |    | S2 |
+        //    + |    |    | S3 |    |
+        //    + |    |    | S3 |    |
+        //    + |    |    |    |-S4 |
+        //    + |    | 2p | 2p |    |
+        //    - |    | S2 | S2 |    |
+        //    =======================
+        //        S5   S6   S8   S9
+
+        let zero = FieldElement2625x4::zero();
+        let S_1 = tmp1.shuffle(Shuffle::AAAA);
+        let S_2 = tmp1.shuffle(Shuffle::BBBB);
+
+        tmp0 = zero.blend(tmp1 + tmp1, Lanes::C);
+        // tmp0 = (0, 0, 2S3, 0)
+        tmp0 = tmp0.blend(tmp1, Lanes::D);
+        // tmp0 = (0, 0, 2S3, -S4)
+        tmp0 = tmp0 + S_1;
+        // tmp0 = (S1, S1, S1 + 2S3, S1 - S4)
+        tmp0 = tmp0 + zero.blend(S_2, Lanes::AD);
+        // tmp0 = (S1 + S2, S1, S1 + 2S3, S1 + S2 - S4)
+        tmp0 = tmp0 + zero.blend(S_2.negate_lazy(), Lanes::BC);
+        // tmp0 = (S1 + S2, S1 - S2, S1 - S2 + 2S3, S1 + S2 - S4)
+        //    b < (   1.01,     1.6,          2.33,          1.6)
+        // Now tmp0 = (S5, S6, S8, S9)
+
+        // Set tmp1 = (  S9,   S6,   S6,   S9)
+        //        b < ( 1.6,  1.6,  1.6,  1.6)
+        tmp1 = tmp0.shuffle(Shuffle::DBBD);
+        // Set tmp0 = (  S8,   S5,   S8,   S5)
+        //        b < (2.33, 1.01, 2.33, 1.01)
+        tmp0 = tmp0.shuffle(Shuffle::CACA);
+
+        // Bounds on (tmp0, tmp1) are (2.33, 1.6) < (2.5, 1.75).
+        ExtendedPoint(&tmp0 * &tmp1)
+    }
+
+    /// Compute `[2^k] self` by doubling `k` times.
+    ///
+    /// With `k == 0` no doubling is performed and a copy of `self` is
+    /// returned.
+    pub fn mul_by_pow_2(&self, k: u32) -> ExtendedPoint {
+        (0..k).fold(*self, |acc, _| acc.double())
+    }
+}
+
+/// A cached point with some precomputed variables used for readdition.
+///
+/// # Warning
+///
+/// It is not safe to negate this point more than once.
+///
+/// # Invariant
+///
+/// As long as the `CachedPoint` is not repeatedly negated, its
+/// coefficients will be bounded with \\( b < 1.0 \\).
+#[derive(Copy, Clone, Debug)]
+pub struct CachedPoint(pub(super) FieldElement2625x4);
+
+/// Precompute the readdition form of an `ExtendedPoint`: the lanes become
+/// `(Y - X, Y + X, Z, T)` scaled by `(121666, 121666, 2*121666, -2*121665)`.
+impl From<ExtendedPoint> for CachedPoint {
+    fn from(P: ExtendedPoint) -> CachedPoint {
+        let mut x = P.0;
+
+        x = x.blend(x.diff_sum(), Lanes::AB);
+        // x = (Y2 - X2, Y2 + X2, Z2, T2) = (S2 S3 Z2 T2)
+
+        x = x * (121666, 121666, 2 * 121666, 2 * 121665);
+        // x = (121666*S2 121666*S3 2*121666*Z2 2*121665*T2)
+
+        x = x.blend(-x, Lanes::D);
+        // x = (121666*S2 121666*S3 2*121666*Z2 -2*121665*T2)
+
+        // The coefficients of the output are bounded with b < 0.007.
+        CachedPoint(x)
+    }
+}
+
+/// The default `CachedPoint` is the identity element.
+impl Default for CachedPoint {
+    fn default() -> CachedPoint {
+        CachedPoint::identity()
+    }
+}
+
+impl Identity for CachedPoint {
+    /// Return the precomputed identity constant of this backend.
+    fn identity() -> CachedPoint {
+        constants::CACHEDPOINT_IDENTITY
+    }
+}
+
+/// Selection and assignment are delegated to the wrapped
+/// `FieldElement2625x4`.
+impl ConditionallySelectable for CachedPoint {
+    fn conditional_select(a: &Self, b: &Self, choice: Choice) -> Self {
+        CachedPoint(FieldElement2625x4::conditional_select(&a.0, &b.0, choice))
+    }
+
+    fn conditional_assign(&mut self, other: &Self, choice: Choice) {
+        self.0.conditional_assign(&other.0, choice);
+    }
+}
+
+impl<'a> Neg for &'a CachedPoint {
+    type Output = CachedPoint;
+    /// Lazily negate the point.
+    ///
+    /// # Warning
+    ///
+    /// Because this method does not perform a reduction, it is not
+    /// safe to repeatedly negate a point.
+    fn neg(self) -> CachedPoint {
+        // Swap the first two lanes, then negate only lane D.
+        let swapped = self.0.shuffle(Shuffle::BACD);
+        CachedPoint(swapped.blend(swapped.negate_lazy(), Lanes::D))
+    }
+}
+
+impl<'a, 'b> Add<&'b CachedPoint> for &'a ExtendedPoint {
+    type Output = ExtendedPoint;
+
+    /// Add an `ExtendedPoint` and a `CachedPoint`.
+    fn add(self, other: &'b CachedPoint) -> ExtendedPoint {
+        // The coefficients of an `ExtendedPoint` are reduced after
+        // every operation. If the `CachedPoint` was negated, its
+        // coefficients grow by one bit. So on input, `self` is
+        // bounded with `b < 0.007` and `other` is bounded with
+        // `b < 1.0`.
+
+        let mut tmp = self.0;
+
+        tmp = tmp.blend(tmp.diff_sum(), Lanes::AB);
+        // tmp = (Y1-X1 Y1+X1 Z1 T1) = (S0 S1 Z1 T1) with b < 1.6
+
+        // (tmp, other) bounded with b < (1.6, 1.0) < (2.5, 1.75).
+        tmp = &tmp * &other.0;
+        // tmp = (S0*S2' S1*S3' Z1*Z2' T1*T2') = (S8 S9 S10 S11)
+
+        tmp = tmp.shuffle(Shuffle::ABDC);
+        // tmp = (S8 S9 S11 S10)
+
+        tmp = tmp.diff_sum();
+        // tmp = (S9-S8 S9+S8 S10-S11 S10+S11) = (S12 S13 S14 S15)
+
+        let t0 = tmp.shuffle(Shuffle::ADDA);
+        // t0 = (S12 S15 S15 S12)
+        let t1 = tmp.shuffle(Shuffle::CBCB);
+        // t1 = (S14 S13 S14 S13)
+
+        // All coefficients of t0, t1 are bounded with b < 1.6.
+        // Return (S12*S14 S15*S13 S15*S14 S12*S13) = (X3 Y3 Z3 T3)
+        ExtendedPoint(&t0 * &t1)
+    }
+}
+
+impl<'a, 'b> Sub<&'b CachedPoint> for &'a ExtendedPoint {
+    type Output = ExtendedPoint;
+
+    /// Implement subtraction by negating the point and adding.
+    ///
+    /// Empirically, this seems about the same cost as a custom
+    /// subtraction impl (maybe because the benefit is cancelled by
+    /// increased code size?)
+    fn sub(self, other: &'b CachedPoint) -> ExtendedPoint {
+        self + &(-other)
+    }
+}
+
+/// Build the table `[P, 2P, 3P, ..., 8P]` of cached multiples of `point`.
+impl<'a> From<&'a edwards::EdwardsPoint> for LookupTable<CachedPoint> {
+    fn from(point: &'a edwards::EdwardsPoint) -> Self {
+        let P = ExtendedPoint::from(*point);
+        // Every slot starts as P; slot i + 1 is then overwritten with P + slot i.
+        let mut points = [CachedPoint::from(P); 8];
+        for i in 0..7 {
+            points[i + 1] = (&P + &points[i]).into();
+        }
+        LookupTable(points)
+    }
+}
+
+/// Build the table of the first 8 odd multiples of `point`.
+impl<'a> From<&'a edwards::EdwardsPoint> for NafLookupTable5<CachedPoint> {
+    fn from(point: &'a edwards::EdwardsPoint) -> Self {
+        let A = ExtendedPoint::from(*point);
+        let mut Ai = [CachedPoint::from(A); 8];
+        let A2 = A.double();
+        for i in 0..7 {
+            Ai[i + 1] = (&A2 + &Ai[i]).into();
+        }
+        // Now Ai = [A, 3A, 5A, 7A, 9A, 11A, 13A, 15A]
+        NafLookupTable5(Ai)
+    }
+}
+
+/// Build the table of the first 64 odd multiples of `point`.
+impl<'a> From<&'a edwards::EdwardsPoint> for NafLookupTable8<CachedPoint> {
+    fn from(point: &'a edwards::EdwardsPoint) -> Self {
+        let A = ExtendedPoint::from(*point);
+        let mut Ai = [CachedPoint::from(A); 64];
+        let A2 = A.double();
+        for i in 0..63 {
+            Ai[i + 1] = (&A2 + &Ai[i]).into();
+        }
+        // Now Ai = [A, 3A, 5A, 7A, 9A, 11A, 13A, 15A, ..., 127A]
+        NafLookupTable8(Ai)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    /// Serial reference implementation of the parallel addition formulas,
+    /// printing every intermediate value.
+    fn serial_add(P: edwards::EdwardsPoint, Q: edwards::EdwardsPoint) -> edwards::EdwardsPoint {
+        use crate::backend::serial::u64::field::FieldElement51;
+
+        let (X1, Y1, Z1, T1) = (P.X, P.Y, P.Z, P.T);
+        let (X2, Y2, Z2, T2) = (Q.X, Q.Y, Q.Z, Q.T);
+
+        macro_rules! print_var {
+            ($x:ident) => {
+                println!("{} = {:?}", stringify!($x), $x.as_bytes());
+            };
+        }
+
+        let S0 = &Y1 - &X1; // R1
+        let S1 = &Y1 + &X1; // R3
+        let S2 = &Y2 - &X2; // R2
+        let S3 = &Y2 + &X2; // R4
+        print_var!(S0);
+        print_var!(S1);
+        print_var!(S2);
+        print_var!(S3);
+        println!();
+
+        let S4 = &S0 * &S2; // R5 = R1 * R2
+        let S5 = &S1 * &S3; // R6 = R3 * R4
+        let S6 = &Z1 * &Z2; // R8
+        let S7 = &T1 * &T2; // R7
+        print_var!(S4);
+        print_var!(S5);
+        print_var!(S6);
+        print_var!(S7);
+        println!();
+
+        let S8 = &S4 * &FieldElement51([121666, 0, 0, 0, 0]); // R5
+        let S9 = &S5 * &FieldElement51([121666, 0, 0, 0, 0]); // R6
+        let S10 = &S6 * &FieldElement51([2 * 121666, 0, 0, 0, 0]); // R8
+        let S11 = &S7 * &(-&FieldElement51([2 * 121665, 0, 0, 0, 0])); // R7
+        print_var!(S8);
+        print_var!(S9);
+        print_var!(S10);
+        print_var!(S11);
+        println!();
+
+        let S12 = &S9 - &S8; // R1
+        let S13 = &S9 + &S8; // R4
+        let S14 = &S10 - &S11; // R2
+        let S15 = &S10 + &S11; // R3
+        print_var!(S12);
+        print_var!(S13);
+        print_var!(S14);
+        print_var!(S15);
+        println!();
+
+        let X3 = &S12 * &S14; // R1 * R2
+        let Y3 = &S15 * &S13; // R3 * R4
+        let Z3 = &S15 * &S14; // R2 * R3
+        let T3 = &S12 * &S13; // R1 * R4
+
+        edwards::EdwardsPoint {
+            X: X3,
+            Y: Y3,
+            Z: Z3,
+            T: T3,
+        }
+    }
+
+    /// Check `P + Q` and `P - Q` from the vector backend (and `P + Q` from
+    /// `serial_add`) against the `EdwardsPoint` operators.
+    fn addition_test_helper(P: edwards::EdwardsPoint, Q: edwards::EdwardsPoint) {
+        // Test the serial implementation of the parallel addition formulas
+        let R_serial: edwards::EdwardsPoint = serial_add(P, Q);
+
+        // Test the vector implementation of the parallel readdition formulas
+        let cached_Q = CachedPoint::from(ExtendedPoint::from(Q));
+        let R_vector: edwards::EdwardsPoint = (&ExtendedPoint::from(P) + &cached_Q).into();
+        let S_vector: edwards::EdwardsPoint = (&ExtendedPoint::from(P) - &cached_Q).into();
+
+        println!("Testing point addition:");
+        println!("P = {:?}", P);
+        println!("Q = {:?}", Q);
+        println!("cached Q = {:?}", cached_Q);
+        println!("R = P + Q = {:?}", &P + &Q);
+        println!("R_serial = {:?}", R_serial);
+        println!("R_vector = {:?}", R_vector);
+        println!("S = P - Q = {:?}", &P - &Q);
+        println!("S_vector = {:?}", S_vector);
+        assert_eq!(R_serial.compress(), (&P + &Q).compress());
+        assert_eq!(R_vector.compress(), (&P + &Q).compress());
+        assert_eq!(S_vector.compress(), (&P - &Q).compress());
+        println!("OK!\n");
+    }
+
+    #[test]
+    fn vector_addition_vs_serial_addition_vs_edwards_extendedpoint() {
+        use crate::constants;
+        use crate::scalar::Scalar;
+
+        println!("Testing id +- id");
+        let P = edwards::EdwardsPoint::identity();
+        let Q = edwards::EdwardsPoint::identity();
+        addition_test_helper(P, Q);
+
+        println!("Testing id +- B");
+        let P = edwards::EdwardsPoint::identity();
+        let Q = constants::ED25519_BASEPOINT_POINT;
+        addition_test_helper(P, Q);
+
+        println!("Testing B +- B");
+        let P = constants::ED25519_BASEPOINT_POINT;
+        let Q = constants::ED25519_BASEPOINT_POINT;
+        addition_test_helper(P, Q);
+
+        println!("Testing B +- kB");
+        let P = constants::ED25519_BASEPOINT_POINT;
+        let Q = &constants::ED25519_BASEPOINT_TABLE * &Scalar::from(8475983829u64);
+        addition_test_helper(P, Q);
+    }
+
+    /// Serial reference implementation of the parallel doubling formulas,
+    /// printing every intermediate value.
+    fn serial_double(P: edwards::EdwardsPoint) -> edwards::EdwardsPoint {
+        let (X1, Y1, Z1, _T1) = (P.X, P.Y, P.Z, P.T);
+
+        macro_rules! print_var {
+            ($x:ident) => {
+                println!("{} = {:?}", stringify!($x), $x.as_bytes());
+            };
+        }
+
+        let S0 = &X1 + &Y1; // R1
+        print_var!(S0);
+        println!();
+
+        let S1 = X1.square();
+        let S2 = Y1.square();
+        let S3 = Z1.square();
+        let S4 = S0.square();
+        print_var!(S1);
+        print_var!(S2);
+        print_var!(S3);
+        print_var!(S4);
+        println!();
+
+        let S5 = &S1 + &S2;
+        let S6 = &S1 - &S2;
+        let S7 = &S3 + &S3;
+        let S8 = &S7 + &S6;
+        let S9 = &S5 - &S4;
+        print_var!(S5);
+        print_var!(S6);
+        print_var!(S7);
+        print_var!(S8);
+        print_var!(S9);
+        println!();
+
+        let X3 = &S8 * &S9;
+        let Y3 = &S5 * &S6;
+        let Z3 = &S8 * &S6;
+        let T3 = &S5 * &S9;
+
+        edwards::EdwardsPoint {
+            X: X3,
+            Y: Y3,
+            Z: Z3,
+            T: T3,
+        }
+    }
+
+    /// Check the serial and vector doublings of `P` against `P + P`.
+    fn doubling_test_helper(P: edwards::EdwardsPoint) {
+        let R1: edwards::EdwardsPoint = serial_double(P);
+        let R2: edwards::EdwardsPoint = ExtendedPoint::from(P).double().into();
+        println!("Testing point doubling:");
+        println!("P = {:?}", P);
+        println!("(serial) R1 = {:?}", R1);
+        println!("(vector) R2 = {:?}", R2);
+        println!("P + P = {:?}", &P + &P);
+        assert_eq!(R1.compress(), (&P + &P).compress());
+        assert_eq!(R2.compress(), (&P + &P).compress());
+        println!("OK!\n");
+    }
+
+    #[test]
+    fn vector_doubling_vs_serial_doubling_vs_edwards_extendedpoint() {
+        use crate::constants;
+        use crate::scalar::Scalar;
+
+        println!("Testing [2]id");
+        let P = edwards::EdwardsPoint::identity();
+        doubling_test_helper(P);
+
+        println!("Testing [2]B");
+        let P = constants::ED25519_BASEPOINT_POINT;
+        doubling_test_helper(P);
+
+        println!("Testing [2]([k]B)");
+        let P = &constants::ED25519_BASEPOINT_TABLE * &Scalar::from(8475983829u64);
+        doubling_test_helper(P);
+    }
+
+    /// The hard-coded `BASEPOINT_ODD_LOOKUP_TABLE` must equal the table
+    /// computed at runtime from the Ed25519 basepoint.
+    #[test]
+    fn basepoint_odd_lookup_table_verify() {
+        use crate::backend::vector::neon::constants::BASEPOINT_ODD_LOOKUP_TABLE;
+        use crate::constants;
+
+        let basepoint_odd_table =
+            NafLookupTable8::<CachedPoint>::from(&constants::ED25519_BASEPOINT_POINT);
+        println!("Testing basepoint table");
+
+        let table_B = &BASEPOINT_ODD_LOOKUP_TABLE;
+        for (b_vec, base_vec) in table_B.0.iter().zip(basepoint_odd_table.0.iter()) {
+            let b_splits = b_vec.0.split();
+            let base_splits = base_vec.0.split();
+
+            println!("{:?}", base_splits[0]);
+            println!("{:?}", base_splits[1]);
+            println!("{:?}", base_splits[2]);
+            println!("{:?}", base_splits[3]);
+            println!("----");
+            println!("{:?}", b_splits[0]);
+            println!("{:?}", b_splits[1]);
+            println!("{:?}", b_splits[2]);
+            println!("{:?}", b_splits[3]);
+
+            assert_eq!(base_splits[0], b_splits[0]);
+            assert_eq!(base_splits[1], b_splits[1]);
+            assert_eq!(base_splits[2], b_splits[2]);
+            assert_eq!(base_splits[3], b_splits[3]);
+        }
+    }
+}
diff --git a/curve25519-dalek/src/backend/vector/neon/field.rs b/curve25519-dalek/src/backend/vector/neon/field.rs
new file mode 100644
index 000000000..8c709b3bc
--- /dev/null
+++ b/curve25519-dalek/src/backend/vector/neon/field.rs
@@ -0,0 +1,784 @@
+// -*- mode: rust; -*-
+//
+// This file is part of curve25519-dalek.
+// Copyright (c) 2016-2019 Isis Lovecruft, Henry de Valence
+//               2021-2022 Robrecht Blancquaert
+// See LICENSE for licensing information.
+//
+// Authors:
+// - Isis Agora Lovecruft
+// - Henry de Valence
+// - Robrecht Blancquaert
+
+//! More details on the algorithms can be found in the `avx2`
+//! module. Here comments are mostly added only when needed
+//! to explain differences between the 'base' avx2 version and
+//! this re-implementation for arm neon.
+
+//! The most major difference is the split of one vector of 8
+//! limbs into two vectors holding 4 limbs each. For the rest,
+//! changes were made to account for the different structure of
+//! arm instructions.
+ +use core::ops::{Add, Mul, Neg}; +use packed_simd::{u32x4, u32x2, i32x4, u64x4, u64x2, IntoBits}; + +use crate::backend::vector::neon::constants::{P_TIMES_16_HI, P_TIMES_16_LO, P_TIMES_2_HI, P_TIMES_2_LO}; +use crate::backend::serial::u64::field::FieldElement51; + +/// Unpack 32-bit lanes: +/// ((a0, b0, a1, b1) ,(c0, d0, c1, d1)) +/// into +/// ((a0, b0), (c0, d0)) +/// ((a1, b1), (c1, d1)) +#[inline(always)] +fn unpack_pair(src: (u32x4, u32x4)) -> ((u32x2, u32x2), (u32x2, u32x2)) { + let a0: u32x2; + let a1: u32x2; + let b0: u32x2; + let b1: u32x2; + unsafe { + use core::arch::aarch64::vget_low_u32; + use core::arch::aarch64::vget_high_u32; + a0 = vget_low_u32(src.0.into_bits()).into_bits(); + a1 = vget_low_u32(src.1.into_bits()).into_bits(); + b0 = vget_high_u32(src.0.into_bits()).into_bits(); + b1 = vget_high_u32(src.1.into_bits()).into_bits(); + } + return ((a0, a1), (b0, b1)); +} + +/// ((a0, 0, b0, 0), (c0, 0, d0, 0)) +/// ((a1, 0, b1, 0), (c1, 0, d1, 0)) +/// into +/// ((a0, b0, a1, b1), (c0, d0, c1, d1)) +#[inline(always)] +fn repack_pair(x: (u32x4, u32x4), y: (u32x4, u32x4)) -> (u32x4, u32x4) { + unsafe { + use core::arch::aarch64::vget_low_u32; + use core::arch::aarch64::vcombine_u32; + use core::arch::aarch64::vset_lane_u32; + use core::arch::aarch64::vgetq_lane_u32; + + (vcombine_u32( + vset_lane_u32(vgetq_lane_u32(x.0.into_bits(), 2) , vget_low_u32(x.0.into_bits()), 1), + vset_lane_u32(vgetq_lane_u32(y.0.into_bits(), 2) , vget_low_u32(y.0.into_bits()), 1)).into_bits(), + vcombine_u32( + vset_lane_u32(vgetq_lane_u32(x.1.into_bits(), 2) , vget_low_u32(x.1.into_bits()), 1), + vset_lane_u32(vgetq_lane_u32(y.1.into_bits(), 2) , vget_low_u32(y.1.into_bits()), 1)).into_bits()) + } +} + +#[derive(Copy, Clone, Debug)] +pub enum Lanes { + C, + D, + AB, + AC, + CD, + AD, + BC, + ABCD, +} + +#[derive(Copy, Clone, Debug)] +pub enum Shuffle { + AAAA, + BBBB, + CACA, + DBBD, + ADDA, + CBCB, + ABAB, + BADC, + BACD, + ABDC, +} + +macro_rules! 
lane_shuffle { + {$l0:expr, $l1:expr, $l2:expr, $l3:expr, $l4:expr, $l5:expr, $l6:expr, $l7:expr, $x:expr} => { + unsafe { + use core::arch::aarch64::vgetq_lane_u32; + const c: [i32; 8] = [$l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7]; + (u32x4::new(if c[0] < 4 { vgetq_lane_u32($x.0.into_bits(), c[0]) } else { vgetq_lane_u32($x.1.into_bits(), c[0] - 4) }, + if c[1] < 4 { vgetq_lane_u32($x.0.into_bits(), c[1]) } else { vgetq_lane_u32($x.1.into_bits(), c[1] - 4) }, + if c[2] < 4 { vgetq_lane_u32($x.0.into_bits(), c[2]) } else { vgetq_lane_u32($x.1.into_bits(), c[2] - 4) }, + if c[3] < 4 { vgetq_lane_u32($x.0.into_bits(), c[3]) } else { vgetq_lane_u32($x.1.into_bits(), c[3] - 4) }), + u32x4::new(if c[4] < 4 { vgetq_lane_u32($x.0.into_bits(), c[4]) } else { vgetq_lane_u32($x.1.into_bits(), c[4] - 4) }, + if c[5] < 4 { vgetq_lane_u32($x.0.into_bits(), c[5]) } else { vgetq_lane_u32($x.1.into_bits(), c[5] - 4) }, + if c[6] < 4 { vgetq_lane_u32($x.0.into_bits(), c[6]) } else { vgetq_lane_u32($x.1.into_bits(), c[6] - 4) }, + if c[7] < 4 { vgetq_lane_u32($x.0.into_bits(), c[7]) } else { vgetq_lane_u32($x.1.into_bits(), c[7] - 4) })) + } + + } +} + +#[derive(Clone, Copy, Debug)] +pub struct FieldElement2625x4(pub(crate) [(u32x4, u32x4); 5]); + +use subtle::Choice; +use subtle::ConditionallySelectable; + +impl ConditionallySelectable for FieldElement2625x4 { + fn conditional_select( + a: &FieldElement2625x4, + b: &FieldElement2625x4, + choice: Choice, + ) -> FieldElement2625x4 { + let mask = (-(choice.unwrap_u8() as i32)) as u32; + let mask_vec = u32x4::splat(mask); + FieldElement2625x4([ + (a.0[0].0 ^ (mask_vec & (a.0[0].0 ^ b.0[0].0)), a.0[0].1 ^ (mask_vec & (a.0[0].1 ^ b.0[0].1))), + (a.0[1].0 ^ (mask_vec & (a.0[1].0 ^ b.0[1].0)), a.0[1].1 ^ (mask_vec & (a.0[1].1 ^ b.0[1].1))), + (a.0[2].0 ^ (mask_vec & (a.0[2].0 ^ b.0[2].0)), a.0[2].1 ^ (mask_vec & (a.0[2].1 ^ b.0[2].1))), + (a.0[3].0 ^ (mask_vec & (a.0[3].0 ^ b.0[3].0)), a.0[3].1 ^ (mask_vec & (a.0[3].1 ^ b.0[3].1))), + 
(a.0[4].0 ^ (mask_vec & (a.0[4].0 ^ b.0[4].0)), a.0[4].1 ^ (mask_vec & (a.0[4].1 ^ b.0[4].1))) + ]) + } + + fn conditional_assign( + &mut self, + other: &FieldElement2625x4, + choice: Choice, + ) { + let mask = (-(choice.unwrap_u8() as i32)) as u32; + let mask_vec = u32x4::splat(mask); + self.0[0].0 ^= mask_vec & (self.0[0].0 ^ other.0[0].0); + self.0[0].1 ^= mask_vec & (self.0[0].1 ^ other.0[0].1); + self.0[1].0 ^= mask_vec & (self.0[1].0 ^ other.0[1].0); + self.0[1].1 ^= mask_vec & (self.0[1].1 ^ other.0[1].1); + self.0[2].0 ^= mask_vec & (self.0[2].0 ^ other.0[2].0); + self.0[2].1 ^= mask_vec & (self.0[2].1 ^ other.0[2].1); + self.0[3].0 ^= mask_vec & (self.0[3].0 ^ other.0[3].0); + self.0[3].1 ^= mask_vec & (self.0[3].1 ^ other.0[3].1); + self.0[4].0 ^= mask_vec & (self.0[4].0 ^ other.0[4].0); + self.0[4].1 ^= mask_vec & (self.0[4].1 ^ other.0[4].1); + } +} + +impl FieldElement2625x4 { + + pub fn split(&self) -> [FieldElement51; 4] { + let mut out = [FieldElement51::ZERO; 4]; + for i in 0..5 { + let a_2i = self.0[i].0.extract(0) as u64; + let b_2i = self.0[i].0.extract(1) as u64; + let a_2i_1 = self.0[i].0.extract(2) as u64; + let b_2i_1 = self.0[i].0.extract(3) as u64; + let c_2i = self.0[i].1.extract(0) as u64; + let d_2i = self.0[i].1.extract(1) as u64; + let c_2i_1 = self.0[i].1.extract(2) as u64; + let d_2i_1 = self.0[i].1.extract(3) as u64; + + out[0].0[i] = a_2i + (a_2i_1 << 26); + out[1].0[i] = b_2i + (b_2i_1 << 26); + out[2].0[i] = c_2i + (c_2i_1 << 26); + out[3].0[i] = d_2i + (d_2i_1 << 26); + } + + out + } + + #[inline] + pub fn shuffle(&self, control: Shuffle) -> FieldElement2625x4 { + #[inline(always)] + fn shuffle_lanes(x: (u32x4, u32x4), control: Shuffle) -> (u32x4, u32x4) { + match control { + Shuffle::AAAA => lane_shuffle!(0, 0, 2, 2, 0, 0, 2, 2, x), + Shuffle::BBBB => lane_shuffle!(1, 1, 3, 3, 1, 1, 3, 3, x), + Shuffle::CACA => lane_shuffle!(4, 0, 6, 2, 4, 0, 6, 2, x), + Shuffle::DBBD => lane_shuffle!(5, 1, 7, 3, 1, 5, 3, 7, x), + 
Shuffle::ADDA => lane_shuffle!(0, 5, 2, 7, 5, 0, 7, 2, x), + Shuffle::CBCB => lane_shuffle!(4, 1, 6, 3, 4, 1, 6, 3, x), + Shuffle::ABAB => lane_shuffle!(0, 1, 2, 3, 0, 1, 2, 3, x), + Shuffle::BADC => lane_shuffle!(1, 0, 3, 2, 5, 4, 7, 6, x), + Shuffle::BACD => lane_shuffle!(1, 0, 3, 2, 4, 5, 6, 7, x), + Shuffle::ABDC => lane_shuffle!(0, 1, 2, 3, 5, 4, 7, 6, x), + } + } + + FieldElement2625x4([ + shuffle_lanes(self.0[0], control), + shuffle_lanes(self.0[1], control), + shuffle_lanes(self.0[2], control), + shuffle_lanes(self.0[3], control), + shuffle_lanes(self.0[4], control), + ]) + } + + // Can probably be sped up using multiple vset/vget instead of table + #[inline] + pub fn blend(&self, other: FieldElement2625x4, control: Lanes) -> FieldElement2625x4 { + #[inline(always)] + fn blend_lanes(x: (u32x4, u32x4), y: (u32x4, u32x4), control: Lanes) -> (u32x4, u32x4) { + unsafe { + use core::arch::aarch64::vqtbx1q_u8; + match control { + Lanes::C => { + (x.0, + vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits()).into_bits()) + } + Lanes::D => { + (x.0, + vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new(16, 16, 16, 16, 4, 5, 6, 7, 16, 16, 16, 16, 12, 13, 14, 15).into_bits()).into_bits()) + } + Lanes::AD => { + (vqtbx1q_u8(x.0.into_bits(), y.0.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits() ).into_bits(), + vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new(16, 16, 16, 16, 4, 5, 6, 7, 16, 16, 16, 16, 12, 13, 14, 15).into_bits() ).into_bits()) + } + Lanes::AB => { + (y.0, x.1) + } + Lanes::AC => { + (vqtbx1q_u8(x.0.into_bits(), y.0.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits()).into_bits(), + vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits()).into_bits()) + } + Lanes::CD => { + (x.0, y.1) + } + Lanes::BC => { + 
(vqtbx1q_u8(x.0.into_bits(), y.0.into_bits(), u8x16::new(16, 16, 16, 16, 4, 5, 6, 7, 16, 16, 16, 16, 12, 13, 14, 15).into_bits() ).into_bits(), + vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits() ).into_bits()) + } + Lanes::ABCD => { + y + } + + } + } + } + + FieldElement2625x4([ + blend_lanes(self.0[0], other.0[0], control), + blend_lanes(self.0[1], other.0[1], control), + blend_lanes(self.0[2], other.0[2], control), + blend_lanes(self.0[3], other.0[3], control), + blend_lanes(self.0[4], other.0[4], control), + ]) + } + + + pub fn zero() -> FieldElement2625x4 { + FieldElement2625x4([(u32x4::splat(0), u32x4::splat(0)); 5]) + } + + pub fn splat(x: &FieldElement51) -> FieldElement2625x4 { + FieldElement2625x4::new(x, x, x, x) + } + + + pub fn new( + x0: &FieldElement51, + x1: &FieldElement51, + x2: &FieldElement51, + x3: &FieldElement51 + ) -> FieldElement2625x4 { + let mut buf = [(u32x4::splat(0), u32x4::splat(0)); 5]; + let low_26_bits = (1 << 26) - 1; + for i in 0..5 { + let a_2i = (x0.0[i] & low_26_bits) as u32; + let a_2i_1 = (x0.0[i] >> 26) as u32; + let b_2i = (x1.0[i] & low_26_bits) as u32; + let b_2i_1 = (x1.0[i] >> 26) as u32; + let c_2i = (x2.0[i] & low_26_bits) as u32; + let c_2i_1 = (x2.0[i] >> 26) as u32; + let d_2i = (x3.0[i] & low_26_bits) as u32; + let d_2i_1 = (x3.0[i] >> 26) as u32; + + buf[i] = (u32x4::new(a_2i, b_2i, a_2i_1, b_2i_1), u32x4::new(c_2i, d_2i, c_2i_1, d_2i_1)); + } + return FieldElement2625x4(buf).reduce(); + } + + #[inline] + pub fn negate_lazy(&self) -> FieldElement2625x4 { + FieldElement2625x4([ + (P_TIMES_2_LO.0 - self.0[0].0, P_TIMES_2_LO.1 - self.0[0].1), + (P_TIMES_2_HI.0 - self.0[1].0, P_TIMES_2_HI.1 - self.0[1].1), + (P_TIMES_2_HI.0 - self.0[2].0, P_TIMES_2_HI.1 - self.0[2].1), + (P_TIMES_2_HI.0 - self.0[3].0, P_TIMES_2_HI.1 - self.0[3].1), + (P_TIMES_2_HI.0 - self.0[4].0, P_TIMES_2_HI.1 - self.0[4].1), + ]) + } + + #[inline] + pub fn 
diff_sum(&self) -> FieldElement2625x4 { + let tmp1 = self.shuffle(Shuffle::BADC); + let tmp2 = self.blend(self.negate_lazy(), Lanes::AC); + tmp1 + tmp2 + } + + + pub fn reduce(&self) -> FieldElement2625x4 { + // Negated for shift right instead of left + let shifts = (i32x4::new(-26, -26, -25, -25), i32x4::new(-26, -26, -25, -25)); + let masks = (u32x4::new( + (1 << 26) - 1, + (1 << 26) - 1, + (1 << 25) - 1, + (1 << 25) - 1), + u32x4::new( + (1 << 26) - 1, + (1 << 26) - 1, + (1 << 25) - 1, + (1 << 25) - 1) + ); + + + // Use mutliple transposes instead of table lookup? + let rotated_carryout = |v: (u32x4, u32x4)| -> (u32x4, u32x4) { + unsafe { + use core::arch::aarch64::vqshlq_u32; + use core::arch::aarch64::vget_low_u32; + use core::arch::aarch64::vget_high_u32; + use core::arch::aarch64::vcombine_u32; + + let c: (u32x4, u32x4) = (vqshlq_u32(v.0.into_bits(), shifts.0.into_bits()).into_bits(), + vqshlq_u32(v.1.into_bits(), shifts.1.into_bits()).into_bits()); + (vcombine_u32(vget_high_u32(c.0.into_bits()), vget_low_u32(c.0.into_bits())).into_bits(), + vcombine_u32(vget_high_u32(c.1.into_bits()), vget_low_u32(c.1.into_bits())).into_bits()) + + } + }; + + let combine = |v_lo: (u32x4, u32x4), v_hi: (u32x4, u32x4)| -> (u32x4, u32x4) { + unsafe { + use core::arch::aarch64::vget_low_u32; + use core::arch::aarch64::vget_high_u32; + use core::arch::aarch64::vcombine_u32; + (vcombine_u32(vget_low_u32(v_lo.0.into_bits()), vget_high_u32(v_hi.0.into_bits())).into_bits(), + vcombine_u32(vget_low_u32(v_lo.1.into_bits()), vget_high_u32(v_hi.1.into_bits())).into_bits()) + } + }; + + let mut v = self.0; + + let c10 = rotated_carryout(v[0]); + let mut com = combine((u32x4::splat(0), u32x4::splat(0)), c10); + v[0] = ((v[0].0 & masks.0) + com.0, (v[0].1 & masks.1) + com.1); + + let c32 = rotated_carryout(v[1]); + com = combine(c10, c32); + v[1] = ((v[1].0 & masks.0) + com.0, (v[1].1 & masks.1) + com.1); + + let c54 = rotated_carryout(v[2]); + com = combine(c32, c54); + v[2] = ((v[2].0 & 
masks.0) + com.0, (v[2].1 & masks.1) + com.1); + + let c76 = rotated_carryout(v[3]); + com = combine(c54, c76); + v[3] = ((v[3].0 & masks.0) + com.0, (v[3].1 & masks.1) + com.1); + + let c98 = rotated_carryout(v[4]); + com = combine(c76, c98); + v[4] = ((v[4].0 & masks.0) + com.0, (v[4].1 & masks.1) + com.1); + + let c9_19: (u32x4, u32x4) = unsafe { + use core::arch::aarch64::vmulq_n_u32; + use core::arch::aarch64::vget_low_u32; + use core::arch::aarch64::vcombine_u32; + + let c9_19_spread: (u32x4, u32x4) = (vmulq_n_u32(c98.0.into_bits(), 19).into_bits(), vmulq_n_u32(c98.1.into_bits(), 19).into_bits()); + + (vcombine_u32(vget_low_u32(c9_19_spread.0.into_bits()), u32x2::splat(0).into_bits()).into_bits(), + vcombine_u32(vget_low_u32(c9_19_spread.1.into_bits()), u32x2::splat(0).into_bits()).into_bits()) + }; + v[0] = (v[0].0 + c9_19.0, v[0].1 + c9_19.1); + + FieldElement2625x4(v) + } + + #[inline] + fn reduce64(mut z: [(u64x2, u64x2); 10]) -> FieldElement2625x4 { + + #[allow(non_snake_case)] + let LOW_25_BITS: u64x2 = u64x2::splat((1 << 25) - 1); + #[allow(non_snake_case)] + let LOW_26_BITS: u64x2 = u64x2::splat((1 << 26) - 1); + + let carry = |z: &mut [(u64x2, u64x2); 10], i: usize| { + debug_assert!(i < 9); + if i % 2 == 0 { + z[i + 1].0 = z[i + 1].0 + (z[i].0 >> 26); + z[i + 1].1 = z[i + 1].1 + (z[i].1 >> 26); + z[i].0 = z[i].0 & LOW_26_BITS; + z[i].1 = z[i].1 & LOW_26_BITS; + } else { + z[i + 1].0 = z[i + 1].0 + (z[i].0 >> 25); + z[i + 1].1 = z[i + 1].1 + (z[i].1 >> 25); + z[i].0 = z[i].0 & LOW_25_BITS; + z[i].1 = z[i].1 & LOW_25_BITS; + } + }; + + carry(&mut z, 0); carry(&mut z, 4); + carry(&mut z, 1); carry(&mut z, 5); + carry(&mut z, 2); carry(&mut z, 6); + carry(&mut z, 3); carry(&mut z, 7); + carry(&mut z, 4); carry(&mut z, 8); + + let c = (z[9].0 >> 25, z[9].1 >> 25); + z[9] = (z[9].0 & LOW_25_BITS, z[9].1 & LOW_25_BITS); + let mut c0: (u64x2, u64x2) = (c.0 & LOW_26_BITS, c.1 & LOW_26_BITS); + let mut c1: (u64x2, u64x2) = (c.0 >> 26, c.1 >> 26); + + unsafe { 
+ use core::arch::aarch64::vmulq_n_u32; + + c0 = (vmulq_n_u32(c0.0.into_bits(), 19).into_bits(), + vmulq_n_u32(c0.1.into_bits(), 19).into_bits()); + c1 = (vmulq_n_u32(c1.0.into_bits(), 19).into_bits(), + vmulq_n_u32(c1.1.into_bits(), 19).into_bits()); + } + + + z[0] = (z[0].0 + c0.0, z[0].1 + c0.1); + z[1] = (z[1].0 + c1.0, z[1].1 + c1.1); + carry(&mut z, 0); + + FieldElement2625x4([ + repack_pair((z[0].0.into_bits(), z[0].1.into_bits()), (z[1].0.into_bits(), z[1].1.into_bits())), + repack_pair((z[2].0.into_bits(), z[2].1.into_bits()), (z[3].0.into_bits(), z[3].1.into_bits())), + repack_pair((z[4].0.into_bits(), z[4].1.into_bits()), (z[5].0.into_bits(), z[5].1.into_bits())), + repack_pair((z[6].0.into_bits(), z[6].1.into_bits()), (z[7].0.into_bits(), z[7].1.into_bits())), + repack_pair((z[8].0.into_bits(), z[8].1.into_bits()), (z[9].0.into_bits(), z[9].1.into_bits())), + ]) + } + + #[allow(non_snake_case)] + pub fn square_and_negate_D(&self) -> FieldElement2625x4 { + #[inline(always)] + fn m(x: (u32x2, u32x2), y: (u32x2, u32x2)) -> u64x4 { + use core::arch::aarch64::vmull_u32; + unsafe { + let z0: u64x2 = vmull_u32(x.0.into_bits(), y.0.into_bits()).into_bits(); + let z1: u64x2 = vmull_u32(x.1.into_bits(), y.1.into_bits()).into_bits(); + u64x4::new(z0.extract(0), z0.extract(1), z1.extract(0), z1.extract(1)) + } + } + + #[inline(always)] + fn m_lo(x: (u32x2, u32x2), y: (u32x2, u32x2)) -> (u32x2, u32x2) { + use core::arch::aarch64::vmull_u32; + unsafe { + let x: (u32x4, u32x4) = (vmull_u32(x.0.into_bits(), y.0.into_bits()).into_bits(), + vmull_u32(x.1.into_bits(), y.1.into_bits()).into_bits()); + (u32x2::new(x.0.extract(0), x.0.extract(2)), u32x2::new(x.1.extract(0), x.1.extract(2))) + } + } + + let v19 = (u32x2::new(19, 19), u32x2::new(19, 19)); + + let (x0, x1) = unpack_pair(self.0[0]); + let (x2, x3) = unpack_pair(self.0[1]); + let (x4, x5) = unpack_pair(self.0[2]); + let (x6, x7) = unpack_pair(self.0[3]); + let (x8, x9) = unpack_pair(self.0[4]); + + let x0_2 = 
(x0.0 << 1, x0.1 << 1); + let x1_2 = (x1.0 << 1, x1.1 << 1); + let x2_2 = (x2.0 << 1, x2.1 << 1); + let x3_2 = (x3.0 << 1, x3.1 << 1); + let x4_2 = (x4.0 << 1, x4.1 << 1); + let x5_2 = (x5.0 << 1, x5.1 << 1); + let x6_2 = (x6.0 << 1, x6.1 << 1); + let x7_2 = (x7.0 << 1, x7.1 << 1); + + let x5_19 = m_lo(v19, x5); + let x6_19 = m_lo(v19, x6); + let x7_19 = m_lo(v19, x7); + let x8_19 = m_lo(v19, x8); + let x9_19 = m_lo(v19, x9); + + let z0 = m(x0, x0) + m(x2_2,x8_19) + m(x4_2,x6_19) + ((m(x1_2,x9_19) + m(x3_2,x7_19) + m(x5,x5_19)) << 1); + let z1 = m(x0_2,x1) + m(x3_2,x8_19) + m(x5_2,x6_19) + ((m(x2,x9_19) + m(x4,x7_19)) << 1); + let z2 = m(x0_2,x2) + m(x1_2,x1) + m(x4_2,x8_19) + m(x6,x6_19) + ((m(x3_2,x9_19) + m(x5_2,x7_19)) << 1); + let z3 = m(x0_2,x3) + m(x1_2,x2) + m(x5_2,x8_19) + ((m(x4,x9_19) + m(x6,x7_19)) << 1); + let z4 = m(x0_2,x4) + m(x1_2,x3_2) + m(x2, x2) + m(x6_2,x8_19) + ((m(x5_2,x9_19) + m(x7,x7_19)) << 1); + let z5 = m(x0_2,x5) + m(x1_2,x4) + m(x2_2,x3) + m(x7_2,x8_19) + ((m(x6,x9_19)) << 1); + let z6 = m(x0_2,x6) + m(x1_2,x5_2) + m(x2_2,x4) + m(x3_2,x3) + m(x8,x8_19) + ((m(x7_2,x9_19)) << 1); + let z7 = m(x0_2,x7) + m(x1_2,x6) + m(x2_2,x5) + m(x3_2,x4) + ((m(x8,x9_19)) << 1); + let z8 = m(x0_2,x8) + m(x1_2,x7_2) + m(x2_2,x6) + m(x3_2,x5_2) + m(x4,x4) + ((m(x9,x9_19)) << 1); + let z9 = m(x0_2,x9) + m(x1_2,x8) + m(x2_2,x7) + m(x3_2,x6) + m(x4_2,x5); + + + let low__p37 = u64x4::splat(0x3ffffed << 37); + let even_p37 = u64x4::splat(0x3ffffff << 37); + let odd__p37 = u64x4::splat(0x1ffffff << 37); + + let negate_D = |x_01: u64x4, p_01: u64x4| -> (u64x2, u64x2) { + unsafe { + use core::arch::aarch64::vget_low_u32; + use core::arch::aarch64::vget_high_u32; + use core::arch::aarch64::vcombine_u32; + + let x = (u64x2::new(x_01.extract(0), x_01.extract(1)), u64x2::new(x_01.extract(2), x_01.extract(3))); + let p = (u64x2::new(p_01.extract(0), p_01.extract(1)), u64x2::new(p_01.extract(2), p_01.extract(3))); + + (x.0.into_bits(), + 
vcombine_u32(vget_low_u32(x.1.into_bits()), + vget_high_u32((p.1 - x.1).into_bits())).into_bits()) + } + }; + + let z0s = negate_D(z0, low__p37); + let z1s = negate_D(z1, odd__p37); + let z2s = negate_D(z2, even_p37); + let z3s = negate_D(z3, odd__p37); + let z4s = negate_D(z4, even_p37); + let z5s = negate_D(z5, odd__p37); + let z6s = negate_D(z6, even_p37); + let z7s = negate_D(z7, odd__p37); + let z8s = negate_D(z8, even_p37); + let z9s = negate_D(z9, odd__p37); + + FieldElement2625x4::reduce64([z0s, z1s, z2s, z3s, z4s, z5s, z6s, z7s, z8s, z9s]) + } +} + +impl Neg for FieldElement2625x4 { + type Output = FieldElement2625x4; + #[inline] + fn neg(self) -> FieldElement2625x4 { + FieldElement2625x4([ + (P_TIMES_16_LO.0 - self.0[0].0, P_TIMES_16_LO.1 - self.0[0].1), + (P_TIMES_16_HI.0 - self.0[1].0, P_TIMES_16_HI.1 - self.0[1].1), + (P_TIMES_16_HI.0 - self.0[2].0, P_TIMES_16_HI.1 - self.0[2].1), + (P_TIMES_16_HI.0 - self.0[3].0, P_TIMES_16_HI.1 - self.0[3].1), + (P_TIMES_16_HI.0 - self.0[4].0, P_TIMES_16_HI.1 - self.0[4].1), + ]).reduce() + } +} + +impl Add for FieldElement2625x4 { + type Output = FieldElement2625x4; + #[inline] + fn add(self, rhs: FieldElement2625x4) -> FieldElement2625x4 { + FieldElement2625x4([ + (self.0[0].0 + rhs.0[0].0, self.0[0].1 + rhs.0[0].1), + (self.0[1].0 + rhs.0[1].0, self.0[1].1 + rhs.0[1].1), + (self.0[2].0 + rhs.0[2].0, self.0[2].1 + rhs.0[2].1), + (self.0[3].0 + rhs.0[3].0, self.0[3].1 + rhs.0[3].1), + (self.0[4].0 + rhs.0[4].0, self.0[4].1 + rhs.0[4].1), + ]) + } +} + +impl Mul<(u32, u32, u32, u32)> for FieldElement2625x4 { + type Output = FieldElement2625x4; + #[inline] + fn mul(self, scalars: (u32, u32, u32, u32)) -> FieldElement2625x4 { + unsafe { + use core::arch::aarch64::vmull_u32; + + let consts = (u32x2::new(scalars.0, scalars.1), u32x2::new(scalars.2, scalars.3)); + + let (b0, b1) = unpack_pair(self.0[0]); + let (b2, b3) = unpack_pair(self.0[1]); + let (b4, b5) = unpack_pair(self.0[2]); + let (b6, b7) = 
unpack_pair(self.0[3]); + let (b8, b9) = unpack_pair(self.0[4]); + + + FieldElement2625x4::reduce64([ + (vmull_u32(b0.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b0.1.into_bits(), consts.1.into_bits()).into_bits()), + (vmull_u32(b1.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b1.1.into_bits(), consts.1.into_bits()).into_bits()), + (vmull_u32(b2.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b2.1.into_bits(), consts.1.into_bits()).into_bits()), + (vmull_u32(b3.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b3.1.into_bits(), consts.1.into_bits()).into_bits()), + (vmull_u32(b4.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b4.1.into_bits(), consts.1.into_bits()).into_bits()), + (vmull_u32(b5.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b5.1.into_bits(), consts.1.into_bits()).into_bits()), + (vmull_u32(b6.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b6.1.into_bits(), consts.1.into_bits()).into_bits()), + (vmull_u32(b7.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b7.1.into_bits(), consts.1.into_bits()).into_bits()), + (vmull_u32(b8.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b8.1.into_bits(), consts.1.into_bits()).into_bits()), + (vmull_u32(b9.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b9.1.into_bits(), consts.1.into_bits()).into_bits()) + ]) + } + } +} + +impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { + type Output = FieldElement2625x4; + fn mul(self, rhs: &'b FieldElement2625x4) -> FieldElement2625x4 { + #[inline(always)] + fn m(x: (u32x2, u32x2), y: (u32x2, u32x2)) -> u64x4 { + use core::arch::aarch64::vmull_u32; + unsafe { + let z0: u64x2 = vmull_u32(x.0.into_bits(), y.0.into_bits()).into_bits(); + let z1: u64x2 = vmull_u32(x.1.into_bits(), y.1.into_bits()).into_bits(); + u64x4::new(z0.extract(0), z0.extract(1), z1.extract(0), z1.extract(1)) + } + } + + #[inline(always)] + fn m_lo(x: (u32x2, 
u32x2), y: (u32x2, u32x2)) -> (u32x2, u32x2) { + use core::arch::aarch64::vmull_u32; + unsafe { + let x: (u32x4, u32x4) = (vmull_u32(x.0.into_bits(), y.0.into_bits()).into_bits(), + vmull_u32(x.1.into_bits(), y.1.into_bits()).into_bits()); + (u32x2::new(x.0.extract(0), x.0.extract(2)), u32x2::new(x.1.extract(0), x.1.extract(2))) + } + } + + let (x0, x1) = unpack_pair(self.0[0]); + let (x2, x3) = unpack_pair(self.0[1]); + let (x4, x5) = unpack_pair(self.0[2]); + let (x6, x7) = unpack_pair(self.0[3]); + let (x8, x9) = unpack_pair(self.0[4]); + + let (y0, y1) = unpack_pair(rhs.0[0]); + let (y2, y3) = unpack_pair(rhs.0[1]); + let (y4, y5) = unpack_pair(rhs.0[2]); + let (y6, y7) = unpack_pair(rhs.0[3]); + let (y8, y9) = unpack_pair(rhs.0[4]); + + let v19 = (u32x2::new(19, 19), u32x2::new(19, 19)); + + let y1_19 = m_lo(v19, y1); + let y2_19 = m_lo(v19, y2); + let y3_19 = m_lo(v19, y3); + let y4_19 = m_lo(v19, y4); + let y5_19 = m_lo(v19, y5); + let y6_19 = m_lo(v19, y6); + let y7_19 = m_lo(v19, y7); + let y8_19 = m_lo(v19, y8); + let y9_19 = m_lo(v19, y9); + + let x1_2 = (x1.0 + x1.0, x1.1 + x1.1); + let x3_2 = (x3.0 + x3.0, x3.1 + x3.1); + let x5_2 = (x5.0 + x5.0, x5.1 + x5.1); + let x7_2 = (x7.0 + x7.0, x7.1 + x7.1); + let x9_2 = (x9.0 + x9.0, x9.1 + x9.1); + + let z0 = m(x0,y0) + m(x1_2,y9_19) + m(x2,y8_19) + m(x3_2,y7_19) + m(x4,y6_19) + m(x5_2,y5_19) + m(x6,y4_19) + m(x7_2,y3_19) + m(x8,y2_19) + m(x9_2,y1_19); + let z1 = m(x0,y1) + m(x1,y0) + m(x2,y9_19) + m(x3,y8_19) + m(x4,y7_19) + m(x5,y6_19) + m(x6,y5_19) + m(x7,y4_19) + m(x8,y3_19) + m(x9,y2_19); + let z2 = m(x0,y2) + m(x1_2,y1) + m(x2,y0) + m(x3_2,y9_19) + m(x4,y8_19) + m(x5_2,y7_19) + m(x6,y6_19) + m(x7_2,y5_19) + m(x8,y4_19) + m(x9_2,y3_19); + let z3 = m(x0,y3) + m(x1,y2) + m(x2,y1) + m(x3,y0) + m(x4,y9_19) + m(x5,y8_19) + m(x6,y7_19) + m(x7,y6_19) + m(x8,y5_19) + m(x9,y4_19); + let z4 = m(x0,y4) + m(x1_2,y3) + m(x2,y2) + m(x3_2,y1) + m(x4,y0) + m(x5_2,y9_19) + m(x6,y8_19) + m(x7_2,y7_19) + m(x8,y6_19) + 
m(x9_2,y5_19); + let z5 = m(x0,y5) + m(x1,y4) + m(x2,y3) + m(x3,y2) + m(x4,y1) + m(x5,y0) + m(x6,y9_19) + m(x7,y8_19) + m(x8,y7_19) + m(x9,y6_19); + let z6 = m(x0,y6) + m(x1_2,y5) + m(x2,y4) + m(x3_2,y3) + m(x4,y2) + m(x5_2,y1) + m(x6,y0) + m(x7_2,y9_19) + m(x8,y8_19) + m(x9_2,y7_19); + let z7 = m(x0,y7) + m(x1,y6) + m(x2,y5) + m(x3,y4) + m(x4,y3) + m(x5,y2) + m(x6,y1) + m(x7,y0) + m(x8,y9_19) + m(x9,y8_19); + let z8 = m(x0,y8) + m(x1_2,y7) + m(x2,y6) + m(x3_2,y5) + m(x4,y4) + m(x5_2,y3) + m(x6,y2) + m(x7_2,y1) + m(x8,y0) + m(x9_2,y9_19); + let z9 = m(x0,y9) + m(x1,y8) + m(x2,y7) + m(x3,y6) + m(x4,y5) + m(x5,y4) + m(x6,y3) + m(x7,y2) + m(x8,y1) + m(x9,y0); + + let f = |x: u64x4| -> (u64x2, u64x2) { + ((u64x2::new(x.extract(0), x.extract(1))).into_bits(), (u64x2::new(x.extract(2), x.extract(3))).into_bits()) + }; + + FieldElement2625x4::reduce64([f(z0), f(z1), f(z2), f(z3), f(z4), f(z5), f(z6), f(z7), f(z8), f(z9)]) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_unpack_repack_pair() { + let x0 = FieldElement51([10000 + (10001 << 26), 0, 0, 0, 0]); + let x1 = FieldElement51([10100 + (10101 << 26), 0, 0, 0, 0]); + let x2 = FieldElement51([10200 + (10201 << 26), 0, 0, 0, 0]); + let x3 = FieldElement51([10300 + (10301 << 26), 0, 0, 0, 0]); + + let vec = FieldElement2625x4::new(&x0, &x1, &x2, &x3); + + let src = vec.0[0]; + + let (a, b) = unpack_pair(src); + + let expected_a = (u32x2::new(10000, 10100), u32x2::new(10200, 10300)); + let expected_b = (u32x2::new(10001, 10101), u32x2::new(10201, 10301)); + + assert_eq!(a, expected_a); + assert_eq!(b, expected_b); + + let expected_src = repack_pair( + (u32x4::new(a.0.extract(0), 0, a.0.extract(1), 0), + u32x4::new(a.1.extract(0), 0, a.1.extract(1), 0)), + (u32x4::new(b.0.extract(0), 0, b.0.extract(1), 0), + u32x4::new(b.1.extract(0), 0, b.1.extract(1), 0))); + + assert_eq!(src, expected_src); + } + + #[test] + fn scale_by_curve_constants() { + let mut x = 
FieldElement2625x4::splat(&FieldElement51::ONE); + + x = x * (121666, 121666, 2*121666, 2*121665); + + let xs = x.split(); + assert_eq!(xs[0], FieldElement51([121666, 0, 0, 0, 0])); + assert_eq!(xs[1], FieldElement51([121666, 0, 0, 0, 0])); + assert_eq!(xs[2], FieldElement51([2 * 121666, 0, 0, 0, 0])); + assert_eq!(xs[3], FieldElement51([2 * 121665, 0, 0, 0, 0])); + } + + #[test] + fn diff_sum_vs_serial() { + let x0 = FieldElement51([10000, 10001, 10002, 10003, 10004]); + let x1 = FieldElement51([10100, 10101, 10102, 10103, 10104]); + let x2 = FieldElement51([10200, 10201, 10202, 10203, 10204]); + let x3 = FieldElement51([10300, 10301, 10302, 10303, 10304]); + + let vec = FieldElement2625x4::new(&x0, &x1, &x2, &x3).diff_sum(); + + let result = vec.split(); + + assert_eq!(result[0], &x1 - &x0); + assert_eq!(result[1], &x1 + &x0); + assert_eq!(result[2], &x3 - &x2); + assert_eq!(result[3], &x3 + &x2); + } + + #[test] + fn square_vs_serial() { + let x0 = FieldElement51([10000, 10001, 10002, 10003, 10004]); + let x1 = FieldElement51([10100, 10101, 10102, 10103, 10104]); + let x2 = FieldElement51([10200, 10201, 10202, 10203, 10204]); + let x3 = FieldElement51([10300, 10301, 10302, 10303, 10304]); + + let vec = FieldElement2625x4::new(&x0, &x1, &x2, &x3); + + let result = vec.square_and_negate_D().split(); + + assert_eq!(result[0], &x0 * &x0); + assert_eq!(result[1], &x1 * &x1); + assert_eq!(result[2], &x2 * &x2); + assert_eq!(result[3], -&(&x3 * &x3)); + } + + #[test] + fn multiply_vs_serial() { + let x0 = FieldElement51([10000, 10001, 10002, 10003, 10004]); + let x1 = FieldElement51([10100, 10101, 10102, 10103, 10104]); + let x2 = FieldElement51([10200, 10201, 10202, 10203, 10204]); + let x3 = FieldElement51([10300, 10301, 10302, 10303, 10304]); + + let vec = FieldElement2625x4::new(&x0, &x1, &x2, &x3); + let vecprime = vec.clone(); + + let result = (&vec * &vecprime).split(); + + assert_eq!(result[0], &x0 * &x0); + assert_eq!(result[1], &x1 * &x1); + 
assert_eq!(result[2], &x2 * &x2); + assert_eq!(result[3], &x3 * &x3); + } + + #[test] + fn new_split_roundtrips() { + let x0 = FieldElement51::from_bytes(&[0x10; 32]); + let x1 = FieldElement51::from_bytes(&[0x11; 32]); + let x2 = FieldElement51::from_bytes(&[0x12; 32]); + let x3 = FieldElement51::from_bytes(&[0x13; 32]); + + let vec = FieldElement2625x4::new(&x0, &x1, &x2, &x3); + + let splits = vec.split(); + + assert_eq!(x0, splits[0]); + assert_eq!(x1, splits[1]); + assert_eq!(x2, splits[2]); + assert_eq!(x3, splits[3]); + } +} diff --git a/curve25519-dalek/src/backend/vector/neon/mod.rs b/curve25519-dalek/src/backend/vector/neon/mod.rs new file mode 100644 index 000000000..de3f33c25 --- /dev/null +++ b/curve25519-dalek/src/backend/vector/neon/mod.rs @@ -0,0 +1,16 @@ +// -*- mode: rust; -*- +// +// This file is part of curve25519-dalek. +// Copyright (c) 2016-2019 Isis Lovecruft, Henry de Valence +// See LICENSE for licensing information. +// +// Authors: +// - Isis Agora Lovecruft +// - Henry de Valence +// - Robrecht Blancquaert + +pub(crate) mod field; + +pub(crate) mod edwards; + +pub(crate) mod constants; From 13b52a09444215da1fc2e46bb475848e3bdeddfb Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Wed, 7 Dec 2022 15:17:21 +0100 Subject: [PATCH 02/14] Use packed_simd::shuffle instead of vqtbx1q_u8 --- .../src/backend/vector/neon/field.rs | 59 ++++++++----------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/curve25519-dalek/src/backend/vector/neon/field.rs b/curve25519-dalek/src/backend/vector/neon/field.rs index 8c709b3bc..3fff8eee2 100644 --- a/curve25519-dalek/src/backend/vector/neon/field.rs +++ b/curve25519-dalek/src/backend/vector/neon/field.rs @@ -211,40 +211,33 @@ impl FieldElement2625x4 { pub fn blend(&self, other: FieldElement2625x4, control: Lanes) -> FieldElement2625x4 { #[inline(always)] fn blend_lanes(x: (u32x4, u32x4), y: (u32x4, u32x4), control: Lanes) -> (u32x4, u32x4) { - unsafe { - use core::arch::aarch64::vqtbx1q_u8; 
- match control { - Lanes::C => { - (x.0, - vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits()).into_bits()) - } - Lanes::D => { - (x.0, - vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new(16, 16, 16, 16, 4, 5, 6, 7, 16, 16, 16, 16, 12, 13, 14, 15).into_bits()).into_bits()) - } - Lanes::AD => { - (vqtbx1q_u8(x.0.into_bits(), y.0.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits() ).into_bits(), - vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new(16, 16, 16, 16, 4, 5, 6, 7, 16, 16, 16, 16, 12, 13, 14, 15).into_bits() ).into_bits()) - } - Lanes::AB => { - (y.0, x.1) - } - Lanes::AC => { - (vqtbx1q_u8(x.0.into_bits(), y.0.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits()).into_bits(), - vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits()).into_bits()) - } - Lanes::CD => { - (x.0, y.1) - } - Lanes::BC => { - (vqtbx1q_u8(x.0.into_bits(), y.0.into_bits(), u8x16::new(16, 16, 16, 16, 4, 5, 6, 7, 16, 16, 16, 16, 12, 13, 14, 15).into_bits() ).into_bits(), - vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits() ).into_bits()) - } - Lanes::ABCD => { - y - } - + use packed_simd::shuffle; + match control { + Lanes::C => { + (x.0, shuffle!(y.1, x.1, [0, 5, 2, 7])) + } + Lanes::D => { + (x.0, shuffle!(y.1, x.1, [4, 1, 6, 3])) + } + Lanes::AD => { + (shuffle!(y.0, x.0, [0, 5, 2, 7]), shuffle!(y.1, x.1, [4, 1, 6, 3])) + } + Lanes::AB => { + (y.0, x.1) } + Lanes::AC => { + (shuffle!(y.0, x.0, [0, 5, 2, 7]), shuffle!(y.1, x.1, [0, 5, 2, 7])) + } + Lanes::CD => { + (x.0, y.1) + } + Lanes::BC => { + (shuffle!(y.0, x.0, [4, 1, 6, 3]), shuffle!(y.1, x.1, [0, 5, 2, 7])) + } + Lanes::ABCD => { + y + } + } } From 14e05d4663d28d6da8ccae900d94cce97b7ef7c4 Mon Sep 17 00:00:00 
2001 From: Ruben De Smet Date: Thu, 8 Dec 2022 15:14:20 +0100 Subject: [PATCH 03/14] Use shuffle! macro instead of manual lane swapping in FieldElement2625x4::shuffle Co-authored-by: Robrecht Blacquaert --- .../src/backend/vector/neon/field.rs | 39 ++++++------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/curve25519-dalek/src/backend/vector/neon/field.rs b/curve25519-dalek/src/backend/vector/neon/field.rs index 3fff8eee2..9a463d2ea 100644 --- a/curve25519-dalek/src/backend/vector/neon/field.rs +++ b/curve25519-dalek/src/backend/vector/neon/field.rs @@ -95,24 +95,6 @@ pub enum Shuffle { ABDC, } -macro_rules! lane_shuffle { - {$l0:expr, $l1:expr, $l2:expr, $l3:expr, $l4:expr, $l5:expr, $l6:expr, $l7:expr, $x:expr} => { - unsafe { - use core::arch::aarch64::vgetq_lane_u32; - const c: [i32; 8] = [$l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7]; - (u32x4::new(if c[0] < 4 { vgetq_lane_u32($x.0.into_bits(), c[0]) } else { vgetq_lane_u32($x.1.into_bits(), c[0] - 4) }, - if c[1] < 4 { vgetq_lane_u32($x.0.into_bits(), c[1]) } else { vgetq_lane_u32($x.1.into_bits(), c[1] - 4) }, - if c[2] < 4 { vgetq_lane_u32($x.0.into_bits(), c[2]) } else { vgetq_lane_u32($x.1.into_bits(), c[2] - 4) }, - if c[3] < 4 { vgetq_lane_u32($x.0.into_bits(), c[3]) } else { vgetq_lane_u32($x.1.into_bits(), c[3] - 4) }), - u32x4::new(if c[4] < 4 { vgetq_lane_u32($x.0.into_bits(), c[4]) } else { vgetq_lane_u32($x.1.into_bits(), c[4] - 4) }, - if c[5] < 4 { vgetq_lane_u32($x.0.into_bits(), c[5]) } else { vgetq_lane_u32($x.1.into_bits(), c[5] - 4) }, - if c[6] < 4 { vgetq_lane_u32($x.0.into_bits(), c[6]) } else { vgetq_lane_u32($x.1.into_bits(), c[6] - 4) }, - if c[7] < 4 { vgetq_lane_u32($x.0.into_bits(), c[7]) } else { vgetq_lane_u32($x.1.into_bits(), c[7] - 4) })) - } - - } -} - #[derive(Clone, Copy, Debug)] pub struct FieldElement2625x4(pub(crate) [(u32x4, u32x4); 5]); @@ -183,17 +165,18 @@ impl FieldElement2625x4 { pub fn shuffle(&self, control: Shuffle) -> FieldElement2625x4 { 
#[inline(always)] fn shuffle_lanes(x: (u32x4, u32x4), control: Shuffle) -> (u32x4, u32x4) { + use packed_simd::shuffle; match control { - Shuffle::AAAA => lane_shuffle!(0, 0, 2, 2, 0, 0, 2, 2, x), - Shuffle::BBBB => lane_shuffle!(1, 1, 3, 3, 1, 1, 3, 3, x), - Shuffle::CACA => lane_shuffle!(4, 0, 6, 2, 4, 0, 6, 2, x), - Shuffle::DBBD => lane_shuffle!(5, 1, 7, 3, 1, 5, 3, 7, x), - Shuffle::ADDA => lane_shuffle!(0, 5, 2, 7, 5, 0, 7, 2, x), - Shuffle::CBCB => lane_shuffle!(4, 1, 6, 3, 4, 1, 6, 3, x), - Shuffle::ABAB => lane_shuffle!(0, 1, 2, 3, 0, 1, 2, 3, x), - Shuffle::BADC => lane_shuffle!(1, 0, 3, 2, 5, 4, 7, 6, x), - Shuffle::BACD => lane_shuffle!(1, 0, 3, 2, 4, 5, 6, 7, x), - Shuffle::ABDC => lane_shuffle!(0, 1, 2, 3, 5, 4, 7, 6, x), + Shuffle::AAAA => (shuffle!(x.0, x.1, [0, 0, 2, 2]), shuffle!(x.0, x.1, [0, 0, 2, 2])), + Shuffle::BBBB => (shuffle!(x.0, x.1, [1, 1, 3, 3]), shuffle!(x.0, x.1, [1, 1, 3, 3])), + Shuffle::CACA => (shuffle!(x.0, x.1, [4, 0, 6, 2]), shuffle!(x.0, x.1, [4, 0, 6, 2])), + Shuffle::DBBD => (shuffle!(x.0, x.1, [5, 1, 7, 3]), shuffle!(x.0, x.1, [1, 5, 3, 7])), + Shuffle::ADDA => (shuffle!(x.0, x.1, [0, 5, 2, 7]), shuffle!(x.0, x.1, [5, 0, 7, 2])), + Shuffle::CBCB => (shuffle!(x.0, x.1, [4, 1, 6, 3]), shuffle!(x.0, x.1, [4, 1, 6, 3])), + Shuffle::ABAB => (shuffle!(x.0, x.1, [0, 1, 2, 3]), shuffle!(x.0, x.1, [0, 1, 2, 3])), + Shuffle::BADC => (shuffle!(x.0, x.1, [1, 0, 3, 2]), shuffle!(x.0, x.1, [5, 4, 7, 6])), + Shuffle::BACD => (shuffle!(x.0, x.1, [1, 0, 3, 2]), shuffle!(x.0, x.1, [4, 5, 6, 7])), + Shuffle::ABDC => (shuffle!(x.0, x.1, [0, 1, 2, 3]), shuffle!(x.0, x.1, [5, 4, 7, 6])), } } From 58a853dcd32fc41c9d0c543c7b5ae5f4393f1887 Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Wed, 14 Dec 2022 10:38:48 +0100 Subject: [PATCH 04/14] Rustfmt: retain some manual formatting --- .../src/backend/vector/neon/edwards.rs | 6 +- .../src/backend/vector/neon/field.rs | 242 +++++++++++------- 2 files changed, 149 insertions(+), 99 deletions(-) diff 
--git a/curve25519-dalek/src/backend/vector/neon/edwards.rs b/curve25519-dalek/src/backend/vector/neon/edwards.rs index 8808c0ebe..d99fce854 100644 --- a/curve25519-dalek/src/backend/vector/neon/edwards.rs +++ b/curve25519-dalek/src/backend/vector/neon/edwards.rs @@ -320,6 +320,7 @@ impl<'a> From<&'a edwards::EdwardsPoint> for NafLookupTable8 { mod test { use super::*; + #[rustfmt::skip] // Skip S8..S11 formatting fn serial_add(P: edwards::EdwardsPoint, Q: edwards::EdwardsPoint) -> edwards::EdwardsPoint { use crate::backend::serial::u64::field::FieldElement51; @@ -516,10 +517,11 @@ mod test { #[test] fn basepoint_odd_lookup_table_verify() { - use crate::constants; use crate::backend::vector::neon::constants::BASEPOINT_ODD_LOOKUP_TABLE; + use crate::constants; - let basepoint_odd_table = NafLookupTable8::::from(&constants::ED25519_BASEPOINT_POINT); + let basepoint_odd_table = + NafLookupTable8::::from(&constants::ED25519_BASEPOINT_POINT); println!("Testing basepoint table"); let table_B = &BASEPOINT_ODD_LOOKUP_TABLE; diff --git a/curve25519-dalek/src/backend/vector/neon/field.rs b/curve25519-dalek/src/backend/vector/neon/field.rs index 9a463d2ea..63db311f1 100644 --- a/curve25519-dalek/src/backend/vector/neon/field.rs +++ b/curve25519-dalek/src/backend/vector/neon/field.rs @@ -21,10 +21,12 @@ //! arm instructions. 
use core::ops::{Add, Mul, Neg}; -use packed_simd::{u32x4, u32x2, i32x4, u64x4, u64x2, IntoBits}; +use packed_simd::{i32x4, u32x2, u32x4, u64x2, u64x4, IntoBits}; -use crate::backend::vector::neon::constants::{P_TIMES_16_HI, P_TIMES_16_LO, P_TIMES_2_HI, P_TIMES_2_LO}; use crate::backend::serial::u64::field::FieldElement51; +use crate::backend::vector::neon::constants::{ + P_TIMES_16_HI, P_TIMES_16_LO, P_TIMES_2_HI, P_TIMES_2_LO, +}; /// Unpack 32-bit lanes: /// ((a0, b0, a1, b1) ,(c0, d0, c1, d1)) @@ -38,8 +40,8 @@ fn unpack_pair(src: (u32x4, u32x4)) -> ((u32x2, u32x2), (u32x2, u32x2)) { let b0: u32x2; let b1: u32x2; unsafe { - use core::arch::aarch64::vget_low_u32; use core::arch::aarch64::vget_high_u32; + use core::arch::aarch64::vget_low_u32; a0 = vget_low_u32(src.0.into_bits()).into_bits(); a1 = vget_low_u32(src.1.into_bits()).into_bits(); b0 = vget_high_u32(src.0.into_bits()).into_bits(); @@ -53,12 +55,13 @@ fn unpack_pair(src: (u32x4, u32x4)) -> ((u32x2, u32x2), (u32x2, u32x2)) { /// into /// ((a0, b0, a1, b1), (c0, d0, c1, d1)) #[inline(always)] +#[rustfmt::skip] // Retain formatting of the return tuples fn repack_pair(x: (u32x4, u32x4), y: (u32x4, u32x4)) -> (u32x4, u32x4) { unsafe { - use core::arch::aarch64::vget_low_u32; use core::arch::aarch64::vcombine_u32; - use core::arch::aarch64::vset_lane_u32; + use core::arch::aarch64::vget_low_u32; use core::arch::aarch64::vgetq_lane_u32; + use core::arch::aarch64::vset_lane_u32; (vcombine_u32( vset_lane_u32(vgetq_lane_u32(x.0.into_bits(), 2) , vget_low_u32(x.0.into_bits()), 1), @@ -110,19 +113,30 @@ impl ConditionallySelectable for FieldElement2625x4 { let mask = (-(choice.unwrap_u8() as i32)) as u32; let mask_vec = u32x4::splat(mask); FieldElement2625x4([ - (a.0[0].0 ^ (mask_vec & (a.0[0].0 ^ b.0[0].0)), a.0[0].1 ^ (mask_vec & (a.0[0].1 ^ b.0[0].1))), - (a.0[1].0 ^ (mask_vec & (a.0[1].0 ^ b.0[1].0)), a.0[1].1 ^ (mask_vec & (a.0[1].1 ^ b.0[1].1))), - (a.0[2].0 ^ (mask_vec & (a.0[2].0 ^ b.0[2].0)), a.0[2].1 ^ 
(mask_vec & (a.0[2].1 ^ b.0[2].1))), - (a.0[3].0 ^ (mask_vec & (a.0[3].0 ^ b.0[3].0)), a.0[3].1 ^ (mask_vec & (a.0[3].1 ^ b.0[3].1))), - (a.0[4].0 ^ (mask_vec & (a.0[4].0 ^ b.0[4].0)), a.0[4].1 ^ (mask_vec & (a.0[4].1 ^ b.0[4].1))) + ( + a.0[0].0 ^ (mask_vec & (a.0[0].0 ^ b.0[0].0)), + a.0[0].1 ^ (mask_vec & (a.0[0].1 ^ b.0[0].1)), + ), + ( + a.0[1].0 ^ (mask_vec & (a.0[1].0 ^ b.0[1].0)), + a.0[1].1 ^ (mask_vec & (a.0[1].1 ^ b.0[1].1)), + ), + ( + a.0[2].0 ^ (mask_vec & (a.0[2].0 ^ b.0[2].0)), + a.0[2].1 ^ (mask_vec & (a.0[2].1 ^ b.0[2].1)), + ), + ( + a.0[3].0 ^ (mask_vec & (a.0[3].0 ^ b.0[3].0)), + a.0[3].1 ^ (mask_vec & (a.0[3].1 ^ b.0[3].1)), + ), + ( + a.0[4].0 ^ (mask_vec & (a.0[4].0 ^ b.0[4].0)), + a.0[4].1 ^ (mask_vec & (a.0[4].1 ^ b.0[4].1)), + ), ]) } - fn conditional_assign( - &mut self, - other: &FieldElement2625x4, - choice: Choice, - ) { + fn conditional_assign(&mut self, other: &FieldElement2625x4, choice: Choice) { let mask = (-(choice.unwrap_u8() as i32)) as u32; let mask_vec = u32x4::splat(mask); self.0[0].0 ^= mask_vec & (self.0[0].0 ^ other.0[0].0); @@ -139,16 +153,15 @@ impl ConditionallySelectable for FieldElement2625x4 { } impl FieldElement2625x4 { - pub fn split(&self) -> [FieldElement51; 4] { let mut out = [FieldElement51::ZERO; 4]; for i in 0..5 { - let a_2i = self.0[i].0.extract(0) as u64; - let b_2i = self.0[i].0.extract(1) as u64; + let a_2i = self.0[i].0.extract(0) as u64; + let b_2i = self.0[i].0.extract(1) as u64; let a_2i_1 = self.0[i].0.extract(2) as u64; let b_2i_1 = self.0[i].0.extract(3) as u64; - let c_2i = self.0[i].1.extract(0) as u64; - let d_2i = self.0[i].1.extract(1) as u64; + let c_2i = self.0[i].1.extract(0) as u64; + let d_2i = self.0[i].1.extract(1) as u64; let c_2i_1 = self.0[i].1.extract(2) as u64; let d_2i_1 = self.0[i].1.extract(3) as u64; @@ -164,6 +177,7 @@ impl FieldElement2625x4 { #[inline] pub fn shuffle(&self, control: Shuffle) -> FieldElement2625x4 { #[inline(always)] + #[rustfmt::skip] // Retain format of 
the return tuples fn shuffle_lanes(x: (u32x4, u32x4), control: Shuffle) -> (u32x4, u32x4) { use packed_simd::shuffle; match control { @@ -193,34 +207,18 @@ impl FieldElement2625x4 { #[inline] pub fn blend(&self, other: FieldElement2625x4, control: Lanes) -> FieldElement2625x4 { #[inline(always)] + #[rustfmt::skip] // Retain format of the return tuples fn blend_lanes(x: (u32x4, u32x4), y: (u32x4, u32x4), control: Lanes) -> (u32x4, u32x4) { use packed_simd::shuffle; match control { - Lanes::C => { - (x.0, shuffle!(y.1, x.1, [0, 5, 2, 7])) - } - Lanes::D => { - (x.0, shuffle!(y.1, x.1, [4, 1, 6, 3])) - } - Lanes::AD => { - (shuffle!(y.0, x.0, [0, 5, 2, 7]), shuffle!(y.1, x.1, [4, 1, 6, 3])) - } - Lanes::AB => { - (y.0, x.1) - } - Lanes::AC => { - (shuffle!(y.0, x.0, [0, 5, 2, 7]), shuffle!(y.1, x.1, [0, 5, 2, 7])) - } - Lanes::CD => { - (x.0, y.1) - } - Lanes::BC => { - (shuffle!(y.0, x.0, [4, 1, 6, 3]), shuffle!(y.1, x.1, [0, 5, 2, 7])) - } - Lanes::ABCD => { - y - } - + Lanes::C => (x.0, shuffle!(y.1, x.1, [0, 5, 2, 7])), + Lanes::D => (x.0, shuffle!(y.1, x.1, [4, 1, 6, 3])), + Lanes::AD => (shuffle!(y.0, x.0, [0, 5, 2, 7]), shuffle!(y.1, x.1, [4, 1, 6, 3])), + Lanes::AB => (y.0, x.1), + Lanes::AC => (shuffle!(y.0, x.0, [0, 5, 2, 7]), shuffle!(y.1, x.1, [0, 5, 2, 7])), + Lanes::CD => (x.0, y.1), + Lanes::BC => (shuffle!(y.0, x.0, [4, 1, 6, 3]), shuffle!(y.1, x.1, [0, 5, 2, 7])), + Lanes::ABCD => y, } } @@ -233,7 +231,6 @@ impl FieldElement2625x4 { ]) } - pub fn zero() -> FieldElement2625x4 { FieldElement2625x4([(u32x4::splat(0), u32x4::splat(0)); 5]) } @@ -242,26 +239,28 @@ impl FieldElement2625x4 { FieldElement2625x4::new(x, x, x, x) } - pub fn new( x0: &FieldElement51, x1: &FieldElement51, x2: &FieldElement51, - x3: &FieldElement51 + x3: &FieldElement51, ) -> FieldElement2625x4 { let mut buf = [(u32x4::splat(0), u32x4::splat(0)); 5]; let low_26_bits = (1 << 26) - 1; for i in 0..5 { - let a_2i = (x0.0[i] & low_26_bits) as u32; + let a_2i = (x0.0[i] & low_26_bits) 
as u32; let a_2i_1 = (x0.0[i] >> 26) as u32; - let b_2i = (x1.0[i] & low_26_bits) as u32; + let b_2i = (x1.0[i] & low_26_bits) as u32; let b_2i_1 = (x1.0[i] >> 26) as u32; - let c_2i = (x2.0[i] & low_26_bits) as u32; + let c_2i = (x2.0[i] & low_26_bits) as u32; let c_2i_1 = (x2.0[i] >> 26) as u32; - let d_2i = (x3.0[i] & low_26_bits) as u32; + let d_2i = (x3.0[i] & low_26_bits) as u32; let d_2i_1 = (x3.0[i] >> 26) as u32; - buf[i] = (u32x4::new(a_2i, b_2i, a_2i_1, b_2i_1), u32x4::new(c_2i, d_2i, c_2i_1, d_2i_1)); + buf[i] = ( + u32x4::new(a_2i, b_2i, a_2i_1, b_2i_1), + u32x4::new(c_2i, d_2i, c_2i_1, d_2i_1), + ); } return FieldElement2625x4(buf).reduce(); } @@ -284,46 +283,61 @@ impl FieldElement2625x4 { tmp1 + tmp2 } - pub fn reduce(&self) -> FieldElement2625x4 { // Negated for shift right instead of left - let shifts = (i32x4::new(-26, -26, -25, -25), i32x4::new(-26, -26, -25, -25)); - let masks = (u32x4::new( - (1 << 26) - 1, - (1 << 26) - 1, - (1 << 25) - 1, - (1 << 25) - 1), - u32x4::new( - (1 << 26) - 1, - (1 << 26) - 1, - (1 << 25) - 1, - (1 << 25) - 1) + let shifts = ( + i32x4::new(-26, -26, -25, -25), + i32x4::new(-26, -26, -25, -25), + ); + let masks = ( + u32x4::new((1 << 26) - 1, (1 << 26) - 1, (1 << 25) - 1, (1 << 25) - 1), + u32x4::new((1 << 26) - 1, (1 << 26) - 1, (1 << 25) - 1, (1 << 25) - 1), ); - // Use mutliple transposes instead of table lookup? 
let rotated_carryout = |v: (u32x4, u32x4)| -> (u32x4, u32x4) { unsafe { - use core::arch::aarch64::vqshlq_u32; - use core::arch::aarch64::vget_low_u32; - use core::arch::aarch64::vget_high_u32; use core::arch::aarch64::vcombine_u32; + use core::arch::aarch64::vget_high_u32; + use core::arch::aarch64::vget_low_u32; + use core::arch::aarch64::vqshlq_u32; - let c: (u32x4, u32x4) = (vqshlq_u32(v.0.into_bits(), shifts.0.into_bits()).into_bits(), - vqshlq_u32(v.1.into_bits(), shifts.1.into_bits()).into_bits()); - (vcombine_u32(vget_high_u32(c.0.into_bits()), vget_low_u32(c.0.into_bits())).into_bits(), - vcombine_u32(vget_high_u32(c.1.into_bits()), vget_low_u32(c.1.into_bits())).into_bits()) - + let c: (u32x4, u32x4) = ( + vqshlq_u32(v.0.into_bits(), shifts.0.into_bits()).into_bits(), + vqshlq_u32(v.1.into_bits(), shifts.1.into_bits()).into_bits(), + ); + ( + vcombine_u32( + vget_high_u32(c.0.into_bits()), + vget_low_u32(c.0.into_bits()), + ) + .into_bits(), + vcombine_u32( + vget_high_u32(c.1.into_bits()), + vget_low_u32(c.1.into_bits()), + ) + .into_bits(), + ) } }; let combine = |v_lo: (u32x4, u32x4), v_hi: (u32x4, u32x4)| -> (u32x4, u32x4) { unsafe { - use core::arch::aarch64::vget_low_u32; - use core::arch::aarch64::vget_high_u32; use core::arch::aarch64::vcombine_u32; - (vcombine_u32(vget_low_u32(v_lo.0.into_bits()), vget_high_u32(v_hi.0.into_bits())).into_bits(), - vcombine_u32(vget_low_u32(v_lo.1.into_bits()), vget_high_u32(v_hi.1.into_bits())).into_bits()) + use core::arch::aarch64::vget_high_u32; + use core::arch::aarch64::vget_low_u32; + ( + vcombine_u32( + vget_low_u32(v_lo.0.into_bits()), + vget_high_u32(v_hi.0.into_bits()), + ) + .into_bits(), + vcombine_u32( + vget_low_u32(v_lo.1.into_bits()), + vget_high_u32(v_hi.1.into_bits()), + ) + .into_bits(), + ) } }; @@ -349,12 +363,16 @@ impl FieldElement2625x4 { com = combine(c76, c98); v[4] = ((v[4].0 & masks.0) + com.0, (v[4].1 & masks.1) + com.1); - let c9_19: (u32x4, u32x4) = unsafe { - use 
core::arch::aarch64::vmulq_n_u32; - use core::arch::aarch64::vget_low_u32; + #[rustfmt::skip] // Retain formatting of return tuple + let c9_19: (u32x4, u32x4) = unsafe { use core::arch::aarch64::vcombine_u32; + use core::arch::aarch64::vget_low_u32; + use core::arch::aarch64::vmulq_n_u32; - let c9_19_spread: (u32x4, u32x4) = (vmulq_n_u32(c98.0.into_bits(), 19).into_bits(), vmulq_n_u32(c98.1.into_bits(), 19).into_bits()); + let c9_19_spread: (u32x4, u32x4) = ( + vmulq_n_u32(c98.0.into_bits(), 19).into_bits(), + vmulq_n_u32(c98.1.into_bits(), 19).into_bits(), + ); (vcombine_u32(vget_low_u32(c9_19_spread.0.into_bits()), u32x2::splat(0).into_bits()).into_bits(), vcombine_u32(vget_low_u32(c9_19_spread.1.into_bits()), u32x2::splat(0).into_bits()).into_bits()) @@ -365,8 +383,8 @@ impl FieldElement2625x4 { } #[inline] + #[rustfmt::skip] // Retain formatting of carry and repacking fn reduce64(mut z: [(u64x2, u64x2); 10]) -> FieldElement2625x4 { - #[allow(non_snake_case)] let LOW_25_BITS: u64x2 = u64x2::splat((1 << 25) - 1); #[allow(non_snake_case)] @@ -407,7 +425,6 @@ impl FieldElement2625x4 { vmulq_n_u32(c1.1.into_bits(), 19).into_bits()); } - z[0] = (z[0].0 + c0.0, z[0].1 + c0.1); z[1] = (z[1].0 + c1.0, z[1].1 + c1.1); carry(&mut z, 0); @@ -422,6 +439,7 @@ impl FieldElement2625x4 { } #[allow(non_snake_case)] + #[rustfmt::skip] // keep alignment of formulas pub fn square_and_negate_D(&self) -> FieldElement2625x4 { #[inline(always)] fn m(x: (u32x2, u32x2), y: (u32x2, u32x2)) -> u64x4 { @@ -522,7 +540,8 @@ impl Neg for FieldElement2625x4 { (P_TIMES_16_HI.0 - self.0[2].0, P_TIMES_16_HI.1 - self.0[2].1), (P_TIMES_16_HI.0 - self.0[3].0, P_TIMES_16_HI.1 - self.0[3].1), (P_TIMES_16_HI.0 - self.0[4].0, P_TIMES_16_HI.1 - self.0[4].1), - ]).reduce() + ]) + .reduce() } } @@ -543,11 +562,15 @@ impl Add for FieldElement2625x4 { impl Mul<(u32, u32, u32, u32)> for FieldElement2625x4 { type Output = FieldElement2625x4; #[inline] + #[rustfmt::skip] // Retain formatting of packing fn 
mul(self, scalars: (u32, u32, u32, u32)) -> FieldElement2625x4 { unsafe { use core::arch::aarch64::vmull_u32; - let consts = (u32x2::new(scalars.0, scalars.1), u32x2::new(scalars.2, scalars.3)); + let consts = ( + u32x2::new(scalars.0, scalars.1), + u32x2::new(scalars.2, scalars.3), + ); let (b0, b1) = unpack_pair(self.0[0]); let (b2, b3) = unpack_pair(self.0[1]); @@ -555,7 +578,6 @@ impl Mul<(u32, u32, u32, u32)> for FieldElement2625x4 { let (b6, b7) = unpack_pair(self.0[3]); let (b8, b9) = unpack_pair(self.0[4]); - FieldElement2625x4::reduce64([ (vmull_u32(b0.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b0.1.into_bits(), consts.1.into_bits()).into_bits()), (vmull_u32(b1.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b1.1.into_bits(), consts.1.into_bits()).into_bits()), @@ -574,6 +596,8 @@ impl Mul<(u32, u32, u32, u32)> for FieldElement2625x4 { impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { type Output = FieldElement2625x4; + + #[rustfmt::skip] // Retain formatting of z_i computation fn mul(self, rhs: &'b FieldElement2625x4) -> FieldElement2625x4 { #[inline(always)] fn m(x: (u32x2, u32x2), y: (u32x2, u32x2)) -> u64x4 { @@ -589,9 +613,14 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { fn m_lo(x: (u32x2, u32x2), y: (u32x2, u32x2)) -> (u32x2, u32x2) { use core::arch::aarch64::vmull_u32; unsafe { - let x: (u32x4, u32x4) = (vmull_u32(x.0.into_bits(), y.0.into_bits()).into_bits(), - vmull_u32(x.1.into_bits(), y.1.into_bits()).into_bits()); - (u32x2::new(x.0.extract(0), x.0.extract(2)), u32x2::new(x.1.extract(0), x.1.extract(2))) + let x: (u32x4, u32x4) = ( + vmull_u32(x.0.into_bits(), y.0.into_bits()).into_bits(), + vmull_u32(x.1.into_bits(), y.1.into_bits()).into_bits(), + ); + ( + u32x2::new(x.0.extract(0), x.0.extract(2)), + u32x2::new(x.1.extract(0), x.1.extract(2)), + ) } } @@ -637,10 +666,24 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { let z9 = m(x0,y9) + 
m(x1,y8) + m(x2,y7) + m(x3,y6) + m(x4,y5) + m(x5,y4) + m(x6,y3) + m(x7,y2) + m(x8,y1) + m(x9,y0); let f = |x: u64x4| -> (u64x2, u64x2) { - ((u64x2::new(x.extract(0), x.extract(1))).into_bits(), (u64x2::new(x.extract(2), x.extract(3))).into_bits()) + ( + (u64x2::new(x.extract(0), x.extract(1))).into_bits(), + (u64x2::new(x.extract(2), x.extract(3))).into_bits(), + ) }; - FieldElement2625x4::reduce64([f(z0), f(z1), f(z2), f(z3), f(z4), f(z5), f(z6), f(z7), f(z8), f(z9)]) + FieldElement2625x4::reduce64([ + f(z0), + f(z1), + f(z2), + f(z3), + f(z4), + f(z5), + f(z6), + f(z7), + f(z8), + f(z9), + ]) } } @@ -668,10 +711,15 @@ mod test { assert_eq!(b, expected_b); let expected_src = repack_pair( - (u32x4::new(a.0.extract(0), 0, a.0.extract(1), 0), - u32x4::new(a.1.extract(0), 0, a.1.extract(1), 0)), - (u32x4::new(b.0.extract(0), 0, b.0.extract(1), 0), - u32x4::new(b.1.extract(0), 0, b.1.extract(1), 0))); + ( + u32x4::new(a.0.extract(0), 0, a.0.extract(1), 0), + u32x4::new(a.1.extract(0), 0, a.1.extract(1), 0), + ), + ( + u32x4::new(b.0.extract(0), 0, b.0.extract(1), 0), + u32x4::new(b.1.extract(0), 0, b.1.extract(1), 0), + ), + ); assert_eq!(src, expected_src); } @@ -680,7 +728,7 @@ mod test { fn scale_by_curve_constants() { let mut x = FieldElement2625x4::splat(&FieldElement51::ONE); - x = x * (121666, 121666, 2*121666, 2*121665); + x = x * (121666, 121666, 2 * 121666, 2 * 121665); let xs = x.split(); assert_eq!(xs[0], FieldElement51([121666, 0, 0, 0, 0])); From c49e4656c0ef9a23ecd1e1e164e825a2a8b84cce Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Wed, 14 Dec 2022 11:34:01 +0100 Subject: [PATCH 05/14] rustfmt on neon constants --- .../src/backend/vector/neon/constants.rs | 4491 ++++++----------- 1 file changed, 1437 insertions(+), 3054 deletions(-) diff --git a/curve25519-dalek/src/backend/vector/neon/constants.rs b/curve25519-dalek/src/backend/vector/neon/constants.rs index ab380c4ba..c9fb0bf58 100644 --- a/curve25519-dalek/src/backend/vector/neon/constants.rs +++ 
b/curve25519-dalek/src/backend/vector/neon/constants.rs @@ -24,16 +24,31 @@ pub(crate) static EXTENDEDPOINT_IDENTITY: ExtendedPoint = ExtendedPoint(FieldEle (u32x4::splat(0), u32x4::splat(0)), (u32x4::splat(0), u32x4::splat(0)), (u32x4::splat(0), u32x4::splat(0)), - (u32x4::splat(0), u32x4::splat(0)) + (u32x4::splat(0), u32x4::splat(0)), ])); /// The identity element as a `CachedPoint`. pub(crate) static CACHEDPOINT_IDENTITY: CachedPoint = CachedPoint(FieldElement2625x4([ - (u32x4::new(121647, 121666, 0, 0), u32x4::new(243332, 67108845, 0, 33554431)), - (u32x4::new(67108864, 0, 33554431, 0), u32x4::new(0, 67108863, 0, 33554431)), - (u32x4::new(67108863, 0, 33554431, 0), u32x4::new(0, 67108863, 0, 33554431)), - (u32x4::new(67108863, 0, 33554431, 0), u32x4::new(0, 67108863, 0, 33554431)), - (u32x4::new(67108863, 0, 33554431, 0), u32x4::new(0, 67108863, 0, 33554431)) + ( + u32x4::new(121647, 121666, 0, 0), + u32x4::new(243332, 67108845, 0, 33554431), + ), + ( + u32x4::new(67108864, 0, 33554431, 0), + u32x4::new(0, 67108863, 0, 33554431), + ), + ( + u32x4::new(67108863, 0, 33554431, 0), + u32x4::new(0, 67108863, 0, 33554431), + ), + ( + u32x4::new(67108863, 0, 33554431, 0), + u32x4::new(0, 67108863, 0, 33554431), + ), + ( + u32x4::new(67108863, 0, 33554431, 0), + u32x4::new(0, 67108863, 0, 33554431), + ), ])); /// The low limbs of (2p, 2p, 2p, 2p), so that @@ -41,16 +56,8 @@ pub(crate) static CACHEDPOINT_IDENTITY: CachedPoint = CachedPoint(FieldElement26 /// (2p, 2p, 2p, 2p) = [P_TIMES_2_LO, P_TIMES_2_HI, P_TIMES_2_HI, P_TIMES_2_HI, P_TIMES_2_HI] /// ``` pub(crate) static P_TIMES_2_LO: (u32x4, u32x4) = ( - u32x4::new( - 67108845 << 1, - 67108845 << 1, - 33554431 << 1, - 33554431 << 1), - u32x4::new( - 67108845 << 1, - 67108845 << 1, - 33554431 << 1, - 33554431 << 1) + u32x4::new(67108845 << 1, 67108845 << 1, 33554431 << 1, 33554431 << 1), + u32x4::new(67108845 << 1, 67108845 << 1, 33554431 << 1, 33554431 << 1), ); /// The high limbs of (2p, 2p, 2p, 2p), so that @@ 
-58,16 +65,8 @@ pub(crate) static P_TIMES_2_LO: (u32x4, u32x4) = ( /// (2p, 2p, 2p, 2p) = [P_TIMES_2_LO, P_TIMES_2_HI, P_TIMES_2_HI, P_TIMES_2_HI, P_TIMES_2_HI] /// ``` pub(crate) static P_TIMES_2_HI: (u32x4, u32x4) = ( - u32x4::new( - 67108863 << 1, - 67108863 << 1, - 33554431 << 1, - 33554431 << 1), - u32x4::new( - 67108863 << 1, - 67108863 << 1, - 33554431 << 1, - 33554431 << 1) + u32x4::new(67108863 << 1, 67108863 << 1, 33554431 << 1, 33554431 << 1), + u32x4::new(67108863 << 1, 67108863 << 1, 33554431 << 1, 33554431 << 1), ); /// The low limbs of (16p, 16p, 16p, 16p), so that @@ -75,3043 +74,1427 @@ pub(crate) static P_TIMES_2_HI: (u32x4, u32x4) = ( /// (16p, 16p, 16p, 16p) = [P_TIMES_16_LO, P_TIMES_16_HI, P_TIMES_16_HI, P_TIMES_16_HI, P_TIMES_16_HI] /// ``` pub(crate) static P_TIMES_16_LO: (u32x4, u32x4) = ( - u32x4::new( - 67108845 << 4, - 67108845 << 4, - 33554431 << 4, - 33554431 << 4), - u32x4::new( - 67108845 << 4, - 67108845 << 4, - 33554431 << 4, - 33554431 << 4) - ); + u32x4::new(67108845 << 4, 67108845 << 4, 33554431 << 4, 33554431 << 4), + u32x4::new(67108845 << 4, 67108845 << 4, 33554431 << 4, 33554431 << 4), +); /// The high limbs of (16p, 16p, 16p, 16p), so that /// ```ascii,no_run /// (16p, 16p, 16p, 16p) = [P_TIMES_16_LO, P_TIMES_16_HI, P_TIMES_16_HI, P_TIMES_16_HI, P_TIMES_16_HI] /// ``` pub(crate) static P_TIMES_16_HI: (u32x4, u32x4) = ( - u32x4::new( - 67108863 << 4, - 67108863 << 4, - 33554431 << 4, - 33554431 << 4), - u32x4::new( - 67108863 << 4, - 67108863 << 4, - 33554431 << 4, - 33554431 << 4) + u32x4::new(67108863 << 4, 67108863 << 4, 33554431 << 4, 33554431 << 4), + u32x4::new(67108863 << 4, 67108863 << 4, 33554431 << 4, 33554431 << 4), ); /// Odd multiples of the Ed25519 basepoint: pub(crate) static BASEPOINT_ODD_LOOKUP_TABLE: NafLookupTable8 = NafLookupTable8([ CachedPoint(FieldElement2625x4([ - (u32x4::new( - 3571425, - 10045002, - 19036563, - 1096096,), u32x4::new( - 243332, - 65897020, - 0, - 28963681,)), - (u32x4::new( - 
30896895, - 63055514, - 1614915, - 5095970,), u32x4::new( - 0, - 53791688, - 0, - 31258312,)), - (u32x4::new( - 13347627, - 40339464, - 2236269, - 11185503,), u32x4::new( - 0, - 22520087, - 0, - 8659512,)), - (u32x4::new( - 11125413, - 29139905, - 32037254, - 28360723,), u32x4::new( - 0, - 64556417, - 0, - 9635759,)), - (u32x4::new( - 33268144, - 47262491, - 4336918, - 15795740,), u32x4::new( - 0, - 22027545, - 0, - 4846528,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 47099681, - 31447946, - 29365447, - 24740513,), u32x4::new( - 42991046, - 18317844, - 16051644, - 21404226,)), - (u32x4::new( - 31708133, - 28909527, - 2366091, - 13703791,), u32x4::new( - 469246, - 54159622, - 2601402, - 32988002,)), - (u32x4::new( - 63432457, - 30251794, - 15163516, - 18491340,), u32x4::new( - 28144087, - 35605455, - 13682295, - 18474872,)), - (u32x4::new( - 12221607, - 4967598, - 26061980, - 26008006,), u32x4::new( - 20226147, - 9726961, - 17410, - 18051083,)), - (u32x4::new( - 60569645, - 62487085, - 11911242, - 21920922,), u32x4::new( - 4092105, - 38186967, - 22431483, - 31366585,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 18147205, - 62587998, - 2554617, - 536692,), u32x4::new( - 11924528, - 26674131, - 17645433, - 24341419,)), - (u32x4::new( - 11573357, - 27579485, - 31491870, - 29000885,), u32x4::new( - 10800976, - 51902791, - 28076395, - 20464029,)), - (u32x4::new( - 56031649, - 10856669, - 11791193, - 26769430,), u32x4::new( - 25306956, - 5922200, - 6630685, - 9385098,)), - (u32x4::new( - 31319348, - 23906711, - 16290213, - 32142166,), u32x4::new( - 61106354, - 17181823, - 3548308, - 12022566,)), - (u32x4::new( - 5904298, - 50218605, - 11826440, - 5492249,), u32x4::new( - 10379071, - 3472255, - 172742, - 31948344,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 10625852, - 15193821, - 22918394, - 23676410,), u32x4::new( - 53695416, - 54987793, - 10067515, - 11747680,)), - (u32x4::new( - 65013325, - 1309652, - 29616320, - 
28922974,), u32x4::new( - 60360891, - 19621771, - 9938982, - 30406429,)), - (u32x4::new( - 54967954, - 65931918, - 5595602, - 25719523,), u32x4::new( - 64909864, - 30566415, - 15945272, - 8495317,)), - (u32x4::new( - 1167157, - 55265018, - 11507029, - 31641054,), u32x4::new( - 43497904, - 2367338, - 12937761, - 27517066,)), - (u32x4::new( - 656704, - 2544994, - 13006713, - 480979,), u32x4::new( - 38471594, - 62541240, - 25353597, - 11531760,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 22176662, - 3984313, - 27495285, - 4110608,), u32x4::new( - 2909584, - 30594106, - 15677919, - 2549183,)), - (u32x4::new( - 33979105, - 62269905, - 2071511, - 6894756,), u32x4::new( - 53189950, - 47232857, - 6408191, - 6123225,)), - (u32x4::new( - 32553873, - 63948030, - 12612401, - 3633166,), u32x4::new( - 24054373, - 37626618, - 14481327, - 8520484,)), - (u32x4::new( - 56552486, - 10749438, - 12034813, - 28811946,), u32x4::new( - 1445640, - 36755601, - 12104575, - 10257833,)), - (u32x4::new( - 22795808, - 48761311, - 1136056, - 9380768,), u32x4::new( - 1411523, - 5341811, - 27318329, - 9686767,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 21157200, - 39156966, - 20473176, - 4934657,), u32x4::new( - 61478183, - 45121537, - 5429856, - 13035023,)), - (u32x4::new( - 7954529, - 58789246, - 31440083, - 7054221,), u32x4::new( - 38438565, - 36856107, - 1364112, - 14548122,)), - (u32x4::new( - 26120083, - 36321360, - 4919997, - 31687496,), u32x4::new( - 33757765, - 36237559, - 15243054, - 32163861,)), - (u32x4::new( - 25878307, - 46544824, - 19455951, - 2414935,), u32x4::new( - 16844726, - 56521560, - 32680554, - 26660660,)), - (u32x4::new( - 48360220, - 43407178, - 12187042, - 24925816,), u32x4::new( - 7423722, - 25746484, - 12814654, - 17395963,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 63153652, - 32195955, - 4087908, - 8431689,), u32x4::new( - 30392384, - 47203165, - 8986649, - 9053039,)), - (u32x4::new( - 63659241, - 47988767, - 
2931872, - 19953600,), u32x4::new( - 11747107, - 51610101, - 20952181, - 13364887,)), - (u32x4::new( - 3659197, - 58790649, - 5930099, - 2605312,), u32x4::new( - 28477896, - 580728, - 20579735, - 2610622,)), - (u32x4::new( - 41781607, - 17161358, - 10690531, - 24368015,), u32x4::new( - 47027031, - 36742339, - 5414694, - 13156365,)), - (u32x4::new( - 13237853, - 51182423, - 8954802, - 29006542,), u32x4::new( - 22643989, - 56896541, - 22830593, - 10289708,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 1401265, - 58846825, - 30911620, - 32239180,), u32x4::new( - 15391552, - 15200821, - 6339309, - 16403588,)), - (u32x4::new( - 55913797, - 29541724, - 1664461, - 21709410,), u32x4::new( - 38470488, - 47097092, - 17674945, - 32666066,)), - (u32x4::new( - 22844482, - 10797709, - 27548106, - 31638735,), u32x4::new( - 34500968, - 26611503, - 19727211, - 13160873,)), - (u32x4::new( - 31485204, - 14496164, - 13981208, - 10276888,), u32x4::new( - 5748808, - 35024436, - 2740987, - 7479021,)), - (u32x4::new( - 58541207, - 14866135, - 32344041, - 545930,), u32x4::new( - 62661488, - 6941250, - 27940205, - 11976112,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 39849808, - 44781685, - 15697329, - 24387845,), u32x4::new( - 12501486, - 50260092, - 23199481, - 31929024,)), - (u32x4::new( - 24823070, - 27956017, - 27034296, - 10316465,), u32x4::new( - 47664045, - 11152446, - 15719183, - 30181617,)), - (u32x4::new( - 20771189, - 19969144, - 31433937, - 19185213,), u32x4::new( - 27565920, - 10384445, - 2893359, - 9255362,)), - (u32x4::new( - 42894974, - 11925545, - 32134441, - 32738810,), u32x4::new( - 55916336, - 32479272, - 19563550, - 5511385,)), - (u32x4::new( - 17857161, - 47809169, - 14564114, - 27997751,), u32x4::new( - 33024640, - 38669671, - 31956536, - 27313245,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 58237774, - 15917425, - 18872208, - 19394230,), u32x4::new( - 17374297, - 6101419, - 4839741, - 6596900,)), - (u32x4::new( - 
66947393, - 15744215, - 18368993, - 17750160,), u32x4::new( - 41006525, - 9205497, - 2629667, - 32170865,)), - (u32x4::new( - 66481381, - 1919414, - 28338762, - 7372967,), u32x4::new( - 33819153, - 4156199, - 27126309, - 12739816,)), - (u32x4::new( - 44117158, - 58545296, - 22521371, - 11809712,), u32x4::new( - 28998792, - 50731010, - 30215699, - 25748377,)), - (u32x4::new( - 23561284, - 4160244, - 9035405, - 24895184,), u32x4::new( - 39761639, - 59253416, - 8684759, - 22487864,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 12671134, - 56419053, - 16092401, - 30038207,), u32x4::new( - 4002647, - 47822606, - 7151311, - 28430768,)), - (u32x4::new( - 61041684, - 35765374, - 30598048, - 19666539,), u32x4::new( - 44150175, - 40140037, - 290469, - 28442674,)), - (u32x4::new( - 18847796, - 1371617, - 33316881, - 13199936,), u32x4::new( - 43646578, - 17068881, - 12074900, - 1537415,)), - (u32x4::new( - 10052225, - 38316070, - 27469797, - 5297537,), u32x4::new( - 50725570, - 20435349, - 10339121, - 2779737,)), - (u32x4::new( - 18372189, - 15466385, - 24762130, - 22217964,), u32x4::new( - 23503887, - 47844464, - 10415034, - 2606889,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 55082775, - 45300503, - 16032654, - 5964396,), u32x4::new( - 17743504, - 24634761, - 19493066, - 5184611,)), - (u32x4::new( - 50172633, - 35093294, - 10040575, - 23616256,), u32x4::new( - 4543900, - 61852191, - 4049821, - 7423669,)), - (u32x4::new( - 20295398, - 40009376, - 10487190, - 15670429,), u32x4::new( - 51972856, - 58649552, - 20436392, - 3432497,)), - (u32x4::new( - 35189420, - 54117751, - 12825868, - 6283038,), u32x4::new( - 27540739, - 30648758, - 22658912, - 9466689,)), - (u32x4::new( - 51737549, - 40725785, - 17409814, - 25201086,), u32x4::new( - 21156239, - 34176168, - 26814520, - 5956424,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 8211442, - 8014184, - 6260823, - 22108096,), u32x4::new( - 32182620, - 51844847, - 2466270, - 28582231,)), 
- (u32x4::new( - 27199739, - 3848333, - 31738017, - 10892045,), u32x4::new( - 4963982, - 65391770, - 32551997, - 28906469,)), - (u32x4::new( - 16606846, - 32207068, - 26404535, - 7614129,), u32x4::new( - 45416902, - 65584718, - 13821785, - 2646060,)), - (u32x4::new( - 36090634, - 57981287, - 32247670, - 22837502,), u32x4::new( - 31003861, - 55448117, - 6062915, - 20369975,)), - (u32x4::new( - 27381403, - 50578107, - 522631, - 29521058,), u32x4::new( - 31137497, - 40220737, - 27628049, - 1824195,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 59402443, - 17056879, - 29262689, - 6131785,), u32x4::new( - 52551472, - 43367471, - 29423199, - 18899208,)), - (u32x4::new( - 5749414, - 43514612, - 11365899, - 21514624,), u32x4::new( - 65591890, - 60945892, - 19841732, - 5628567,)), - (u32x4::new( - 19334369, - 52500268, - 12307673, - 5267367,), u32x4::new( - 3212103, - 9035822, - 29142161, - 30520954,)), - (u32x4::new( - 57261330, - 6819646, - 22089161, - 9800373,), u32x4::new( - 55155453, - 62250856, - 13766735, - 25244545,)), - (u32x4::new( - 54370226, - 61888301, - 24496089, - 2540581,), u32x4::new( - 65637506, - 60274355, - 18154273, - 11687259,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 12521903, - 26014045, - 13995625, - 33360175,), u32x4::new( - 23605474, - 7376434, - 27229267, - 17195036,)), - (u32x4::new( - 59482891, - 10074423, - 574357, - 3857753,), u32x4::new( - 61377787, - 50306685, - 5241065, - 20234396,)), - (u32x4::new( - 23674717, - 6997172, - 20771841, - 16858511,), u32x4::new( - 40565304, - 29973136, - 7049812, - 14585010,)), - (u32x4::new( - 1427477, - 13295732, - 31762066, - 31499740,), u32x4::new( - 60419925, - 54666164, - 22009424, - 8089609,)), - (u32x4::new( - 58154031, - 41593020, - 15342328, - 957047,), u32x4::new( - 38937260, - 37037498, - 24871992, - 32973409,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 30654745, - 51286025, - 21206982, - 2433562,), u32x4::new( - 12780105, - 31732574, - 
33087964, - 33081189,)), - (u32x4::new( - 66640017, - 42720009, - 16567620, - 15300745,), u32x4::new( - 1530367, - 33001123, - 20930247, - 21042661,)), - (u32x4::new( - 15003356, - 5294119, - 22985605, - 18928772,), u32x4::new( - 32628461, - 18230172, - 14773298, - 27193722,)), - (u32x4::new( - 27555, - 65346287, - 17017174, - 7837720,), u32x4::new( - 21499787, - 42855613, - 22474984, - 13675085,)), - (u32x4::new( - 24164369, - 50130116, - 5973149, - 24152073,), u32x4::new( - 1577334, - 25400030, - 18648484, - 32228854,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 49518649, - 59119280, - 31670678, - 20396561,), u32x4::new( - 61728330, - 651402, - 176032, - 9529498,)), - (u32x4::new( - 61765532, - 9082232, - 32794568, - 15526956,), u32x4::new( - 48543100, - 32614212, - 19001206, - 25680229,)), - (u32x4::new( - 32086091, - 10373081, - 8996131, - 31822823,), u32x4::new( - 35788988, - 49973190, - 30542040, - 17858455,)), - (u32x4::new( - 48130197, - 58121889, - 27753291, - 29923268,), u32x4::new( - 54448075, - 43300790, - 9336565, - 15770022,)), - (u32x4::new( - 57725546, - 20557498, - 9366233, - 16023566,), u32x4::new( - 16189031, - 2837363, - 24315301, - 27003505,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 28286608, - 10767548, - 18220739, - 5413236,), u32x4::new( - 48253387, - 58255702, - 11864864, - 28527159,)), - (u32x4::new( - 45038176, - 58655197, - 25648758, - 10951484,), u32x4::new( - 42564382, - 34542843, - 23146954, - 22234334,)), - (u32x4::new( - 14858710, - 24978793, - 15040559, - 4379220,), u32x4::new( - 47621477, - 40271440, - 15650420, - 1998736,)), - (u32x4::new( - 24106391, - 9626149, - 344505, - 25253814,), u32x4::new( - 34579800, - 59687089, - 25718289, - 25904133,)), - (u32x4::new( - 1981195, - 37751302, - 26132048, - 1764722,), u32x4::new( - 13288231, - 28808622, - 12531301, - 18292949,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 13869851, - 31448904, - 14963539, - 7581293,), u32x4::new( - 
20536485, - 35021083, - 21257574, - 33356609,)), - (u32x4::new( - 36903364, - 18429241, - 11097857, - 5943856,), u32x4::new( - 60583077, - 40015815, - 30509523, - 31915271,)), - (u32x4::new( - 49161801, - 40681915, - 67892, - 25454357,), u32x4::new( - 22779677, - 25798439, - 15964829, - 5863227,)), - (u32x4::new( - 60810637, - 4496471, - 5217137, - 14095116,), u32x4::new( - 50942411, - 50712663, - 2507380, - 26844507,)), - (u32x4::new( - 34579752, - 53519385, - 10859797, - 18816024,), u32x4::new( - 42552864, - 39478521, - 6783896, - 17277037,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 43287109, - 27900723, - 33182187, - 2766754,), u32x4::new( - 17041989, - 1018260, - 33392790, - 4830032,)), - (u32x4::new( - 60194178, - 30788903, - 24728888, - 14513195,), u32x4::new( - 20897010, - 28843233, - 20111980, - 17475240,)), - (u32x4::new( - 46042274, - 19257042, - 4628173, - 31649727,), u32x4::new( - 27388316, - 66631493, - 11541886, - 6408028,)), - (u32x4::new( - 57024680, - 49536568, - 32050358, - 31321917,), u32x4::new( - 17437691, - 49672356, - 2884755, - 20493991,)), - (u32x4::new( - 59553007, - 46782643, - 29001173, - 1814088,), u32x4::new( - 21930692, - 51319706, - 14965872, - 30748046,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 16441817, - 36111849, - 6900424, - 602234,), u32x4::new( - 46522199, - 16441484, - 8135070, - 21726541,)), - (u32x4::new( - 37711225, - 32701959, - 11679112, - 13125533,), u32x4::new( - 32154135, - 9407918, - 26554289, - 620848,)), - (u32x4::new( - 19233407, - 30086864, - 14679568, - 2797374,), u32x4::new( - 4892806, - 7993077, - 247658, - 5632804,)), - (u32x4::new( - 37427262, - 26675495, - 27125659, - 13496131,), u32x4::new( - 50718473, - 40115609, - 28505351, - 27837393,)), - (u32x4::new( - 196819, - 18410429, - 7070012, - 21691388,), u32x4::new( - 29763371, - 24754123, - 9727048, - 10930179,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 28319289, - 40734650, - 16225680, - 24739184,), 
u32x4::new( - 64272368, - 35356897, - 7866648, - 13635853,)), - (u32x4::new( - 34165295, - 48328447, - 27041670, - 23643655,), u32x4::new( - 48949950, - 52963288, - 30411133, - 6045174,)), - (u32x4::new( - 18583559, - 41649834, - 9813585, - 26098520,), u32x4::new( - 25682734, - 26733526, - 19276490, - 10654728,)), - (u32x4::new( - 34867476, - 52715968, - 5694571, - 13380978,), u32x4::new( - 15134994, - 1831255, - 8608001, - 17266401,)), - (u32x4::new( - 59925903, - 44282172, - 27802465, - 1855069,), u32x4::new( - 14234749, - 36635487, - 11302294, - 10938429,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 8373273, - 49064494, - 4932071, - 32997499,), u32x4::new( - 38472880, - 29335908, - 14504412, - 22460029,)), - (u32x4::new( - 31795930, - 50785923, - 25835990, - 25790073,), u32x4::new( - 65669841, - 11360450, - 9969157, - 9008164,)), - (u32x4::new( - 50262498, - 45869261, - 16124434, - 15336007,), u32x4::new( - 882762, - 42522623, - 11277198, - 26296377,)), - (u32x4::new( - 42332732, - 59129236, - 14452816, - 567985,), u32x4::new( - 208061, - 34722729, - 32008143, - 14828749,)), - (u32x4::new( - 17937794, - 36846032, - 32102665, - 4442466,), u32x4::new( - 19745435, - 31633451, - 7146411, - 15812027,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 30741269, - 38648744, - 12562645, - 30092623,), u32x4::new( - 25073992, - 28730659, - 27911745, - 30000958,)), - (u32x4::new( - 2859794, - 25991700, - 17776078, - 27091930,), u32x4::new( - 2328322, - 60061146, - 18581824, - 18039008,)), - (u32x4::new( - 58206333, - 17917354, - 1972306, - 11853766,), u32x4::new( - 2655376, - 60543390, - 18416710, - 13287440,)), - (u32x4::new( - 62746330, - 61423885, - 21246577, - 2266675,), u32x4::new( - 60099139, - 14804707, - 14772234, - 20679434,)), - (u32x4::new( - 26987698, - 15488817, - 715616, - 2339565,), u32x4::new( - 51980752, - 17333865, - 21965103, - 10839820,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 18672548, - 57660959, - 
16042910, - 19519287,), u32x4::new( - 62865851, - 17580961, - 26628347, - 23774759,)), - (u32x4::new( - 368070, - 3464471, - 25888304, - 30370559,), u32x4::new( - 52396053, - 45426828, - 28745251, - 9246829,)), - (u32x4::new( - 29090099, - 57950037, - 23104657, - 4903923,), u32x4::new( - 10987778, - 56163684, - 23621539, - 10332760,)), - (u32x4::new( - 53338235, - 44851161, - 21606845, - 31069622,), u32x4::new( - 4243630, - 34464392, - 11286454, - 5802022,)), - (u32x4::new( - 46710757, - 63389067, - 11642865, - 1980986,), u32x4::new( - 12967337, - 28162061, - 3854192, - 30432268,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 12179834, - 41005450, - 12809619, - 33525228,), u32x4::new( - 4624405, - 46957889, - 16968743, - 11827816,)), - (u32x4::new( - 51521162, - 12466775, - 31791271, - 15303651,), u32x4::new( - 49798465, - 62714504, - 6509600, - 12918560,)), - (u32x4::new( - 20445559, - 1756449, - 28848701, - 7920171,), u32x4::new( - 9835040, - 5900071, - 28757409, - 12376688,)), - (u32x4::new( - 18259496, - 14281012, - 21767026, - 10232236,), u32x4::new( - 20000226, - 12400540, - 4104902, - 23570543,)), - (u32x4::new( - 3687440, - 26546648, - 13328821, - 26841081,), u32x4::new( - 49822734, - 22334054, - 244496, - 24862543,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 59523541, - 62195428, - 3853227, - 13954801,), u32x4::new( - 12387708, - 47627615, - 27221350, - 17899572,)), - (u32x4::new( - 63193587, - 36343307, - 14595132, - 6880795,), u32x4::new( - 1364792, - 37648434, - 3259017, - 20536046,)), - (u32x4::new( - 30362834, - 10440372, - 9574624, - 11729232,), u32x4::new( - 63861613, - 21748389, - 5530846, - 2721586,)), - (u32x4::new( - 18339760, - 1550632, - 17170271, - 25732971,), u32x4::new( - 28459263, - 63142237, - 21642345, - 31557672,)), - (u32x4::new( - 10611282, - 5204623, - 18049257, - 214175,), u32x4::new( - 19432723, - 49809070, - 26010406, - 27449522,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 
19770733, - 26478685, - 9464541, - 29158041,), u32x4::new( - 28604307, - 45196604, - 7586524, - 6641859,)), - (u32x4::new( - 65654484, - 52230498, - 30886612, - 19112823,), u32x4::new( - 47271809, - 38942611, - 16020035, - 10773481,)), - (u32x4::new( - 27464323, - 54451016, - 20646645, - 17732915,), u32x4::new( - 23008717, - 53626684, - 3253189, - 15614410,)), - (u32x4::new( - 52381752, - 40693008, - 7063024, - 28469981,), u32x4::new( - 51159478, - 44543211, - 19941777, - 5985451,)), - (u32x4::new( - 13553668, - 35524849, - 14788737, - 1883845,), u32x4::new( - 12385775, - 47958835, - 29135466, - 1776722,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 36719806, - 20827965, - 23175373, - 32996806,), u32x4::new( - 42041892, - 65708790, - 5467143, - 20884008,)), - (u32x4::new( - 43256281, - 40770646, - 17244063, - 31959819,), u32x4::new( - 64366384, - 43544617, - 25057754, - 12628720,)), - (u32x4::new( - 17337782, - 58472057, - 27906934, - 15305274,), u32x4::new( - 30292418, - 39284317, - 16946773, - 24806712,)), - (u32x4::new( - 6485126, - 32447403, - 16261486, - 13561940,), u32x4::new( - 49439635, - 10738368, - 16419889, - 8897231,)), - (u32x4::new( - 44812203, - 40122262, - 25496058, - 2759794,), u32x4::new( - 25295304, - 52178368, - 24154195, - 29334408,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 42307254, - 57217102, - 1088936, - 3832827,), u32x4::new( - 33905401, - 23130334, - 6958056, - 12622851,)), - (u32x4::new( - 3881189, - 14870059, - 19712830, - 6071598,), u32x4::new( - 38147944, - 60776394, - 3427938, - 13765703,)), - (u32x4::new( - 7666911, - 24227591, - 17077136, - 22967588,), u32x4::new( - 6874639, - 30915523, - 11451695, - 24292224,)), - (u32x4::new( - 13659529, - 31984463, - 28764736, - 20506164,), u32x4::new( - 64729627, - 49321636, - 28284636, - 25472371,)), - (u32x4::new( - 39360308, - 42281399, - 9446504, - 868960,), u32x4::new( - 49227724, - 21351115, - 30561851, - 11292096,)), - ])), - 
CachedPoint(FieldElement2625x4([ - (u32x4::new( - 7071115, - 46444090, - 5387916, - 15432877,), u32x4::new( - 27226682, - 41506862, - 2398278, - 3978240,)), - (u32x4::new( - 51009614, - 54216973, - 24368938, - 31392616,), u32x4::new( - 38456150, - 62313644, - 6729154, - 99724,)), - (u32x4::new( - 17474332, - 62857913, - 2619930, - 30659308,), u32x4::new( - 18268181, - 32809239, - 22826292, - 24561895,)), - (u32x4::new( - 38187020, - 67003092, - 14118280, - 16500577,), u32x4::new( - 18808560, - 64983716, - 25712929, - 32518261,)), - (u32x4::new( - 25735813, - 62284262, - 10824872, - 20558596,), u32x4::new( - 48149681, - 31162667, - 22608274, - 26285185,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 963440, - 63742255, - 10230323, - 25515008,), u32x4::new( - 32506414, - 6105697, - 25980317, - 24645129,)), - (u32x4::new( - 7162189, - 8101249, - 14679265, - 33443386,), u32x4::new( - 2002396, - 8541405, - 19442276, - 4795881,)), - (u32x4::new( - 8116694, - 51463069, - 4415528, - 25599140,), u32x4::new( - 55805721, - 39582709, - 6719436, - 30033839,)), - (u32x4::new( - 14468202, - 42181869, - 25188826, - 9639755,), u32x4::new( - 47546189, - 62711146, - 32762447, - 18338064,)), - (u32x4::new( - 33880058, - 32810909, - 8969931, - 13095238,), u32x4::new( - 38360605, - 40138517, - 9246134, - 4928058,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 63655588, - 17883670, - 9410246, - 26162761,), u32x4::new( - 5000571, - 7349225, - 23785252, - 32751089,)), - (u32x4::new( - 28568737, - 10733123, - 9342397, - 21570673,), u32x4::new( - 54096560, - 32467591, - 20494687, - 21511513,)), - (u32x4::new( - 47675157, - 47932807, - 29250946, - 15672208,), u32x4::new( - 59760469, - 9945465, - 14939287, - 18437405,)), - (u32x4::new( - 37985267, - 8609815, - 31573002, - 3373596,), u32x4::new( - 47828883, - 20834216, - 13248616, - 24154292,)), - (u32x4::new( - 5543543, - 29553242, - 3386453, - 30501150,), u32x4::new( - 25058089, - 15236571, - 8814395, - 
32462955,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 39158670, - 15322548, - 20495103, - 3312736,), u32x4::new( - 14557171, - 12985179, - 8044741, - 3176899,)), - (u32x4::new( - 24673290, - 29693310, - 21412266, - 18324699,), u32x4::new( - 2154518, - 40329021, - 17500543, - 3954277,)), - (u32x4::new( - 36758685, - 38738957, - 165513, - 14691866,), u32x4::new( - 3070475, - 10424235, - 17096536, - 16896898,)), - (u32x4::new( - 59790459, - 43094586, - 8720681, - 10423589,), u32x4::new( - 1122030, - 31545615, - 4463786, - 31811293,)), - (u32x4::new( - 49778992, - 60881044, - 20509974, - 5832494,), u32x4::new( - 64155961, - 31483358, - 4511231, - 20307815,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 2863373, - 40876242, - 26865913, - 24067353,), u32x4::new( - 15726407, - 40919070, - 12953902, - 9931535,)), - (u32x4::new( - 60934877, - 42512204, - 21649141, - 21945190,), u32x4::new( - 52211954, - 60984193, - 7046207, - 5363493,)), - (u32x4::new( - 4205971, - 64068464, - 18197273, - 7327176,), u32x4::new( - 51527794, - 21166920, - 20669933, - 11828242,)), - (u32x4::new( - 59782815, - 49617225, - 15379924, - 457923,), u32x4::new( - 9320508, - 21498914, - 3242540, - 31563182,)), - (u32x4::new( - 27714753, - 8664670, - 3366162, - 26338598,), u32x4::new( - 56775518, - 25796006, - 13129151, - 21388876,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 59276548, - 49972346, - 16795002, - 33455915,), u32x4::new( - 48430097, - 53857205, - 18627071, - 32474471,)), - (u32x4::new( - 42160315, - 50705892, - 13530540, - 28012698,), u32x4::new( - 19833221, - 55886870, - 20191784, - 9644313,)), - (u32x4::new( - 20372416, - 28414713, - 24084234, - 31804096,), u32x4::new( - 33815377, - 36131001, - 17251241, - 18291088,)), - (u32x4::new( - 56234667, - 14920441, - 2033267, - 29572003,), u32x4::new( - 1724043, - 45519699, - 17873735, - 501988,)), - (u32x4::new( - 50031659, - 31517850, - 15697583, - 1016845,), u32x4::new( - 43104661, - 
54769582, - 8008601, - 27257051,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 52951491, - 66542164, - 14853573, - 30444631,), u32x4::new( - 12045973, - 24321813, - 16545674, - 18160646,)), - (u32x4::new( - 60107911, - 1126003, - 5947677, - 19486116,), u32x4::new( - 41119984, - 30860440, - 7935395, - 13354438,)), - (u32x4::new( - 17841328, - 11063269, - 1664538, - 26687568,), u32x4::new( - 6268968, - 22280371, - 17275484, - 4523163,)), - (u32x4::new( - 15886041, - 56799482, - 15446552, - 21712778,), u32x4::new( - 1005290, - 17827215, - 4978741, - 6854882,)), - (u32x4::new( - 34319277, - 47731002, - 20321804, - 28544575,), u32x4::new( - 29591814, - 63376351, - 24754545, - 26001714,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 66783087, - 5234346, - 46102, - 8566476,), u32x4::new( - 19947339, - 20180418, - 25398238, - 3726678,)), - (u32x4::new( - 63890180, - 46380965, - 20674069, - 5366544,), u32x4::new( - 59661487, - 48406612, - 31533614, - 7071217,)), - (u32x4::new( - 13104676, - 1406631, - 24326736, - 19854367,), u32x4::new( - 61039528, - 11019904, - 31967425, - 19219275,)), - (u32x4::new( - 39003597, - 30143957, - 15351834, - 8639435,), u32x4::new( - 57309582, - 61436794, - 15830475, - 10090318,)), - (u32x4::new( - 45923044, - 6700175, - 99413, - 21263025,), u32x4::new( - 23762647, - 53905481, - 6063914, - 10065424,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 42822326, - 57678669, - 4052879, - 25452667,), u32x4::new( - 54049411, - 2373092, - 22337016, - 7701046,)), - (u32x4::new( - 44382355, - 43307377, - 16761537, - 30373573,), u32x4::new( - 49790216, - 23230748, - 25655306, - 10519391,)), - (u32x4::new( - 919475, - 59371245, - 1273450, - 25558666,), u32x4::new( - 9724711, - 8556709, - 25755845, - 10887647,)), - (u32x4::new( - 25465699, - 44651158, - 17658392, - 11257418,), u32x4::new( - 29735193, - 22885150, - 7094716, - 26828565,)), - (u32x4::new( - 48237389, - 47661599, - 27054393, - 7328070,), u32x4::new( - 
27280193, - 65616691, - 23062005, - 4170709,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 26535281, - 60238317, - 30343788, - 25790743,), u32x4::new( - 37993933, - 24614372, - 9523840, - 10401918,)), - (u32x4::new( - 2783987, - 29468958, - 4697011, - 19804475,), u32x4::new( - 37246678, - 46797720, - 10261254, - 18942252,)), - (u32x4::new( - 58135580, - 60247753, - 25301938, - 6844561,), u32x4::new( - 20949454, - 39844754, - 4552026, - 919057,)), - (u32x4::new( - 6694071, - 44126261, - 32285330, - 31370180,), u32x4::new( - 24603698, - 53328179, - 13971149, - 5325636,)), - (u32x4::new( - 64879487, - 582094, - 17982081, - 19190425,), u32x4::new( - 24951286, - 26923842, - 29077174, - 33286062,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 54863941, - 67016431, - 1224043, - 23371240,), u32x4::new( - 62940074, - 52101083, - 13523637, - 30366406,)), - (u32x4::new( - 36324581, - 25407485, - 18258623, - 4698602,), u32x4::new( - 50300544, - 2658516, - 26300935, - 2611030,)), - (u32x4::new( - 27183975, - 21791014, - 18105064, - 9875199,), u32x4::new( - 58118912, - 54198635, - 6400311, - 14767984,)), - (u32x4::new( - 33918318, - 42937962, - 14809334, - 22136592,), u32x4::new( - 10636588, - 29082337, - 29829692, - 28549776,)), - (u32x4::new( - 61080905, - 854212, - 12202487, - 20004503,), u32x4::new( - 9256495, - 6903981, - 20567109, - 347423,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 41391822, - 34336880, - 22362564, - 14247996,), u32x4::new( - 12115604, - 41583344, - 7639288, - 28910945,)), - (u32x4::new( - 62066617, - 59758859, - 26665947, - 11614812,), u32x4::new( - 65737664, - 45704543, - 30324810, - 12868376,)), - (u32x4::new( - 17491771, - 43589814, - 9454919, - 26047850,), u32x4::new( - 52629282, - 39304244, - 3868968, - 19296062,)), - (u32x4::new( - 17826638, - 30413590, - 32534225, - 32741469,), u32x4::new( - 15012391, - 14365713, - 33039233, - 14791399,)), - (u32x4::new( - 64115596, - 59197067, - 32739005, - 
23275744,), u32x4::new( - 32954320, - 22241406, - 20788442, - 4942942,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 31956192, - 59570132, - 2784352, - 4237732,), u32x4::new( - 47222312, - 4860927, - 18658867, - 15279314,)), - (u32x4::new( - 63240583, - 28160478, - 23524941, - 13390861,), u32x4::new( - 66437406, - 57718120, - 33345312, - 28896298,)), - (u32x4::new( - 39026193, - 46239965, - 21440243, - 25070488,), u32x4::new( - 64012383, - 60999016, - 16517060, - 29565907,)), - (u32x4::new( - 18118181, - 60161496, - 4212092, - 23976240,), u32x4::new( - 36277753, - 62363144, - 5816868, - 16964362,)), - (u32x4::new( - 18196138, - 62490693, - 281468, - 7934713,), u32x4::new( - 56027312, - 62015725, - 4837237, - 32932252,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 29885826, - 51028067, - 30418143, - 33438769,), u32x4::new( - 62542283, - 39442528, - 31535876, - 143299,)), - (u32x4::new( - 17143063, - 56709783, - 14451852, - 15782104,), u32x4::new( - 32762665, - 14047066, - 26295037, - 5432487,)), - (u32x4::new( - 75151, - 533606, - 7539077, - 30926189,), u32x4::new( - 38410914, - 23771680, - 4872443, - 29199566,)), - (u32x4::new( - 61522396, - 48934708, - 16223126, - 207380,), u32x4::new( - 11171993, - 47975147, - 14164574, - 352966,)), - (u32x4::new( - 15449006, - 56530757, - 26796528, - 12045834,), u32x4::new( - 63738697, - 40667227, - 33001582, - 9101885,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 43331297, - 18431341, - 25801195, - 17267698,), u32x4::new( - 19365485, - 57295202, - 22218985, - 21284590,)), - (u32x4::new( - 2429849, - 19152559, - 10762172, - 22564684,), u32x4::new( - 21880390, - 66866426, - 20357935, - 22641906,)), - (u32x4::new( - 19771185, - 31652693, - 3666117, - 28136958,), u32x4::new( - 23624283, - 55101502, - 6313920, - 6783662,)), - (u32x4::new( - 3487137, - 7092443, - 11001876, - 26196524,), u32x4::new( - 47319246, - 44542068, - 17594073, - 15027760,)), - (u32x4::new( - 49563607, - 
32191113, - 4991283, - 25400512,), u32x4::new( - 46539152, - 4155103, - 32368171, - 201203,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 20548943, - 14334571, - 4073874, - 6368588,), u32x4::new( - 53208883, - 56484515, - 15970071, - 25561889,)), - (u32x4::new( - 49915097, - 44030795, - 11202344, - 29284344,), u32x4::new( - 60258023, - 66225712, - 8075764, - 12383512,)), - (u32x4::new( - 45248912, - 4933668, - 9592153, - 5819559,), u32x4::new( - 31030983, - 38174071, - 32435814, - 7442522,)), - (u32x4::new( - 62688129, - 48218381, - 22089545, - 12897361,), u32x4::new( - 21050881, - 34278889, - 7569163, - 3225449,)), - (u32x4::new( - 19050183, - 51089071, - 32935757, - 22640195,), u32x4::new( - 66122318, - 47144608, - 18743677, - 25177079,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 41186817, - 46681702, - 31819867, - 32997133,), u32x4::new( - 38559207, - 27147015, - 30293819, - 16762988,)), - (u32x4::new( - 24154689, - 51762873, - 23883879, - 13510519,), u32x4::new( - 55338250, - 61224161, - 11663149, - 30803960,)), - (u32x4::new( - 18104238, - 14117824, - 11724021, - 21362053,), u32x4::new( - 65704761, - 35530242, - 13498058, - 33522849,)), - (u32x4::new( - 63812888, - 23995539, - 28920539, - 24005193,), u32x4::new( - 26412223, - 36582218, - 4251418, - 26160309,)), - (u32x4::new( - 16822053, - 66064082, - 3482145, - 31979593,), u32x4::new( - 45937188, - 54475379, - 612917, - 7976478,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 46509314, - 55327128, - 8944536, - 274914,), u32x4::new( - 26432930, - 53829300, - 21192572, - 3569894,)), - (u32x4::new( - 20919764, - 64356651, - 30642344, - 17215170,), u32x4::new( - 20335124, - 11203745, - 18663316, - 19024174,)), - (u32x4::new( - 59297055, - 53842463, - 3680204, - 9806710,), u32x4::new( - 54004169, - 51484914, - 29807998, - 20134199,)), - (u32x4::new( - 14781592, - 22628010, - 26877930, - 25880359,), u32x4::new( - 30434803, - 190607, - 30184292, - 8991040,)), - 
(u32x4::new( - 64400983, - 64591751, - 854562, - 28216111,), u32x4::new( - 20010398, - 50414793, - 9803872, - 22687008,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 15091184, - 32550863, - 8818643, - 4244752,), u32x4::new( - 43123513, - 64565526, - 408838, - 13206998,)), - (u32x4::new( - 16405061, - 60379639, - 31489017, - 20949281,), u32x4::new( - 27568751, - 38734986, - 8364264, - 12451020,)), - (u32x4::new( - 16005217, - 58008076, - 1406778, - 26546927,), u32x4::new( - 39571784, - 56365493, - 31274296, - 8918790,)), - (u32x4::new( - 23271122, - 19453469, - 27718201, - 32742670,), u32x4::new( - 234332, - 36785342, - 22601675, - 14331046,)), - (u32x4::new( - 40636025, - 22442705, - 22115403, - 23745859,), u32x4::new( - 41164945, - 61012, - 12499614, - 542137,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 62776018, - 32835413, - 17373246, - 17187309,), u32x4::new( - 54469193, - 21770290, - 15923753, - 28996575,)), - (u32x4::new( - 59385210, - 63082298, - 12568449, - 8509004,), u32x4::new( - 9483342, - 16105238, - 5756054, - 26890758,)), - (u32x4::new( - 53987996, - 38201748, - 5521661, - 19060159,), u32x4::new( - 18663191, - 9093637, - 27786835, - 31189196,)), - (u32x4::new( - 65872678, - 43635130, - 27903055, - 25020300,), u32x4::new( - 65772737, - 38110437, - 5213502, - 21909342,)), - (u32x4::new( - 4438979, - 9680838, - 10212446, - 4764184,), u32x4::new( - 13235684, - 58245995, - 20264570, - 21024049,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 60835961, - 48209103, - 31049052, - 4688268,), u32x4::new( - 12426713, - 59829045, - 22302488, - 29008521,)), - (u32x4::new( - 50401667, - 29716596, - 23531224, - 7581281,), u32x4::new( - 49071895, - 6952617, - 14934683, - 8218256,)), - (u32x4::new( - 1601446, - 36631413, - 31774811, - 29625330,), u32x4::new( - 56786114, - 8331539, - 23129509, - 19783344,)), - (u32x4::new( - 59514327, - 64513110, - 1772300, - 5701338,), u32x4::new( - 5737511, - 16147555, - 9461515, - 
5703271,)), - (u32x4::new( - 33072974, - 54300426, - 11940114, - 1308663,), u32x4::new( - 15627555, - 4931627, - 28443714, - 20924342,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 18135013, - 20358426, - 4922557, - 10015355,), u32x4::new( - 65729669, - 34786528, - 26248549, - 29194359,)), - (u32x4::new( - 797666, - 34997544, - 24316856, - 25107230,), u32x4::new( - 24612576, - 4761401, - 15307321, - 32404252,)), - (u32x4::new( - 16501152, - 60565831, - 9487105, - 9316022,), u32x4::new( - 24986054, - 31917592, - 3962024, - 2501883,)), - (u32x4::new( - 63356796, - 50432342, - 18044926, - 30566881,), u32x4::new( - 42032028, - 31415202, - 13524600, - 16119907,)), - (u32x4::new( - 3927286, - 57022374, - 9265437, - 21620772,), u32x4::new( - 19481940, - 3806938, - 24836192, - 14572399,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 10785787, - 46564798, - 368445, - 33181384,), u32x4::new( - 5319843, - 52687136, - 30347110, - 29837357,)), - (u32x4::new( - 56436732, - 47859251, - 24141084, - 22250712,), u32x4::new( - 59046084, - 4963427, - 33463413, - 17168859,)), - (u32x4::new( - 15512044, - 6366740, - 4737504, - 27644548,), u32x4::new( - 30307977, - 25037929, - 14593903, - 12836490,)), - (u32x4::new( - 63878897, - 34013023, - 5860752, - 7244096,), u32x4::new( - 3689461, - 57012135, - 18389096, - 11589351,)), - (u32x4::new( - 4682110, - 36302830, - 653422, - 22316819,), u32x4::new( - 14081831, - 5657024, - 11088376, - 24110612,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 39907267, - 45940262, - 24887471, - 18342609,), u32x4::new( - 878445, - 40456159, - 12019082, - 345107,)), - (u32x4::new( - 12794982, - 28893944, - 9447505, - 11387200,), u32x4::new( - 16961963, - 13916996, - 10893728, - 25898006,)), - (u32x4::new( - 44934162, - 53465865, - 3583620, - 1102334,), u32x4::new( - 53917811, - 63478576, - 2426066, - 10389549,)), - (u32x4::new( - 45096036, - 37595344, - 19367718, - 20257175,), u32x4::new( - 10280866, - 41653449, - 
27665642, - 375926,)), - (u32x4::new( - 45847901, - 24064074, - 32494820, - 32204556,), u32x4::new( - 10720704, - 51079060, - 1297436, - 29853825,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 66303987, - 36060363, - 16494578, - 24962147,), u32x4::new( - 11971403, - 49538586, - 25060560, - 1964341,)), - (u32x4::new( - 25988481, - 27641502, - 24909517, - 27237087,), u32x4::new( - 66646363, - 52777626, - 16360849, - 10459972,)), - (u32x4::new( - 43930529, - 34374176, - 31225968, - 8807030,), u32x4::new( - 10394758, - 35904854, - 25325589, - 19335583,)), - (u32x4::new( - 25094697, - 34380951, - 20051185, - 32287161,), u32x4::new( - 11739332, - 53887441, - 30517319, - 26601892,)), - (u32x4::new( - 8868546, - 35635502, - 32513071, - 28248087,), u32x4::new( - 51946989, - 14222744, - 19198839, - 23261841,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 51218008, - 5070126, - 11046681, - 5320810,), u32x4::new( - 61212079, - 34104447, - 23895089, - 6460727,)), - (u32x4::new( - 39843528, - 46278671, - 10426120, - 25624792,), u32x4::new( - 66658766, - 37140083, - 28933107, - 12969597,)), - (u32x4::new( - 59635793, - 40220191, - 5751421, - 173680,), u32x4::new( - 58321825, - 740337, - 1412847, - 7682623,)), - (u32x4::new( - 975962, - 56440763, - 20812276, - 22631115,), u32x4::new( - 49095824, - 19883130, - 2419746, - 31043648,)), - (u32x4::new( - 66208703, - 39669328, - 22525915, - 3748897,), u32x4::new( - 65994776, - 34533552, - 8126286, - 18326047,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 64176557, - 3912400, - 19351673, - 30068471,), u32x4::new( - 31190055, - 24221683, - 33142424, - 28698542,)), - (u32x4::new( - 34784792, - 4109933, - 3867193, - 19557314,), u32x4::new( - 2112512, - 32715890, - 24550117, - 16595976,)), - (u32x4::new( - 35542761, - 48024875, - 10925431, - 31526577,), u32x4::new( - 66577735, - 23189821, - 13375709, - 1735095,)), - (u32x4::new( - 59699254, - 43854093, - 29783239, - 24777271,), u32x4::new( - 
19600372, - 39924461, - 2896720, - 1472185,)), - (u32x4::new( - 56389656, - 35980854, - 33172342, - 1370336,), u32x4::new( - 23707480, - 57654949, - 7850973, - 12655016,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 38372660, - 57101970, - 7044964, - 12732710,), u32x4::new( - 57535705, - 6043201, - 30858914, - 10946592,)), - (u32x4::new( - 21023468, - 6946992, - 26403324, - 23901823,), u32x4::new( - 35695559, - 23440687, - 4763891, - 6514074,)), - (u32x4::new( - 28662273, - 30933699, - 9352242, - 26354829,), u32x4::new( - 37402243, - 3145176, - 8770289, - 525937,)), - (u32x4::new( - 54933102, - 36695832, - 3281859, - 4755022,), u32x4::new( - 23043294, - 32794379, - 15618886, - 23602412,)), - (u32x4::new( - 9931565, - 29897140, - 2480737, - 24193701,), u32x4::new( - 7833615, - 2284939, - 893926, - 13421882,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 22917795, - 22088359, - 28978099, - 19794863,), u32x4::new( - 60542318, - 29878494, - 31053731, - 9080720,)), - (u32x4::new( - 23679072, - 52547035, - 28424916, - 20647332,), u32x4::new( - 4008761, - 28267029, - 12961289, - 1589095,)), - (u32x4::new( - 55616194, - 26678929, - 14998265, - 23274397,), u32x4::new( - 54625466, - 46244264, - 28627706, - 33030665,)), - (u32x4::new( - 11527330, - 6449415, - 26531607, - 3472938,), u32x4::new( - 41541592, - 62607682, - 19862690, - 20564723,)), - (u32x4::new( - 32843805, - 49066843, - 28425824, - 19521495,), u32x4::new( - 48792073, - 48242878, - 27392443, - 13175986,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 16185025, - 61537525, - 2961305, - 1492442,), u32x4::new( - 25123147, - 3095034, - 31896958, - 33089615,)), - (u32x4::new( - 64748157, - 18336595, - 16522231, - 25426312,), u32x4::new( - 65718949, - 35485695, - 30554083, - 10205918,)), - (u32x4::new( - 39626934, - 39271045, - 16420458, - 9826240,), u32x4::new( - 56483981, - 27128085, - 3783403, - 13360006,)), - (u32x4::new( - 30793778, - 66771960, - 17241420, - 6564573,), 
u32x4::new( - 61102581, - 29974476, - 32385512, - 9011754,)), - (u32x4::new( - 28068166, - 11862220, - 14323567, - 12380617,), u32x4::new( - 52090465, - 16029056, - 24495309, - 21409233,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 59411973, - 57437124, - 11695483, - 17586857,), u32x4::new( - 16108987, - 43449109, - 31098002, - 6248476,)), - (u32x4::new( - 42258047, - 61595931, - 29308533, - 11742653,), u32x4::new( - 43042345, - 27373650, - 30165249, - 21929989,)), - (u32x4::new( - 49907221, - 9620337, - 21888081, - 20981082,), u32x4::new( - 56288861, - 61562203, - 33223566, - 3582446,)), - (u32x4::new( - 57535017, - 41003416, - 22080416, - 14463796,), u32x4::new( - 65518565, - 18127889, - 24370863, - 33332664,)), - (u32x4::new( - 66655380, - 6430175, - 471782, - 11947673,), u32x4::new( - 30596400, - 18898659, - 15930721, - 4211851,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 6757410, - 65455566, - 13584784, - 11362173,), u32x4::new( - 10797127, - 24451471, - 19541370, - 29309435,)), - (u32x4::new( - 40360156, - 17685025, - 18326181, - 3846903,), u32x4::new( - 13693365, - 63049479, - 31900359, - 23385063,)), - (u32x4::new( - 52455038, - 57513503, - 22163311, - 27095042,), u32x4::new( - 48610726, - 66454160, - 12085341, - 26357004,)), - (u32x4::new( - 22097042, - 14063840, - 6705778, - 14342902,), u32x4::new( - 66139825, - 20702105, - 31279090, - 7495745,)), - (u32x4::new( - 27360710, - 49314837, - 18774847, - 7146436,), u32x4::new( - 37066216, - 42004961, - 22409916, - 10524446,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 1497507, - 33054449, - 11839906, - 2960428,), u32x4::new( - 40538463, - 18884538, - 25018820, - 4073970,)), - (u32x4::new( - 54484385, - 43640735, - 2808257, - 20710708,), u32x4::new( - 39840730, - 27222424, - 21783544, - 11848522,)), - (u32x4::new( - 45765237, - 48200555, - 9299019, - 9393151,), u32x4::new( - 34818188, - 56098995, - 13575233, - 21012731,)), - (u32x4::new( - 4265428, - 49627650, 
- 24960282, - 9425650,), u32x4::new( - 47883651, - 2797524, - 11853190, - 22877329,)), - (u32x4::new( - 25008173, - 64199503, - 380047, - 12107343,), u32x4::new( - 12329448, - 11914399, - 764281, - 29687002,)), - ])), - CachedPoint(FieldElement2625x4([ - (u32x4::new( - 35889734, - 23047226, - 4022841, - 7017445,), u32x4::new( - 7274086, - 53316179, - 25100176, - 15310676,)), - (u32x4::new( - 42409427, - 30270106, - 6823853, - 31551384,), u32x4::new( - 40645017, - 66489807, - 18021817, - 32669351,)), - (u32x4::new( - 39827134, - 43680850, - 28297996, - 20258133,), u32x4::new( - 26058742, - 52643238, - 22238331, - 21690533,)), - (u32x4::new( - 60808002, - 17499995, - 30042246, - 29310584,), u32x4::new( - 48219954, - 29389518, - 8680514, - 17844709,)), - (u32x4::new( - 6452896, - 50116553, - 9532047, - 26821214,), u32x4::new( - 44524351, - 50428429, - 21904953, - 12608048,)), - ])) + ( + u32x4::new(3571425, 10045002, 19036563, 1096096), + u32x4::new(243332, 65897020, 0, 28963681), + ), + ( + u32x4::new(30896895, 63055514, 1614915, 5095970), + u32x4::new(0, 53791688, 0, 31258312), + ), + ( + u32x4::new(13347627, 40339464, 2236269, 11185503), + u32x4::new(0, 22520087, 0, 8659512), + ), + ( + u32x4::new(11125413, 29139905, 32037254, 28360723), + u32x4::new(0, 64556417, 0, 9635759), + ), + ( + u32x4::new(33268144, 47262491, 4336918, 15795740), + u32x4::new(0, 22027545, 0, 4846528), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(47099681, 31447946, 29365447, 24740513), + u32x4::new(42991046, 18317844, 16051644, 21404226), + ), + ( + u32x4::new(31708133, 28909527, 2366091, 13703791), + u32x4::new(469246, 54159622, 2601402, 32988002), + ), + ( + u32x4::new(63432457, 30251794, 15163516, 18491340), + u32x4::new(28144087, 35605455, 13682295, 18474872), + ), + ( + u32x4::new(12221607, 4967598, 26061980, 26008006), + u32x4::new(20226147, 9726961, 17410, 18051083), + ), + ( + u32x4::new(60569645, 62487085, 11911242, 21920922), + u32x4::new(4092105, 38186967, 
22431483, 31366585), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(18147205, 62587998, 2554617, 536692), + u32x4::new(11924528, 26674131, 17645433, 24341419), + ), + ( + u32x4::new(11573357, 27579485, 31491870, 29000885), + u32x4::new(10800976, 51902791, 28076395, 20464029), + ), + ( + u32x4::new(56031649, 10856669, 11791193, 26769430), + u32x4::new(25306956, 5922200, 6630685, 9385098), + ), + ( + u32x4::new(31319348, 23906711, 16290213, 32142166), + u32x4::new(61106354, 17181823, 3548308, 12022566), + ), + ( + u32x4::new(5904298, 50218605, 11826440, 5492249), + u32x4::new(10379071, 3472255, 172742, 31948344), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(10625852, 15193821, 22918394, 23676410), + u32x4::new(53695416, 54987793, 10067515, 11747680), + ), + ( + u32x4::new(65013325, 1309652, 29616320, 28922974), + u32x4::new(60360891, 19621771, 9938982, 30406429), + ), + ( + u32x4::new(54967954, 65931918, 5595602, 25719523), + u32x4::new(64909864, 30566415, 15945272, 8495317), + ), + ( + u32x4::new(1167157, 55265018, 11507029, 31641054), + u32x4::new(43497904, 2367338, 12937761, 27517066), + ), + ( + u32x4::new(656704, 2544994, 13006713, 480979), + u32x4::new(38471594, 62541240, 25353597, 11531760), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(22176662, 3984313, 27495285, 4110608), + u32x4::new(2909584, 30594106, 15677919, 2549183), + ), + ( + u32x4::new(33979105, 62269905, 2071511, 6894756), + u32x4::new(53189950, 47232857, 6408191, 6123225), + ), + ( + u32x4::new(32553873, 63948030, 12612401, 3633166), + u32x4::new(24054373, 37626618, 14481327, 8520484), + ), + ( + u32x4::new(56552486, 10749438, 12034813, 28811946), + u32x4::new(1445640, 36755601, 12104575, 10257833), + ), + ( + u32x4::new(22795808, 48761311, 1136056, 9380768), + u32x4::new(1411523, 5341811, 27318329, 9686767), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(21157200, 39156966, 20473176, 4934657), + u32x4::new(61478183, 45121537, 
5429856, 13035023), + ), + ( + u32x4::new(7954529, 58789246, 31440083, 7054221), + u32x4::new(38438565, 36856107, 1364112, 14548122), + ), + ( + u32x4::new(26120083, 36321360, 4919997, 31687496), + u32x4::new(33757765, 36237559, 15243054, 32163861), + ), + ( + u32x4::new(25878307, 46544824, 19455951, 2414935), + u32x4::new(16844726, 56521560, 32680554, 26660660), + ), + ( + u32x4::new(48360220, 43407178, 12187042, 24925816), + u32x4::new(7423722, 25746484, 12814654, 17395963), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(63153652, 32195955, 4087908, 8431689), + u32x4::new(30392384, 47203165, 8986649, 9053039), + ), + ( + u32x4::new(63659241, 47988767, 2931872, 19953600), + u32x4::new(11747107, 51610101, 20952181, 13364887), + ), + ( + u32x4::new(3659197, 58790649, 5930099, 2605312), + u32x4::new(28477896, 580728, 20579735, 2610622), + ), + ( + u32x4::new(41781607, 17161358, 10690531, 24368015), + u32x4::new(47027031, 36742339, 5414694, 13156365), + ), + ( + u32x4::new(13237853, 51182423, 8954802, 29006542), + u32x4::new(22643989, 56896541, 22830593, 10289708), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(1401265, 58846825, 30911620, 32239180), + u32x4::new(15391552, 15200821, 6339309, 16403588), + ), + ( + u32x4::new(55913797, 29541724, 1664461, 21709410), + u32x4::new(38470488, 47097092, 17674945, 32666066), + ), + ( + u32x4::new(22844482, 10797709, 27548106, 31638735), + u32x4::new(34500968, 26611503, 19727211, 13160873), + ), + ( + u32x4::new(31485204, 14496164, 13981208, 10276888), + u32x4::new(5748808, 35024436, 2740987, 7479021), + ), + ( + u32x4::new(58541207, 14866135, 32344041, 545930), + u32x4::new(62661488, 6941250, 27940205, 11976112), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(39849808, 44781685, 15697329, 24387845), + u32x4::new(12501486, 50260092, 23199481, 31929024), + ), + ( + u32x4::new(24823070, 27956017, 27034296, 10316465), + u32x4::new(47664045, 11152446, 15719183, 30181617), + ), + ( 
+ u32x4::new(20771189, 19969144, 31433937, 19185213), + u32x4::new(27565920, 10384445, 2893359, 9255362), + ), + ( + u32x4::new(42894974, 11925545, 32134441, 32738810), + u32x4::new(55916336, 32479272, 19563550, 5511385), + ), + ( + u32x4::new(17857161, 47809169, 14564114, 27997751), + u32x4::new(33024640, 38669671, 31956536, 27313245), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(58237774, 15917425, 18872208, 19394230), + u32x4::new(17374297, 6101419, 4839741, 6596900), + ), + ( + u32x4::new(66947393, 15744215, 18368993, 17750160), + u32x4::new(41006525, 9205497, 2629667, 32170865), + ), + ( + u32x4::new(66481381, 1919414, 28338762, 7372967), + u32x4::new(33819153, 4156199, 27126309, 12739816), + ), + ( + u32x4::new(44117158, 58545296, 22521371, 11809712), + u32x4::new(28998792, 50731010, 30215699, 25748377), + ), + ( + u32x4::new(23561284, 4160244, 9035405, 24895184), + u32x4::new(39761639, 59253416, 8684759, 22487864), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(12671134, 56419053, 16092401, 30038207), + u32x4::new(4002647, 47822606, 7151311, 28430768), + ), + ( + u32x4::new(61041684, 35765374, 30598048, 19666539), + u32x4::new(44150175, 40140037, 290469, 28442674), + ), + ( + u32x4::new(18847796, 1371617, 33316881, 13199936), + u32x4::new(43646578, 17068881, 12074900, 1537415), + ), + ( + u32x4::new(10052225, 38316070, 27469797, 5297537), + u32x4::new(50725570, 20435349, 10339121, 2779737), + ), + ( + u32x4::new(18372189, 15466385, 24762130, 22217964), + u32x4::new(23503887, 47844464, 10415034, 2606889), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(55082775, 45300503, 16032654, 5964396), + u32x4::new(17743504, 24634761, 19493066, 5184611), + ), + ( + u32x4::new(50172633, 35093294, 10040575, 23616256), + u32x4::new(4543900, 61852191, 4049821, 7423669), + ), + ( + u32x4::new(20295398, 40009376, 10487190, 15670429), + u32x4::new(51972856, 58649552, 20436392, 3432497), + ), + ( + u32x4::new(35189420, 
54117751, 12825868, 6283038), + u32x4::new(27540739, 30648758, 22658912, 9466689), + ), + ( + u32x4::new(51737549, 40725785, 17409814, 25201086), + u32x4::new(21156239, 34176168, 26814520, 5956424), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(8211442, 8014184, 6260823, 22108096), + u32x4::new(32182620, 51844847, 2466270, 28582231), + ), + ( + u32x4::new(27199739, 3848333, 31738017, 10892045), + u32x4::new(4963982, 65391770, 32551997, 28906469), + ), + ( + u32x4::new(16606846, 32207068, 26404535, 7614129), + u32x4::new(45416902, 65584718, 13821785, 2646060), + ), + ( + u32x4::new(36090634, 57981287, 32247670, 22837502), + u32x4::new(31003861, 55448117, 6062915, 20369975), + ), + ( + u32x4::new(27381403, 50578107, 522631, 29521058), + u32x4::new(31137497, 40220737, 27628049, 1824195), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(59402443, 17056879, 29262689, 6131785), + u32x4::new(52551472, 43367471, 29423199, 18899208), + ), + ( + u32x4::new(5749414, 43514612, 11365899, 21514624), + u32x4::new(65591890, 60945892, 19841732, 5628567), + ), + ( + u32x4::new(19334369, 52500268, 12307673, 5267367), + u32x4::new(3212103, 9035822, 29142161, 30520954), + ), + ( + u32x4::new(57261330, 6819646, 22089161, 9800373), + u32x4::new(55155453, 62250856, 13766735, 25244545), + ), + ( + u32x4::new(54370226, 61888301, 24496089, 2540581), + u32x4::new(65637506, 60274355, 18154273, 11687259), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(12521903, 26014045, 13995625, 33360175), + u32x4::new(23605474, 7376434, 27229267, 17195036), + ), + ( + u32x4::new(59482891, 10074423, 574357, 3857753), + u32x4::new(61377787, 50306685, 5241065, 20234396), + ), + ( + u32x4::new(23674717, 6997172, 20771841, 16858511), + u32x4::new(40565304, 29973136, 7049812, 14585010), + ), + ( + u32x4::new(1427477, 13295732, 31762066, 31499740), + u32x4::new(60419925, 54666164, 22009424, 8089609), + ), + ( + u32x4::new(58154031, 41593020, 15342328, 957047), + 
u32x4::new(38937260, 37037498, 24871992, 32973409), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(30654745, 51286025, 21206982, 2433562), + u32x4::new(12780105, 31732574, 33087964, 33081189), + ), + ( + u32x4::new(66640017, 42720009, 16567620, 15300745), + u32x4::new(1530367, 33001123, 20930247, 21042661), + ), + ( + u32x4::new(15003356, 5294119, 22985605, 18928772), + u32x4::new(32628461, 18230172, 14773298, 27193722), + ), + ( + u32x4::new(27555, 65346287, 17017174, 7837720), + u32x4::new(21499787, 42855613, 22474984, 13675085), + ), + ( + u32x4::new(24164369, 50130116, 5973149, 24152073), + u32x4::new(1577334, 25400030, 18648484, 32228854), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(49518649, 59119280, 31670678, 20396561), + u32x4::new(61728330, 651402, 176032, 9529498), + ), + ( + u32x4::new(61765532, 9082232, 32794568, 15526956), + u32x4::new(48543100, 32614212, 19001206, 25680229), + ), + ( + u32x4::new(32086091, 10373081, 8996131, 31822823), + u32x4::new(35788988, 49973190, 30542040, 17858455), + ), + ( + u32x4::new(48130197, 58121889, 27753291, 29923268), + u32x4::new(54448075, 43300790, 9336565, 15770022), + ), + ( + u32x4::new(57725546, 20557498, 9366233, 16023566), + u32x4::new(16189031, 2837363, 24315301, 27003505), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(28286608, 10767548, 18220739, 5413236), + u32x4::new(48253387, 58255702, 11864864, 28527159), + ), + ( + u32x4::new(45038176, 58655197, 25648758, 10951484), + u32x4::new(42564382, 34542843, 23146954, 22234334), + ), + ( + u32x4::new(14858710, 24978793, 15040559, 4379220), + u32x4::new(47621477, 40271440, 15650420, 1998736), + ), + ( + u32x4::new(24106391, 9626149, 344505, 25253814), + u32x4::new(34579800, 59687089, 25718289, 25904133), + ), + ( + u32x4::new(1981195, 37751302, 26132048, 1764722), + u32x4::new(13288231, 28808622, 12531301, 18292949), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(13869851, 31448904, 14963539, 
7581293), + u32x4::new(20536485, 35021083, 21257574, 33356609), + ), + ( + u32x4::new(36903364, 18429241, 11097857, 5943856), + u32x4::new(60583077, 40015815, 30509523, 31915271), + ), + ( + u32x4::new(49161801, 40681915, 67892, 25454357), + u32x4::new(22779677, 25798439, 15964829, 5863227), + ), + ( + u32x4::new(60810637, 4496471, 5217137, 14095116), + u32x4::new(50942411, 50712663, 2507380, 26844507), + ), + ( + u32x4::new(34579752, 53519385, 10859797, 18816024), + u32x4::new(42552864, 39478521, 6783896, 17277037), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(43287109, 27900723, 33182187, 2766754), + u32x4::new(17041989, 1018260, 33392790, 4830032), + ), + ( + u32x4::new(60194178, 30788903, 24728888, 14513195), + u32x4::new(20897010, 28843233, 20111980, 17475240), + ), + ( + u32x4::new(46042274, 19257042, 4628173, 31649727), + u32x4::new(27388316, 66631493, 11541886, 6408028), + ), + ( + u32x4::new(57024680, 49536568, 32050358, 31321917), + u32x4::new(17437691, 49672356, 2884755, 20493991), + ), + ( + u32x4::new(59553007, 46782643, 29001173, 1814088), + u32x4::new(21930692, 51319706, 14965872, 30748046), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(16441817, 36111849, 6900424, 602234), + u32x4::new(46522199, 16441484, 8135070, 21726541), + ), + ( + u32x4::new(37711225, 32701959, 11679112, 13125533), + u32x4::new(32154135, 9407918, 26554289, 620848), + ), + ( + u32x4::new(19233407, 30086864, 14679568, 2797374), + u32x4::new(4892806, 7993077, 247658, 5632804), + ), + ( + u32x4::new(37427262, 26675495, 27125659, 13496131), + u32x4::new(50718473, 40115609, 28505351, 27837393), + ), + ( + u32x4::new(196819, 18410429, 7070012, 21691388), + u32x4::new(29763371, 24754123, 9727048, 10930179), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(28319289, 40734650, 16225680, 24739184), + u32x4::new(64272368, 35356897, 7866648, 13635853), + ), + ( + u32x4::new(34165295, 48328447, 27041670, 23643655), + u32x4::new(48949950, 
52963288, 30411133, 6045174), + ), + ( + u32x4::new(18583559, 41649834, 9813585, 26098520), + u32x4::new(25682734, 26733526, 19276490, 10654728), + ), + ( + u32x4::new(34867476, 52715968, 5694571, 13380978), + u32x4::new(15134994, 1831255, 8608001, 17266401), + ), + ( + u32x4::new(59925903, 44282172, 27802465, 1855069), + u32x4::new(14234749, 36635487, 11302294, 10938429), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(8373273, 49064494, 4932071, 32997499), + u32x4::new(38472880, 29335908, 14504412, 22460029), + ), + ( + u32x4::new(31795930, 50785923, 25835990, 25790073), + u32x4::new(65669841, 11360450, 9969157, 9008164), + ), + ( + u32x4::new(50262498, 45869261, 16124434, 15336007), + u32x4::new(882762, 42522623, 11277198, 26296377), + ), + ( + u32x4::new(42332732, 59129236, 14452816, 567985), + u32x4::new(208061, 34722729, 32008143, 14828749), + ), + ( + u32x4::new(17937794, 36846032, 32102665, 4442466), + u32x4::new(19745435, 31633451, 7146411, 15812027), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(30741269, 38648744, 12562645, 30092623), + u32x4::new(25073992, 28730659, 27911745, 30000958), + ), + ( + u32x4::new(2859794, 25991700, 17776078, 27091930), + u32x4::new(2328322, 60061146, 18581824, 18039008), + ), + ( + u32x4::new(58206333, 17917354, 1972306, 11853766), + u32x4::new(2655376, 60543390, 18416710, 13287440), + ), + ( + u32x4::new(62746330, 61423885, 21246577, 2266675), + u32x4::new(60099139, 14804707, 14772234, 20679434), + ), + ( + u32x4::new(26987698, 15488817, 715616, 2339565), + u32x4::new(51980752, 17333865, 21965103, 10839820), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(18672548, 57660959, 16042910, 19519287), + u32x4::new(62865851, 17580961, 26628347, 23774759), + ), + ( + u32x4::new(368070, 3464471, 25888304, 30370559), + u32x4::new(52396053, 45426828, 28745251, 9246829), + ), + ( + u32x4::new(29090099, 57950037, 23104657, 4903923), + u32x4::new(10987778, 56163684, 23621539, 10332760), + 
), + ( + u32x4::new(53338235, 44851161, 21606845, 31069622), + u32x4::new(4243630, 34464392, 11286454, 5802022), + ), + ( + u32x4::new(46710757, 63389067, 11642865, 1980986), + u32x4::new(12967337, 28162061, 3854192, 30432268), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(12179834, 41005450, 12809619, 33525228), + u32x4::new(4624405, 46957889, 16968743, 11827816), + ), + ( + u32x4::new(51521162, 12466775, 31791271, 15303651), + u32x4::new(49798465, 62714504, 6509600, 12918560), + ), + ( + u32x4::new(20445559, 1756449, 28848701, 7920171), + u32x4::new(9835040, 5900071, 28757409, 12376688), + ), + ( + u32x4::new(18259496, 14281012, 21767026, 10232236), + u32x4::new(20000226, 12400540, 4104902, 23570543), + ), + ( + u32x4::new(3687440, 26546648, 13328821, 26841081), + u32x4::new(49822734, 22334054, 244496, 24862543), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(59523541, 62195428, 3853227, 13954801), + u32x4::new(12387708, 47627615, 27221350, 17899572), + ), + ( + u32x4::new(63193587, 36343307, 14595132, 6880795), + u32x4::new(1364792, 37648434, 3259017, 20536046), + ), + ( + u32x4::new(30362834, 10440372, 9574624, 11729232), + u32x4::new(63861613, 21748389, 5530846, 2721586), + ), + ( + u32x4::new(18339760, 1550632, 17170271, 25732971), + u32x4::new(28459263, 63142237, 21642345, 31557672), + ), + ( + u32x4::new(10611282, 5204623, 18049257, 214175), + u32x4::new(19432723, 49809070, 26010406, 27449522), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(19770733, 26478685, 9464541, 29158041), + u32x4::new(28604307, 45196604, 7586524, 6641859), + ), + ( + u32x4::new(65654484, 52230498, 30886612, 19112823), + u32x4::new(47271809, 38942611, 16020035, 10773481), + ), + ( + u32x4::new(27464323, 54451016, 20646645, 17732915), + u32x4::new(23008717, 53626684, 3253189, 15614410), + ), + ( + u32x4::new(52381752, 40693008, 7063024, 28469981), + u32x4::new(51159478, 44543211, 19941777, 5985451), + ), + ( + u32x4::new(13553668, 
35524849, 14788737, 1883845), + u32x4::new(12385775, 47958835, 29135466, 1776722), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(36719806, 20827965, 23175373, 32996806), + u32x4::new(42041892, 65708790, 5467143, 20884008), + ), + ( + u32x4::new(43256281, 40770646, 17244063, 31959819), + u32x4::new(64366384, 43544617, 25057754, 12628720), + ), + ( + u32x4::new(17337782, 58472057, 27906934, 15305274), + u32x4::new(30292418, 39284317, 16946773, 24806712), + ), + ( + u32x4::new(6485126, 32447403, 16261486, 13561940), + u32x4::new(49439635, 10738368, 16419889, 8897231), + ), + ( + u32x4::new(44812203, 40122262, 25496058, 2759794), + u32x4::new(25295304, 52178368, 24154195, 29334408), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(42307254, 57217102, 1088936, 3832827), + u32x4::new(33905401, 23130334, 6958056, 12622851), + ), + ( + u32x4::new(3881189, 14870059, 19712830, 6071598), + u32x4::new(38147944, 60776394, 3427938, 13765703), + ), + ( + u32x4::new(7666911, 24227591, 17077136, 22967588), + u32x4::new(6874639, 30915523, 11451695, 24292224), + ), + ( + u32x4::new(13659529, 31984463, 28764736, 20506164), + u32x4::new(64729627, 49321636, 28284636, 25472371), + ), + ( + u32x4::new(39360308, 42281399, 9446504, 868960), + u32x4::new(49227724, 21351115, 30561851, 11292096), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(7071115, 46444090, 5387916, 15432877), + u32x4::new(27226682, 41506862, 2398278, 3978240), + ), + ( + u32x4::new(51009614, 54216973, 24368938, 31392616), + u32x4::new(38456150, 62313644, 6729154, 99724), + ), + ( + u32x4::new(17474332, 62857913, 2619930, 30659308), + u32x4::new(18268181, 32809239, 22826292, 24561895), + ), + ( + u32x4::new(38187020, 67003092, 14118280, 16500577), + u32x4::new(18808560, 64983716, 25712929, 32518261), + ), + ( + u32x4::new(25735813, 62284262, 10824872, 20558596), + u32x4::new(48149681, 31162667, 22608274, 26285185), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + 
u32x4::new(963440, 63742255, 10230323, 25515008), + u32x4::new(32506414, 6105697, 25980317, 24645129), + ), + ( + u32x4::new(7162189, 8101249, 14679265, 33443386), + u32x4::new(2002396, 8541405, 19442276, 4795881), + ), + ( + u32x4::new(8116694, 51463069, 4415528, 25599140), + u32x4::new(55805721, 39582709, 6719436, 30033839), + ), + ( + u32x4::new(14468202, 42181869, 25188826, 9639755), + u32x4::new(47546189, 62711146, 32762447, 18338064), + ), + ( + u32x4::new(33880058, 32810909, 8969931, 13095238), + u32x4::new(38360605, 40138517, 9246134, 4928058), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(63655588, 17883670, 9410246, 26162761), + u32x4::new(5000571, 7349225, 23785252, 32751089), + ), + ( + u32x4::new(28568737, 10733123, 9342397, 21570673), + u32x4::new(54096560, 32467591, 20494687, 21511513), + ), + ( + u32x4::new(47675157, 47932807, 29250946, 15672208), + u32x4::new(59760469, 9945465, 14939287, 18437405), + ), + ( + u32x4::new(37985267, 8609815, 31573002, 3373596), + u32x4::new(47828883, 20834216, 13248616, 24154292), + ), + ( + u32x4::new(5543543, 29553242, 3386453, 30501150), + u32x4::new(25058089, 15236571, 8814395, 32462955), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(39158670, 15322548, 20495103, 3312736), + u32x4::new(14557171, 12985179, 8044741, 3176899), + ), + ( + u32x4::new(24673290, 29693310, 21412266, 18324699), + u32x4::new(2154518, 40329021, 17500543, 3954277), + ), + ( + u32x4::new(36758685, 38738957, 165513, 14691866), + u32x4::new(3070475, 10424235, 17096536, 16896898), + ), + ( + u32x4::new(59790459, 43094586, 8720681, 10423589), + u32x4::new(1122030, 31545615, 4463786, 31811293), + ), + ( + u32x4::new(49778992, 60881044, 20509974, 5832494), + u32x4::new(64155961, 31483358, 4511231, 20307815), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(2863373, 40876242, 26865913, 24067353), + u32x4::new(15726407, 40919070, 12953902, 9931535), + ), + ( + u32x4::new(60934877, 42512204, 21649141, 
21945190), + u32x4::new(52211954, 60984193, 7046207, 5363493), + ), + ( + u32x4::new(4205971, 64068464, 18197273, 7327176), + u32x4::new(51527794, 21166920, 20669933, 11828242), + ), + ( + u32x4::new(59782815, 49617225, 15379924, 457923), + u32x4::new(9320508, 21498914, 3242540, 31563182), + ), + ( + u32x4::new(27714753, 8664670, 3366162, 26338598), + u32x4::new(56775518, 25796006, 13129151, 21388876), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(59276548, 49972346, 16795002, 33455915), + u32x4::new(48430097, 53857205, 18627071, 32474471), + ), + ( + u32x4::new(42160315, 50705892, 13530540, 28012698), + u32x4::new(19833221, 55886870, 20191784, 9644313), + ), + ( + u32x4::new(20372416, 28414713, 24084234, 31804096), + u32x4::new(33815377, 36131001, 17251241, 18291088), + ), + ( + u32x4::new(56234667, 14920441, 2033267, 29572003), + u32x4::new(1724043, 45519699, 17873735, 501988), + ), + ( + u32x4::new(50031659, 31517850, 15697583, 1016845), + u32x4::new(43104661, 54769582, 8008601, 27257051), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(52951491, 66542164, 14853573, 30444631), + u32x4::new(12045973, 24321813, 16545674, 18160646), + ), + ( + u32x4::new(60107911, 1126003, 5947677, 19486116), + u32x4::new(41119984, 30860440, 7935395, 13354438), + ), + ( + u32x4::new(17841328, 11063269, 1664538, 26687568), + u32x4::new(6268968, 22280371, 17275484, 4523163), + ), + ( + u32x4::new(15886041, 56799482, 15446552, 21712778), + u32x4::new(1005290, 17827215, 4978741, 6854882), + ), + ( + u32x4::new(34319277, 47731002, 20321804, 28544575), + u32x4::new(29591814, 63376351, 24754545, 26001714), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(66783087, 5234346, 46102, 8566476), + u32x4::new(19947339, 20180418, 25398238, 3726678), + ), + ( + u32x4::new(63890180, 46380965, 20674069, 5366544), + u32x4::new(59661487, 48406612, 31533614, 7071217), + ), + ( + u32x4::new(13104676, 1406631, 24326736, 19854367), + u32x4::new(61039528, 
11019904, 31967425, 19219275), + ), + ( + u32x4::new(39003597, 30143957, 15351834, 8639435), + u32x4::new(57309582, 61436794, 15830475, 10090318), + ), + ( + u32x4::new(45923044, 6700175, 99413, 21263025), + u32x4::new(23762647, 53905481, 6063914, 10065424), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(42822326, 57678669, 4052879, 25452667), + u32x4::new(54049411, 2373092, 22337016, 7701046), + ), + ( + u32x4::new(44382355, 43307377, 16761537, 30373573), + u32x4::new(49790216, 23230748, 25655306, 10519391), + ), + ( + u32x4::new(919475, 59371245, 1273450, 25558666), + u32x4::new(9724711, 8556709, 25755845, 10887647), + ), + ( + u32x4::new(25465699, 44651158, 17658392, 11257418), + u32x4::new(29735193, 22885150, 7094716, 26828565), + ), + ( + u32x4::new(48237389, 47661599, 27054393, 7328070), + u32x4::new(27280193, 65616691, 23062005, 4170709), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(26535281, 60238317, 30343788, 25790743), + u32x4::new(37993933, 24614372, 9523840, 10401918), + ), + ( + u32x4::new(2783987, 29468958, 4697011, 19804475), + u32x4::new(37246678, 46797720, 10261254, 18942252), + ), + ( + u32x4::new(58135580, 60247753, 25301938, 6844561), + u32x4::new(20949454, 39844754, 4552026, 919057), + ), + ( + u32x4::new(6694071, 44126261, 32285330, 31370180), + u32x4::new(24603698, 53328179, 13971149, 5325636), + ), + ( + u32x4::new(64879487, 582094, 17982081, 19190425), + u32x4::new(24951286, 26923842, 29077174, 33286062), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(54863941, 67016431, 1224043, 23371240), + u32x4::new(62940074, 52101083, 13523637, 30366406), + ), + ( + u32x4::new(36324581, 25407485, 18258623, 4698602), + u32x4::new(50300544, 2658516, 26300935, 2611030), + ), + ( + u32x4::new(27183975, 21791014, 18105064, 9875199), + u32x4::new(58118912, 54198635, 6400311, 14767984), + ), + ( + u32x4::new(33918318, 42937962, 14809334, 22136592), + u32x4::new(10636588, 29082337, 29829692, 28549776), + ), 
+ ( + u32x4::new(61080905, 854212, 12202487, 20004503), + u32x4::new(9256495, 6903981, 20567109, 347423), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(41391822, 34336880, 22362564, 14247996), + u32x4::new(12115604, 41583344, 7639288, 28910945), + ), + ( + u32x4::new(62066617, 59758859, 26665947, 11614812), + u32x4::new(65737664, 45704543, 30324810, 12868376), + ), + ( + u32x4::new(17491771, 43589814, 9454919, 26047850), + u32x4::new(52629282, 39304244, 3868968, 19296062), + ), + ( + u32x4::new(17826638, 30413590, 32534225, 32741469), + u32x4::new(15012391, 14365713, 33039233, 14791399), + ), + ( + u32x4::new(64115596, 59197067, 32739005, 23275744), + u32x4::new(32954320, 22241406, 20788442, 4942942), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(31956192, 59570132, 2784352, 4237732), + u32x4::new(47222312, 4860927, 18658867, 15279314), + ), + ( + u32x4::new(63240583, 28160478, 23524941, 13390861), + u32x4::new(66437406, 57718120, 33345312, 28896298), + ), + ( + u32x4::new(39026193, 46239965, 21440243, 25070488), + u32x4::new(64012383, 60999016, 16517060, 29565907), + ), + ( + u32x4::new(18118181, 60161496, 4212092, 23976240), + u32x4::new(36277753, 62363144, 5816868, 16964362), + ), + ( + u32x4::new(18196138, 62490693, 281468, 7934713), + u32x4::new(56027312, 62015725, 4837237, 32932252), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(29885826, 51028067, 30418143, 33438769), + u32x4::new(62542283, 39442528, 31535876, 143299), + ), + ( + u32x4::new(17143063, 56709783, 14451852, 15782104), + u32x4::new(32762665, 14047066, 26295037, 5432487), + ), + ( + u32x4::new(75151, 533606, 7539077, 30926189), + u32x4::new(38410914, 23771680, 4872443, 29199566), + ), + ( + u32x4::new(61522396, 48934708, 16223126, 207380), + u32x4::new(11171993, 47975147, 14164574, 352966), + ), + ( + u32x4::new(15449006, 56530757, 26796528, 12045834), + u32x4::new(63738697, 40667227, 33001582, 9101885), + ), + ])), + 
CachedPoint(FieldElement2625x4([ + ( + u32x4::new(43331297, 18431341, 25801195, 17267698), + u32x4::new(19365485, 57295202, 22218985, 21284590), + ), + ( + u32x4::new(2429849, 19152559, 10762172, 22564684), + u32x4::new(21880390, 66866426, 20357935, 22641906), + ), + ( + u32x4::new(19771185, 31652693, 3666117, 28136958), + u32x4::new(23624283, 55101502, 6313920, 6783662), + ), + ( + u32x4::new(3487137, 7092443, 11001876, 26196524), + u32x4::new(47319246, 44542068, 17594073, 15027760), + ), + ( + u32x4::new(49563607, 32191113, 4991283, 25400512), + u32x4::new(46539152, 4155103, 32368171, 201203), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(20548943, 14334571, 4073874, 6368588), + u32x4::new(53208883, 56484515, 15970071, 25561889), + ), + ( + u32x4::new(49915097, 44030795, 11202344, 29284344), + u32x4::new(60258023, 66225712, 8075764, 12383512), + ), + ( + u32x4::new(45248912, 4933668, 9592153, 5819559), + u32x4::new(31030983, 38174071, 32435814, 7442522), + ), + ( + u32x4::new(62688129, 48218381, 22089545, 12897361), + u32x4::new(21050881, 34278889, 7569163, 3225449), + ), + ( + u32x4::new(19050183, 51089071, 32935757, 22640195), + u32x4::new(66122318, 47144608, 18743677, 25177079), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(41186817, 46681702, 31819867, 32997133), + u32x4::new(38559207, 27147015, 30293819, 16762988), + ), + ( + u32x4::new(24154689, 51762873, 23883879, 13510519), + u32x4::new(55338250, 61224161, 11663149, 30803960), + ), + ( + u32x4::new(18104238, 14117824, 11724021, 21362053), + u32x4::new(65704761, 35530242, 13498058, 33522849), + ), + ( + u32x4::new(63812888, 23995539, 28920539, 24005193), + u32x4::new(26412223, 36582218, 4251418, 26160309), + ), + ( + u32x4::new(16822053, 66064082, 3482145, 31979593), + u32x4::new(45937188, 54475379, 612917, 7976478), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(46509314, 55327128, 8944536, 274914), + u32x4::new(26432930, 53829300, 21192572, 3569894), + 
), + ( + u32x4::new(20919764, 64356651, 30642344, 17215170), + u32x4::new(20335124, 11203745, 18663316, 19024174), + ), + ( + u32x4::new(59297055, 53842463, 3680204, 9806710), + u32x4::new(54004169, 51484914, 29807998, 20134199), + ), + ( + u32x4::new(14781592, 22628010, 26877930, 25880359), + u32x4::new(30434803, 190607, 30184292, 8991040), + ), + ( + u32x4::new(64400983, 64591751, 854562, 28216111), + u32x4::new(20010398, 50414793, 9803872, 22687008), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(15091184, 32550863, 8818643, 4244752), + u32x4::new(43123513, 64565526, 408838, 13206998), + ), + ( + u32x4::new(16405061, 60379639, 31489017, 20949281), + u32x4::new(27568751, 38734986, 8364264, 12451020), + ), + ( + u32x4::new(16005217, 58008076, 1406778, 26546927), + u32x4::new(39571784, 56365493, 31274296, 8918790), + ), + ( + u32x4::new(23271122, 19453469, 27718201, 32742670), + u32x4::new(234332, 36785342, 22601675, 14331046), + ), + ( + u32x4::new(40636025, 22442705, 22115403, 23745859), + u32x4::new(41164945, 61012, 12499614, 542137), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(62776018, 32835413, 17373246, 17187309), + u32x4::new(54469193, 21770290, 15923753, 28996575), + ), + ( + u32x4::new(59385210, 63082298, 12568449, 8509004), + u32x4::new(9483342, 16105238, 5756054, 26890758), + ), + ( + u32x4::new(53987996, 38201748, 5521661, 19060159), + u32x4::new(18663191, 9093637, 27786835, 31189196), + ), + ( + u32x4::new(65872678, 43635130, 27903055, 25020300), + u32x4::new(65772737, 38110437, 5213502, 21909342), + ), + ( + u32x4::new(4438979, 9680838, 10212446, 4764184), + u32x4::new(13235684, 58245995, 20264570, 21024049), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(60835961, 48209103, 31049052, 4688268), + u32x4::new(12426713, 59829045, 22302488, 29008521), + ), + ( + u32x4::new(50401667, 29716596, 23531224, 7581281), + u32x4::new(49071895, 6952617, 14934683, 8218256), + ), + ( + u32x4::new(1601446, 
36631413, 31774811, 29625330), + u32x4::new(56786114, 8331539, 23129509, 19783344), + ), + ( + u32x4::new(59514327, 64513110, 1772300, 5701338), + u32x4::new(5737511, 16147555, 9461515, 5703271), + ), + ( + u32x4::new(33072974, 54300426, 11940114, 1308663), + u32x4::new(15627555, 4931627, 28443714, 20924342), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(18135013, 20358426, 4922557, 10015355), + u32x4::new(65729669, 34786528, 26248549, 29194359), + ), + ( + u32x4::new(797666, 34997544, 24316856, 25107230), + u32x4::new(24612576, 4761401, 15307321, 32404252), + ), + ( + u32x4::new(16501152, 60565831, 9487105, 9316022), + u32x4::new(24986054, 31917592, 3962024, 2501883), + ), + ( + u32x4::new(63356796, 50432342, 18044926, 30566881), + u32x4::new(42032028, 31415202, 13524600, 16119907), + ), + ( + u32x4::new(3927286, 57022374, 9265437, 21620772), + u32x4::new(19481940, 3806938, 24836192, 14572399), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(10785787, 46564798, 368445, 33181384), + u32x4::new(5319843, 52687136, 30347110, 29837357), + ), + ( + u32x4::new(56436732, 47859251, 24141084, 22250712), + u32x4::new(59046084, 4963427, 33463413, 17168859), + ), + ( + u32x4::new(15512044, 6366740, 4737504, 27644548), + u32x4::new(30307977, 25037929, 14593903, 12836490), + ), + ( + u32x4::new(63878897, 34013023, 5860752, 7244096), + u32x4::new(3689461, 57012135, 18389096, 11589351), + ), + ( + u32x4::new(4682110, 36302830, 653422, 22316819), + u32x4::new(14081831, 5657024, 11088376, 24110612), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(39907267, 45940262, 24887471, 18342609), + u32x4::new(878445, 40456159, 12019082, 345107), + ), + ( + u32x4::new(12794982, 28893944, 9447505, 11387200), + u32x4::new(16961963, 13916996, 10893728, 25898006), + ), + ( + u32x4::new(44934162, 53465865, 3583620, 1102334), + u32x4::new(53917811, 63478576, 2426066, 10389549), + ), + ( + u32x4::new(45096036, 37595344, 19367718, 20257175), + 
u32x4::new(10280866, 41653449, 27665642, 375926), + ), + ( + u32x4::new(45847901, 24064074, 32494820, 32204556), + u32x4::new(10720704, 51079060, 1297436, 29853825), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(66303987, 36060363, 16494578, 24962147), + u32x4::new(11971403, 49538586, 25060560, 1964341), + ), + ( + u32x4::new(25988481, 27641502, 24909517, 27237087), + u32x4::new(66646363, 52777626, 16360849, 10459972), + ), + ( + u32x4::new(43930529, 34374176, 31225968, 8807030), + u32x4::new(10394758, 35904854, 25325589, 19335583), + ), + ( + u32x4::new(25094697, 34380951, 20051185, 32287161), + u32x4::new(11739332, 53887441, 30517319, 26601892), + ), + ( + u32x4::new(8868546, 35635502, 32513071, 28248087), + u32x4::new(51946989, 14222744, 19198839, 23261841), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(51218008, 5070126, 11046681, 5320810), + u32x4::new(61212079, 34104447, 23895089, 6460727), + ), + ( + u32x4::new(39843528, 46278671, 10426120, 25624792), + u32x4::new(66658766, 37140083, 28933107, 12969597), + ), + ( + u32x4::new(59635793, 40220191, 5751421, 173680), + u32x4::new(58321825, 740337, 1412847, 7682623), + ), + ( + u32x4::new(975962, 56440763, 20812276, 22631115), + u32x4::new(49095824, 19883130, 2419746, 31043648), + ), + ( + u32x4::new(66208703, 39669328, 22525915, 3748897), + u32x4::new(65994776, 34533552, 8126286, 18326047), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(64176557, 3912400, 19351673, 30068471), + u32x4::new(31190055, 24221683, 33142424, 28698542), + ), + ( + u32x4::new(34784792, 4109933, 3867193, 19557314), + u32x4::new(2112512, 32715890, 24550117, 16595976), + ), + ( + u32x4::new(35542761, 48024875, 10925431, 31526577), + u32x4::new(66577735, 23189821, 13375709, 1735095), + ), + ( + u32x4::new(59699254, 43854093, 29783239, 24777271), + u32x4::new(19600372, 39924461, 2896720, 1472185), + ), + ( + u32x4::new(56389656, 35980854, 33172342, 1370336), + u32x4::new(23707480, 57654949, 
7850973, 12655016), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(38372660, 57101970, 7044964, 12732710), + u32x4::new(57535705, 6043201, 30858914, 10946592), + ), + ( + u32x4::new(21023468, 6946992, 26403324, 23901823), + u32x4::new(35695559, 23440687, 4763891, 6514074), + ), + ( + u32x4::new(28662273, 30933699, 9352242, 26354829), + u32x4::new(37402243, 3145176, 8770289, 525937), + ), + ( + u32x4::new(54933102, 36695832, 3281859, 4755022), + u32x4::new(23043294, 32794379, 15618886, 23602412), + ), + ( + u32x4::new(9931565, 29897140, 2480737, 24193701), + u32x4::new(7833615, 2284939, 893926, 13421882), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(22917795, 22088359, 28978099, 19794863), + u32x4::new(60542318, 29878494, 31053731, 9080720), + ), + ( + u32x4::new(23679072, 52547035, 28424916, 20647332), + u32x4::new(4008761, 28267029, 12961289, 1589095), + ), + ( + u32x4::new(55616194, 26678929, 14998265, 23274397), + u32x4::new(54625466, 46244264, 28627706, 33030665), + ), + ( + u32x4::new(11527330, 6449415, 26531607, 3472938), + u32x4::new(41541592, 62607682, 19862690, 20564723), + ), + ( + u32x4::new(32843805, 49066843, 28425824, 19521495), + u32x4::new(48792073, 48242878, 27392443, 13175986), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(16185025, 61537525, 2961305, 1492442), + u32x4::new(25123147, 3095034, 31896958, 33089615), + ), + ( + u32x4::new(64748157, 18336595, 16522231, 25426312), + u32x4::new(65718949, 35485695, 30554083, 10205918), + ), + ( + u32x4::new(39626934, 39271045, 16420458, 9826240), + u32x4::new(56483981, 27128085, 3783403, 13360006), + ), + ( + u32x4::new(30793778, 66771960, 17241420, 6564573), + u32x4::new(61102581, 29974476, 32385512, 9011754), + ), + ( + u32x4::new(28068166, 11862220, 14323567, 12380617), + u32x4::new(52090465, 16029056, 24495309, 21409233), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(59411973, 57437124, 11695483, 17586857), + u32x4::new(16108987, 
43449109, 31098002, 6248476), + ), + ( + u32x4::new(42258047, 61595931, 29308533, 11742653), + u32x4::new(43042345, 27373650, 30165249, 21929989), + ), + ( + u32x4::new(49907221, 9620337, 21888081, 20981082), + u32x4::new(56288861, 61562203, 33223566, 3582446), + ), + ( + u32x4::new(57535017, 41003416, 22080416, 14463796), + u32x4::new(65518565, 18127889, 24370863, 33332664), + ), + ( + u32x4::new(66655380, 6430175, 471782, 11947673), + u32x4::new(30596400, 18898659, 15930721, 4211851), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(6757410, 65455566, 13584784, 11362173), + u32x4::new(10797127, 24451471, 19541370, 29309435), + ), + ( + u32x4::new(40360156, 17685025, 18326181, 3846903), + u32x4::new(13693365, 63049479, 31900359, 23385063), + ), + ( + u32x4::new(52455038, 57513503, 22163311, 27095042), + u32x4::new(48610726, 66454160, 12085341, 26357004), + ), + ( + u32x4::new(22097042, 14063840, 6705778, 14342902), + u32x4::new(66139825, 20702105, 31279090, 7495745), + ), + ( + u32x4::new(27360710, 49314837, 18774847, 7146436), + u32x4::new(37066216, 42004961, 22409916, 10524446), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(1497507, 33054449, 11839906, 2960428), + u32x4::new(40538463, 18884538, 25018820, 4073970), + ), + ( + u32x4::new(54484385, 43640735, 2808257, 20710708), + u32x4::new(39840730, 27222424, 21783544, 11848522), + ), + ( + u32x4::new(45765237, 48200555, 9299019, 9393151), + u32x4::new(34818188, 56098995, 13575233, 21012731), + ), + ( + u32x4::new(4265428, 49627650, 24960282, 9425650), + u32x4::new(47883651, 2797524, 11853190, 22877329), + ), + ( + u32x4::new(25008173, 64199503, 380047, 12107343), + u32x4::new(12329448, 11914399, 764281, 29687002), + ), + ])), + CachedPoint(FieldElement2625x4([ + ( + u32x4::new(35889734, 23047226, 4022841, 7017445), + u32x4::new(7274086, 53316179, 25100176, 15310676), + ), + ( + u32x4::new(42409427, 30270106, 6823853, 31551384), + u32x4::new(40645017, 66489807, 18021817, 32669351), 
+ ), + ( + u32x4::new(39827134, 43680850, 28297996, 20258133), + u32x4::new(26058742, 52643238, 22238331, 21690533), + ), + ( + u32x4::new(60808002, 17499995, 30042246, 29310584), + u32x4::new(48219954, 29389518, 8680514, 17844709), + ), + ( + u32x4::new(6452896, 50116553, 9532047, 26821214), + u32x4::new(44524351, 50428429, 21904953, 12608048), + ), + ])), ]); From 38807d0efb9298dd34271f75e778fe41f07452fd Mon Sep 17 00:00:00 2001 From: Tarinn Date: Fri, 1 Mar 2024 17:52:04 +0100 Subject: [PATCH 06/14] implemented packed_simd for neon --- curve25519-dalek/Cargo.toml | 3 + curve25519-dalek/build.rs | 2 +- curve25519-dalek/src/backend/mod.rs | 78 +- curve25519-dalek/src/backend/vector/mod.rs | 7 +- .../src/backend/vector/neon/constants.rs | 1328 ++++++++--------- .../src/backend/vector/neon/edwards.rs | 4 +- .../src/backend/vector/neon/field.rs | 220 +-- .../src/backend/vector/neon/mod.rs | 4 + .../src/backend/vector/neon/packed_simd.rs | 320 ++++ .../backend/vector/scalar_mul/pippenger.rs | 8 +- .../vector/scalar_mul/precomputed_straus.rs | 8 +- .../src/backend/vector/scalar_mul/straus.rs | 8 +- .../vector/scalar_mul/variable_base.rs | 8 +- .../vector/scalar_mul/vartime_double_base.rs | 12 +- curve25519-dalek/src/lib.rs | 3 + 15 files changed, 1211 insertions(+), 802 deletions(-) create mode 100644 curve25519-dalek/src/backend/vector/neon/packed_simd.rs diff --git a/curve25519-dalek/Cargo.toml b/curve25519-dalek/Cargo.toml index 48dcb977d..ea640e862 100644 --- a/curve25519-dalek/Cargo.toml +++ b/curve25519-dalek/Cargo.toml @@ -72,3 +72,6 @@ group-bits = ["group", "ff/bits"] [target.'cfg(all(not(curve25519_dalek_backend = "fiat"), not(curve25519_dalek_backend = "serial"), target_arch = "x86_64"))'.dependencies] curve25519-dalek-derive = { version = "0.1", path = "../curve25519-dalek-derive" } + +[target.'cfg(all(not(curve25519_dalek_backend = "fiat"), not(curve25519_dalek_backend = "serial"), target_arch = "aarch64"))'.dependencies] +curve25519-dalek-derive = { 
version = "0.1", path = "../curve25519-dalek-derive" } diff --git a/curve25519-dalek/build.rs b/curve25519-dalek/build.rs index 92d2802cd..eca17c02d 100644 --- a/curve25519-dalek/build.rs +++ b/curve25519-dalek/build.rs @@ -66,7 +66,7 @@ fn main() { // Is the target arch & curve25519_dalek_bits potentially simd capable ? fn is_capable_simd(arch: &str, bits: DalekBits) -> bool { - arch == "x86_64" && bits == DalekBits::Dalek64 + (arch == "x86_64" || arch == "aarch64") && bits == DalekBits::Dalek64 } // Deterministic cfg(curve25519_dalek_bits) when this is not explicitly set. diff --git a/curve25519-dalek/src/backend/mod.rs b/curve25519-dalek/src/backend/mod.rs index 4424e0a53..61c93b0af 100644 --- a/curve25519-dalek/src/backend/mod.rs +++ b/curve25519-dalek/src/backend/mod.rs @@ -44,16 +44,22 @@ pub mod vector; #[derive(Copy, Clone)] enum BackendKind { - #[cfg(curve25519_dalek_backend = "simd")] + #[cfg(all(curve25519_dalek_backend = "simd", target_arch="x86_64"))] Avx2, - #[cfg(all(curve25519_dalek_backend = "simd", nightly))] + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="x86_64"))] Avx512, + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="aarch64"))] + Neon, Serial, } #[inline] fn get_selected_backend() -> BackendKind { - #[cfg(all(curve25519_dalek_backend = "simd", nightly))] + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="aarch64"))] + { + return BackendKind::Neon; + } + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="x86_64"))] { cpufeatures::new!(cpuid_avx512, "avx512ifma", "avx512vl"); let token_avx512: cpuid_avx512::InitToken = cpuid_avx512::init(); @@ -62,7 +68,7 @@ fn get_selected_backend() -> BackendKind { } } - #[cfg(curve25519_dalek_backend = "simd")] + #[cfg(all(curve25519_dalek_backend = "simd", target_arch="x86_64"))] { cpufeatures::new!(cpuid_avx2, "avx2"); let token_avx2: cpuid_avx2::InitToken = cpuid_avx2::init(); @@ -85,12 +91,15 @@ where use 
crate::traits::VartimeMultiscalarMul; match get_selected_backend() { - #[cfg(curve25519_dalek_backend = "simd")] + #[cfg(all(curve25519_dalek_backend = "simd", target_arch="x86_64"))] BackendKind::Avx2 => self::vector::scalar_mul::pippenger::spec_avx2::Pippenger::optional_multiscalar_mul::(scalars, points), - #[cfg(all(curve25519_dalek_backend = "simd", nightly))] + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="x86_64"))] BackendKind::Avx512 => self::vector::scalar_mul::pippenger::spec_avx512ifma_avx512vl::Pippenger::optional_multiscalar_mul::(scalars, points), + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="aarch64"))] + BackendKind::Neon => + self::vector::scalar_mul::pippenger::spec_neon::Pippenger::optional_multiscalar_mul::(scalars, points), BackendKind::Serial => self::serial::scalar_mul::pippenger::Pippenger::optional_multiscalar_mul::(scalars, points), } @@ -98,12 +107,16 @@ where #[cfg(feature = "alloc")] pub(crate) enum VartimePrecomputedStraus { - #[cfg(curve25519_dalek_backend = "simd")] + #[cfg(all(curve25519_dalek_backend = "simd", target_arch="x86_64"))] Avx2(self::vector::scalar_mul::precomputed_straus::spec_avx2::VartimePrecomputedStraus), - #[cfg(all(curve25519_dalek_backend = "simd", nightly))] + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="x86_64"))] Avx512ifma( self::vector::scalar_mul::precomputed_straus::spec_avx512ifma_avx512vl::VartimePrecomputedStraus, ), + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="aarch64"))] + Neon( + self::vector::scalar_mul::precomputed_straus::spec_neon::VartimePrecomputedStraus + ), Scalar(self::serial::scalar_mul::precomputed_straus::VartimePrecomputedStraus), } @@ -117,12 +130,15 @@ impl VartimePrecomputedStraus { use crate::traits::VartimePrecomputedMultiscalarMul; match get_selected_backend() { - #[cfg(curve25519_dalek_backend = "simd")] + #[cfg(all(curve25519_dalek_backend = "simd", target_arch="x86_64"))] BackendKind::Avx2 
=> VartimePrecomputedStraus::Avx2(self::vector::scalar_mul::precomputed_straus::spec_avx2::VartimePrecomputedStraus::new(static_points)), - #[cfg(all(curve25519_dalek_backend = "simd", nightly))] + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="x86_64"))] BackendKind::Avx512 => VartimePrecomputedStraus::Avx512ifma(self::vector::scalar_mul::precomputed_straus::spec_avx512ifma_avx512vl::VartimePrecomputedStraus::new(static_points)), + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="aarch64"))] + BackendKind::Neon => + VartimePrecomputedStraus::Neon(self::vector::scalar_mul::precomputed_straus::spec_neon::VartimePrecomputedStraus::new(static_points)), BackendKind::Serial => VartimePrecomputedStraus::Scalar(self::serial::scalar_mul::precomputed_straus::VartimePrecomputedStraus::new(static_points)) } @@ -144,18 +160,24 @@ impl VartimePrecomputedStraus { use crate::traits::VartimePrecomputedMultiscalarMul; match self { - #[cfg(curve25519_dalek_backend = "simd")] + #[cfg(all(curve25519_dalek_backend = "simd", target_arch="x86_64"))] VartimePrecomputedStraus::Avx2(inner) => inner.optional_mixed_multiscalar_mul( static_scalars, dynamic_scalars, dynamic_points, ), - #[cfg(all(curve25519_dalek_backend = "simd", nightly))] + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="x86_64"))] VartimePrecomputedStraus::Avx512ifma(inner) => inner.optional_mixed_multiscalar_mul( static_scalars, dynamic_scalars, dynamic_points, ), + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="aarch64"))] + VartimePrecomputedStraus::Neon(inner) => inner.optional_mixed_multiscalar_mul( + static_scalars, + dynamic_scalars, + dynamic_points, + ), VartimePrecomputedStraus::Scalar(inner) => inner.optional_mixed_multiscalar_mul( static_scalars, dynamic_scalars, @@ -177,19 +199,25 @@ where use crate::traits::MultiscalarMul; match get_selected_backend() { - #[cfg(curve25519_dalek_backend = "simd")] + #[cfg(all(curve25519_dalek_backend = 
"simd", target_arch="x86_64"))] BackendKind::Avx2 => { self::vector::scalar_mul::straus::spec_avx2::Straus::multiscalar_mul::( scalars, points, ) } - #[cfg(all(curve25519_dalek_backend = "simd", nightly))] + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="x86_64"))] BackendKind::Avx512 => { self::vector::scalar_mul::straus::spec_avx512ifma_avx512vl::Straus::multiscalar_mul::< I, J, >(scalars, points) } + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="aarch64"))] + BackendKind::Neon => { + self::vector::scalar_mul::straus::spec_neon::Straus::multiscalar_mul::( + scalars, points, + ) + } BackendKind::Serial => { self::serial::scalar_mul::straus::Straus::multiscalar_mul::(scalars, points) } @@ -207,19 +235,25 @@ where use crate::traits::VartimeMultiscalarMul; match get_selected_backend() { - #[cfg(curve25519_dalek_backend = "simd")] + #[cfg(all(curve25519_dalek_backend = "simd", target_arch="x86_64"))] BackendKind::Avx2 => { self::vector::scalar_mul::straus::spec_avx2::Straus::optional_multiscalar_mul::( scalars, points, ) } - #[cfg(all(curve25519_dalek_backend = "simd", nightly))] + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="x86_64"))] BackendKind::Avx512 => { self::vector::scalar_mul::straus::spec_avx512ifma_avx512vl::Straus::optional_multiscalar_mul::< I, J, >(scalars, points) } + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="aarch64"))] + BackendKind::Neon => { + self::vector::scalar_mul::straus::spec_neon::Straus::optional_multiscalar_mul::( + scalars, points + ) + } BackendKind::Serial => { self::serial::scalar_mul::straus::Straus::optional_multiscalar_mul::( scalars, points, @@ -231,12 +265,14 @@ where /// Perform constant-time, variable-base scalar multiplication. 
pub fn variable_base_mul(point: &EdwardsPoint, scalar: &Scalar) -> EdwardsPoint { match get_selected_backend() { - #[cfg(curve25519_dalek_backend = "simd")] + #[cfg(all(curve25519_dalek_backend = "simd", target_arch="x86_64"))] BackendKind::Avx2 => self::vector::scalar_mul::variable_base::spec_avx2::mul(point, scalar), - #[cfg(all(curve25519_dalek_backend = "simd", nightly))] + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="x86_64"))] BackendKind::Avx512 => { self::vector::scalar_mul::variable_base::spec_avx512ifma_avx512vl::mul(point, scalar) } + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="aarch64"))] + BackendKind::Neon => self::vector::scalar_mul::variable_base::spec_neon::mul(point, scalar), BackendKind::Serial => self::serial::scalar_mul::variable_base::mul(point, scalar), } } @@ -245,12 +281,14 @@ pub fn variable_base_mul(point: &EdwardsPoint, scalar: &Scalar) -> EdwardsPoint #[allow(non_snake_case)] pub fn vartime_double_base_mul(a: &Scalar, A: &EdwardsPoint, b: &Scalar) -> EdwardsPoint { match get_selected_backend() { - #[cfg(curve25519_dalek_backend = "simd")] + #[cfg(all(curve25519_dalek_backend = "simd", target_arch="x86_64"))] BackendKind::Avx2 => self::vector::scalar_mul::vartime_double_base::spec_avx2::mul(a, A, b), - #[cfg(all(curve25519_dalek_backend = "simd", nightly))] + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="x86_64"))] BackendKind::Avx512 => { self::vector::scalar_mul::vartime_double_base::spec_avx512ifma_avx512vl::mul(a, A, b) } + #[cfg(all(curve25519_dalek_backend = "simd", nightly, target_arch="aarch64"))] + BackendKind::Neon => self::vector::scalar_mul::vartime_double_base::spec_neon::mul(a, A, b), BackendKind::Serial => self::serial::scalar_mul::vartime_double_base::mul(a, A, b), } } diff --git a/curve25519-dalek/src/backend/vector/mod.rs b/curve25519-dalek/src/backend/vector/mod.rs index 54ed077d8..90752a7ad 100644 --- a/curve25519-dalek/src/backend/vector/mod.rs +++ 
b/curve25519-dalek/src/backend/vector/mod.rs @@ -12,14 +12,17 @@ #![doc = include_str!("../../../docs/parallel-formulas.md")] #[allow(missing_docs)] +#[cfg(all(target_arch="x86_64"))] pub mod packed_simd; + +#[cfg(all(target_arch="x86_64"))] pub mod avx2; -#[cfg(nightly)] +#[cfg(all(nightly, target_arch="x86_64"))] pub mod ifma; -#[cfg(nightly)] +#[cfg(all(nightly, target_arch="aarch64"))] pub mod neon; pub mod scalar_mul; diff --git a/curve25519-dalek/src/backend/vector/neon/constants.rs b/curve25519-dalek/src/backend/vector/neon/constants.rs index c9fb0bf58..9870e3899 100644 --- a/curve25519-dalek/src/backend/vector/neon/constants.rs +++ b/curve25519-dalek/src/backend/vector/neon/constants.rs @@ -12,7 +12,7 @@ //! This module contains constants used by the NEON backend. -use packed_simd::u32x4; +use super::packed_simd::u32x4; use crate::backend::vector::neon::edwards::{CachedPoint, ExtendedPoint}; use crate::backend::vector::neon::field::FieldElement2625x4; @@ -20,34 +20,34 @@ use crate::window::NafLookupTable8; /// The identity element as an `ExtendedPoint`. pub(crate) static EXTENDEDPOINT_IDENTITY: ExtendedPoint = ExtendedPoint(FieldElement2625x4([ - (u32x4::new(0, 1, 0, 0), u32x4::new(1, 0, 0, 0)), - (u32x4::splat(0), u32x4::splat(0)), - (u32x4::splat(0), u32x4::splat(0)), - (u32x4::splat(0), u32x4::splat(0)), - (u32x4::splat(0), u32x4::splat(0)), + (u32x4::const_new(0, 1, 0, 0), u32x4::const_new(1, 0, 0, 0)), + (u32x4::const_splat(0), u32x4::const_splat(0)), + (u32x4::const_splat(0), u32x4::const_splat(0)), + (u32x4::const_splat(0), u32x4::const_splat(0)), + (u32x4::const_splat(0), u32x4::const_splat(0)), ])); /// The identity element as a `CachedPoint`. 
pub(crate) static CACHEDPOINT_IDENTITY: CachedPoint = CachedPoint(FieldElement2625x4([ ( - u32x4::new(121647, 121666, 0, 0), - u32x4::new(243332, 67108845, 0, 33554431), + u32x4::const_new(121647, 121666, 0, 0), + u32x4::const_new(243332, 67108845, 0, 33554431), ), ( - u32x4::new(67108864, 0, 33554431, 0), - u32x4::new(0, 67108863, 0, 33554431), + u32x4::const_new(67108864, 0, 33554431, 0), + u32x4::const_new(0, 67108863, 0, 33554431), ), ( - u32x4::new(67108863, 0, 33554431, 0), - u32x4::new(0, 67108863, 0, 33554431), + u32x4::const_new(67108863, 0, 33554431, 0), + u32x4::const_new(0, 67108863, 0, 33554431), ), ( - u32x4::new(67108863, 0, 33554431, 0), - u32x4::new(0, 67108863, 0, 33554431), + u32x4::const_new(67108863, 0, 33554431, 0), + u32x4::const_new(0, 67108863, 0, 33554431), ), ( - u32x4::new(67108863, 0, 33554431, 0), - u32x4::new(0, 67108863, 0, 33554431), + u32x4::const_new(67108863, 0, 33554431, 0), + u32x4::const_new(0, 67108863, 0, 33554431), ), ])); @@ -56,8 +56,8 @@ pub(crate) static CACHEDPOINT_IDENTITY: CachedPoint = CachedPoint(FieldElement26 /// (2p, 2p, 2p, 2p) = [P_TIMES_2_LO, P_TIMES_2_HI, P_TIMES_2_HI, P_TIMES_2_HI, P_TIMES_2_HI] /// ``` pub(crate) static P_TIMES_2_LO: (u32x4, u32x4) = ( - u32x4::new(67108845 << 1, 67108845 << 1, 33554431 << 1, 33554431 << 1), - u32x4::new(67108845 << 1, 67108845 << 1, 33554431 << 1, 33554431 << 1), + u32x4::const_new(67108845 << 1, 67108845 << 1, 33554431 << 1, 33554431 << 1), + u32x4::const_new(67108845 << 1, 67108845 << 1, 33554431 << 1, 33554431 << 1), ); /// The high limbs of (2p, 2p, 2p, 2p), so that @@ -65,8 +65,8 @@ pub(crate) static P_TIMES_2_LO: (u32x4, u32x4) = ( /// (2p, 2p, 2p, 2p) = [P_TIMES_2_LO, P_TIMES_2_HI, P_TIMES_2_HI, P_TIMES_2_HI, P_TIMES_2_HI] /// ``` pub(crate) static P_TIMES_2_HI: (u32x4, u32x4) = ( - u32x4::new(67108863 << 1, 67108863 << 1, 33554431 << 1, 33554431 << 1), - u32x4::new(67108863 << 1, 67108863 << 1, 33554431 << 1, 33554431 << 1), + u32x4::const_new(67108863 << 1, 
67108863 << 1, 33554431 << 1, 33554431 << 1), + u32x4::const_new(67108863 << 1, 67108863 << 1, 33554431 << 1, 33554431 << 1), ); /// The low limbs of (16p, 16p, 16p, 16p), so that @@ -74,8 +74,8 @@ pub(crate) static P_TIMES_2_HI: (u32x4, u32x4) = ( /// (16p, 16p, 16p, 16p) = [P_TIMES_16_LO, P_TIMES_16_HI, P_TIMES_16_HI, P_TIMES_16_HI, P_TIMES_16_HI] /// ``` pub(crate) static P_TIMES_16_LO: (u32x4, u32x4) = ( - u32x4::new(67108845 << 4, 67108845 << 4, 33554431 << 4, 33554431 << 4), - u32x4::new(67108845 << 4, 67108845 << 4, 33554431 << 4, 33554431 << 4), + u32x4::const_new(67108845 << 4, 67108845 << 4, 33554431 << 4, 33554431 << 4), + u32x4::const_new(67108845 << 4, 67108845 << 4, 33554431 << 4, 33554431 << 4), ); /// The high limbs of (16p, 16p, 16p, 16p), so that @@ -83,1418 +83,1418 @@ pub(crate) static P_TIMES_16_LO: (u32x4, u32x4) = ( /// (16p, 16p, 16p, 16p) = [P_TIMES_16_LO, P_TIMES_16_HI, P_TIMES_16_HI, P_TIMES_16_HI, P_TIMES_16_HI] /// ``` pub(crate) static P_TIMES_16_HI: (u32x4, u32x4) = ( - u32x4::new(67108863 << 4, 67108863 << 4, 33554431 << 4, 33554431 << 4), - u32x4::new(67108863 << 4, 67108863 << 4, 33554431 << 4, 33554431 << 4), + u32x4::const_new(67108863 << 4, 67108863 << 4, 33554431 << 4, 33554431 << 4), + u32x4::const_new(67108863 << 4, 67108863 << 4, 33554431 << 4, 33554431 << 4), ); /// Odd multiples of the Ed25519 basepoint: pub(crate) static BASEPOINT_ODD_LOOKUP_TABLE: NafLookupTable8 = NafLookupTable8([ CachedPoint(FieldElement2625x4([ ( - u32x4::new(3571425, 10045002, 19036563, 1096096), - u32x4::new(243332, 65897020, 0, 28963681), + u32x4::const_new(3571425, 10045002, 19036563, 1096096), + u32x4::const_new(243332, 65897020, 0, 28963681), ), ( - u32x4::new(30896895, 63055514, 1614915, 5095970), - u32x4::new(0, 53791688, 0, 31258312), + u32x4::const_new(30896895, 63055514, 1614915, 5095970), + u32x4::const_new(0, 53791688, 0, 31258312), ), ( - u32x4::new(13347627, 40339464, 2236269, 11185503), - u32x4::new(0, 22520087, 0, 8659512), + 
u32x4::const_new(13347627, 40339464, 2236269, 11185503), + u32x4::const_new(0, 22520087, 0, 8659512), ), ( - u32x4::new(11125413, 29139905, 32037254, 28360723), - u32x4::new(0, 64556417, 0, 9635759), + u32x4::const_new(11125413, 29139905, 32037254, 28360723), + u32x4::const_new(0, 64556417, 0, 9635759), ), ( - u32x4::new(33268144, 47262491, 4336918, 15795740), - u32x4::new(0, 22027545, 0, 4846528), + u32x4::const_new(33268144, 47262491, 4336918, 15795740), + u32x4::const_new(0, 22027545, 0, 4846528), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(47099681, 31447946, 29365447, 24740513), - u32x4::new(42991046, 18317844, 16051644, 21404226), + u32x4::const_new(47099681, 31447946, 29365447, 24740513), + u32x4::const_new(42991046, 18317844, 16051644, 21404226), ), ( - u32x4::new(31708133, 28909527, 2366091, 13703791), - u32x4::new(469246, 54159622, 2601402, 32988002), + u32x4::const_new(31708133, 28909527, 2366091, 13703791), + u32x4::const_new(469246, 54159622, 2601402, 32988002), ), ( - u32x4::new(63432457, 30251794, 15163516, 18491340), - u32x4::new(28144087, 35605455, 13682295, 18474872), + u32x4::const_new(63432457, 30251794, 15163516, 18491340), + u32x4::const_new(28144087, 35605455, 13682295, 18474872), ), ( - u32x4::new(12221607, 4967598, 26061980, 26008006), - u32x4::new(20226147, 9726961, 17410, 18051083), + u32x4::const_new(12221607, 4967598, 26061980, 26008006), + u32x4::const_new(20226147, 9726961, 17410, 18051083), ), ( - u32x4::new(60569645, 62487085, 11911242, 21920922), - u32x4::new(4092105, 38186967, 22431483, 31366585), + u32x4::const_new(60569645, 62487085, 11911242, 21920922), + u32x4::const_new(4092105, 38186967, 22431483, 31366585), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(18147205, 62587998, 2554617, 536692), - u32x4::new(11924528, 26674131, 17645433, 24341419), + u32x4::const_new(18147205, 62587998, 2554617, 536692), + u32x4::const_new(11924528, 26674131, 17645433, 24341419), ), ( - u32x4::new(11573357, 27579485, 
31491870, 29000885), - u32x4::new(10800976, 51902791, 28076395, 20464029), + u32x4::const_new(11573357, 27579485, 31491870, 29000885), + u32x4::const_new(10800976, 51902791, 28076395, 20464029), ), ( - u32x4::new(56031649, 10856669, 11791193, 26769430), - u32x4::new(25306956, 5922200, 6630685, 9385098), + u32x4::const_new(56031649, 10856669, 11791193, 26769430), + u32x4::const_new(25306956, 5922200, 6630685, 9385098), ), ( - u32x4::new(31319348, 23906711, 16290213, 32142166), - u32x4::new(61106354, 17181823, 3548308, 12022566), + u32x4::const_new(31319348, 23906711, 16290213, 32142166), + u32x4::const_new(61106354, 17181823, 3548308, 12022566), ), ( - u32x4::new(5904298, 50218605, 11826440, 5492249), - u32x4::new(10379071, 3472255, 172742, 31948344), + u32x4::const_new(5904298, 50218605, 11826440, 5492249), + u32x4::const_new(10379071, 3472255, 172742, 31948344), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(10625852, 15193821, 22918394, 23676410), - u32x4::new(53695416, 54987793, 10067515, 11747680), + u32x4::const_new(10625852, 15193821, 22918394, 23676410), + u32x4::const_new(53695416, 54987793, 10067515, 11747680), ), ( - u32x4::new(65013325, 1309652, 29616320, 28922974), - u32x4::new(60360891, 19621771, 9938982, 30406429), + u32x4::const_new(65013325, 1309652, 29616320, 28922974), + u32x4::const_new(60360891, 19621771, 9938982, 30406429), ), ( - u32x4::new(54967954, 65931918, 5595602, 25719523), - u32x4::new(64909864, 30566415, 15945272, 8495317), + u32x4::const_new(54967954, 65931918, 5595602, 25719523), + u32x4::const_new(64909864, 30566415, 15945272, 8495317), ), ( - u32x4::new(1167157, 55265018, 11507029, 31641054), - u32x4::new(43497904, 2367338, 12937761, 27517066), + u32x4::const_new(1167157, 55265018, 11507029, 31641054), + u32x4::const_new(43497904, 2367338, 12937761, 27517066), ), ( - u32x4::new(656704, 2544994, 13006713, 480979), - u32x4::new(38471594, 62541240, 25353597, 11531760), + u32x4::const_new(656704, 2544994, 13006713, 480979), + 
u32x4::const_new(38471594, 62541240, 25353597, 11531760), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(22176662, 3984313, 27495285, 4110608), - u32x4::new(2909584, 30594106, 15677919, 2549183), + u32x4::const_new(22176662, 3984313, 27495285, 4110608), + u32x4::const_new(2909584, 30594106, 15677919, 2549183), ), ( - u32x4::new(33979105, 62269905, 2071511, 6894756), - u32x4::new(53189950, 47232857, 6408191, 6123225), + u32x4::const_new(33979105, 62269905, 2071511, 6894756), + u32x4::const_new(53189950, 47232857, 6408191, 6123225), ), ( - u32x4::new(32553873, 63948030, 12612401, 3633166), - u32x4::new(24054373, 37626618, 14481327, 8520484), + u32x4::const_new(32553873, 63948030, 12612401, 3633166), + u32x4::const_new(24054373, 37626618, 14481327, 8520484), ), ( - u32x4::new(56552486, 10749438, 12034813, 28811946), - u32x4::new(1445640, 36755601, 12104575, 10257833), + u32x4::const_new(56552486, 10749438, 12034813, 28811946), + u32x4::const_new(1445640, 36755601, 12104575, 10257833), ), ( - u32x4::new(22795808, 48761311, 1136056, 9380768), - u32x4::new(1411523, 5341811, 27318329, 9686767), + u32x4::const_new(22795808, 48761311, 1136056, 9380768), + u32x4::const_new(1411523, 5341811, 27318329, 9686767), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(21157200, 39156966, 20473176, 4934657), - u32x4::new(61478183, 45121537, 5429856, 13035023), + u32x4::const_new(21157200, 39156966, 20473176, 4934657), + u32x4::const_new(61478183, 45121537, 5429856, 13035023), ), ( - u32x4::new(7954529, 58789246, 31440083, 7054221), - u32x4::new(38438565, 36856107, 1364112, 14548122), + u32x4::const_new(7954529, 58789246, 31440083, 7054221), + u32x4::const_new(38438565, 36856107, 1364112, 14548122), ), ( - u32x4::new(26120083, 36321360, 4919997, 31687496), - u32x4::new(33757765, 36237559, 15243054, 32163861), + u32x4::const_new(26120083, 36321360, 4919997, 31687496), + u32x4::const_new(33757765, 36237559, 15243054, 32163861), ), ( - u32x4::new(25878307, 46544824, 
19455951, 2414935), - u32x4::new(16844726, 56521560, 32680554, 26660660), + u32x4::const_new(25878307, 46544824, 19455951, 2414935), + u32x4::const_new(16844726, 56521560, 32680554, 26660660), ), ( - u32x4::new(48360220, 43407178, 12187042, 24925816), - u32x4::new(7423722, 25746484, 12814654, 17395963), + u32x4::const_new(48360220, 43407178, 12187042, 24925816), + u32x4::const_new(7423722, 25746484, 12814654, 17395963), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(63153652, 32195955, 4087908, 8431689), - u32x4::new(30392384, 47203165, 8986649, 9053039), + u32x4::const_new(63153652, 32195955, 4087908, 8431689), + u32x4::const_new(30392384, 47203165, 8986649, 9053039), ), ( - u32x4::new(63659241, 47988767, 2931872, 19953600), - u32x4::new(11747107, 51610101, 20952181, 13364887), + u32x4::const_new(63659241, 47988767, 2931872, 19953600), + u32x4::const_new(11747107, 51610101, 20952181, 13364887), ), ( - u32x4::new(3659197, 58790649, 5930099, 2605312), - u32x4::new(28477896, 580728, 20579735, 2610622), + u32x4::const_new(3659197, 58790649, 5930099, 2605312), + u32x4::const_new(28477896, 580728, 20579735, 2610622), ), ( - u32x4::new(41781607, 17161358, 10690531, 24368015), - u32x4::new(47027031, 36742339, 5414694, 13156365), + u32x4::const_new(41781607, 17161358, 10690531, 24368015), + u32x4::const_new(47027031, 36742339, 5414694, 13156365), ), ( - u32x4::new(13237853, 51182423, 8954802, 29006542), - u32x4::new(22643989, 56896541, 22830593, 10289708), + u32x4::const_new(13237853, 51182423, 8954802, 29006542), + u32x4::const_new(22643989, 56896541, 22830593, 10289708), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(1401265, 58846825, 30911620, 32239180), - u32x4::new(15391552, 15200821, 6339309, 16403588), + u32x4::const_new(1401265, 58846825, 30911620, 32239180), + u32x4::const_new(15391552, 15200821, 6339309, 16403588), ), ( - u32x4::new(55913797, 29541724, 1664461, 21709410), - u32x4::new(38470488, 47097092, 17674945, 32666066), + 
u32x4::const_new(55913797, 29541724, 1664461, 21709410), + u32x4::const_new(38470488, 47097092, 17674945, 32666066), ), ( - u32x4::new(22844482, 10797709, 27548106, 31638735), - u32x4::new(34500968, 26611503, 19727211, 13160873), + u32x4::const_new(22844482, 10797709, 27548106, 31638735), + u32x4::const_new(34500968, 26611503, 19727211, 13160873), ), ( - u32x4::new(31485204, 14496164, 13981208, 10276888), - u32x4::new(5748808, 35024436, 2740987, 7479021), + u32x4::const_new(31485204, 14496164, 13981208, 10276888), + u32x4::const_new(5748808, 35024436, 2740987, 7479021), ), ( - u32x4::new(58541207, 14866135, 32344041, 545930), - u32x4::new(62661488, 6941250, 27940205, 11976112), + u32x4::const_new(58541207, 14866135, 32344041, 545930), + u32x4::const_new(62661488, 6941250, 27940205, 11976112), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(39849808, 44781685, 15697329, 24387845), - u32x4::new(12501486, 50260092, 23199481, 31929024), + u32x4::const_new(39849808, 44781685, 15697329, 24387845), + u32x4::const_new(12501486, 50260092, 23199481, 31929024), ), ( - u32x4::new(24823070, 27956017, 27034296, 10316465), - u32x4::new(47664045, 11152446, 15719183, 30181617), + u32x4::const_new(24823070, 27956017, 27034296, 10316465), + u32x4::const_new(47664045, 11152446, 15719183, 30181617), ), ( - u32x4::new(20771189, 19969144, 31433937, 19185213), - u32x4::new(27565920, 10384445, 2893359, 9255362), + u32x4::const_new(20771189, 19969144, 31433937, 19185213), + u32x4::const_new(27565920, 10384445, 2893359, 9255362), ), ( - u32x4::new(42894974, 11925545, 32134441, 32738810), - u32x4::new(55916336, 32479272, 19563550, 5511385), + u32x4::const_new(42894974, 11925545, 32134441, 32738810), + u32x4::const_new(55916336, 32479272, 19563550, 5511385), ), ( - u32x4::new(17857161, 47809169, 14564114, 27997751), - u32x4::new(33024640, 38669671, 31956536, 27313245), + u32x4::const_new(17857161, 47809169, 14564114, 27997751), + u32x4::const_new(33024640, 38669671, 31956536, 
27313245), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(58237774, 15917425, 18872208, 19394230), - u32x4::new(17374297, 6101419, 4839741, 6596900), + u32x4::const_new(58237774, 15917425, 18872208, 19394230), + u32x4::const_new(17374297, 6101419, 4839741, 6596900), ), ( - u32x4::new(66947393, 15744215, 18368993, 17750160), - u32x4::new(41006525, 9205497, 2629667, 32170865), + u32x4::const_new(66947393, 15744215, 18368993, 17750160), + u32x4::const_new(41006525, 9205497, 2629667, 32170865), ), ( - u32x4::new(66481381, 1919414, 28338762, 7372967), - u32x4::new(33819153, 4156199, 27126309, 12739816), + u32x4::const_new(66481381, 1919414, 28338762, 7372967), + u32x4::const_new(33819153, 4156199, 27126309, 12739816), ), ( - u32x4::new(44117158, 58545296, 22521371, 11809712), - u32x4::new(28998792, 50731010, 30215699, 25748377), + u32x4::const_new(44117158, 58545296, 22521371, 11809712), + u32x4::const_new(28998792, 50731010, 30215699, 25748377), ), ( - u32x4::new(23561284, 4160244, 9035405, 24895184), - u32x4::new(39761639, 59253416, 8684759, 22487864), + u32x4::const_new(23561284, 4160244, 9035405, 24895184), + u32x4::const_new(39761639, 59253416, 8684759, 22487864), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(12671134, 56419053, 16092401, 30038207), - u32x4::new(4002647, 47822606, 7151311, 28430768), + u32x4::const_new(12671134, 56419053, 16092401, 30038207), + u32x4::const_new(4002647, 47822606, 7151311, 28430768), ), ( - u32x4::new(61041684, 35765374, 30598048, 19666539), - u32x4::new(44150175, 40140037, 290469, 28442674), + u32x4::const_new(61041684, 35765374, 30598048, 19666539), + u32x4::const_new(44150175, 40140037, 290469, 28442674), ), ( - u32x4::new(18847796, 1371617, 33316881, 13199936), - u32x4::new(43646578, 17068881, 12074900, 1537415), + u32x4::const_new(18847796, 1371617, 33316881, 13199936), + u32x4::const_new(43646578, 17068881, 12074900, 1537415), ), ( - u32x4::new(10052225, 38316070, 27469797, 5297537), - u32x4::new(50725570, 
20435349, 10339121, 2779737), + u32x4::const_new(10052225, 38316070, 27469797, 5297537), + u32x4::const_new(50725570, 20435349, 10339121, 2779737), ), ( - u32x4::new(18372189, 15466385, 24762130, 22217964), - u32x4::new(23503887, 47844464, 10415034, 2606889), + u32x4::const_new(18372189, 15466385, 24762130, 22217964), + u32x4::const_new(23503887, 47844464, 10415034, 2606889), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(55082775, 45300503, 16032654, 5964396), - u32x4::new(17743504, 24634761, 19493066, 5184611), + u32x4::const_new(55082775, 45300503, 16032654, 5964396), + u32x4::const_new(17743504, 24634761, 19493066, 5184611), ), ( - u32x4::new(50172633, 35093294, 10040575, 23616256), - u32x4::new(4543900, 61852191, 4049821, 7423669), + u32x4::const_new(50172633, 35093294, 10040575, 23616256), + u32x4::const_new(4543900, 61852191, 4049821, 7423669), ), ( - u32x4::new(20295398, 40009376, 10487190, 15670429), - u32x4::new(51972856, 58649552, 20436392, 3432497), + u32x4::const_new(20295398, 40009376, 10487190, 15670429), + u32x4::const_new(51972856, 58649552, 20436392, 3432497), ), ( - u32x4::new(35189420, 54117751, 12825868, 6283038), - u32x4::new(27540739, 30648758, 22658912, 9466689), + u32x4::const_new(35189420, 54117751, 12825868, 6283038), + u32x4::const_new(27540739, 30648758, 22658912, 9466689), ), ( - u32x4::new(51737549, 40725785, 17409814, 25201086), - u32x4::new(21156239, 34176168, 26814520, 5956424), + u32x4::const_new(51737549, 40725785, 17409814, 25201086), + u32x4::const_new(21156239, 34176168, 26814520, 5956424), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(8211442, 8014184, 6260823, 22108096), - u32x4::new(32182620, 51844847, 2466270, 28582231), + u32x4::const_new(8211442, 8014184, 6260823, 22108096), + u32x4::const_new(32182620, 51844847, 2466270, 28582231), ), ( - u32x4::new(27199739, 3848333, 31738017, 10892045), - u32x4::new(4963982, 65391770, 32551997, 28906469), + u32x4::const_new(27199739, 3848333, 31738017, 10892045), + 
u32x4::const_new(4963982, 65391770, 32551997, 28906469), ), ( - u32x4::new(16606846, 32207068, 26404535, 7614129), - u32x4::new(45416902, 65584718, 13821785, 2646060), + u32x4::const_new(16606846, 32207068, 26404535, 7614129), + u32x4::const_new(45416902, 65584718, 13821785, 2646060), ), ( - u32x4::new(36090634, 57981287, 32247670, 22837502), - u32x4::new(31003861, 55448117, 6062915, 20369975), + u32x4::const_new(36090634, 57981287, 32247670, 22837502), + u32x4::const_new(31003861, 55448117, 6062915, 20369975), ), ( - u32x4::new(27381403, 50578107, 522631, 29521058), - u32x4::new(31137497, 40220737, 27628049, 1824195), + u32x4::const_new(27381403, 50578107, 522631, 29521058), + u32x4::const_new(31137497, 40220737, 27628049, 1824195), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(59402443, 17056879, 29262689, 6131785), - u32x4::new(52551472, 43367471, 29423199, 18899208), + u32x4::const_new(59402443, 17056879, 29262689, 6131785), + u32x4::const_new(52551472, 43367471, 29423199, 18899208), ), ( - u32x4::new(5749414, 43514612, 11365899, 21514624), - u32x4::new(65591890, 60945892, 19841732, 5628567), + u32x4::const_new(5749414, 43514612, 11365899, 21514624), + u32x4::const_new(65591890, 60945892, 19841732, 5628567), ), ( - u32x4::new(19334369, 52500268, 12307673, 5267367), - u32x4::new(3212103, 9035822, 29142161, 30520954), + u32x4::const_new(19334369, 52500268, 12307673, 5267367), + u32x4::const_new(3212103, 9035822, 29142161, 30520954), ), ( - u32x4::new(57261330, 6819646, 22089161, 9800373), - u32x4::new(55155453, 62250856, 13766735, 25244545), + u32x4::const_new(57261330, 6819646, 22089161, 9800373), + u32x4::const_new(55155453, 62250856, 13766735, 25244545), ), ( - u32x4::new(54370226, 61888301, 24496089, 2540581), - u32x4::new(65637506, 60274355, 18154273, 11687259), + u32x4::const_new(54370226, 61888301, 24496089, 2540581), + u32x4::const_new(65637506, 60274355, 18154273, 11687259), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(12521903, 
26014045, 13995625, 33360175), - u32x4::new(23605474, 7376434, 27229267, 17195036), + u32x4::const_new(12521903, 26014045, 13995625, 33360175), + u32x4::const_new(23605474, 7376434, 27229267, 17195036), ), ( - u32x4::new(59482891, 10074423, 574357, 3857753), - u32x4::new(61377787, 50306685, 5241065, 20234396), + u32x4::const_new(59482891, 10074423, 574357, 3857753), + u32x4::const_new(61377787, 50306685, 5241065, 20234396), ), ( - u32x4::new(23674717, 6997172, 20771841, 16858511), - u32x4::new(40565304, 29973136, 7049812, 14585010), + u32x4::const_new(23674717, 6997172, 20771841, 16858511), + u32x4::const_new(40565304, 29973136, 7049812, 14585010), ), ( - u32x4::new(1427477, 13295732, 31762066, 31499740), - u32x4::new(60419925, 54666164, 22009424, 8089609), + u32x4::const_new(1427477, 13295732, 31762066, 31499740), + u32x4::const_new(60419925, 54666164, 22009424, 8089609), ), ( - u32x4::new(58154031, 41593020, 15342328, 957047), - u32x4::new(38937260, 37037498, 24871992, 32973409), + u32x4::const_new(58154031, 41593020, 15342328, 957047), + u32x4::const_new(38937260, 37037498, 24871992, 32973409), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(30654745, 51286025, 21206982, 2433562), - u32x4::new(12780105, 31732574, 33087964, 33081189), + u32x4::const_new(30654745, 51286025, 21206982, 2433562), + u32x4::const_new(12780105, 31732574, 33087964, 33081189), ), ( - u32x4::new(66640017, 42720009, 16567620, 15300745), - u32x4::new(1530367, 33001123, 20930247, 21042661), + u32x4::const_new(66640017, 42720009, 16567620, 15300745), + u32x4::const_new(1530367, 33001123, 20930247, 21042661), ), ( - u32x4::new(15003356, 5294119, 22985605, 18928772), - u32x4::new(32628461, 18230172, 14773298, 27193722), + u32x4::const_new(15003356, 5294119, 22985605, 18928772), + u32x4::const_new(32628461, 18230172, 14773298, 27193722), ), ( - u32x4::new(27555, 65346287, 17017174, 7837720), - u32x4::new(21499787, 42855613, 22474984, 13675085), + u32x4::const_new(27555, 65346287, 
17017174, 7837720), + u32x4::const_new(21499787, 42855613, 22474984, 13675085), ), ( - u32x4::new(24164369, 50130116, 5973149, 24152073), - u32x4::new(1577334, 25400030, 18648484, 32228854), + u32x4::const_new(24164369, 50130116, 5973149, 24152073), + u32x4::const_new(1577334, 25400030, 18648484, 32228854), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(49518649, 59119280, 31670678, 20396561), - u32x4::new(61728330, 651402, 176032, 9529498), + u32x4::const_new(49518649, 59119280, 31670678, 20396561), + u32x4::const_new(61728330, 651402, 176032, 9529498), ), ( - u32x4::new(61765532, 9082232, 32794568, 15526956), - u32x4::new(48543100, 32614212, 19001206, 25680229), + u32x4::const_new(61765532, 9082232, 32794568, 15526956), + u32x4::const_new(48543100, 32614212, 19001206, 25680229), ), ( - u32x4::new(32086091, 10373081, 8996131, 31822823), - u32x4::new(35788988, 49973190, 30542040, 17858455), + u32x4::const_new(32086091, 10373081, 8996131, 31822823), + u32x4::const_new(35788988, 49973190, 30542040, 17858455), ), ( - u32x4::new(48130197, 58121889, 27753291, 29923268), - u32x4::new(54448075, 43300790, 9336565, 15770022), + u32x4::const_new(48130197, 58121889, 27753291, 29923268), + u32x4::const_new(54448075, 43300790, 9336565, 15770022), ), ( - u32x4::new(57725546, 20557498, 9366233, 16023566), - u32x4::new(16189031, 2837363, 24315301, 27003505), + u32x4::const_new(57725546, 20557498, 9366233, 16023566), + u32x4::const_new(16189031, 2837363, 24315301, 27003505), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(28286608, 10767548, 18220739, 5413236), - u32x4::new(48253387, 58255702, 11864864, 28527159), + u32x4::const_new(28286608, 10767548, 18220739, 5413236), + u32x4::const_new(48253387, 58255702, 11864864, 28527159), ), ( - u32x4::new(45038176, 58655197, 25648758, 10951484), - u32x4::new(42564382, 34542843, 23146954, 22234334), + u32x4::const_new(45038176, 58655197, 25648758, 10951484), + u32x4::const_new(42564382, 34542843, 23146954, 22234334), ), ( 
- u32x4::new(14858710, 24978793, 15040559, 4379220), - u32x4::new(47621477, 40271440, 15650420, 1998736), + u32x4::const_new(14858710, 24978793, 15040559, 4379220), + u32x4::const_new(47621477, 40271440, 15650420, 1998736), ), ( - u32x4::new(24106391, 9626149, 344505, 25253814), - u32x4::new(34579800, 59687089, 25718289, 25904133), + u32x4::const_new(24106391, 9626149, 344505, 25253814), + u32x4::const_new(34579800, 59687089, 25718289, 25904133), ), ( - u32x4::new(1981195, 37751302, 26132048, 1764722), - u32x4::new(13288231, 28808622, 12531301, 18292949), + u32x4::const_new(1981195, 37751302, 26132048, 1764722), + u32x4::const_new(13288231, 28808622, 12531301, 18292949), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(13869851, 31448904, 14963539, 7581293), - u32x4::new(20536485, 35021083, 21257574, 33356609), + u32x4::const_new(13869851, 31448904, 14963539, 7581293), + u32x4::const_new(20536485, 35021083, 21257574, 33356609), ), ( - u32x4::new(36903364, 18429241, 11097857, 5943856), - u32x4::new(60583077, 40015815, 30509523, 31915271), + u32x4::const_new(36903364, 18429241, 11097857, 5943856), + u32x4::const_new(60583077, 40015815, 30509523, 31915271), ), ( - u32x4::new(49161801, 40681915, 67892, 25454357), - u32x4::new(22779677, 25798439, 15964829, 5863227), + u32x4::const_new(49161801, 40681915, 67892, 25454357), + u32x4::const_new(22779677, 25798439, 15964829, 5863227), ), ( - u32x4::new(60810637, 4496471, 5217137, 14095116), - u32x4::new(50942411, 50712663, 2507380, 26844507), + u32x4::const_new(60810637, 4496471, 5217137, 14095116), + u32x4::const_new(50942411, 50712663, 2507380, 26844507), ), ( - u32x4::new(34579752, 53519385, 10859797, 18816024), - u32x4::new(42552864, 39478521, 6783896, 17277037), + u32x4::const_new(34579752, 53519385, 10859797, 18816024), + u32x4::const_new(42552864, 39478521, 6783896, 17277037), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(43287109, 27900723, 33182187, 2766754), - u32x4::new(17041989, 1018260, 
33392790, 4830032), + u32x4::const_new(43287109, 27900723, 33182187, 2766754), + u32x4::const_new(17041989, 1018260, 33392790, 4830032), ), ( - u32x4::new(60194178, 30788903, 24728888, 14513195), - u32x4::new(20897010, 28843233, 20111980, 17475240), + u32x4::const_new(60194178, 30788903, 24728888, 14513195), + u32x4::const_new(20897010, 28843233, 20111980, 17475240), ), ( - u32x4::new(46042274, 19257042, 4628173, 31649727), - u32x4::new(27388316, 66631493, 11541886, 6408028), + u32x4::const_new(46042274, 19257042, 4628173, 31649727), + u32x4::const_new(27388316, 66631493, 11541886, 6408028), ), ( - u32x4::new(57024680, 49536568, 32050358, 31321917), - u32x4::new(17437691, 49672356, 2884755, 20493991), + u32x4::const_new(57024680, 49536568, 32050358, 31321917), + u32x4::const_new(17437691, 49672356, 2884755, 20493991), ), ( - u32x4::new(59553007, 46782643, 29001173, 1814088), - u32x4::new(21930692, 51319706, 14965872, 30748046), + u32x4::const_new(59553007, 46782643, 29001173, 1814088), + u32x4::const_new(21930692, 51319706, 14965872, 30748046), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(16441817, 36111849, 6900424, 602234), - u32x4::new(46522199, 16441484, 8135070, 21726541), + u32x4::const_new(16441817, 36111849, 6900424, 602234), + u32x4::const_new(46522199, 16441484, 8135070, 21726541), ), ( - u32x4::new(37711225, 32701959, 11679112, 13125533), - u32x4::new(32154135, 9407918, 26554289, 620848), + u32x4::const_new(37711225, 32701959, 11679112, 13125533), + u32x4::const_new(32154135, 9407918, 26554289, 620848), ), ( - u32x4::new(19233407, 30086864, 14679568, 2797374), - u32x4::new(4892806, 7993077, 247658, 5632804), + u32x4::const_new(19233407, 30086864, 14679568, 2797374), + u32x4::const_new(4892806, 7993077, 247658, 5632804), ), ( - u32x4::new(37427262, 26675495, 27125659, 13496131), - u32x4::new(50718473, 40115609, 28505351, 27837393), + u32x4::const_new(37427262, 26675495, 27125659, 13496131), + u32x4::const_new(50718473, 40115609, 28505351, 
27837393), ), ( - u32x4::new(196819, 18410429, 7070012, 21691388), - u32x4::new(29763371, 24754123, 9727048, 10930179), + u32x4::const_new(196819, 18410429, 7070012, 21691388), + u32x4::const_new(29763371, 24754123, 9727048, 10930179), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(28319289, 40734650, 16225680, 24739184), - u32x4::new(64272368, 35356897, 7866648, 13635853), + u32x4::const_new(28319289, 40734650, 16225680, 24739184), + u32x4::const_new(64272368, 35356897, 7866648, 13635853), ), ( - u32x4::new(34165295, 48328447, 27041670, 23643655), - u32x4::new(48949950, 52963288, 30411133, 6045174), + u32x4::const_new(34165295, 48328447, 27041670, 23643655), + u32x4::const_new(48949950, 52963288, 30411133, 6045174), ), ( - u32x4::new(18583559, 41649834, 9813585, 26098520), - u32x4::new(25682734, 26733526, 19276490, 10654728), + u32x4::const_new(18583559, 41649834, 9813585, 26098520), + u32x4::const_new(25682734, 26733526, 19276490, 10654728), ), ( - u32x4::new(34867476, 52715968, 5694571, 13380978), - u32x4::new(15134994, 1831255, 8608001, 17266401), + u32x4::const_new(34867476, 52715968, 5694571, 13380978), + u32x4::const_new(15134994, 1831255, 8608001, 17266401), ), ( - u32x4::new(59925903, 44282172, 27802465, 1855069), - u32x4::new(14234749, 36635487, 11302294, 10938429), + u32x4::const_new(59925903, 44282172, 27802465, 1855069), + u32x4::const_new(14234749, 36635487, 11302294, 10938429), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(8373273, 49064494, 4932071, 32997499), - u32x4::new(38472880, 29335908, 14504412, 22460029), + u32x4::const_new(8373273, 49064494, 4932071, 32997499), + u32x4::const_new(38472880, 29335908, 14504412, 22460029), ), ( - u32x4::new(31795930, 50785923, 25835990, 25790073), - u32x4::new(65669841, 11360450, 9969157, 9008164), + u32x4::const_new(31795930, 50785923, 25835990, 25790073), + u32x4::const_new(65669841, 11360450, 9969157, 9008164), ), ( - u32x4::new(50262498, 45869261, 16124434, 15336007), - 
u32x4::new(882762, 42522623, 11277198, 26296377), + u32x4::const_new(50262498, 45869261, 16124434, 15336007), + u32x4::const_new(882762, 42522623, 11277198, 26296377), ), ( - u32x4::new(42332732, 59129236, 14452816, 567985), - u32x4::new(208061, 34722729, 32008143, 14828749), + u32x4::const_new(42332732, 59129236, 14452816, 567985), + u32x4::const_new(208061, 34722729, 32008143, 14828749), ), ( - u32x4::new(17937794, 36846032, 32102665, 4442466), - u32x4::new(19745435, 31633451, 7146411, 15812027), + u32x4::const_new(17937794, 36846032, 32102665, 4442466), + u32x4::const_new(19745435, 31633451, 7146411, 15812027), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(30741269, 38648744, 12562645, 30092623), - u32x4::new(25073992, 28730659, 27911745, 30000958), + u32x4::const_new(30741269, 38648744, 12562645, 30092623), + u32x4::const_new(25073992, 28730659, 27911745, 30000958), ), ( - u32x4::new(2859794, 25991700, 17776078, 27091930), - u32x4::new(2328322, 60061146, 18581824, 18039008), + u32x4::const_new(2859794, 25991700, 17776078, 27091930), + u32x4::const_new(2328322, 60061146, 18581824, 18039008), ), ( - u32x4::new(58206333, 17917354, 1972306, 11853766), - u32x4::new(2655376, 60543390, 18416710, 13287440), + u32x4::const_new(58206333, 17917354, 1972306, 11853766), + u32x4::const_new(2655376, 60543390, 18416710, 13287440), ), ( - u32x4::new(62746330, 61423885, 21246577, 2266675), - u32x4::new(60099139, 14804707, 14772234, 20679434), + u32x4::const_new(62746330, 61423885, 21246577, 2266675), + u32x4::const_new(60099139, 14804707, 14772234, 20679434), ), ( - u32x4::new(26987698, 15488817, 715616, 2339565), - u32x4::new(51980752, 17333865, 21965103, 10839820), + u32x4::const_new(26987698, 15488817, 715616, 2339565), + u32x4::const_new(51980752, 17333865, 21965103, 10839820), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(18672548, 57660959, 16042910, 19519287), - u32x4::new(62865851, 17580961, 26628347, 23774759), + u32x4::const_new(18672548, 57660959, 
16042910, 19519287), + u32x4::const_new(62865851, 17580961, 26628347, 23774759), ), ( - u32x4::new(368070, 3464471, 25888304, 30370559), - u32x4::new(52396053, 45426828, 28745251, 9246829), + u32x4::const_new(368070, 3464471, 25888304, 30370559), + u32x4::const_new(52396053, 45426828, 28745251, 9246829), ), ( - u32x4::new(29090099, 57950037, 23104657, 4903923), - u32x4::new(10987778, 56163684, 23621539, 10332760), + u32x4::const_new(29090099, 57950037, 23104657, 4903923), + u32x4::const_new(10987778, 56163684, 23621539, 10332760), ), ( - u32x4::new(53338235, 44851161, 21606845, 31069622), - u32x4::new(4243630, 34464392, 11286454, 5802022), + u32x4::const_new(53338235, 44851161, 21606845, 31069622), + u32x4::const_new(4243630, 34464392, 11286454, 5802022), ), ( - u32x4::new(46710757, 63389067, 11642865, 1980986), - u32x4::new(12967337, 28162061, 3854192, 30432268), + u32x4::const_new(46710757, 63389067, 11642865, 1980986), + u32x4::const_new(12967337, 28162061, 3854192, 30432268), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(12179834, 41005450, 12809619, 33525228), - u32x4::new(4624405, 46957889, 16968743, 11827816), + u32x4::const_new(12179834, 41005450, 12809619, 33525228), + u32x4::const_new(4624405, 46957889, 16968743, 11827816), ), ( - u32x4::new(51521162, 12466775, 31791271, 15303651), - u32x4::new(49798465, 62714504, 6509600, 12918560), + u32x4::const_new(51521162, 12466775, 31791271, 15303651), + u32x4::const_new(49798465, 62714504, 6509600, 12918560), ), ( - u32x4::new(20445559, 1756449, 28848701, 7920171), - u32x4::new(9835040, 5900071, 28757409, 12376688), + u32x4::const_new(20445559, 1756449, 28848701, 7920171), + u32x4::const_new(9835040, 5900071, 28757409, 12376688), ), ( - u32x4::new(18259496, 14281012, 21767026, 10232236), - u32x4::new(20000226, 12400540, 4104902, 23570543), + u32x4::const_new(18259496, 14281012, 21767026, 10232236), + u32x4::const_new(20000226, 12400540, 4104902, 23570543), ), ( - u32x4::new(3687440, 26546648, 13328821, 
26841081), - u32x4::new(49822734, 22334054, 244496, 24862543), + u32x4::const_new(3687440, 26546648, 13328821, 26841081), + u32x4::const_new(49822734, 22334054, 244496, 24862543), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(59523541, 62195428, 3853227, 13954801), - u32x4::new(12387708, 47627615, 27221350, 17899572), + u32x4::const_new(59523541, 62195428, 3853227, 13954801), + u32x4::const_new(12387708, 47627615, 27221350, 17899572), ), ( - u32x4::new(63193587, 36343307, 14595132, 6880795), - u32x4::new(1364792, 37648434, 3259017, 20536046), + u32x4::const_new(63193587, 36343307, 14595132, 6880795), + u32x4::const_new(1364792, 37648434, 3259017, 20536046), ), ( - u32x4::new(30362834, 10440372, 9574624, 11729232), - u32x4::new(63861613, 21748389, 5530846, 2721586), + u32x4::const_new(30362834, 10440372, 9574624, 11729232), + u32x4::const_new(63861613, 21748389, 5530846, 2721586), ), ( - u32x4::new(18339760, 1550632, 17170271, 25732971), - u32x4::new(28459263, 63142237, 21642345, 31557672), + u32x4::const_new(18339760, 1550632, 17170271, 25732971), + u32x4::const_new(28459263, 63142237, 21642345, 31557672), ), ( - u32x4::new(10611282, 5204623, 18049257, 214175), - u32x4::new(19432723, 49809070, 26010406, 27449522), + u32x4::const_new(10611282, 5204623, 18049257, 214175), + u32x4::const_new(19432723, 49809070, 26010406, 27449522), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(19770733, 26478685, 9464541, 29158041), - u32x4::new(28604307, 45196604, 7586524, 6641859), + u32x4::const_new(19770733, 26478685, 9464541, 29158041), + u32x4::const_new(28604307, 45196604, 7586524, 6641859), ), ( - u32x4::new(65654484, 52230498, 30886612, 19112823), - u32x4::new(47271809, 38942611, 16020035, 10773481), + u32x4::const_new(65654484, 52230498, 30886612, 19112823), + u32x4::const_new(47271809, 38942611, 16020035, 10773481), ), ( - u32x4::new(27464323, 54451016, 20646645, 17732915), - u32x4::new(23008717, 53626684, 3253189, 15614410), + u32x4::const_new(27464323, 
54451016, 20646645, 17732915), + u32x4::const_new(23008717, 53626684, 3253189, 15614410), ), ( - u32x4::new(52381752, 40693008, 7063024, 28469981), - u32x4::new(51159478, 44543211, 19941777, 5985451), + u32x4::const_new(52381752, 40693008, 7063024, 28469981), + u32x4::const_new(51159478, 44543211, 19941777, 5985451), ), ( - u32x4::new(13553668, 35524849, 14788737, 1883845), - u32x4::new(12385775, 47958835, 29135466, 1776722), + u32x4::const_new(13553668, 35524849, 14788737, 1883845), + u32x4::const_new(12385775, 47958835, 29135466, 1776722), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(36719806, 20827965, 23175373, 32996806), - u32x4::new(42041892, 65708790, 5467143, 20884008), + u32x4::const_new(36719806, 20827965, 23175373, 32996806), + u32x4::const_new(42041892, 65708790, 5467143, 20884008), ), ( - u32x4::new(43256281, 40770646, 17244063, 31959819), - u32x4::new(64366384, 43544617, 25057754, 12628720), + u32x4::const_new(43256281, 40770646, 17244063, 31959819), + u32x4::const_new(64366384, 43544617, 25057754, 12628720), ), ( - u32x4::new(17337782, 58472057, 27906934, 15305274), - u32x4::new(30292418, 39284317, 16946773, 24806712), + u32x4::const_new(17337782, 58472057, 27906934, 15305274), + u32x4::const_new(30292418, 39284317, 16946773, 24806712), ), ( - u32x4::new(6485126, 32447403, 16261486, 13561940), - u32x4::new(49439635, 10738368, 16419889, 8897231), + u32x4::const_new(6485126, 32447403, 16261486, 13561940), + u32x4::const_new(49439635, 10738368, 16419889, 8897231), ), ( - u32x4::new(44812203, 40122262, 25496058, 2759794), - u32x4::new(25295304, 52178368, 24154195, 29334408), + u32x4::const_new(44812203, 40122262, 25496058, 2759794), + u32x4::const_new(25295304, 52178368, 24154195, 29334408), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(42307254, 57217102, 1088936, 3832827), - u32x4::new(33905401, 23130334, 6958056, 12622851), + u32x4::const_new(42307254, 57217102, 1088936, 3832827), + u32x4::const_new(33905401, 23130334, 6958056, 
12622851), ), ( - u32x4::new(3881189, 14870059, 19712830, 6071598), - u32x4::new(38147944, 60776394, 3427938, 13765703), + u32x4::const_new(3881189, 14870059, 19712830, 6071598), + u32x4::const_new(38147944, 60776394, 3427938, 13765703), ), ( - u32x4::new(7666911, 24227591, 17077136, 22967588), - u32x4::new(6874639, 30915523, 11451695, 24292224), + u32x4::const_new(7666911, 24227591, 17077136, 22967588), + u32x4::const_new(6874639, 30915523, 11451695, 24292224), ), ( - u32x4::new(13659529, 31984463, 28764736, 20506164), - u32x4::new(64729627, 49321636, 28284636, 25472371), + u32x4::const_new(13659529, 31984463, 28764736, 20506164), + u32x4::const_new(64729627, 49321636, 28284636, 25472371), ), ( - u32x4::new(39360308, 42281399, 9446504, 868960), - u32x4::new(49227724, 21351115, 30561851, 11292096), + u32x4::const_new(39360308, 42281399, 9446504, 868960), + u32x4::const_new(49227724, 21351115, 30561851, 11292096), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(7071115, 46444090, 5387916, 15432877), - u32x4::new(27226682, 41506862, 2398278, 3978240), + u32x4::const_new(7071115, 46444090, 5387916, 15432877), + u32x4::const_new(27226682, 41506862, 2398278, 3978240), ), ( - u32x4::new(51009614, 54216973, 24368938, 31392616), - u32x4::new(38456150, 62313644, 6729154, 99724), + u32x4::const_new(51009614, 54216973, 24368938, 31392616), + u32x4::const_new(38456150, 62313644, 6729154, 99724), ), ( - u32x4::new(17474332, 62857913, 2619930, 30659308), - u32x4::new(18268181, 32809239, 22826292, 24561895), + u32x4::const_new(17474332, 62857913, 2619930, 30659308), + u32x4::const_new(18268181, 32809239, 22826292, 24561895), ), ( - u32x4::new(38187020, 67003092, 14118280, 16500577), - u32x4::new(18808560, 64983716, 25712929, 32518261), + u32x4::const_new(38187020, 67003092, 14118280, 16500577), + u32x4::const_new(18808560, 64983716, 25712929, 32518261), ), ( - u32x4::new(25735813, 62284262, 10824872, 20558596), - u32x4::new(48149681, 31162667, 22608274, 26285185), + 
u32x4::const_new(25735813, 62284262, 10824872, 20558596), + u32x4::const_new(48149681, 31162667, 22608274, 26285185), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(963440, 63742255, 10230323, 25515008), - u32x4::new(32506414, 6105697, 25980317, 24645129), + u32x4::const_new(963440, 63742255, 10230323, 25515008), + u32x4::const_new(32506414, 6105697, 25980317, 24645129), ), ( - u32x4::new(7162189, 8101249, 14679265, 33443386), - u32x4::new(2002396, 8541405, 19442276, 4795881), + u32x4::const_new(7162189, 8101249, 14679265, 33443386), + u32x4::const_new(2002396, 8541405, 19442276, 4795881), ), ( - u32x4::new(8116694, 51463069, 4415528, 25599140), - u32x4::new(55805721, 39582709, 6719436, 30033839), + u32x4::const_new(8116694, 51463069, 4415528, 25599140), + u32x4::const_new(55805721, 39582709, 6719436, 30033839), ), ( - u32x4::new(14468202, 42181869, 25188826, 9639755), - u32x4::new(47546189, 62711146, 32762447, 18338064), + u32x4::const_new(14468202, 42181869, 25188826, 9639755), + u32x4::const_new(47546189, 62711146, 32762447, 18338064), ), ( - u32x4::new(33880058, 32810909, 8969931, 13095238), - u32x4::new(38360605, 40138517, 9246134, 4928058), + u32x4::const_new(33880058, 32810909, 8969931, 13095238), + u32x4::const_new(38360605, 40138517, 9246134, 4928058), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(63655588, 17883670, 9410246, 26162761), - u32x4::new(5000571, 7349225, 23785252, 32751089), + u32x4::const_new(63655588, 17883670, 9410246, 26162761), + u32x4::const_new(5000571, 7349225, 23785252, 32751089), ), ( - u32x4::new(28568737, 10733123, 9342397, 21570673), - u32x4::new(54096560, 32467591, 20494687, 21511513), + u32x4::const_new(28568737, 10733123, 9342397, 21570673), + u32x4::const_new(54096560, 32467591, 20494687, 21511513), ), ( - u32x4::new(47675157, 47932807, 29250946, 15672208), - u32x4::new(59760469, 9945465, 14939287, 18437405), + u32x4::const_new(47675157, 47932807, 29250946, 15672208), + u32x4::const_new(59760469, 9945465, 
14939287, 18437405), ), ( - u32x4::new(37985267, 8609815, 31573002, 3373596), - u32x4::new(47828883, 20834216, 13248616, 24154292), + u32x4::const_new(37985267, 8609815, 31573002, 3373596), + u32x4::const_new(47828883, 20834216, 13248616, 24154292), ), ( - u32x4::new(5543543, 29553242, 3386453, 30501150), - u32x4::new(25058089, 15236571, 8814395, 32462955), + u32x4::const_new(5543543, 29553242, 3386453, 30501150), + u32x4::const_new(25058089, 15236571, 8814395, 32462955), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(39158670, 15322548, 20495103, 3312736), - u32x4::new(14557171, 12985179, 8044741, 3176899), + u32x4::const_new(39158670, 15322548, 20495103, 3312736), + u32x4::const_new(14557171, 12985179, 8044741, 3176899), ), ( - u32x4::new(24673290, 29693310, 21412266, 18324699), - u32x4::new(2154518, 40329021, 17500543, 3954277), + u32x4::const_new(24673290, 29693310, 21412266, 18324699), + u32x4::const_new(2154518, 40329021, 17500543, 3954277), ), ( - u32x4::new(36758685, 38738957, 165513, 14691866), - u32x4::new(3070475, 10424235, 17096536, 16896898), + u32x4::const_new(36758685, 38738957, 165513, 14691866), + u32x4::const_new(3070475, 10424235, 17096536, 16896898), ), ( - u32x4::new(59790459, 43094586, 8720681, 10423589), - u32x4::new(1122030, 31545615, 4463786, 31811293), + u32x4::const_new(59790459, 43094586, 8720681, 10423589), + u32x4::const_new(1122030, 31545615, 4463786, 31811293), ), ( - u32x4::new(49778992, 60881044, 20509974, 5832494), - u32x4::new(64155961, 31483358, 4511231, 20307815), + u32x4::const_new(49778992, 60881044, 20509974, 5832494), + u32x4::const_new(64155961, 31483358, 4511231, 20307815), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(2863373, 40876242, 26865913, 24067353), - u32x4::new(15726407, 40919070, 12953902, 9931535), + u32x4::const_new(2863373, 40876242, 26865913, 24067353), + u32x4::const_new(15726407, 40919070, 12953902, 9931535), ), ( - u32x4::new(60934877, 42512204, 21649141, 21945190), - 
u32x4::new(52211954, 60984193, 7046207, 5363493), + u32x4::const_new(60934877, 42512204, 21649141, 21945190), + u32x4::const_new(52211954, 60984193, 7046207, 5363493), ), ( - u32x4::new(4205971, 64068464, 18197273, 7327176), - u32x4::new(51527794, 21166920, 20669933, 11828242), + u32x4::const_new(4205971, 64068464, 18197273, 7327176), + u32x4::const_new(51527794, 21166920, 20669933, 11828242), ), ( - u32x4::new(59782815, 49617225, 15379924, 457923), - u32x4::new(9320508, 21498914, 3242540, 31563182), + u32x4::const_new(59782815, 49617225, 15379924, 457923), + u32x4::const_new(9320508, 21498914, 3242540, 31563182), ), ( - u32x4::new(27714753, 8664670, 3366162, 26338598), - u32x4::new(56775518, 25796006, 13129151, 21388876), + u32x4::const_new(27714753, 8664670, 3366162, 26338598), + u32x4::const_new(56775518, 25796006, 13129151, 21388876), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(59276548, 49972346, 16795002, 33455915), - u32x4::new(48430097, 53857205, 18627071, 32474471), + u32x4::const_new(59276548, 49972346, 16795002, 33455915), + u32x4::const_new(48430097, 53857205, 18627071, 32474471), ), ( - u32x4::new(42160315, 50705892, 13530540, 28012698), - u32x4::new(19833221, 55886870, 20191784, 9644313), + u32x4::const_new(42160315, 50705892, 13530540, 28012698), + u32x4::const_new(19833221, 55886870, 20191784, 9644313), ), ( - u32x4::new(20372416, 28414713, 24084234, 31804096), - u32x4::new(33815377, 36131001, 17251241, 18291088), + u32x4::const_new(20372416, 28414713, 24084234, 31804096), + u32x4::const_new(33815377, 36131001, 17251241, 18291088), ), ( - u32x4::new(56234667, 14920441, 2033267, 29572003), - u32x4::new(1724043, 45519699, 17873735, 501988), + u32x4::const_new(56234667, 14920441, 2033267, 29572003), + u32x4::const_new(1724043, 45519699, 17873735, 501988), ), ( - u32x4::new(50031659, 31517850, 15697583, 1016845), - u32x4::new(43104661, 54769582, 8008601, 27257051), + u32x4::const_new(50031659, 31517850, 15697583, 1016845), + 
u32x4::const_new(43104661, 54769582, 8008601, 27257051), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(52951491, 66542164, 14853573, 30444631), - u32x4::new(12045973, 24321813, 16545674, 18160646), + u32x4::const_new(52951491, 66542164, 14853573, 30444631), + u32x4::const_new(12045973, 24321813, 16545674, 18160646), ), ( - u32x4::new(60107911, 1126003, 5947677, 19486116), - u32x4::new(41119984, 30860440, 7935395, 13354438), + u32x4::const_new(60107911, 1126003, 5947677, 19486116), + u32x4::const_new(41119984, 30860440, 7935395, 13354438), ), ( - u32x4::new(17841328, 11063269, 1664538, 26687568), - u32x4::new(6268968, 22280371, 17275484, 4523163), + u32x4::const_new(17841328, 11063269, 1664538, 26687568), + u32x4::const_new(6268968, 22280371, 17275484, 4523163), ), ( - u32x4::new(15886041, 56799482, 15446552, 21712778), - u32x4::new(1005290, 17827215, 4978741, 6854882), + u32x4::const_new(15886041, 56799482, 15446552, 21712778), + u32x4::const_new(1005290, 17827215, 4978741, 6854882), ), ( - u32x4::new(34319277, 47731002, 20321804, 28544575), - u32x4::new(29591814, 63376351, 24754545, 26001714), + u32x4::const_new(34319277, 47731002, 20321804, 28544575), + u32x4::const_new(29591814, 63376351, 24754545, 26001714), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(66783087, 5234346, 46102, 8566476), - u32x4::new(19947339, 20180418, 25398238, 3726678), + u32x4::const_new(66783087, 5234346, 46102, 8566476), + u32x4::const_new(19947339, 20180418, 25398238, 3726678), ), ( - u32x4::new(63890180, 46380965, 20674069, 5366544), - u32x4::new(59661487, 48406612, 31533614, 7071217), + u32x4::const_new(63890180, 46380965, 20674069, 5366544), + u32x4::const_new(59661487, 48406612, 31533614, 7071217), ), ( - u32x4::new(13104676, 1406631, 24326736, 19854367), - u32x4::new(61039528, 11019904, 31967425, 19219275), + u32x4::const_new(13104676, 1406631, 24326736, 19854367), + u32x4::const_new(61039528, 11019904, 31967425, 19219275), ), ( - u32x4::new(39003597, 30143957, 
15351834, 8639435), - u32x4::new(57309582, 61436794, 15830475, 10090318), + u32x4::const_new(39003597, 30143957, 15351834, 8639435), + u32x4::const_new(57309582, 61436794, 15830475, 10090318), ), ( - u32x4::new(45923044, 6700175, 99413, 21263025), - u32x4::new(23762647, 53905481, 6063914, 10065424), + u32x4::const_new(45923044, 6700175, 99413, 21263025), + u32x4::const_new(23762647, 53905481, 6063914, 10065424), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(42822326, 57678669, 4052879, 25452667), - u32x4::new(54049411, 2373092, 22337016, 7701046), + u32x4::const_new(42822326, 57678669, 4052879, 25452667), + u32x4::const_new(54049411, 2373092, 22337016, 7701046), ), ( - u32x4::new(44382355, 43307377, 16761537, 30373573), - u32x4::new(49790216, 23230748, 25655306, 10519391), + u32x4::const_new(44382355, 43307377, 16761537, 30373573), + u32x4::const_new(49790216, 23230748, 25655306, 10519391), ), ( - u32x4::new(919475, 59371245, 1273450, 25558666), - u32x4::new(9724711, 8556709, 25755845, 10887647), + u32x4::const_new(919475, 59371245, 1273450, 25558666), + u32x4::const_new(9724711, 8556709, 25755845, 10887647), ), ( - u32x4::new(25465699, 44651158, 17658392, 11257418), - u32x4::new(29735193, 22885150, 7094716, 26828565), + u32x4::const_new(25465699, 44651158, 17658392, 11257418), + u32x4::const_new(29735193, 22885150, 7094716, 26828565), ), ( - u32x4::new(48237389, 47661599, 27054393, 7328070), - u32x4::new(27280193, 65616691, 23062005, 4170709), + u32x4::const_new(48237389, 47661599, 27054393, 7328070), + u32x4::const_new(27280193, 65616691, 23062005, 4170709), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(26535281, 60238317, 30343788, 25790743), - u32x4::new(37993933, 24614372, 9523840, 10401918), + u32x4::const_new(26535281, 60238317, 30343788, 25790743), + u32x4::const_new(37993933, 24614372, 9523840, 10401918), ), ( - u32x4::new(2783987, 29468958, 4697011, 19804475), - u32x4::new(37246678, 46797720, 10261254, 18942252), + 
u32x4::const_new(2783987, 29468958, 4697011, 19804475), + u32x4::const_new(37246678, 46797720, 10261254, 18942252), ), ( - u32x4::new(58135580, 60247753, 25301938, 6844561), - u32x4::new(20949454, 39844754, 4552026, 919057), + u32x4::const_new(58135580, 60247753, 25301938, 6844561), + u32x4::const_new(20949454, 39844754, 4552026, 919057), ), ( - u32x4::new(6694071, 44126261, 32285330, 31370180), - u32x4::new(24603698, 53328179, 13971149, 5325636), + u32x4::const_new(6694071, 44126261, 32285330, 31370180), + u32x4::const_new(24603698, 53328179, 13971149, 5325636), ), ( - u32x4::new(64879487, 582094, 17982081, 19190425), - u32x4::new(24951286, 26923842, 29077174, 33286062), + u32x4::const_new(64879487, 582094, 17982081, 19190425), + u32x4::const_new(24951286, 26923842, 29077174, 33286062), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(54863941, 67016431, 1224043, 23371240), - u32x4::new(62940074, 52101083, 13523637, 30366406), + u32x4::const_new(54863941, 67016431, 1224043, 23371240), + u32x4::const_new(62940074, 52101083, 13523637, 30366406), ), ( - u32x4::new(36324581, 25407485, 18258623, 4698602), - u32x4::new(50300544, 2658516, 26300935, 2611030), + u32x4::const_new(36324581, 25407485, 18258623, 4698602), + u32x4::const_new(50300544, 2658516, 26300935, 2611030), ), ( - u32x4::new(27183975, 21791014, 18105064, 9875199), - u32x4::new(58118912, 54198635, 6400311, 14767984), + u32x4::const_new(27183975, 21791014, 18105064, 9875199), + u32x4::const_new(58118912, 54198635, 6400311, 14767984), ), ( - u32x4::new(33918318, 42937962, 14809334, 22136592), - u32x4::new(10636588, 29082337, 29829692, 28549776), + u32x4::const_new(33918318, 42937962, 14809334, 22136592), + u32x4::const_new(10636588, 29082337, 29829692, 28549776), ), ( - u32x4::new(61080905, 854212, 12202487, 20004503), - u32x4::new(9256495, 6903981, 20567109, 347423), + u32x4::const_new(61080905, 854212, 12202487, 20004503), + u32x4::const_new(9256495, 6903981, 20567109, 347423), ), ])), 
CachedPoint(FieldElement2625x4([ ( - u32x4::new(41391822, 34336880, 22362564, 14247996), - u32x4::new(12115604, 41583344, 7639288, 28910945), + u32x4::const_new(41391822, 34336880, 22362564, 14247996), + u32x4::const_new(12115604, 41583344, 7639288, 28910945), ), ( - u32x4::new(62066617, 59758859, 26665947, 11614812), - u32x4::new(65737664, 45704543, 30324810, 12868376), + u32x4::const_new(62066617, 59758859, 26665947, 11614812), + u32x4::const_new(65737664, 45704543, 30324810, 12868376), ), ( - u32x4::new(17491771, 43589814, 9454919, 26047850), - u32x4::new(52629282, 39304244, 3868968, 19296062), + u32x4::const_new(17491771, 43589814, 9454919, 26047850), + u32x4::const_new(52629282, 39304244, 3868968, 19296062), ), ( - u32x4::new(17826638, 30413590, 32534225, 32741469), - u32x4::new(15012391, 14365713, 33039233, 14791399), + u32x4::const_new(17826638, 30413590, 32534225, 32741469), + u32x4::const_new(15012391, 14365713, 33039233, 14791399), ), ( - u32x4::new(64115596, 59197067, 32739005, 23275744), - u32x4::new(32954320, 22241406, 20788442, 4942942), + u32x4::const_new(64115596, 59197067, 32739005, 23275744), + u32x4::const_new(32954320, 22241406, 20788442, 4942942), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(31956192, 59570132, 2784352, 4237732), - u32x4::new(47222312, 4860927, 18658867, 15279314), + u32x4::const_new(31956192, 59570132, 2784352, 4237732), + u32x4::const_new(47222312, 4860927, 18658867, 15279314), ), ( - u32x4::new(63240583, 28160478, 23524941, 13390861), - u32x4::new(66437406, 57718120, 33345312, 28896298), + u32x4::const_new(63240583, 28160478, 23524941, 13390861), + u32x4::const_new(66437406, 57718120, 33345312, 28896298), ), ( - u32x4::new(39026193, 46239965, 21440243, 25070488), - u32x4::new(64012383, 60999016, 16517060, 29565907), + u32x4::const_new(39026193, 46239965, 21440243, 25070488), + u32x4::const_new(64012383, 60999016, 16517060, 29565907), ), ( - u32x4::new(18118181, 60161496, 4212092, 23976240), - u32x4::new(36277753, 
62363144, 5816868, 16964362), + u32x4::const_new(18118181, 60161496, 4212092, 23976240), + u32x4::const_new(36277753, 62363144, 5816868, 16964362), ), ( - u32x4::new(18196138, 62490693, 281468, 7934713), - u32x4::new(56027312, 62015725, 4837237, 32932252), + u32x4::const_new(18196138, 62490693, 281468, 7934713), + u32x4::const_new(56027312, 62015725, 4837237, 32932252), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(29885826, 51028067, 30418143, 33438769), - u32x4::new(62542283, 39442528, 31535876, 143299), + u32x4::const_new(29885826, 51028067, 30418143, 33438769), + u32x4::const_new(62542283, 39442528, 31535876, 143299), ), ( - u32x4::new(17143063, 56709783, 14451852, 15782104), - u32x4::new(32762665, 14047066, 26295037, 5432487), + u32x4::const_new(17143063, 56709783, 14451852, 15782104), + u32x4::const_new(32762665, 14047066, 26295037, 5432487), ), ( - u32x4::new(75151, 533606, 7539077, 30926189), - u32x4::new(38410914, 23771680, 4872443, 29199566), + u32x4::const_new(75151, 533606, 7539077, 30926189), + u32x4::const_new(38410914, 23771680, 4872443, 29199566), ), ( - u32x4::new(61522396, 48934708, 16223126, 207380), - u32x4::new(11171993, 47975147, 14164574, 352966), + u32x4::const_new(61522396, 48934708, 16223126, 207380), + u32x4::const_new(11171993, 47975147, 14164574, 352966), ), ( - u32x4::new(15449006, 56530757, 26796528, 12045834), - u32x4::new(63738697, 40667227, 33001582, 9101885), + u32x4::const_new(15449006, 56530757, 26796528, 12045834), + u32x4::const_new(63738697, 40667227, 33001582, 9101885), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(43331297, 18431341, 25801195, 17267698), - u32x4::new(19365485, 57295202, 22218985, 21284590), + u32x4::const_new(43331297, 18431341, 25801195, 17267698), + u32x4::const_new(19365485, 57295202, 22218985, 21284590), ), ( - u32x4::new(2429849, 19152559, 10762172, 22564684), - u32x4::new(21880390, 66866426, 20357935, 22641906), + u32x4::const_new(2429849, 19152559, 10762172, 22564684), + 
u32x4::const_new(21880390, 66866426, 20357935, 22641906), ), ( - u32x4::new(19771185, 31652693, 3666117, 28136958), - u32x4::new(23624283, 55101502, 6313920, 6783662), + u32x4::const_new(19771185, 31652693, 3666117, 28136958), + u32x4::const_new(23624283, 55101502, 6313920, 6783662), ), ( - u32x4::new(3487137, 7092443, 11001876, 26196524), - u32x4::new(47319246, 44542068, 17594073, 15027760), + u32x4::const_new(3487137, 7092443, 11001876, 26196524), + u32x4::const_new(47319246, 44542068, 17594073, 15027760), ), ( - u32x4::new(49563607, 32191113, 4991283, 25400512), - u32x4::new(46539152, 4155103, 32368171, 201203), + u32x4::const_new(49563607, 32191113, 4991283, 25400512), + u32x4::const_new(46539152, 4155103, 32368171, 201203), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(20548943, 14334571, 4073874, 6368588), - u32x4::new(53208883, 56484515, 15970071, 25561889), + u32x4::const_new(20548943, 14334571, 4073874, 6368588), + u32x4::const_new(53208883, 56484515, 15970071, 25561889), ), ( - u32x4::new(49915097, 44030795, 11202344, 29284344), - u32x4::new(60258023, 66225712, 8075764, 12383512), + u32x4::const_new(49915097, 44030795, 11202344, 29284344), + u32x4::const_new(60258023, 66225712, 8075764, 12383512), ), ( - u32x4::new(45248912, 4933668, 9592153, 5819559), - u32x4::new(31030983, 38174071, 32435814, 7442522), + u32x4::const_new(45248912, 4933668, 9592153, 5819559), + u32x4::const_new(31030983, 38174071, 32435814, 7442522), ), ( - u32x4::new(62688129, 48218381, 22089545, 12897361), - u32x4::new(21050881, 34278889, 7569163, 3225449), + u32x4::const_new(62688129, 48218381, 22089545, 12897361), + u32x4::const_new(21050881, 34278889, 7569163, 3225449), ), ( - u32x4::new(19050183, 51089071, 32935757, 22640195), - u32x4::new(66122318, 47144608, 18743677, 25177079), + u32x4::const_new(19050183, 51089071, 32935757, 22640195), + u32x4::const_new(66122318, 47144608, 18743677, 25177079), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(41186817, 46681702, 
31819867, 32997133), - u32x4::new(38559207, 27147015, 30293819, 16762988), + u32x4::const_new(41186817, 46681702, 31819867, 32997133), + u32x4::const_new(38559207, 27147015, 30293819, 16762988), ), ( - u32x4::new(24154689, 51762873, 23883879, 13510519), - u32x4::new(55338250, 61224161, 11663149, 30803960), + u32x4::const_new(24154689, 51762873, 23883879, 13510519), + u32x4::const_new(55338250, 61224161, 11663149, 30803960), ), ( - u32x4::new(18104238, 14117824, 11724021, 21362053), - u32x4::new(65704761, 35530242, 13498058, 33522849), + u32x4::const_new(18104238, 14117824, 11724021, 21362053), + u32x4::const_new(65704761, 35530242, 13498058, 33522849), ), ( - u32x4::new(63812888, 23995539, 28920539, 24005193), - u32x4::new(26412223, 36582218, 4251418, 26160309), + u32x4::const_new(63812888, 23995539, 28920539, 24005193), + u32x4::const_new(26412223, 36582218, 4251418, 26160309), ), ( - u32x4::new(16822053, 66064082, 3482145, 31979593), - u32x4::new(45937188, 54475379, 612917, 7976478), + u32x4::const_new(16822053, 66064082, 3482145, 31979593), + u32x4::const_new(45937188, 54475379, 612917, 7976478), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(46509314, 55327128, 8944536, 274914), - u32x4::new(26432930, 53829300, 21192572, 3569894), + u32x4::const_new(46509314, 55327128, 8944536, 274914), + u32x4::const_new(26432930, 53829300, 21192572, 3569894), ), ( - u32x4::new(20919764, 64356651, 30642344, 17215170), - u32x4::new(20335124, 11203745, 18663316, 19024174), + u32x4::const_new(20919764, 64356651, 30642344, 17215170), + u32x4::const_new(20335124, 11203745, 18663316, 19024174), ), ( - u32x4::new(59297055, 53842463, 3680204, 9806710), - u32x4::new(54004169, 51484914, 29807998, 20134199), + u32x4::const_new(59297055, 53842463, 3680204, 9806710), + u32x4::const_new(54004169, 51484914, 29807998, 20134199), ), ( - u32x4::new(14781592, 22628010, 26877930, 25880359), - u32x4::new(30434803, 190607, 30184292, 8991040), + u32x4::const_new(14781592, 22628010, 
26877930, 25880359), + u32x4::const_new(30434803, 190607, 30184292, 8991040), ), ( - u32x4::new(64400983, 64591751, 854562, 28216111), - u32x4::new(20010398, 50414793, 9803872, 22687008), + u32x4::const_new(64400983, 64591751, 854562, 28216111), + u32x4::const_new(20010398, 50414793, 9803872, 22687008), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(15091184, 32550863, 8818643, 4244752), - u32x4::new(43123513, 64565526, 408838, 13206998), + u32x4::const_new(15091184, 32550863, 8818643, 4244752), + u32x4::const_new(43123513, 64565526, 408838, 13206998), ), ( - u32x4::new(16405061, 60379639, 31489017, 20949281), - u32x4::new(27568751, 38734986, 8364264, 12451020), + u32x4::const_new(16405061, 60379639, 31489017, 20949281), + u32x4::const_new(27568751, 38734986, 8364264, 12451020), ), ( - u32x4::new(16005217, 58008076, 1406778, 26546927), - u32x4::new(39571784, 56365493, 31274296, 8918790), + u32x4::const_new(16005217, 58008076, 1406778, 26546927), + u32x4::const_new(39571784, 56365493, 31274296, 8918790), ), ( - u32x4::new(23271122, 19453469, 27718201, 32742670), - u32x4::new(234332, 36785342, 22601675, 14331046), + u32x4::const_new(23271122, 19453469, 27718201, 32742670), + u32x4::const_new(234332, 36785342, 22601675, 14331046), ), ( - u32x4::new(40636025, 22442705, 22115403, 23745859), - u32x4::new(41164945, 61012, 12499614, 542137), + u32x4::const_new(40636025, 22442705, 22115403, 23745859), + u32x4::const_new(41164945, 61012, 12499614, 542137), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(62776018, 32835413, 17373246, 17187309), - u32x4::new(54469193, 21770290, 15923753, 28996575), + u32x4::const_new(62776018, 32835413, 17373246, 17187309), + u32x4::const_new(54469193, 21770290, 15923753, 28996575), ), ( - u32x4::new(59385210, 63082298, 12568449, 8509004), - u32x4::new(9483342, 16105238, 5756054, 26890758), + u32x4::const_new(59385210, 63082298, 12568449, 8509004), + u32x4::const_new(9483342, 16105238, 5756054, 26890758), ), ( - 
u32x4::new(53987996, 38201748, 5521661, 19060159), - u32x4::new(18663191, 9093637, 27786835, 31189196), + u32x4::const_new(53987996, 38201748, 5521661, 19060159), + u32x4::const_new(18663191, 9093637, 27786835, 31189196), ), ( - u32x4::new(65872678, 43635130, 27903055, 25020300), - u32x4::new(65772737, 38110437, 5213502, 21909342), + u32x4::const_new(65872678, 43635130, 27903055, 25020300), + u32x4::const_new(65772737, 38110437, 5213502, 21909342), ), ( - u32x4::new(4438979, 9680838, 10212446, 4764184), - u32x4::new(13235684, 58245995, 20264570, 21024049), + u32x4::const_new(4438979, 9680838, 10212446, 4764184), + u32x4::const_new(13235684, 58245995, 20264570, 21024049), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(60835961, 48209103, 31049052, 4688268), - u32x4::new(12426713, 59829045, 22302488, 29008521), + u32x4::const_new(60835961, 48209103, 31049052, 4688268), + u32x4::const_new(12426713, 59829045, 22302488, 29008521), ), ( - u32x4::new(50401667, 29716596, 23531224, 7581281), - u32x4::new(49071895, 6952617, 14934683, 8218256), + u32x4::const_new(50401667, 29716596, 23531224, 7581281), + u32x4::const_new(49071895, 6952617, 14934683, 8218256), ), ( - u32x4::new(1601446, 36631413, 31774811, 29625330), - u32x4::new(56786114, 8331539, 23129509, 19783344), + u32x4::const_new(1601446, 36631413, 31774811, 29625330), + u32x4::const_new(56786114, 8331539, 23129509, 19783344), ), ( - u32x4::new(59514327, 64513110, 1772300, 5701338), - u32x4::new(5737511, 16147555, 9461515, 5703271), + u32x4::const_new(59514327, 64513110, 1772300, 5701338), + u32x4::const_new(5737511, 16147555, 9461515, 5703271), ), ( - u32x4::new(33072974, 54300426, 11940114, 1308663), - u32x4::new(15627555, 4931627, 28443714, 20924342), + u32x4::const_new(33072974, 54300426, 11940114, 1308663), + u32x4::const_new(15627555, 4931627, 28443714, 20924342), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(18135013, 20358426, 4922557, 10015355), - u32x4::new(65729669, 34786528, 26248549, 
29194359), + u32x4::const_new(18135013, 20358426, 4922557, 10015355), + u32x4::const_new(65729669, 34786528, 26248549, 29194359), ), ( - u32x4::new(797666, 34997544, 24316856, 25107230), - u32x4::new(24612576, 4761401, 15307321, 32404252), + u32x4::const_new(797666, 34997544, 24316856, 25107230), + u32x4::const_new(24612576, 4761401, 15307321, 32404252), ), ( - u32x4::new(16501152, 60565831, 9487105, 9316022), - u32x4::new(24986054, 31917592, 3962024, 2501883), + u32x4::const_new(16501152, 60565831, 9487105, 9316022), + u32x4::const_new(24986054, 31917592, 3962024, 2501883), ), ( - u32x4::new(63356796, 50432342, 18044926, 30566881), - u32x4::new(42032028, 31415202, 13524600, 16119907), + u32x4::const_new(63356796, 50432342, 18044926, 30566881), + u32x4::const_new(42032028, 31415202, 13524600, 16119907), ), ( - u32x4::new(3927286, 57022374, 9265437, 21620772), - u32x4::new(19481940, 3806938, 24836192, 14572399), + u32x4::const_new(3927286, 57022374, 9265437, 21620772), + u32x4::const_new(19481940, 3806938, 24836192, 14572399), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(10785787, 46564798, 368445, 33181384), - u32x4::new(5319843, 52687136, 30347110, 29837357), + u32x4::const_new(10785787, 46564798, 368445, 33181384), + u32x4::const_new(5319843, 52687136, 30347110, 29837357), ), ( - u32x4::new(56436732, 47859251, 24141084, 22250712), - u32x4::new(59046084, 4963427, 33463413, 17168859), + u32x4::const_new(56436732, 47859251, 24141084, 22250712), + u32x4::const_new(59046084, 4963427, 33463413, 17168859), ), ( - u32x4::new(15512044, 6366740, 4737504, 27644548), - u32x4::new(30307977, 25037929, 14593903, 12836490), + u32x4::const_new(15512044, 6366740, 4737504, 27644548), + u32x4::const_new(30307977, 25037929, 14593903, 12836490), ), ( - u32x4::new(63878897, 34013023, 5860752, 7244096), - u32x4::new(3689461, 57012135, 18389096, 11589351), + u32x4::const_new(63878897, 34013023, 5860752, 7244096), + u32x4::const_new(3689461, 57012135, 18389096, 11589351), ), ( 
- u32x4::new(4682110, 36302830, 653422, 22316819), - u32x4::new(14081831, 5657024, 11088376, 24110612), + u32x4::const_new(4682110, 36302830, 653422, 22316819), + u32x4::const_new(14081831, 5657024, 11088376, 24110612), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(39907267, 45940262, 24887471, 18342609), - u32x4::new(878445, 40456159, 12019082, 345107), + u32x4::const_new(39907267, 45940262, 24887471, 18342609), + u32x4::const_new(878445, 40456159, 12019082, 345107), ), ( - u32x4::new(12794982, 28893944, 9447505, 11387200), - u32x4::new(16961963, 13916996, 10893728, 25898006), + u32x4::const_new(12794982, 28893944, 9447505, 11387200), + u32x4::const_new(16961963, 13916996, 10893728, 25898006), ), ( - u32x4::new(44934162, 53465865, 3583620, 1102334), - u32x4::new(53917811, 63478576, 2426066, 10389549), + u32x4::const_new(44934162, 53465865, 3583620, 1102334), + u32x4::const_new(53917811, 63478576, 2426066, 10389549), ), ( - u32x4::new(45096036, 37595344, 19367718, 20257175), - u32x4::new(10280866, 41653449, 27665642, 375926), + u32x4::const_new(45096036, 37595344, 19367718, 20257175), + u32x4::const_new(10280866, 41653449, 27665642, 375926), ), ( - u32x4::new(45847901, 24064074, 32494820, 32204556), - u32x4::new(10720704, 51079060, 1297436, 29853825), + u32x4::const_new(45847901, 24064074, 32494820, 32204556), + u32x4::const_new(10720704, 51079060, 1297436, 29853825), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(66303987, 36060363, 16494578, 24962147), - u32x4::new(11971403, 49538586, 25060560, 1964341), + u32x4::const_new(66303987, 36060363, 16494578, 24962147), + u32x4::const_new(11971403, 49538586, 25060560, 1964341), ), ( - u32x4::new(25988481, 27641502, 24909517, 27237087), - u32x4::new(66646363, 52777626, 16360849, 10459972), + u32x4::const_new(25988481, 27641502, 24909517, 27237087), + u32x4::const_new(66646363, 52777626, 16360849, 10459972), ), ( - u32x4::new(43930529, 34374176, 31225968, 8807030), - u32x4::new(10394758, 35904854, 
25325589, 19335583), + u32x4::const_new(43930529, 34374176, 31225968, 8807030), + u32x4::const_new(10394758, 35904854, 25325589, 19335583), ), ( - u32x4::new(25094697, 34380951, 20051185, 32287161), - u32x4::new(11739332, 53887441, 30517319, 26601892), + u32x4::const_new(25094697, 34380951, 20051185, 32287161), + u32x4::const_new(11739332, 53887441, 30517319, 26601892), ), ( - u32x4::new(8868546, 35635502, 32513071, 28248087), - u32x4::new(51946989, 14222744, 19198839, 23261841), + u32x4::const_new(8868546, 35635502, 32513071, 28248087), + u32x4::const_new(51946989, 14222744, 19198839, 23261841), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(51218008, 5070126, 11046681, 5320810), - u32x4::new(61212079, 34104447, 23895089, 6460727), + u32x4::const_new(51218008, 5070126, 11046681, 5320810), + u32x4::const_new(61212079, 34104447, 23895089, 6460727), ), ( - u32x4::new(39843528, 46278671, 10426120, 25624792), - u32x4::new(66658766, 37140083, 28933107, 12969597), + u32x4::const_new(39843528, 46278671, 10426120, 25624792), + u32x4::const_new(66658766, 37140083, 28933107, 12969597), ), ( - u32x4::new(59635793, 40220191, 5751421, 173680), - u32x4::new(58321825, 740337, 1412847, 7682623), + u32x4::const_new(59635793, 40220191, 5751421, 173680), + u32x4::const_new(58321825, 740337, 1412847, 7682623), ), ( - u32x4::new(975962, 56440763, 20812276, 22631115), - u32x4::new(49095824, 19883130, 2419746, 31043648), + u32x4::const_new(975962, 56440763, 20812276, 22631115), + u32x4::const_new(49095824, 19883130, 2419746, 31043648), ), ( - u32x4::new(66208703, 39669328, 22525915, 3748897), - u32x4::new(65994776, 34533552, 8126286, 18326047), + u32x4::const_new(66208703, 39669328, 22525915, 3748897), + u32x4::const_new(65994776, 34533552, 8126286, 18326047), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(64176557, 3912400, 19351673, 30068471), - u32x4::new(31190055, 24221683, 33142424, 28698542), + u32x4::const_new(64176557, 3912400, 19351673, 30068471), + 
u32x4::const_new(31190055, 24221683, 33142424, 28698542), ), ( - u32x4::new(34784792, 4109933, 3867193, 19557314), - u32x4::new(2112512, 32715890, 24550117, 16595976), + u32x4::const_new(34784792, 4109933, 3867193, 19557314), + u32x4::const_new(2112512, 32715890, 24550117, 16595976), ), ( - u32x4::new(35542761, 48024875, 10925431, 31526577), - u32x4::new(66577735, 23189821, 13375709, 1735095), + u32x4::const_new(35542761, 48024875, 10925431, 31526577), + u32x4::const_new(66577735, 23189821, 13375709, 1735095), ), ( - u32x4::new(59699254, 43854093, 29783239, 24777271), - u32x4::new(19600372, 39924461, 2896720, 1472185), + u32x4::const_new(59699254, 43854093, 29783239, 24777271), + u32x4::const_new(19600372, 39924461, 2896720, 1472185), ), ( - u32x4::new(56389656, 35980854, 33172342, 1370336), - u32x4::new(23707480, 57654949, 7850973, 12655016), + u32x4::const_new(56389656, 35980854, 33172342, 1370336), + u32x4::const_new(23707480, 57654949, 7850973, 12655016), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(38372660, 57101970, 7044964, 12732710), - u32x4::new(57535705, 6043201, 30858914, 10946592), + u32x4::const_new(38372660, 57101970, 7044964, 12732710), + u32x4::const_new(57535705, 6043201, 30858914, 10946592), ), ( - u32x4::new(21023468, 6946992, 26403324, 23901823), - u32x4::new(35695559, 23440687, 4763891, 6514074), + u32x4::const_new(21023468, 6946992, 26403324, 23901823), + u32x4::const_new(35695559, 23440687, 4763891, 6514074), ), ( - u32x4::new(28662273, 30933699, 9352242, 26354829), - u32x4::new(37402243, 3145176, 8770289, 525937), + u32x4::const_new(28662273, 30933699, 9352242, 26354829), + u32x4::const_new(37402243, 3145176, 8770289, 525937), ), ( - u32x4::new(54933102, 36695832, 3281859, 4755022), - u32x4::new(23043294, 32794379, 15618886, 23602412), + u32x4::const_new(54933102, 36695832, 3281859, 4755022), + u32x4::const_new(23043294, 32794379, 15618886, 23602412), ), ( - u32x4::new(9931565, 29897140, 2480737, 24193701), - u32x4::new(7833615, 
2284939, 893926, 13421882), + u32x4::const_new(9931565, 29897140, 2480737, 24193701), + u32x4::const_new(7833615, 2284939, 893926, 13421882), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(22917795, 22088359, 28978099, 19794863), - u32x4::new(60542318, 29878494, 31053731, 9080720), + u32x4::const_new(22917795, 22088359, 28978099, 19794863), + u32x4::const_new(60542318, 29878494, 31053731, 9080720), ), ( - u32x4::new(23679072, 52547035, 28424916, 20647332), - u32x4::new(4008761, 28267029, 12961289, 1589095), + u32x4::const_new(23679072, 52547035, 28424916, 20647332), + u32x4::const_new(4008761, 28267029, 12961289, 1589095), ), ( - u32x4::new(55616194, 26678929, 14998265, 23274397), - u32x4::new(54625466, 46244264, 28627706, 33030665), + u32x4::const_new(55616194, 26678929, 14998265, 23274397), + u32x4::const_new(54625466, 46244264, 28627706, 33030665), ), ( - u32x4::new(11527330, 6449415, 26531607, 3472938), - u32x4::new(41541592, 62607682, 19862690, 20564723), + u32x4::const_new(11527330, 6449415, 26531607, 3472938), + u32x4::const_new(41541592, 62607682, 19862690, 20564723), ), ( - u32x4::new(32843805, 49066843, 28425824, 19521495), - u32x4::new(48792073, 48242878, 27392443, 13175986), + u32x4::const_new(32843805, 49066843, 28425824, 19521495), + u32x4::const_new(48792073, 48242878, 27392443, 13175986), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(16185025, 61537525, 2961305, 1492442), - u32x4::new(25123147, 3095034, 31896958, 33089615), + u32x4::const_new(16185025, 61537525, 2961305, 1492442), + u32x4::const_new(25123147, 3095034, 31896958, 33089615), ), ( - u32x4::new(64748157, 18336595, 16522231, 25426312), - u32x4::new(65718949, 35485695, 30554083, 10205918), + u32x4::const_new(64748157, 18336595, 16522231, 25426312), + u32x4::const_new(65718949, 35485695, 30554083, 10205918), ), ( - u32x4::new(39626934, 39271045, 16420458, 9826240), - u32x4::new(56483981, 27128085, 3783403, 13360006), + u32x4::const_new(39626934, 39271045, 16420458, 
9826240), + u32x4::const_new(56483981, 27128085, 3783403, 13360006), ), ( - u32x4::new(30793778, 66771960, 17241420, 6564573), - u32x4::new(61102581, 29974476, 32385512, 9011754), + u32x4::const_new(30793778, 66771960, 17241420, 6564573), + u32x4::const_new(61102581, 29974476, 32385512, 9011754), ), ( - u32x4::new(28068166, 11862220, 14323567, 12380617), - u32x4::new(52090465, 16029056, 24495309, 21409233), + u32x4::const_new(28068166, 11862220, 14323567, 12380617), + u32x4::const_new(52090465, 16029056, 24495309, 21409233), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(59411973, 57437124, 11695483, 17586857), - u32x4::new(16108987, 43449109, 31098002, 6248476), + u32x4::const_new(59411973, 57437124, 11695483, 17586857), + u32x4::const_new(16108987, 43449109, 31098002, 6248476), ), ( - u32x4::new(42258047, 61595931, 29308533, 11742653), - u32x4::new(43042345, 27373650, 30165249, 21929989), + u32x4::const_new(42258047, 61595931, 29308533, 11742653), + u32x4::const_new(43042345, 27373650, 30165249, 21929989), ), ( - u32x4::new(49907221, 9620337, 21888081, 20981082), - u32x4::new(56288861, 61562203, 33223566, 3582446), + u32x4::const_new(49907221, 9620337, 21888081, 20981082), + u32x4::const_new(56288861, 61562203, 33223566, 3582446), ), ( - u32x4::new(57535017, 41003416, 22080416, 14463796), - u32x4::new(65518565, 18127889, 24370863, 33332664), + u32x4::const_new(57535017, 41003416, 22080416, 14463796), + u32x4::const_new(65518565, 18127889, 24370863, 33332664), ), ( - u32x4::new(66655380, 6430175, 471782, 11947673), - u32x4::new(30596400, 18898659, 15930721, 4211851), + u32x4::const_new(66655380, 6430175, 471782, 11947673), + u32x4::const_new(30596400, 18898659, 15930721, 4211851), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(6757410, 65455566, 13584784, 11362173), - u32x4::new(10797127, 24451471, 19541370, 29309435), + u32x4::const_new(6757410, 65455566, 13584784, 11362173), + u32x4::const_new(10797127, 24451471, 19541370, 29309435), ), ( - 
u32x4::new(40360156, 17685025, 18326181, 3846903), - u32x4::new(13693365, 63049479, 31900359, 23385063), + u32x4::const_new(40360156, 17685025, 18326181, 3846903), + u32x4::const_new(13693365, 63049479, 31900359, 23385063), ), ( - u32x4::new(52455038, 57513503, 22163311, 27095042), - u32x4::new(48610726, 66454160, 12085341, 26357004), + u32x4::const_new(52455038, 57513503, 22163311, 27095042), + u32x4::const_new(48610726, 66454160, 12085341, 26357004), ), ( - u32x4::new(22097042, 14063840, 6705778, 14342902), - u32x4::new(66139825, 20702105, 31279090, 7495745), + u32x4::const_new(22097042, 14063840, 6705778, 14342902), + u32x4::const_new(66139825, 20702105, 31279090, 7495745), ), ( - u32x4::new(27360710, 49314837, 18774847, 7146436), - u32x4::new(37066216, 42004961, 22409916, 10524446), + u32x4::const_new(27360710, 49314837, 18774847, 7146436), + u32x4::const_new(37066216, 42004961, 22409916, 10524446), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(1497507, 33054449, 11839906, 2960428), - u32x4::new(40538463, 18884538, 25018820, 4073970), + u32x4::const_new(1497507, 33054449, 11839906, 2960428), + u32x4::const_new(40538463, 18884538, 25018820, 4073970), ), ( - u32x4::new(54484385, 43640735, 2808257, 20710708), - u32x4::new(39840730, 27222424, 21783544, 11848522), + u32x4::const_new(54484385, 43640735, 2808257, 20710708), + u32x4::const_new(39840730, 27222424, 21783544, 11848522), ), ( - u32x4::new(45765237, 48200555, 9299019, 9393151), - u32x4::new(34818188, 56098995, 13575233, 21012731), + u32x4::const_new(45765237, 48200555, 9299019, 9393151), + u32x4::const_new(34818188, 56098995, 13575233, 21012731), ), ( - u32x4::new(4265428, 49627650, 24960282, 9425650), - u32x4::new(47883651, 2797524, 11853190, 22877329), + u32x4::const_new(4265428, 49627650, 24960282, 9425650), + u32x4::const_new(47883651, 2797524, 11853190, 22877329), ), ( - u32x4::new(25008173, 64199503, 380047, 12107343), - u32x4::new(12329448, 11914399, 764281, 29687002), + 
u32x4::const_new(25008173, 64199503, 380047, 12107343), + u32x4::const_new(12329448, 11914399, 764281, 29687002), ), ])), CachedPoint(FieldElement2625x4([ ( - u32x4::new(35889734, 23047226, 4022841, 7017445), - u32x4::new(7274086, 53316179, 25100176, 15310676), + u32x4::const_new(35889734, 23047226, 4022841, 7017445), + u32x4::const_new(7274086, 53316179, 25100176, 15310676), ), ( - u32x4::new(42409427, 30270106, 6823853, 31551384), - u32x4::new(40645017, 66489807, 18021817, 32669351), + u32x4::const_new(42409427, 30270106, 6823853, 31551384), + u32x4::const_new(40645017, 66489807, 18021817, 32669351), ), ( - u32x4::new(39827134, 43680850, 28297996, 20258133), - u32x4::new(26058742, 52643238, 22238331, 21690533), + u32x4::const_new(39827134, 43680850, 28297996, 20258133), + u32x4::const_new(26058742, 52643238, 22238331, 21690533), ), ( - u32x4::new(60808002, 17499995, 30042246, 29310584), - u32x4::new(48219954, 29389518, 8680514, 17844709), + u32x4::const_new(60808002, 17499995, 30042246, 29310584), + u32x4::const_new(48219954, 29389518, 8680514, 17844709), ), ( - u32x4::new(6452896, 50116553, 9532047, 26821214), - u32x4::new(44524351, 50428429, 21904953, 12608048), + u32x4::const_new(6452896, 50116553, 9532047, 26821214), + u32x4::const_new(44524351, 50428429, 21904953, 12608048), ), ])), ]); diff --git a/curve25519-dalek/src/backend/vector/neon/edwards.rs b/curve25519-dalek/src/backend/vector/neon/edwards.rs index d99fce854..f6bc9d62e 100644 --- a/curve25519-dalek/src/backend/vector/neon/edwards.rs +++ b/curve25519-dalek/src/backend/vector/neon/edwards.rs @@ -432,7 +432,7 @@ mod test { println!("Testing B +- kB"); let P = constants::ED25519_BASEPOINT_POINT; - let Q = &constants::ED25519_BASEPOINT_TABLE * &Scalar::from(8475983829u64); + let Q = constants::ED25519_BASEPOINT_TABLE * &Scalar::from(8475983829u64); addition_test_helper(P, Q); } @@ -511,7 +511,7 @@ mod test { doubling_test_helper(P); println!("Testing [2]([k]B)"); - let P = 
&constants::ED25519_BASEPOINT_TABLE * &Scalar::from(8475983829u64); + let P = constants::ED25519_BASEPOINT_TABLE * &Scalar::from(8475983829u64); doubling_test_helper(P); } diff --git a/curve25519-dalek/src/backend/vector/neon/field.rs b/curve25519-dalek/src/backend/vector/neon/field.rs index 63db311f1..5feabb734 100644 --- a/curve25519-dalek/src/backend/vector/neon/field.rs +++ b/curve25519-dalek/src/backend/vector/neon/field.rs @@ -21,13 +21,29 @@ //! arm instructions. use core::ops::{Add, Mul, Neg}; -use packed_simd::{i32x4, u32x2, u32x4, u64x2, u64x4, IntoBits}; +use super::packed_simd::{u32x2, u32x4, i32x4, u64x2, u64x4}; use crate::backend::serial::u64::field::FieldElement51; use crate::backend::vector::neon::constants::{ P_TIMES_16_HI, P_TIMES_16_LO, P_TIMES_2_HI, P_TIMES_2_LO, }; +fn shuffle_u32x4(x: u32x4, y: u32x4) -> u32x4 { + unsafe { + core::mem::transmute::<[u32; 4], u32x4>( + *core::intrinsics::simd::simd_shuffle::, [u32; 4], core::simd::Simd>( + core::simd::Simd::from_array(core::mem::transmute::(x)), + core::simd::Simd::from_array(core::mem::transmute::(y)), + IDX).as_array()) + } +} + +macro_rules! 
shuffle { + ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr]) => { + shuffle_u32x4::<{[$l0, $l1, $l2, $l3]}>($vec0, $vec1) + }; +} + /// Unpack 32-bit lanes: /// ((a0, b0, a1, b1) ,(c0, d0, c1, d1)) /// into @@ -42,10 +58,10 @@ fn unpack_pair(src: (u32x4, u32x4)) -> ((u32x2, u32x2), (u32x2, u32x2)) { unsafe { use core::arch::aarch64::vget_high_u32; use core::arch::aarch64::vget_low_u32; - a0 = vget_low_u32(src.0.into_bits()).into_bits(); - a1 = vget_low_u32(src.1.into_bits()).into_bits(); - b0 = vget_high_u32(src.0.into_bits()).into_bits(); - b1 = vget_high_u32(src.1.into_bits()).into_bits(); + a0 = vget_low_u32(src.0.into()).into(); + a1 = vget_low_u32(src.1.into()).into(); + b0 = vget_high_u32(src.0.into()).into(); + b1 = vget_high_u32(src.1.into()).into(); } return ((a0, a1), (b0, b1)); } @@ -64,11 +80,11 @@ fn repack_pair(x: (u32x4, u32x4), y: (u32x4, u32x4)) -> (u32x4, u32x4) { use core::arch::aarch64::vset_lane_u32; (vcombine_u32( - vset_lane_u32(vgetq_lane_u32(x.0.into_bits(), 2) , vget_low_u32(x.0.into_bits()), 1), - vset_lane_u32(vgetq_lane_u32(y.0.into_bits(), 2) , vget_low_u32(y.0.into_bits()), 1)).into_bits(), + vset_lane_u32(vgetq_lane_u32(x.0.into(), 2) , vget_low_u32(x.0.into()), 1), + vset_lane_u32(vgetq_lane_u32(y.0.into(), 2) , vget_low_u32(y.0.into()), 1)).into(), vcombine_u32( - vset_lane_u32(vgetq_lane_u32(x.1.into_bits(), 2) , vget_low_u32(x.1.into_bits()), 1), - vset_lane_u32(vgetq_lane_u32(y.1.into_bits(), 2) , vget_low_u32(y.1.into_bits()), 1)).into_bits()) + vset_lane_u32(vgetq_lane_u32(x.1.into(), 2) , vget_low_u32(x.1.into()), 1), + vset_lane_u32(vgetq_lane_u32(y.1.into(), 2) , vget_low_u32(y.1.into()), 1)).into()) } } @@ -156,14 +172,14 @@ impl FieldElement2625x4 { pub fn split(&self) -> [FieldElement51; 4] { let mut out = [FieldElement51::ZERO; 4]; for i in 0..5 { - let a_2i = self.0[i].0.extract(0) as u64; - let b_2i = self.0[i].0.extract(1) as u64; - let a_2i_1 = self.0[i].0.extract(2) as u64; - let b_2i_1 = 
self.0[i].0.extract(3) as u64; - let c_2i = self.0[i].1.extract(0) as u64; - let d_2i = self.0[i].1.extract(1) as u64; - let c_2i_1 = self.0[i].1.extract(2) as u64; - let d_2i_1 = self.0[i].1.extract(3) as u64; + let a_2i = self.0[i].0.extract::<0>() as u64; + let b_2i = self.0[i].0.extract::<1>() as u64; + let a_2i_1 = self.0[i].0.extract::<2>() as u64; + let b_2i_1 = self.0[i].0.extract::<3>() as u64; + let c_2i = self.0[i].1.extract::<0>() as u64; + let d_2i = self.0[i].1.extract::<1>() as u64; + let c_2i_1 = self.0[i].1.extract::<2>() as u64; + let d_2i_1 = self.0[i].1.extract::<3>() as u64; out[0].0[i] = a_2i + (a_2i_1 << 26); out[1].0[i] = b_2i + (b_2i_1 << 26); @@ -179,7 +195,6 @@ impl FieldElement2625x4 { #[inline(always)] #[rustfmt::skip] // Retain format of the return tuples fn shuffle_lanes(x: (u32x4, u32x4), control: Shuffle) -> (u32x4, u32x4) { - use packed_simd::shuffle; match control { Shuffle::AAAA => (shuffle!(x.0, x.1, [0, 0, 2, 2]), shuffle!(x.0, x.1, [0, 0, 2, 2])), Shuffle::BBBB => (shuffle!(x.0, x.1, [1, 1, 3, 3]), shuffle!(x.0, x.1, [1, 1, 3, 3])), @@ -209,7 +224,6 @@ impl FieldElement2625x4 { #[inline(always)] #[rustfmt::skip] // Retain format of the return tuples fn blend_lanes(x: (u32x4, u32x4), y: (u32x4, u32x4), control: Lanes) -> (u32x4, u32x4) { - use packed_simd::shuffle; match control { Lanes::C => (x.0, shuffle!(y.1, x.1, [0, 5, 2, 7])), Lanes::D => (x.0, shuffle!(y.1, x.1, [4, 1, 6, 3])), @@ -303,20 +317,20 @@ impl FieldElement2625x4 { use core::arch::aarch64::vqshlq_u32; let c: (u32x4, u32x4) = ( - vqshlq_u32(v.0.into_bits(), shifts.0.into_bits()).into_bits(), - vqshlq_u32(v.1.into_bits(), shifts.1.into_bits()).into_bits(), + vqshlq_u32(v.0.into(), shifts.0.into()).into(), + vqshlq_u32(v.1.into(), shifts.1.into()).into(), ); ( vcombine_u32( - vget_high_u32(c.0.into_bits()), - vget_low_u32(c.0.into_bits()), + vget_high_u32(c.0.into()), + vget_low_u32(c.0.into()), ) - .into_bits(), + .into(), vcombine_u32( - 
vget_high_u32(c.1.into_bits()), - vget_low_u32(c.1.into_bits()), + vget_high_u32(c.1.into()), + vget_low_u32(c.1.into()), ) - .into_bits(), + .into(), ) } }; @@ -328,15 +342,15 @@ impl FieldElement2625x4 { use core::arch::aarch64::vget_low_u32; ( vcombine_u32( - vget_low_u32(v_lo.0.into_bits()), - vget_high_u32(v_hi.0.into_bits()), + vget_low_u32(v_lo.0.into()), + vget_high_u32(v_hi.0.into()), ) - .into_bits(), + .into(), vcombine_u32( - vget_low_u32(v_lo.1.into_bits()), - vget_high_u32(v_hi.1.into_bits()), + vget_low_u32(v_lo.1.into()), + vget_high_u32(v_hi.1.into()), ) - .into_bits(), + .into(), ) } }; @@ -370,12 +384,12 @@ impl FieldElement2625x4 { use core::arch::aarch64::vmulq_n_u32; let c9_19_spread: (u32x4, u32x4) = ( - vmulq_n_u32(c98.0.into_bits(), 19).into_bits(), - vmulq_n_u32(c98.1.into_bits(), 19).into_bits(), + vmulq_n_u32(c98.0.into(), 19).into(), + vmulq_n_u32(c98.1.into(), 19).into(), ); - (vcombine_u32(vget_low_u32(c9_19_spread.0.into_bits()), u32x2::splat(0).into_bits()).into_bits(), - vcombine_u32(vget_low_u32(c9_19_spread.1.into_bits()), u32x2::splat(0).into_bits()).into_bits()) + (vcombine_u32(vget_low_u32(c9_19_spread.0.into()), u32x2::splat(0).into()).into(), + vcombine_u32(vget_low_u32(c9_19_spread.1.into()), u32x2::splat(0).into()).into()) }; v[0] = (v[0].0 + c9_19.0, v[0].1 + c9_19.1); @@ -393,13 +407,13 @@ impl FieldElement2625x4 { let carry = |z: &mut [(u64x2, u64x2); 10], i: usize| { debug_assert!(i < 9); if i % 2 == 0 { - z[i + 1].0 = z[i + 1].0 + (z[i].0 >> 26); - z[i + 1].1 = z[i + 1].1 + (z[i].1 >> 26); + z[i + 1].0 = z[i + 1].0 + (z[i].0.shr::<26>()); + z[i + 1].1 = z[i + 1].1 + (z[i].1.shr::<26>()); z[i].0 = z[i].0 & LOW_26_BITS; z[i].1 = z[i].1 & LOW_26_BITS; } else { - z[i + 1].0 = z[i + 1].0 + (z[i].0 >> 25); - z[i + 1].1 = z[i + 1].1 + (z[i].1 >> 25); + z[i + 1].0 = z[i + 1].0 + (z[i].0.shr::<25>()); + z[i + 1].1 = z[i + 1].1 + (z[i].1.shr::<25>()); z[i].0 = z[i].0 & LOW_25_BITS; z[i].1 = z[i].1 & LOW_25_BITS; } @@ -411,18 
+425,18 @@ impl FieldElement2625x4 { carry(&mut z, 3); carry(&mut z, 7); carry(&mut z, 4); carry(&mut z, 8); - let c = (z[9].0 >> 25, z[9].1 >> 25); + let c = (z[9].0.shr::<25>(), z[9].1.shr::<25>()); z[9] = (z[9].0 & LOW_25_BITS, z[9].1 & LOW_25_BITS); let mut c0: (u64x2, u64x2) = (c.0 & LOW_26_BITS, c.1 & LOW_26_BITS); - let mut c1: (u64x2, u64x2) = (c.0 >> 26, c.1 >> 26); + let mut c1: (u64x2, u64x2) = (c.0.shr::<26>(), c.1.shr::<26>()); unsafe { use core::arch::aarch64::vmulq_n_u32; - c0 = (vmulq_n_u32(c0.0.into_bits(), 19).into_bits(), - vmulq_n_u32(c0.1.into_bits(), 19).into_bits()); - c1 = (vmulq_n_u32(c1.0.into_bits(), 19).into_bits(), - vmulq_n_u32(c1.1.into_bits(), 19).into_bits()); + c0 = (vmulq_n_u32(c0.0.into(), 19).into(), + vmulq_n_u32(c0.1.into(), 19).into()); + c1 = (vmulq_n_u32(c1.0.into(), 19).into(), + vmulq_n_u32(c1.1.into(), 19).into()); } z[0] = (z[0].0 + c0.0, z[0].1 + c0.1); @@ -430,11 +444,11 @@ impl FieldElement2625x4 { carry(&mut z, 0); FieldElement2625x4([ - repack_pair((z[0].0.into_bits(), z[0].1.into_bits()), (z[1].0.into_bits(), z[1].1.into_bits())), - repack_pair((z[2].0.into_bits(), z[2].1.into_bits()), (z[3].0.into_bits(), z[3].1.into_bits())), - repack_pair((z[4].0.into_bits(), z[4].1.into_bits()), (z[5].0.into_bits(), z[5].1.into_bits())), - repack_pair((z[6].0.into_bits(), z[6].1.into_bits()), (z[7].0.into_bits(), z[7].1.into_bits())), - repack_pair((z[8].0.into_bits(), z[8].1.into_bits()), (z[9].0.into_bits(), z[9].1.into_bits())), + repack_pair((z[0].0.into(), z[0].1.into()), (z[1].0.into(), z[1].1.into())), + repack_pair((z[2].0.into(), z[2].1.into()), (z[3].0.into(), z[3].1.into())), + repack_pair((z[4].0.into(), z[4].1.into()), (z[5].0.into(), z[5].1.into())), + repack_pair((z[6].0.into(), z[6].1.into()), (z[7].0.into(), z[7].1.into())), + repack_pair((z[8].0.into(), z[8].1.into()), (z[9].0.into(), z[9].1.into())), ]) } @@ -445,9 +459,9 @@ impl FieldElement2625x4 { fn m(x: (u32x2, u32x2), y: (u32x2, u32x2)) -> u64x4 { use 
core::arch::aarch64::vmull_u32; unsafe { - let z0: u64x2 = vmull_u32(x.0.into_bits(), y.0.into_bits()).into_bits(); - let z1: u64x2 = vmull_u32(x.1.into_bits(), y.1.into_bits()).into_bits(); - u64x4::new(z0.extract(0), z0.extract(1), z1.extract(0), z1.extract(1)) + let z0: u64x2 = vmull_u32(x.0.into(), y.0.into()).into(); + let z1: u64x2 = vmull_u32(x.1.into(), y.1.into()).into(); + u64x4::new(z0.extract::<0>(), z0.extract::<1>(), z1.extract::<0>(), z1.extract::<1>()) } } @@ -455,9 +469,9 @@ impl FieldElement2625x4 { fn m_lo(x: (u32x2, u32x2), y: (u32x2, u32x2)) -> (u32x2, u32x2) { use core::arch::aarch64::vmull_u32; unsafe { - let x: (u32x4, u32x4) = (vmull_u32(x.0.into_bits(), y.0.into_bits()).into_bits(), - vmull_u32(x.1.into_bits(), y.1.into_bits()).into_bits()); - (u32x2::new(x.0.extract(0), x.0.extract(2)), u32x2::new(x.1.extract(0), x.1.extract(2))) + let x: (u32x4, u32x4) = (vmull_u32(x.0.into(), y.0.into()).into(), + vmull_u32(x.1.into(), y.1.into()).into()); + (u32x2::new(x.0.extract::<0>(), x.0.extract::<2>()), u32x2::new(x.1.extract::<0>(), x.1.extract::<2>())) } } @@ -469,14 +483,14 @@ impl FieldElement2625x4 { let (x6, x7) = unpack_pair(self.0[3]); let (x8, x9) = unpack_pair(self.0[4]); - let x0_2 = (x0.0 << 1, x0.1 << 1); - let x1_2 = (x1.0 << 1, x1.1 << 1); - let x2_2 = (x2.0 << 1, x2.1 << 1); - let x3_2 = (x3.0 << 1, x3.1 << 1); - let x4_2 = (x4.0 << 1, x4.1 << 1); - let x5_2 = (x5.0 << 1, x5.1 << 1); - let x6_2 = (x6.0 << 1, x6.1 << 1); - let x7_2 = (x7.0 << 1, x7.1 << 1); + let x0_2 = (x0.0.shr::<1>(), x0.1.shr::<1>()); + let x1_2 = (x1.0.shr::<1>(), x1.1.shr::<1>()); + let x2_2 = (x2.0.shr::<1>(), x2.1.shr::<1>()); + let x3_2 = (x3.0.shr::<1>(), x3.1.shr::<1>()); + let x4_2 = (x4.0.shr::<1>(), x4.1.shr::<1>()); + let x5_2 = (x5.0.shr::<1>(), x5.1.shr::<1>()); + let x6_2 = (x6.0.shr::<1>(), x6.1.shr::<1>()); + let x7_2 = (x7.0.shr::<1>(), x7.1.shr::<1>()); let x5_19 = m_lo(v19, x5); let x6_19 = m_lo(v19, x6); @@ -484,15 +498,15 @@ impl 
FieldElement2625x4 { let x8_19 = m_lo(v19, x8); let x9_19 = m_lo(v19, x9); - let z0 = m(x0, x0) + m(x2_2,x8_19) + m(x4_2,x6_19) + ((m(x1_2,x9_19) + m(x3_2,x7_19) + m(x5,x5_19)) << 1); - let z1 = m(x0_2,x1) + m(x3_2,x8_19) + m(x5_2,x6_19) + ((m(x2,x9_19) + m(x4,x7_19)) << 1); - let z2 = m(x0_2,x2) + m(x1_2,x1) + m(x4_2,x8_19) + m(x6,x6_19) + ((m(x3_2,x9_19) + m(x5_2,x7_19)) << 1); - let z3 = m(x0_2,x3) + m(x1_2,x2) + m(x5_2,x8_19) + ((m(x4,x9_19) + m(x6,x7_19)) << 1); - let z4 = m(x0_2,x4) + m(x1_2,x3_2) + m(x2, x2) + m(x6_2,x8_19) + ((m(x5_2,x9_19) + m(x7,x7_19)) << 1); - let z5 = m(x0_2,x5) + m(x1_2,x4) + m(x2_2,x3) + m(x7_2,x8_19) + ((m(x6,x9_19)) << 1); - let z6 = m(x0_2,x6) + m(x1_2,x5_2) + m(x2_2,x4) + m(x3_2,x3) + m(x8,x8_19) + ((m(x7_2,x9_19)) << 1); - let z7 = m(x0_2,x7) + m(x1_2,x6) + m(x2_2,x5) + m(x3_2,x4) + ((m(x8,x9_19)) << 1); - let z8 = m(x0_2,x8) + m(x1_2,x7_2) + m(x2_2,x6) + m(x3_2,x5_2) + m(x4,x4) + ((m(x9,x9_19)) << 1); + let z0 = m(x0, x0) + m(x2_2,x8_19) + m(x4_2,x6_19) + ((m(x1_2,x9_19) + m(x3_2,x7_19) + m(x5,x5_19)).shl::<1>()); + let z1 = m(x0_2,x1) + m(x3_2,x8_19) + m(x5_2,x6_19) + ((m(x2,x9_19) + m(x4,x7_19)).shl::<1>()); + let z2 = m(x0_2,x2) + m(x1_2,x1) + m(x4_2,x8_19) + m(x6,x6_19) + ((m(x3_2,x9_19) + m(x5_2,x7_19)).shl::<1>()); + let z3 = m(x0_2,x3) + m(x1_2,x2) + m(x5_2,x8_19) + ((m(x4,x9_19) + m(x6,x7_19)).shl::<1>()); + let z4 = m(x0_2,x4) + m(x1_2,x3_2) + m(x2, x2) + m(x6_2,x8_19) + ((m(x5_2,x9_19) + m(x7,x7_19)).shl::<1>()); + let z5 = m(x0_2,x5) + m(x1_2,x4) + m(x2_2,x3) + m(x7_2,x8_19) + ((m(x6,x9_19)).shl::<1>()); + let z6 = m(x0_2,x6) + m(x1_2,x5_2) + m(x2_2,x4) + m(x3_2,x3) + m(x8,x8_19) + ((m(x7_2,x9_19)).shl::<1>()); + let z7 = m(x0_2,x7) + m(x1_2,x6) + m(x2_2,x5) + m(x3_2,x4) + ((m(x8,x9_19)).shl::<1>()); + let z8 = m(x0_2,x8) + m(x1_2,x7_2) + m(x2_2,x6) + m(x3_2,x5_2) + m(x4,x4) + ((m(x9,x9_19)).shl::<1>()); let z9 = m(x0_2,x9) + m(x1_2,x8) + m(x2_2,x7) + m(x3_2,x6) + m(x4_2,x5); @@ -506,12 +520,12 @@ impl 
FieldElement2625x4 { use core::arch::aarch64::vget_high_u32; use core::arch::aarch64::vcombine_u32; - let x = (u64x2::new(x_01.extract(0), x_01.extract(1)), u64x2::new(x_01.extract(2), x_01.extract(3))); - let p = (u64x2::new(p_01.extract(0), p_01.extract(1)), u64x2::new(p_01.extract(2), p_01.extract(3))); + let x = (u64x2::new(x_01.extract::<0>(), x_01.extract::<1>()), u64x2::new(x_01.extract::<2>(), x_01.extract::<3>())); + let p = (u64x2::new(p_01.extract::<0>(), p_01.extract::<1>()), u64x2::new(p_01.extract::<2>(), p_01.extract::<3>())); - (x.0.into_bits(), - vcombine_u32(vget_low_u32(x.1.into_bits()), - vget_high_u32((p.1 - x.1).into_bits())).into_bits()) + (x.0.into(), + vcombine_u32(vget_low_u32(x.1.into()), + vget_high_u32((p.1 - x.1).into())).into()) } }; @@ -579,16 +593,16 @@ impl Mul<(u32, u32, u32, u32)> for FieldElement2625x4 { let (b8, b9) = unpack_pair(self.0[4]); FieldElement2625x4::reduce64([ - (vmull_u32(b0.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b0.1.into_bits(), consts.1.into_bits()).into_bits()), - (vmull_u32(b1.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b1.1.into_bits(), consts.1.into_bits()).into_bits()), - (vmull_u32(b2.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b2.1.into_bits(), consts.1.into_bits()).into_bits()), - (vmull_u32(b3.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b3.1.into_bits(), consts.1.into_bits()).into_bits()), - (vmull_u32(b4.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b4.1.into_bits(), consts.1.into_bits()).into_bits()), - (vmull_u32(b5.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b5.1.into_bits(), consts.1.into_bits()).into_bits()), - (vmull_u32(b6.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b6.1.into_bits(), consts.1.into_bits()).into_bits()), - (vmull_u32(b7.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b7.1.into_bits(), consts.1.into_bits()).into_bits()), - (vmull_u32(b8.0.into_bits(), 
consts.0.into_bits()).into_bits(), vmull_u32(b8.1.into_bits(), consts.1.into_bits()).into_bits()), - (vmull_u32(b9.0.into_bits(), consts.0.into_bits()).into_bits(), vmull_u32(b9.1.into_bits(), consts.1.into_bits()).into_bits()) + (vmull_u32(b0.0.into(), consts.0.into()).into(), vmull_u32(b0.1.into(), consts.1.into()).into()), + (vmull_u32(b1.0.into(), consts.0.into()).into(), vmull_u32(b1.1.into(), consts.1.into()).into()), + (vmull_u32(b2.0.into(), consts.0.into()).into(), vmull_u32(b2.1.into(), consts.1.into()).into()), + (vmull_u32(b3.0.into(), consts.0.into()).into(), vmull_u32(b3.1.into(), consts.1.into()).into()), + (vmull_u32(b4.0.into(), consts.0.into()).into(), vmull_u32(b4.1.into(), consts.1.into()).into()), + (vmull_u32(b5.0.into(), consts.0.into()).into(), vmull_u32(b5.1.into(), consts.1.into()).into()), + (vmull_u32(b6.0.into(), consts.0.into()).into(), vmull_u32(b6.1.into(), consts.1.into()).into()), + (vmull_u32(b7.0.into(), consts.0.into()).into(), vmull_u32(b7.1.into(), consts.1.into()).into()), + (vmull_u32(b8.0.into(), consts.0.into()).into(), vmull_u32(b8.1.into(), consts.1.into()).into()), + (vmull_u32(b9.0.into(), consts.0.into()).into(), vmull_u32(b9.1.into(), consts.1.into()).into()) ]) } } @@ -603,9 +617,9 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { fn m(x: (u32x2, u32x2), y: (u32x2, u32x2)) -> u64x4 { use core::arch::aarch64::vmull_u32; unsafe { - let z0: u64x2 = vmull_u32(x.0.into_bits(), y.0.into_bits()).into_bits(); - let z1: u64x2 = vmull_u32(x.1.into_bits(), y.1.into_bits()).into_bits(); - u64x4::new(z0.extract(0), z0.extract(1), z1.extract(0), z1.extract(1)) + let z0: u64x2 = vmull_u32(x.0.into(), y.0.into()).into(); + let z1: u64x2 = vmull_u32(x.1.into(), y.1.into()).into(); + u64x4::new(z0.extract::<0>(), z0.extract::<1>(), z1.extract::<0>(), z1.extract::<1>()) } } @@ -614,12 +628,12 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { use core::arch::aarch64::vmull_u32; unsafe { let 
x: (u32x4, u32x4) = ( - vmull_u32(x.0.into_bits(), y.0.into_bits()).into_bits(), - vmull_u32(x.1.into_bits(), y.1.into_bits()).into_bits(), + vmull_u32(x.0.into(), y.0.into()).into(), + vmull_u32(x.1.into(), y.1.into()).into(), ); ( - u32x2::new(x.0.extract(0), x.0.extract(2)), - u32x2::new(x.1.extract(0), x.1.extract(2)), + u32x2::new(x.0.extract::<0>(), x.0.extract::<2>()), + u32x2::new(x.1.extract::<0>(), x.1.extract::<2>()), ) } } @@ -667,8 +681,8 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { let f = |x: u64x4| -> (u64x2, u64x2) { ( - (u64x2::new(x.extract(0), x.extract(1))).into_bits(), - (u64x2::new(x.extract(2), x.extract(3))).into_bits(), + (u64x2::new(x.extract::<0>(), x.extract::<1>())).into(), + (u64x2::new(x.extract::<2>(), x.extract::<3>())).into(), ) }; @@ -712,12 +726,12 @@ mod test { let expected_src = repack_pair( ( - u32x4::new(a.0.extract(0), 0, a.0.extract(1), 0), - u32x4::new(a.1.extract(0), 0, a.1.extract(1), 0), + u32x4::new(a.0.extract::<0>(), 0, a.0.extract::<1>(), 0), + u32x4::new(a.1.extract::<0>(), 0, a.1.extract::<1>(), 0), ), ( - u32x4::new(b.0.extract(0), 0, b.0.extract(1), 0), - u32x4::new(b.1.extract(0), 0, b.1.extract(1), 0), + u32x4::new(b.0.extract::<0>(), 0, b.0.extract::<1>(), 0), + u32x4::new(b.1.extract::<0>(), 0, b.1.extract::<1>(), 0), ), ); diff --git a/curve25519-dalek/src/backend/vector/neon/mod.rs b/curve25519-dalek/src/backend/vector/neon/mod.rs index de3f33c25..262694131 100644 --- a/curve25519-dalek/src/backend/vector/neon/mod.rs +++ b/curve25519-dalek/src/backend/vector/neon/mod.rs @@ -14,3 +14,7 @@ pub(crate) mod field; pub(crate) mod edwards; pub(crate) mod constants; + +pub(crate) use self::edwards::{CachedPoint, ExtendedPoint}; + +mod packed_simd; diff --git a/curve25519-dalek/src/backend/vector/neon/packed_simd.rs b/curve25519-dalek/src/backend/vector/neon/packed_simd.rs new file mode 100644 index 000000000..f33b812c8 --- /dev/null +++ 
b/curve25519-dalek/src/backend/vector/neon/packed_simd.rs @@ -0,0 +1,320 @@ +// -*- mode: rust; -*- +// +// This file is part of curve25519-dalek. +// See LICENSE for licensing information. + +//! This module defines wrappers over platform-specific SIMD types to make them +//! more convenient to use. +//! +//! This is an adaptation of `crate::backend::vector::packed_simd.rs` for aarch64. + +use core::ops::{Add, AddAssign, BitAnd, BitAndAssign, BitXor, BitXorAssign, Sub}; + +macro_rules! impl_shared { + ( + $ty:ident, // Name of the struct + $lane_ty:ident, + $internal_ty: ident, + $beq_intrinsic:ident, + $add_intrinsic:ident, + $sub_intrinsic:ident, + $and_intrinsic:ident, + $xor_intrinsic:ident, + $shl_intrinsic:ident, + $shr_intrinsic:ident, + $extract_intrinsic:ident + ) => { + #[allow(non_camel_case_types)] + #[derive(Copy, Clone, Debug)] + #[repr(transparent)] + pub struct $ty(core::arch::aarch64::$internal_ty); + + impl From<$ty> for core::arch::aarch64::$internal_ty { + #[inline] + fn from(value: $ty) -> core::arch::aarch64::$internal_ty { + value.0 + } + } + + impl From for $ty { + #[inline] + fn from(value: core::arch::aarch64::$internal_ty) -> $ty { + $ty(value) + } + } + + impl PartialEq for $ty { + #[inline] + fn eq(&self, rhs: &$ty) -> bool { + unsafe { + let m = core::arch::aarch64::$beq_intrinsic(self.0, rhs.0); + Self(m).extract::<0>() != 0 + } + } + } + + impl Eq for $ty {} + + + impl Add for $ty { + type Output = Self; + + #[inline] + fn add(self, rhs: $ty) -> Self { + unsafe { core::arch::aarch64::$add_intrinsic(self.0, rhs.0).into() } + } + } + + impl AddAssign for $ty { + #[inline] + fn add_assign(&mut self, rhs: $ty) { + *self = *self + rhs + } + } + + impl Sub for $ty { + type Output = Self; + + #[inline] + fn sub(self, rhs: $ty) -> Self { + unsafe { core::arch::aarch64::$sub_intrinsic(self.0, rhs.0).into() } + } + } + + impl BitAnd for $ty { + type Output = Self; + + #[inline] + fn bitand(self, rhs: $ty) -> Self { + unsafe { 
core::arch::aarch64::$and_intrinsic(self.0, rhs.0).into() } + } + } + + impl BitAndAssign for $ty { + #[inline] + fn bitand_assign(&mut self, rhs: $ty) { + *self = *self & rhs; + } + } + + impl BitXor for $ty { + type Output = Self; + + #[inline] + fn bitxor(self, rhs: $ty) -> Self { + unsafe { core::arch::aarch64::$xor_intrinsic(self.0, rhs.0).into() } + } + } + + impl BitXorAssign for $ty { + #[inline] + fn bitxor_assign(&mut self, rhs: $ty) { + *self = *self ^ rhs; + } + } + + impl $ty { + #[inline] + pub fn extract(self) -> $lane_ty { + unsafe { core::arch::aarch64::$extract_intrinsic(self.0, N) as $lane_ty } + } + + #[inline] + pub fn shl(self) -> Self { + unsafe { core::arch::aarch64::$shl_intrinsic(self.0, N).into() } + } + + #[inline] + pub fn shr(self) -> Self { + unsafe { core::arch::aarch64::$shr_intrinsic(self.0, N).into() } + } + + } + } +} + +impl_shared!( + u32x4, + u32, + uint32x4_t, + vceqq_u32, + vaddq_u32, + vsubq_u32, + vandq_u32, + veorq_u32, + vshlq_n_u32, + vshrq_n_u32, + vgetq_lane_u32 +); + +impl u32x4 { + #[inline] + pub fn new(x0: u32, x1: u32, x2: u32, x3: u32) -> Self { + unsafe { core::mem::transmute::<[u32; 4], Self>([x0, x1, x2, x3]) } + } + + #[inline] + pub const fn const_new(x0: u32, x1: u32, x2: u32, x3: u32) -> Self { + unsafe { core::mem::transmute::<[u32; 4], Self>([x0, x1, x2, x3]) } + } + + #[inline] + pub fn splat(x: u32) -> Self { + unsafe { core::mem::transmute::<[u32; 4], Self>([x, x, x, x]) } + } + + #[inline] + pub const fn const_splat(x: u32) -> Self { + unsafe { core::mem::transmute::<[u32; 4], Self>([x, x, x, x]) } + } +} + +impl From for core::arch::aarch64::uint32x4_t { + #[inline] + fn from(value: u64x2) -> core::arch::aarch64::uint32x4_t { + unsafe { core::arch::aarch64::vreinterpretq_u32_u64(value.into()) } + } +} + +impl From for u32x4 { + #[inline] + fn from(value: core::arch::aarch64::uint64x2_t) -> u32x4 { + unsafe { core::arch::aarch64::vreinterpretq_u32_u64(value).into() } + } +} + +impl From for u32x4 { 
+ #[inline] + fn from(value: u64x2) -> u32x4 { + Into::::into(value).into() + } +} + +impl_shared!( + u32x2, + u32, + uint32x2_t, + vceq_u32, + vadd_u32, + vsub_u32, + vand_u32, + veor_u32, + vshl_n_u32, + vshr_n_u32, + vget_lane_u32 +); + +impl u32x2 { + #[inline] + pub fn new(x0: u32, x1: u32) -> Self { + unsafe { core::mem::transmute::<[u32; 2], Self>([x0, x1]) } + } + + #[inline] + pub fn splat(x: u32) -> Self { + unsafe { core::mem::transmute::<[u32; 2], Self>([x, x]) } + } +} + +impl_shared!( + u64x2, + u64, + uint64x2_t, + vceqq_u64, + vaddq_u64, + vsubq_u64, + vandq_u64, + veorq_u64, + vshlq_n_u64, + vshrq_n_u64, + vgetq_lane_u64 +); + +impl u64x2 { + #[inline] + pub fn new(x0: u64, x1: u64) -> Self { + unsafe { core::mem::transmute::<[u64; 2], Self>([x0, x1]) } + } + + #[inline] + pub fn splat(x: u64) -> Self { + unsafe { core::mem::transmute::<[u64; 2], Self>([x, x]) } + } +} + +impl From for u64x2 { + #[inline] + fn from(value: core::arch::aarch64::uint32x4_t) -> u64x2 { + unsafe { core::arch::aarch64::vreinterpretq_u64_u32(value).into() } + } +} + + +#[allow(non_camel_case_types)] +#[derive(Copy, Clone, Debug)] +#[repr(transparent)] +pub struct i32x4(core::arch::aarch64::int32x4_t); + +impl From for core::arch::aarch64::int32x4_t { + #[inline] + fn from(value: i32x4) -> core::arch::aarch64::int32x4_t { + value.0 + } +} + +impl From for i32x4 { + #[inline] + fn from(value: core::arch::aarch64::int32x4_t) -> i32x4 { + i32x4(value) + } +} + +impl i32x4 { + #[inline] + pub fn new(x0: i32, x1: i32, x2: i32, x3: i32) -> Self { + unsafe { core::mem::transmute::<[i32; 4], Self>([x0, x1, x2, x3]) } + } +} + +#[allow(non_camel_case_types)] +#[derive(Copy, Clone, Debug)] +#[repr(transparent)] +pub struct u64x4((u64x2, u64x2)); + +impl u64x4 { + #[inline] + pub fn new(x0: u64, x1: u64, x2: u64, x3: u64) -> Self { + Self((u64x2::new(x0, x1), u64x2::new(x2, x3))) + } + + #[inline] + pub fn splat(x: u64) -> Self { + Self::new(x, x, x, x) + } + + #[inline] + pub fn 
extract(self) -> u64 { + match N { + 0 => self.0.0.extract::<0>(), + 1 => self.0.0.extract::<1>(), + 2 => self.0.1.extract::<0>(), + 3 => self.0.1.extract::<1>(), + _ => unreachable!() + } + } + + #[inline] + pub fn shl(self) -> Self { + Self((self.0.0.shl::(), self.0.1.shl::())) + } +} + +impl Add for u64x4 { + type Output = Self; + + #[inline] + fn add(self, rhs: Self) -> Self { + Self((self.0.0 + rhs.0.0, self.0.1 + rhs.0.1)) + } + +} diff --git a/curve25519-dalek/src/backend/vector/scalar_mul/pippenger.rs b/curve25519-dalek/src/backend/vector/scalar_mul/pippenger.rs index 1376c4eab..f439ee789 100644 --- a/curve25519-dalek/src/backend/vector/scalar_mul/pippenger.rs +++ b/curve25519-dalek/src/backend/vector/scalar_mul/pippenger.rs @@ -10,8 +10,9 @@ #![allow(non_snake_case)] #[curve25519_dalek_derive::unsafe_target_feature_specialize( - "avx2", - conditional("avx512ifma,avx512vl", nightly) + conditional("avx2", target_arch="x86_64"), + conditional("avx512ifma,avx512vl", all(nightly, target_arch="x86_64")), + conditional("neon", all(nightly, target_arch="aarch64")) )] pub mod spec { @@ -26,6 +27,9 @@ pub mod spec { #[for_target_feature("avx512ifma")] use crate::backend::vector::ifma::{CachedPoint, ExtendedPoint}; + #[for_target_feature("neon")] + use crate::backend::vector::neon::{CachedPoint, ExtendedPoint}; + use crate::edwards::EdwardsPoint; use crate::scalar::Scalar; use crate::traits::{Identity, VartimeMultiscalarMul}; diff --git a/curve25519-dalek/src/backend/vector/scalar_mul/precomputed_straus.rs b/curve25519-dalek/src/backend/vector/scalar_mul/precomputed_straus.rs index 515b4040c..5b3bf6726 100644 --- a/curve25519-dalek/src/backend/vector/scalar_mul/precomputed_straus.rs +++ b/curve25519-dalek/src/backend/vector/scalar_mul/precomputed_straus.rs @@ -12,8 +12,9 @@ #![allow(non_snake_case)] #[curve25519_dalek_derive::unsafe_target_feature_specialize( - "avx2", - conditional("avx512ifma,avx512vl", nightly) + conditional("avx2", target_arch="x86_64"), + 
conditional("avx512ifma,avx512vl", all(nightly, target_arch="x86_64")), + conditional("neon", all(nightly, target_arch="aarch64")) )] pub mod spec { @@ -28,6 +29,9 @@ pub mod spec { #[for_target_feature("avx512ifma")] use crate::backend::vector::ifma::{CachedPoint, ExtendedPoint}; + #[for_target_feature("neon")] + use crate::backend::vector::neon::{CachedPoint, ExtendedPoint}; + use crate::edwards::EdwardsPoint; use crate::scalar::Scalar; use crate::traits::Identity; diff --git a/curve25519-dalek/src/backend/vector/scalar_mul/straus.rs b/curve25519-dalek/src/backend/vector/scalar_mul/straus.rs index 413e6fd9a..9dafd6ba5 100644 --- a/curve25519-dalek/src/backend/vector/scalar_mul/straus.rs +++ b/curve25519-dalek/src/backend/vector/scalar_mul/straus.rs @@ -12,8 +12,9 @@ #![allow(non_snake_case)] #[curve25519_dalek_derive::unsafe_target_feature_specialize( - "avx2", - conditional("avx512ifma,avx512vl", nightly) + conditional("avx2", target_arch="x86_64"), + conditional("avx512ifma,avx512vl", all(nightly, target_arch="x86_64")), + conditional("neon", all(nightly, target_arch="aarch64")) )] pub mod spec { @@ -31,6 +32,9 @@ pub mod spec { #[for_target_feature("avx512ifma")] use crate::backend::vector::ifma::{CachedPoint, ExtendedPoint}; + #[for_target_feature("neon")] + use crate::backend::vector::neon::{CachedPoint, ExtendedPoint}; + use crate::edwards::EdwardsPoint; use crate::scalar::Scalar; use crate::traits::{Identity, MultiscalarMul, VartimeMultiscalarMul}; diff --git a/curve25519-dalek/src/backend/vector/scalar_mul/variable_base.rs b/curve25519-dalek/src/backend/vector/scalar_mul/variable_base.rs index 9f924f286..d716e1994 100644 --- a/curve25519-dalek/src/backend/vector/scalar_mul/variable_base.rs +++ b/curve25519-dalek/src/backend/vector/scalar_mul/variable_base.rs @@ -1,8 +1,9 @@ #![allow(non_snake_case)] #[curve25519_dalek_derive::unsafe_target_feature_specialize( - "avx2", - conditional("avx512ifma,avx512vl", nightly) + conditional("avx2", 
target_arch="x86_64"), + conditional("avx512ifma,avx512vl", all(nightly, target_arch="x86_64")), + conditional("neon", all(nightly, target_arch="aarch64")) )] pub mod spec { @@ -12,6 +13,9 @@ pub mod spec { #[for_target_feature("avx512ifma")] use crate::backend::vector::ifma::{CachedPoint, ExtendedPoint}; + #[for_target_feature("neon")] + use crate::backend::vector::neon::{CachedPoint, ExtendedPoint}; + use crate::edwards::EdwardsPoint; use crate::scalar::Scalar; use crate::traits::Identity; diff --git a/curve25519-dalek/src/backend/vector/scalar_mul/vartime_double_base.rs b/curve25519-dalek/src/backend/vector/scalar_mul/vartime_double_base.rs index ea2af8ad4..bc6eeef50 100644 --- a/curve25519-dalek/src/backend/vector/scalar_mul/vartime_double_base.rs +++ b/curve25519-dalek/src/backend/vector/scalar_mul/vartime_double_base.rs @@ -12,8 +12,9 @@ #![allow(non_snake_case)] #[curve25519_dalek_derive::unsafe_target_feature_specialize( - "avx2", - conditional("avx512ifma,avx512vl", nightly) + conditional("avx2", target_arch="x86_64"), + conditional("avx512ifma,avx512vl", all(nightly, target_arch="x86_64")), + conditional("neon", all(nightly, target_arch="aarch64")) )] pub mod spec { @@ -25,6 +26,9 @@ pub mod spec { #[for_target_feature("avx512ifma")] use crate::backend::vector::ifma::{CachedPoint, ExtendedPoint}; + #[for_target_feature("neon")] + use crate::backend::vector::neon::{CachedPoint, ExtendedPoint}; + #[cfg(feature = "precomputed-tables")] #[for_target_feature("avx2")] use crate::backend::vector::avx2::constants::BASEPOINT_ODD_LOOKUP_TABLE; @@ -33,6 +37,10 @@ pub mod spec { #[for_target_feature("avx512ifma")] use crate::backend::vector::ifma::constants::BASEPOINT_ODD_LOOKUP_TABLE; + #[cfg(feature = "precomputed-tables")] + #[for_target_feature("neon")] + use crate::backend::vector::neon::constants::BASEPOINT_ODD_LOOKUP_TABLE; + use crate::edwards::EdwardsPoint; use crate::scalar::Scalar; use crate::traits::Identity; diff --git a/curve25519-dalek/src/lib.rs 
b/curve25519-dalek/src/lib.rs index d8666453c..7fc772d23 100644 --- a/curve25519-dalek/src/lib.rs +++ b/curve25519-dalek/src/lib.rs @@ -22,6 +22,9 @@ all(curve25519_dalek_backend = "simd", nightly), feature(avx512_target_feature) )] +#![cfg_attr(all(nightly, target_arch="aarch64"), feature(core_intrinsics))] +#![cfg_attr(all(nightly, target_arch="aarch64"), feature(adt_const_params))] +#![cfg_attr(all(nightly, target_arch="aarch64"), feature(portable_simd))] #![cfg_attr(docsrs, feature(doc_auto_cfg, doc_cfg, doc_cfg_hide))] #![cfg_attr(docsrs, doc(cfg_hide(docsrs)))] //------------------------------------------------------------------------ From d2b7b310028bcef461d553d0792b5913303787c9 Mon Sep 17 00:00:00 2001 From: Tarinn Date: Fri, 22 Mar 2024 11:27:59 +0100 Subject: [PATCH 07/14] fixed minor mistake --- .../src/backend/vector/neon/field.rs | 38 +++++++++---------- curve25519-dalek/src/lib.rs | 2 - 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/curve25519-dalek/src/backend/vector/neon/field.rs b/curve25519-dalek/src/backend/vector/neon/field.rs index 5feabb734..3e0ae05d2 100644 --- a/curve25519-dalek/src/backend/vector/neon/field.rs +++ b/curve25519-dalek/src/backend/vector/neon/field.rs @@ -21,26 +21,22 @@ //! arm instructions. use core::ops::{Add, Mul, Neg}; -use super::packed_simd::{u32x2, u32x4, i32x4, u64x2, u64x4}; +use super::packed_simd::{u32x2, u32x4, i32x4, u64x2, u64x4}; use crate::backend::serial::u64::field::FieldElement51; use crate::backend::vector::neon::constants::{ P_TIMES_16_HI, P_TIMES_16_LO, P_TIMES_2_HI, P_TIMES_2_LO, }; -fn shuffle_u32x4(x: u32x4, y: u32x4) -> u32x4 { - unsafe { - core::mem::transmute::<[u32; 4], u32x4>( - *core::intrinsics::simd::simd_shuffle::, [u32; 4], core::simd::Simd>( - core::simd::Simd::from_array(core::mem::transmute::(x)), - core::simd::Simd::from_array(core::mem::transmute::(y)), - IDX).as_array()) - } -} - macro_rules! 
shuffle { - ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr]) => { - shuffle_u32x4::<{[$l0, $l1, $l2, $l3]}>($vec0, $vec1) + ($vec0:expr, $vec1:expr, $index:expr) => { + unsafe { + core::mem::transmute::<[u32; 4], u32x4>( + *core::simd::simd_swizzle!( + core::simd::Simd::from_array(core::mem::transmute::($vec0)), + core::simd::Simd::from_array(core::mem::transmute::($vec1)), + $index).as_array()) + } }; } @@ -483,14 +479,14 @@ impl FieldElement2625x4 { let (x6, x7) = unpack_pair(self.0[3]); let (x8, x9) = unpack_pair(self.0[4]); - let x0_2 = (x0.0.shr::<1>(), x0.1.shr::<1>()); - let x1_2 = (x1.0.shr::<1>(), x1.1.shr::<1>()); - let x2_2 = (x2.0.shr::<1>(), x2.1.shr::<1>()); - let x3_2 = (x3.0.shr::<1>(), x3.1.shr::<1>()); - let x4_2 = (x4.0.shr::<1>(), x4.1.shr::<1>()); - let x5_2 = (x5.0.shr::<1>(), x5.1.shr::<1>()); - let x6_2 = (x6.0.shr::<1>(), x6.1.shr::<1>()); - let x7_2 = (x7.0.shr::<1>(), x7.1.shr::<1>()); + let x0_2 = (x0.0.shl::<1>(), x0.1.shl::<1>()); + let x1_2 = (x1.0.shl::<1>(), x1.1.shl::<1>()); + let x2_2 = (x2.0.shl::<1>(), x2.1.shl::<1>()); + let x3_2 = (x3.0.shl::<1>(), x3.1.shl::<1>()); + let x4_2 = (x4.0.shl::<1>(), x4.1.shl::<1>()); + let x5_2 = (x5.0.shl::<1>(), x5.1.shl::<1>()); + let x6_2 = (x6.0.shl::<1>(), x6.1.shl::<1>()); + let x7_2 = (x7.0.shl::<1>(), x7.1.shl::<1>()); let x5_19 = m_lo(v19, x5); let x6_19 = m_lo(v19, x6); diff --git a/curve25519-dalek/src/lib.rs b/curve25519-dalek/src/lib.rs index 7fc772d23..4ca7ef29b 100644 --- a/curve25519-dalek/src/lib.rs +++ b/curve25519-dalek/src/lib.rs @@ -22,8 +22,6 @@ all(curve25519_dalek_backend = "simd", nightly), feature(avx512_target_feature) )] -#![cfg_attr(all(nightly, target_arch="aarch64"), feature(core_intrinsics))] -#![cfg_attr(all(nightly, target_arch="aarch64"), feature(adt_const_params))] #![cfg_attr(all(nightly, target_arch="aarch64"), feature(portable_simd))] #![cfg_attr(docsrs, feature(doc_auto_cfg, doc_cfg, doc_cfg_hide))] #![cfg_attr(docsrs, 
doc(cfg_hide(docsrs)))] From 1c070b3c06c2d922e510e8780d63cd98d5cd13eb Mon Sep 17 00:00:00 2001 From: Tarinn Date: Wed, 22 May 2024 13:28:35 +0200 Subject: [PATCH 08/14] Small improvements to neon backend --- .../src/backend/vector/neon/field.rs | 46 ++++++++++++++----- .../src/backend/vector/neon/packed_simd.rs | 2 +- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/curve25519-dalek/src/backend/vector/neon/field.rs b/curve25519-dalek/src/backend/vector/neon/field.rs index 3e0ae05d2..2f8d42c2b 100644 --- a/curve25519-dalek/src/backend/vector/neon/field.rs +++ b/curve25519-dalek/src/backend/vector/neon/field.rs @@ -20,6 +20,7 @@ //! changes where made to account for different structure in //! arm instructions. +use core::arch::aarch64::{self, vuzp1_u32}; use core::ops::{Add, Mul, Neg}; use super::packed_simd::{u32x2, u32x4, i32x4, u64x2, u64x4}; @@ -214,6 +215,15 @@ impl FieldElement2625x4 { ]) } + pub fn shuffleABAB(&self) -> FieldElement2625x4 { + self.shuffle(Shuffle::ABAB) + } + + pub fn shuffleBACD(&self) -> FieldElement2625x4 { + self.shuffle(Shuffle::BACD) + } + + // Can probably be sped up using multiple vset/vget instead of table #[inline] pub fn blend(&self, other: FieldElement2625x4, control: Lanes) -> FieldElement2625x4 { @@ -457,17 +467,25 @@ impl FieldElement2625x4 { unsafe { let z0: u64x2 = vmull_u32(x.0.into(), y.0.into()).into(); let z1: u64x2 = vmull_u32(x.1.into(), y.1.into()).into(); - u64x4::new(z0.extract::<0>(), z0.extract::<1>(), z1.extract::<0>(), z1.extract::<1>()) + u64x4((z0, z1)) } } #[inline(always)] fn m_lo(x: (u32x2, u32x2), y: (u32x2, u32x2)) -> (u32x2, u32x2) { use core::arch::aarch64::vmull_u32; + use core::arch::aarch64::vuzp1_u32; + use core::arch::aarch64::vget_low_u32; + use core::arch::aarch64::vget_high_u32; unsafe { - let x: (u32x4, u32x4) = (vmull_u32(x.0.into(), y.0.into()).into(), - vmull_u32(x.1.into(), y.1.into()).into()); - (u32x2::new(x.0.extract::<0>(), x.0.extract::<2>()), 
u32x2::new(x.1.extract::<0>(), x.1.extract::<2>())) + let x: (u32x4, u32x4) = ( + vmull_u32(x.0.into(), y.0.into()).into(), + vmull_u32(x.1.into(), y.1.into()).into(), + ); + ( + vuzp1_u32(vget_low_u32(x.0.into()), vget_high_u32(x.0.into())).into(), + vuzp1_u32(vget_low_u32(x.1.into()), vget_high_u32(x.1.into())).into() + ) } } @@ -516,9 +534,8 @@ impl FieldElement2625x4 { use core::arch::aarch64::vget_high_u32; use core::arch::aarch64::vcombine_u32; - let x = (u64x2::new(x_01.extract::<0>(), x_01.extract::<1>()), u64x2::new(x_01.extract::<2>(), x_01.extract::<3>())); - let p = (u64x2::new(p_01.extract::<0>(), p_01.extract::<1>()), u64x2::new(p_01.extract::<2>(), p_01.extract::<3>())); - + let x = x_01.0; + let p = p_01.0; (x.0.into(), vcombine_u32(vget_low_u32(x.1.into()), vget_high_u32((p.1 - x.1).into())).into()) @@ -615,21 +632,24 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { unsafe { let z0: u64x2 = vmull_u32(x.0.into(), y.0.into()).into(); let z1: u64x2 = vmull_u32(x.1.into(), y.1.into()).into(); - u64x4::new(z0.extract::<0>(), z0.extract::<1>(), z1.extract::<0>(), z1.extract::<1>()) + u64x4((z0, z1)) } } #[inline(always)] fn m_lo(x: (u32x2, u32x2), y: (u32x2, u32x2)) -> (u32x2, u32x2) { use core::arch::aarch64::vmull_u32; + use core::arch::aarch64::vuzp1_u32; + use core::arch::aarch64::vget_low_u32; + use core::arch::aarch64::vget_high_u32; unsafe { let x: (u32x4, u32x4) = ( vmull_u32(x.0.into(), y.0.into()).into(), vmull_u32(x.1.into(), y.1.into()).into(), ); ( - u32x2::new(x.0.extract::<0>(), x.0.extract::<2>()), - u32x2::new(x.1.extract::<0>(), x.1.extract::<2>()), + vuzp1_u32(vget_low_u32(x.0.into()), vget_high_u32(x.0.into())).into(), + vuzp1_u32(vget_low_u32(x.1.into()), vget_high_u32(x.1.into())).into() ) } } @@ -677,8 +697,8 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { let f = |x: u64x4| -> (u64x2, u64x2) { ( - (u64x2::new(x.extract::<0>(), x.extract::<1>())).into(), - 
(u64x2::new(x.extract::<2>(), x.extract::<3>())).into(), + x.0.0, + x.0.1 ) }; @@ -816,3 +836,5 @@ mod test { assert_eq!(x3, splits[3]); } } + + diff --git a/curve25519-dalek/src/backend/vector/neon/packed_simd.rs b/curve25519-dalek/src/backend/vector/neon/packed_simd.rs index f33b812c8..bb9842591 100644 --- a/curve25519-dalek/src/backend/vector/neon/packed_simd.rs +++ b/curve25519-dalek/src/backend/vector/neon/packed_simd.rs @@ -279,7 +279,7 @@ impl i32x4 { #[allow(non_camel_case_types)] #[derive(Copy, Clone, Debug)] #[repr(transparent)] -pub struct u64x4((u64x2, u64x2)); +pub struct u64x4(pub (u64x2, u64x2)); impl u64x4 { #[inline] From f61eb5a1173949b6431e1793c558cb7ead66b7e7 Mon Sep 17 00:00:00 2001 From: Tarinn Date: Mon, 3 Jun 2024 16:24:22 +0200 Subject: [PATCH 09/14] repack_pair optimisation --- .../src/backend/vector/neon/field.rs | 75 ++++++++++++++----- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/curve25519-dalek/src/backend/vector/neon/field.rs b/curve25519-dalek/src/backend/vector/neon/field.rs index 2f8d42c2b..4ac212c0b 100644 --- a/curve25519-dalek/src/backend/vector/neon/field.rs +++ b/curve25519-dalek/src/backend/vector/neon/field.rs @@ -29,14 +29,56 @@ use crate::backend::vector::neon::constants::{ P_TIMES_16_HI, P_TIMES_16_LO, P_TIMES_2_HI, P_TIMES_2_LO, }; +#[cfg(all(target_arch = "aarch64"))] +#[inline(always)] +fn vget_high_u32(v: core::arch::aarch64::uint32x4_t) -> core::arch::aarch64::uint32x2_t { + use core::arch::asm; + let o; + unsafe { + asm! ( + "DUP {o:d}, {v}.D[1]", + v = in(vreg) v, + o = out(vreg) o, + ) + } + o +} + +#[cfg(all(target_arch = "aarch64"))] +#[inline(always)] +fn vget_low_u32(v: core::arch::aarch64::uint32x4_t) -> core::arch::aarch64::uint32x2_t { + use core::arch::asm; + let o; + unsafe { + asm! 
( + "DUP {o:d}, {v}.D[0]", + v = in(vreg) v, + o = out(vreg) o, + ) + } + o +} +#[cfg(not(target_arch = "aarch64"))] +use core::arch::aarch64::vget_high_u32; +#[cfg(not(target_arch = "aarch64"))] +use core::arch::aarch64::vget_low_u32; + macro_rules! shuffle { ($vec0:expr, $vec1:expr, $index:expr) => { unsafe { + let v_n: [u32;8] = [ + $vec0.extract::<0>(), + $vec0.extract::<1>(), + $vec0.extract::<2>(), + $vec0.extract::<3>(), + $vec1.extract::<0>(), + $vec1.extract::<1>(), + $vec1.extract::<2>(), + $vec1.extract::<3>() + ]; core::mem::transmute::<[u32; 4], u32x4>( - *core::simd::simd_swizzle!( - core::simd::Simd::from_array(core::mem::transmute::<u32x4, [u32; 4]>($vec0)), - core::simd::Simd::from_array(core::mem::transmute::<u32x4, [u32; 4]>($vec1)), - $index).as_array()) + [v_n[$index[0]], v_n[$index[1]], v_n[$index[2]], v_n[$index[3]]] + ) } }; } @@ -53,7 +95,6 @@ fn unpack_pair(src: (u32x4, u32x4)) -> ((u32x2, u32x2), (u32x2, u32x2)) { let b0: u32x2; let b1: u32x2; unsafe { - use core::arch::aarch64::vget_high_u32; use core::arch::aarch64::vget_low_u32; a0 = vget_low_u32(src.0.into()).into(); a1 = vget_low_u32(src.1.into()).into(); @@ -73,15 +114,16 @@ fn repack_pair(x: (u32x4, u32x4), y: (u32x4, u32x4)) -> (u32x4, u32x4) { unsafe { use core::arch::aarch64::vcombine_u32; use core::arch::aarch64::vget_low_u32; - use core::arch::aarch64::vgetq_lane_u32; - use core::arch::aarch64::vset_lane_u32; - - (vcombine_u32( - vset_lane_u32(vgetq_lane_u32(x.0.into(), 2) , vget_low_u32(x.0.into()), 1), - vset_lane_u32(vgetq_lane_u32(y.0.into(), 2) , vget_low_u32(y.0.into()), 1)).into(), - vcombine_u32( - vset_lane_u32(vgetq_lane_u32(x.1.into(), 2) , vget_low_u32(x.1.into()), 1), - vset_lane_u32(vgetq_lane_u32(y.1.into(), 2) , vget_low_u32(y.1.into()), 1)).into()) + use core::arch::aarch64::vtrn1_u32; + + ( + vcombine_u32( + vtrn1_u32(vget_low_u32(x.0.into()), vget_high_u32(x.0.into())), + vtrn1_u32(vget_low_u32(y.0.into()), vget_high_u32(y.0.into()))).into(), + vcombine_u32( +
vtrn1_u32(vget_low_u32(x.0.into()), vget_high_u32(x.0.into())), + vtrn1_u32(vget_low_u32(y.0.into()), vget_high_u32(y.0.into()))).into() + ) } } @@ -318,7 +360,6 @@ impl FieldElement2625x4 { let rotated_carryout = |v: (u32x4, u32x4)| -> (u32x4, u32x4) { unsafe { use core::arch::aarch64::vcombine_u32; - use core::arch::aarch64::vget_high_u32; use core::arch::aarch64::vget_low_u32; use core::arch::aarch64::vqshlq_u32; @@ -344,7 +385,6 @@ impl FieldElement2625x4 { let combine = |v_lo: (u32x4, u32x4), v_hi: (u32x4, u32x4)| -> (u32x4, u32x4) { unsafe { use core::arch::aarch64::vcombine_u32; - use core::arch::aarch64::vget_high_u32; use core::arch::aarch64::vget_low_u32; ( vcombine_u32( @@ -476,7 +516,6 @@ impl FieldElement2625x4 { use core::arch::aarch64::vmull_u32; use core::arch::aarch64::vuzp1_u32; use core::arch::aarch64::vget_low_u32; - use core::arch::aarch64::vget_high_u32; unsafe { let x: (u32x4, u32x4) = ( vmull_u32(x.0.into(), y.0.into()).into(), @@ -531,7 +570,6 @@ impl FieldElement2625x4 { let negate_D = |x_01: u64x4, p_01: u64x4| -> (u64x2, u64x2) { unsafe { use core::arch::aarch64::vget_low_u32; - use core::arch::aarch64::vget_high_u32; use core::arch::aarch64::vcombine_u32; let x = x_01.0; @@ -641,7 +679,6 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { use core::arch::aarch64::vmull_u32; use core::arch::aarch64::vuzp1_u32; use core::arch::aarch64::vget_low_u32; - use core::arch::aarch64::vget_high_u32; unsafe { let x: (u32x4, u32x4) = ( vmull_u32(x.0.into(), y.0.into()).into(), From 6cffc064af1092924e716269ef18029f6596cab9 Mon Sep 17 00:00:00 2001 From: Tarinn Date: Mon, 3 Jun 2024 17:32:41 +0200 Subject: [PATCH 10/14] fixed small bug in repack_pair --- curve25519-dalek/src/backend/vector/neon/field.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/curve25519-dalek/src/backend/vector/neon/field.rs b/curve25519-dalek/src/backend/vector/neon/field.rs index 4ac212c0b..4ace4d84c 100644 --- 
a/curve25519-dalek/src/backend/vector/neon/field.rs +++ b/curve25519-dalek/src/backend/vector/neon/field.rs @@ -121,8 +121,8 @@ fn repack_pair(x: (u32x4, u32x4), y: (u32x4, u32x4)) -> (u32x4, u32x4) { vtrn1_u32(vget_low_u32(x.0.into()), vget_high_u32(x.0.into())), vtrn1_u32(vget_low_u32(y.0.into()), vget_high_u32(y.0.into()))).into(), vcombine_u32( - vtrn1_u32(vget_low_u32(x.0.into()), vget_high_u32(x.0.into())), - vtrn1_u32(vget_low_u32(y.0.into()), vget_high_u32(y.0.into()))).into() + vtrn1_u32(vget_low_u32(x.1.into()), vget_high_u32(x.1.into())), + vtrn1_u32(vget_low_u32(y.1.into()), vget_high_u32(y.1.into()))).into() ) } } From b7d2f53fbc776bb362a96cd4f979b21ec2c8260e Mon Sep 17 00:00:00 2001 From: Tarinn Date: Thu, 6 Jun 2024 15:28:22 +0200 Subject: [PATCH 11/14] changed to use internal arm types instead of tuples --- .../src/backend/vector/neon/constants.rs | 670 +++++++++--------- .../src/backend/vector/neon/field.rs | 397 ++++++----- .../src/backend/vector/neon/packed_simd.rs | 175 ++++- 3 files changed, 702 insertions(+), 540 deletions(-) diff --git a/curve25519-dalek/src/backend/vector/neon/constants.rs b/curve25519-dalek/src/backend/vector/neon/constants.rs index 9870e3899..befcf1f05 100644 --- a/curve25519-dalek/src/backend/vector/neon/constants.rs +++ b/curve25519-dalek/src/backend/vector/neon/constants.rs @@ -12,7 +12,7 @@ //! This module contains constants used by the NEON backend. -use super::packed_simd::u32x4; +use super::packed_simd::{u32x4, u32x4x2}; use crate::backend::vector::neon::edwards::{CachedPoint, ExtendedPoint}; use crate::backend::vector::neon::field::FieldElement2625x4; @@ -20,32 +20,32 @@ use crate::window::NafLookupTable8; /// The identity element as an `ExtendedPoint`. 
pub(crate) static EXTENDEDPOINT_IDENTITY: ExtendedPoint = ExtendedPoint(FieldElement2625x4([ - (u32x4::const_new(0, 1, 0, 0), u32x4::const_new(1, 0, 0, 0)), - (u32x4::const_splat(0), u32x4::const_splat(0)), - (u32x4::const_splat(0), u32x4::const_splat(0)), - (u32x4::const_splat(0), u32x4::const_splat(0)), - (u32x4::const_splat(0), u32x4::const_splat(0)), + u32x4x2::new(u32x4::const_new(0, 1, 0, 0), u32x4::const_new(1, 0, 0, 0)), + u32x4x2::new(u32x4::const_splat(0), u32x4::const_splat(0)), + u32x4x2::new(u32x4::const_splat(0), u32x4::const_splat(0)), + u32x4x2::new(u32x4::const_splat(0), u32x4::const_splat(0)), + u32x4x2::new(u32x4::const_splat(0), u32x4::const_splat(0)), ])); /// The identity element as a `CachedPoint`. pub(crate) static CACHEDPOINT_IDENTITY: CachedPoint = CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(121647, 121666, 0, 0), u32x4::const_new(243332, 67108845, 0, 33554431), ), - ( + u32x4x2::new( u32x4::const_new(67108864, 0, 33554431, 0), u32x4::const_new(0, 67108863, 0, 33554431), ), - ( + u32x4x2::new( u32x4::const_new(67108863, 0, 33554431, 0), u32x4::const_new(0, 67108863, 0, 33554431), ), - ( + u32x4x2::new( u32x4::const_new(67108863, 0, 33554431, 0), u32x4::const_new(0, 67108863, 0, 33554431), ), - ( + u32x4x2::new( u32x4::const_new(67108863, 0, 33554431, 0), u32x4::const_new(0, 67108863, 0, 33554431), ), @@ -55,7 +55,7 @@ pub(crate) static CACHEDPOINT_IDENTITY: CachedPoint = CachedPoint(FieldElement26 /// ```ascii,no_run /// (2p, 2p, 2p, 2p) = [P_TIMES_2_LO, P_TIMES_2_HI, P_TIMES_2_HI, P_TIMES_2_HI, P_TIMES_2_HI] /// ``` -pub(crate) static P_TIMES_2_LO: (u32x4, u32x4) = ( +pub(crate) static P_TIMES_2_LO: u32x4x2 = u32x4x2::new( u32x4::const_new(67108845 << 1, 67108845 << 1, 33554431 << 1, 33554431 << 1), u32x4::const_new(67108845 << 1, 67108845 << 1, 33554431 << 1, 33554431 << 1), ); @@ -64,7 +64,7 @@ pub(crate) static P_TIMES_2_LO: (u32x4, u32x4) = ( /// ```ascii,no_run /// (2p, 2p, 2p, 2p) = [P_TIMES_2_LO, 
P_TIMES_2_HI, P_TIMES_2_HI, P_TIMES_2_HI, P_TIMES_2_HI] /// ``` -pub(crate) static P_TIMES_2_HI: (u32x4, u32x4) = ( +pub(crate) static P_TIMES_2_HI: u32x4x2 = u32x4x2::new( u32x4::const_new(67108863 << 1, 67108863 << 1, 33554431 << 1, 33554431 << 1), u32x4::const_new(67108863 << 1, 67108863 << 1, 33554431 << 1, 33554431 << 1), ); @@ -73,7 +73,7 @@ pub(crate) static P_TIMES_2_HI: (u32x4, u32x4) = ( /// ```ascii,no_run /// (16p, 16p, 16p, 16p) = [P_TIMES_16_LO, P_TIMES_16_HI, P_TIMES_16_HI, P_TIMES_16_HI, P_TIMES_16_HI] /// ``` -pub(crate) static P_TIMES_16_LO: (u32x4, u32x4) = ( +pub(crate) static P_TIMES_16_LO: u32x4x2 = u32x4x2::new( u32x4::const_new(67108845 << 4, 67108845 << 4, 33554431 << 4, 33554431 << 4), u32x4::const_new(67108845 << 4, 67108845 << 4, 33554431 << 4, 33554431 << 4), ); @@ -82,7 +82,7 @@ pub(crate) static P_TIMES_16_LO: (u32x4, u32x4) = ( /// ```ascii,no_run /// (16p, 16p, 16p, 16p) = [P_TIMES_16_LO, P_TIMES_16_HI, P_TIMES_16_HI, P_TIMES_16_HI, P_TIMES_16_HI] /// ``` -pub(crate) static P_TIMES_16_HI: (u32x4, u32x4) = ( +pub(crate) static P_TIMES_16_HI: u32x4x2 = u32x4x2::new( u32x4::const_new(67108863 << 4, 67108863 << 4, 33554431 << 4, 33554431 << 4), u32x4::const_new(67108863 << 4, 67108863 << 4, 33554431 << 4, 33554431 << 4), ); @@ -90,1409 +90,1409 @@ pub(crate) static P_TIMES_16_HI: (u32x4, u32x4) = ( /// Odd multiples of the Ed25519 basepoint: pub(crate) static BASEPOINT_ODD_LOOKUP_TABLE: NafLookupTable8<CachedPoint> = NafLookupTable8([ CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(3571425, 10045002, 19036563, 1096096), u32x4::const_new(243332, 65897020, 0, 28963681), ), - ( + u32x4x2::new( u32x4::const_new(30896895, 63055514, 1614915, 5095970), u32x4::const_new(0, 53791688, 0, 31258312), ), - ( + u32x4x2::new( u32x4::const_new(13347627, 40339464, 2236269, 11185503), u32x4::const_new(0, 22520087, 0, 8659512), ), - ( + u32x4x2::new( u32x4::const_new(11125413, 29139905, 32037254, 28360723), u32x4::const_new(0, 64556417, 0,
9635759), ), - ( + u32x4x2::new( u32x4::const_new(33268144, 47262491, 4336918, 15795740), u32x4::const_new(0, 22027545, 0, 4846528), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(47099681, 31447946, 29365447, 24740513), u32x4::const_new(42991046, 18317844, 16051644, 21404226), ), - ( + u32x4x2::new( u32x4::const_new(31708133, 28909527, 2366091, 13703791), u32x4::const_new(469246, 54159622, 2601402, 32988002), ), - ( + u32x4x2::new( u32x4::const_new(63432457, 30251794, 15163516, 18491340), u32x4::const_new(28144087, 35605455, 13682295, 18474872), ), - ( + u32x4x2::new( u32x4::const_new(12221607, 4967598, 26061980, 26008006), u32x4::const_new(20226147, 9726961, 17410, 18051083), ), - ( + u32x4x2::new( u32x4::const_new(60569645, 62487085, 11911242, 21920922), u32x4::const_new(4092105, 38186967, 22431483, 31366585), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(18147205, 62587998, 2554617, 536692), u32x4::const_new(11924528, 26674131, 17645433, 24341419), ), - ( + u32x4x2::new( u32x4::const_new(11573357, 27579485, 31491870, 29000885), u32x4::const_new(10800976, 51902791, 28076395, 20464029), ), - ( + u32x4x2::new( u32x4::const_new(56031649, 10856669, 11791193, 26769430), u32x4::const_new(25306956, 5922200, 6630685, 9385098), ), - ( + u32x4x2::new( u32x4::const_new(31319348, 23906711, 16290213, 32142166), u32x4::const_new(61106354, 17181823, 3548308, 12022566), ), - ( + u32x4x2::new( u32x4::const_new(5904298, 50218605, 11826440, 5492249), u32x4::const_new(10379071, 3472255, 172742, 31948344), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(10625852, 15193821, 22918394, 23676410), u32x4::const_new(53695416, 54987793, 10067515, 11747680), ), - ( + u32x4x2::new( u32x4::const_new(65013325, 1309652, 29616320, 28922974), u32x4::const_new(60360891, 19621771, 9938982, 30406429), ), - ( + u32x4x2::new( u32x4::const_new(54967954, 65931918, 5595602, 25719523), u32x4::const_new(64909864, 
30566415, 15945272, 8495317), ), - ( + u32x4x2::new( u32x4::const_new(1167157, 55265018, 11507029, 31641054), u32x4::const_new(43497904, 2367338, 12937761, 27517066), ), - ( + u32x4x2::new( u32x4::const_new(656704, 2544994, 13006713, 480979), u32x4::const_new(38471594, 62541240, 25353597, 11531760), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(22176662, 3984313, 27495285, 4110608), u32x4::const_new(2909584, 30594106, 15677919, 2549183), ), - ( + u32x4x2::new( u32x4::const_new(33979105, 62269905, 2071511, 6894756), u32x4::const_new(53189950, 47232857, 6408191, 6123225), ), - ( + u32x4x2::new( u32x4::const_new(32553873, 63948030, 12612401, 3633166), u32x4::const_new(24054373, 37626618, 14481327, 8520484), ), - ( + u32x4x2::new( u32x4::const_new(56552486, 10749438, 12034813, 28811946), u32x4::const_new(1445640, 36755601, 12104575, 10257833), ), - ( + u32x4x2::new( u32x4::const_new(22795808, 48761311, 1136056, 9380768), u32x4::const_new(1411523, 5341811, 27318329, 9686767), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(21157200, 39156966, 20473176, 4934657), u32x4::const_new(61478183, 45121537, 5429856, 13035023), ), - ( + u32x4x2::new( u32x4::const_new(7954529, 58789246, 31440083, 7054221), u32x4::const_new(38438565, 36856107, 1364112, 14548122), ), - ( + u32x4x2::new( u32x4::const_new(26120083, 36321360, 4919997, 31687496), u32x4::const_new(33757765, 36237559, 15243054, 32163861), ), - ( + u32x4x2::new( u32x4::const_new(25878307, 46544824, 19455951, 2414935), u32x4::const_new(16844726, 56521560, 32680554, 26660660), ), - ( + u32x4x2::new( u32x4::const_new(48360220, 43407178, 12187042, 24925816), u32x4::const_new(7423722, 25746484, 12814654, 17395963), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(63153652, 32195955, 4087908, 8431689), u32x4::const_new(30392384, 47203165, 8986649, 9053039), ), - ( + u32x4x2::new( u32x4::const_new(63659241, 47988767, 2931872, 19953600), 
u32x4::const_new(11747107, 51610101, 20952181, 13364887), ), - ( + u32x4x2::new( u32x4::const_new(3659197, 58790649, 5930099, 2605312), u32x4::const_new(28477896, 580728, 20579735, 2610622), ), - ( + u32x4x2::new( u32x4::const_new(41781607, 17161358, 10690531, 24368015), u32x4::const_new(47027031, 36742339, 5414694, 13156365), ), - ( + u32x4x2::new( u32x4::const_new(13237853, 51182423, 8954802, 29006542), u32x4::const_new(22643989, 56896541, 22830593, 10289708), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(1401265, 58846825, 30911620, 32239180), u32x4::const_new(15391552, 15200821, 6339309, 16403588), ), - ( + u32x4x2::new( u32x4::const_new(55913797, 29541724, 1664461, 21709410), u32x4::const_new(38470488, 47097092, 17674945, 32666066), ), - ( + u32x4x2::new( u32x4::const_new(22844482, 10797709, 27548106, 31638735), u32x4::const_new(34500968, 26611503, 19727211, 13160873), ), - ( + u32x4x2::new( u32x4::const_new(31485204, 14496164, 13981208, 10276888), u32x4::const_new(5748808, 35024436, 2740987, 7479021), ), - ( + u32x4x2::new( u32x4::const_new(58541207, 14866135, 32344041, 545930), u32x4::const_new(62661488, 6941250, 27940205, 11976112), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(39849808, 44781685, 15697329, 24387845), u32x4::const_new(12501486, 50260092, 23199481, 31929024), ), - ( + u32x4x2::new( u32x4::const_new(24823070, 27956017, 27034296, 10316465), u32x4::const_new(47664045, 11152446, 15719183, 30181617), ), - ( + u32x4x2::new( u32x4::const_new(20771189, 19969144, 31433937, 19185213), u32x4::const_new(27565920, 10384445, 2893359, 9255362), ), - ( + u32x4x2::new( u32x4::const_new(42894974, 11925545, 32134441, 32738810), u32x4::const_new(55916336, 32479272, 19563550, 5511385), ), - ( + u32x4x2::new( u32x4::const_new(17857161, 47809169, 14564114, 27997751), u32x4::const_new(33024640, 38669671, 31956536, 27313245), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( 
u32x4::const_new(58237774, 15917425, 18872208, 19394230), u32x4::const_new(17374297, 6101419, 4839741, 6596900), ), - ( + u32x4x2::new( u32x4::const_new(66947393, 15744215, 18368993, 17750160), u32x4::const_new(41006525, 9205497, 2629667, 32170865), ), - ( + u32x4x2::new( u32x4::const_new(66481381, 1919414, 28338762, 7372967), u32x4::const_new(33819153, 4156199, 27126309, 12739816), ), - ( + u32x4x2::new( u32x4::const_new(44117158, 58545296, 22521371, 11809712), u32x4::const_new(28998792, 50731010, 30215699, 25748377), ), - ( + u32x4x2::new( u32x4::const_new(23561284, 4160244, 9035405, 24895184), u32x4::const_new(39761639, 59253416, 8684759, 22487864), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(12671134, 56419053, 16092401, 30038207), u32x4::const_new(4002647, 47822606, 7151311, 28430768), ), - ( + u32x4x2::new( u32x4::const_new(61041684, 35765374, 30598048, 19666539), u32x4::const_new(44150175, 40140037, 290469, 28442674), ), - ( + u32x4x2::new( u32x4::const_new(18847796, 1371617, 33316881, 13199936), u32x4::const_new(43646578, 17068881, 12074900, 1537415), ), - ( + u32x4x2::new( u32x4::const_new(10052225, 38316070, 27469797, 5297537), u32x4::const_new(50725570, 20435349, 10339121, 2779737), ), - ( + u32x4x2::new( u32x4::const_new(18372189, 15466385, 24762130, 22217964), u32x4::const_new(23503887, 47844464, 10415034, 2606889), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(55082775, 45300503, 16032654, 5964396), u32x4::const_new(17743504, 24634761, 19493066, 5184611), ), - ( + u32x4x2::new( u32x4::const_new(50172633, 35093294, 10040575, 23616256), u32x4::const_new(4543900, 61852191, 4049821, 7423669), ), - ( + u32x4x2::new( u32x4::const_new(20295398, 40009376, 10487190, 15670429), u32x4::const_new(51972856, 58649552, 20436392, 3432497), ), - ( + u32x4x2::new( u32x4::const_new(35189420, 54117751, 12825868, 6283038), u32x4::const_new(27540739, 30648758, 22658912, 9466689), ), - ( + u32x4x2::new( 
u32x4::const_new(51737549, 40725785, 17409814, 25201086), u32x4::const_new(21156239, 34176168, 26814520, 5956424), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(8211442, 8014184, 6260823, 22108096), u32x4::const_new(32182620, 51844847, 2466270, 28582231), ), - ( + u32x4x2::new( u32x4::const_new(27199739, 3848333, 31738017, 10892045), u32x4::const_new(4963982, 65391770, 32551997, 28906469), ), - ( + u32x4x2::new( u32x4::const_new(16606846, 32207068, 26404535, 7614129), u32x4::const_new(45416902, 65584718, 13821785, 2646060), ), - ( + u32x4x2::new( u32x4::const_new(36090634, 57981287, 32247670, 22837502), u32x4::const_new(31003861, 55448117, 6062915, 20369975), ), - ( + u32x4x2::new( u32x4::const_new(27381403, 50578107, 522631, 29521058), u32x4::const_new(31137497, 40220737, 27628049, 1824195), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(59402443, 17056879, 29262689, 6131785), u32x4::const_new(52551472, 43367471, 29423199, 18899208), ), - ( + u32x4x2::new( u32x4::const_new(5749414, 43514612, 11365899, 21514624), u32x4::const_new(65591890, 60945892, 19841732, 5628567), ), - ( + u32x4x2::new( u32x4::const_new(19334369, 52500268, 12307673, 5267367), u32x4::const_new(3212103, 9035822, 29142161, 30520954), ), - ( + u32x4x2::new( u32x4::const_new(57261330, 6819646, 22089161, 9800373), u32x4::const_new(55155453, 62250856, 13766735, 25244545), ), - ( + u32x4x2::new( u32x4::const_new(54370226, 61888301, 24496089, 2540581), u32x4::const_new(65637506, 60274355, 18154273, 11687259), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(12521903, 26014045, 13995625, 33360175), u32x4::const_new(23605474, 7376434, 27229267, 17195036), ), - ( + u32x4x2::new( u32x4::const_new(59482891, 10074423, 574357, 3857753), u32x4::const_new(61377787, 50306685, 5241065, 20234396), ), - ( + u32x4x2::new( u32x4::const_new(23674717, 6997172, 20771841, 16858511), u32x4::const_new(40565304, 29973136, 7049812, 
14585010), ), - ( + u32x4x2::new( u32x4::const_new(1427477, 13295732, 31762066, 31499740), u32x4::const_new(60419925, 54666164, 22009424, 8089609), ), - ( + u32x4x2::new( u32x4::const_new(58154031, 41593020, 15342328, 957047), u32x4::const_new(38937260, 37037498, 24871992, 32973409), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(30654745, 51286025, 21206982, 2433562), u32x4::const_new(12780105, 31732574, 33087964, 33081189), ), - ( + u32x4x2::new( u32x4::const_new(66640017, 42720009, 16567620, 15300745), u32x4::const_new(1530367, 33001123, 20930247, 21042661), ), - ( + u32x4x2::new( u32x4::const_new(15003356, 5294119, 22985605, 18928772), u32x4::const_new(32628461, 18230172, 14773298, 27193722), ), - ( + u32x4x2::new( u32x4::const_new(27555, 65346287, 17017174, 7837720), u32x4::const_new(21499787, 42855613, 22474984, 13675085), ), - ( + u32x4x2::new( u32x4::const_new(24164369, 50130116, 5973149, 24152073), u32x4::const_new(1577334, 25400030, 18648484, 32228854), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(49518649, 59119280, 31670678, 20396561), u32x4::const_new(61728330, 651402, 176032, 9529498), ), - ( + u32x4x2::new( u32x4::const_new(61765532, 9082232, 32794568, 15526956), u32x4::const_new(48543100, 32614212, 19001206, 25680229), ), - ( + u32x4x2::new( u32x4::const_new(32086091, 10373081, 8996131, 31822823), u32x4::const_new(35788988, 49973190, 30542040, 17858455), ), - ( + u32x4x2::new( u32x4::const_new(48130197, 58121889, 27753291, 29923268), u32x4::const_new(54448075, 43300790, 9336565, 15770022), ), - ( + u32x4x2::new( u32x4::const_new(57725546, 20557498, 9366233, 16023566), u32x4::const_new(16189031, 2837363, 24315301, 27003505), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(28286608, 10767548, 18220739, 5413236), u32x4::const_new(48253387, 58255702, 11864864, 28527159), ), - ( + u32x4x2::new( u32x4::const_new(45038176, 58655197, 25648758, 10951484), 
u32x4::const_new(42564382, 34542843, 23146954, 22234334), ), - ( + u32x4x2::new( u32x4::const_new(14858710, 24978793, 15040559, 4379220), u32x4::const_new(47621477, 40271440, 15650420, 1998736), ), - ( + u32x4x2::new( u32x4::const_new(24106391, 9626149, 344505, 25253814), u32x4::const_new(34579800, 59687089, 25718289, 25904133), ), - ( + u32x4x2::new( u32x4::const_new(1981195, 37751302, 26132048, 1764722), u32x4::const_new(13288231, 28808622, 12531301, 18292949), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(13869851, 31448904, 14963539, 7581293), u32x4::const_new(20536485, 35021083, 21257574, 33356609), ), - ( + u32x4x2::new( u32x4::const_new(36903364, 18429241, 11097857, 5943856), u32x4::const_new(60583077, 40015815, 30509523, 31915271), ), - ( + u32x4x2::new( u32x4::const_new(49161801, 40681915, 67892, 25454357), u32x4::const_new(22779677, 25798439, 15964829, 5863227), ), - ( + u32x4x2::new( u32x4::const_new(60810637, 4496471, 5217137, 14095116), u32x4::const_new(50942411, 50712663, 2507380, 26844507), ), - ( + u32x4x2::new( u32x4::const_new(34579752, 53519385, 10859797, 18816024), u32x4::const_new(42552864, 39478521, 6783896, 17277037), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(43287109, 27900723, 33182187, 2766754), u32x4::const_new(17041989, 1018260, 33392790, 4830032), ), - ( + u32x4x2::new( u32x4::const_new(60194178, 30788903, 24728888, 14513195), u32x4::const_new(20897010, 28843233, 20111980, 17475240), ), - ( + u32x4x2::new( u32x4::const_new(46042274, 19257042, 4628173, 31649727), u32x4::const_new(27388316, 66631493, 11541886, 6408028), ), - ( + u32x4x2::new( u32x4::const_new(57024680, 49536568, 32050358, 31321917), u32x4::const_new(17437691, 49672356, 2884755, 20493991), ), - ( + u32x4x2::new( u32x4::const_new(59553007, 46782643, 29001173, 1814088), u32x4::const_new(21930692, 51319706, 14965872, 30748046), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( 
u32x4::const_new(16441817, 36111849, 6900424, 602234), u32x4::const_new(46522199, 16441484, 8135070, 21726541), ), - ( + u32x4x2::new( u32x4::const_new(37711225, 32701959, 11679112, 13125533), u32x4::const_new(32154135, 9407918, 26554289, 620848), ), - ( + u32x4x2::new( u32x4::const_new(19233407, 30086864, 14679568, 2797374), u32x4::const_new(4892806, 7993077, 247658, 5632804), ), - ( + u32x4x2::new( u32x4::const_new(37427262, 26675495, 27125659, 13496131), u32x4::const_new(50718473, 40115609, 28505351, 27837393), ), - ( + u32x4x2::new( u32x4::const_new(196819, 18410429, 7070012, 21691388), u32x4::const_new(29763371, 24754123, 9727048, 10930179), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(28319289, 40734650, 16225680, 24739184), u32x4::const_new(64272368, 35356897, 7866648, 13635853), ), - ( + u32x4x2::new( u32x4::const_new(34165295, 48328447, 27041670, 23643655), u32x4::const_new(48949950, 52963288, 30411133, 6045174), ), - ( + u32x4x2::new( u32x4::const_new(18583559, 41649834, 9813585, 26098520), u32x4::const_new(25682734, 26733526, 19276490, 10654728), ), - ( + u32x4x2::new( u32x4::const_new(34867476, 52715968, 5694571, 13380978), u32x4::const_new(15134994, 1831255, 8608001, 17266401), ), - ( + u32x4x2::new( u32x4::const_new(59925903, 44282172, 27802465, 1855069), u32x4::const_new(14234749, 36635487, 11302294, 10938429), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(8373273, 49064494, 4932071, 32997499), u32x4::const_new(38472880, 29335908, 14504412, 22460029), ), - ( + u32x4x2::new( u32x4::const_new(31795930, 50785923, 25835990, 25790073), u32x4::const_new(65669841, 11360450, 9969157, 9008164), ), - ( + u32x4x2::new( u32x4::const_new(50262498, 45869261, 16124434, 15336007), u32x4::const_new(882762, 42522623, 11277198, 26296377), ), - ( + u32x4x2::new( u32x4::const_new(42332732, 59129236, 14452816, 567985), u32x4::const_new(208061, 34722729, 32008143, 14828749), ), - ( + u32x4x2::new( 
u32x4::const_new(17937794, 36846032, 32102665, 4442466), u32x4::const_new(19745435, 31633451, 7146411, 15812027), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(30741269, 38648744, 12562645, 30092623), u32x4::const_new(25073992, 28730659, 27911745, 30000958), ), - ( + u32x4x2::new( u32x4::const_new(2859794, 25991700, 17776078, 27091930), u32x4::const_new(2328322, 60061146, 18581824, 18039008), ), - ( + u32x4x2::new( u32x4::const_new(58206333, 17917354, 1972306, 11853766), u32x4::const_new(2655376, 60543390, 18416710, 13287440), ), - ( + u32x4x2::new( u32x4::const_new(62746330, 61423885, 21246577, 2266675), u32x4::const_new(60099139, 14804707, 14772234, 20679434), ), - ( + u32x4x2::new( u32x4::const_new(26987698, 15488817, 715616, 2339565), u32x4::const_new(51980752, 17333865, 21965103, 10839820), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(18672548, 57660959, 16042910, 19519287), u32x4::const_new(62865851, 17580961, 26628347, 23774759), ), - ( + u32x4x2::new( u32x4::const_new(368070, 3464471, 25888304, 30370559), u32x4::const_new(52396053, 45426828, 28745251, 9246829), ), - ( + u32x4x2::new( u32x4::const_new(29090099, 57950037, 23104657, 4903923), u32x4::const_new(10987778, 56163684, 23621539, 10332760), ), - ( + u32x4x2::new( u32x4::const_new(53338235, 44851161, 21606845, 31069622), u32x4::const_new(4243630, 34464392, 11286454, 5802022), ), - ( + u32x4x2::new( u32x4::const_new(46710757, 63389067, 11642865, 1980986), u32x4::const_new(12967337, 28162061, 3854192, 30432268), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(12179834, 41005450, 12809619, 33525228), u32x4::const_new(4624405, 46957889, 16968743, 11827816), ), - ( + u32x4x2::new( u32x4::const_new(51521162, 12466775, 31791271, 15303651), u32x4::const_new(49798465, 62714504, 6509600, 12918560), ), - ( + u32x4x2::new( u32x4::const_new(20445559, 1756449, 28848701, 7920171), u32x4::const_new(9835040, 5900071, 
28757409, 12376688), ), - ( + u32x4x2::new( u32x4::const_new(18259496, 14281012, 21767026, 10232236), u32x4::const_new(20000226, 12400540, 4104902, 23570543), ), - ( + u32x4x2::new( u32x4::const_new(3687440, 26546648, 13328821, 26841081), u32x4::const_new(49822734, 22334054, 244496, 24862543), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(59523541, 62195428, 3853227, 13954801), u32x4::const_new(12387708, 47627615, 27221350, 17899572), ), - ( + u32x4x2::new( u32x4::const_new(63193587, 36343307, 14595132, 6880795), u32x4::const_new(1364792, 37648434, 3259017, 20536046), ), - ( + u32x4x2::new( u32x4::const_new(30362834, 10440372, 9574624, 11729232), u32x4::const_new(63861613, 21748389, 5530846, 2721586), ), - ( + u32x4x2::new( u32x4::const_new(18339760, 1550632, 17170271, 25732971), u32x4::const_new(28459263, 63142237, 21642345, 31557672), ), - ( + u32x4x2::new( u32x4::const_new(10611282, 5204623, 18049257, 214175), u32x4::const_new(19432723, 49809070, 26010406, 27449522), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(19770733, 26478685, 9464541, 29158041), u32x4::const_new(28604307, 45196604, 7586524, 6641859), ), - ( + u32x4x2::new( u32x4::const_new(65654484, 52230498, 30886612, 19112823), u32x4::const_new(47271809, 38942611, 16020035, 10773481), ), - ( + u32x4x2::new( u32x4::const_new(27464323, 54451016, 20646645, 17732915), u32x4::const_new(23008717, 53626684, 3253189, 15614410), ), - ( + u32x4x2::new( u32x4::const_new(52381752, 40693008, 7063024, 28469981), u32x4::const_new(51159478, 44543211, 19941777, 5985451), ), - ( + u32x4x2::new( u32x4::const_new(13553668, 35524849, 14788737, 1883845), u32x4::const_new(12385775, 47958835, 29135466, 1776722), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(36719806, 20827965, 23175373, 32996806), u32x4::const_new(42041892, 65708790, 5467143, 20884008), ), - ( + u32x4x2::new( u32x4::const_new(43256281, 40770646, 17244063, 31959819), 
u32x4::const_new(64366384, 43544617, 25057754, 12628720), ), - ( + u32x4x2::new( u32x4::const_new(17337782, 58472057, 27906934, 15305274), u32x4::const_new(30292418, 39284317, 16946773, 24806712), ), - ( + u32x4x2::new( u32x4::const_new(6485126, 32447403, 16261486, 13561940), u32x4::const_new(49439635, 10738368, 16419889, 8897231), ), - ( + u32x4x2::new( u32x4::const_new(44812203, 40122262, 25496058, 2759794), u32x4::const_new(25295304, 52178368, 24154195, 29334408), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(42307254, 57217102, 1088936, 3832827), u32x4::const_new(33905401, 23130334, 6958056, 12622851), ), - ( + u32x4x2::new( u32x4::const_new(3881189, 14870059, 19712830, 6071598), u32x4::const_new(38147944, 60776394, 3427938, 13765703), ), - ( + u32x4x2::new( u32x4::const_new(7666911, 24227591, 17077136, 22967588), u32x4::const_new(6874639, 30915523, 11451695, 24292224), ), - ( + u32x4x2::new( u32x4::const_new(13659529, 31984463, 28764736, 20506164), u32x4::const_new(64729627, 49321636, 28284636, 25472371), ), - ( + u32x4x2::new( u32x4::const_new(39360308, 42281399, 9446504, 868960), u32x4::const_new(49227724, 21351115, 30561851, 11292096), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(7071115, 46444090, 5387916, 15432877), u32x4::const_new(27226682, 41506862, 2398278, 3978240), ), - ( + u32x4x2::new( u32x4::const_new(51009614, 54216973, 24368938, 31392616), u32x4::const_new(38456150, 62313644, 6729154, 99724), ), - ( + u32x4x2::new( u32x4::const_new(17474332, 62857913, 2619930, 30659308), u32x4::const_new(18268181, 32809239, 22826292, 24561895), ), - ( + u32x4x2::new( u32x4::const_new(38187020, 67003092, 14118280, 16500577), u32x4::const_new(18808560, 64983716, 25712929, 32518261), ), - ( + u32x4x2::new( u32x4::const_new(25735813, 62284262, 10824872, 20558596), u32x4::const_new(48149681, 31162667, 22608274, 26285185), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( 
u32x4::const_new(963440, 63742255, 10230323, 25515008), u32x4::const_new(32506414, 6105697, 25980317, 24645129), ), - ( + u32x4x2::new( u32x4::const_new(7162189, 8101249, 14679265, 33443386), u32x4::const_new(2002396, 8541405, 19442276, 4795881), ), - ( + u32x4x2::new( u32x4::const_new(8116694, 51463069, 4415528, 25599140), u32x4::const_new(55805721, 39582709, 6719436, 30033839), ), - ( + u32x4x2::new( u32x4::const_new(14468202, 42181869, 25188826, 9639755), u32x4::const_new(47546189, 62711146, 32762447, 18338064), ), - ( + u32x4x2::new( u32x4::const_new(33880058, 32810909, 8969931, 13095238), u32x4::const_new(38360605, 40138517, 9246134, 4928058), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(63655588, 17883670, 9410246, 26162761), u32x4::const_new(5000571, 7349225, 23785252, 32751089), ), - ( + u32x4x2::new( u32x4::const_new(28568737, 10733123, 9342397, 21570673), u32x4::const_new(54096560, 32467591, 20494687, 21511513), ), - ( + u32x4x2::new( u32x4::const_new(47675157, 47932807, 29250946, 15672208), u32x4::const_new(59760469, 9945465, 14939287, 18437405), ), - ( + u32x4x2::new( u32x4::const_new(37985267, 8609815, 31573002, 3373596), u32x4::const_new(47828883, 20834216, 13248616, 24154292), ), - ( + u32x4x2::new( u32x4::const_new(5543543, 29553242, 3386453, 30501150), u32x4::const_new(25058089, 15236571, 8814395, 32462955), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(39158670, 15322548, 20495103, 3312736), u32x4::const_new(14557171, 12985179, 8044741, 3176899), ), - ( + u32x4x2::new( u32x4::const_new(24673290, 29693310, 21412266, 18324699), u32x4::const_new(2154518, 40329021, 17500543, 3954277), ), - ( + u32x4x2::new( u32x4::const_new(36758685, 38738957, 165513, 14691866), u32x4::const_new(3070475, 10424235, 17096536, 16896898), ), - ( + u32x4x2::new( u32x4::const_new(59790459, 43094586, 8720681, 10423589), u32x4::const_new(1122030, 31545615, 4463786, 31811293), ), - ( + u32x4x2::new( 
u32x4::const_new(49778992, 60881044, 20509974, 5832494), u32x4::const_new(64155961, 31483358, 4511231, 20307815), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(2863373, 40876242, 26865913, 24067353), u32x4::const_new(15726407, 40919070, 12953902, 9931535), ), - ( + u32x4x2::new( u32x4::const_new(60934877, 42512204, 21649141, 21945190), u32x4::const_new(52211954, 60984193, 7046207, 5363493), ), - ( + u32x4x2::new( u32x4::const_new(4205971, 64068464, 18197273, 7327176), u32x4::const_new(51527794, 21166920, 20669933, 11828242), ), - ( + u32x4x2::new( u32x4::const_new(59782815, 49617225, 15379924, 457923), u32x4::const_new(9320508, 21498914, 3242540, 31563182), ), - ( + u32x4x2::new( u32x4::const_new(27714753, 8664670, 3366162, 26338598), u32x4::const_new(56775518, 25796006, 13129151, 21388876), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(59276548, 49972346, 16795002, 33455915), u32x4::const_new(48430097, 53857205, 18627071, 32474471), ), - ( + u32x4x2::new( u32x4::const_new(42160315, 50705892, 13530540, 28012698), u32x4::const_new(19833221, 55886870, 20191784, 9644313), ), - ( + u32x4x2::new( u32x4::const_new(20372416, 28414713, 24084234, 31804096), u32x4::const_new(33815377, 36131001, 17251241, 18291088), ), - ( + u32x4x2::new( u32x4::const_new(56234667, 14920441, 2033267, 29572003), u32x4::const_new(1724043, 45519699, 17873735, 501988), ), - ( + u32x4x2::new( u32x4::const_new(50031659, 31517850, 15697583, 1016845), u32x4::const_new(43104661, 54769582, 8008601, 27257051), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(52951491, 66542164, 14853573, 30444631), u32x4::const_new(12045973, 24321813, 16545674, 18160646), ), - ( + u32x4x2::new( u32x4::const_new(60107911, 1126003, 5947677, 19486116), u32x4::const_new(41119984, 30860440, 7935395, 13354438), ), - ( + u32x4x2::new( u32x4::const_new(17841328, 11063269, 1664538, 26687568), u32x4::const_new(6268968, 22280371, 17275484, 
4523163), ), - ( + u32x4x2::new( u32x4::const_new(15886041, 56799482, 15446552, 21712778), u32x4::const_new(1005290, 17827215, 4978741, 6854882), ), - ( + u32x4x2::new( u32x4::const_new(34319277, 47731002, 20321804, 28544575), u32x4::const_new(29591814, 63376351, 24754545, 26001714), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(66783087, 5234346, 46102, 8566476), u32x4::const_new(19947339, 20180418, 25398238, 3726678), ), - ( + u32x4x2::new( u32x4::const_new(63890180, 46380965, 20674069, 5366544), u32x4::const_new(59661487, 48406612, 31533614, 7071217), ), - ( + u32x4x2::new( u32x4::const_new(13104676, 1406631, 24326736, 19854367), u32x4::const_new(61039528, 11019904, 31967425, 19219275), ), - ( + u32x4x2::new( u32x4::const_new(39003597, 30143957, 15351834, 8639435), u32x4::const_new(57309582, 61436794, 15830475, 10090318), ), - ( + u32x4x2::new( u32x4::const_new(45923044, 6700175, 99413, 21263025), u32x4::const_new(23762647, 53905481, 6063914, 10065424), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(42822326, 57678669, 4052879, 25452667), u32x4::const_new(54049411, 2373092, 22337016, 7701046), ), - ( + u32x4x2::new( u32x4::const_new(44382355, 43307377, 16761537, 30373573), u32x4::const_new(49790216, 23230748, 25655306, 10519391), ), - ( + u32x4x2::new( u32x4::const_new(919475, 59371245, 1273450, 25558666), u32x4::const_new(9724711, 8556709, 25755845, 10887647), ), - ( + u32x4x2::new( u32x4::const_new(25465699, 44651158, 17658392, 11257418), u32x4::const_new(29735193, 22885150, 7094716, 26828565), ), - ( + u32x4x2::new( u32x4::const_new(48237389, 47661599, 27054393, 7328070), u32x4::const_new(27280193, 65616691, 23062005, 4170709), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(26535281, 60238317, 30343788, 25790743), u32x4::const_new(37993933, 24614372, 9523840, 10401918), ), - ( + u32x4x2::new( u32x4::const_new(2783987, 29468958, 4697011, 19804475), 
u32x4::const_new(37246678, 46797720, 10261254, 18942252), ), - ( + u32x4x2::new( u32x4::const_new(58135580, 60247753, 25301938, 6844561), u32x4::const_new(20949454, 39844754, 4552026, 919057), ), - ( + u32x4x2::new( u32x4::const_new(6694071, 44126261, 32285330, 31370180), u32x4::const_new(24603698, 53328179, 13971149, 5325636), ), - ( + u32x4x2::new( u32x4::const_new(64879487, 582094, 17982081, 19190425), u32x4::const_new(24951286, 26923842, 29077174, 33286062), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(54863941, 67016431, 1224043, 23371240), u32x4::const_new(62940074, 52101083, 13523637, 30366406), ), - ( + u32x4x2::new( u32x4::const_new(36324581, 25407485, 18258623, 4698602), u32x4::const_new(50300544, 2658516, 26300935, 2611030), ), - ( + u32x4x2::new( u32x4::const_new(27183975, 21791014, 18105064, 9875199), u32x4::const_new(58118912, 54198635, 6400311, 14767984), ), - ( + u32x4x2::new( u32x4::const_new(33918318, 42937962, 14809334, 22136592), u32x4::const_new(10636588, 29082337, 29829692, 28549776), ), - ( + u32x4x2::new( u32x4::const_new(61080905, 854212, 12202487, 20004503), u32x4::const_new(9256495, 6903981, 20567109, 347423), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(41391822, 34336880, 22362564, 14247996), u32x4::const_new(12115604, 41583344, 7639288, 28910945), ), - ( + u32x4x2::new( u32x4::const_new(62066617, 59758859, 26665947, 11614812), u32x4::const_new(65737664, 45704543, 30324810, 12868376), ), - ( + u32x4x2::new( u32x4::const_new(17491771, 43589814, 9454919, 26047850), u32x4::const_new(52629282, 39304244, 3868968, 19296062), ), - ( + u32x4x2::new( u32x4::const_new(17826638, 30413590, 32534225, 32741469), u32x4::const_new(15012391, 14365713, 33039233, 14791399), ), - ( + u32x4x2::new( u32x4::const_new(64115596, 59197067, 32739005, 23275744), u32x4::const_new(32954320, 22241406, 20788442, 4942942), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( 
u32x4::const_new(31956192, 59570132, 2784352, 4237732), u32x4::const_new(47222312, 4860927, 18658867, 15279314), ), - ( + u32x4x2::new( u32x4::const_new(63240583, 28160478, 23524941, 13390861), u32x4::const_new(66437406, 57718120, 33345312, 28896298), ), - ( + u32x4x2::new( u32x4::const_new(39026193, 46239965, 21440243, 25070488), u32x4::const_new(64012383, 60999016, 16517060, 29565907), ), - ( + u32x4x2::new( u32x4::const_new(18118181, 60161496, 4212092, 23976240), u32x4::const_new(36277753, 62363144, 5816868, 16964362), ), - ( + u32x4x2::new( u32x4::const_new(18196138, 62490693, 281468, 7934713), u32x4::const_new(56027312, 62015725, 4837237, 32932252), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(29885826, 51028067, 30418143, 33438769), u32x4::const_new(62542283, 39442528, 31535876, 143299), ), - ( + u32x4x2::new( u32x4::const_new(17143063, 56709783, 14451852, 15782104), u32x4::const_new(32762665, 14047066, 26295037, 5432487), ), - ( + u32x4x2::new( u32x4::const_new(75151, 533606, 7539077, 30926189), u32x4::const_new(38410914, 23771680, 4872443, 29199566), ), - ( + u32x4x2::new( u32x4::const_new(61522396, 48934708, 16223126, 207380), u32x4::const_new(11171993, 47975147, 14164574, 352966), ), - ( + u32x4x2::new( u32x4::const_new(15449006, 56530757, 26796528, 12045834), u32x4::const_new(63738697, 40667227, 33001582, 9101885), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(43331297, 18431341, 25801195, 17267698), u32x4::const_new(19365485, 57295202, 22218985, 21284590), ), - ( + u32x4x2::new( u32x4::const_new(2429849, 19152559, 10762172, 22564684), u32x4::const_new(21880390, 66866426, 20357935, 22641906), ), - ( + u32x4x2::new( u32x4::const_new(19771185, 31652693, 3666117, 28136958), u32x4::const_new(23624283, 55101502, 6313920, 6783662), ), - ( + u32x4x2::new( u32x4::const_new(3487137, 7092443, 11001876, 26196524), u32x4::const_new(47319246, 44542068, 17594073, 15027760), ), - ( + u32x4x2::new( 
u32x4::const_new(49563607, 32191113, 4991283, 25400512), u32x4::const_new(46539152, 4155103, 32368171, 201203), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(20548943, 14334571, 4073874, 6368588), u32x4::const_new(53208883, 56484515, 15970071, 25561889), ), - ( + u32x4x2::new( u32x4::const_new(49915097, 44030795, 11202344, 29284344), u32x4::const_new(60258023, 66225712, 8075764, 12383512), ), - ( + u32x4x2::new( u32x4::const_new(45248912, 4933668, 9592153, 5819559), u32x4::const_new(31030983, 38174071, 32435814, 7442522), ), - ( + u32x4x2::new( u32x4::const_new(62688129, 48218381, 22089545, 12897361), u32x4::const_new(21050881, 34278889, 7569163, 3225449), ), - ( + u32x4x2::new( u32x4::const_new(19050183, 51089071, 32935757, 22640195), u32x4::const_new(66122318, 47144608, 18743677, 25177079), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(41186817, 46681702, 31819867, 32997133), u32x4::const_new(38559207, 27147015, 30293819, 16762988), ), - ( + u32x4x2::new( u32x4::const_new(24154689, 51762873, 23883879, 13510519), u32x4::const_new(55338250, 61224161, 11663149, 30803960), ), - ( + u32x4x2::new( u32x4::const_new(18104238, 14117824, 11724021, 21362053), u32x4::const_new(65704761, 35530242, 13498058, 33522849), ), - ( + u32x4x2::new( u32x4::const_new(63812888, 23995539, 28920539, 24005193), u32x4::const_new(26412223, 36582218, 4251418, 26160309), ), - ( + u32x4x2::new( u32x4::const_new(16822053, 66064082, 3482145, 31979593), u32x4::const_new(45937188, 54475379, 612917, 7976478), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(46509314, 55327128, 8944536, 274914), u32x4::const_new(26432930, 53829300, 21192572, 3569894), ), - ( + u32x4x2::new( u32x4::const_new(20919764, 64356651, 30642344, 17215170), u32x4::const_new(20335124, 11203745, 18663316, 19024174), ), - ( + u32x4x2::new( u32x4::const_new(59297055, 53842463, 3680204, 9806710), u32x4::const_new(54004169, 51484914, 
29807998, 20134199), ), - ( + u32x4x2::new( u32x4::const_new(14781592, 22628010, 26877930, 25880359), u32x4::const_new(30434803, 190607, 30184292, 8991040), ), - ( + u32x4x2::new( u32x4::const_new(64400983, 64591751, 854562, 28216111), u32x4::const_new(20010398, 50414793, 9803872, 22687008), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(15091184, 32550863, 8818643, 4244752), u32x4::const_new(43123513, 64565526, 408838, 13206998), ), - ( + u32x4x2::new( u32x4::const_new(16405061, 60379639, 31489017, 20949281), u32x4::const_new(27568751, 38734986, 8364264, 12451020), ), - ( + u32x4x2::new( u32x4::const_new(16005217, 58008076, 1406778, 26546927), u32x4::const_new(39571784, 56365493, 31274296, 8918790), ), - ( + u32x4x2::new( u32x4::const_new(23271122, 19453469, 27718201, 32742670), u32x4::const_new(234332, 36785342, 22601675, 14331046), ), - ( + u32x4x2::new( u32x4::const_new(40636025, 22442705, 22115403, 23745859), u32x4::const_new(41164945, 61012, 12499614, 542137), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(62776018, 32835413, 17373246, 17187309), u32x4::const_new(54469193, 21770290, 15923753, 28996575), ), - ( + u32x4x2::new( u32x4::const_new(59385210, 63082298, 12568449, 8509004), u32x4::const_new(9483342, 16105238, 5756054, 26890758), ), - ( + u32x4x2::new( u32x4::const_new(53987996, 38201748, 5521661, 19060159), u32x4::const_new(18663191, 9093637, 27786835, 31189196), ), - ( + u32x4x2::new( u32x4::const_new(65872678, 43635130, 27903055, 25020300), u32x4::const_new(65772737, 38110437, 5213502, 21909342), ), - ( + u32x4x2::new( u32x4::const_new(4438979, 9680838, 10212446, 4764184), u32x4::const_new(13235684, 58245995, 20264570, 21024049), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(60835961, 48209103, 31049052, 4688268), u32x4::const_new(12426713, 59829045, 22302488, 29008521), ), - ( + u32x4x2::new( u32x4::const_new(50401667, 29716596, 23531224, 7581281), 
u32x4::const_new(49071895, 6952617, 14934683, 8218256), ), - ( + u32x4x2::new( u32x4::const_new(1601446, 36631413, 31774811, 29625330), u32x4::const_new(56786114, 8331539, 23129509, 19783344), ), - ( + u32x4x2::new( u32x4::const_new(59514327, 64513110, 1772300, 5701338), u32x4::const_new(5737511, 16147555, 9461515, 5703271), ), - ( + u32x4x2::new( u32x4::const_new(33072974, 54300426, 11940114, 1308663), u32x4::const_new(15627555, 4931627, 28443714, 20924342), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(18135013, 20358426, 4922557, 10015355), u32x4::const_new(65729669, 34786528, 26248549, 29194359), ), - ( + u32x4x2::new( u32x4::const_new(797666, 34997544, 24316856, 25107230), u32x4::const_new(24612576, 4761401, 15307321, 32404252), ), - ( + u32x4x2::new( u32x4::const_new(16501152, 60565831, 9487105, 9316022), u32x4::const_new(24986054, 31917592, 3962024, 2501883), ), - ( + u32x4x2::new( u32x4::const_new(63356796, 50432342, 18044926, 30566881), u32x4::const_new(42032028, 31415202, 13524600, 16119907), ), - ( + u32x4x2::new( u32x4::const_new(3927286, 57022374, 9265437, 21620772), u32x4::const_new(19481940, 3806938, 24836192, 14572399), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(10785787, 46564798, 368445, 33181384), u32x4::const_new(5319843, 52687136, 30347110, 29837357), ), - ( + u32x4x2::new( u32x4::const_new(56436732, 47859251, 24141084, 22250712), u32x4::const_new(59046084, 4963427, 33463413, 17168859), ), - ( + u32x4x2::new( u32x4::const_new(15512044, 6366740, 4737504, 27644548), u32x4::const_new(30307977, 25037929, 14593903, 12836490), ), - ( + u32x4x2::new( u32x4::const_new(63878897, 34013023, 5860752, 7244096), u32x4::const_new(3689461, 57012135, 18389096, 11589351), ), - ( + u32x4x2::new( u32x4::const_new(4682110, 36302830, 653422, 22316819), u32x4::const_new(14081831, 5657024, 11088376, 24110612), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(39907267, 
45940262, 24887471, 18342609), u32x4::const_new(878445, 40456159, 12019082, 345107), ), - ( + u32x4x2::new( u32x4::const_new(12794982, 28893944, 9447505, 11387200), u32x4::const_new(16961963, 13916996, 10893728, 25898006), ), - ( + u32x4x2::new( u32x4::const_new(44934162, 53465865, 3583620, 1102334), u32x4::const_new(53917811, 63478576, 2426066, 10389549), ), - ( + u32x4x2::new( u32x4::const_new(45096036, 37595344, 19367718, 20257175), u32x4::const_new(10280866, 41653449, 27665642, 375926), ), - ( + u32x4x2::new( u32x4::const_new(45847901, 24064074, 32494820, 32204556), u32x4::const_new(10720704, 51079060, 1297436, 29853825), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(66303987, 36060363, 16494578, 24962147), u32x4::const_new(11971403, 49538586, 25060560, 1964341), ), - ( + u32x4x2::new( u32x4::const_new(25988481, 27641502, 24909517, 27237087), u32x4::const_new(66646363, 52777626, 16360849, 10459972), ), - ( + u32x4x2::new( u32x4::const_new(43930529, 34374176, 31225968, 8807030), u32x4::const_new(10394758, 35904854, 25325589, 19335583), ), - ( + u32x4x2::new( u32x4::const_new(25094697, 34380951, 20051185, 32287161), u32x4::const_new(11739332, 53887441, 30517319, 26601892), ), - ( + u32x4x2::new( u32x4::const_new(8868546, 35635502, 32513071, 28248087), u32x4::const_new(51946989, 14222744, 19198839, 23261841), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(51218008, 5070126, 11046681, 5320810), u32x4::const_new(61212079, 34104447, 23895089, 6460727), ), - ( + u32x4x2::new( u32x4::const_new(39843528, 46278671, 10426120, 25624792), u32x4::const_new(66658766, 37140083, 28933107, 12969597), ), - ( + u32x4x2::new( u32x4::const_new(59635793, 40220191, 5751421, 173680), u32x4::const_new(58321825, 740337, 1412847, 7682623), ), - ( + u32x4x2::new( u32x4::const_new(975962, 56440763, 20812276, 22631115), u32x4::const_new(49095824, 19883130, 2419746, 31043648), ), - ( + u32x4x2::new( u32x4::const_new(66208703, 
39669328, 22525915, 3748897), u32x4::const_new(65994776, 34533552, 8126286, 18326047), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(64176557, 3912400, 19351673, 30068471), u32x4::const_new(31190055, 24221683, 33142424, 28698542), ), - ( + u32x4x2::new( u32x4::const_new(34784792, 4109933, 3867193, 19557314), u32x4::const_new(2112512, 32715890, 24550117, 16595976), ), - ( + u32x4x2::new( u32x4::const_new(35542761, 48024875, 10925431, 31526577), u32x4::const_new(66577735, 23189821, 13375709, 1735095), ), - ( + u32x4x2::new( u32x4::const_new(59699254, 43854093, 29783239, 24777271), u32x4::const_new(19600372, 39924461, 2896720, 1472185), ), - ( + u32x4x2::new( u32x4::const_new(56389656, 35980854, 33172342, 1370336), u32x4::const_new(23707480, 57654949, 7850973, 12655016), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(38372660, 57101970, 7044964, 12732710), u32x4::const_new(57535705, 6043201, 30858914, 10946592), ), - ( + u32x4x2::new( u32x4::const_new(21023468, 6946992, 26403324, 23901823), u32x4::const_new(35695559, 23440687, 4763891, 6514074), ), - ( + u32x4x2::new( u32x4::const_new(28662273, 30933699, 9352242, 26354829), u32x4::const_new(37402243, 3145176, 8770289, 525937), ), - ( + u32x4x2::new( u32x4::const_new(54933102, 36695832, 3281859, 4755022), u32x4::const_new(23043294, 32794379, 15618886, 23602412), ), - ( + u32x4x2::new( u32x4::const_new(9931565, 29897140, 2480737, 24193701), u32x4::const_new(7833615, 2284939, 893926, 13421882), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(22917795, 22088359, 28978099, 19794863), u32x4::const_new(60542318, 29878494, 31053731, 9080720), ), - ( + u32x4x2::new( u32x4::const_new(23679072, 52547035, 28424916, 20647332), u32x4::const_new(4008761, 28267029, 12961289, 1589095), ), - ( + u32x4x2::new( u32x4::const_new(55616194, 26678929, 14998265, 23274397), u32x4::const_new(54625466, 46244264, 28627706, 33030665), ), - ( + 
u32x4x2::new( u32x4::const_new(11527330, 6449415, 26531607, 3472938), u32x4::const_new(41541592, 62607682, 19862690, 20564723), ), - ( + u32x4x2::new( u32x4::const_new(32843805, 49066843, 28425824, 19521495), u32x4::const_new(48792073, 48242878, 27392443, 13175986), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(16185025, 61537525, 2961305, 1492442), u32x4::const_new(25123147, 3095034, 31896958, 33089615), ), - ( + u32x4x2::new( u32x4::const_new(64748157, 18336595, 16522231, 25426312), u32x4::const_new(65718949, 35485695, 30554083, 10205918), ), - ( + u32x4x2::new( u32x4::const_new(39626934, 39271045, 16420458, 9826240), u32x4::const_new(56483981, 27128085, 3783403, 13360006), ), - ( + u32x4x2::new( u32x4::const_new(30793778, 66771960, 17241420, 6564573), u32x4::const_new(61102581, 29974476, 32385512, 9011754), ), - ( + u32x4x2::new( u32x4::const_new(28068166, 11862220, 14323567, 12380617), u32x4::const_new(52090465, 16029056, 24495309, 21409233), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(59411973, 57437124, 11695483, 17586857), u32x4::const_new(16108987, 43449109, 31098002, 6248476), ), - ( + u32x4x2::new( u32x4::const_new(42258047, 61595931, 29308533, 11742653), u32x4::const_new(43042345, 27373650, 30165249, 21929989), ), - ( + u32x4x2::new( u32x4::const_new(49907221, 9620337, 21888081, 20981082), u32x4::const_new(56288861, 61562203, 33223566, 3582446), ), - ( + u32x4x2::new( u32x4::const_new(57535017, 41003416, 22080416, 14463796), u32x4::const_new(65518565, 18127889, 24370863, 33332664), ), - ( + u32x4x2::new( u32x4::const_new(66655380, 6430175, 471782, 11947673), u32x4::const_new(30596400, 18898659, 15930721, 4211851), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(6757410, 65455566, 13584784, 11362173), u32x4::const_new(10797127, 24451471, 19541370, 29309435), ), - ( + u32x4x2::new( u32x4::const_new(40360156, 17685025, 18326181, 3846903), 
u32x4::const_new(13693365, 63049479, 31900359, 23385063), ), - ( + u32x4x2::new( u32x4::const_new(52455038, 57513503, 22163311, 27095042), u32x4::const_new(48610726, 66454160, 12085341, 26357004), ), - ( + u32x4x2::new( u32x4::const_new(22097042, 14063840, 6705778, 14342902), u32x4::const_new(66139825, 20702105, 31279090, 7495745), ), - ( + u32x4x2::new( u32x4::const_new(27360710, 49314837, 18774847, 7146436), u32x4::const_new(37066216, 42004961, 22409916, 10524446), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(1497507, 33054449, 11839906, 2960428), u32x4::const_new(40538463, 18884538, 25018820, 4073970), ), - ( + u32x4x2::new( u32x4::const_new(54484385, 43640735, 2808257, 20710708), u32x4::const_new(39840730, 27222424, 21783544, 11848522), ), - ( + u32x4x2::new( u32x4::const_new(45765237, 48200555, 9299019, 9393151), u32x4::const_new(34818188, 56098995, 13575233, 21012731), ), - ( + u32x4x2::new( u32x4::const_new(4265428, 49627650, 24960282, 9425650), u32x4::const_new(47883651, 2797524, 11853190, 22877329), ), - ( + u32x4x2::new( u32x4::const_new(25008173, 64199503, 380047, 12107343), u32x4::const_new(12329448, 11914399, 764281, 29687002), ), ])), CachedPoint(FieldElement2625x4([ - ( + u32x4x2::new( u32x4::const_new(35889734, 23047226, 4022841, 7017445), u32x4::const_new(7274086, 53316179, 25100176, 15310676), ), - ( + u32x4x2::new( u32x4::const_new(42409427, 30270106, 6823853, 31551384), u32x4::const_new(40645017, 66489807, 18021817, 32669351), ), - ( + u32x4x2::new( u32x4::const_new(39827134, 43680850, 28297996, 20258133), u32x4::const_new(26058742, 52643238, 22238331, 21690533), ), - ( + u32x4x2::new( u32x4::const_new(60808002, 17499995, 30042246, 29310584), u32x4::const_new(48219954, 29389518, 8680514, 17844709), ), - ( + u32x4x2::new( u32x4::const_new(6452896, 50116553, 9532047, 26821214), u32x4::const_new(44524351, 50428429, 21904953, 12608048), ), diff --git a/curve25519-dalek/src/backend/vector/neon/field.rs 
b/curve25519-dalek/src/backend/vector/neon/field.rs index 4ace4d84c..6ccec8f32 100644 --- a/curve25519-dalek/src/backend/vector/neon/field.rs +++ b/curve25519-dalek/src/backend/vector/neon/field.rs @@ -20,10 +20,9 @@ //! changes where made to account for different structure in //! arm instructions. -use core::arch::aarch64::{self, vuzp1_u32}; use core::ops::{Add, Mul, Neg}; -use super::packed_simd::{u32x2, u32x4, i32x4, u64x2, u64x4}; +use super::packed_simd::{i32x4, u32x2, u32x2x2, u32x4, u32x4x2, u64x2, u64x4}; use crate::backend::serial::u64::field::FieldElement51; use crate::backend::vector::neon::constants::{ P_TIMES_16_HI, P_TIMES_16_LO, P_TIMES_2_HI, P_TIMES_2_LO, @@ -63,8 +62,33 @@ use core::arch::aarch64::vget_high_u32; #[cfg(not(target_arch = "aarch64"))] use core::arch::aarch64::vget_low_u32; +// Shuffle the lanes in a u32x4x2 macro_rules! shuffle { - ($vec0:expr, $vec1:expr, $index:expr) => { + ($vec:expr , $index:expr) => { + unsafe { + let v_n: [u32;8] = [ + $vec.extract::<0>(), + $vec.extract::<1>(), + $vec.extract::<2>(), + $vec.extract::<3>(), + $vec.extract::<4>(), + $vec.extract::<5>(), + $vec.extract::<6>(), + $vec.extract::<7>() + ]; + u32x4x2::new( + core::mem::transmute::<[u32; 4], u32x4>( + [v_n[$index[0]], v_n[$index[1]], v_n[$index[2]], v_n[$index[3]]]), + core::mem::transmute::<[u32; 4], u32x4>( + [v_n[$index[4]], v_n[$index[5]], v_n[$index[6]], v_n[$index[7]]]) + ) + } + }; +} + +// Blend the lanes of two u32x4 +macro_rules! blend { + ($vec0: expr, $vec1: expr, $index:expr) => { unsafe { let v_n: [u32;8] = [ $vec0.extract::<0>(), @@ -77,8 +101,7 @@ macro_rules! shuffle { $vec1.extract::<3>() ]; core::mem::transmute::<[u32; 4], u32x4>( - [v_n[$index[0]], v_n[$index[1]], v_n[$index[2]], v_n[$index[3]]] - ) + [v_n[$index[0]], v_n[$index[1]], v_n[$index[2]], v_n[$index[3]]]) } }; } @@ -89,19 +112,18 @@ macro_rules! 
shuffle { /// ((a0, b0), (c0, d0)) /// ((a1, b1), (c1, d1)) #[inline(always)] -fn unpack_pair(src: (u32x4, u32x4)) -> ((u32x2, u32x2), (u32x2, u32x2)) { +fn unpack_pair(src: u32x4x2) -> (u32x2x2, u32x2x2) { let a0: u32x2; let a1: u32x2; let b0: u32x2; let b1: u32x2; unsafe { - use core::arch::aarch64::vget_low_u32; - a0 = vget_low_u32(src.0.into()).into(); - a1 = vget_low_u32(src.1.into()).into(); - b0 = vget_high_u32(src.0.into()).into(); - b1 = vget_high_u32(src.1.into()).into(); + a0 = vget_low_u32(src.0.0).into(); + a1 = vget_low_u32(src.0.1).into(); + b0 = vget_high_u32(src.0.0).into(); + b1 = vget_high_u32(src.0.1).into(); } - return ((a0, a1), (b0, b1)); + return (u32x2x2::new(a0, a1), u32x2x2::new(b0, b1)); } /// ((a0, 0, b0, 0), (c0, 0, d0, 0)) @@ -110,19 +132,19 @@ fn unpack_pair(src: (u32x4, u32x4)) -> ((u32x2, u32x2), (u32x2, u32x2)) { /// ((a0, b0, a1, b1), (c0, d0, c1, d1)) #[inline(always)] #[rustfmt::skip] // Retain formatting of the return tuples -fn repack_pair(x: (u32x4, u32x4), y: (u32x4, u32x4)) -> (u32x4, u32x4) { +fn repack_pair(x: u32x4x2, y: u32x4x2) -> u32x4x2 { unsafe { use core::arch::aarch64::vcombine_u32; use core::arch::aarch64::vget_low_u32; use core::arch::aarch64::vtrn1_u32; - ( + u32x4x2::new( vcombine_u32( - vtrn1_u32(vget_low_u32(x.0.into()), vget_high_u32(x.0.into())), - vtrn1_u32(vget_low_u32(y.0.into()), vget_high_u32(y.0.into()))).into(), + vtrn1_u32(vget_low_u32(x.0.0), vget_high_u32(x.0.0)), + vtrn1_u32(vget_low_u32(y.0.0), vget_high_u32(y.0.0))).into(), vcombine_u32( - vtrn1_u32(vget_low_u32(x.1.into()), vget_high_u32(x.1.into())), - vtrn1_u32(vget_low_u32(y.1.into()), vget_high_u32(y.1.into()))).into() + vtrn1_u32(vget_low_u32(x.0.1), vget_high_u32(x.0.1)), + vtrn1_u32(vget_low_u32(y.0.1), vget_high_u32(y.0.1))).into(), ) } } @@ -154,7 +176,7 @@ pub enum Shuffle { } #[derive(Clone, Copy, Debug)] -pub struct FieldElement2625x4(pub(crate) [(u32x4, u32x4); 5]); +pub struct FieldElement2625x4(pub(crate) [u32x4x2; 5]); use 
subtle::Choice; use subtle::ConditionallySelectable; @@ -166,44 +188,24 @@ impl ConditionallySelectable for FieldElement2625x4 { choice: Choice, ) -> FieldElement2625x4 { let mask = (-(choice.unwrap_u8() as i32)) as u32; - let mask_vec = u32x4::splat(mask); + let mask_vec = u32x4x2::splat(mask); FieldElement2625x4([ - ( - a.0[0].0 ^ (mask_vec & (a.0[0].0 ^ b.0[0].0)), - a.0[0].1 ^ (mask_vec & (a.0[0].1 ^ b.0[0].1)), - ), - ( - a.0[1].0 ^ (mask_vec & (a.0[1].0 ^ b.0[1].0)), - a.0[1].1 ^ (mask_vec & (a.0[1].1 ^ b.0[1].1)), - ), - ( - a.0[2].0 ^ (mask_vec & (a.0[2].0 ^ b.0[2].0)), - a.0[2].1 ^ (mask_vec & (a.0[2].1 ^ b.0[2].1)), - ), - ( - a.0[3].0 ^ (mask_vec & (a.0[3].0 ^ b.0[3].0)), - a.0[3].1 ^ (mask_vec & (a.0[3].1 ^ b.0[3].1)), - ), - ( - a.0[4].0 ^ (mask_vec & (a.0[4].0 ^ b.0[4].0)), - a.0[4].1 ^ (mask_vec & (a.0[4].1 ^ b.0[4].1)), - ), + a.0[0] ^ (mask_vec & (a.0[0] ^ b.0[0])), + a.0[1] ^ (mask_vec & (a.0[1] ^ b.0[1])), + a.0[2] ^ (mask_vec & (a.0[2] ^ b.0[2])), + a.0[3] ^ (mask_vec & (a.0[3] ^ b.0[3])), + a.0[4] ^ (mask_vec & (a.0[4] ^ b.0[4])) ]) } fn conditional_assign(&mut self, other: &FieldElement2625x4, choice: Choice) { let mask = (-(choice.unwrap_u8() as i32)) as u32; - let mask_vec = u32x4::splat(mask); - self.0[0].0 ^= mask_vec & (self.0[0].0 ^ other.0[0].0); - self.0[0].1 ^= mask_vec & (self.0[0].1 ^ other.0[0].1); - self.0[1].0 ^= mask_vec & (self.0[1].0 ^ other.0[1].0); - self.0[1].1 ^= mask_vec & (self.0[1].1 ^ other.0[1].1); - self.0[2].0 ^= mask_vec & (self.0[2].0 ^ other.0[2].0); - self.0[2].1 ^= mask_vec & (self.0[2].1 ^ other.0[2].1); - self.0[3].0 ^= mask_vec & (self.0[3].0 ^ other.0[3].0); - self.0[3].1 ^= mask_vec & (self.0[3].1 ^ other.0[3].1); - self.0[4].0 ^= mask_vec & (self.0[4].0 ^ other.0[4].0); - self.0[4].1 ^= mask_vec & (self.0[4].1 ^ other.0[4].1); + let mask_vec = u32x4x2::splat(mask); + self.0[0] ^= mask_vec & (self.0[0] ^ other.0[0]); + self.0[1] ^= mask_vec & (self.0[1] ^ other.0[1]); + self.0[2] ^= mask_vec & (self.0[2] ^ 
other.0[2]); + self.0[3] ^= mask_vec & (self.0[3] ^ other.0[3]); + self.0[4] ^= mask_vec & (self.0[4] ^ other.0[4]); } } @@ -211,14 +213,14 @@ impl FieldElement2625x4 { pub fn split(&self) -> [FieldElement51; 4] { let mut out = [FieldElement51::ZERO; 4]; for i in 0..5 { - let a_2i = self.0[i].0.extract::<0>() as u64; - let b_2i = self.0[i].0.extract::<1>() as u64; - let a_2i_1 = self.0[i].0.extract::<2>() as u64; - let b_2i_1 = self.0[i].0.extract::<3>() as u64; - let c_2i = self.0[i].1.extract::<0>() as u64; - let d_2i = self.0[i].1.extract::<1>() as u64; - let c_2i_1 = self.0[i].1.extract::<2>() as u64; - let d_2i_1 = self.0[i].1.extract::<3>() as u64; + let a_2i = self.0[i].extract::<0>() as u64; + let b_2i = self.0[i].extract::<1>() as u64; + let a_2i_1 = self.0[i].extract::<2>() as u64; + let b_2i_1 = self.0[i].extract::<3>() as u64; + let c_2i = self.0[i].extract::<4>() as u64; + let d_2i = self.0[i].extract::<5>() as u64; + let c_2i_1 = self.0[i].extract::<6>() as u64; + let d_2i_1 = self.0[i].extract::<7>() as u64; out[0].0[i] = a_2i + (a_2i_1 << 26); out[1].0[i] = b_2i + (b_2i_1 << 26); @@ -233,18 +235,18 @@ impl FieldElement2625x4 { pub fn shuffle(&self, control: Shuffle) -> FieldElement2625x4 { #[inline(always)] #[rustfmt::skip] // Retain format of the return tuples - fn shuffle_lanes(x: (u32x4, u32x4), control: Shuffle) -> (u32x4, u32x4) { + fn shuffle_lanes(x: u32x4x2, control: Shuffle) -> u32x4x2 { match control { - Shuffle::AAAA => (shuffle!(x.0, x.1, [0, 0, 2, 2]), shuffle!(x.0, x.1, [0, 0, 2, 2])), - Shuffle::BBBB => (shuffle!(x.0, x.1, [1, 1, 3, 3]), shuffle!(x.0, x.1, [1, 1, 3, 3])), - Shuffle::CACA => (shuffle!(x.0, x.1, [4, 0, 6, 2]), shuffle!(x.0, x.1, [4, 0, 6, 2])), - Shuffle::DBBD => (shuffle!(x.0, x.1, [5, 1, 7, 3]), shuffle!(x.0, x.1, [1, 5, 3, 7])), - Shuffle::ADDA => (shuffle!(x.0, x.1, [0, 5, 2, 7]), shuffle!(x.0, x.1, [5, 0, 7, 2])), - Shuffle::CBCB => (shuffle!(x.0, x.1, [4, 1, 6, 3]), shuffle!(x.0, x.1, [4, 1, 6, 3])), - 
Shuffle::ABAB => (shuffle!(x.0, x.1, [0, 1, 2, 3]), shuffle!(x.0, x.1, [0, 1, 2, 3])), - Shuffle::BADC => (shuffle!(x.0, x.1, [1, 0, 3, 2]), shuffle!(x.0, x.1, [5, 4, 7, 6])), - Shuffle::BACD => (shuffle!(x.0, x.1, [1, 0, 3, 2]), shuffle!(x.0, x.1, [4, 5, 6, 7])), - Shuffle::ABDC => (shuffle!(x.0, x.1, [0, 1, 2, 3]), shuffle!(x.0, x.1, [5, 4, 7, 6])), + Shuffle::AAAA => shuffle!(x, [0, 0, 2, 2, 0, 0, 2, 2]), + Shuffle::BBBB => shuffle!(x, [1, 1, 3, 3, 1, 1, 3, 3]), + Shuffle::CACA => shuffle!(x, [4, 0, 6, 2, 4, 0, 6, 2]), + Shuffle::DBBD => shuffle!(x, [5, 1, 7, 3, 1, 5, 3, 7]), + Shuffle::ADDA => shuffle!(x, [0, 5, 2, 7, 5, 0, 7, 2]), + Shuffle::CBCB => shuffle!(x, [4, 1, 6, 3, 4, 1, 6, 3]), + Shuffle::ABAB => shuffle!(x, [0, 1, 2, 3, 0, 1, 2, 3]), + Shuffle::BADC => shuffle!(x, [1, 0, 3, 2, 5, 4, 7, 6]), + Shuffle::BACD => shuffle!(x, [1, 0, 3, 2, 4, 5, 6, 7]), + Shuffle::ABDC => shuffle!(x, [0, 1, 2, 3, 5, 4, 7, 6]), } } @@ -271,15 +273,19 @@ impl FieldElement2625x4 { pub fn blend(&self, other: FieldElement2625x4, control: Lanes) -> FieldElement2625x4 { #[inline(always)] #[rustfmt::skip] // Retain format of the return tuples - fn blend_lanes(x: (u32x4, u32x4), y: (u32x4, u32x4), control: Lanes) -> (u32x4, u32x4) { + fn blend_lanes(x: u32x4x2, y: u32x4x2, control: Lanes) -> u32x4x2 { + let x0 = u32x4::from(x.0.0); + let x1 = u32x4::from(x.0.1); + let y0 = u32x4::from(y.0.0); + let y1 = u32x4::from(y.0.1); match control { - Lanes::C => (x.0, shuffle!(y.1, x.1, [0, 5, 2, 7])), - Lanes::D => (x.0, shuffle!(y.1, x.1, [4, 1, 6, 3])), - Lanes::AD => (shuffle!(y.0, x.0, [0, 5, 2, 7]), shuffle!(y.1, x.1, [4, 1, 6, 3])), - Lanes::AB => (y.0, x.1), - Lanes::AC => (shuffle!(y.0, x.0, [0, 5, 2, 7]), shuffle!(y.1, x.1, [0, 5, 2, 7])), - Lanes::CD => (x.0, y.1), - Lanes::BC => (shuffle!(y.0, x.0, [4, 1, 6, 3]), shuffle!(y.1, x.1, [0, 5, 2, 7])), + Lanes::C => u32x4x2::new(x0, blend!(y1, x1, [0, 5, 2, 7])), + Lanes::D => u32x4x2::new(x0, blend!(y1, x1, [4, 1, 6, 3])), + 
Lanes::AD => u32x4x2::new(blend!(y0, x0, [0, 5, 2, 7]), blend!(y1, x1, [4, 1, 6, 3])), + Lanes::AB => u32x4x2::new(y0, x1), + Lanes::AC => u32x4x2::new(blend!(y0, x0, [0, 5, 2, 7]), blend!(y1, x1, [0, 5, 2, 7])), + Lanes::CD => u32x4x2::new(x0, y1), + Lanes::BC => u32x4x2::new(blend!(y0, x0, [4, 1, 6, 3]), blend!(y1, x1, [0, 5, 2, 7])), Lanes::ABCD => y, } } @@ -294,7 +300,7 @@ impl FieldElement2625x4 { } pub fn zero() -> FieldElement2625x4 { - FieldElement2625x4([(u32x4::splat(0), u32x4::splat(0)); 5]) + FieldElement2625x4([u32x4x2::splat(0); 5]) } pub fn splat(x: &FieldElement51) -> FieldElement2625x4 { @@ -307,7 +313,7 @@ impl FieldElement2625x4 { x2: &FieldElement51, x3: &FieldElement51, ) -> FieldElement2625x4 { - let mut buf = [(u32x4::splat(0), u32x4::splat(0)); 5]; + let mut buf = [u32x4x2::splat(0); 5]; let low_26_bits = (1 << 26) - 1; for i in 0..5 { let a_2i = (x0.0[i] & low_26_bits) as u32; @@ -319,9 +325,9 @@ impl FieldElement2625x4 { let d_2i = (x3.0[i] & low_26_bits) as u32; let d_2i_1 = (x3.0[i] >> 26) as u32; - buf[i] = ( + buf[i] = u32x4x2::new( u32x4::new(a_2i, b_2i, a_2i_1, b_2i_1), - u32x4::new(c_2i, d_2i, c_2i_1, d_2i_1), + u32x4::new(c_2i, d_2i, c_2i_1, d_2i_1) ); } return FieldElement2625x4(buf).reduce(); @@ -330,11 +336,11 @@ impl FieldElement2625x4 { #[inline] pub fn negate_lazy(&self) -> FieldElement2625x4 { FieldElement2625x4([ - (P_TIMES_2_LO.0 - self.0[0].0, P_TIMES_2_LO.1 - self.0[0].1), - (P_TIMES_2_HI.0 - self.0[1].0, P_TIMES_2_HI.1 - self.0[1].1), - (P_TIMES_2_HI.0 - self.0[2].0, P_TIMES_2_HI.1 - self.0[2].1), - (P_TIMES_2_HI.0 - self.0[3].0, P_TIMES_2_HI.1 - self.0[3].1), - (P_TIMES_2_HI.0 - self.0[4].0, P_TIMES_2_HI.1 - self.0[4].1), + P_TIMES_2_LO - self.0[0], + P_TIMES_2_HI - self.0[1], + P_TIMES_2_HI - self.0[2], + P_TIMES_2_HI - self.0[3], + P_TIMES_2_HI - self.0[4], ]) } @@ -351,50 +357,50 @@ impl FieldElement2625x4 { i32x4::new(-26, -26, -25, -25), i32x4::new(-26, -26, -25, -25), ); - let masks = ( + let masks = 
u32x4x2::new( u32x4::new((1 << 26) - 1, (1 << 26) - 1, (1 << 25) - 1, (1 << 25) - 1), u32x4::new((1 << 26) - 1, (1 << 26) - 1, (1 << 25) - 1, (1 << 25) - 1), ); // Use mutliple transposes instead of table lookup? - let rotated_carryout = |v: (u32x4, u32x4)| -> (u32x4, u32x4) { + let rotated_carryout = |v: u32x4x2| -> u32x4x2 { unsafe { use core::arch::aarch64::vcombine_u32; use core::arch::aarch64::vget_low_u32; use core::arch::aarch64::vqshlq_u32; - let c: (u32x4, u32x4) = ( - vqshlq_u32(v.0.into(), shifts.0.into()).into(), - vqshlq_u32(v.1.into(), shifts.1.into()).into(), + let c: u32x4x2 = u32x4x2::new( + vqshlq_u32(v.0.0, shifts.0.into()).into(), + vqshlq_u32(v.0.1, shifts.1.into()).into(), ); - ( + u32x4x2::new( vcombine_u32( - vget_high_u32(c.0.into()), - vget_low_u32(c.0.into()), + vget_high_u32(c.0.0), + vget_low_u32(c.0.0), ) .into(), vcombine_u32( - vget_high_u32(c.1.into()), - vget_low_u32(c.1.into()), + vget_high_u32(c.0.1), + vget_low_u32(c.0.1), ) .into(), ) } }; - let combine = |v_lo: (u32x4, u32x4), v_hi: (u32x4, u32x4)| -> (u32x4, u32x4) { + let combine = |v_lo: u32x4x2, v_hi: u32x4x2| -> u32x4x2 { unsafe { use core::arch::aarch64::vcombine_u32; use core::arch::aarch64::vget_low_u32; - ( + u32x4x2::new( vcombine_u32( - vget_low_u32(v_lo.0.into()), - vget_high_u32(v_hi.0.into()), + vget_low_u32(v_lo.0.0), + vget_high_u32(v_hi.0.0), ) .into(), vcombine_u32( - vget_low_u32(v_lo.1.into()), - vget_high_u32(v_hi.1.into()), + vget_low_u32(v_lo.0.1), + vget_high_u32(v_hi.0.1), ) .into(), ) @@ -404,44 +410,46 @@ impl FieldElement2625x4 { let mut v = self.0; let c10 = rotated_carryout(v[0]); - let mut com = combine((u32x4::splat(0), u32x4::splat(0)), c10); - v[0] = ((v[0].0 & masks.0) + com.0, (v[0].1 & masks.1) + com.1); + let mut com = combine(u32x4x2::splat(0), c10); + v[0] = (v[0] & masks) + com; let c32 = rotated_carryout(v[1]); com = combine(c10, c32); - v[1] = ((v[1].0 & masks.0) + com.0, (v[1].1 & masks.1) + com.1); + v[1] = (v[1] & masks) + com; let 
c54 = rotated_carryout(v[2]); com = combine(c32, c54); - v[2] = ((v[2].0 & masks.0) + com.0, (v[2].1 & masks.1) + com.1); + v[2] = (v[2] & masks) + com; let c76 = rotated_carryout(v[3]); com = combine(c54, c76); - v[3] = ((v[3].0 & masks.0) + com.0, (v[3].1 & masks.1) + com.1); + v[3] = (v[3] & masks) + com; let c98 = rotated_carryout(v[4]); com = combine(c76, c98); - v[4] = ((v[4].0 & masks.0) + com.0, (v[4].1 & masks.1) + com.1); + v[4] = (v[4] & masks) + com; #[rustfmt::skip] // Retain formatting of return tuple - let c9_19: (u32x4, u32x4) = unsafe { + let c9_19: u32x4x2 = unsafe { use core::arch::aarch64::vcombine_u32; use core::arch::aarch64::vget_low_u32; use core::arch::aarch64::vmulq_n_u32; - let c9_19_spread: (u32x4, u32x4) = ( - vmulq_n_u32(c98.0.into(), 19).into(), - vmulq_n_u32(c98.1.into(), 19).into(), + let c9_19_spread: u32x4x2 = u32x4x2::new( + vmulq_n_u32(c98.0.0, 19).into(), + vmulq_n_u32(c98.0.1, 19).into(), ); - (vcombine_u32(vget_low_u32(c9_19_spread.0.into()), u32x2::splat(0).into()).into(), - vcombine_u32(vget_low_u32(c9_19_spread.1.into()), u32x2::splat(0).into()).into()) + u32x4x2::new( + vcombine_u32(vget_low_u32(c9_19_spread.0.0), u32x2::splat(0).into()).into(), + vcombine_u32(vget_low_u32(c9_19_spread.0.1), u32x2::splat(0).into()).into()) }; - v[0] = (v[0].0 + c9_19.0, v[0].1 + c9_19.1); + v[0] = v[0] + c9_19; FieldElement2625x4(v) } + // TODO: use arm types #[inline] #[rustfmt::skip] // Retain formatting of carry and repacking fn reduce64(mut z: [(u64x2, u64x2); 10]) -> FieldElement2625x4 { @@ -490,11 +498,11 @@ impl FieldElement2625x4 { carry(&mut z, 0); FieldElement2625x4([ - repack_pair((z[0].0.into(), z[0].1.into()), (z[1].0.into(), z[1].1.into())), - repack_pair((z[2].0.into(), z[2].1.into()), (z[3].0.into(), z[3].1.into())), - repack_pair((z[4].0.into(), z[4].1.into()), (z[5].0.into(), z[5].1.into())), - repack_pair((z[6].0.into(), z[6].1.into()), (z[7].0.into(), z[7].1.into())), - repack_pair((z[8].0.into(), z[8].1.into()), 
(z[9].0.into(), z[9].1.into())), + repack_pair(u32x4x2::new(z[0].0.into(), z[0].1.into()), u32x4x2::new(z[1].0.into(), z[1].1.into())), + repack_pair(u32x4x2::new(z[2].0.into(), z[2].1.into()), u32x4x2::new(z[3].0.into(), z[3].1.into())), + repack_pair(u32x4x2::new(z[4].0.into(), z[4].1.into()), u32x4x2::new(z[5].0.into(), z[5].1.into())), + repack_pair(u32x4x2::new(z[6].0.into(), z[6].1.into()), u32x4x2::new(z[7].0.into(), z[7].1.into())), + repack_pair(u32x4x2::new(z[8].0.into(), z[8].1.into()), u32x4x2::new(z[9].0.into(), z[9].1.into())), ]) } @@ -502,33 +510,33 @@ impl FieldElement2625x4 { #[rustfmt::skip] // keep alignment of formulas pub fn square_and_negate_D(&self) -> FieldElement2625x4 { #[inline(always)] - fn m(x: (u32x2, u32x2), y: (u32x2, u32x2)) -> u64x4 { + fn m(x: u32x2x2, y: u32x2x2) -> u64x4 { use core::arch::aarch64::vmull_u32; unsafe { - let z0: u64x2 = vmull_u32(x.0.into(), y.0.into()).into(); - let z1: u64x2 = vmull_u32(x.1.into(), y.1.into()).into(); - u64x4((z0, z1)) + let z0: u64x2 = vmull_u32(x.0.0, y.0.0).into(); + let z1: u64x2 = vmull_u32(x.0.1, y.0.1).into(); + u64x4::new(z0, z1) } } #[inline(always)] - fn m_lo(x: (u32x2, u32x2), y: (u32x2, u32x2)) -> (u32x2, u32x2) { + fn m_lo(x: u32x2x2, y: u32x2x2) -> u32x2x2 { use core::arch::aarch64::vmull_u32; use core::arch::aarch64::vuzp1_u32; use core::arch::aarch64::vget_low_u32; unsafe { - let x: (u32x4, u32x4) = ( - vmull_u32(x.0.into(), y.0.into()).into(), - vmull_u32(x.1.into(), y.1.into()).into(), + let x: u32x4x2 = u32x4x2::new( + vmull_u32(x.0.0, y.0.0).into(), + vmull_u32(x.0.1, y.0.1).into(), ); - ( - vuzp1_u32(vget_low_u32(x.0.into()), vget_high_u32(x.0.into())).into(), - vuzp1_u32(vget_low_u32(x.1.into()), vget_high_u32(x.1.into())).into() + u32x2x2::new( + vuzp1_u32(vget_low_u32(x.0.0), vget_high_u32(x.0.0)).into(), + vuzp1_u32(vget_low_u32(x.0.1), vget_high_u32(x.0.1)).into() ) } } - let v19 = (u32x2::new(19, 19), u32x2::new(19, 19)); + let v19 = u32x2x2::new(u32x2::new(19, 19), 
u32x2::new(19, 19)); let (x0, x1) = unpack_pair(self.0[0]); let (x2, x3) = unpack_pair(self.0[1]); @@ -536,14 +544,14 @@ impl FieldElement2625x4 { let (x6, x7) = unpack_pair(self.0[3]); let (x8, x9) = unpack_pair(self.0[4]); - let x0_2 = (x0.0.shl::<1>(), x0.1.shl::<1>()); - let x1_2 = (x1.0.shl::<1>(), x1.1.shl::<1>()); - let x2_2 = (x2.0.shl::<1>(), x2.1.shl::<1>()); - let x3_2 = (x3.0.shl::<1>(), x3.1.shl::<1>()); - let x4_2 = (x4.0.shl::<1>(), x4.1.shl::<1>()); - let x5_2 = (x5.0.shl::<1>(), x5.1.shl::<1>()); - let x6_2 = (x6.0.shl::<1>(), x6.1.shl::<1>()); - let x7_2 = (x7.0.shl::<1>(), x7.1.shl::<1>()); + let x0_2 = x0.shl::<1>(); + let x1_2 = x1.shl::<1>(); + let x2_2 = x2.shl::<1>(); + let x3_2 = x3.shl::<1>(); + let x4_2 = x4.shl::<1>(); + let x5_2 = x5.shl::<1>(); + let x6_2 = x6.shl::<1>(); + let x7_2 = x7.shl::<1>(); let x5_19 = m_lo(v19, x5); let x6_19 = m_lo(v19, x6); @@ -569,14 +577,14 @@ impl FieldElement2625x4 { let negate_D = |x_01: u64x4, p_01: u64x4| -> (u64x2, u64x2) { unsafe { - use core::arch::aarch64::vget_low_u32; use core::arch::aarch64::vcombine_u32; + use core::arch::aarch64::vreinterpretq_u32_u64; + use core::arch::aarch64::vsubq_u64; - let x = x_01.0; - let p = p_01.0; - (x.0.into(), - vcombine_u32(vget_low_u32(x.1.into()), - vget_high_u32((p.1 - x.1).into())).into()) + (u64x2(x_01.0.0), + vcombine_u32( + vget_low_u32(vreinterpretq_u32_u64(x_01.0.1)), + vget_high_u32(vreinterpretq_u32_u64(vsubq_u64(p_01.0.1, x_01.0.1)))).into()) } }; @@ -600,11 +608,11 @@ impl Neg for FieldElement2625x4 { #[inline] fn neg(self) -> FieldElement2625x4 { FieldElement2625x4([ - (P_TIMES_16_LO.0 - self.0[0].0, P_TIMES_16_LO.1 - self.0[0].1), - (P_TIMES_16_HI.0 - self.0[1].0, P_TIMES_16_HI.1 - self.0[1].1), - (P_TIMES_16_HI.0 - self.0[2].0, P_TIMES_16_HI.1 - self.0[2].1), - (P_TIMES_16_HI.0 - self.0[3].0, P_TIMES_16_HI.1 - self.0[3].1), - (P_TIMES_16_HI.0 - self.0[4].0, P_TIMES_16_HI.1 - self.0[4].1), + P_TIMES_16_LO - self.0[0], + P_TIMES_16_HI - self.0[1], 
+ P_TIMES_16_HI - self.0[2], + P_TIMES_16_HI - self.0[3], + P_TIMES_16_HI - self.0[4], ]) .reduce() } @@ -615,11 +623,11 @@ impl Add for FieldElement2625x4 { #[inline] fn add(self, rhs: FieldElement2625x4) -> FieldElement2625x4 { FieldElement2625x4([ - (self.0[0].0 + rhs.0[0].0, self.0[0].1 + rhs.0[0].1), - (self.0[1].0 + rhs.0[1].0, self.0[1].1 + rhs.0[1].1), - (self.0[2].0 + rhs.0[2].0, self.0[2].1 + rhs.0[2].1), - (self.0[3].0 + rhs.0[3].0, self.0[3].1 + rhs.0[3].1), - (self.0[4].0 + rhs.0[4].0, self.0[4].1 + rhs.0[4].1), + self.0[0] + rhs.0[0], + self.0[1] + rhs.0[1], + self.0[2] + rhs.0[2], + self.0[3] + rhs.0[3], + self.0[4] + rhs.0[4], ]) } } @@ -644,16 +652,16 @@ impl Mul<(u32, u32, u32, u32)> for FieldElement2625x4 { let (b8, b9) = unpack_pair(self.0[4]); FieldElement2625x4::reduce64([ - (vmull_u32(b0.0.into(), consts.0.into()).into(), vmull_u32(b0.1.into(), consts.1.into()).into()), - (vmull_u32(b1.0.into(), consts.0.into()).into(), vmull_u32(b1.1.into(), consts.1.into()).into()), - (vmull_u32(b2.0.into(), consts.0.into()).into(), vmull_u32(b2.1.into(), consts.1.into()).into()), - (vmull_u32(b3.0.into(), consts.0.into()).into(), vmull_u32(b3.1.into(), consts.1.into()).into()), - (vmull_u32(b4.0.into(), consts.0.into()).into(), vmull_u32(b4.1.into(), consts.1.into()).into()), - (vmull_u32(b5.0.into(), consts.0.into()).into(), vmull_u32(b5.1.into(), consts.1.into()).into()), - (vmull_u32(b6.0.into(), consts.0.into()).into(), vmull_u32(b6.1.into(), consts.1.into()).into()), - (vmull_u32(b7.0.into(), consts.0.into()).into(), vmull_u32(b7.1.into(), consts.1.into()).into()), - (vmull_u32(b8.0.into(), consts.0.into()).into(), vmull_u32(b8.1.into(), consts.1.into()).into()), - (vmull_u32(b9.0.into(), consts.0.into()).into(), vmull_u32(b9.1.into(), consts.1.into()).into()) + (vmull_u32(b0.0.0, consts.0.into()).into(), vmull_u32(b0.0.1, consts.1.into()).into()), + (vmull_u32(b1.0.0, consts.0.into()).into(), vmull_u32(b1.0.1, consts.1.into()).into()), + 
(vmull_u32(b2.0.0, consts.0.into()).into(), vmull_u32(b2.0.1, consts.1.into()).into()), + (vmull_u32(b3.0.0, consts.0.into()).into(), vmull_u32(b3.0.1, consts.1.into()).into()), + (vmull_u32(b4.0.0, consts.0.into()).into(), vmull_u32(b4.0.1, consts.1.into()).into()), + (vmull_u32(b5.0.0, consts.0.into()).into(), vmull_u32(b5.0.1, consts.1.into()).into()), + (vmull_u32(b6.0.0, consts.0.into()).into(), vmull_u32(b6.0.1, consts.1.into()).into()), + (vmull_u32(b7.0.0, consts.0.into()).into(), vmull_u32(b7.0.1, consts.1.into()).into()), + (vmull_u32(b8.0.0, consts.0.into()).into(), vmull_u32(b8.0.1, consts.1.into()).into()), + (vmull_u32(b9.0.0, consts.0.into()).into(), vmull_u32(b9.0.1, consts.1.into()).into()) ]) } } @@ -665,32 +673,33 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { #[rustfmt::skip] // Retain formatting of z_i computation fn mul(self, rhs: &'b FieldElement2625x4) -> FieldElement2625x4 { #[inline(always)] - fn m(x: (u32x2, u32x2), y: (u32x2, u32x2)) -> u64x4 { + fn m(x: u32x2x2, y: u32x2x2) -> u64x4 { use core::arch::aarch64::vmull_u32; unsafe { - let z0: u64x2 = vmull_u32(x.0.into(), y.0.into()).into(); - let z1: u64x2 = vmull_u32(x.1.into(), y.1.into()).into(); - u64x4((z0, z1)) + let z0: u64x2 = vmull_u32(x.0.0, y.0.0).into(); + let z1: u64x2 = vmull_u32(x.0.1, y.0.1).into(); + u64x4::new(z0, z1) } } #[inline(always)] - fn m_lo(x: (u32x2, u32x2), y: (u32x2, u32x2)) -> (u32x2, u32x2) { + fn m_lo(x: u32x2x2, y: u32x2x2) -> u32x2x2 { use core::arch::aarch64::vmull_u32; use core::arch::aarch64::vuzp1_u32; use core::arch::aarch64::vget_low_u32; unsafe { - let x: (u32x4, u32x4) = ( - vmull_u32(x.0.into(), y.0.into()).into(), - vmull_u32(x.1.into(), y.1.into()).into(), + let x: u32x4x2 = u32x4x2::new( + vmull_u32(x.0.0, y.0.0).into(), + vmull_u32(x.0.1, y.0.1).into(), ); - ( - vuzp1_u32(vget_low_u32(x.0.into()), vget_high_u32(x.0.into())).into(), - vuzp1_u32(vget_low_u32(x.1.into()), vget_high_u32(x.1.into())).into() + 
u32x2x2::new( + vuzp1_u32(vget_low_u32(x.0.0), vget_high_u32(x.0.0)).into(), + vuzp1_u32(vget_low_u32(x.0.1), vget_high_u32(x.0.1)).into() ) } } + let (x0, x1) = unpack_pair(self.0[0]); let (x2, x3) = unpack_pair(self.0[1]); let (x4, x5) = unpack_pair(self.0[2]); @@ -703,7 +712,7 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { let (y6, y7) = unpack_pair(rhs.0[3]); let (y8, y9) = unpack_pair(rhs.0[4]); - let v19 = (u32x2::new(19, 19), u32x2::new(19, 19)); + let v19 = u32x2x2::new(u32x2::new(19, 19), u32x2::new(19, 19)); let y1_19 = m_lo(v19, y1); let y2_19 = m_lo(v19, y2); @@ -715,11 +724,11 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { let y8_19 = m_lo(v19, y8); let y9_19 = m_lo(v19, y9); - let x1_2 = (x1.0 + x1.0, x1.1 + x1.1); - let x3_2 = (x3.0 + x3.0, x3.1 + x3.1); - let x5_2 = (x5.0 + x5.0, x5.1 + x5.1); - let x7_2 = (x7.0 + x7.0, x7.1 + x7.1); - let x9_2 = (x9.0 + x9.0, x9.1 + x9.1); + let x1_2 = x1 + x1; + let x3_2 = x3 + x3; + let x5_2 = x5 + x5; + let x7_2 = x7 + x7; + let x9_2 = x9 + x9; let z0 = m(x0,y0) + m(x1_2,y9_19) + m(x2,y8_19) + m(x3_2,y7_19) + m(x4,y6_19) + m(x5_2,y5_19) + m(x6,y4_19) + m(x7_2,y3_19) + m(x8,y2_19) + m(x9_2,y1_19); let z1 = m(x0,y1) + m(x1,y0) + m(x2,y9_19) + m(x3,y8_19) + m(x4,y7_19) + m(x5,y6_19) + m(x6,y5_19) + m(x7,y4_19) + m(x8,y3_19) + m(x9,y2_19); @@ -734,8 +743,8 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { let f = |x: u64x4| -> (u64x2, u64x2) { ( - x.0.0, - x.0.1 + x.0.0.into(), + x.0.1.into() ) }; @@ -771,20 +780,20 @@ mod test { let (a, b) = unpack_pair(src); - let expected_a = (u32x2::new(10000, 10100), u32x2::new(10200, 10300)); - let expected_b = (u32x2::new(10001, 10101), u32x2::new(10201, 10301)); + let expected_a = u32x2x2::new(u32x2::new(10000, 10100), u32x2::new(10200, 10300)); + let expected_b = u32x2x2::new(u32x2::new(10001, 10101), u32x2::new(10201, 10301)); assert_eq!(a, expected_a); assert_eq!(b, expected_b); let expected_src = 
repack_pair( - ( - u32x4::new(a.0.extract::<0>(), 0, a.0.extract::<1>(), 0), - u32x4::new(a.1.extract::<0>(), 0, a.1.extract::<1>(), 0), + u32x4x2::new( + u32x4::new(a.extract::<0>(), 0, a.extract::<1>(), 0), + u32x4::new(a.extract::<2>(), 0, a.extract::<3>(), 0), ), - ( - u32x4::new(b.0.extract::<0>(), 0, b.0.extract::<1>(), 0), - u32x4::new(b.1.extract::<0>(), 0, b.1.extract::<1>(), 0), + u32x4x2::new( + u32x4::new(b.extract::<0>(), 0, b.extract::<1>(), 0), + u32x4::new(b.extract::<2>(), 0, b.extract::<3>(), 0), ), ); diff --git a/curve25519-dalek/src/backend/vector/neon/packed_simd.rs b/curve25519-dalek/src/backend/vector/neon/packed_simd.rs index bb9842591..32a9ce04c 100644 --- a/curve25519-dalek/src/backend/vector/neon/packed_simd.rs +++ b/curve25519-dalek/src/backend/vector/neon/packed_simd.rs @@ -27,7 +27,7 @@ macro_rules! impl_shared { #[allow(non_camel_case_types)] #[derive(Copy, Clone, Debug)] #[repr(transparent)] - pub struct $ty(core::arch::aarch64::$internal_ty); + pub struct $ty(pub core::arch::aarch64::$internal_ty); impl From<$ty> for core::arch::aarch64::$internal_ty { #[inline] @@ -249,6 +249,155 @@ impl From for u64x2 { } } +#[allow(non_camel_case_types)] +#[derive(Copy, Clone, Debug)] +#[repr(transparent)] +pub struct u32x4x2(pub core::arch::aarch64::uint32x4x2_t); + +impl PartialEq for u32x4x2 { + fn eq(&self, other: &Self) -> bool { + u32x4::from(self.0.0) == u32x4::from(other.0.0) + && u32x4::from(self.0.1) == u32x4::from(other.0.1) + } +} + +impl From for core::arch::aarch64::uint32x4x2_t { + #[inline] + fn from(value: u32x4x2) -> core::arch::aarch64::uint32x4x2_t { + value.0 + } +} + +impl From for u32x4x2 { + #[inline] + fn from(value: core::arch::aarch64::uint32x4x2_t) -> u32x4x2 { + u32x4x2(value) + } +} + +impl BitXor for u32x4x2 { + type Output = Self; + + fn bitxor(self, rhs: Self) -> Self::Output { + Self::new( + u32x4::from(self.0.0) ^ u32x4::from(rhs.0.0), + u32x4::from(self.0.1) ^ u32x4::from(rhs.0.1)) + } +} + +impl BitXorAssign 
for u32x4x2 { + fn bitxor_assign(&mut self, rhs: Self) { + *self = *self ^ rhs; + } +} + +impl BitAnd for u32x4x2 { + type Output = Self; + + fn bitand(self, rhs: Self) -> Self::Output { + Self::new( + u32x4::from(self.0.0) & u32x4::from(rhs.0.0), + u32x4::from(self.0.1) & u32x4::from(rhs.0.1)) + + } +} + +impl Add for u32x4x2 { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + Self::new( + u32x4::from(self.0.0) + u32x4::from(rhs.0.0), + u32x4::from(self.0.1) + u32x4::from(rhs.0.1)) + } +} + +impl Sub for u32x4x2 { + type Output = Self; + + fn sub(self, rhs: Self) -> Self::Output { + Self::new( + u32x4::from(self.0.0) - u32x4::from(rhs.0.0), + u32x4::from(self.0.1) - u32x4::from(rhs.0.1)) + } +} + +impl u32x4x2 { + #[inline] + pub const fn new(x0: u32x4, x1: u32x4) -> Self { + Self(core::arch::aarch64::uint32x4x2_t(x0.0, x1.0)) + } + + #[inline] + pub const fn splat(x: u32) -> Self { + Self(core::arch::aarch64::uint32x4x2_t(u32x4::const_splat(x).0, u32x4::const_splat(x).0)) + } + + #[inline] + pub fn extract(self) -> u32 { + match N { + 0 => u32x4::from(self.0.0).extract::<0>(), + 1 => u32x4::from(self.0.0).extract::<1>(), + 2 => u32x4::from(self.0.0).extract::<2>(), + 3 => u32x4::from(self.0.0).extract::<3>(), + 4 => u32x4::from(self.0.1).extract::<0>(), + 5 => u32x4::from(self.0.1).extract::<1>(), + 6 => u32x4::from(self.0.1).extract::<2>(), + 7 => u32x4::from(self.0.1).extract::<3>(), + _ => unreachable!() + } + } +} + +#[allow(non_camel_case_types)] +#[derive(Copy, Clone, Debug)] +#[repr(transparent)] +pub struct u32x2x2(pub core::arch::aarch64::uint32x2x2_t); + +impl PartialEq for u32x2x2 { + fn eq(&self, other: &Self) -> bool { + u32x2::from(self.0.0) == u32x2::from(other.0.0) + && u32x2::from(self.0.1) == u32x2::from(other.0.1) + } +} + +impl Add for u32x2x2 { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + Self::new( + u32x2::from(self.0.0) + u32x2::from(rhs.0.0), + u32x2::from(self.0.1) + u32x2::from(rhs.0.1) + ) + 
} +} + +impl u32x2x2 { + #[inline] + pub fn new(x0: u32x2, x1: u32x2) -> Self { + Self(core::arch::aarch64::uint32x2x2_t(x0.0, x1.0)) + } + + #[inline] + pub fn shl(self) -> Self { + Self(core::arch::aarch64::uint32x2x2_t( + u32x2::from(self.0.0).shl::().0, + u32x2::from(self.0.1).shl::().0 + )) + } + + #[inline] + pub fn extract(self) -> u32 { + match N { + 0 => u32x2::from(self.0.0).extract::<0>(), + 1 => u32x2::from(self.0.0).extract::<1>(), + 2 => u32x2::from(self.0.1).extract::<0>(), + 3 => u32x2::from(self.0.1).extract::<1>(), + _ => unreachable!() + } + } +} + #[allow(non_camel_case_types)] #[derive(Copy, Clone, Debug)] @@ -279,33 +428,35 @@ impl i32x4 { #[allow(non_camel_case_types)] #[derive(Copy, Clone, Debug)] #[repr(transparent)] -pub struct u64x4(pub (u64x2, u64x2)); +pub struct u64x4(pub core::arch::aarch64::uint64x2x2_t); impl u64x4 { #[inline] - pub fn new(x0: u64, x1: u64, x2: u64, x3: u64) -> Self { - Self((u64x2::new(x0, x1), u64x2::new(x2, x3))) + pub fn new(x0: u64x2, x1: u64x2) -> Self { + Self(core::arch::aarch64::uint64x2x2_t(x0.0, x1.0)) } #[inline] pub fn splat(x: u64) -> Self { - Self::new(x, x, x, x) + Self::new(u64x2::splat(x), u64x2::splat(x)) } #[inline] pub fn extract(self) -> u64 { match N { - 0 => self.0.0.extract::<0>(), - 1 => self.0.0.extract::<1>(), - 2 => self.0.1.extract::<0>(), - 3 => self.0.1.extract::<1>(), + 0 => u64x2::from(self.0.0).extract::<0>(), + 1 => u64x2::from(self.0.0).extract::<1>(), + 2 => u64x2::from(self.0.1).extract::<0>(), + 3 => u64x2::from(self.0.1).extract::<1>(), _ => unreachable!() } } #[inline] pub fn shl(self) -> Self { - Self((self.0.0.shl::(), self.0.1.shl::())) + Self(core::arch::aarch64::uint64x2x2_t( + u64x2::from(self.0.0).shl::().into(), + u64x2::from(self.0.1).shl::().into())) } } @@ -314,7 +465,9 @@ impl Add for u64x4 { #[inline] fn add(self, rhs: Self) -> Self { - Self((self.0.0 + rhs.0.0, self.0.1 + rhs.0.1)) + Self(core::arch::aarch64::uint64x2x2_t( + (u64x2::from(self.0.0) + 
u64x2::from(rhs.0.0)).into(), + (u64x2::from(self.0.1) + u64x2::from(rhs.0.1)).into())) } } From d524791bdaba3ff997d7990017ba1603f63b30a4 Mon Sep 17 00:00:00 2001 From: Tarinn Date: Wed, 21 Aug 2024 12:47:41 +0200 Subject: [PATCH 12/14] further refactoring to internal arm types --- .../src/backend/vector/neon/edwards.rs | 17 +++ .../src/backend/vector/neon/field.rs | 106 ++++++++---------- .../src/backend/vector/neon/mod.rs | 2 +- .../src/backend/vector/neon/packed_simd.rs | 36 ++++-- 4 files changed, 94 insertions(+), 67 deletions(-) diff --git a/curve25519-dalek/src/backend/vector/neon/edwards.rs b/curve25519-dalek/src/backend/vector/neon/edwards.rs index f6bc9d62e..ac773ec6a 100644 --- a/curve25519-dalek/src/backend/vector/neon/edwards.rs +++ b/curve25519-dalek/src/backend/vector/neon/edwards.rs @@ -29,6 +29,7 @@ use core::convert::From; use core::ops::{Add, Neg, Sub}; +use curve25519_dalek_derive::unsafe_target_feature; use subtle::Choice; use subtle::ConditionallySelectable; @@ -50,12 +51,14 @@ use super::field::{FieldElement2625x4, Lanes, Shuffle}; #[derive(Copy, Clone, Debug)] pub struct ExtendedPoint(pub(super) FieldElement2625x4); +#[unsafe_target_feature("neon")] impl From for ExtendedPoint { fn from(P: edwards::EdwardsPoint) -> ExtendedPoint { ExtendedPoint(FieldElement2625x4::new(&P.X, &P.Y, &P.Z, &P.T)) } } +#[unsafe_target_feature("neon")] impl From for edwards::EdwardsPoint { fn from(P: ExtendedPoint) -> edwards::EdwardsPoint { let tmp = P.0.split(); @@ -68,6 +71,7 @@ impl From for edwards::EdwardsPoint { } } +#[unsafe_target_feature("neon")] impl ConditionallySelectable for ExtendedPoint { fn conditional_select(a: &Self, b: &Self, choice: Choice) -> Self { ExtendedPoint(FieldElement2625x4::conditional_select(&a.0, &b.0, choice)) @@ -78,18 +82,21 @@ impl ConditionallySelectable for ExtendedPoint { } } +#[unsafe_target_feature("neon")] impl Default for ExtendedPoint { fn default() -> ExtendedPoint { ExtendedPoint::identity() } } 
+#[unsafe_target_feature("neon")] impl Identity for ExtendedPoint { fn identity() -> ExtendedPoint { constants::EXTENDEDPOINT_IDENTITY } } +#[unsafe_target_feature("neon")] impl ExtendedPoint { /// Compute the double of this point. pub fn double(&self) -> ExtendedPoint { @@ -175,6 +182,7 @@ impl ExtendedPoint { #[derive(Copy, Clone, Debug)] pub struct CachedPoint(pub(super) FieldElement2625x4); +#[unsafe_target_feature("neon")] impl From for CachedPoint { fn from(P: ExtendedPoint) -> CachedPoint { let mut x = P.0; @@ -193,18 +201,21 @@ impl From for CachedPoint { } } +#[unsafe_target_feature("neon")] impl Default for CachedPoint { fn default() -> CachedPoint { CachedPoint::identity() } } +#[unsafe_target_feature("neon")] impl Identity for CachedPoint { fn identity() -> CachedPoint { constants::CACHEDPOINT_IDENTITY } } +#[unsafe_target_feature("neon")] impl ConditionallySelectable for CachedPoint { fn conditional_select(a: &Self, b: &Self, choice: Choice) -> Self { CachedPoint(FieldElement2625x4::conditional_select(&a.0, &b.0, choice)) @@ -215,6 +226,7 @@ impl ConditionallySelectable for CachedPoint { } } +#[unsafe_target_feature("neon")] impl<'a> Neg for &'a CachedPoint { type Output = CachedPoint; /// Lazily negate the point. 
@@ -229,6 +241,7 @@ impl<'a> Neg for &'a CachedPoint { } } +#[unsafe_target_feature("neon")] impl<'a, 'b> Add<&'b CachedPoint> for &'a ExtendedPoint { type Output = ExtendedPoint; @@ -266,6 +279,7 @@ impl<'a, 'b> Add<&'b CachedPoint> for &'a ExtendedPoint { } } +#[unsafe_target_feature("neon")] impl<'a, 'b> Sub<&'b CachedPoint> for &'a ExtendedPoint { type Output = ExtendedPoint; @@ -279,6 +293,7 @@ impl<'a, 'b> Sub<&'b CachedPoint> for &'a ExtendedPoint { } } +#[unsafe_target_feature("neon")] impl<'a> From<&'a edwards::EdwardsPoint> for LookupTable { fn from(point: &'a edwards::EdwardsPoint) -> Self { let P = ExtendedPoint::from(*point); @@ -290,6 +305,7 @@ impl<'a> From<&'a edwards::EdwardsPoint> for LookupTable { } } +#[unsafe_target_feature("neon")] impl<'a> From<&'a edwards::EdwardsPoint> for NafLookupTable5 { fn from(point: &'a edwards::EdwardsPoint) -> Self { let A = ExtendedPoint::from(*point); @@ -303,6 +319,7 @@ impl<'a> From<&'a edwards::EdwardsPoint> for NafLookupTable5 { } } +#[unsafe_target_feature("neon")] impl<'a> From<&'a edwards::EdwardsPoint> for NafLookupTable8 { fn from(point: &'a edwards::EdwardsPoint) -> Self { let A = ExtendedPoint::from(*point); diff --git a/curve25519-dalek/src/backend/vector/neon/field.rs b/curve25519-dalek/src/backend/vector/neon/field.rs index 6ccec8f32..29a743426 100644 --- a/curve25519-dalek/src/backend/vector/neon/field.rs +++ b/curve25519-dalek/src/backend/vector/neon/field.rs @@ -22,7 +22,7 @@ use core::ops::{Add, Mul, Neg}; -use super::packed_simd::{i32x4, u32x2, u32x2x2, u32x4, u32x4x2, u64x2, u64x4}; +use super::packed_simd::{i32x4, u32x2, u32x2x2, u32x4, u32x4x2, u64x2, u64x2x2}; use crate::backend::serial::u64::field::FieldElement51; use crate::backend::vector::neon::constants::{ P_TIMES_16_HI, P_TIMES_16_LO, P_TIMES_2_HI, P_TIMES_2_LO, @@ -135,7 +135,6 @@ fn unpack_pair(src: u32x4x2) -> (u32x2x2, u32x2x2) { fn repack_pair(x: u32x4x2, y: u32x4x2) -> u32x4x2 { unsafe { use core::arch::aarch64::vcombine_u32; - 
use core::arch::aarch64::vget_low_u32; use core::arch::aarch64::vtrn1_u32; u32x4x2::new( @@ -366,7 +365,6 @@ impl FieldElement2625x4 { let rotated_carryout = |v: u32x4x2| -> u32x4x2 { unsafe { use core::arch::aarch64::vcombine_u32; - use core::arch::aarch64::vget_low_u32; use core::arch::aarch64::vqshlq_u32; let c: u32x4x2 = u32x4x2::new( @@ -391,7 +389,6 @@ impl FieldElement2625x4 { let combine = |v_lo: u32x4x2, v_hi: u32x4x2| -> u32x4x2 { unsafe { use core::arch::aarch64::vcombine_u32; - use core::arch::aarch64::vget_low_u32; u32x4x2::new( vcombine_u32( vget_low_u32(v_lo.0.0), @@ -432,7 +429,6 @@ impl FieldElement2625x4 { #[rustfmt::skip] // Retain formatting of return tuple let c9_19: u32x4x2 = unsafe { use core::arch::aarch64::vcombine_u32; - use core::arch::aarch64::vget_low_u32; use core::arch::aarch64::vmulq_n_u32; let c9_19_spread: u32x4x2 = u32x4x2::new( @@ -449,27 +445,22 @@ impl FieldElement2625x4 { FieldElement2625x4(v) } - // TODO: use arm types #[inline] #[rustfmt::skip] // Retain formatting of carry and repacking - fn reduce64(mut z: [(u64x2, u64x2); 10]) -> FieldElement2625x4 { + fn reduce64(mut z: [u64x2x2; 10]) -> FieldElement2625x4 { #[allow(non_snake_case)] - let LOW_25_BITS: u64x2 = u64x2::splat((1 << 25) - 1); + let LOW_25_BITS: u64x2x2 = u64x2x2::splat((1 << 25) - 1); #[allow(non_snake_case)] - let LOW_26_BITS: u64x2 = u64x2::splat((1 << 26) - 1); + let LOW_26_BITS: u64x2x2 = u64x2x2::splat((1 << 26) - 1); - let carry = |z: &mut [(u64x2, u64x2); 10], i: usize| { + let carry = |z: &mut [u64x2x2; 10], i: usize| { debug_assert!(i < 9); if i % 2 == 0 { - z[i + 1].0 = z[i + 1].0 + (z[i].0.shr::<26>()); - z[i + 1].1 = z[i + 1].1 + (z[i].1.shr::<26>()); - z[i].0 = z[i].0 & LOW_26_BITS; - z[i].1 = z[i].1 & LOW_26_BITS; + z[i + 1] = z[i + 1] + (z[i].shr::<26>()); + z[i] = z[i] & LOW_26_BITS; } else { - z[i + 1].0 = z[i + 1].0 + (z[i].0.shr::<25>()); - z[i + 1].1 = z[i + 1].1 + (z[i].1.shr::<25>()); - z[i].0 = z[i].0 & LOW_25_BITS; - z[i].1 = z[i].1 & 
LOW_25_BITS; + z[i + 1] = z[i + 1] + (z[i].shr::<25>()); + z[i] = z[i] & LOW_25_BITS; } }; @@ -479,30 +470,33 @@ impl FieldElement2625x4 { carry(&mut z, 3); carry(&mut z, 7); carry(&mut z, 4); carry(&mut z, 8); - let c = (z[9].0.shr::<25>(), z[9].1.shr::<25>()); - z[9] = (z[9].0 & LOW_25_BITS, z[9].1 & LOW_25_BITS); - let mut c0: (u64x2, u64x2) = (c.0 & LOW_26_BITS, c.1 & LOW_26_BITS); - let mut c1: (u64x2, u64x2) = (c.0.shr::<26>(), c.1.shr::<26>()); + let c = z[9].shr::<25>(); + z[9] = z[9] & LOW_25_BITS; + let mut c0: u64x2x2 = c & LOW_26_BITS; + let mut c1: u64x2x2 = c.shr::<26>(); unsafe { use core::arch::aarch64::vmulq_n_u32; - - c0 = (vmulq_n_u32(c0.0.into(), 19).into(), - vmulq_n_u32(c0.1.into(), 19).into()); - c1 = (vmulq_n_u32(c1.0.into(), 19).into(), - vmulq_n_u32(c1.1.into(), 19).into()); + use core::arch::aarch64::vreinterpretq_u32_u64; + + c0 = u64x2x2::new( + vmulq_n_u32(vreinterpretq_u32_u64(c0.0.0), 19).into(), + vmulq_n_u32(vreinterpretq_u32_u64(c0.0.1), 19).into()); + c1 = u64x2x2::new( + vmulq_n_u32(vreinterpretq_u32_u64(c1.0.0), 19).into(), + vmulq_n_u32(vreinterpretq_u32_u64(c1.0.1), 19).into()); } - z[0] = (z[0].0 + c0.0, z[0].1 + c0.1); - z[1] = (z[1].0 + c1.0, z[1].1 + c1.1); + z[0] = z[0] + c0; + z[1] = z[1] + c1; carry(&mut z, 0); FieldElement2625x4([ - repack_pair(u32x4x2::new(z[0].0.into(), z[0].1.into()), u32x4x2::new(z[1].0.into(), z[1].1.into())), - repack_pair(u32x4x2::new(z[2].0.into(), z[2].1.into()), u32x4x2::new(z[3].0.into(), z[3].1.into())), - repack_pair(u32x4x2::new(z[4].0.into(), z[4].1.into()), u32x4x2::new(z[5].0.into(), z[5].1.into())), - repack_pair(u32x4x2::new(z[6].0.into(), z[6].1.into()), u32x4x2::new(z[7].0.into(), z[7].1.into())), - repack_pair(u32x4x2::new(z[8].0.into(), z[8].1.into()), u32x4x2::new(z[9].0.into(), z[9].1.into())), + repack_pair(u32x4x2::new(z[0].0.0.into(), z[0].0.1.into()), u32x4x2::new(z[1].0.0.into(), z[1].0.1.into())), + repack_pair(u32x4x2::new(z[2].0.0.into(), z[2].0.1.into()), 
u32x4x2::new(z[3].0.0.into(), z[3].0.1.into())), + repack_pair(u32x4x2::new(z[4].0.0.into(), z[4].0.1.into()), u32x4x2::new(z[5].0.0.into(), z[5].0.1.into())), + repack_pair(u32x4x2::new(z[6].0.0.into(), z[6].0.1.into()), u32x4x2::new(z[7].0.0.into(), z[7].0.1.into())), + repack_pair(u32x4x2::new(z[8].0.0.into(), z[8].0.1.into()), u32x4x2::new(z[9].0.0.into(), z[9].0.1.into())), ]) } @@ -510,12 +504,12 @@ impl FieldElement2625x4 { #[rustfmt::skip] // keep alignment of formulas pub fn square_and_negate_D(&self) -> FieldElement2625x4 { #[inline(always)] - fn m(x: u32x2x2, y: u32x2x2) -> u64x4 { + fn m(x: u32x2x2, y: u32x2x2) -> u64x2x2 { use core::arch::aarch64::vmull_u32; unsafe { let z0: u64x2 = vmull_u32(x.0.0, y.0.0).into(); let z1: u64x2 = vmull_u32(x.0.1, y.0.1).into(); - u64x4::new(z0, z1) + u64x2x2::new(z0, z1) } } @@ -523,7 +517,6 @@ impl FieldElement2625x4 { fn m_lo(x: u32x2x2, y: u32x2x2) -> u32x2x2 { use core::arch::aarch64::vmull_u32; use core::arch::aarch64::vuzp1_u32; - use core::arch::aarch64::vget_low_u32; unsafe { let x: u32x4x2 = u32x4x2::new( vmull_u32(x.0.0, y.0.0).into(), @@ -571,17 +564,17 @@ impl FieldElement2625x4 { let z9 = m(x0_2,x9) + m(x1_2,x8) + m(x2_2,x7) + m(x3_2,x6) + m(x4_2,x5); - let low__p37 = u64x4::splat(0x3ffffed << 37); - let even_p37 = u64x4::splat(0x3ffffff << 37); - let odd__p37 = u64x4::splat(0x1ffffff << 37); + let low__p37 = u64x2x2::splat(0x3ffffed << 37); + let even_p37 = u64x2x2::splat(0x3ffffff << 37); + let odd__p37 = u64x2x2::splat(0x1ffffff << 37); - let negate_D = |x_01: u64x4, p_01: u64x4| -> (u64x2, u64x2) { + let negate_D = |x_01: u64x2x2, p_01: u64x2x2| -> u64x2x2 { unsafe { use core::arch::aarch64::vcombine_u32; use core::arch::aarch64::vreinterpretq_u32_u64; use core::arch::aarch64::vsubq_u64; - (u64x2(x_01.0.0), + u64x2x2::new(u64x2(x_01.0.0), vcombine_u32( vget_low_u32(vreinterpretq_u32_u64(x_01.0.1)), vget_high_u32(vreinterpretq_u32_u64(vsubq_u64(p_01.0.1, x_01.0.1)))).into()) @@ -652,16 +645,16 @@ impl 
Mul<(u32, u32, u32, u32)> for FieldElement2625x4 { let (b8, b9) = unpack_pair(self.0[4]); FieldElement2625x4::reduce64([ - (vmull_u32(b0.0.0, consts.0.into()).into(), vmull_u32(b0.0.1, consts.1.into()).into()), - (vmull_u32(b1.0.0, consts.0.into()).into(), vmull_u32(b1.0.1, consts.1.into()).into()), - (vmull_u32(b2.0.0, consts.0.into()).into(), vmull_u32(b2.0.1, consts.1.into()).into()), - (vmull_u32(b3.0.0, consts.0.into()).into(), vmull_u32(b3.0.1, consts.1.into()).into()), - (vmull_u32(b4.0.0, consts.0.into()).into(), vmull_u32(b4.0.1, consts.1.into()).into()), - (vmull_u32(b5.0.0, consts.0.into()).into(), vmull_u32(b5.0.1, consts.1.into()).into()), - (vmull_u32(b6.0.0, consts.0.into()).into(), vmull_u32(b6.0.1, consts.1.into()).into()), - (vmull_u32(b7.0.0, consts.0.into()).into(), vmull_u32(b7.0.1, consts.1.into()).into()), - (vmull_u32(b8.0.0, consts.0.into()).into(), vmull_u32(b8.0.1, consts.1.into()).into()), - (vmull_u32(b9.0.0, consts.0.into()).into(), vmull_u32(b9.0.1, consts.1.into()).into()) + u64x2x2::new(vmull_u32(b0.0.0, consts.0.into()).into(), vmull_u32(b0.0.1, consts.1.into()).into()), + u64x2x2::new(vmull_u32(b1.0.0, consts.0.into()).into(), vmull_u32(b1.0.1, consts.1.into()).into()), + u64x2x2::new(vmull_u32(b2.0.0, consts.0.into()).into(), vmull_u32(b2.0.1, consts.1.into()).into()), + u64x2x2::new(vmull_u32(b3.0.0, consts.0.into()).into(), vmull_u32(b3.0.1, consts.1.into()).into()), + u64x2x2::new(vmull_u32(b4.0.0, consts.0.into()).into(), vmull_u32(b4.0.1, consts.1.into()).into()), + u64x2x2::new(vmull_u32(b5.0.0, consts.0.into()).into(), vmull_u32(b5.0.1, consts.1.into()).into()), + u64x2x2::new(vmull_u32(b6.0.0, consts.0.into()).into(), vmull_u32(b6.0.1, consts.1.into()).into()), + u64x2x2::new(vmull_u32(b7.0.0, consts.0.into()).into(), vmull_u32(b7.0.1, consts.1.into()).into()), + u64x2x2::new(vmull_u32(b8.0.0, consts.0.into()).into(), vmull_u32(b8.0.1, consts.1.into()).into()), + u64x2x2::new(vmull_u32(b9.0.0, consts.0.into()).into(), 
vmull_u32(b9.0.1, consts.1.into()).into()) ]) } } @@ -673,12 +666,12 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { #[rustfmt::skip] // Retain formatting of z_i computation fn mul(self, rhs: &'b FieldElement2625x4) -> FieldElement2625x4 { #[inline(always)] - fn m(x: u32x2x2, y: u32x2x2) -> u64x4 { + fn m(x: u32x2x2, y: u32x2x2) -> u64x2x2 { use core::arch::aarch64::vmull_u32; unsafe { let z0: u64x2 = vmull_u32(x.0.0, y.0.0).into(); let z1: u64x2 = vmull_u32(x.0.1, y.0.1).into(); - u64x4::new(z0, z1) + u64x2x2::new(z0, z1) } } @@ -686,7 +679,6 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { fn m_lo(x: u32x2x2, y: u32x2x2) -> u32x2x2 { use core::arch::aarch64::vmull_u32; use core::arch::aarch64::vuzp1_u32; - use core::arch::aarch64::vget_low_u32; unsafe { let x: u32x4x2 = u32x4x2::new( vmull_u32(x.0.0, y.0.0).into(), @@ -741,8 +733,8 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 { let z8 = m(x0,y8) + m(x1_2,y7) + m(x2,y6) + m(x3_2,y5) + m(x4,y4) + m(x5_2,y3) + m(x6,y2) + m(x7_2,y1) + m(x8,y0) + m(x9_2,y9_19); let z9 = m(x0,y9) + m(x1,y8) + m(x2,y7) + m(x3,y6) + m(x4,y5) + m(x5,y4) + m(x6,y3) + m(x7,y2) + m(x8,y1) + m(x9,y0); - let f = |x: u64x4| -> (u64x2, u64x2) { - ( + let f = |x: u64x2x2| -> u64x2x2 { + u64x2x2::new( x.0.0.into(), x.0.1.into() ) diff --git a/curve25519-dalek/src/backend/vector/neon/mod.rs b/curve25519-dalek/src/backend/vector/neon/mod.rs index 262694131..0e279f715 100644 --- a/curve25519-dalek/src/backend/vector/neon/mod.rs +++ b/curve25519-dalek/src/backend/vector/neon/mod.rs @@ -17,4 +17,4 @@ pub(crate) mod constants; pub(crate) use self::edwards::{CachedPoint, ExtendedPoint}; -mod packed_simd; +pub mod packed_simd; diff --git a/curve25519-dalek/src/backend/vector/neon/packed_simd.rs b/curve25519-dalek/src/backend/vector/neon/packed_simd.rs index 32a9ce04c..a8f5b40f6 100644 --- a/curve25519-dalek/src/backend/vector/neon/packed_simd.rs +++ 
b/curve25519-dalek/src/backend/vector/neon/packed_simd.rs @@ -428,9 +428,30 @@ impl i32x4 { #[allow(non_camel_case_types)] #[derive(Copy, Clone, Debug)] #[repr(transparent)] -pub struct u64x4(pub core::arch::aarch64::uint64x2x2_t); +pub struct u64x2x2(pub core::arch::aarch64::uint64x2x2_t); -impl u64x4 { +impl Add for u64x2x2 { + type Output = Self; + + #[inline] + fn add(self, rhs: Self) -> Self { + Self(core::arch::aarch64::uint64x2x2_t( + (u64x2::from(self.0.0) + u64x2::from(rhs.0.0)).into(), + (u64x2::from(self.0.1) + u64x2::from(rhs.0.1)).into())) + } +} + +impl BitAnd for u64x2x2 { + type Output = Self; + + fn bitand(self, rhs: Self) -> Self::Output { + Self(core::arch::aarch64::uint64x2x2_t( + (u64x2::from(self.0.0) & u64x2::from(rhs.0.0)).into(), + (u64x2::from(self.0.1) & u64x2::from(rhs.0.1)).into())) + } +} + +impl u64x2x2 { #[inline] pub fn new(x0: u64x2, x1: u64x2) -> Self { Self(core::arch::aarch64::uint64x2x2_t(x0.0, x1.0)) @@ -458,16 +479,13 @@ impl u64x4 { u64x2::from(self.0.0).shl::().into(), u64x2::from(self.0.1).shl::().into())) } -} - -impl Add for u64x4 { - type Output = Self; #[inline] - fn add(self, rhs: Self) -> Self { + pub fn shr(self) -> Self { Self(core::arch::aarch64::uint64x2x2_t( - (u64x2::from(self.0.0) + u64x2::from(rhs.0.0)).into(), - (u64x2::from(self.0.1) + u64x2::from(rhs.0.1)).into())) + u64x2::from(self.0.0).shr::().into(), + u64x2::from(self.0.1).shr::().into())) } } + From 2aa38c99ad0ba306f3b6dbbb747930f1aee270c5 Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Wed, 21 Aug 2024 13:49:23 +0200 Subject: [PATCH 13/14] Clippy --- curve25519-dalek/src/backend/vector/mod.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/curve25519-dalek/src/backend/vector/mod.rs b/curve25519-dalek/src/backend/vector/mod.rs index 90752a7ad..49277266f 100644 --- a/curve25519-dalek/src/backend/vector/mod.rs +++ b/curve25519-dalek/src/backend/vector/mod.rs @@ -12,17 +12,16 @@ #![doc = 
include_str!("../../../docs/parallel-formulas.md")] #[allow(missing_docs)] -#[cfg(all(target_arch="x86_64"))] +#[cfg(target_arch = "x86_64")] pub mod packed_simd; - -#[cfg(all(target_arch="x86_64"))] +#[cfg(target_arch = "x86_64")] pub mod avx2; -#[cfg(all(nightly, target_arch="x86_64"))] +#[cfg(all(nightly, target_arch = "x86_64"))] pub mod ifma; -#[cfg(all(nightly, target_arch="aarch64"))] +#[cfg(all(nightly, target_arch = "aarch64"))] pub mod neon; pub mod scalar_mul; From 98d09ae4ff828454749f65798459a276bd1b9f28 Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Wed, 21 Aug 2024 14:05:36 +0200 Subject: [PATCH 14/14] Rustfmt --- .../src/backend/vector/neon/field.rs | 79 ++++----- .../src/backend/vector/neon/packed_simd.rs | 163 +++++++++--------- .../backend/vector/scalar_mul/pippenger.rs | 6 +- .../vector/scalar_mul/precomputed_straus.rs | 6 +- .../src/backend/vector/scalar_mul/straus.rs | 6 +- .../vector/scalar_mul/variable_base.rs | 6 +- .../vector/scalar_mul/vartime_double_base.rs | 6 +- curve25519-dalek/src/lib.rs | 2 +- 8 files changed, 136 insertions(+), 138 deletions(-) diff --git a/curve25519-dalek/src/backend/vector/neon/field.rs b/curve25519-dalek/src/backend/vector/neon/field.rs index 29a743426..dddd24c9d 100644 --- a/curve25519-dalek/src/backend/vector/neon/field.rs +++ b/curve25519-dalek/src/backend/vector/neon/field.rs @@ -66,7 +66,7 @@ use core::arch::aarch64::vget_low_u32; macro_rules! shuffle { ($vec:expr , $index:expr) => { unsafe { - let v_n: [u32;8] = [ + let v_n: [u32; 8] = [ $vec.extract::<0>(), $vec.extract::<1>(), $vec.extract::<2>(), @@ -74,13 +74,21 @@ macro_rules! 
shuffle { $vec.extract::<4>(), $vec.extract::<5>(), $vec.extract::<6>(), - $vec.extract::<7>() - ]; + $vec.extract::<7>(), + ]; u32x4x2::new( - core::mem::transmute::<[u32; 4], u32x4>( - [v_n[$index[0]], v_n[$index[1]], v_n[$index[2]], v_n[$index[3]]]), - core::mem::transmute::<[u32; 4], u32x4>( - [v_n[$index[4]], v_n[$index[5]], v_n[$index[6]], v_n[$index[7]]]) + core::mem::transmute::<[u32; 4], u32x4>([ + v_n[$index[0]], + v_n[$index[1]], + v_n[$index[2]], + v_n[$index[3]], + ]), + core::mem::transmute::<[u32; 4], u32x4>([ + v_n[$index[4]], + v_n[$index[5]], + v_n[$index[6]], + v_n[$index[7]], + ]), ) } }; @@ -90,7 +98,7 @@ macro_rules! shuffle { macro_rules! blend { ($vec0: expr, $vec1: expr, $index:expr) => { unsafe { - let v_n: [u32;8] = [ + let v_n: [u32; 8] = [ $vec0.extract::<0>(), $vec0.extract::<1>(), $vec0.extract::<2>(), @@ -98,10 +106,14 @@ macro_rules! blend { $vec1.extract::<0>(), $vec1.extract::<1>(), $vec1.extract::<2>(), - $vec1.extract::<3>() - ]; - core::mem::transmute::<[u32; 4], u32x4>( - [v_n[$index[0]], v_n[$index[1]], v_n[$index[2]], v_n[$index[3]]]) + $vec1.extract::<3>(), + ]; + core::mem::transmute::<[u32; 4], u32x4>([ + v_n[$index[0]], + v_n[$index[1]], + v_n[$index[2]], + v_n[$index[3]], + ]) } }; } @@ -118,10 +130,10 @@ fn unpack_pair(src: u32x4x2) -> (u32x2x2, u32x2x2) { let b0: u32x2; let b1: u32x2; unsafe { - a0 = vget_low_u32(src.0.0).into(); - a1 = vget_low_u32(src.0.1).into(); - b0 = vget_high_u32(src.0.0).into(); - b1 = vget_high_u32(src.0.1).into(); + a0 = vget_low_u32(src.0 .0).into(); + a1 = vget_low_u32(src.0 .1).into(); + b0 = vget_high_u32(src.0 .0).into(); + b1 = vget_high_u32(src.0 .1).into(); } return (u32x2x2::new(a0, a1), u32x2x2::new(b0, b1)); } @@ -193,7 +205,7 @@ impl ConditionallySelectable for FieldElement2625x4 { a.0[1] ^ (mask_vec & (a.0[1] ^ b.0[1])), a.0[2] ^ (mask_vec & (a.0[2] ^ b.0[2])), a.0[3] ^ (mask_vec & (a.0[3] ^ b.0[3])), - a.0[4] ^ (mask_vec & (a.0[4] ^ b.0[4])) + a.0[4] ^ (mask_vec & (a.0[4] ^ 
b.0[4])), ]) } @@ -266,7 +278,6 @@ impl FieldElement2625x4 { self.shuffle(Shuffle::BACD) } - // Can probably be sped up using multiple vset/vget instead of table #[inline] pub fn blend(&self, other: FieldElement2625x4, control: Lanes) -> FieldElement2625x4 { @@ -326,7 +337,7 @@ impl FieldElement2625x4 { buf[i] = u32x4x2::new( u32x4::new(a_2i, b_2i, a_2i_1, b_2i_1), - u32x4::new(c_2i, d_2i, c_2i_1, d_2i_1) + u32x4::new(c_2i, d_2i, c_2i_1, d_2i_1), ); } return FieldElement2625x4(buf).reduce(); @@ -368,20 +379,12 @@ impl FieldElement2625x4 { use core::arch::aarch64::vqshlq_u32; let c: u32x4x2 = u32x4x2::new( - vqshlq_u32(v.0.0, shifts.0.into()).into(), - vqshlq_u32(v.0.1, shifts.1.into()).into(), + vqshlq_u32(v.0 .0, shifts.0.into()).into(), + vqshlq_u32(v.0 .1, shifts.1.into()).into(), ); u32x4x2::new( - vcombine_u32( - vget_high_u32(c.0.0), - vget_low_u32(c.0.0), - ) - .into(), - vcombine_u32( - vget_high_u32(c.0.1), - vget_low_u32(c.0.1), - ) - .into(), + vcombine_u32(vget_high_u32(c.0 .0), vget_low_u32(c.0 .0)).into(), + vcombine_u32(vget_high_u32(c.0 .1), vget_low_u32(c.0 .1)).into(), ) } }; @@ -390,16 +393,8 @@ impl FieldElement2625x4 { unsafe { use core::arch::aarch64::vcombine_u32; u32x4x2::new( - vcombine_u32( - vget_low_u32(v_lo.0.0), - vget_high_u32(v_hi.0.0), - ) - .into(), - vcombine_u32( - vget_low_u32(v_lo.0.1), - vget_high_u32(v_hi.0.1), - ) - .into(), + vcombine_u32(vget_low_u32(v_lo.0 .0), vget_high_u32(v_hi.0 .0)).into(), + vcombine_u32(vget_low_u32(v_lo.0 .1), vget_high_u32(v_hi.0 .1)).into(), ) } }; @@ -874,5 +869,3 @@ mod test { assert_eq!(x3, splits[3]); } } - - diff --git a/curve25519-dalek/src/backend/vector/neon/packed_simd.rs b/curve25519-dalek/src/backend/vector/neon/packed_simd.rs index a8f5b40f6..51aa7b678 100644 --- a/curve25519-dalek/src/backend/vector/neon/packed_simd.rs +++ b/curve25519-dalek/src/backend/vector/neon/packed_simd.rs @@ -55,7 +55,6 @@ macro_rules! 
impl_shared { impl Eq for $ty {} - impl Add for $ty { type Output = Self; @@ -71,7 +70,7 @@ macro_rules! impl_shared { *self = *self + rhs } } - + impl Sub for $ty { type Output = Self; @@ -128,23 +127,22 @@ macro_rules! impl_shared { pub fn shr(self) -> Self { unsafe { core::arch::aarch64::$shr_intrinsic(self.0, N).into() } } - } - } + }; } impl_shared!( - u32x4, - u32, - uint32x4_t, - vceqq_u32, - vaddq_u32, - vsubq_u32, - vandq_u32, - veorq_u32, - vshlq_n_u32, - vshrq_n_u32, - vgetq_lane_u32 + u32x4, + u32, + uint32x4_t, + vceqq_u32, + vaddq_u32, + vsubq_u32, + vandq_u32, + veorq_u32, + vshlq_n_u32, + vshrq_n_u32, + vgetq_lane_u32 ); impl u32x4 { @@ -171,7 +169,7 @@ impl u32x4 { impl From for core::arch::aarch64::uint32x4_t { #[inline] - fn from(value: u64x2) -> core::arch::aarch64::uint32x4_t { + fn from(value: u64x2) -> core::arch::aarch64::uint32x4_t { unsafe { core::arch::aarch64::vreinterpretq_u32_u64(value.into()) } } } @@ -184,7 +182,7 @@ impl From for u32x4 { } impl From for u32x4 { - #[inline] + #[inline] fn from(value: u64x2) -> u32x4 { Into::::into(value).into() } @@ -217,17 +215,17 @@ impl u32x2 { } impl_shared!( - u64x2, - u64, - uint64x2_t, - vceqq_u64, - vaddq_u64, - vsubq_u64, - vandq_u64, - veorq_u64, - vshlq_n_u64, - vshrq_n_u64, - vgetq_lane_u64 + u64x2, + u64, + uint64x2_t, + vceqq_u64, + vaddq_u64, + vsubq_u64, + vandq_u64, + veorq_u64, + vshlq_n_u64, + vshrq_n_u64, + vgetq_lane_u64 ); impl u64x2 { @@ -256,15 +254,15 @@ pub struct u32x4x2(pub core::arch::aarch64::uint32x4x2_t); impl PartialEq for u32x4x2 { fn eq(&self, other: &Self) -> bool { - u32x4::from(self.0.0) == u32x4::from(other.0.0) - && u32x4::from(self.0.1) == u32x4::from(other.0.1) + u32x4::from(self.0 .0) == u32x4::from(other.0 .0) + && u32x4::from(self.0 .1) == u32x4::from(other.0 .1) } } impl From for core::arch::aarch64::uint32x4x2_t { #[inline] fn from(value: u32x4x2) -> core::arch::aarch64::uint32x4x2_t { - value.0 + value.0 } } @@ -280,8 +278,9 @@ impl BitXor for u32x4x2 { 
fn bitxor(self, rhs: Self) -> Self::Output { Self::new( - u32x4::from(self.0.0) ^ u32x4::from(rhs.0.0), - u32x4::from(self.0.1) ^ u32x4::from(rhs.0.1)) + u32x4::from(self.0 .0) ^ u32x4::from(rhs.0 .0), + u32x4::from(self.0 .1) ^ u32x4::from(rhs.0 .1), + ) } } @@ -296,9 +295,9 @@ impl BitAnd for u32x4x2 { fn bitand(self, rhs: Self) -> Self::Output { Self::new( - u32x4::from(self.0.0) & u32x4::from(rhs.0.0), - u32x4::from(self.0.1) & u32x4::from(rhs.0.1)) - + u32x4::from(self.0 .0) & u32x4::from(rhs.0 .0), + u32x4::from(self.0 .1) & u32x4::from(rhs.0 .1), + ) } } @@ -306,9 +305,10 @@ impl Add for u32x4x2 { type Output = Self; fn add(self, rhs: Self) -> Self::Output { - Self::new( - u32x4::from(self.0.0) + u32x4::from(rhs.0.0), - u32x4::from(self.0.1) + u32x4::from(rhs.0.1)) + Self::new( + u32x4::from(self.0 .0) + u32x4::from(rhs.0 .0), + u32x4::from(self.0 .1) + u32x4::from(rhs.0 .1), + ) } } @@ -317,8 +317,9 @@ impl Sub for u32x4x2 { fn sub(self, rhs: Self) -> Self::Output { Self::new( - u32x4::from(self.0.0) - u32x4::from(rhs.0.0), - u32x4::from(self.0.1) - u32x4::from(rhs.0.1)) + u32x4::from(self.0 .0) - u32x4::from(rhs.0 .0), + u32x4::from(self.0 .1) - u32x4::from(rhs.0 .1), + ) } } @@ -330,21 +331,24 @@ impl u32x4x2 { #[inline] pub const fn splat(x: u32) -> Self { - Self(core::arch::aarch64::uint32x4x2_t(u32x4::const_splat(x).0, u32x4::const_splat(x).0)) + Self(core::arch::aarch64::uint32x4x2_t( + u32x4::const_splat(x).0, + u32x4::const_splat(x).0, + )) } #[inline] pub fn extract(self) -> u32 { match N { - 0 => u32x4::from(self.0.0).extract::<0>(), - 1 => u32x4::from(self.0.0).extract::<1>(), - 2 => u32x4::from(self.0.0).extract::<2>(), - 3 => u32x4::from(self.0.0).extract::<3>(), - 4 => u32x4::from(self.0.1).extract::<0>(), - 5 => u32x4::from(self.0.1).extract::<1>(), - 6 => u32x4::from(self.0.1).extract::<2>(), - 7 => u32x4::from(self.0.1).extract::<3>(), - _ => unreachable!() + 0 => u32x4::from(self.0 .0).extract::<0>(), + 1 => u32x4::from(self.0 
.0).extract::<1>(), + 2 => u32x4::from(self.0 .0).extract::<2>(), + 3 => u32x4::from(self.0 .0).extract::<3>(), + 4 => u32x4::from(self.0 .1).extract::<0>(), + 5 => u32x4::from(self.0 .1).extract::<1>(), + 6 => u32x4::from(self.0 .1).extract::<2>(), + 7 => u32x4::from(self.0 .1).extract::<3>(), + _ => unreachable!(), } } } @@ -356,8 +360,8 @@ pub struct u32x2x2(pub core::arch::aarch64::uint32x2x2_t); impl PartialEq for u32x2x2 { fn eq(&self, other: &Self) -> bool { - u32x2::from(self.0.0) == u32x2::from(other.0.0) - && u32x2::from(self.0.1) == u32x2::from(other.0.1) + u32x2::from(self.0 .0) == u32x2::from(other.0 .0) + && u32x2::from(self.0 .1) == u32x2::from(other.0 .1) } } @@ -366,8 +370,8 @@ impl Add for u32x2x2 { fn add(self, rhs: Self) -> Self::Output { Self::new( - u32x2::from(self.0.0) + u32x2::from(rhs.0.0), - u32x2::from(self.0.1) + u32x2::from(rhs.0.1) + u32x2::from(self.0 .0) + u32x2::from(rhs.0 .0), + u32x2::from(self.0 .1) + u32x2::from(rhs.0 .1), ) } } @@ -381,24 +385,23 @@ impl u32x2x2 { #[inline] pub fn shl(self) -> Self { Self(core::arch::aarch64::uint32x2x2_t( - u32x2::from(self.0.0).shl::().0, - u32x2::from(self.0.1).shl::().0 + u32x2::from(self.0 .0).shl::().0, + u32x2::from(self.0 .1).shl::().0, )) } #[inline] pub fn extract(self) -> u32 { match N { - 0 => u32x2::from(self.0.0).extract::<0>(), - 1 => u32x2::from(self.0.0).extract::<1>(), - 2 => u32x2::from(self.0.1).extract::<0>(), - 3 => u32x2::from(self.0.1).extract::<1>(), - _ => unreachable!() + 0 => u32x2::from(self.0 .0).extract::<0>(), + 1 => u32x2::from(self.0 .0).extract::<1>(), + 2 => u32x2::from(self.0 .1).extract::<0>(), + 3 => u32x2::from(self.0 .1).extract::<1>(), + _ => unreachable!(), } } } - #[allow(non_camel_case_types)] #[derive(Copy, Clone, Debug)] #[repr(transparent)] @@ -436,8 +439,9 @@ impl Add for u64x2x2 { #[inline] fn add(self, rhs: Self) -> Self { Self(core::arch::aarch64::uint64x2x2_t( - (u64x2::from(self.0.0) + u64x2::from(rhs.0.0)).into(), - (u64x2::from(self.0.1) 
+ u64x2::from(rhs.0.1)).into())) + (u64x2::from(self.0 .0) + u64x2::from(rhs.0 .0)).into(), + (u64x2::from(self.0 .1) + u64x2::from(rhs.0 .1)).into(), + )) } } @@ -446,8 +450,9 @@ impl BitAnd for u64x2x2 { fn bitand(self, rhs: Self) -> Self::Output { Self(core::arch::aarch64::uint64x2x2_t( - (u64x2::from(self.0.0) & u64x2::from(rhs.0.0)).into(), - (u64x2::from(self.0.1) & u64x2::from(rhs.0.1)).into())) + (u64x2::from(self.0 .0) & u64x2::from(rhs.0 .0)).into(), + (u64x2::from(self.0 .1) & u64x2::from(rhs.0 .1)).into(), + )) } } @@ -465,27 +470,27 @@ impl u64x2x2 { #[inline] pub fn extract(self) -> u64 { match N { - 0 => u64x2::from(self.0.0).extract::<0>(), - 1 => u64x2::from(self.0.0).extract::<1>(), - 2 => u64x2::from(self.0.1).extract::<0>(), - 3 => u64x2::from(self.0.1).extract::<1>(), - _ => unreachable!() - } + 0 => u64x2::from(self.0 .0).extract::<0>(), + 1 => u64x2::from(self.0 .0).extract::<1>(), + 2 => u64x2::from(self.0 .1).extract::<0>(), + 3 => u64x2::from(self.0 .1).extract::<1>(), + _ => unreachable!(), + } } #[inline] pub fn shl(self) -> Self { Self(core::arch::aarch64::uint64x2x2_t( - u64x2::from(self.0.0).shl::().into(), - u64x2::from(self.0.1).shl::().into())) + u64x2::from(self.0 .0).shl::().into(), + u64x2::from(self.0 .1).shl::().into(), + )) } #[inline] pub fn shr(self) -> Self { Self(core::arch::aarch64::uint64x2x2_t( - u64x2::from(self.0.0).shr::().into(), - u64x2::from(self.0.1).shr::().into())) + u64x2::from(self.0 .0).shr::().into(), + u64x2::from(self.0 .1).shr::().into(), + )) } - } - diff --git a/curve25519-dalek/src/backend/vector/scalar_mul/pippenger.rs b/curve25519-dalek/src/backend/vector/scalar_mul/pippenger.rs index f439ee789..5a643b8b5 100644 --- a/curve25519-dalek/src/backend/vector/scalar_mul/pippenger.rs +++ b/curve25519-dalek/src/backend/vector/scalar_mul/pippenger.rs @@ -10,9 +10,9 @@ #![allow(non_snake_case)] #[curve25519_dalek_derive::unsafe_target_feature_specialize( - conditional("avx2", target_arch="x86_64"), - 
conditional("avx512ifma,avx512vl", all(nightly, target_arch="x86_64")), - conditional("neon", all(nightly, target_arch="aarch64")) + conditional("avx2", target_arch = "x86_64"), + conditional("avx512ifma,avx512vl", all(nightly, target_arch = "x86_64")), + conditional("neon", all(nightly, target_arch = "aarch64")) )] pub mod spec { diff --git a/curve25519-dalek/src/backend/vector/scalar_mul/precomputed_straus.rs b/curve25519-dalek/src/backend/vector/scalar_mul/precomputed_straus.rs index eb7954e64..500510b31 100644 --- a/curve25519-dalek/src/backend/vector/scalar_mul/precomputed_straus.rs +++ b/curve25519-dalek/src/backend/vector/scalar_mul/precomputed_straus.rs @@ -12,9 +12,9 @@ #![allow(non_snake_case)] #[curve25519_dalek_derive::unsafe_target_feature_specialize( - conditional("avx2", target_arch="x86_64"), - conditional("avx512ifma,avx512vl", all(nightly, target_arch="x86_64")), - conditional("neon", all(nightly, target_arch="aarch64")) + conditional("avx2", target_arch = "x86_64"), + conditional("avx512ifma,avx512vl", all(nightly, target_arch = "x86_64")), + conditional("neon", all(nightly, target_arch = "aarch64")) )] pub mod spec { diff --git a/curve25519-dalek/src/backend/vector/scalar_mul/straus.rs b/curve25519-dalek/src/backend/vector/scalar_mul/straus.rs index 9dafd6ba5..40bf0d9f7 100644 --- a/curve25519-dalek/src/backend/vector/scalar_mul/straus.rs +++ b/curve25519-dalek/src/backend/vector/scalar_mul/straus.rs @@ -12,9 +12,9 @@ #![allow(non_snake_case)] #[curve25519_dalek_derive::unsafe_target_feature_specialize( - conditional("avx2", target_arch="x86_64"), - conditional("avx512ifma,avx512vl", all(nightly, target_arch="x86_64")), - conditional("neon", all(nightly, target_arch="aarch64")) + conditional("avx2", target_arch = "x86_64"), + conditional("avx512ifma,avx512vl", all(nightly, target_arch = "x86_64")), + conditional("neon", all(nightly, target_arch = "aarch64")) )] pub mod spec { diff --git 
a/curve25519-dalek/src/backend/vector/scalar_mul/variable_base.rs b/curve25519-dalek/src/backend/vector/scalar_mul/variable_base.rs index d716e1994..d12cf1e99 100644 --- a/curve25519-dalek/src/backend/vector/scalar_mul/variable_base.rs +++ b/curve25519-dalek/src/backend/vector/scalar_mul/variable_base.rs @@ -1,9 +1,9 @@ #![allow(non_snake_case)] #[curve25519_dalek_derive::unsafe_target_feature_specialize( - conditional("avx2", target_arch="x86_64"), - conditional("avx512ifma,avx512vl", all(nightly, target_arch="x86_64")), - conditional("neon", all(nightly, target_arch="aarch64")) + conditional("avx2", target_arch = "x86_64"), + conditional("avx512ifma,avx512vl", all(nightly, target_arch = "x86_64")), + conditional("neon", all(nightly, target_arch = "aarch64")) )] pub mod spec { diff --git a/curve25519-dalek/src/backend/vector/scalar_mul/vartime_double_base.rs b/curve25519-dalek/src/backend/vector/scalar_mul/vartime_double_base.rs index bc6eeef50..83fcdcfe1 100644 --- a/curve25519-dalek/src/backend/vector/scalar_mul/vartime_double_base.rs +++ b/curve25519-dalek/src/backend/vector/scalar_mul/vartime_double_base.rs @@ -12,9 +12,9 @@ #![allow(non_snake_case)] #[curve25519_dalek_derive::unsafe_target_feature_specialize( - conditional("avx2", target_arch="x86_64"), - conditional("avx512ifma,avx512vl", all(nightly, target_arch="x86_64")), - conditional("neon", all(nightly, target_arch="aarch64")) + conditional("avx2", target_arch = "x86_64"), + conditional("avx512ifma,avx512vl", all(nightly, target_arch = "x86_64")), + conditional("neon", all(nightly, target_arch = "aarch64")) )] pub mod spec { diff --git a/curve25519-dalek/src/lib.rs b/curve25519-dalek/src/lib.rs index 4ca7ef29b..bded841fe 100644 --- a/curve25519-dalek/src/lib.rs +++ b/curve25519-dalek/src/lib.rs @@ -22,7 +22,7 @@ all(curve25519_dalek_backend = "simd", nightly), feature(avx512_target_feature) )] -#![cfg_attr(all(nightly, target_arch="aarch64"), feature(portable_simd))] +#![cfg_attr(all(nightly, 
target_arch = "aarch64"), feature(portable_simd))] #![cfg_attr(docsrs, feature(doc_auto_cfg, doc_cfg, doc_cfg_hide))] #![cfg_attr(docsrs, doc(cfg_hide(docsrs)))] //------------------------------------------------------------------------