Diffstat (limited to 'gnu/packages/patches')
-rw-r--r-- | gnu/packages/patches/ddclient-skip-test.patch | 43
-rw-r--r-- | gnu/packages/patches/elogind-fix-rpath.patch | 60
-rw-r--r-- | gnu/packages/patches/elogind-revert-polkit-detection.patch | 41
-rw-r--r-- | gnu/packages/patches/fbreader-fix-icon.patch | 29
-rw-r--r-- | gnu/packages/patches/highlight-gui-data-dir.patch | 51
-rw-r--r-- | gnu/packages/patches/maturin-no-cross-compile.patch | 55
-rw-r--r-- | gnu/packages/patches/mcrl2-fix-1687.patch | 337
-rw-r--r-- | gnu/packages/patches/mcrl2-fix-counterexample.patch | 32
-rw-r--r-- | gnu/packages/patches/openssh-hurd.patch | 30
-rw-r--r-- | gnu/packages/patches/po4a-partial-texinfo-menu-fix.patch | 242
-rw-r--r-- | gnu/packages/patches/rust-nettle-sys-disable-vendor.patch | 48
-rw-r--r-- | gnu/packages/patches/rust-ring-0.16-missing-files.patch | 2293
-rw-r--r-- | gnu/packages/patches/rust-ring-0.16-test-files.patch | 54
13 files changed, 2784 insertions(+), 531 deletions(-)
diff --git a/gnu/packages/patches/ddclient-skip-test.patch b/gnu/packages/patches/ddclient-skip-test.patch deleted file mode 100644 index 28d748997b..0000000000 --- a/gnu/packages/patches/ddclient-skip-test.patch +++ /dev/null @@ -1,43 +0,0 @@ -From e5657802025f238b39581534f3b4d408565c8943 Mon Sep 17 00:00:00 2001 -From: Bruno Victal <mirai@makinata.eu> -Date: Sun, 5 Feb 2023 21:05:00 +0000 -Subject: [PATCH] Disable sandbox incompatible tests. - -See: https://github.com/ddclient/ddclient/issues/465 ---- - t/get_ip_from_if.pl | 21 --------------------- - 1 file changed, 21 deletions(-) - -diff --git a/t/get_ip_from_if.pl b/t/get_ip_from_if.pl -index 6f08e5d..d78c3d0 100644 ---- a/t/get_ip_from_if.pl -+++ b/t/get_ip_from_if.pl -@@ -39,25 +39,4 @@ subtest "get_ip_from_interface tests" => sub { - } - }; - --subtest "Get default interface and IP for test system" => sub { -- my $interface = ddclient::get_default_interface(4); -- if ($interface) { -- isnt($interface, "lo", "Check for loopback 'lo'"); -- isnt($interface, "lo0", "Check for loopback 'lo0'"); -- my $ip1 = ddclient::get_ip_from_interface("default", 4); -- my $ip2 = ddclient::get_ip_from_interface($interface, 4); -- is($ip1, $ip2, "Check IPv4 from default interface"); -- ok(ddclient::is_ipv4($ip1), "Valid IPv4 from get_ip_from_interface($interface)"); -- } -- $interface = ddclient::get_default_interface(6); -- if ($interface) { -- isnt($interface, "lo", "Check for loopback 'lo'"); -- isnt($interface, "lo0", "Check for loopback 'lo0'"); -- my $ip1 = ddclient::get_ip_from_interface("default", 6); -- my $ip2 = ddclient::get_ip_from_interface($interface, 6); -- is($ip1, $ip2, "Check IPv6 from default interface"); -- ok(ddclient::is_ipv6($ip1), "Valid IPv6 from get_ip_from_interface($interface)"); -- } --}; -- - done_testing(); --- -2.38.1 - diff --git a/gnu/packages/patches/elogind-fix-rpath.patch b/gnu/packages/patches/elogind-fix-rpath.patch new file mode 100644 index 0000000000..2a76cc467f --- /dev/null +++ b/gnu/packages/patches/elogind-fix-rpath.patch @@ -0,0 +1,60 @@ +Retrieved from https://github.com/elogind/elogind/issues/258 + +From: Mark Hindley <mark@hindley.org.uk> +Date: Wed, 24 May 2023 10:39:41 +0100 +Subject: Fixup_executable_rpath + +./meson.build sets + + install_rpath : rootlibexecdir + +however src/shared/meson.build sets + +libshared = shared_library( + [snip] + install_dir : rootpkglibdir + ) +--- + meson.build | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/meson.build b/meson.build +index 694a2fd..a575f69 100644 +--- a/meson.build ++++ b/meson.build +@@ -2903,7 +2903,7 @@ executable('elogind', + dependencies : [threads, + libacl, + libudev], +- install_rpath : rootlibexecdir, ++ install_rpath : rootpkglibdir, + install : true, + install_dir : rootlibexecdir) + +@@ -2913,7 +2913,7 @@ exe = executable('loginctl', + link_with : [libshared], + dependencies : [threads, + libudev], +- install_rpath : rootlibexecdir, ++ install_rpath : rootpkglibdir, + install : true, + install_dir : rootbindir) + public_programs += [exe] +@@ -2923,7 +2923,7 @@ exe = executable('elogind-inhibit', + include_directories : includes, + link_with : [libshared], + dependencies : [threads], +- install_rpath : rootlibexecdir, ++ install_rpath : rootpkglibdir, + install : true, + install_dir : rootbindir) + public_programs += [exe] +@@ -4283,7 +4283,7 @@ executable('elogind-uaccess-command', + libshared], + dependencies: [libacl, + libudev], +- install_rpath : rootlibexecdir, ++ install_rpath : rootpkglibdir, + 
install : true, + install_dir : rootlibexecdir) + #endif // 0 diff --git a/gnu/packages/patches/elogind-revert-polkit-detection.patch b/gnu/packages/patches/elogind-revert-polkit-detection.patch deleted file mode 100644 index 43dd1684b6..0000000000 --- a/gnu/packages/patches/elogind-revert-polkit-detection.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 715ce0a6459e418f92e74c7ce52df3244c18f383 Mon Sep 17 00:00:00 2001 -From: Sven Eden <sven.eden@prydeworx.com> -Date: Mon, 8 Mar 2021 08:40:08 +0100 -Subject: [PATCH] Revert "Disable polkit support if libpolkit is not installed" - -This reverts commit 1194dec4f8f2d1b8bd14e1625f34418ecfce817e. - -Removing polkit support with -Dpolkit=auto when libpolkit is not -installed, removes the whole interface. This makes it impossible to -add polkit support as a runtime dependency. - -Bug: #167 -Closes: #206 -Signed-off-by: Sven Eden <sven.eden@prydeworx.com> ---- - meson.build | 9 --------- - 1 file changed, 9 deletions(-) - -diff --git a/meson.build b/meson.build -index 2dd05db3c..f38551f55 100644 ---- a/meson.build -+++ b/meson.build -@@ -1157,15 +1157,6 @@ if want_polkit != 'false' and not skip_deps - message('Old polkit detected, will install pkla files') - install_polkit_pkla = true - endif --#if 1 /// Disable polkit completely if libpolkit is not there. See elogind issue #167 -- if not libpolkit.found() -- if want_polkit != 'auto' -- error('Polkit requested but libpolkit was not found.') -- endif -- install_polkit = false -- want_polkit = false -- endif --#endif // 1 - endif - conf.set10('ENABLE_POLKIT', install_polkit) - --- -2.33.1 - diff --git a/gnu/packages/patches/fbreader-fix-icon.patch b/gnu/packages/patches/fbreader-fix-icon.patch new file mode 100644 index 0000000000..635abf6a43 --- /dev/null +++ b/gnu/packages/patches/fbreader-fix-icon.patch @@ -0,0 +1,29 @@ +Author: Danny Milosavljevic <dannym+a@scratchpost.org> +Date: 2023-08-15 + +--- orig/jswba9mn9nh43l7g4w2qslmr7i3q64vy-fbreader-0.99.6-checkout/fbreader/desktop/desktop 2023-08-14 23:56:03.092567740 +0200 ++++ jswba9mn9nh43l7g4w2qslmr7i3q64vy-fbreader-0.99.6-checkout/fbreader/desktop/desktop 2023-08-14 23:56:59.936213278 +0200 +@@ -25,5 +25,5 @@ + StartupNotify=true + Terminal=false + Type=Application +-Icon=FBReader.png ++Icon=FBReader + Categories=Office;Viewer;Literature; +--- orig/jswba9mn9nh43l7g4w2qslmr7i3q64vy-fbreader-0.99.6-checkout/fbreader/desktop/Makefile 2023-08-14 23:56:03.092567740 +0200 ++++ jswba9mn9nh43l7g4w2qslmr7i3q64vy-fbreader-0.99.6-checkout/fbreader/desktop/Makefile 2023-08-15 00:23:19.046691430 +0200 +@@ -10,6 +10,14 @@ + @install -m 0644 desktop $(DESTDIR)/usr/share/applications/$(TARGET).desktop + @install -d $(DESTDIR)$(IMAGEDIR) + @install -m 0644 ../data/icons/application/$(TARGET_ARCH).png $(DESTDIR)$(IMAGEDIR)/FBReader.png ++ @install -d $(DESTDIR)$(SHAREDIR)/icons/hicolor/16x16/apps ++ @install -m 0644 ../data/icons/application/16x16.png $(DESTDIR)$(SHAREDIR)/icons/hicolor/16x16/apps/FBReader.png ++ @install -d $(DESTDIR)$(SHAREDIR)/icons/hicolor/32x32/apps ++ @install -m 0644 ../data/icons/application/32x32.png $(DESTDIR)$(SHAREDIR)/icons/hicolor/32x32/apps/FBReader.png ++ @install -d $(DESTDIR)$(SHAREDIR)/icons/hicolor/48x48/apps ++ @install -m 0644 ../data/icons/application/48x48.png $(DESTDIR)$(SHAREDIR)/icons/hicolor/48x48/apps/FBReader.png ++ @install -d $(DESTDIR)$(SHAREDIR)/icons/hicolor/64x64/apps ++ @install -m 0644 ../data/icons/application/64x64.png $(DESTDIR)$(SHAREDIR)/icons/hicolor/64x64/apps/FBReader.png + @install -m 0644 
../data/default/config.desktop.xml $(SHARE_FBREADER)/default/config.xml + @install -m 0644 ../data/default/keymap.desktop.xml $(SHARE_FBREADER)/default/keymap.xml + @install -m 0644 ../data/default/styles.desktop.xml $(SHARE_FBREADER)/default/styles.xml diff --git a/gnu/packages/patches/highlight-gui-data-dir.patch b/gnu/packages/patches/highlight-gui-data-dir.patch new file mode 100644 index 0000000000..33f40d309c --- /dev/null +++ b/gnu/packages/patches/highlight-gui-data-dir.patch @@ -0,0 +1,51 @@ +This patch distinguishes between the data directory path for GUI and the one +for non-GUI by allowing to set the former path. + +highlight package has two outputs: out and gui. Both outputs have files which +will be in the same directory /share/highlight/ without this patch (see also +install and install-gui tasks in makefile). In the gui's data directory, +there are GUI specific files in /share/highlight/gui-files/{l10n,ext}/. + +diff --git a/src/gui-qt/main.cpp b/src/gui-qt/main.cpp +index 4700dc1..3567745 100644 +--- a/src/gui-qt/main.cpp ++++ b/src/gui-qt/main.cpp +@@ -47,8 +47,8 @@ int main(int argc, char *argv[]) + { + QApplication app(argc, argv); + QTranslator translator; +-#ifdef DATA_DIR +- translator.load(QString("%1/gui_files/l10n/highlight_%2").arg(DATA_DIR).arg(QLocale::system().name())); ++#ifdef GUI_DATA_DIR ++ translator.load(QString("%1/gui_files/l10n/highlight_%2").arg(GUI_DATA_DIR).arg(QLocale::system().name())); + #else + translator.load(QString("%1/gui_files/l10n/highlight_%2").arg(QDir::currentPath()).arg(QLocale::system().name())); + #endif +diff --git a/src/gui-qt/mainwindow.cpp b/src/gui-qt/mainwindow.cpp +index 3a21ad2..f060431 100644 +--- a/src/gui-qt/mainwindow.cpp ++++ b/src/gui-qt/mainwindow.cpp +@@ -2131,8 +2131,8 @@ QString MainWindow::getDistFileFilterPath(){ + #ifdef Q_OS_OSX + return QCoreApplication::applicationDirPath()+"/../Resources/gui_files/ext/fileopenfilter.conf"; + #else +- #ifdef DATA_DIR +- return QString(DATA_DIR) + "/gui_files/ext/fileopenfilter.conf"; ++ #ifdef GUI_DATA_DIR ++ return QString(GUI_DATA_DIR) + "/gui_files/ext/fileopenfilter.conf"; + #else + return QDir::currentPath()+"/gui_files/ext/fileopenfilter.conf"; + #endif +diff --git a/src/makefile b/src/makefile +index b1d7988..2963105 100644 +--- a/src/makefile ++++ b/src/makefile +@@ -118,7 +118,7 @@ gui-qt: highlight-gui + + highlight-gui: libhighlight.a ${GUI_OBJECTS} + cd gui-qt && \ +- ${QMAKE} 'DEFINES+=DATA_DIR=\\\"${HL_DATA_DIR}\\\" CONFIG_DIR=\\\"${HL_CONFIG_DIR}\\\" DOC_DIR=\\\"${HL_DOC_DIR}\\\" ' && \ ++ ${QMAKE} 'DEFINES+=DATA_DIR=\\\"${HL_DATA_DIR}\\\" CONFIG_DIR=\\\"${HL_CONFIG_DIR}\\\" DOC_DIR=\\\"${HL_DOC_DIR}\\\" GUI_DATA_DIR=\\\"${GUI_DATA_DIR}\\\" ' && \ + $(MAKE) + + $(OBJECTFILES) : makefile diff --git a/gnu/packages/patches/maturin-no-cross-compile.patch b/gnu/packages/patches/maturin-no-cross-compile.patch new file mode 100644 index 0000000000..7394d0854e --- /dev/null +++ b/gnu/packages/patches/maturin-no-cross-compile.patch @@ -0,0 +1,55 @@ +Remove dependencies on xwin and zig. We're not offering cross-compilation +options using these crates. 
+ +diff --git a/Cargo.toml b/Cargo.toml +index 6cbdca3..22ea5ef 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -76,16 +76,6 @@ version = "0.1.4" + [dependencies.cargo-options] + version = "0.6.0" + +-[dependencies.cargo-xwin] +-version = "0.14.3" +-optional = true +-default-features = false +- +-[dependencies.cargo-zigbuild] +-version = "0.16.10" +-optional = true +-default-features = false +- + [dependencies.cargo_metadata] + version = "0.15.3" + +@@ -310,8 +300,6 @@ version = "4.3.0" + [features] + cli-completion = ["dep:clap_complete_command"] + cross-compile = [ +- "zig", +- "xwin", + ] + default = [ + "full", +@@ -330,7 +318,6 @@ log = ["tracing-subscriber"] + native-tls = [ + "dep:native-tls", + "ureq?/native-tls", +- "cargo-xwin?/native-tls", + "dep:rustls-pemfile", + ] + password-storage = [ +@@ -340,7 +327,6 @@ password-storage = [ + rustls = [ + "dep:rustls", + "ureq?/tls", +- "cargo-xwin?/rustls-tls", + "dep:rustls-pemfile", + ] + scaffolding = [ +@@ -358,5 +344,3 @@ upload = [ + "wild", + "dep:dirs", + ] +-xwin = ["cargo-xwin"] +-zig = ["cargo-zigbuild"] diff --git a/gnu/packages/patches/mcrl2-fix-1687.patch b/gnu/packages/patches/mcrl2-fix-1687.patch deleted file mode 100644 index 449ecbf638..0000000000 --- a/gnu/packages/patches/mcrl2-fix-1687.patch +++ /dev/null @@ -1,337 +0,0 @@ -Taken from upstream: - https://github.com/mCRL2org/mCRL2/commit/f38998be5198236bc5bf5a957b0e132d6d6d8bee - -Fixes bug in ltsconvert: - https://listserver.tue.nl/pipermail/mcrl2-users/2022-June/000395.html - -From f38998be5198236bc5bf5a957b0e132d6d6d8bee Mon Sep 17 00:00:00 2001 -From: Jan Friso Groote <J.F.Groote@tue.nl> -Date: Tue, 28 Jun 2022 12:27:47 +0200 -Subject: [PATCH] Solved bug report #1687 - -Hidden actions were not properly recognized in ltsconvert. Multiactions -that were partly hidden compared with the default action label, and had -to be compared with a tau-action. This caused multiple tau-actions to be -listed in the list of actions of an lts, and this caused other tools to -go astray. - -The code to rename actions has completely be rewritten. - -This should solve #1687. - -A test have been added. ---- - libraries/lts/include/mcrl2/lts/lts.h | 95 ++++++++++++++++++++++--- - libraries/lts/test/lts_test.cpp | 61 ++++++++-------- - tools/release/ltsconvert/ltsconvert.cpp | 3 +- - 3 files changed, 116 insertions(+), 43 deletions(-) - -diff --git a/libraries/lts/include/mcrl2/lts/lts.h b/libraries/lts/include/mcrl2/lts/lts.h -index 095031e7c..8562eb900 100644 ---- a/libraries/lts/include/mcrl2/lts/lts.h -+++ b/libraries/lts/include/mcrl2/lts/lts.h -@@ -25,6 +25,7 @@ - #include <algorithm> - #include <cassert> - #include <set> -+#include <map> - #include "mcrl2/lts/transition.h" - #include "mcrl2/lts/lts_type.h" - -@@ -482,40 +483,112 @@ class lts: public LTS_BASE - return; - } - -+ std::map<labels_size_type, labels_size_type> action_rename_map; - for (labels_size_type i=0; i< num_action_labels(); ++i) - { - ACTION_LABEL_T a=action_label(i); - a.hide_actions(tau_actions); -- if (a==ACTION_LABEL_T()) -+ if (a==ACTION_LABEL_T::tau_action()) - { -- m_hidden_label_set.insert(i); -+ if (i!=const_tau_label_index) -+ { -+ m_hidden_label_set.insert(i); -+ } - } - else if (a!=action_label(i)) - { -- set_action_label(i,a); -+ /* In this the action_label i is changed by the tau_actions but not renamed to tau. -+ We check whether a maps onto another action label index. If yes, it is added to -+ the rename map, and we explicitly rename transition labels with this label afterwards. 
-+ If no, we rename the action label. -+ */ -+ bool found=false; -+ for (labels_size_type j=0; !found && j< num_action_labels(); ++j) -+ { -+ if (a==action_label(j)) -+ { -+ if (i!=j) -+ { -+ action_rename_map[i]=j; -+ } -+ found=true; -+ } -+ } -+ if (!found) // a!=action_label(j) for any j, then rename action_label(i) to a. -+ { -+ set_action_label(i,a); -+ } -+ } -+ } -+ -+ if (action_rename_map.size()>0) // Check whether there are action labels that must be renamed, and -+ { -+ for(transition& t: m_transitions) -+ { -+ auto i = action_rename_map.find(t.label()); -+ if (i!=action_rename_map.end()) -+ { -+ t=transition(t.from(),i->second,t.to()); -+ } - } - } - } - -- /** \brief Apply the recorded actions that are renamed to internal actions to the lts. -- * \details After hiding actions, it checks whether action labels are -- * equal and merges actions with the same labels in the lts. -+ /** \brief Rename the hidden actions in the lts. -+ * \details Multiactions can be partially renamed. I.e. a|b becomes a if b is hidden. -+ * In such a case the new action a is mapped onto an existing action a; if such -+ * a label a does not exist, the action a|b is renamed to a. - * \param[in] tau_actions Vector with strings indicating which actions must be - * transformed to tau's */ -- void apply_hidden_actions(void) -+ void apply_hidden_actions(const std::vector<std::string>& tau_actions) - { -- if (m_hidden_label_set.size()>0) // Check whether there is something to rename. -+ if (tau_actions.size()==0) -+ { -+ return; -+ } -+ -+ std::map<labels_size_type, labels_size_type> action_rename_map; -+ for (labels_size_type i=0; i< num_action_labels(); ++i) -+ { -+ ACTION_LABEL_T a=action_label(i); -+ a.hide_actions(tau_actions); -+#ifndef NDEBUG -+ ACTION_LABEL_T b=a; -+ b.hide_actions(tau_actions); -+ assert(a==b); // hide_actions applied twice yields the same result as applying it once. -+#endif -+ bool found=false; -+ for (labels_size_type j=0; !found && j< num_action_labels(); ++j) -+ { -+ if (a==action_label(j)) -+ { -+ if (i!=j) -+ { -+ action_rename_map[i]=j; -+ } -+ found=true; -+ } -+ } -+ if (!found) // a!=action_label(j) for any j, then rename action_label(i) to a. -+ { -+ set_action_label(i,a); -+ } -+ } -+ -+ -+ if (action_rename_map.size()>0) // Check whether there is something to rename. - { - for(transition& t: m_transitions) - { -- if (m_hidden_label_set.count(t.label())) -+ auto i = action_rename_map.find(t.label()); -+ if (i!=action_rename_map.end()) - { -- t=transition(t.from(),tau_label_index(),t.to()); -+ t=transition(t.from(),i->second,t.to()); - } - } -- m_hidden_label_set.clear(); // Empty the hidden label set. - } - } -+ - /** \brief Checks whether this LTS has state values associated with its states. - * \retval true if the LTS has state information; - * \retval false otherwise. 
-diff --git a/libraries/lts/test/lts_test.cpp b/libraries/lts/test/lts_test.cpp -index 5840393d9..ad69f6275 100644 ---- a/libraries/lts/test/lts_test.cpp -+++ b/libraries/lts/test/lts_test.cpp -@@ -149,7 +149,7 @@ static void reduce_lts_in_various_ways(const std::string& test_description, - BOOST_CHECK(is_deterministic(l)); - } - --static void reduce_simple_loop() -+BOOST_AUTO_TEST_CASE(reduce_simple_loop) - { - std::string SIMPLE_AUT = - "des (0,2,2)\n" -@@ -173,7 +173,7 @@ static void reduce_simple_loop() - reduce_lts_in_various_ways("Simple loop", SIMPLE_AUT, expected); - } - --static void reduce_simple_loop_with_tau() -+BOOST_AUTO_TEST_CASE(reduce_simple_loop_with_tau) - { - std::string SIMPLE_AUT = - "des (0,2,2)\n" -@@ -200,7 +200,7 @@ static void reduce_simple_loop_with_tau() - /* The example below was encountered by David Jansen. The problem is that - * for branching bisimulations the tau may supersede the b, not leading to the - * necessary splitting into two equivalence classes. */ --static void tricky_example_for_branching_bisimulation() -+BOOST_AUTO_TEST_CASE(tricky_example_for_branching_bisimulation) - { - std::string TRICKY_BB = - "des (0,3,2)\n" -@@ -226,7 +226,7 @@ static void tricky_example_for_branching_bisimulation() - } - - --static void reduce_abp() -+BOOST_AUTO_TEST_CASE(reduce_abp) - { - std::string ABP_AUT = - "des (0,92,74)\n" -@@ -342,7 +342,7 @@ static void reduce_abp() - - // Peterson's protocol has the interesting property that the number of states modulo branching bisimulation - // differs from the number of states modulo weak bisimulation, as observed by Rob van Glabbeek. --static void reduce_peterson() -+BOOST_AUTO_TEST_CASE(reduce_peterson) - { - std::string PETERSON_AUT = - "des (0,59,35)\n" -@@ -423,7 +423,7 @@ static void reduce_peterson() - reduce_lts_in_various_ways("Peterson protocol", PETERSON_AUT, expected); - } - --static void test_reachability() -+BOOST_AUTO_TEST_CASE(test_reachability) - { - std::string REACH = - "des (0,4,5) \n" -@@ -449,7 +449,7 @@ static void test_reachability() - - // The example below caused failures in the GW mlogn branching bisimulation - // algorithm when cleaning the code up. --static void failing_test_groote_wijs_algorithm() -+BOOST_AUTO_TEST_CASE(failing_test_groote_wijs_algorithm) - { - std::string GWLTS = - "des(0,29,10)\n" -@@ -511,7 +511,7 @@ static void failing_test_groote_wijs_algorithm() - // It has not been implemented fully. The problem is that it is difficult to - // prescribe the order in which refinements have to be done. - --static void counterexample_jk_1(std::size_t k) -+void counterexample_jk_1(std::size_t k) - { - // numbering scheme of states: - // states 0..k-1 are the blue squares -@@ -571,7 +571,7 @@ static void counterexample_jk_1(std::size_t k) - - // In the meantime, the bug is corrected: this is why the first part of the - // algorithm now follows a much simpler line than previously. 
--static void counterexample_postprocessing() -+BOOST_AUTO_TEST_CASE(counterexample_postprocessing) - { - std::string POSTPROCESS_AUT = - "des(0,33,13)\n" -@@ -634,7 +634,7 @@ static void counterexample_postprocessing() - test_lts("postprocessing problem (branching bisimulation signature [Blom/Orzan 2003])",l,expected_label_count, expected_state_count, expected_transition_count); - } - --static void regression_delete_old_bb_slice() -+BOOST_AUTO_TEST_CASE(regression_delete_old_bb_slice) - { - std::string POSTPROCESS_AUT = - "des(0,163,100)\n" -@@ -824,7 +824,7 @@ static void regression_delete_old_bb_slice() - test_lts("regression test for GJKW bug (branching bisimulation signature [Blom/Orzan 2003])",l,expected_label_count, expected_state_count, expected_transition_count); - } - --void is_deterministic_test1() -+BOOST_AUTO_TEST_CASE(is_deterministic_test1) - { - std::string automaton = - "des(0,2,2)\n" -@@ -837,7 +837,7 @@ void is_deterministic_test1() - BOOST_CHECK(is_deterministic(l_det)); - } - --void is_deterministic_test2() -+BOOST_AUTO_TEST_CASE(is_deterministic_test2) - { - std::string automaton = - "des(0,2,2)\n" -@@ -850,24 +850,25 @@ void is_deterministic_test2() - BOOST_CHECK(!is_deterministic(l_det)); - } - --void test_is_deterministic() -+BOOST_AUTO_TEST_CASE(hide_actions1) - { -- is_deterministic_test1(); -- is_deterministic_test2(); --} -+ std::string automaton = -+ "des (0,4,3)\n" -+ "(0,\"<state>\",1)\n" -+ "(1,\"return|hello\",2)\n" -+ "(1,\"return\",2)\n" -+ "(2,\"world\",1)\n"; -+ -+ std::istringstream is(automaton); -+ lts::lts_aut_t l; -+ l.load(is); -+ std::vector<std::string>hidden_actions(1,"hello"); -+ l.apply_hidden_actions(hidden_actions); -+ reduce(l,lts::lts_eq_bisim); -+ std::size_t expected_label_count = 5; -+ std::size_t expected_state_count = 3; -+ std::size_t expected_transition_count = 3; -+ test_lts("regression test for GJKW bug (branching bisimulation [Jansen/Groote/Keiren/Wijs 2019])",l,expected_label_count, expected_state_count, expected_transition_count); -+ - --BOOST_AUTO_TEST_CASE(test_main) --{ -- reduce_simple_loop(); -- reduce_simple_loop_with_tau(); -- tricky_example_for_branching_bisimulation(); -- reduce_abp(); -- reduce_peterson(); -- test_reachability(); -- test_is_deterministic(); -- failing_test_groote_wijs_algorithm(); -- counterexample_jk_1(3); -- counterexample_postprocessing(); -- regression_delete_old_bb_slice(); -- // TODO: Add groote wijs branching bisimulation and add weak bisimulation tests. For the last Peterson is a good candidate. 
- } -diff --git a/tools/release/ltsconvert/ltsconvert.cpp b/tools/release/ltsconvert/ltsconvert.cpp -index 231deabe2..5645d31d1 100644 ---- a/tools/release/ltsconvert/ltsconvert.cpp -+++ b/tools/release/ltsconvert/ltsconvert.cpp -@@ -123,8 +123,7 @@ class ltsconvert_tool : public input_output_tool - - LTS_TYPE l; - l.load(tool_options.infilename); -- l.record_hidden_actions(tool_options.tau_actions); -- l.apply_hidden_actions(); -+ l.apply_hidden_actions(tool_options.tau_actions); - - if (tool_options.check_reach) - { --- -2.35.1 - diff --git a/gnu/packages/patches/mcrl2-fix-counterexample.patch b/gnu/packages/patches/mcrl2-fix-counterexample.patch deleted file mode 100644 index abf541f50c..0000000000 --- a/gnu/packages/patches/mcrl2-fix-counterexample.patch +++ /dev/null @@ -1,32 +0,0 @@ -Taken from upstream: - https://github.com/mCRL2org/mCRL2/commit/435421429dde9dcc5956e8a978597111a3947ec1 - -Fixes bug in ltscompare: - https://listserver.tue.nl/pipermail/mcrl2-users/2022-June/000396.html - -From 435421429dde9dcc5956e8a978597111a3947ec1 Mon Sep 17 00:00:00 2001 -From: Maurice Laveaux <m.laveaux@tue.nl> -Date: Wed, 29 Jun 2022 10:27:58 +0200 -Subject: [PATCH] Write counterexample's structured output trace on single - line. - ---- - libraries/lts/include/mcrl2/lts/detail/counter_example.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/libraries/lts/include/mcrl2/lts/detail/counter_example.h b/libraries/lts/include/mcrl2/lts/detail/counter_example.h -index c339cfde4..ca3967768 100644 ---- a/libraries/lts/include/mcrl2/lts/detail/counter_example.h -+++ b/libraries/lts/include/mcrl2/lts/detail/counter_example.h -@@ -139,7 +139,7 @@ class counter_example_constructor - if (m_structured_output) - { - std::cout << m_name << ": "; -- result.save("", mcrl2::lts::trace::tfPlain); // Write to stdout. -+ result.save("", mcrl2::lts::trace::tfLine); // Write to stdout. - } - else - { --- -2.35.1 - diff --git a/gnu/packages/patches/openssh-hurd.patch b/gnu/packages/patches/openssh-hurd.patch deleted file mode 100644 index 1ad09a7ee6..0000000000 --- a/gnu/packages/patches/openssh-hurd.patch +++ /dev/null @@ -1,30 +0,0 @@ -Author: Jan (janneke) Nieuwenhuizen" <janneke@gnu.org> -Not upstreamed. - -From 1ddae040d67e9a4ebcc3e1b95af1bff12c0f086b Mon Sep 17 00:00:00 2001 -From: "Jan (janneke) Nieuwenhuizen" <janneke@gnu.org> -Date: Tue, 7 Apr 2020 17:41:05 +0200 -Subject: [PATCH] Build fix for the Hurd. - -* gss-serv.c (ssh_gssapi_acquire_cred): Use HOST_NAME_MAX instead of -MAXHOSTNAMELEN. 
---- - gss-serv.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/gss-serv.c b/gss-serv.c -index 1d47870e7..22081c6f1 100644 ---- a/gss-serv.c -+++ b/gss-serv.c -@@ -107,7 +107,7 @@ ssh_gssapi_acquire_cred(Gssctxt *ctx) - gss_create_empty_oid_set(&status, &oidset); - gss_add_oid_set_member(&status, ctx->oid, &oidset); - -- if (gethostname(lname, MAXHOSTNAMELEN)) { -+ if (gethostname(lname, HOST_NAME_MAX)) { - gss_release_oid_set(&status, &oidset); - return (-1); - } --- -2.26.0 - diff --git a/gnu/packages/patches/po4a-partial-texinfo-menu-fix.patch b/gnu/packages/patches/po4a-partial-texinfo-menu-fix.patch new file mode 100644 index 0000000000..8a075fa74c --- /dev/null +++ b/gnu/packages/patches/po4a-partial-texinfo-menu-fix.patch @@ -0,0 +1,242 @@ +Submitted upstream: https://github.com/mquinson/po4a/pull/437 + +From 43db5c0b14ec2a8ba44d338bce024df87256457b Mon Sep 17 00:00:00 2001 +From: Maxim Cournoyer <maxim.cournoyer@gmail.com> +Date: Thu, 27 Jul 2023 17:44:49 -0400 +Subject: [PATCH] lib: Texinfo: Translate partial menu node names. + +Fixes <https://issues.guix.gnu.org/64881>. + +* lib/Locale/Po4a/Texinfo.pm (translate_buffer_menuentry): Refine +regexp, so that it matches menu entries lacking a description. +Only call 'translate_buffer' on the description if it was provided. +* t/fmt/texinfo/partialmenus.trans: New file. +* t/fmt/texinfo/partialmenus.texi: Likewise. +* t/fmt/texinfo/partialmenus.pot: Likewise. +* t/fmt/texinfo/partialmenus.po: Likewise. +* t/fmt/texinfo/partialmenus.norm: Likewise. +* t/fmt-texinfo.t: Register the new 'partialmenus' test. +--- + lib/Locale/Po4a/Texinfo.pm | 7 +++--- + t/fmt-texinfo.t | 2 +- + t/fmt/texinfo/partialmenus.norm | 21 +++++++++++++++++ + t/fmt/texinfo/partialmenus.po | 40 ++++++++++++++++++++++++++++++++ + t/fmt/texinfo/partialmenus.pot | 40 ++++++++++++++++++++++++++++++++ + t/fmt/texinfo/partialmenus.texi | 14 +++++++++++ + t/fmt/texinfo/partialmenus.trans | 21 +++++++++++++++++ + 7 files changed, 141 insertions(+), 4 deletions(-) + create mode 100644 t/fmt/texinfo/partialmenus.norm + create mode 100644 t/fmt/texinfo/partialmenus.po + create mode 100644 t/fmt/texinfo/partialmenus.pot + create mode 100644 t/fmt/texinfo/partialmenus.texi + create mode 100644 t/fmt/texinfo/partialmenus.trans + +diff --git a/lib/Locale/Po4a/Texinfo.pm b/lib/Locale/Po4a/Texinfo.pm +index b4750699..1c3a4bae 100644 +--- a/lib/Locale/Po4a/Texinfo.pm ++++ b/lib/Locale/Po4a/Texinfo.pm +@@ -336,7 +336,7 @@ sub translate_buffer_menuentry { + + my $translated_buffer = ""; + +- if ( $buffer =~ m/^(.*?)(::)\s+(.*)$/s ++ if ( $buffer =~ m/^(.*?)(::)(?:\s+(.*))?$/s + or $buffer =~ m/^(.*?: .*?)(\.)\s+(.*)$/s ) + { + my ( $name, $sep, $description ) = ( $1, $2, $3 ); +@@ -347,8 +347,9 @@ sub translate_buffer_menuentry { + $translated_buffer .= ' ' x ( $menu_sep_width - 1 - $l ); + $l = $menu_sep_width - 1; + } +- ( $t, @e ) = $self->translate_buffer( $description, $no_wrap, @env ); +- ++ if ($description) { ++ ( $t, @e ) = $self->translate_buffer( $description, $no_wrap, @env ); ++ } + # Replace newlines with space for proper wrapping + # See https://github.com/mquinson/po4a/issues/122 + $t =~ s/\n/ /sg; +diff --git a/t/fmt-texinfo.t b/t/fmt-texinfo.t +index 4b067e43..d9ed5df3 100644 +--- a/t/fmt-texinfo.t ++++ b/t/fmt-texinfo.t +@@ -10,7 +10,7 @@ use Testhelper; + + my @tests; + +-for my $test (qw(longmenu comments tindex)) { ++for my $test (qw(longmenu partialmenus comments tindex)) { + push @tests, + { + 'format' => 'texinfo', +diff --git 
a/t/fmt/texinfo/partialmenus.norm b/t/fmt/texinfo/partialmenus.norm +new file mode 100644 +index 00000000..99240682 +--- /dev/null ++++ b/t/fmt/texinfo/partialmenus.norm +@@ -0,0 +1,21 @@ ++\input texinfo ++@c =========================================================================== ++@c ++@c This file was generated with po4a. Translate the source file. ++@c ++@c =========================================================================== ++ ++ ++@c These menus do not contain a description, which used to cause a ++@c Texinfo menu entry to not be translated. ++@menu ++* A menu entry without any description:: A menu entry without any ++ description ++* Optional menu name: The menu node:: Optional menu name: The menu node ++@end menu ++ ++@node A menu entry without any description ++@chapter A menu entry without any description ++ ++@node The menu node ++@chapter Optional menu name +diff --git a/t/fmt/texinfo/partialmenus.po b/t/fmt/texinfo/partialmenus.po +new file mode 100644 +index 00000000..31a14443 +--- /dev/null ++++ b/t/fmt/texinfo/partialmenus.po +@@ -0,0 +1,40 @@ ++# SOME DESCRIPTIVE TITLE ++# Copyright (C) YEAR Free Software Foundation, Inc. ++# This file is distributed under the same license as the PACKAGE package. ++# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. ++# ++#, fuzzy ++msgid "" ++msgstr "" ++"Project-Id-Version: PACKAGE VERSION\n" ++"POT-Creation-Date: 2023-07-27 17:29-0400\n" ++"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" ++"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" ++"Language-Team: LANGUAGE <LL@li.org>\n" ++"Language: \n" ++"MIME-Version: 1.0\n" ++"Content-Type: text/plain; charset=UTF-8\n" ++"Content-Transfer-Encoding: 8bit\n" ++ ++#. type: chapter ++#: partialmenus.texi:8 partialmenus.texi:10 partialmenus.texi:11 ++#, no-wrap ++msgid "A menu entry without any description" ++msgstr "A MENU ENTRY WITHOUT ANY DESCRIPTION" ++ ++#. type: menuentry ++#: partialmenus.texi:8 ++msgid "Optional menu name: The menu node" ++msgstr "OPTIONAL MENU NAME: THE MENU NODE" ++ ++#. type: node ++#: partialmenus.texi:13 ++#, no-wrap ++msgid "The menu node" ++msgstr "THE MENU NODE" ++ ++#. type: chapter ++#: partialmenus.texi:14 ++#, no-wrap ++msgid "Optional menu name" ++msgstr "OPTIONAL MENU NAME" +diff --git a/t/fmt/texinfo/partialmenus.pot b/t/fmt/texinfo/partialmenus.pot +new file mode 100644 +index 00000000..0379f805 +--- /dev/null ++++ b/t/fmt/texinfo/partialmenus.pot +@@ -0,0 +1,40 @@ ++# SOME DESCRIPTIVE TITLE ++# Copyright (C) YEAR Free Software Foundation, Inc. ++# This file is distributed under the same license as the PACKAGE package. ++# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. ++# ++#, fuzzy ++msgid "" ++msgstr "" ++"Project-Id-Version: PACKAGE VERSION\n" ++"POT-Creation-Date: 2023-08-16 09:47-0400\n" ++"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" ++"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" ++"Language-Team: LANGUAGE <LL@li.org>\n" ++"Language: \n" ++"MIME-Version: 1.0\n" ++"Content-Type: text/plain; charset=UTF-8\n" ++"Content-Transfer-Encoding: 8bit\n" ++ ++#. type: chapter ++#: partialmenus.texi:8 partialmenus.texi:10 partialmenus.texi:11 ++#, no-wrap ++msgid "A menu entry without any description" ++msgstr "" ++ ++#. type: menuentry ++#: partialmenus.texi:8 ++msgid "Optional menu name: The menu node" ++msgstr "" ++ ++#. type: node ++#: partialmenus.texi:13 ++#, no-wrap ++msgid "The menu node" ++msgstr "" ++ ++#. 
type: chapter ++#: partialmenus.texi:14 ++#, no-wrap ++msgid "Optional menu name" ++msgstr "" +diff --git a/t/fmt/texinfo/partialmenus.texi b/t/fmt/texinfo/partialmenus.texi +new file mode 100644 +index 00000000..f8663a2b +--- /dev/null ++++ b/t/fmt/texinfo/partialmenus.texi +@@ -0,0 +1,14 @@ ++\input texinfo ++ ++@c These menus do not contain a description, which used to cause a ++@c Texinfo menu entry to not be translated. ++@menu ++* A menu entry without any description:: ++* Optional menu name: The menu node:: ++@end menu ++ ++@node A menu entry without any description ++@chapter A menu entry without any description ++ ++@node The menu node ++@chapter Optional menu name +diff --git a/t/fmt/texinfo/partialmenus.trans b/t/fmt/texinfo/partialmenus.trans +new file mode 100644 +index 00000000..0ef742a1 +--- /dev/null ++++ b/t/fmt/texinfo/partialmenus.trans +@@ -0,0 +1,21 @@ ++\input texinfo ++@c =========================================================================== ++@c ++@c This file was generated with po4a. Translate the source file. ++@c ++@c =========================================================================== ++ ++ ++@c These menus do not contain a description, which used to cause a ++@c Texinfo menu entry to not be translated. ++@menu ++* A MENU ENTRY WITHOUT ANY DESCRIPTION:: A MENU ENTRY WITHOUT ANY ++ DESCRIPTION ++* OPTIONAL MENU NAME: THE MENU NODE:: OPTIONAL MENU NAME: THE MENU NODE ++@end menu ++ ++@node A MENU ENTRY WITHOUT ANY DESCRIPTION ++@chapter A MENU ENTRY WITHOUT ANY DESCRIPTION ++ ++@node THE MENU NODE ++@chapter OPTIONAL MENU NAME + +base-commit: 5b1cd768afdf4e9445812c5d43428495a0fde3c6 +-- +2.41.0 + diff --git a/gnu/packages/patches/rust-nettle-sys-disable-vendor.patch b/gnu/packages/patches/rust-nettle-sys-disable-vendor.patch deleted file mode 100644 index ae5ef5ebe0..0000000000 --- a/gnu/packages/patches/rust-nettle-sys-disable-vendor.patch +++ /dev/null @@ -1,48 +0,0 @@ -Subject: nettle-sys: clear out "vendored" feature cruft from build.rs -From: Daniel Kahn Gillmor's avatarDaniel Kahn Gillmor <dkg@fifthhorseman.net> - -https://salsa.debian.org/rust-team/debcargo-conf/-/commit/0c71150ad26bb66a8396dcdab055181af232ddc5 -https://sources.debian.org/src/rust-nettle-sys/2.0.4-3/debian/patches/disable-vendor.diff/ ---- a/Cargo.toml 2019-10-23 13:08:07.000000000 -0400 -+++ b/Cargo.toml 2019-10-23 14:08:46.644064014 -0400 -@@ -29,12 +29,9 @@ - version = "0.51.1" - default-features = false - --[build-dependencies.nettle-src] --version = "3.5.1-0" --optional = true -- - [build-dependencies.pkg-config] - version = "0.3" - - [features] - vendored = ["nettle-src"] -+nettle-src = [] -diff --git a/build.rs b/build.rs -index 44f7af3..ede4b2f 100644 ---- a/build.rs -+++ b/build.rs -@@ -1,7 +1,5 @@ - extern crate bindgen; - extern crate pkg_config; --#[cfg(feature = "vendored")] --extern crate nettle_src; - - use std::env; - use std::fs; -@@ -36,14 +34,6 @@ fn main() { - println!("cargo:rerun-if-env-changed=NETTLE_STATIC"); - println!("cargo:rerun-if-env-changed={}", NETTLE_PREGENERATED_BINDINGS); - -- #[cfg(feature = "vendored")] -- { -- let artifacts = nettle_src::Build::new().build(); -- println!("cargo:vendored=1"); -- env::set_var("PKG_CONFIG_PATH", -- artifacts.lib_dir().join("pkgconfig")); -- } -- - let nettle = pkg_config::probe_library("nettle hogweed").unwrap(); - - let mode = match env::var_os("NETTLE_STATIC") { diff --git a/gnu/packages/patches/rust-ring-0.16-missing-files.patch b/gnu/packages/patches/rust-ring-0.16-missing-files.patch new file mode 100644 
index 0000000000..fa2f94a801 --- /dev/null +++ b/gnu/packages/patches/rust-ring-0.16-missing-files.patch @@ -0,0 +1,2293 @@ +These 4 files exist in the git repository for rust-ring, and are from +the same commit where 0.16.20 is taken from. They were not added to the +include list in Cargo.toml, so they were not added to the tarball. + +--- + crypto/curve25519/make_curve25519_tables.py | 222 +++++ + crypto/fipsmodule/aes/asm/vpaes-armv7.pl | 896 ++++++++++++++++++ + crypto/fipsmodule/aes/asm/vpaes-armv8.pl | 837 ++++++++++++++++ + .../fipsmodule/modes/asm/ghash-neon-armv8.pl | 294 ++++++ + 4 files changed, 2249 insertions(+) + create mode 100755 crypto/curve25519/make_curve25519_tables.py + create mode 100644 crypto/fipsmodule/aes/asm/vpaes-armv7.pl + create mode 100755 crypto/fipsmodule/aes/asm/vpaes-armv8.pl + create mode 100644 crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl + +diff --git a/crypto/curve25519/make_curve25519_tables.py b/crypto/curve25519/make_curve25519_tables.py +new file mode 100755 +index 0000000..50dee2a +--- /dev/null ++++ b/crypto/curve25519/make_curve25519_tables.py +@@ -0,0 +1,222 @@ ++#!/usr/bin/env python ++# coding=utf-8 ++# Copyright (c) 2020, Google Inc. ++# ++# Permission to use, copy, modify, and/or distribute this software for any ++# purpose with or without fee is hereby granted, provided that the above ++# copyright notice and this permission notice appear in all copies. ++# ++# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY ++# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION ++# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN ++# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ++ ++import StringIO ++import subprocess ++ ++# Base field Z_p ++p = 2**255 - 19 ++ ++def modp_inv(x): ++ return pow(x, p-2, p) ++ ++# Square root of -1 ++modp_sqrt_m1 = pow(2, (p-1) // 4, p) ++ ++# Compute corresponding x-coordinate, with low bit corresponding to ++# sign, or return None on failure ++def recover_x(y, sign): ++ if y >= p: ++ return None ++ x2 = (y*y-1) * modp_inv(d*y*y+1) ++ if x2 == 0: ++ if sign: ++ return None ++ else: ++ return 0 ++ ++ # Compute square root of x2 ++ x = pow(x2, (p+3) // 8, p) ++ if (x*x - x2) % p != 0: ++ x = x * modp_sqrt_m1 % p ++ if (x*x - x2) % p != 0: ++ return None ++ ++ if (x & 1) != sign: ++ x = p - x ++ return x ++ ++# Curve constant ++d = -121665 * modp_inv(121666) % p ++ ++# Base point ++g_y = 4 * modp_inv(5) % p ++g_x = recover_x(g_y, 0) ++ ++# Points are represented as affine tuples (x, y). 
++ ++def point_add(P, Q): ++ x1, y1 = P ++ x2, y2 = Q ++ x3 = ((x1*y2 + y1*x2) * modp_inv(1 + d*x1*x2*y1*y2)) % p ++ y3 = ((y1*y2 + x1*x2) * modp_inv(1 - d*x1*x2*y1*y2)) % p ++ return (x3, y3) ++ ++# Computes Q = s * P ++def point_mul(s, P): ++ Q = (0, 1) # Neutral element ++ while s > 0: ++ if s & 1: ++ Q = point_add(Q, P) ++ P = point_add(P, P) ++ s >>= 1 ++ return Q ++ ++def to_bytes(x): ++ ret = bytearray(32) ++ for i in range(len(ret)): ++ ret[i] = x % 256 ++ x >>= 8 ++ assert x == 0 ++ return ret ++ ++def to_ge_precomp(P): ++ # typedef struct { ++ # fe_loose yplusx; ++ # fe_loose yminusx; ++ # fe_loose xy2d; ++ # } ge_precomp; ++ x, y = P ++ return ((y + x) % p, (y - x) % p, (x * y * 2 * d) % p) ++ ++def to_base_25_5(x): ++ limbs = (26, 25, 26, 25, 26, 25, 26, 25, 26, 25) ++ ret = [] ++ for l in limbs: ++ ret.append(x & ((1<<l) - 1)) ++ x >>= l ++ assert x == 0 ++ return ret ++ ++def to_base_51(x): ++ ret = [] ++ for _ in range(5): ++ ret.append(x & ((1<<51) - 1)) ++ x >>= 51 ++ assert x == 0 ++ return ret ++ ++def to_literal(x): ++ ret = "{{\n#if defined(BORINGSSL_CURVE25519_64BIT)\n" ++ ret += ", ".join(map(str, to_base_51(x))) ++ ret += "\n#else\n" ++ ret += ", ".join(map(str, to_base_25_5(x))) ++ ret += "\n#endif\n}}" ++ return ret ++ ++def main(): ++ d2 = (2 * d) % p ++ ++ small_precomp = bytearray() ++ for i in range(1, 16): ++ s = (i&1) | ((i&2) << (64-1)) | ((i&4) << (128-2)) | ((i&8) << (192-3)) ++ P = point_mul(s, (g_x, g_y)) ++ small_precomp += to_bytes(P[0]) ++ small_precomp += to_bytes(P[1]) ++ ++ large_precomp = [] ++ for i in range(32): ++ large_precomp.append([]) ++ for j in range(8): ++ P = point_mul((j + 1) << (i * 8), (g_x, g_y)) ++ large_precomp[-1].append(to_ge_precomp(P)) ++ ++ bi_precomp = [] ++ for i in range(8): ++ P = point_mul(2*i + 1, (g_x, g_y)) ++ bi_precomp.append(to_ge_precomp(P)) ++ ++ ++ buf = StringIO.StringIO() ++ buf.write("""/* Copyright (c) 2020, Google Inc. ++ * ++ * Permission to use, copy, modify, and/or distribute this software for any ++ * purpose with or without fee is hereby granted, provided that the above ++ * copyright notice and this permission notice appear in all copies. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY ++ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION ++ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN ++ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ ++ ++// This file is generated from ++// ./make_curve25519_tables.py > curve25519_tables.h ++ ++ ++static const fe d = """) ++ buf.write(to_literal(d)) ++ buf.write("""; ++ ++static const fe sqrtm1 = """) ++ buf.write(to_literal(modp_sqrt_m1)) ++ buf.write("""; ++ ++static const fe d2 = """) ++ buf.write(to_literal(d2)) ++ buf.write("""; ++ ++#if defined(OPENSSL_SMALL) ++ ++// This block of code replaces the standard base-point table with a much smaller ++// one. The standard table is 30,720 bytes while this one is just 960. ++// ++// This table contains 15 pairs of group elements, (x, y), where each field ++// element is serialised with |fe_tobytes|. If |i| is the index of the group ++// element then consider i+1 as a four-bit number: (i₀, i₁, i₂, i₃) (where i₀ ++// is the most significant bit). 
The value of the group element is then: ++// (i₀×2^192 + i₁×2^128 + i₂×2^64 + i₃)G, where G is the generator. ++static const uint8_t k25519SmallPrecomp[15 * 2 * 32] = {""") ++ for i, b in enumerate(small_precomp): ++ buf.write("0x%02x, " % b) ++ buf.write(""" ++}; ++ ++#else ++ ++// k25519Precomp[i][j] = (j+1)*256^i*B ++static const ge_precomp k25519Precomp[32][8] = { ++""") ++ for child in large_precomp: ++ buf.write("{\n") ++ for val in child: ++ buf.write("{\n") ++ for term in val: ++ buf.write(to_literal(term) + ",\n") ++ buf.write("},\n") ++ buf.write("},\n") ++ buf.write("""}; ++ ++#endif // OPENSSL_SMALL ++ ++// Bi[i] = (2*i+1)*B ++static const ge_precomp Bi[8] = { ++""") ++ for val in bi_precomp: ++ buf.write("{\n") ++ for term in val: ++ buf.write(to_literal(term) + ",\n") ++ buf.write("},\n") ++ buf.write("""}; ++""") ++ ++ proc = subprocess.Popen(["clang-format"], stdin=subprocess.PIPE) ++ proc.communicate(buf.getvalue()) ++ ++if __name__ == "__main__": ++ main() +diff --git a/crypto/fipsmodule/aes/asm/vpaes-armv7.pl b/crypto/fipsmodule/aes/asm/vpaes-armv7.pl +new file mode 100644 +index 0000000..d36a97a +--- /dev/null ++++ b/crypto/fipsmodule/aes/asm/vpaes-armv7.pl +@@ -0,0 +1,896 @@ ++#! /usr/bin/env perl ++# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved. ++# ++# Licensed under the OpenSSL license (the "License"). You may not use ++# this file except in compliance with the License. You can obtain a copy ++# in the file LICENSE in the source distribution or at ++# https://www.openssl.org/source/license.html ++ ++ ++###################################################################### ++## Constant-time SSSE3 AES core implementation. ++## version 0.1 ++## ++## By Mike Hamburg (Stanford University), 2009 ++## Public domain. ++## ++## For details see http://shiftleft.org/papers/vector_aes/ and ++## http://crypto.stanford.edu/vpaes/. ++## ++###################################################################### ++# Adapted from the original x86_64 version and <appro@openssl.org>'s ARMv8 ++# version. ++# ++# armv7, aarch64, and x86_64 differ in several ways: ++# ++# * x86_64 SSSE3 instructions are two-address (destination operand is also a ++# source), while NEON is three-address (destination operand is separate from ++# two sources). ++# ++# * aarch64 has 32 SIMD registers available, while x86_64 and armv7 have 16. ++# ++# * x86_64 instructions can take memory references, while ARM is a load/store ++# architecture. This means we sometimes need a spare register. ++# ++# * aarch64 and x86_64 have 128-bit byte shuffle instructions (tbl and pshufb), ++# while armv7 only has a 64-bit byte shuffle (vtbl). ++# ++# This means this armv7 version must be a mix of both aarch64 and x86_64 ++# implementations. armv7 and aarch64 have analogous SIMD instructions, so we ++# base the instructions on aarch64. However, we cannot use aarch64's register ++# allocation. x86_64's register count matches, but x86_64 is two-address. ++# vpaes-armv8.pl already accounts for this in the comments, which use ++# three-address AVX instructions instead of the original SSSE3 ones. We base ++# register usage on these comments, which are preserved in this file. ++# ++# This means we do not use separate input and output registers as in aarch64 and ++# cannot pin as many constants in the preheat functions. However, the load/store ++# architecture means we must still deviate from x86_64 in places. ++# ++# Next, we account for the byte shuffle instructions. 
vtbl takes 64-bit source ++# and destination and 128-bit table. Fortunately, armv7 also allows addressing ++# upper and lower halves of each 128-bit register. The lower half of q{N} is ++# d{2*N}. The upper half is d{2*N+1}. Instead of the following non-existent ++# instruction, ++# ++# vtbl.8 q0, q1, q2 @ Index each of q2's 16 bytes into q1. Store in q0. ++# ++# we write: ++# ++# vtbl.8 d0, q1, d4 @ Index each of d4's 8 bytes into q1. Store in d0. ++# vtbl.8 d1, q1, d5 @ Index each of d5's 8 bytes into q1. Store in d1. ++# ++# For readability, we write d0 and d1 as q0#lo and q0#hi, respectively and ++# post-process before outputting. (This is adapted from ghash-armv4.pl.) Note, ++# however, that destination (q0) and table (q1) registers may no longer match. ++# We adjust the register usage from x86_64 to avoid this. (Unfortunately, the ++# two-address pshufb always matched these operands, so this is common.) ++# ++# This file also runs against the limit of ARMv7's ADR pseudo-instruction. ADR ++# expands to an ADD or SUB of the pc register to find an address. That immediate ++# must fit in ARM's encoding scheme: 8 bits of constant and 4 bits of rotation. ++# This means larger values must be more aligned. ++# ++# ARM additionally has two encodings, ARM and Thumb mode. Our assembly files may ++# use either encoding (do we actually need to support this?). In ARM mode, the ++# distances get large enough to require 16-byte alignment. Moving constants ++# closer to their use resolves most of this, but common constants in ++# _vpaes_consts are used by the whole file. Affected ADR instructions must be ++# placed at 8 mod 16 (the pc register is 8 ahead). Instructions with this ++# constraint have been commented. ++# ++# For details on ARM's immediate value encoding scheme, see ++# https://alisdair.mcdiarmid.org/arm-immediate-value-encoding/ ++# ++# Finally, a summary of armv7 and aarch64 SIMD syntax differences: ++# ++# * armv7 prefixes SIMD instructions with 'v', while aarch64 does not. ++# ++# * armv7 SIMD registers are named like q0 (and d0 for the half-width ones). ++# aarch64 names registers like v0, and denotes half-width operations in an ++# instruction suffix (see below). ++# ++# * aarch64 embeds size and lane information in register suffixes. v0.16b is ++# 16 bytes, v0.8h is eight u16s, v0.4s is four u32s, and v0.2d is two u64s. ++# armv7 embeds the total size in the register name (see above) and the size of ++# each element in an instruction suffix, which may look like vmov.i8, ++# vshr.u8, or vtbl.8, depending on instruction. 
++ ++use strict; ++ ++my $flavour = shift; ++my $output; ++while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; ++my $dir=$1; ++my $xlate; ++( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or ++die "can't locate arm-xlate.pl"; ++ ++open OUT,"| \"$^X\" $xlate $flavour $output"; ++*STDOUT=*OUT; ++ ++my $code = ""; ++ ++$code.=<<___; ++.syntax unified ++ ++.arch armv7-a ++.fpu neon ++ ++#if defined(__thumb2__) ++.thumb ++#else ++.code 32 ++#endif ++ ++.text ++ ++.type _vpaes_consts,%object ++.align 7 @ totally strategic alignment ++_vpaes_consts: ++.Lk_mc_forward: @ mc_forward ++ .quad 0x0407060500030201, 0x0C0F0E0D080B0A09 ++ .quad 0x080B0A0904070605, 0x000302010C0F0E0D ++ .quad 0x0C0F0E0D080B0A09, 0x0407060500030201 ++ .quad 0x000302010C0F0E0D, 0x080B0A0904070605 ++.Lk_mc_backward:@ mc_backward ++ .quad 0x0605040702010003, 0x0E0D0C0F0A09080B ++ .quad 0x020100030E0D0C0F, 0x0A09080B06050407 ++ .quad 0x0E0D0C0F0A09080B, 0x0605040702010003 ++ .quad 0x0A09080B06050407, 0x020100030E0D0C0F ++.Lk_sr: @ sr ++ .quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 ++ .quad 0x030E09040F0A0500, 0x0B06010C07020D08 ++ .quad 0x0F060D040B020900, 0x070E050C030A0108 ++ .quad 0x0B0E0104070A0D00, 0x0306090C0F020508 ++ ++@ ++@ "Hot" constants ++@ ++.Lk_inv: @ inv, inva ++ .quad 0x0E05060F0D080180, 0x040703090A0B0C02 ++ .quad 0x01040A060F0B0780, 0x030D0E0C02050809 ++.Lk_ipt: @ input transform (lo, hi) ++ .quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 ++ .quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 ++.Lk_sbo: @ sbou, sbot ++ .quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 ++ .quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA ++.Lk_sb1: @ sb1u, sb1t ++ .quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF ++ .quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 ++.Lk_sb2: @ sb2u, sb2t ++ .quad 0x69EB88400AE12900, 0xC2A163C8AB82234A ++ .quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD ++ ++.asciz "Vector Permutation AES for ARMv7 NEON, Mike Hamburg (Stanford University)" ++.size _vpaes_consts,.-_vpaes_consts ++.align 6 ++___ ++ ++{ ++my ($inp,$out,$key) = map("r$_", (0..2)); ++ ++my ($invlo,$invhi) = map("q$_", (10..11)); ++my ($sb1u,$sb1t,$sb2u,$sb2t) = map("q$_", (12..15)); ++ ++$code.=<<___; ++@@ ++@@ _aes_preheat ++@@ ++@@ Fills q9-q15 as specified below. ++@@ ++.type _vpaes_preheat,%function ++.align 4 ++_vpaes_preheat: ++ adr r10, .Lk_inv ++ vmov.i8 q9, #0x0f @ .Lk_s0F ++ vld1.64 {q10,q11}, [r10]! @ .Lk_inv ++ add r10, r10, #64 @ Skip .Lk_ipt, .Lk_sbo ++ vld1.64 {q12,q13}, [r10]! @ .Lk_sb1 ++ vld1.64 {q14,q15}, [r10] @ .Lk_sb2 ++ bx lr ++ ++@@ ++@@ _aes_encrypt_core ++@@ ++@@ AES-encrypt q0. ++@@ ++@@ Inputs: ++@@ q0 = input ++@@ q9-q15 as in _vpaes_preheat ++@@ [$key] = scheduled keys ++@@ ++@@ Output in q0 ++@@ Clobbers q1-q5, r8-r11 ++@@ Preserves q6-q8 so you get some local vectors ++@@ ++@@ ++.type _vpaes_encrypt_core,%function ++.align 4 ++_vpaes_encrypt_core: ++ mov r9, $key ++ ldr r8, [$key,#240] @ pull rounds ++ adr r11, .Lk_ipt ++ @ vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo ++ @ vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi ++ vld1.64 {q2, q3}, [r11] ++ adr r11, .Lk_mc_forward+16 ++ vld1.64 {q5}, [r9]! 
@ vmovdqu (%r9), %xmm5 # round0 key ++ vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1 ++ vshr.u8 q0, q0, #4 @ vpsrlb \$4, %xmm0, %xmm0 ++ vtbl.8 q1#lo, {q2}, q1#lo @ vpshufb %xmm1, %xmm2, %xmm1 ++ vtbl.8 q1#hi, {q2}, q1#hi ++ vtbl.8 q2#lo, {q3}, q0#lo @ vpshufb %xmm0, %xmm3, %xmm2 ++ vtbl.8 q2#hi, {q3}, q0#hi ++ veor q0, q1, q5 @ vpxor %xmm5, %xmm1, %xmm0 ++ veor q0, q0, q2 @ vpxor %xmm2, %xmm0, %xmm0 ++ ++ @ .Lenc_entry ends with a bnz instruction which is normally paired with ++ @ subs in .Lenc_loop. ++ tst r8, r8 ++ b .Lenc_entry ++ ++.align 4 ++.Lenc_loop: ++ @ middle of middle round ++ add r10, r11, #0x40 ++ vtbl.8 q4#lo, {$sb1t}, q2#lo @ vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u ++ vtbl.8 q4#hi, {$sb1t}, q2#hi ++ vld1.64 {q1}, [r11]! @ vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[] ++ vtbl.8 q0#lo, {$sb1u}, q3#lo @ vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t ++ vtbl.8 q0#hi, {$sb1u}, q3#hi ++ veor q4, q4, q5 @ vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ vtbl.8 q5#lo, {$sb2t}, q2#lo @ vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u ++ vtbl.8 q5#hi, {$sb2t}, q2#hi ++ veor q0, q0, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ vtbl.8 q2#lo, {$sb2u}, q3#lo @ vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t ++ vtbl.8 q2#hi, {$sb2u}, q3#hi ++ vld1.64 {q4}, [r10] @ vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] ++ vtbl.8 q3#lo, {q0}, q1#lo @ vpshufb %xmm1, %xmm0, %xmm3 # 0 = B ++ vtbl.8 q3#hi, {q0}, q1#hi ++ veor q2, q2, q5 @ vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A ++ @ Write to q5 instead of q0, so the table and destination registers do ++ @ not overlap. ++ vtbl.8 q5#lo, {q0}, q4#lo @ vpshufb %xmm4, %xmm0, %xmm0 # 3 = D ++ vtbl.8 q5#hi, {q0}, q4#hi ++ veor q3, q3, q2 @ vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B ++ vtbl.8 q4#lo, {q3}, q1#lo @ vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C ++ vtbl.8 q4#hi, {q3}, q1#hi ++ @ Here we restore the original q0/q5 usage. ++ veor q0, q5, q3 @ vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D ++ and r11, r11, #~(1<<6) @ and \$0x30, %r11 # ... mod 4 ++ veor q0, q0, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D ++ subs r8, r8, #1 @ nr-- ++ ++.Lenc_entry: ++ @ top of round ++ vand q1, q0, q9 @ vpand %xmm0, %xmm9, %xmm1 # 0 = k ++ vshr.u8 q0, q0, #4 @ vpsrlb \$4, %xmm0, %xmm0 # 1 = i ++ vtbl.8 q5#lo, {$invhi}, q1#lo @ vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k ++ vtbl.8 q5#hi, {$invhi}, q1#hi ++ veor q1, q1, q0 @ vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ vtbl.8 q3#lo, {$invlo}, q0#lo @ vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ vtbl.8 q3#hi, {$invlo}, q0#hi ++ vtbl.8 q4#lo, {$invlo}, q1#lo @ vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ vtbl.8 q4#hi, {$invlo}, q1#hi ++ veor q3, q3, q5 @ vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ veor q4, q4, q5 @ vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ vtbl.8 q2#lo, {$invlo}, q3#lo @ vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak ++ vtbl.8 q2#hi, {$invlo}, q3#hi ++ vtbl.8 q3#lo, {$invlo}, q4#lo @ vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak ++ vtbl.8 q3#hi, {$invlo}, q4#hi ++ veor q2, q2, q1 @ vpxor %xmm1, %xmm2, %xmm2 # 2 = io ++ veor q3, q3, q0 @ vpxor %xmm0, %xmm3, %xmm3 # 3 = jo ++ vld1.64 {q5}, [r9]! @ vmovdqu (%r9), %xmm5 ++ bne .Lenc_loop ++ ++ @ middle of last round ++ add r10, r11, #0x80 ++ ++ adr r11, .Lk_sbo ++ @ Read to q1 instead of q4, so the vtbl.8 instruction below does not ++ @ overlap table and destination registers. ++ vld1.64 {q1}, [r11]! 
@ vmovdqa -0x60(%r10), %xmm4 # 3 : sbou ++ vld1.64 {q0}, [r11] @ vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 ++ vtbl.8 q4#lo, {q1}, q2#lo @ vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou ++ vtbl.8 q4#hi, {q1}, q2#hi ++ vld1.64 {q1}, [r10] @ vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[] ++ @ Write to q2 instead of q0 below, to avoid overlapping table and ++ @ destination registers. ++ vtbl.8 q2#lo, {q0}, q3#lo @ vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t ++ vtbl.8 q2#hi, {q0}, q3#hi ++ veor q4, q4, q5 @ vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ veor q2, q2, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ @ Here we restore the original q0/q2 usage. ++ vtbl.8 q0#lo, {q2}, q1#lo @ vpshufb %xmm1, %xmm0, %xmm0 ++ vtbl.8 q0#hi, {q2}, q1#hi ++ bx lr ++.size _vpaes_encrypt_core,.-_vpaes_encrypt_core ++ ++.globl GFp_vpaes_encrypt ++.type GFp_vpaes_encrypt,%function ++.align 4 ++GFp_vpaes_encrypt: ++ @ _vpaes_encrypt_core uses r8-r11. Round up to r7-r11 to maintain stack ++ @ alignment. ++ stmdb sp!, {r7-r11,lr} ++ @ _vpaes_encrypt_core uses q4-q5 (d8-d11), which are callee-saved. ++ vstmdb sp!, {d8-d11} ++ ++ vld1.64 {q0}, [$inp] ++ bl _vpaes_preheat ++ bl _vpaes_encrypt_core ++ vst1.64 {q0}, [$out] ++ ++ vldmia sp!, {d8-d11} ++ ldmia sp!, {r7-r11, pc} @ return ++.size GFp_vpaes_encrypt,.-GFp_vpaes_encrypt ++___ ++} ++{ ++my ($inp,$bits,$out,$dir)=("r0","r1","r2","r3"); ++my ($rcon,$s0F,$invlo,$invhi,$s63) = map("q$_",(8..12)); ++ ++$code.=<<___; ++@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ ++@@ @@ ++@@ AES key schedule @@ ++@@ @@ ++@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ ++ ++@ This function diverges from both x86_64 and armv7 in which constants are ++@ pinned. x86_64 has a common preheat function for all operations. aarch64 ++@ separates them because it has enough registers to pin nearly all constants. ++@ armv7 does not have enough registers, but needing explicit loads and stores ++@ also complicates using x86_64's register allocation directly. ++@ ++@ We pin some constants for convenience and leave q14 and q15 free to load ++@ others on demand. ++ ++@ ++@ Key schedule constants ++@ ++.type _vpaes_key_consts,%object ++.align 4 ++_vpaes_key_consts: ++.Lk_rcon: @ rcon ++ .quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 ++ ++.Lk_opt: @ output transform ++ .quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 ++ .quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 ++.Lk_deskew: @ deskew tables: inverts the sbox's "skew" ++ .quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A ++ .quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 ++.size _vpaes_key_consts,.-_vpaes_key_consts ++ ++.type _vpaes_key_preheat,%function ++.align 4 ++_vpaes_key_preheat: ++ adr r11, .Lk_rcon ++ vmov.i8 $s63, #0x5b @ .Lk_s63 ++ adr r10, .Lk_inv @ Must be aligned to 8 mod 16. ++ vmov.i8 $s0F, #0x0f @ .Lk_s0F ++ vld1.64 {$invlo,$invhi}, [r10] @ .Lk_inv ++ vld1.64 {$rcon}, [r11] @ .Lk_rcon ++ bx lr ++.size _vpaes_key_preheat,.-_vpaes_key_preheat ++ ++.type _vpaes_schedule_core,%function ++.align 4 ++_vpaes_schedule_core: ++ @ We only need to save lr, but ARM requires an 8-byte stack alignment, ++ @ so save an extra register. ++ stmdb sp!, {r3,lr} ++ ++ bl _vpaes_key_preheat @ load the tables ++ ++ adr r11, .Lk_ipt @ Must be aligned to 8 mod 16. ++ vld1.64 {q0}, [$inp]! @ vmovdqu (%rdi), %xmm0 # load key (unaligned) ++ ++ @ input transform ++ @ Use q4 here rather than q3 so .Lschedule_am_decrypting does not ++ @ overlap table and destination. 
++ vmov q4, q0 @ vmovdqa %xmm0, %xmm3 ++ bl _vpaes_schedule_transform ++ adr r10, .Lk_sr @ Must be aligned to 8 mod 16. ++ vmov q7, q0 @ vmovdqa %xmm0, %xmm7 ++ ++ add r8, r8, r10 ++ ++ @ encrypting, output zeroth round key after transform ++ vst1.64 {q0}, [$out] @ vmovdqu %xmm0, (%rdx) ++ ++ @ *ring*: Decryption removed. ++ ++.Lschedule_go: ++ cmp $bits, #192 @ cmp \$192, %esi ++ bhi .Lschedule_256 ++ @ 128: fall though ++ ++@@ ++@@ .schedule_128 ++@@ ++@@ 128-bit specific part of key schedule. ++@@ ++@@ This schedule is really simple, because all its parts ++@@ are accomplished by the subroutines. ++@@ ++.Lschedule_128: ++ mov $inp, #10 @ mov \$10, %esi ++ ++.Loop_schedule_128: ++ bl _vpaes_schedule_round ++ subs $inp, $inp, #1 @ dec %esi ++ beq .Lschedule_mangle_last ++ bl _vpaes_schedule_mangle @ write output ++ b .Loop_schedule_128 ++ ++@@ ++@@ .aes_schedule_256 ++@@ ++@@ 256-bit specific part of key schedule. ++@@ ++@@ The structure here is very similar to the 128-bit ++@@ schedule, but with an additional "low side" in ++@@ q6. The low side's rounds are the same as the ++@@ high side's, except no rcon and no rotation. ++@@ ++.align 4 ++.Lschedule_256: ++ vld1.64 {q0}, [$inp] @ vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) ++ bl _vpaes_schedule_transform @ input transform ++ mov $inp, #7 @ mov \$7, %esi ++ ++.Loop_schedule_256: ++ bl _vpaes_schedule_mangle @ output low result ++ vmov q6, q0 @ vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6 ++ ++ @ high round ++ bl _vpaes_schedule_round ++ subs $inp, $inp, #1 @ dec %esi ++ beq .Lschedule_mangle_last ++ bl _vpaes_schedule_mangle ++ ++ @ low round. swap xmm7 and xmm6 ++ vdup.32 q0, q0#hi[1] @ vpshufd \$0xFF, %xmm0, %xmm0 ++ vmov.i8 q4, #0 ++ vmov q5, q7 @ vmovdqa %xmm7, %xmm5 ++ vmov q7, q6 @ vmovdqa %xmm6, %xmm7 ++ bl _vpaes_schedule_low_round ++ vmov q7, q5 @ vmovdqa %xmm5, %xmm7 ++ ++ b .Loop_schedule_256 ++ ++@@ ++@@ .aes_schedule_mangle_last ++@@ ++@@ Mangler for last round of key schedule ++@@ Mangles q0 ++@@ when encrypting, outputs out(q0) ^ 63 ++@@ when decrypting, outputs unskew(q0) ++@@ ++@@ Always called right before return... 
jumps to cleanup and exits ++@@ ++.align 4 ++.Lschedule_mangle_last: ++ @ schedule last round key from xmm0 ++ adr r11, .Lk_deskew @ lea .Lk_deskew(%rip),%r11 # prepare to deskew ++ ++ @ encrypting ++ vld1.64 {q1}, [r8] @ vmovdqa (%r8,%r10),%xmm1 ++ adr r11, .Lk_opt @ lea .Lk_opt(%rip), %r11 # prepare to output transform ++ add $out, $out, #32 @ add \$32, %rdx ++ vmov q2, q0 ++ vtbl.8 q0#lo, {q2}, q1#lo @ vpshufb %xmm1, %xmm0, %xmm0 # output permute ++ vtbl.8 q0#hi, {q2}, q1#hi ++ ++.Lschedule_mangle_last_dec: ++ sub $out, $out, #16 @ add \$-16, %rdx ++ veor q0, q0, $s63 @ vpxor .Lk_s63(%rip), %xmm0, %xmm0 ++ bl _vpaes_schedule_transform @ output transform ++ vst1.64 {q0}, [$out] @ vmovdqu %xmm0, (%rdx) # save last key ++ ++ @ cleanup ++ veor q0, q0, q0 @ vpxor %xmm0, %xmm0, %xmm0 ++ veor q1, q1, q1 @ vpxor %xmm1, %xmm1, %xmm1 ++ veor q2, q2, q2 @ vpxor %xmm2, %xmm2, %xmm2 ++ veor q3, q3, q3 @ vpxor %xmm3, %xmm3, %xmm3 ++ veor q4, q4, q4 @ vpxor %xmm4, %xmm4, %xmm4 ++ veor q5, q5, q5 @ vpxor %xmm5, %xmm5, %xmm5 ++ veor q6, q6, q6 @ vpxor %xmm6, %xmm6, %xmm6 ++ veor q7, q7, q7 @ vpxor %xmm7, %xmm7, %xmm7 ++ ldmia sp!, {r3,pc} @ return ++.size _vpaes_schedule_core,.-_vpaes_schedule_core ++ ++@@ ++@@ .aes_schedule_round ++@@ ++@@ Runs one main round of the key schedule on q0, q7 ++@@ ++@@ Specifically, runs subbytes on the high dword of q0 ++@@ then rotates it by one byte and xors into the low dword of ++@@ q7. ++@@ ++@@ Adds rcon from low byte of q8, then rotates q8 for ++@@ next rcon. ++@@ ++@@ Smears the dwords of q7 by xoring the low into the ++@@ second low, result into third, result into highest. ++@@ ++@@ Returns results in q7 = q0. ++@@ Clobbers q1-q4, r11. ++@@ ++.type _vpaes_schedule_round,%function ++.align 4 ++_vpaes_schedule_round: ++ @ extract rcon from xmm8 ++ vmov.i8 q4, #0 @ vpxor %xmm4, %xmm4, %xmm4 ++ vext.8 q1, $rcon, q4, #15 @ vpalignr \$15, %xmm8, %xmm4, %xmm1 ++ vext.8 $rcon, $rcon, $rcon, #15 @ vpalignr \$15, %xmm8, %xmm8, %xmm8 ++ veor q7, q7, q1 @ vpxor %xmm1, %xmm7, %xmm7 ++ ++ @ rotate ++ vdup.32 q0, q0#hi[1] @ vpshufd \$0xFF, %xmm0, %xmm0 ++ vext.8 q0, q0, q0, #1 @ vpalignr \$1, %xmm0, %xmm0, %xmm0 ++ ++ @ fall through... ++ ++ @ low round: same as high round, but no rotation and no rcon. ++_vpaes_schedule_low_round: ++ @ The x86_64 version pins .Lk_sb1 in %xmm13 and .Lk_sb1+16 in %xmm12. ++ @ We pin other values in _vpaes_key_preheat, so load them now. 
++ adr r11, .Lk_sb1 ++ vld1.64 {q14,q15}, [r11] ++ ++ @ smear xmm7 ++ vext.8 q1, q4, q7, #12 @ vpslldq \$4, %xmm7, %xmm1 ++ veor q7, q7, q1 @ vpxor %xmm1, %xmm7, %xmm7 ++ vext.8 q4, q4, q7, #8 @ vpslldq \$8, %xmm7, %xmm4 ++ ++ @ subbytes ++ vand q1, q0, $s0F @ vpand %xmm9, %xmm0, %xmm1 # 0 = k ++ vshr.u8 q0, q0, #4 @ vpsrlb \$4, %xmm0, %xmm0 # 1 = i ++ veor q7, q7, q4 @ vpxor %xmm4, %xmm7, %xmm7 ++ vtbl.8 q2#lo, {$invhi}, q1#lo @ vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k ++ vtbl.8 q2#hi, {$invhi}, q1#hi ++ veor q1, q1, q0 @ vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ vtbl.8 q3#lo, {$invlo}, q0#lo @ vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ vtbl.8 q3#hi, {$invlo}, q0#hi ++ veor q3, q3, q2 @ vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ vtbl.8 q4#lo, {$invlo}, q1#lo @ vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ vtbl.8 q4#hi, {$invlo}, q1#hi ++ veor q7, q7, $s63 @ vpxor .Lk_s63(%rip), %xmm7, %xmm7 ++ vtbl.8 q3#lo, {$invlo}, q3#lo @ vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak ++ vtbl.8 q3#hi, {$invlo}, q3#hi ++ veor q4, q4, q2 @ vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ vtbl.8 q2#lo, {$invlo}, q4#lo @ vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak ++ vtbl.8 q2#hi, {$invlo}, q4#hi ++ veor q3, q3, q1 @ vpxor %xmm1, %xmm3, %xmm3 # 2 = io ++ veor q2, q2, q0 @ vpxor %xmm0, %xmm2, %xmm2 # 3 = jo ++ vtbl.8 q4#lo, {q15}, q3#lo @ vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou ++ vtbl.8 q4#hi, {q15}, q3#hi ++ vtbl.8 q1#lo, {q14}, q2#lo @ vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t ++ vtbl.8 q1#hi, {q14}, q2#hi ++ veor q1, q1, q4 @ vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output ++ ++ @ add in smeared stuff ++ veor q0, q1, q7 @ vpxor %xmm7, %xmm1, %xmm0 ++ veor q7, q1, q7 @ vmovdqa %xmm0, %xmm7 ++ bx lr ++.size _vpaes_schedule_round,.-_vpaes_schedule_round ++ ++@@ ++@@ .aes_schedule_transform ++@@ ++@@ Linear-transform q0 according to tables at [r11] ++@@ ++@@ Requires that q9 = 0x0F0F... as in preheat ++@@ Output in q0 ++@@ Clobbers q1, q2, q14, q15 ++@@ ++.type _vpaes_schedule_transform,%function ++.align 4 ++_vpaes_schedule_transform: ++ vld1.64 {q14,q15}, [r11] @ vmovdqa (%r11), %xmm2 # lo ++ @ vmovdqa 16(%r11), %xmm1 # hi ++ vand q1, q0, $s0F @ vpand %xmm9, %xmm0, %xmm1 ++ vshr.u8 q0, q0, #4 @ vpsrlb \$4, %xmm0, %xmm0 ++ vtbl.8 q2#lo, {q14}, q1#lo @ vpshufb %xmm1, %xmm2, %xmm2 ++ vtbl.8 q2#hi, {q14}, q1#hi ++ vtbl.8 q0#lo, {q15}, q0#lo @ vpshufb %xmm0, %xmm1, %xmm0 ++ vtbl.8 q0#hi, {q15}, q0#hi ++ veor q0, q0, q2 @ vpxor %xmm2, %xmm0, %xmm0 ++ bx lr ++.size _vpaes_schedule_transform,.-_vpaes_schedule_transform ++ ++@@ ++@@ .aes_schedule_mangle ++@@ ++@@ Mangles q0 from (basis-transformed) standard version ++@@ to our version. ++@@ ++@@ On encrypt, ++@@ xor with 0x63 ++@@ multiply by circulant 0,1,1,1 ++@@ apply shiftrows transform ++@@ ++@@ On decrypt, ++@@ xor with 0x63 ++@@ multiply by "inverse mixcolumns" circulant E,B,D,9 ++@@ deskew ++@@ apply shiftrows transform ++@@ ++@@ ++@@ Writes out to [r2], and increments or decrements it ++@@ Keeps track of round number mod 4 in r8 ++@@ Preserves q0 ++@@ Clobbers q1-q5 ++@@ ++.type _vpaes_schedule_mangle,%function ++.align 4 ++_vpaes_schedule_mangle: ++ tst $dir, $dir ++ vmov q4, q0 @ vmovdqa %xmm0, %xmm4 # save xmm0 for later ++ adr r11, .Lk_mc_forward @ Must be aligned to 8 mod 16. ++ vld1.64 {q5}, [r11] @ vmovdqa .Lk_mc_forward(%rip),%xmm5 ++ ++ @ encrypting ++ @ Write to q2 so we do not overlap table and destination below. 
++ veor q2, q0, $s63 @ vpxor .Lk_s63(%rip), %xmm0, %xmm4 ++ add $out, $out, #16 @ add \$16, %rdx ++ vtbl.8 q4#lo, {q2}, q5#lo @ vpshufb %xmm5, %xmm4, %xmm4 ++ vtbl.8 q4#hi, {q2}, q5#hi ++ vtbl.8 q1#lo, {q4}, q5#lo @ vpshufb %xmm5, %xmm4, %xmm1 ++ vtbl.8 q1#hi, {q4}, q5#hi ++ vtbl.8 q3#lo, {q1}, q5#lo @ vpshufb %xmm5, %xmm1, %xmm3 ++ vtbl.8 q3#hi, {q1}, q5#hi ++ veor q4, q4, q1 @ vpxor %xmm1, %xmm4, %xmm4 ++ vld1.64 {q1}, [r8] @ vmovdqa (%r8,%r10), %xmm1 ++ veor q3, q3, q4 @ vpxor %xmm4, %xmm3, %xmm3 ++ ++.Lschedule_mangle_both: ++ @ Write to q2 so table and destination do not overlap. ++ vtbl.8 q2#lo, {q3}, q1#lo @ vpshufb %xmm1, %xmm3, %xmm3 ++ vtbl.8 q2#hi, {q3}, q1#hi ++ add r8, r8, #64-16 @ add \$-16, %r8 ++ and r8, r8, #~(1<<6) @ and \$0x30, %r8 ++ vst1.64 {q2}, [$out] @ vmovdqu %xmm3, (%rdx) ++ bx lr ++.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle ++ ++.globl GFp_vpaes_set_encrypt_key ++.type GFp_vpaes_set_encrypt_key,%function ++.align 4 ++GFp_vpaes_set_encrypt_key: ++ stmdb sp!, {r7-r11, lr} ++ vstmdb sp!, {d8-d15} ++ ++ lsr r9, $bits, #5 @ shr \$5,%eax ++ add r9, r9, #5 @ \$5,%eax ++ str r9, [$out,#240] @ mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; ++ ++ mov $dir, #0 @ mov \$0,%ecx ++ mov r8, #0x30 @ mov \$0x30,%r8d ++ bl _vpaes_schedule_core ++ eor r0, r0, r0 ++ ++ vldmia sp!, {d8-d15} ++ ldmia sp!, {r7-r11, pc} @ return ++.size GFp_vpaes_set_encrypt_key,.-GFp_vpaes_set_encrypt_key ++___ ++} ++ ++{ ++my ($out, $inp) = map("r$_", (0..1)); ++my ($s0F, $s63, $s63_raw, $mc_forward) = map("q$_", (9..12)); ++ ++$code .= <<___; ++ ++@ Additional constants for converting to bsaes. ++.type _vpaes_convert_consts,%object ++.align 4 ++_vpaes_convert_consts: ++@ .Lk_opt_then_skew applies skew(opt(x)) XOR 0x63, where skew is the linear ++@ transform in the AES S-box. 0x63 is incorporated into the low half of the ++@ table. This was computed with the following script: ++@ ++@ def u64s_to_u128(x, y): ++@ return x | (y << 64) ++@ def u128_to_u64s(w): ++@ return w & ((1<<64)-1), w >> 64 ++@ def get_byte(w, i): ++@ return (w >> (i*8)) & 0xff ++@ def apply_table(table, b): ++@ lo = b & 0xf ++@ hi = b >> 4 ++@ return get_byte(table[0], lo) ^ get_byte(table[1], hi) ++@ def opt(b): ++@ table = [ ++@ u64s_to_u128(0xFF9F4929D6B66000, 0xF7974121DEBE6808), ++@ u64s_to_u128(0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0), ++@ ] ++@ return apply_table(table, b) ++@ def rot_byte(b, n): ++@ return 0xff & ((b << n) | (b >> (8-n))) ++@ def skew(x): ++@ return (x ^ rot_byte(x, 1) ^ rot_byte(x, 2) ^ rot_byte(x, 3) ^ ++@ rot_byte(x, 4)) ++@ table = [0, 0] ++@ for i in range(16): ++@ table[0] |= (skew(opt(i)) ^ 0x63) << (i*8) ++@ table[1] |= skew(opt(i<<4)) << (i*8) ++@ print("\t.quad\t0x%016x, 0x%016x" % u128_to_u64s(table[0])) ++@ print("\t.quad\t0x%016x, 0x%016x" % u128_to_u64s(table[1])) ++.Lk_opt_then_skew: ++ .quad 0x9cb8436798bc4763, 0x6440bb9f6044bf9b ++ .quad 0x1f30062936192f00, 0xb49bad829db284ab ++ ++@ void GFp_vpaes_encrypt_key_to_bsaes(AES_KEY *bsaes, const AES_KEY *vpaes); ++.globl GFp_vpaes_encrypt_key_to_bsaes ++.type GFp_vpaes_encrypt_key_to_bsaes,%function ++.align 4 ++GFp_vpaes_encrypt_key_to_bsaes: ++ stmdb sp!, {r11, lr} ++ ++ @ See _vpaes_schedule_core for the key schedule logic. In particular, ++ @ _vpaes_schedule_transform(.Lk_ipt) (section 2.2 of the paper), ++ @ _vpaes_schedule_mangle (section 4.3), and .Lschedule_mangle_last ++ @ contain the transformations not in the bsaes representation. This ++ @ function inverts those transforms. 
++ @ ++ @ Note also that bsaes-armv7.pl expects aes-armv4.pl's key ++ @ representation, which does not match the other aes_nohw_* ++ @ implementations. The ARM aes_nohw_* stores each 32-bit word ++ @ byteswapped, as a convenience for (unsupported) big-endian ARM, at the ++ @ cost of extra REV and VREV32 operations in little-endian ARM. ++ ++ vmov.i8 $s0F, #0x0f @ Required by _vpaes_schedule_transform ++ adr r2, .Lk_mc_forward @ Must be aligned to 8 mod 16. ++ add r3, r2, 0x90 @ .Lk_sr+0x10-.Lk_mc_forward = 0x90 (Apple's toolchain doesn't support the expression) ++ ++ vld1.64 {$mc_forward}, [r2] ++ vmov.i8 $s63, #0x5b @ .Lk_s63 from vpaes-x86_64 ++ adr r11, .Lk_opt @ Must be aligned to 8 mod 16. ++ vmov.i8 $s63_raw, #0x63 @ .LK_s63 without .Lk_ipt applied ++ ++ @ vpaes stores one fewer round count than bsaes, but the number of keys ++ @ is the same. ++ ldr r2, [$inp,#240] ++ add r2, r2, #1 ++ str r2, [$out,#240] ++ ++ @ The first key is transformed with _vpaes_schedule_transform(.Lk_ipt). ++ @ Invert this with .Lk_opt. ++ vld1.64 {q0}, [$inp]! ++ bl _vpaes_schedule_transform ++ vrev32.8 q0, q0 ++ vst1.64 {q0}, [$out]! ++ ++ @ The middle keys have _vpaes_schedule_transform(.Lk_ipt) applied, ++ @ followed by _vpaes_schedule_mangle. _vpaes_schedule_mangle XORs 0x63, ++ @ multiplies by the circulant 0,1,1,1, then applies ShiftRows. ++.Loop_enc_key_to_bsaes: ++ vld1.64 {q0}, [$inp]! ++ ++ @ Invert the ShiftRows step (see .Lschedule_mangle_both). Note we cycle ++ @ r3 in the opposite direction and start at .Lk_sr+0x10 instead of 0x30. ++ @ We use r3 rather than r8 to avoid a callee-saved register. ++ vld1.64 {q1}, [r3] ++ vtbl.8 q2#lo, {q0}, q1#lo ++ vtbl.8 q2#hi, {q0}, q1#hi ++ add r3, r3, #16 ++ and r3, r3, #~(1<<6) ++ vmov q0, q2 ++ ++ @ Handle the last key differently. ++ subs r2, r2, #1 ++ beq .Loop_enc_key_to_bsaes_last ++ ++ @ Multiply by the circulant. This is its own inverse. ++ vtbl.8 q1#lo, {q0}, $mc_forward#lo ++ vtbl.8 q1#hi, {q0}, $mc_forward#hi ++ vmov q0, q1 ++ vtbl.8 q2#lo, {q1}, $mc_forward#lo ++ vtbl.8 q2#hi, {q1}, $mc_forward#hi ++ veor q0, q0, q2 ++ vtbl.8 q1#lo, {q2}, $mc_forward#lo ++ vtbl.8 q1#hi, {q2}, $mc_forward#hi ++ veor q0, q0, q1 ++ ++ @ XOR and finish. ++ veor q0, q0, $s63 ++ bl _vpaes_schedule_transform ++ vrev32.8 q0, q0 ++ vst1.64 {q0}, [$out]! ++ b .Loop_enc_key_to_bsaes ++ ++.Loop_enc_key_to_bsaes_last: ++ @ The final key does not have a basis transform (note ++ @ .Lschedule_mangle_last inverts the original transform). It only XORs ++ @ 0x63 and applies ShiftRows. The latter was already inverted in the ++ @ loop. Note that, because we act on the original representation, we use ++ @ $s63_raw, not $s63. ++ veor q0, q0, $s63_raw ++ vrev32.8 q0, q0 ++ vst1.64 {q0}, [$out] ++ ++ @ Wipe registers which contained key material. ++ veor q0, q0, q0 ++ veor q1, q1, q1 ++ veor q2, q2, q2 ++ ++ ldmia sp!, {r11, pc} @ return ++.size GFp_vpaes_encrypt_key_to_bsaes,.-GFp_vpaes_encrypt_key_to_bsaes ++___ ++} ++ ++{ ++# Register-passed parameters. ++my ($inp, $out, $len, $key) = map("r$_", 0..3); ++# Temporaries. _vpaes_encrypt_core already uses r8..r11, so overlap $ivec and ++# $tmp. $ctr is r7 because it must be preserved across calls. 
++my ($ctr, $ivec, $tmp) = map("r$_", 7..9); ++ ++# void vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len, ++# const AES_KEY *key, const uint8_t ivec[16]); ++$code .= <<___; ++.globl GFp_vpaes_ctr32_encrypt_blocks ++.type GFp_vpaes_ctr32_encrypt_blocks,%function ++.align 4 ++GFp_vpaes_ctr32_encrypt_blocks: ++ mov ip, sp ++ stmdb sp!, {r7-r11, lr} ++ @ This function uses q4-q7 (d8-d15), which are callee-saved. ++ vstmdb sp!, {d8-d15} ++ ++ cmp $len, #0 ++ @ $ivec is passed on the stack. ++ ldr $ivec, [ip] ++ beq .Lctr32_done ++ ++ @ _vpaes_encrypt_core expects the key in r2, so swap $len and $key. ++ mov $tmp, $key ++ mov $key, $len ++ mov $len, $tmp ++___ ++my ($len, $key) = ($key, $len); ++$code .= <<___; ++ ++ @ Load the IV and counter portion. ++ ldr $ctr, [$ivec, #12] ++ vld1.8 {q7}, [$ivec] ++ ++ bl _vpaes_preheat ++ rev $ctr, $ctr @ The counter is big-endian. ++ ++.Lctr32_loop: ++ vmov q0, q7 ++ vld1.8 {q6}, [$inp]! @ Load input ahead of time ++ bl _vpaes_encrypt_core ++ veor q0, q0, q6 @ XOR input and result ++ vst1.8 {q0}, [$out]! ++ subs $len, $len, #1 ++ @ Update the counter. ++ add $ctr, $ctr, #1 ++ rev $tmp, $ctr ++ vmov.32 q7#hi[1], $tmp ++ bne .Lctr32_loop ++ ++.Lctr32_done: ++ vldmia sp!, {d8-d15} ++ ldmia sp!, {r7-r11, pc} @ return ++.size GFp_vpaes_ctr32_encrypt_blocks,.-GFp_vpaes_ctr32_encrypt_blocks ++___ ++} ++ ++foreach (split("\n",$code)) { ++ s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo; ++ print $_,"\n"; ++} ++ ++close STDOUT; +diff --git a/crypto/fipsmodule/aes/asm/vpaes-armv8.pl b/crypto/fipsmodule/aes/asm/vpaes-armv8.pl +new file mode 100755 +index 0000000..b31bbb8 +--- /dev/null ++++ b/crypto/fipsmodule/aes/asm/vpaes-armv8.pl +@@ -0,0 +1,837 @@ ++#! /usr/bin/env perl ++# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved. ++# ++# Licensed under the OpenSSL license (the "License"). You may not use ++# this file except in compliance with the License. You can obtain a copy ++# in the file LICENSE in the source distribution or at ++# https://www.openssl.org/source/license.html ++ ++ ++###################################################################### ++## Constant-time SSSE3 AES core implementation. ++## version 0.1 ++## ++## By Mike Hamburg (Stanford University), 2009 ++## Public domain. ++## ++## For details see http://shiftleft.org/papers/vector_aes/ and ++## http://crypto.stanford.edu/vpaes/. ++## ++###################################################################### ++# ARMv8 NEON adaptation by <appro@openssl.org> ++# ++# Reason for undertaken effort is that there is at least one popular ++# SoC based on Cortex-A53 that doesn't have crypto extensions. 
++# ++# CBC enc ECB enc/dec(*) [bit-sliced enc/dec] ++# Cortex-A53 21.5 18.1/20.6 [17.5/19.8 ] ++# Cortex-A57 36.0(**) 20.4/24.9(**) [14.4/16.6 ] ++# X-Gene 45.9(**) 45.8/57.7(**) [33.1/37.6(**) ] ++# Denver(***) 16.6(**) 15.1/17.8(**) [8.80/9.93 ] ++# Apple A7(***) 22.7(**) 10.9/14.3 [8.45/10.0 ] ++# Mongoose(***) 26.3(**) 21.0/25.0(**) [13.3/16.8 ] ++# ++# (*) ECB denotes approximate result for parallelizable modes ++# such as CBC decrypt, CTR, etc.; ++# (**) these results are worse than scalar compiler-generated ++# code, but it's constant-time and therefore preferred; ++# (***) presented for reference/comparison purposes; ++ ++$flavour = shift; ++while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or ++die "can't locate arm-xlate.pl"; ++ ++open OUT,"| \"$^X\" $xlate $flavour $output"; ++*STDOUT=*OUT; ++ ++$code.=<<___; ++#include <GFp/arm_arch.h> ++ ++.section .rodata ++ ++.type _vpaes_consts,%object ++.align 7 // totally strategic alignment ++_vpaes_consts: ++.Lk_mc_forward: // mc_forward ++ .quad 0x0407060500030201, 0x0C0F0E0D080B0A09 ++ .quad 0x080B0A0904070605, 0x000302010C0F0E0D ++ .quad 0x0C0F0E0D080B0A09, 0x0407060500030201 ++ .quad 0x000302010C0F0E0D, 0x080B0A0904070605 ++.Lk_mc_backward:// mc_backward ++ .quad 0x0605040702010003, 0x0E0D0C0F0A09080B ++ .quad 0x020100030E0D0C0F, 0x0A09080B06050407 ++ .quad 0x0E0D0C0F0A09080B, 0x0605040702010003 ++ .quad 0x0A09080B06050407, 0x020100030E0D0C0F ++.Lk_sr: // sr ++ .quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 ++ .quad 0x030E09040F0A0500, 0x0B06010C07020D08 ++ .quad 0x0F060D040B020900, 0x070E050C030A0108 ++ .quad 0x0B0E0104070A0D00, 0x0306090C0F020508 ++ ++// ++// "Hot" constants ++// ++.Lk_inv: // inv, inva ++ .quad 0x0E05060F0D080180, 0x040703090A0B0C02 ++ .quad 0x01040A060F0B0780, 0x030D0E0C02050809 ++.Lk_ipt: // input transform (lo, hi) ++ .quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 ++ .quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 ++.Lk_sbo: // sbou, sbot ++ .quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 ++ .quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA ++.Lk_sb1: // sb1u, sb1t ++ .quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF ++ .quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 ++.Lk_sb2: // sb2u, sb2t ++ .quad 0x69EB88400AE12900, 0xC2A163C8AB82234A ++ .quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD ++ ++// ++// Key schedule constants ++// ++.Lk_dksd: // decryption key schedule: invskew x*D ++ .quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 ++ .quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E ++.Lk_dksb: // decryption key schedule: invskew x*B ++ .quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 ++ .quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 ++.Lk_dkse: // decryption key schedule: invskew x*E + 0x63 ++ .quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 ++ .quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 ++.Lk_dks9: // decryption key schedule: invskew x*9 ++ .quad 0xB6116FC87ED9A700, 0x4AED933482255BFC ++ .quad 0x4576516227143300, 0x8BB89FACE9DAFDCE ++ ++.Lk_rcon: // rcon ++ .quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 ++ ++.Lk_opt: // output transform ++ .quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 ++ .quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 ++.Lk_deskew: // deskew tables: inverts the sbox's "skew" ++ .quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A ++ .quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 ++ ++.asciz "Vector Permutation AES for ARMv8, Mike Hamburg (Stanford University)" ++.size 
_vpaes_consts,.-_vpaes_consts ++.align 6 ++ ++.text ++___ ++ ++{ ++my ($inp,$out,$key) = map("x$_",(0..2)); ++ ++my ($invlo,$invhi,$iptlo,$ipthi,$sbou,$sbot) = map("v$_.16b",(18..23)); ++my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_.16b",(24..27)); ++my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_.16b",(24..31)); ++ ++$code.=<<___; ++## ++## _aes_preheat ++## ++## Fills register %r10 -> .aes_consts (so you can -fPIC) ++## and %xmm9-%xmm15 as specified below. ++## ++.type _vpaes_encrypt_preheat,%function ++.align 4 ++_vpaes_encrypt_preheat: ++ adrp x10, :pg_hi21:.Lk_inv ++ add x10, x10, :lo12:.Lk_inv ++ movi v17.16b, #0x0f ++ ld1 {v18.2d-v19.2d}, [x10],#32 // .Lk_inv ++ ld1 {v20.2d-v23.2d}, [x10],#64 // .Lk_ipt, .Lk_sbo ++ ld1 {v24.2d-v27.2d}, [x10] // .Lk_sb1, .Lk_sb2 ++ ret ++.size _vpaes_encrypt_preheat,.-_vpaes_encrypt_preheat ++ ++## ++## _aes_encrypt_core ++## ++## AES-encrypt %xmm0. ++## ++## Inputs: ++## %xmm0 = input ++## %xmm9-%xmm15 as in _vpaes_preheat ++## (%rdx) = scheduled keys ++## ++## Output in %xmm0 ++## Clobbers %xmm1-%xmm5, %r9, %r10, %r11, %rax ++## Preserves %xmm6 - %xmm8 so you get some local vectors ++## ++## ++.type _vpaes_encrypt_core,%function ++.align 4 ++_vpaes_encrypt_core: ++ mov x9, $key ++ ldr w8, [$key,#240] // pull rounds ++ adrp x11, :pg_hi21:.Lk_mc_forward+16 ++ add x11, x11, :lo12:.Lk_mc_forward+16 ++ // vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo ++ ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key ++ and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 ++ ushr v0.16b, v7.16b, #4 // vpsrlb \$4, %xmm0, %xmm0 ++ tbl v1.16b, {$iptlo}, v1.16b // vpshufb %xmm1, %xmm2, %xmm1 ++ // vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi ++ tbl v2.16b, {$ipthi}, v0.16b // vpshufb %xmm0, %xmm3, %xmm2 ++ eor v0.16b, v1.16b, v16.16b // vpxor %xmm5, %xmm1, %xmm0 ++ eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 ++ b .Lenc_entry ++ ++.align 4 ++.Lenc_loop: ++ // middle of middle round ++ add x10, x11, #0x40 ++ tbl v4.16b, {$sb1t}, v2.16b // vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u ++ ld1 {v1.2d}, [x11], #16 // vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[] ++ tbl v0.16b, {$sb1u}, v3.16b // vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t ++ eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ tbl v5.16b, {$sb2t}, v2.16b // vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ tbl v2.16b, {$sb2u}, v3.16b // vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t ++ ld1 {v4.2d}, [x10] // vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] ++ tbl v3.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm3 # 0 = B ++ eor v2.16b, v2.16b, v5.16b // vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A ++ tbl v0.16b, {v0.16b}, v4.16b // vpshufb %xmm4, %xmm0, %xmm0 # 3 = D ++ eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B ++ tbl v4.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C ++ eor v0.16b, v0.16b, v3.16b // vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D ++ and x11, x11, #~(1<<6) // and \$0x30, %r11 # ... 
mod 4 ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D ++ sub w8, w8, #1 // nr-- ++ ++.Lenc_entry: ++ // top of round ++ and v1.16b, v0.16b, v17.16b // vpand %xmm0, %xmm9, %xmm1 # 0 = k ++ ushr v0.16b, v0.16b, #4 // vpsrlb \$4, %xmm0, %xmm0 # 1 = i ++ tbl v5.16b, {$invhi}, v1.16b // vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k ++ eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ tbl v3.16b, {$invlo}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ tbl v4.16b, {$invlo}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ eor v3.16b, v3.16b, v5.16b // vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ eor v4.16b, v4.16b, v5.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ tbl v2.16b, {$invlo}, v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak ++ tbl v3.16b, {$invlo}, v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak ++ eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io ++ eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo ++ ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm5 ++ cbnz w8, .Lenc_loop ++ ++ // middle of last round ++ add x10, x11, #0x80 ++ // vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo ++ // vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 ++ tbl v4.16b, {$sbou}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou ++ ld1 {v1.2d}, [x10] // vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[] ++ tbl v0.16b, {$sbot}, v3.16b // vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t ++ eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 ++ ret ++.size _vpaes_encrypt_core,.-_vpaes_encrypt_core ++ ++.globl GFp_vpaes_encrypt ++.type GFp_vpaes_encrypt,%function ++.align 4 ++GFp_vpaes_encrypt: ++ AARCH64_SIGN_LINK_REGISTER ++ stp x29,x30,[sp,#-16]! 
++ add x29,sp,#0 ++ ++ ld1 {v7.16b}, [$inp] ++ bl _vpaes_encrypt_preheat ++ bl _vpaes_encrypt_core ++ st1 {v0.16b}, [$out] ++ ++ ldp x29,x30,[sp],#16 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++.size GFp_vpaes_encrypt,.-GFp_vpaes_encrypt ++ ++.type _vpaes_encrypt_2x,%function ++.align 4 ++_vpaes_encrypt_2x: ++ mov x9, $key ++ ldr w8, [$key,#240] // pull rounds ++ adrp x11, :pg_hi21:.Lk_mc_forward+16 ++ add x11, x11, :lo12:.Lk_mc_forward+16 ++ // vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo ++ ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key ++ and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 ++ ushr v0.16b, v14.16b, #4 // vpsrlb \$4, %xmm0, %xmm0 ++ and v9.16b, v15.16b, v17.16b ++ ushr v8.16b, v15.16b, #4 ++ tbl v1.16b, {$iptlo}, v1.16b // vpshufb %xmm1, %xmm2, %xmm1 ++ tbl v9.16b, {$iptlo}, v9.16b ++ // vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi ++ tbl v2.16b, {$ipthi}, v0.16b // vpshufb %xmm0, %xmm3, %xmm2 ++ tbl v10.16b, {$ipthi}, v8.16b ++ eor v0.16b, v1.16b, v16.16b // vpxor %xmm5, %xmm1, %xmm0 ++ eor v8.16b, v9.16b, v16.16b ++ eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 ++ eor v8.16b, v8.16b, v10.16b ++ b .Lenc_2x_entry ++ ++.align 4 ++.Lenc_2x_loop: ++ // middle of middle round ++ add x10, x11, #0x40 ++ tbl v4.16b, {$sb1t}, v2.16b // vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u ++ tbl v12.16b, {$sb1t}, v10.16b ++ ld1 {v1.2d}, [x11], #16 // vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[] ++ tbl v0.16b, {$sb1u}, v3.16b // vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t ++ tbl v8.16b, {$sb1u}, v11.16b ++ eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ eor v12.16b, v12.16b, v16.16b ++ tbl v5.16b, {$sb2t}, v2.16b // vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u ++ tbl v13.16b, {$sb2t}, v10.16b ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ eor v8.16b, v8.16b, v12.16b ++ tbl v2.16b, {$sb2u}, v3.16b // vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t ++ tbl v10.16b, {$sb2u}, v11.16b ++ ld1 {v4.2d}, [x10] // vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] ++ tbl v3.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm3 # 0 = B ++ tbl v11.16b, {v8.16b}, v1.16b ++ eor v2.16b, v2.16b, v5.16b // vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A ++ eor v10.16b, v10.16b, v13.16b ++ tbl v0.16b, {v0.16b}, v4.16b // vpshufb %xmm4, %xmm0, %xmm0 # 3 = D ++ tbl v8.16b, {v8.16b}, v4.16b ++ eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B ++ eor v11.16b, v11.16b, v10.16b ++ tbl v4.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C ++ tbl v12.16b, {v11.16b},v1.16b ++ eor v0.16b, v0.16b, v3.16b // vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D ++ eor v8.16b, v8.16b, v11.16b ++ and x11, x11, #~(1<<6) // and \$0x30, %r11 # ... 
mod 4 ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D ++ eor v8.16b, v8.16b, v12.16b ++ sub w8, w8, #1 // nr-- ++ ++.Lenc_2x_entry: ++ // top of round ++ and v1.16b, v0.16b, v17.16b // vpand %xmm0, %xmm9, %xmm1 # 0 = k ++ ushr v0.16b, v0.16b, #4 // vpsrlb \$4, %xmm0, %xmm0 # 1 = i ++ and v9.16b, v8.16b, v17.16b ++ ushr v8.16b, v8.16b, #4 ++ tbl v5.16b, {$invhi},v1.16b // vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k ++ tbl v13.16b, {$invhi},v9.16b ++ eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ eor v9.16b, v9.16b, v8.16b ++ tbl v3.16b, {$invlo},v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ tbl v11.16b, {$invlo},v8.16b ++ tbl v4.16b, {$invlo},v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ tbl v12.16b, {$invlo},v9.16b ++ eor v3.16b, v3.16b, v5.16b // vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ eor v11.16b, v11.16b, v13.16b ++ eor v4.16b, v4.16b, v5.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ eor v12.16b, v12.16b, v13.16b ++ tbl v2.16b, {$invlo},v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak ++ tbl v10.16b, {$invlo},v11.16b ++ tbl v3.16b, {$invlo},v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak ++ tbl v11.16b, {$invlo},v12.16b ++ eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io ++ eor v10.16b, v10.16b, v9.16b ++ eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo ++ eor v11.16b, v11.16b, v8.16b ++ ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm5 ++ cbnz w8, .Lenc_2x_loop ++ ++ // middle of last round ++ add x10, x11, #0x80 ++ // vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo ++ // vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 ++ tbl v4.16b, {$sbou}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou ++ tbl v12.16b, {$sbou}, v10.16b ++ ld1 {v1.2d}, [x10] // vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[] ++ tbl v0.16b, {$sbot}, v3.16b // vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t ++ tbl v8.16b, {$sbot}, v11.16b ++ eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ eor v12.16b, v12.16b, v16.16b ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ eor v8.16b, v8.16b, v12.16b ++ tbl v0.16b, {v0.16b},v1.16b // vpshufb %xmm1, %xmm0, %xmm0 ++ tbl v1.16b, {v8.16b},v1.16b ++ ret ++.size _vpaes_encrypt_2x,.-_vpaes_encrypt_2x ++___ ++} ++{ ++my ($inp,$bits,$out,$dir)=("x0","w1","x2","w3"); ++my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_.16b",(18..21,8)); ++ ++$code.=<<___; ++######################################################## ++## ## ++## AES key schedule ## ++## ## ++######################################################## ++.type _vpaes_key_preheat,%function ++.align 4 ++_vpaes_key_preheat: ++ adrp x10, :pg_hi21:.Lk_inv ++ add x10, x10, :lo12:.Lk_inv ++ movi v16.16b, #0x5b // .Lk_s63 ++ adrp x11, :pg_hi21:.Lk_sb1 ++ add x11, x11, :lo12:.Lk_sb1 ++ movi v17.16b, #0x0f // .Lk_s0F ++ ld1 {v18.2d-v21.2d}, [x10] // .Lk_inv, .Lk_ipt ++ adrp x10, :pg_hi21:.Lk_dksd ++ add x10, x10, :lo12:.Lk_dksd ++ ld1 {v22.2d-v23.2d}, [x11] // .Lk_sb1 ++ adrp x11, :pg_hi21:.Lk_mc_forward ++ add x11, x11, :lo12:.Lk_mc_forward ++ ld1 {v24.2d-v27.2d}, [x10],#64 // .Lk_dksd, .Lk_dksb ++ ld1 {v28.2d-v31.2d}, [x10],#64 // .Lk_dkse, .Lk_dks9 ++ ld1 {v8.2d}, [x10] // .Lk_rcon ++ ld1 {v9.2d}, [x11] // .Lk_mc_forward[0] ++ ret ++.size _vpaes_key_preheat,.-_vpaes_key_preheat ++ ++.type _vpaes_schedule_core,%function ++.align 4 ++_vpaes_schedule_core: ++ AARCH64_SIGN_LINK_REGISTER ++ stp x29, x30, [sp,#-16]! 
++ add x29,sp,#0 ++ ++ bl _vpaes_key_preheat // load the tables ++ ++ ld1 {v0.16b}, [$inp],#16 // vmovdqu (%rdi), %xmm0 # load key (unaligned) ++ ++ // input transform ++ mov v3.16b, v0.16b // vmovdqa %xmm0, %xmm3 ++ bl _vpaes_schedule_transform ++ mov v7.16b, v0.16b // vmovdqa %xmm0, %xmm7 ++ ++ adrp x10, :pg_hi21:.Lk_sr // lea .Lk_sr(%rip),%r10 ++ add x10, x10, :lo12:.Lk_sr ++ ++ add x8, x8, x10 ++ ++ // encrypting, output zeroth round key after transform ++ st1 {v0.2d}, [$out] // vmovdqu %xmm0, (%rdx) ++ ++ cmp $bits, #192 // cmp \$192, %esi ++ b.hi .Lschedule_256 ++ b.eq .Lschedule_192 ++ // 128: fall though ++ ++## ++## .schedule_128 ++## ++## 128-bit specific part of key schedule. ++## ++## This schedule is really simple, because all its parts ++## are accomplished by the subroutines. ++## ++.Lschedule_128: ++ mov $inp, #10 // mov \$10, %esi ++ ++.Loop_schedule_128: ++ sub $inp, $inp, #1 // dec %esi ++ bl _vpaes_schedule_round ++ cbz $inp, .Lschedule_mangle_last ++ bl _vpaes_schedule_mangle // write output ++ b .Loop_schedule_128 ++ ++## ++## .aes_schedule_192 ++## ++## 192-bit specific part of key schedule. ++## ++## The main body of this schedule is the same as the 128-bit ++## schedule, but with more smearing. The long, high side is ++## stored in %xmm7 as before, and the short, low side is in ++## the high bits of %xmm6. ++## ++## This schedule is somewhat nastier, however, because each ++## round produces 192 bits of key material, or 1.5 round keys. ++## Therefore, on each cycle we do 2 rounds and produce 3 round ++## keys. ++## ++.align 4 ++.Lschedule_192: ++ sub $inp, $inp, #8 ++ ld1 {v0.16b}, [$inp] // vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned) ++ bl _vpaes_schedule_transform // input transform ++ mov v6.16b, v0.16b // vmovdqa %xmm0, %xmm6 # save short part ++ eor v4.16b, v4.16b, v4.16b // vpxor %xmm4, %xmm4, %xmm4 # clear 4 ++ ins v6.d[0], v4.d[0] // vmovhlps %xmm4, %xmm6, %xmm6 # clobber low side with zeros ++ mov $inp, #4 // mov \$4, %esi ++ ++.Loop_schedule_192: ++ sub $inp, $inp, #1 // dec %esi ++ bl _vpaes_schedule_round ++ ext v0.16b, v6.16b, v0.16b, #8 // vpalignr \$8,%xmm6,%xmm0,%xmm0 ++ bl _vpaes_schedule_mangle // save key n ++ bl _vpaes_schedule_192_smear ++ bl _vpaes_schedule_mangle // save key n+1 ++ bl _vpaes_schedule_round ++ cbz $inp, .Lschedule_mangle_last ++ bl _vpaes_schedule_mangle // save key n+2 ++ bl _vpaes_schedule_192_smear ++ b .Loop_schedule_192 ++ ++## ++## .aes_schedule_256 ++## ++## 256-bit specific part of key schedule. ++## ++## The structure here is very similar to the 128-bit ++## schedule, but with an additional "low side" in ++## %xmm6. The low side's rounds are the same as the ++## high side's, except no rcon and no rotation. ++## ++.align 4 ++.Lschedule_256: ++ ld1 {v0.16b}, [$inp] // vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) ++ bl _vpaes_schedule_transform // input transform ++ mov $inp, #7 // mov \$7, %esi ++ ++.Loop_schedule_256: ++ sub $inp, $inp, #1 // dec %esi ++ bl _vpaes_schedule_mangle // output low result ++ mov v6.16b, v0.16b // vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6 ++ ++ // high round ++ bl _vpaes_schedule_round ++ cbz $inp, .Lschedule_mangle_last ++ bl _vpaes_schedule_mangle ++ ++ // low round. 
swap xmm7 and xmm6 ++ dup v0.4s, v0.s[3] // vpshufd \$0xFF, %xmm0, %xmm0 ++ movi v4.16b, #0 ++ mov v5.16b, v7.16b // vmovdqa %xmm7, %xmm5 ++ mov v7.16b, v6.16b // vmovdqa %xmm6, %xmm7 ++ bl _vpaes_schedule_low_round ++ mov v7.16b, v5.16b // vmovdqa %xmm5, %xmm7 ++ ++ b .Loop_schedule_256 ++ ++## ++## .aes_schedule_mangle_last ++## ++## Mangler for last round of key schedule ++## Mangles %xmm0 ++## when encrypting, outputs out(%xmm0) ^ 63 ++## when decrypting, outputs unskew(%xmm0) ++## ++## Always called right before return... jumps to cleanup and exits ++## ++.align 4 ++.Lschedule_mangle_last: ++ // schedule last round key from xmm0 ++ adrp x11, :pg_hi21:.Lk_deskew // lea .Lk_deskew(%rip),%r11 # prepare to deskew ++ add x11, x11, :lo12:.Lk_deskew ++ ++ cbnz $dir, .Lschedule_mangle_last_dec ++ ++ // encrypting ++ ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10),%xmm1 ++ adrp x11, :pg_hi21:.Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform ++ add x11, x11, :lo12:.Lk_opt ++ add $out, $out, #32 // add \$32, %rdx ++ tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 # output permute ++ ++.Lschedule_mangle_last_dec: ++ ld1 {v20.2d-v21.2d}, [x11] // reload constants ++ sub $out, $out, #16 // add \$-16, %rdx ++ eor v0.16b, v0.16b, v16.16b // vpxor .Lk_s63(%rip), %xmm0, %xmm0 ++ bl _vpaes_schedule_transform // output transform ++ st1 {v0.2d}, [$out] // vmovdqu %xmm0, (%rdx) # save last key ++ ++ // cleanup ++ eor v0.16b, v0.16b, v0.16b // vpxor %xmm0, %xmm0, %xmm0 ++ eor v1.16b, v1.16b, v1.16b // vpxor %xmm1, %xmm1, %xmm1 ++ eor v2.16b, v2.16b, v2.16b // vpxor %xmm2, %xmm2, %xmm2 ++ eor v3.16b, v3.16b, v3.16b // vpxor %xmm3, %xmm3, %xmm3 ++ eor v4.16b, v4.16b, v4.16b // vpxor %xmm4, %xmm4, %xmm4 ++ eor v5.16b, v5.16b, v5.16b // vpxor %xmm5, %xmm5, %xmm5 ++ eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6 ++ eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7 ++ ldp x29, x30, [sp],#16 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++.size _vpaes_schedule_core,.-_vpaes_schedule_core ++ ++## ++## .aes_schedule_192_smear ++## ++## Smear the short, low side in the 192-bit key schedule. ++## ++## Inputs: ++## %xmm7: high side, b a x y ++## %xmm6: low side, d c 0 0 ++## %xmm13: 0 ++## ++## Outputs: ++## %xmm6: b+c+d b+c 0 0 ++## %xmm0: b+c+d b+c b a ++## ++.type _vpaes_schedule_192_smear,%function ++.align 4 ++_vpaes_schedule_192_smear: ++ movi v1.16b, #0 ++ dup v0.4s, v7.s[3] ++ ins v1.s[3], v6.s[2] // vpshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0 ++ ins v0.s[0], v7.s[2] // vpshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a ++ eor v6.16b, v6.16b, v1.16b // vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0 ++ eor v1.16b, v1.16b, v1.16b // vpxor %xmm1, %xmm1, %xmm1 ++ eor v6.16b, v6.16b, v0.16b // vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a ++ mov v0.16b, v6.16b // vmovdqa %xmm6, %xmm0 ++ ins v6.d[0], v1.d[0] // vmovhlps %xmm1, %xmm6, %xmm6 # clobber low side with zeros ++ ret ++.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear ++ ++## ++## .aes_schedule_round ++## ++## Runs one main round of the key schedule on %xmm0, %xmm7 ++## ++## Specifically, runs subbytes on the high dword of %xmm0 ++## then rotates it by one byte and xors into the low dword of ++## %xmm7. ++## ++## Adds rcon from low byte of %xmm8, then rotates %xmm8 for ++## next rcon. ++## ++## Smears the dwords of %xmm7 by xoring the low into the ++## second low, result into third, result into highest. ++## ++## Returns results in %xmm7 = %xmm0. ++## Clobbers %xmm1-%xmm4, %r11. 
++## ++.type _vpaes_schedule_round,%function ++.align 4 ++_vpaes_schedule_round: ++ // extract rcon from xmm8 ++ movi v4.16b, #0 // vpxor %xmm4, %xmm4, %xmm4 ++ ext v1.16b, $rcon, v4.16b, #15 // vpalignr \$15, %xmm8, %xmm4, %xmm1 ++ ext $rcon, $rcon, $rcon, #15 // vpalignr \$15, %xmm8, %xmm8, %xmm8 ++ eor v7.16b, v7.16b, v1.16b // vpxor %xmm1, %xmm7, %xmm7 ++ ++ // rotate ++ dup v0.4s, v0.s[3] // vpshufd \$0xFF, %xmm0, %xmm0 ++ ext v0.16b, v0.16b, v0.16b, #1 // vpalignr \$1, %xmm0, %xmm0, %xmm0 ++ ++ // fall through... ++ ++ // low round: same as high round, but no rotation and no rcon. ++_vpaes_schedule_low_round: ++ // smear xmm7 ++ ext v1.16b, v4.16b, v7.16b, #12 // vpslldq \$4, %xmm7, %xmm1 ++ eor v7.16b, v7.16b, v1.16b // vpxor %xmm1, %xmm7, %xmm7 ++ ext v4.16b, v4.16b, v7.16b, #8 // vpslldq \$8, %xmm7, %xmm4 ++ ++ // subbytes ++ and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 # 0 = k ++ ushr v0.16b, v0.16b, #4 // vpsrlb \$4, %xmm0, %xmm0 # 1 = i ++ eor v7.16b, v7.16b, v4.16b // vpxor %xmm4, %xmm7, %xmm7 ++ tbl v2.16b, {$invhi}, v1.16b // vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k ++ eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ tbl v3.16b, {$invlo}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ tbl v4.16b, {$invlo}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ eor v7.16b, v7.16b, v16.16b // vpxor .Lk_s63(%rip), %xmm7, %xmm7 ++ tbl v3.16b, {$invlo}, v3.16b // vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak ++ eor v4.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ tbl v2.16b, {$invlo}, v4.16b // vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak ++ eor v3.16b, v3.16b, v1.16b // vpxor %xmm1, %xmm3, %xmm3 # 2 = io ++ eor v2.16b, v2.16b, v0.16b // vpxor %xmm0, %xmm2, %xmm2 # 3 = jo ++ tbl v4.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou ++ tbl v1.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t ++ eor v1.16b, v1.16b, v4.16b // vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output ++ ++ // add in smeared stuff ++ eor v0.16b, v1.16b, v7.16b // vpxor %xmm7, %xmm1, %xmm0 ++ eor v7.16b, v1.16b, v7.16b // vmovdqa %xmm0, %xmm7 ++ ret ++.size _vpaes_schedule_round,.-_vpaes_schedule_round ++ ++## ++## .aes_schedule_transform ++## ++## Linear-transform %xmm0 according to tables at (%r11) ++## ++## Requires that %xmm9 = 0x0F0F... as in preheat ++## Output in %xmm0 ++## Clobbers %xmm1, %xmm2 ++## ++.type _vpaes_schedule_transform,%function ++.align 4 ++_vpaes_schedule_transform: ++ and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 ++ ushr v0.16b, v0.16b, #4 // vpsrlb \$4, %xmm0, %xmm0 ++ // vmovdqa (%r11), %xmm2 # lo ++ tbl v2.16b, {$iptlo}, v1.16b // vpshufb %xmm1, %xmm2, %xmm2 ++ // vmovdqa 16(%r11), %xmm1 # hi ++ tbl v0.16b, {$ipthi}, v0.16b // vpshufb %xmm0, %xmm1, %xmm0 ++ eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 ++ ret ++.size _vpaes_schedule_transform,.-_vpaes_schedule_transform ++ ++## ++## .aes_schedule_mangle ++## ++## Mangle xmm0 from (basis-transformed) standard version ++## to our version. 
++## ++## On encrypt, ++## xor with 0x63 ++## multiply by circulant 0,1,1,1 ++## apply shiftrows transform ++## ++## On decrypt, ++## xor with 0x63 ++## multiply by "inverse mixcolumns" circulant E,B,D,9 ++## deskew ++## apply shiftrows transform ++## ++## ++## Writes out to (%rdx), and increments or decrements it ++## Keeps track of round number mod 4 in %r8 ++## Preserves xmm0 ++## Clobbers xmm1-xmm5 ++## ++.type _vpaes_schedule_mangle,%function ++.align 4 ++_vpaes_schedule_mangle: ++ mov v4.16b, v0.16b // vmovdqa %xmm0, %xmm4 # save xmm0 for later ++ // vmovdqa .Lk_mc_forward(%rip),%xmm5 ++ ++ // encrypting ++ eor v4.16b, v0.16b, v16.16b // vpxor .Lk_s63(%rip), %xmm0, %xmm4 ++ add $out, $out, #16 // add \$16, %rdx ++ tbl v4.16b, {v4.16b}, v9.16b // vpshufb %xmm5, %xmm4, %xmm4 ++ tbl v1.16b, {v4.16b}, v9.16b // vpshufb %xmm5, %xmm4, %xmm1 ++ tbl v3.16b, {v1.16b}, v9.16b // vpshufb %xmm5, %xmm1, %xmm3 ++ eor v4.16b, v4.16b, v1.16b // vpxor %xmm1, %xmm4, %xmm4 ++ ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10), %xmm1 ++ eor v3.16b, v3.16b, v4.16b // vpxor %xmm4, %xmm3, %xmm3 ++ ++.Lschedule_mangle_both: ++ tbl v3.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 ++ add x8, x8, #64-16 // add \$-16, %r8 ++ and x8, x8, #~(1<<6) // and \$0x30, %r8 ++ st1 {v3.2d}, [$out] // vmovdqu %xmm3, (%rdx) ++ ret ++.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle ++ ++.globl GFp_vpaes_set_encrypt_key ++.type GFp_vpaes_set_encrypt_key,%function ++.align 4 ++GFp_vpaes_set_encrypt_key: ++ AARCH64_SIGN_LINK_REGISTER ++ stp x29,x30,[sp,#-16]! ++ add x29,sp,#0 ++ stp d8,d9,[sp,#-16]! // ABI spec says so ++ ++ lsr w9, $bits, #5 // shr \$5,%eax ++ add w9, w9, #5 // \$5,%eax ++ str w9, [$out,#240] // mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; ++ ++ mov $dir, #0 // mov \$0,%ecx ++ mov x8, #0x30 // mov \$0x30,%r8d ++ bl _vpaes_schedule_core ++ eor x0, x0, x0 ++ ++ ldp d8,d9,[sp],#16 ++ ldp x29,x30,[sp],#16 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++.size GFp_vpaes_set_encrypt_key,.-GFp_vpaes_set_encrypt_key ++___ ++} ++{ ++my ($inp,$out,$len,$key,$ivec) = map("x$_",(0..4)); ++my ($ctr, $ctr_tmp) = ("w6", "w7"); ++ ++# void GFp_vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len, ++# const AES_KEY *key, const uint8_t ivec[16]); ++$code.=<<___; ++.globl GFp_vpaes_ctr32_encrypt_blocks ++.type GFp_vpaes_ctr32_encrypt_blocks,%function ++.align 4 ++GFp_vpaes_ctr32_encrypt_blocks: ++ AARCH64_SIGN_LINK_REGISTER ++ stp x29,x30,[sp,#-16]! ++ add x29,sp,#0 ++ stp d8,d9,[sp,#-16]! // ABI spec says so ++ stp d10,d11,[sp,#-16]! ++ stp d12,d13,[sp,#-16]! ++ stp d14,d15,[sp,#-16]! ++ ++ cbz $len, .Lctr32_done ++ ++ // Note, unlike the other functions, $len here is measured in blocks, ++ // not bytes. ++ mov x17, $len ++ mov x2, $key ++ ++ // Load the IV and counter portion. ++ ldr $ctr, [$ivec, #12] ++ ld1 {v7.16b}, [$ivec] ++ ++ bl _vpaes_encrypt_preheat ++ tst x17, #1 ++ rev $ctr, $ctr // The counter is big-endian. ++ b.eq .Lctr32_prep_loop ++ ++ // Handle one block so the remaining block count is even for ++ // _vpaes_encrypt_2x. ++ ld1 {v6.16b}, [$inp], #16 // Load input ahead of time ++ bl _vpaes_encrypt_core ++ eor v0.16b, v0.16b, v6.16b // XOR input and result ++ st1 {v0.16b}, [$out], #16 ++ subs x17, x17, #1 ++ // Update the counter. ++ add $ctr, $ctr, #1 ++ rev $ctr_tmp, $ctr ++ mov v7.s[3], $ctr_tmp ++ b.ls .Lctr32_done ++ ++.Lctr32_prep_loop: ++ // _vpaes_encrypt_core takes its input from v7, while _vpaes_encrypt_2x ++ // uses v14 and v15. 
++ mov v15.16b, v7.16b ++ mov v14.16b, v7.16b ++ add $ctr, $ctr, #1 ++ rev $ctr_tmp, $ctr ++ mov v15.s[3], $ctr_tmp ++ ++.Lctr32_loop: ++ ld1 {v6.16b,v7.16b}, [$inp], #32 // Load input ahead of time ++ bl _vpaes_encrypt_2x ++ eor v0.16b, v0.16b, v6.16b // XOR input and result ++ eor v1.16b, v1.16b, v7.16b // XOR input and result (#2) ++ st1 {v0.16b,v1.16b}, [$out], #32 ++ subs x17, x17, #2 ++ // Update the counter. ++ add $ctr_tmp, $ctr, #1 ++ add $ctr, $ctr, #2 ++ rev $ctr_tmp, $ctr_tmp ++ mov v14.s[3], $ctr_tmp ++ rev $ctr_tmp, $ctr ++ mov v15.s[3], $ctr_tmp ++ b.hi .Lctr32_loop ++ ++.Lctr32_done: ++ ldp d14,d15,[sp],#16 ++ ldp d12,d13,[sp],#16 ++ ldp d10,d11,[sp],#16 ++ ldp d8,d9,[sp],#16 ++ ldp x29,x30,[sp],#16 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++.size GFp_vpaes_ctr32_encrypt_blocks,.-GFp_vpaes_ctr32_encrypt_blocks ++___ ++} ++ ++print $code; ++ ++close STDOUT or die "error closing STDOUT"; +diff --git a/crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl b/crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl +new file mode 100644 +index 0000000..7e52ad6 +--- /dev/null ++++ b/crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl +@@ -0,0 +1,294 @@ ++#! /usr/bin/env perl ++# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved. ++# ++# Licensed under the OpenSSL license (the "License"). You may not use ++# this file except in compliance with the License. You can obtain a copy ++# in the file LICENSE in the source distribution or at ++# https://www.openssl.org/source/license.html ++ ++# ==================================================================== ++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL ++# project. The module is, however, dual licensed under OpenSSL and ++# CRYPTOGAMS licenses depending on where you obtain it. For further ++# details see http://www.openssl.org/~appro/cryptogams/. ++# ==================================================================== ++ ++# This file was adapted to AArch64 from the 32-bit version in ghash-armv4.pl. It ++# implements the multiplication algorithm described in: ++# ++# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software ++# Polynomial Multiplication on ARM Processors using the NEON Engine. ++# ++# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf ++# ++# The main distinction to keep in mind between 32-bit NEON and AArch64 SIMD is ++# AArch64 cannot compute over the upper halves of SIMD registers. In 32-bit ++# NEON, the low and high halves of the 128-bit register q0 are accessible as ++# 64-bit registers d0 and d1, respectively. In AArch64, dN is the lower half of ++# vN. Where the 32-bit version would use the upper half, this file must keep ++# halves in separate registers. ++# ++# The other distinction is in syntax. 32-bit NEON embeds lane information in the ++# instruction name, while AArch64 uses suffixes on the registers. For instance, ++# left-shifting 64-bit lanes of a SIMD register in 32-bit would be written: ++# ++# vshl.i64 q0, q0, #1 ++# ++# in 64-bit, it would be written: ++# ++# shl v0.2d, v0.2d, #1 ++# ++# See Programmer's Guide for ARMv8-A, section 7 for details. ++# http://infocenter.arm.com/help/topic/com.arm.doc.den0024a/DEN0024A_v8_architecture_PG.pdf ++# ++# Finally, note the 8-bit and 64-bit polynomial multipliers in AArch64 differ ++# only by suffix. pmull vR.8h, vA.8b, vB.8b multiplies eight 8-bit polynomials ++# and is always available. pmull vR.1q, vA.1d, vB.1d multiplies a 64-bit ++# polynomial and is conditioned on the PMULL extension. 
This file emulates the ++# latter with the former. ++ ++use strict; ++ ++my $flavour = shift; ++my $output; ++if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } ++else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} } ++ ++if ($flavour && $flavour ne "void") { ++ $0 =~ m/(.*[\/\\])[^\/\\]+$/; ++ my $dir = $1; ++ my $xlate; ++ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or ++ ( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or ++ die "can't locate arm-xlate.pl"; ++ ++ open OUT,"| \"$^X\" $xlate $flavour $output"; ++ *STDOUT=*OUT; ++} else { ++ open OUT,">$output"; ++ *STDOUT=*OUT; ++} ++ ++my ($Xi, $Htbl, $inp, $len) = map("x$_", (0..3)); # argument block ++my ($Xl, $Xm, $Xh, $INlo, $INhi) = map("v$_", (0..4)); ++my ($Hlo, $Hhi, $Hhl) = map("v$_", (5..7)); ++# d8-d15 are callee-saved, so avoid v8-v15. AArch64 SIMD has plenty of registers ++# to spare. ++my ($t0, $t1, $t2, $t3) = map("v$_", (16..19)); ++my ($t0l_t1l, $t0h_t1h, $t2l_t3l, $t2h_t3h) = map("v$_", (20..23)); ++my ($k48_k32, $k16_k0) = map("v$_", (24..25)); ++ ++my $code = ""; ++ ++# clmul64x64 emits code which emulates pmull $r.1q, $a.1d, $b.1d. $r, $a, and $b ++# must be distinct from $t* and $k*. $t* are clobbered by the emitted code. ++sub clmul64x64 { ++my ($r, $a, $b) = @_; ++$code .= <<___; ++ ext $t0.8b, $a.8b, $a.8b, #1 // A1 ++ pmull $t0.8h, $t0.8b, $b.8b // F = A1*B ++ ext $r.8b, $b.8b, $b.8b, #1 // B1 ++ pmull $r.8h, $a.8b, $r.8b // E = A*B1 ++ ext $t1.8b, $a.8b, $a.8b, #2 // A2 ++ pmull $t1.8h, $t1.8b, $b.8b // H = A2*B ++ ext $t3.8b, $b.8b, $b.8b, #2 // B2 ++ pmull $t3.8h, $a.8b, $t3.8b // G = A*B2 ++ ext $t2.8b, $a.8b, $a.8b, #3 // A3 ++ eor $t0.16b, $t0.16b, $r.16b // L = E + F ++ pmull $t2.8h, $t2.8b, $b.8b // J = A3*B ++ ext $r.8b, $b.8b, $b.8b, #3 // B3 ++ eor $t1.16b, $t1.16b, $t3.16b // M = G + H ++ pmull $r.8h, $a.8b, $r.8b // I = A*B3 ++ ++ // Here we diverge from the 32-bit version. It computes the following ++ // (instructions reordered for clarity): ++ // ++ // veor \$t0#lo, \$t0#lo, \$t0#hi @ t0 = P0 + P1 (L) ++ // vand \$t0#hi, \$t0#hi, \$k48 ++ // veor \$t0#lo, \$t0#lo, \$t0#hi ++ // ++ // veor \$t1#lo, \$t1#lo, \$t1#hi @ t1 = P2 + P3 (M) ++ // vand \$t1#hi, \$t1#hi, \$k32 ++ // veor \$t1#lo, \$t1#lo, \$t1#hi ++ // ++ // veor \$t2#lo, \$t2#lo, \$t2#hi @ t2 = P4 + P5 (N) ++ // vand \$t2#hi, \$t2#hi, \$k16 ++ // veor \$t2#lo, \$t2#lo, \$t2#hi ++ // ++ // veor \$t3#lo, \$t3#lo, \$t3#hi @ t3 = P6 + P7 (K) ++ // vmov.i64 \$t3#hi, #0 ++ // ++ // \$kN is a mask with the bottom N bits set. AArch64 cannot compute on ++ // upper halves of SIMD registers, so we must split each half into ++ // separate registers. To compensate, we pair computations up and ++ // parallelize. ++ ++ ext $t3.8b, $b.8b, $b.8b, #4 // B4 ++ eor $t2.16b, $t2.16b, $r.16b // N = I + J ++ pmull $t3.8h, $a.8b, $t3.8b // K = A*B4 ++ ++ // This can probably be scheduled more efficiently. For now, we just ++ // pair up independent instructions. 
++ zip1 $t0l_t1l.2d, $t0.2d, $t1.2d ++ zip1 $t2l_t3l.2d, $t2.2d, $t3.2d ++ zip2 $t0h_t1h.2d, $t0.2d, $t1.2d ++ zip2 $t2h_t3h.2d, $t2.2d, $t3.2d ++ eor $t0l_t1l.16b, $t0l_t1l.16b, $t0h_t1h.16b ++ eor $t2l_t3l.16b, $t2l_t3l.16b, $t2h_t3h.16b ++ and $t0h_t1h.16b, $t0h_t1h.16b, $k48_k32.16b ++ and $t2h_t3h.16b, $t2h_t3h.16b, $k16_k0.16b ++ eor $t0l_t1l.16b, $t0l_t1l.16b, $t0h_t1h.16b ++ eor $t2l_t3l.16b, $t2l_t3l.16b, $t2h_t3h.16b ++ zip1 $t0.2d, $t0l_t1l.2d, $t0h_t1h.2d ++ zip1 $t2.2d, $t2l_t3l.2d, $t2h_t3h.2d ++ zip2 $t1.2d, $t0l_t1l.2d, $t0h_t1h.2d ++ zip2 $t3.2d, $t2l_t3l.2d, $t2h_t3h.2d ++ ++ ext $t0.16b, $t0.16b, $t0.16b, #15 // t0 = t0 << 8 ++ ext $t1.16b, $t1.16b, $t1.16b, #14 // t1 = t1 << 16 ++ pmull $r.8h, $a.8b, $b.8b // D = A*B ++ ext $t3.16b, $t3.16b, $t3.16b, #12 // t3 = t3 << 32 ++ ext $t2.16b, $t2.16b, $t2.16b, #13 // t2 = t2 << 24 ++ eor $t0.16b, $t0.16b, $t1.16b ++ eor $t2.16b, $t2.16b, $t3.16b ++ eor $r.16b, $r.16b, $t0.16b ++ eor $r.16b, $r.16b, $t2.16b ++___ ++} ++ ++$code .= <<___; ++#include <GFp/arm_arch.h> ++ ++.text ++ ++.global GFp_gcm_init_neon ++.type GFp_gcm_init_neon,%function ++.align 4 ++GFp_gcm_init_neon: ++ AARCH64_VALID_CALL_TARGET ++ // This function is adapted from gcm_init_v8. xC2 is t3. ++ ld1 {$t1.2d}, [x1] // load H ++ movi $t3.16b, #0xe1 ++ shl $t3.2d, $t3.2d, #57 // 0xc2.0 ++ ext $INlo.16b, $t1.16b, $t1.16b, #8 ++ ushr $t2.2d, $t3.2d, #63 ++ dup $t1.4s, $t1.s[1] ++ ext $t0.16b, $t2.16b, $t3.16b, #8 // t0=0xc2....01 ++ ushr $t2.2d, $INlo.2d, #63 ++ sshr $t1.4s, $t1.4s, #31 // broadcast carry bit ++ and $t2.16b, $t2.16b, $t0.16b ++ shl $INlo.2d, $INlo.2d, #1 ++ ext $t2.16b, $t2.16b, $t2.16b, #8 ++ and $t0.16b, $t0.16b, $t1.16b ++ orr $INlo.16b, $INlo.16b, $t2.16b // H<<<=1 ++ eor $Hlo.16b, $INlo.16b, $t0.16b // twisted H ++ st1 {$Hlo.2d}, [x0] // store Htable[0] ++ ret ++.size GFp_gcm_init_neon,.-GFp_gcm_init_neon ++ ++.global GFp_gcm_gmult_neon ++.type GFp_gcm_gmult_neon,%function ++.align 4 ++GFp_gcm_gmult_neon: ++ AARCH64_VALID_CALL_TARGET ++ ld1 {$INlo.16b}, [$Xi] // load Xi ++ ld1 {$Hlo.1d}, [$Htbl], #8 // load twisted H ++ ld1 {$Hhi.1d}, [$Htbl] ++ adrp x9, :pg_hi21:.Lmasks // load constants ++ add x9, x9, :lo12:.Lmasks ++ ld1 {$k48_k32.2d, $k16_k0.2d}, [x9] ++ rev64 $INlo.16b, $INlo.16b // byteswap Xi ++ ext $INlo.16b, $INlo.16b, $INlo.16b, #8 ++ eor $Hhl.8b, $Hlo.8b, $Hhi.8b // Karatsuba pre-processing ++ ++ mov $len, #16 ++ b .Lgmult_neon ++.size GFp_gcm_gmult_neon,.-GFp_gcm_gmult_neon ++ ++.global GFp_gcm_ghash_neon ++.type GFp_gcm_ghash_neon,%function ++.align 4 ++GFp_gcm_ghash_neon: ++ AARCH64_VALID_CALL_TARGET ++ ld1 {$Xl.16b}, [$Xi] // load Xi ++ ld1 {$Hlo.1d}, [$Htbl], #8 // load twisted H ++ ld1 {$Hhi.1d}, [$Htbl] ++ adrp x9, :pg_hi21:.Lmasks // load constants ++ add x9, x9, :lo12:.Lmasks ++ ld1 {$k48_k32.2d, $k16_k0.2d}, [x9] ++ rev64 $Xl.16b, $Xl.16b // byteswap Xi ++ ext $Xl.16b, $Xl.16b, $Xl.16b, #8 ++ eor $Hhl.8b, $Hlo.8b, $Hhi.8b // Karatsuba pre-processing ++ ++.Loop_neon: ++ ld1 {$INlo.16b}, [$inp], #16 // load inp ++ rev64 $INlo.16b, $INlo.16b // byteswap inp ++ ext $INlo.16b, $INlo.16b, $INlo.16b, #8 ++ eor $INlo.16b, $INlo.16b, $Xl.16b // inp ^= Xi ++ ++.Lgmult_neon: ++ // Split the input into $INlo and $INhi. (The upper halves are unused, ++ // so it is okay to leave them alone.) 
++ ins $INhi.d[0], $INlo.d[1] ++___ ++&clmul64x64 ($Xl, $Hlo, $INlo); # H.lo·Xi.lo ++$code .= <<___; ++ eor $INlo.8b, $INlo.8b, $INhi.8b // Karatsuba pre-processing ++___ ++&clmul64x64 ($Xm, $Hhl, $INlo); # (H.lo+H.hi)·(Xi.lo+Xi.hi) ++&clmul64x64 ($Xh, $Hhi, $INhi); # H.hi·Xi.hi ++$code .= <<___; ++ ext $t0.16b, $Xl.16b, $Xh.16b, #8 ++ eor $Xm.16b, $Xm.16b, $Xl.16b // Karatsuba post-processing ++ eor $Xm.16b, $Xm.16b, $Xh.16b ++ eor $Xm.16b, $Xm.16b, $t0.16b // Xm overlaps Xh.lo and Xl.hi ++ ins $Xl.d[1], $Xm.d[0] // Xh|Xl - 256-bit result ++ // This is a no-op due to the ins instruction below. ++ // ins $Xh.d[0], $Xm.d[1] ++ ++ // equivalent of reduction_avx from ghash-x86_64.pl ++ shl $t1.2d, $Xl.2d, #57 // 1st phase ++ shl $t2.2d, $Xl.2d, #62 ++ eor $t2.16b, $t2.16b, $t1.16b // ++ shl $t1.2d, $Xl.2d, #63 ++ eor $t2.16b, $t2.16b, $t1.16b // ++ // Note Xm contains {Xl.d[1], Xh.d[0]}. ++ eor $t2.16b, $t2.16b, $Xm.16b ++ ins $Xl.d[1], $t2.d[0] // Xl.d[1] ^= t2.d[0] ++ ins $Xh.d[0], $t2.d[1] // Xh.d[0] ^= t2.d[1] ++ ++ ushr $t2.2d, $Xl.2d, #1 // 2nd phase ++ eor $Xh.16b, $Xh.16b,$Xl.16b ++ eor $Xl.16b, $Xl.16b,$t2.16b // ++ ushr $t2.2d, $t2.2d, #6 ++ ushr $Xl.2d, $Xl.2d, #1 // ++ eor $Xl.16b, $Xl.16b, $Xh.16b // ++ eor $Xl.16b, $Xl.16b, $t2.16b // ++ ++ subs $len, $len, #16 ++ bne .Loop_neon ++ ++ rev64 $Xl.16b, $Xl.16b // byteswap Xi and write ++ ext $Xl.16b, $Xl.16b, $Xl.16b, #8 ++ st1 {$Xl.16b}, [$Xi] ++ ++ ret ++.size GFp_gcm_ghash_neon,.-GFp_gcm_ghash_neon ++ ++.section .rodata ++.align 4 ++.Lmasks: ++.quad 0x0000ffffffffffff // k48 ++.quad 0x00000000ffffffff // k32 ++.quad 0x000000000000ffff // k16 ++.quad 0x0000000000000000 // k0 ++.asciz "GHASH for ARMv8, derived from ARMv4 version by <appro\@openssl.org>" ++.align 2 ++___ ++ ++foreach (split("\n",$code)) { ++ s/\`([^\`]*)\`/eval $1/geo; ++ ++ print $_,"\n"; ++} ++close STDOUT or die "error closing STDOUT"; # enforce flush +-- +Efraim Flashner <efraim@flashner.co.il> רנשלפ םירפא +GPG key = A28B F40C 3E55 1372 662D 14F7 41AA E7DC CA3D 8351 +Confidentiality cannot be guaranteed on emails sent or received unencrypted + diff --git a/gnu/packages/patches/rust-ring-0.16-test-files.patch b/gnu/packages/patches/rust-ring-0.16-test-files.patch new file mode 100644 index 0000000000..dbe5c0f4ee --- /dev/null +++ b/gnu/packages/patches/rust-ring-0.16-test-files.patch @@ -0,0 +1,54 @@ +This file exists in the upstream repository at the commit which +corresponds to the ring-0.16.20 release, but was excluded from the +release tarball. + +--- + tests/ed25519_verify_tests.txt | 34 ++++++++++++++++++++++++++++++++++ + 1 file changed, 34 insertions(+) + create mode 100644 tests/ed25519_verify_tests.txt + +diff --git a/tests/ed25519_verify_tests.txt b/tests/ed25519_verify_tests.txt +new file mode 100644 +index 0000000..74c94b3 +--- /dev/null ++++ b/tests/ed25519_verify_tests.txt +@@ -0,0 +1,34 @@ ++# BoringSSL TEST(Ed25519Test Malleability) ++ ++# Control; S is in range. ++MESSAGE = 54657374 ++SIG = 7c38e026f29e14aabd059a0f2db8b0cd783040609a8be684db12f82a27774ab07a9155711ecfaf7f99f277bad0c6ae7e39d4eef676573336a5c51eb6f946b30d ++PUB = 7d4d0e7f6153a69b6242b522abbee685fda4420f8834b108c3bdae369ef549fa ++Result = P ++ ++# Same as above, but with the order L added to S so it is out of range. 
++# BoringSSL commit 472ba2c2dd52d06a657a63b7fbf02732a6649d21 ++MESSAGE = 54657374 ++SIG = 7c38e026f29e14aabd059a0f2db8b0cd783040609a8be684db12f82a27774ab067654bce3832c2d76f8f6f5dafc08d9339d4eef676573336a5c51eb6f946b31d ++PUB = 7d4d0e7f6153a69b6242b522abbee685fda4420f8834b108c3bdae369ef549fa ++Result = F ++ ++ ++# BoringSSL commit 3094902fcdc2db2cc832fa854b9a6a8be383926c ++MESSAGE = 124e583f8b8eca58bb29c271b41d36986bbc45541f8e51f9cb0133eca447601e ++SIG = dac119d6ca87fc59ae611c157048f4d4fc932a149dbe20ec6effd1436abf83ea05c7df0fef06147241259113909bc71bd3c53ba4464ffcad3c0968f2ffffff0f ++PUB = 100fdf47fb94f1536a4f7c3fda27383fa03375a8f527c537e6f1703c47f94f86 ++Result = P ++ ++# Control. Same key as above; same message and signature as below, except S is in range. ++PUB = 100fdf47fb94f1536a4f7c3fda27383fa03375a8f527c537e6f1703c47f94f86 ++MESSAGE = 6a0bc2b0057cedfc0fa2e3f7f7d39279b30f454a69dfd1117c758d86b19d85e0 ++SIG = 0971f86d2c9c78582524a103cb9cf949522ae528f8054dc20107d999be673ff4f58ac9d20ec563133cabc6230b1db8625f8446639ede46ad4df4053000000000 ++Result = P ++ ++# Same key as above, but S is out of range. ++# BoringSSL commit 472ba2c2dd52d06a657a63b7fbf02732a6649d21 ++PUB = 100fdf47fb94f1536a4f7c3fda27383fa03375a8f527c537e6f1703c47f94f86 ++MESSAGE = 6a0bc2b0057cedfc0fa2e3f7f7d39279b30f454a69dfd1117c758d86b19d85e0 ++SIG = 0971f86d2c9c78582524a103cb9cf949522ae528f8054dc20107d999be673ff4e25ebf2f2928766b1248bec6e91697775f8446639ede46ad4df4053000000010 ++Result = F +-- +Efraim Flashner <efraim@flashner.co.il> רנשלפ םירפא +GPG key = A28B F40C 3E55 1372 662D 14F7 41AA E7DC CA3D 8351 +Confidentiality cannot be guaranteed on emails sent or received unencrypted + |