13 files changed, 2784 insertions, 531 deletions
diff --git a/gnu/packages/patches/ddclient-skip-test.patch b/gnu/packages/patches/ddclient-skip-test.patch
deleted file mode 100644
index 28d748997b..0000000000
--- a/gnu/packages/patches/ddclient-skip-test.patch
+++ /dev/null
@@ -1,43 +0,0 @@
-From e5657802025f238b39581534f3b4d408565c8943 Mon Sep 17 00:00:00 2001
-From: Bruno Victal <mirai@makinata.eu>
-Date: Sun, 5 Feb 2023 21:05:00 +0000
-Subject: [PATCH] Disable sandbox incompatible tests.
-
-See: https://github.com/ddclient/ddclient/issues/465
----
- t/get_ip_from_if.pl | 21 ---------------------
- 1 file changed, 21 deletions(-)
-
-diff --git a/t/get_ip_from_if.pl b/t/get_ip_from_if.pl
-index 6f08e5d..d78c3d0 100644
---- a/t/get_ip_from_if.pl
-+++ b/t/get_ip_from_if.pl
-@@ -39,25 +39,4 @@ subtest "get_ip_from_interface tests" => sub {
-     }
- };
- 
--subtest "Get default interface and IP for test system" => sub {
--    my $interface = ddclient::get_default_interface(4);
--    if ($interface) {
--        isnt($interface, "lo", "Check for loopback 'lo'");
--        isnt($interface, "lo0", "Check for loopback 'lo0'");
--        my $ip1 = ddclient::get_ip_from_interface("default", 4);
--        my $ip2 = ddclient::get_ip_from_interface($interface, 4);
--        is($ip1, $ip2, "Check IPv4 from default interface");
--        ok(ddclient::is_ipv4($ip1), "Valid IPv4 from get_ip_from_interface($interface)");
--    }
--    $interface = ddclient::get_default_interface(6);
--    if ($interface) {
--        isnt($interface, "lo", "Check for loopback 'lo'");
--        isnt($interface, "lo0", "Check for loopback 'lo0'");
--        my $ip1 = ddclient::get_ip_from_interface("default", 6);
--        my $ip2 = ddclient::get_ip_from_interface($interface, 6);
--        is($ip1, $ip2, "Check IPv6 from default interface");
--        ok(ddclient::is_ipv6($ip1), "Valid IPv6 from get_ip_from_interface($interface)");
--    }
--};
--
- done_testing();
--- 
-2.38.1
-
diff --git a/gnu/packages/patches/elogind-fix-rpath.patch b/gnu/packages/patches/elogind-fix-rpath.patch
new file mode 100644
index 0000000000..2a76cc467f
--- /dev/null
+++ b/gnu/packages/patches/elogind-fix-rpath.patch
@@ -0,0 +1,60 @@
+Retrieved from https://github.com/elogind/elogind/issues/258
+
+From: Mark Hindley <mark@hindley.org.uk>
+Date: Wed, 24 May 2023 10:39:41 +0100
+Subject: Fixup_executable_rpath
+
+./meson.build sets
+
+ install_rpath : rootlibexecdir
+
+however src/shared/meson.build sets
+
+libshared = shared_library(
+          [snip]
+        install_dir : rootpkglibdir
+        )
+---
+ meson.build | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/meson.build b/meson.build
+index 694a2fd..a575f69 100644
+--- a/meson.build
++++ b/meson.build
+@@ -2903,7 +2903,7 @@ executable('elogind',
+                 dependencies : [threads,
+                                 libacl,
+                                 libudev],
+-                install_rpath : rootlibexecdir,
++                install_rpath : rootpkglibdir,
+                 install : true,
+                 install_dir : rootlibexecdir)
+ 
+@@ -2913,7 +2913,7 @@ exe = executable('loginctl',
+                         link_with : [libshared],
+                         dependencies : [threads,
+                                         libudev],
+-                        install_rpath : rootlibexecdir,
++                        install_rpath : rootpkglibdir,
+                         install : true,
+                         install_dir : rootbindir)
+ public_programs += [exe]
+@@ -2923,7 +2923,7 @@ exe = executable('elogind-inhibit',
+                         include_directories : includes,
+                         link_with : [libshared],
+                         dependencies : [threads],
+-                        install_rpath : rootlibexecdir,
++                        install_rpath : rootpkglibdir,
+                         install : true,
+                         install_dir : rootbindir)
+ public_programs += [exe]
+@@ -4283,7 +4283,7 @@ executable('elogind-uaccess-command',
+                         libshared],
+            dependencies: [libacl,
+                           libudev],
+-           install_rpath : rootlibexecdir,
++           install_rpath : rootpkglibdir,
+            install : true,
+            install_dir : rootlibexecdir)
+ #endif // 0
diff --git a/gnu/packages/patches/elogind-revert-polkit-detection.patch b/gnu/packages/patches/elogind-revert-polkit-detection.patch
deleted file mode 100644
index 43dd1684b6..0000000000
--- a/gnu/packages/patches/elogind-revert-polkit-detection.patch
+++ /dev/null
@@ -1,41 +0,0 @@
-From 715ce0a6459e418f92e74c7ce52df3244c18f383 Mon Sep 17 00:00:00 2001
-From: Sven Eden <sven.eden@prydeworx.com>
-Date: Mon, 8 Mar 2021 08:40:08 +0100
-Subject: [PATCH] Revert "Disable polkit support if libpolkit is not installed"
-
-This reverts commit 1194dec4f8f2d1b8bd14e1625f34418ecfce817e.
-
-Removing polkit support with -Dpolkit=auto when libpolkit is not
-installed, removes the whole interface. This makes it impossible to
-add polkit support as a runtime dependency.
-
-Bug: #167
-Closes: #206
-Signed-off-by: Sven Eden <sven.eden@prydeworx.com>
----
- meson.build | 9 ---------
- 1 file changed, 9 deletions(-)
-
-diff --git a/meson.build b/meson.build
-index 2dd05db3c..f38551f55 100644
---- a/meson.build
-+++ b/meson.build
-@@ -1157,15 +1157,6 @@ if want_polkit != 'false' and not skip_deps
-                 message('Old polkit detected, will install pkla files')
-                 install_polkit_pkla = true
-         endif
--#if 1 /// Disable polkit completely if libpolkit is not there. See elogind issue #167
--        if not libpolkit.found()
--                if want_polkit != 'auto'
--                        error('Polkit requested but libpolkit was not found.')
--                endif
--                install_polkit = false
--                want_polkit    = false
--        endif
--#endif // 1
- endif
- conf.set10('ENABLE_POLKIT', install_polkit)
- 
--- 
-2.33.1
-
diff --git a/gnu/packages/patches/fbreader-fix-icon.patch b/gnu/packages/patches/fbreader-fix-icon.patch
new file mode 100644
index 0000000000..635abf6a43
--- /dev/null
+++ b/gnu/packages/patches/fbreader-fix-icon.patch
@@ -0,0 +1,29 @@
+Author: Danny Milosavljevic <dannym+a@scratchpost.org>
+Date: 2023-08-15
+
+--- orig/jswba9mn9nh43l7g4w2qslmr7i3q64vy-fbreader-0.99.6-checkout/fbreader/desktop/desktop	2023-08-14 23:56:03.092567740 +0200
++++ jswba9mn9nh43l7g4w2qslmr7i3q64vy-fbreader-0.99.6-checkout/fbreader/desktop/desktop	2023-08-14 23:56:59.936213278 +0200
+@@ -25,5 +25,5 @@
+ StartupNotify=true
+ Terminal=false
+ Type=Application
+-Icon=FBReader.png
++Icon=FBReader
+ Categories=Office;Viewer;Literature;
+--- orig/jswba9mn9nh43l7g4w2qslmr7i3q64vy-fbreader-0.99.6-checkout/fbreader/desktop/Makefile	2023-08-14 23:56:03.092567740 +0200
++++ jswba9mn9nh43l7g4w2qslmr7i3q64vy-fbreader-0.99.6-checkout/fbreader/desktop/Makefile	2023-08-15 00:23:19.046691430 +0200
+@@ -10,6 +10,14 @@
+ 	@install -m 0644 desktop $(DESTDIR)/usr/share/applications/$(TARGET).desktop
+ 	@install -d $(DESTDIR)$(IMAGEDIR)
+ 	@install -m 0644 ../data/icons/application/$(TARGET_ARCH).png $(DESTDIR)$(IMAGEDIR)/FBReader.png
++	@install -d $(DESTDIR)$(SHAREDIR)/icons/hicolor/16x16/apps
++	@install -m 0644 ../data/icons/application/16x16.png $(DESTDIR)$(SHAREDIR)/icons/hicolor/16x16/apps/FBReader.png
++	@install -d $(DESTDIR)$(SHAREDIR)/icons/hicolor/32x32/apps
++	@install -m 0644 ../data/icons/application/32x32.png $(DESTDIR)$(SHAREDIR)/icons/hicolor/32x32/apps/FBReader.png
++	@install -d $(DESTDIR)$(SHAREDIR)/icons/hicolor/48x48/apps
++	@install -m 0644 ../data/icons/application/48x48.png $(DESTDIR)$(SHAREDIR)/icons/hicolor/48x48/apps/FBReader.png
++	@install -d $(DESTDIR)$(SHAREDIR)/icons/hicolor/64x64/apps
++	@install -m 0644 ../data/icons/application/64x64.png $(DESTDIR)$(SHAREDIR)/icons/hicolor/64x64/apps/FBReader.png
+ 	@install -m 0644 ../data/default/config.desktop.xml $(SHARE_FBREADER)/default/config.xml
+ 	@install -m 0644 ../data/default/keymap.desktop.xml $(SHARE_FBREADER)/default/keymap.xml
+ 	@install -m 0644 ../data/default/styles.desktop.xml $(SHARE_FBREADER)/default/styles.xml
diff --git a/gnu/packages/patches/highlight-gui-data-dir.patch b/gnu/packages/patches/highlight-gui-data-dir.patch
new file mode 100644
index 0000000000..33f40d309c
--- /dev/null
+++ b/gnu/packages/patches/highlight-gui-data-dir.patch
@@ -0,0 +1,51 @@
+This patch distinguishes between the data directory path for the GUI and the
+one for the non-GUI program by allowing the former path to be set separately.
+
+The highlight package has two outputs: out and gui.  Without this patch, both
+outputs install their files into the same directory, /share/highlight/ (see
+also the install and install-gui targets in the makefile).  The GUI-specific
+files live in the gui output under /share/highlight/gui_files/{l10n,ext}/.
+
+diff --git a/src/gui-qt/main.cpp b/src/gui-qt/main.cpp
+index 4700dc1..3567745 100644
+--- a/src/gui-qt/main.cpp
++++ b/src/gui-qt/main.cpp
+@@ -47,8 +47,8 @@ int main(int argc, char *argv[])
+ {
+     QApplication app(argc, argv);
+     QTranslator translator;
+-#ifdef DATA_DIR
+-    translator.load(QString("%1/gui_files/l10n/highlight_%2").arg(DATA_DIR).arg(QLocale::system().name()));
++#ifdef GUI_DATA_DIR
++    translator.load(QString("%1/gui_files/l10n/highlight_%2").arg(GUI_DATA_DIR).arg(QLocale::system().name()));
+ #else
+     translator.load(QString("%1/gui_files/l10n/highlight_%2").arg(QDir::currentPath()).arg(QLocale::system().name()));
+ #endif
+diff --git a/src/gui-qt/mainwindow.cpp b/src/gui-qt/mainwindow.cpp
+index 3a21ad2..f060431 100644
+--- a/src/gui-qt/mainwindow.cpp
++++ b/src/gui-qt/mainwindow.cpp
+@@ -2131,8 +2131,8 @@ QString MainWindow::getDistFileFilterPath(){
+ #ifdef Q_OS_OSX
+     return QCoreApplication::applicationDirPath()+"/../Resources/gui_files/ext/fileopenfilter.conf";
+ #else
+-    #ifdef DATA_DIR
+-    return QString(DATA_DIR) + "/gui_files/ext/fileopenfilter.conf";
++    #ifdef GUI_DATA_DIR
++    return QString(GUI_DATA_DIR) + "/gui_files/ext/fileopenfilter.conf";
+     #else
+     return QDir::currentPath()+"/gui_files/ext/fileopenfilter.conf";
+     #endif
+diff --git a/src/makefile b/src/makefile
+index b1d7988..2963105 100644
+--- a/src/makefile
++++ b/src/makefile
+@@ -118,7 +118,7 @@ gui-qt: highlight-gui
+ 
+ highlight-gui: libhighlight.a ${GUI_OBJECTS}
+ 	cd gui-qt && \
+-	${QMAKE} 'DEFINES+=DATA_DIR=\\\"${HL_DATA_DIR}\\\" CONFIG_DIR=\\\"${HL_CONFIG_DIR}\\\" DOC_DIR=\\\"${HL_DOC_DIR}\\\" ' && \
++	${QMAKE} 'DEFINES+=DATA_DIR=\\\"${HL_DATA_DIR}\\\" CONFIG_DIR=\\\"${HL_CONFIG_DIR}\\\" DOC_DIR=\\\"${HL_DOC_DIR}\\\" GUI_DATA_DIR=\\\"${GUI_DATA_DIR}\\\" ' && \
+ 	$(MAKE)
+ 
+ $(OBJECTFILES) : makefile
diff --git a/gnu/packages/patches/maturin-no-cross-compile.patch b/gnu/packages/patches/maturin-no-cross-compile.patch
new file mode 100644
index 0000000000..7394d0854e
--- /dev/null
+++ b/gnu/packages/patches/maturin-no-cross-compile.patch
@@ -0,0 +1,55 @@
+Remove dependencies on xwin and zig.  We're not offering cross-compilation
+options using these crates.
+
+diff --git a/Cargo.toml b/Cargo.toml
+index 6cbdca3..22ea5ef 100644
+--- a/Cargo.toml
++++ b/Cargo.toml
+@@ -76,16 +76,6 @@ version = "0.1.4"
+ [dependencies.cargo-options]
+ version = "0.6.0"
+ 
+-[dependencies.cargo-xwin]
+-version = "0.14.3"
+-optional = true
+-default-features = false
+-
+-[dependencies.cargo-zigbuild]
+-version = "0.16.10"
+-optional = true
+-default-features = false
+-
+ [dependencies.cargo_metadata]
+ version = "0.15.3"
+ 
+@@ -310,8 +300,6 @@ version = "4.3.0"
+ [features]
+ cli-completion = ["dep:clap_complete_command"]
+ cross-compile = [
+-    "zig",
+-    "xwin",
+ ]
+ default = [
+     "full",
+@@ -330,7 +318,6 @@ log = ["tracing-subscriber"]
+ native-tls = [
+     "dep:native-tls",
+     "ureq?/native-tls",
+-    "cargo-xwin?/native-tls",
+     "dep:rustls-pemfile",
+ ]
+ password-storage = [
+@@ -340,7 +327,6 @@ password-storage = [
+ rustls = [
+     "dep:rustls",
+     "ureq?/tls",
+-    "cargo-xwin?/rustls-tls",
+     "dep:rustls-pemfile",
+ ]
+ scaffolding = [
+@@ -358,5 +344,3 @@ upload = [
+     "wild",
+     "dep:dirs",
+ ]
+-xwin = ["cargo-xwin"]
+-zig = ["cargo-zigbuild"]
diff --git a/gnu/packages/patches/mcrl2-fix-1687.patch b/gnu/packages/patches/mcrl2-fix-1687.patch
deleted file mode 100644
index 449ecbf638..0000000000
--- a/gnu/packages/patches/mcrl2-fix-1687.patch
+++ /dev/null
@@ -1,337 +0,0 @@
-Taken from upstream:
-    https://github.com/mCRL2org/mCRL2/commit/f38998be5198236bc5bf5a957b0e132d6d6d8bee
-
-Fixes bug in ltsconvert:
-    https://listserver.tue.nl/pipermail/mcrl2-users/2022-June/000395.html
-
-From f38998be5198236bc5bf5a957b0e132d6d6d8bee Mon Sep 17 00:00:00 2001
-From: Jan Friso Groote <J.F.Groote@tue.nl>
-Date: Tue, 28 Jun 2022 12:27:47 +0200
-Subject: [PATCH] Solved bug report #1687
-
-Hidden actions were not properly recognized in ltsconvert. Multiactions
-that were partly hidden compared with the default action label, and had
-to be compared with a tau-action. This caused multiple tau-actions to be
-listed in the list of actions of an lts, and this caused other tools to
-go astray.
-
-The code to rename actions has completely be rewritten.
-
-This should solve #1687.
-
-A test have been added.
----
- libraries/lts/include/mcrl2/lts/lts.h   | 95 ++++++++++++++++++++++---
- libraries/lts/test/lts_test.cpp         | 61 ++++++++--------
- tools/release/ltsconvert/ltsconvert.cpp |  3 +-
- 3 files changed, 116 insertions(+), 43 deletions(-)
-
-diff --git a/libraries/lts/include/mcrl2/lts/lts.h b/libraries/lts/include/mcrl2/lts/lts.h
-index 095031e7c..8562eb900 100644
---- a/libraries/lts/include/mcrl2/lts/lts.h
-+++ b/libraries/lts/include/mcrl2/lts/lts.h
-@@ -25,6 +25,7 @@
- #include <algorithm>
- #include <cassert>
- #include <set>
-+#include <map>
- #include "mcrl2/lts/transition.h"
- #include "mcrl2/lts/lts_type.h"
- 
-@@ -482,40 +483,112 @@ class lts: public LTS_BASE
-         return;
-       }
- 
-+      std::map<labels_size_type, labels_size_type> action_rename_map;
-       for (labels_size_type i=0; i< num_action_labels(); ++i)
-       {
-         ACTION_LABEL_T a=action_label(i);
-         a.hide_actions(tau_actions);
--        if (a==ACTION_LABEL_T())  
-+        if (a==ACTION_LABEL_T::tau_action())  
-         {
--          m_hidden_label_set.insert(i);
-+          if (i!=const_tau_label_index)
-+          {
-+            m_hidden_label_set.insert(i);
-+          }
-         }
-         else if (a!=action_label(i))
-         {
--          set_action_label(i,a);  
-+          /* In this the action_label i is changed by the tau_actions but not renamed to tau.
-+             We check whether a maps onto another action label index. If yes, it is added to 
-+             the rename map, and we explicitly rename transition labels with this label afterwards.
-+             If no, we rename the action label.
-+          */
-+          bool found=false;
-+          for (labels_size_type j=0; !found && j< num_action_labels(); ++j)
-+          {
-+            if (a==action_label(j))
-+            { 
-+              if (i!=j)
-+              {
-+                action_rename_map[i]=j;
-+              }
-+              found=true;
-+            }
-+          }
-+          if (!found) // a!=action_label(j) for any j, then rename action_label(i) to a. 
-+          { 
-+            set_action_label(i,a);
-+          }
-+        }
-+      }
-+
-+      if (action_rename_map.size()>0)    // Check whether there are action labels that must be renamed, and
-+      {
-+        for(transition& t: m_transitions)
-+        {
-+          auto i = action_rename_map.find(t.label());
-+          if (i!=action_rename_map.end())
-+          { 
-+            t=transition(t.from(),i->second,t.to());
-+          }
-         }
-       }
-     }
- 
--    /** \brief Apply the recorded actions that are renamed to internal actions to the lts. 
--     *  \details After hiding actions, it checks whether action labels are
--     *           equal and merges actions with the same labels in the lts.
-+    /** \brief Rename the hidden actions in the lts. 
-+     *  \details Multiactions can be partially renamed. I.e. a|b becomes a if b is hidden.
-+     *           In such a case the new action a is mapped onto an existing action a; if such
-+     *           a label a does not exist, the action a|b is renamed to a. 
-      *  \param[in] tau_actions Vector with strings indicating which actions must be
-      *       transformed to tau's */
--    void apply_hidden_actions(void)
-+    void apply_hidden_actions(const std::vector<std::string>& tau_actions)
-     {
--      if (m_hidden_label_set.size()>0)    // Check whether there is something to rename.
-+      if (tau_actions.size()==0)
-+      { 
-+        return;
-+      }
-+      
-+      std::map<labels_size_type, labels_size_type> action_rename_map;
-+      for (labels_size_type i=0; i< num_action_labels(); ++i)
-+      {
-+        ACTION_LABEL_T a=action_label(i);
-+        a.hide_actions(tau_actions);
-+#ifndef NDEBUG
-+        ACTION_LABEL_T b=a;
-+        b.hide_actions(tau_actions);
-+        assert(a==b); // hide_actions applied twice yields the same result as applying it once.
-+#endif
-+        bool found=false;
-+        for (labels_size_type j=0; !found && j< num_action_labels(); ++j)
-+        {
-+          if (a==action_label(j))
-+          { 
-+            if (i!=j)
-+            {
-+              action_rename_map[i]=j;
-+            }
-+            found=true;
-+          }
-+        }
-+        if (!found) // a!=action_label(j) for any j, then rename action_label(i) to a. 
-+        { 
-+          set_action_label(i,a);
-+        }
-+      }
-+    
-+
-+      if (action_rename_map.size()>0)    // Check whether there is something to rename.
-       {
-         for(transition& t: m_transitions)
-         {
--          if (m_hidden_label_set.count(t.label()))
-+          auto i = action_rename_map.find(t.label());
-+          if (i!=action_rename_map.end())
-           { 
--            t=transition(t.from(),tau_label_index(),t.to());
-+            t=transition(t.from(),i->second,t.to());
-           }
-         }
--        m_hidden_label_set.clear();       // Empty the hidden label set. 
-       }
-     }
-+
-     /** \brief Checks whether this LTS has state values associated with its states.
-      * \retval true if the LTS has state information;
-      * \retval false otherwise.
-diff --git a/libraries/lts/test/lts_test.cpp b/libraries/lts/test/lts_test.cpp
-index 5840393d9..ad69f6275 100644
---- a/libraries/lts/test/lts_test.cpp
-+++ b/libraries/lts/test/lts_test.cpp
-@@ -149,7 +149,7 @@ static void reduce_lts_in_various_ways(const std::string& test_description,
-   BOOST_CHECK(is_deterministic(l));
- }
- 
--static void reduce_simple_loop()
-+BOOST_AUTO_TEST_CASE(reduce_simple_loop)
- {
-   std::string SIMPLE_AUT =
-     "des (0,2,2)\n"
-@@ -173,7 +173,7 @@ static void reduce_simple_loop()
-   reduce_lts_in_various_ways("Simple loop", SIMPLE_AUT, expected);
- }
- 
--static void reduce_simple_loop_with_tau()
-+BOOST_AUTO_TEST_CASE(reduce_simple_loop_with_tau)
- {
-   std::string SIMPLE_AUT =
-     "des (0,2,2)\n"
-@@ -200,7 +200,7 @@ static void reduce_simple_loop_with_tau()
- /* The example below was encountered by David Jansen. The problem is that
-  * for branching bisimulations the tau may supersede the b, not leading to the
-  * necessary splitting into two equivalence classes. */
--static void tricky_example_for_branching_bisimulation()
-+BOOST_AUTO_TEST_CASE(tricky_example_for_branching_bisimulation)
- {
-   std::string TRICKY_BB =
-     "des (0,3,2)\n"
-@@ -226,7 +226,7 @@ static void tricky_example_for_branching_bisimulation()
- }
- 
- 
--static void reduce_abp()
-+BOOST_AUTO_TEST_CASE(reduce_abp)
- {
-   std::string ABP_AUT =
-     "des (0,92,74)\n"
-@@ -342,7 +342,7 @@ static void reduce_abp()
- 
- // Peterson's protocol has the interesting property that the number of states modulo branching bisimulation
- // differs from the number of states modulo weak bisimulation, as observed by Rob van Glabbeek.
--static void reduce_peterson()
-+BOOST_AUTO_TEST_CASE(reduce_peterson)
- {
-   std::string PETERSON_AUT =
-     "des (0,59,35)\n"
-@@ -423,7 +423,7 @@ static void reduce_peterson()
-   reduce_lts_in_various_ways("Peterson protocol", PETERSON_AUT, expected);
- }
- 
--static void test_reachability()
-+BOOST_AUTO_TEST_CASE(test_reachability)
- {
-   std::string REACH =
-     "des (0,4,5)       \n"
-@@ -449,7 +449,7 @@ static void test_reachability()
- 
- // The example below caused failures in the GW mlogn branching bisimulation
- // algorithm when cleaning the code up.
--static void failing_test_groote_wijs_algorithm()
-+BOOST_AUTO_TEST_CASE(failing_test_groote_wijs_algorithm)
- {
-   std::string GWLTS =
-     "des(0,29,10)\n"
-@@ -511,7 +511,7 @@ static void failing_test_groote_wijs_algorithm()
- // It has not been implemented fully. The problem is that it is difficult to
- // prescribe the order in which refinements have to be done.
- 
--static void counterexample_jk_1(std::size_t k)
-+void counterexample_jk_1(std::size_t k)
- {
-     // numbering scheme of states:
-     // states 0..k-1 are the blue squares
-@@ -571,7 +571,7 @@ static void counterexample_jk_1(std::size_t k)
- 
- // In the meantime, the bug is corrected:  this is why the first part of the
- // algorithm now follows a much simpler line than previously.
--static void counterexample_postprocessing()
-+BOOST_AUTO_TEST_CASE(counterexample_postprocessing)
- {
-   std::string POSTPROCESS_AUT =
-     "des(0,33,13)\n"
-@@ -634,7 +634,7 @@ static void counterexample_postprocessing()
-   test_lts("postprocessing problem (branching bisimulation signature [Blom/Orzan 2003])",l,expected_label_count, expected_state_count, expected_transition_count);
- }
- 
--static void regression_delete_old_bb_slice()
-+BOOST_AUTO_TEST_CASE(regression_delete_old_bb_slice)
- {
-   std::string POSTPROCESS_AUT =
-     "des(0,163,100)\n"
-@@ -824,7 +824,7 @@ static void regression_delete_old_bb_slice()
-   test_lts("regression test for GJKW bug (branching bisimulation signature [Blom/Orzan 2003])",l,expected_label_count, expected_state_count, expected_transition_count);
- }
- 
--void is_deterministic_test1()
-+BOOST_AUTO_TEST_CASE(is_deterministic_test1)
- {
-   std::string automaton =
-     "des(0,2,2)\n"
-@@ -837,7 +837,7 @@ void is_deterministic_test1()
-   BOOST_CHECK(is_deterministic(l_det));
- }
- 
--void is_deterministic_test2()
-+BOOST_AUTO_TEST_CASE(is_deterministic_test2)
- {
-   std::string automaton =
-     "des(0,2,2)\n"
-@@ -850,24 +850,25 @@ void is_deterministic_test2()
-   BOOST_CHECK(!is_deterministic(l_det));
- }
- 
--void test_is_deterministic()
-+BOOST_AUTO_TEST_CASE(hide_actions1)
- {
--  is_deterministic_test1();
--  is_deterministic_test2();
--}
-+  std::string automaton =
-+     "des (0,4,3)\n"
-+     "(0,\"<state>\",1)\n"
-+     "(1,\"return|hello\",2)\n"
-+     "(1,\"return\",2)\n"
-+     "(2,\"world\",1)\n";
-+
-+  std::istringstream is(automaton);
-+  lts::lts_aut_t l;
-+  l.load(is);
-+  std::vector<std::string>hidden_actions(1,"hello");
-+  l.apply_hidden_actions(hidden_actions);
-+  reduce(l,lts::lts_eq_bisim);
-+  std::size_t expected_label_count = 5;
-+  std::size_t expected_state_count = 3;
-+  std::size_t expected_transition_count = 3;
-+  test_lts("regression test for GJKW bug (branching bisimulation [Jansen/Groote/Keiren/Wijs 2019])",l,expected_label_count, expected_state_count, expected_transition_count);
-+
- 
--BOOST_AUTO_TEST_CASE(test_main)
--{
--  reduce_simple_loop();
--  reduce_simple_loop_with_tau();
--  tricky_example_for_branching_bisimulation();
--  reduce_abp();
--  reduce_peterson();
--  test_reachability();
--  test_is_deterministic();
--  failing_test_groote_wijs_algorithm();
--  counterexample_jk_1(3);
--  counterexample_postprocessing();
--  regression_delete_old_bb_slice();
--  // TODO: Add groote wijs branching bisimulation and add weak bisimulation tests. For the last Peterson is a good candidate.
- }
-diff --git a/tools/release/ltsconvert/ltsconvert.cpp b/tools/release/ltsconvert/ltsconvert.cpp
-index 231deabe2..5645d31d1 100644
---- a/tools/release/ltsconvert/ltsconvert.cpp
-+++ b/tools/release/ltsconvert/ltsconvert.cpp
-@@ -123,8 +123,7 @@ class ltsconvert_tool : public input_output_tool
- 
-       LTS_TYPE l;
-       l.load(tool_options.infilename);
--      l.record_hidden_actions(tool_options.tau_actions);
--      l.apply_hidden_actions();
-+      l.apply_hidden_actions(tool_options.tau_actions);
- 
-       if (tool_options.check_reach)
-       {
--- 
-2.35.1
-
diff --git a/gnu/packages/patches/mcrl2-fix-counterexample.patch b/gnu/packages/patches/mcrl2-fix-counterexample.patch
deleted file mode 100644
index abf541f50c..0000000000
--- a/gnu/packages/patches/mcrl2-fix-counterexample.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-Taken from upstream:
-    https://github.com/mCRL2org/mCRL2/commit/435421429dde9dcc5956e8a978597111a3947ec1
-
-Fixes bug in ltscompare:
-    https://listserver.tue.nl/pipermail/mcrl2-users/2022-June/000396.html
-
-From 435421429dde9dcc5956e8a978597111a3947ec1 Mon Sep 17 00:00:00 2001
-From: Maurice Laveaux <m.laveaux@tue.nl>
-Date: Wed, 29 Jun 2022 10:27:58 +0200
-Subject: [PATCH] Write counterexample's structured output trace on single
- line.
-
----
- libraries/lts/include/mcrl2/lts/detail/counter_example.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/libraries/lts/include/mcrl2/lts/detail/counter_example.h b/libraries/lts/include/mcrl2/lts/detail/counter_example.h
-index c339cfde4..ca3967768 100644
---- a/libraries/lts/include/mcrl2/lts/detail/counter_example.h
-+++ b/libraries/lts/include/mcrl2/lts/detail/counter_example.h
-@@ -139,7 +139,7 @@ class counter_example_constructor
-       if (m_structured_output)
-       {
-         std::cout << m_name << ": ";
--        result.save("", mcrl2::lts::trace::tfPlain);   // Write to stdout. 
-+        result.save("", mcrl2::lts::trace::tfLine);   // Write to stdout.
-       }
-       else
-       {
--- 
-2.35.1
-
diff --git a/gnu/packages/patches/openssh-hurd.patch b/gnu/packages/patches/openssh-hurd.patch
deleted file mode 100644
index 1ad09a7ee6..0000000000
--- a/gnu/packages/patches/openssh-hurd.patch
+++ /dev/null
@@ -1,30 +0,0 @@
-Author: Jan (janneke) Nieuwenhuizen" <janneke@gnu.org>
-Not upstreamed.
-
-From 1ddae040d67e9a4ebcc3e1b95af1bff12c0f086b Mon Sep 17 00:00:00 2001
-From: "Jan (janneke) Nieuwenhuizen" <janneke@gnu.org>
-Date: Tue, 7 Apr 2020 17:41:05 +0200
-Subject: [PATCH] Build fix for the Hurd.
-
-* gss-serv.c (ssh_gssapi_acquire_cred): Use HOST_NAME_MAX instead of
-MAXHOSTNAMELEN.
----
- gss-serv.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/gss-serv.c b/gss-serv.c
-index 1d47870e7..22081c6f1 100644
---- a/gss-serv.c
-+++ b/gss-serv.c
-@@ -107,7 +107,7 @@ ssh_gssapi_acquire_cred(Gssctxt *ctx)
- 		gss_create_empty_oid_set(&status, &oidset);
- 		gss_add_oid_set_member(&status, ctx->oid, &oidset);
- 
--		if (gethostname(lname, MAXHOSTNAMELEN)) {
-+		if (gethostname(lname, HOST_NAME_MAX)) {
- 			gss_release_oid_set(&status, &oidset);
- 			return (-1);
- 		}
--- 
-2.26.0
-
diff --git a/gnu/packages/patches/po4a-partial-texinfo-menu-fix.patch b/gnu/packages/patches/po4a-partial-texinfo-menu-fix.patch
new file mode 100644
index 0000000000..8a075fa74c
--- /dev/null
+++ b/gnu/packages/patches/po4a-partial-texinfo-menu-fix.patch
@@ -0,0 +1,242 @@
+Submitted upstream: https://github.com/mquinson/po4a/pull/437
+
+From 43db5c0b14ec2a8ba44d338bce024df87256457b Mon Sep 17 00:00:00 2001
+From: Maxim Cournoyer <maxim.cournoyer@gmail.com>
+Date: Thu, 27 Jul 2023 17:44:49 -0400
+Subject: [PATCH] lib: Texinfo: Translate partial menu node names.
+
+Fixes <https://issues.guix.gnu.org/64881>.
+
+* lib/Locale/Po4a/Texinfo.pm (translate_buffer_menuentry): Refine
+regexp, so that it matches menu entries lacking a description.
+Only call 'translate_buffer' on the description if it was provided.
+* t/fmt/texinfo/partialmenus.trans: New file.
+* t/fmt/texinfo/partialmenus.texi: Likewise.
+* t/fmt/texinfo/partialmenus.pot: Likewise.
+* t/fmt/texinfo/partialmenus.po: Likewise.
+* t/fmt/texinfo/partialmenus.norm: Likewise.
+* t/fmt-texinfo.t: Register the new 'partialmenus' test.
+---
+ lib/Locale/Po4a/Texinfo.pm       |  7 +++---
+ t/fmt-texinfo.t                  |  2 +-
+ t/fmt/texinfo/partialmenus.norm  | 21 +++++++++++++++++
+ t/fmt/texinfo/partialmenus.po    | 40 ++++++++++++++++++++++++++++++++
+ t/fmt/texinfo/partialmenus.pot   | 40 ++++++++++++++++++++++++++++++++
+ t/fmt/texinfo/partialmenus.texi  | 14 +++++++++++
+ t/fmt/texinfo/partialmenus.trans | 21 +++++++++++++++++
+ 7 files changed, 141 insertions(+), 4 deletions(-)
+ create mode 100644 t/fmt/texinfo/partialmenus.norm
+ create mode 100644 t/fmt/texinfo/partialmenus.po
+ create mode 100644 t/fmt/texinfo/partialmenus.pot
+ create mode 100644 t/fmt/texinfo/partialmenus.texi
+ create mode 100644 t/fmt/texinfo/partialmenus.trans
+
+diff --git a/lib/Locale/Po4a/Texinfo.pm b/lib/Locale/Po4a/Texinfo.pm
+index b4750699..1c3a4bae 100644
+--- a/lib/Locale/Po4a/Texinfo.pm
++++ b/lib/Locale/Po4a/Texinfo.pm
+@@ -336,7 +336,7 @@ sub translate_buffer_menuentry {
+ 
+     my $translated_buffer = "";
+ 
+-    if (   $buffer =~ m/^(.*?)(::)\s+(.*)$/s
++    if (   $buffer =~ m/^(.*?)(::)(?:\s+(.*))?$/s
+         or $buffer =~ m/^(.*?: .*?)(\.)\s+(.*)$/s )
+     {
+         my ( $name, $sep, $description ) = ( $1, $2, $3 );
+@@ -347,8 +347,9 @@ sub translate_buffer_menuentry {
+             $translated_buffer .= ' ' x ( $menu_sep_width - 1 - $l );
+             $l = $menu_sep_width - 1;
+         }
+-        ( $t, @e ) = $self->translate_buffer( $description, $no_wrap, @env );
+-
++        if ($description) {
++            ( $t, @e ) = $self->translate_buffer( $description, $no_wrap, @env );
++        }
+         # Replace newlines with space for proper wrapping
+         # See https://github.com/mquinson/po4a/issues/122
+         $t =~ s/\n/ /sg;
+diff --git a/t/fmt-texinfo.t b/t/fmt-texinfo.t
+index 4b067e43..d9ed5df3 100644
+--- a/t/fmt-texinfo.t
++++ b/t/fmt-texinfo.t
+@@ -10,7 +10,7 @@ use Testhelper;
+ 
+ my @tests;
+ 
+-for my $test (qw(longmenu comments tindex)) {
++for my $test (qw(longmenu partialmenus comments tindex)) {
+     push @tests,
+       {
+         'format' => 'texinfo',
+diff --git a/t/fmt/texinfo/partialmenus.norm b/t/fmt/texinfo/partialmenus.norm
+new file mode 100644
+index 00000000..99240682
+--- /dev/null
++++ b/t/fmt/texinfo/partialmenus.norm
+@@ -0,0 +1,21 @@
++\input texinfo
++@c ===========================================================================
++@c
++@c This file was generated with po4a. Translate the source file.
++@c
++@c ===========================================================================
++
++
++@c These menus do not contain a description, which used to cause a
++@c Texinfo menu entry to not be translated.
++@menu
++* A menu entry without any description::  A menu entry without any 
++                                            description
++* Optional menu name: The menu node::  Optional menu name: The menu node
++@end menu
++
++@node A menu entry without any description
++@chapter A menu entry without any description
++
++@node The menu node
++@chapter Optional menu name
+diff --git a/t/fmt/texinfo/partialmenus.po b/t/fmt/texinfo/partialmenus.po
+new file mode 100644
+index 00000000..31a14443
+--- /dev/null
++++ b/t/fmt/texinfo/partialmenus.po
+@@ -0,0 +1,40 @@
++# SOME DESCRIPTIVE TITLE
++# Copyright (C) YEAR Free Software Foundation, Inc.
++# This file is distributed under the same license as the PACKAGE package.
++# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
++#
++#, fuzzy
++msgid ""
++msgstr ""
++"Project-Id-Version: PACKAGE VERSION\n"
++"POT-Creation-Date: 2023-07-27 17:29-0400\n"
++"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
++"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
++"Language-Team: LANGUAGE <LL@li.org>\n"
++"Language: \n"
++"MIME-Version: 1.0\n"
++"Content-Type: text/plain; charset=UTF-8\n"
++"Content-Transfer-Encoding: 8bit\n"
++
++#. type: chapter
++#: partialmenus.texi:8 partialmenus.texi:10 partialmenus.texi:11
++#, no-wrap
++msgid "A menu entry without any description"
++msgstr "A MENU ENTRY WITHOUT ANY DESCRIPTION"
++
++#. type: menuentry
++#: partialmenus.texi:8
++msgid "Optional menu name: The menu node"
++msgstr "OPTIONAL MENU NAME: THE MENU NODE"
++
++#. type: node
++#: partialmenus.texi:13
++#, no-wrap
++msgid "The menu node"
++msgstr "THE MENU NODE"
++
++#. type: chapter
++#: partialmenus.texi:14
++#, no-wrap
++msgid "Optional menu name"
++msgstr "OPTIONAL MENU NAME"
+diff --git a/t/fmt/texinfo/partialmenus.pot b/t/fmt/texinfo/partialmenus.pot
+new file mode 100644
+index 00000000..0379f805
+--- /dev/null
++++ b/t/fmt/texinfo/partialmenus.pot
+@@ -0,0 +1,40 @@
++# SOME DESCRIPTIVE TITLE
++# Copyright (C) YEAR Free Software Foundation, Inc.
++# This file is distributed under the same license as the PACKAGE package.
++# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
++#
++#, fuzzy
++msgid ""
++msgstr ""
++"Project-Id-Version: PACKAGE VERSION\n"
++"POT-Creation-Date: 2023-08-16 09:47-0400\n"
++"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
++"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
++"Language-Team: LANGUAGE <LL@li.org>\n"
++"Language: \n"
++"MIME-Version: 1.0\n"
++"Content-Type: text/plain; charset=UTF-8\n"
++"Content-Transfer-Encoding: 8bit\n"
++
++#. type: chapter
++#: partialmenus.texi:8 partialmenus.texi:10 partialmenus.texi:11
++#, no-wrap
++msgid "A menu entry without any description"
++msgstr ""
++
++#. type: menuentry
++#: partialmenus.texi:8
++msgid "Optional menu name: The menu node"
++msgstr ""
++
++#. type: node
++#: partialmenus.texi:13
++#, no-wrap
++msgid "The menu node"
++msgstr ""
++
++#. type: chapter
++#: partialmenus.texi:14
++#, no-wrap
++msgid "Optional menu name"
++msgstr ""
+diff --git a/t/fmt/texinfo/partialmenus.texi b/t/fmt/texinfo/partialmenus.texi
+new file mode 100644
+index 00000000..f8663a2b
+--- /dev/null
++++ b/t/fmt/texinfo/partialmenus.texi
+@@ -0,0 +1,14 @@
++\input texinfo
++
++@c These menus do not contain a description, which used to cause a
++@c Texinfo menu entry to not be translated.
++@menu
++* A menu entry without any description::
++* Optional menu name: The menu node::
++@end menu
++
++@node A menu entry without any description
++@chapter A menu entry without any description
++
++@node The menu node
++@chapter Optional menu name
+diff --git a/t/fmt/texinfo/partialmenus.trans b/t/fmt/texinfo/partialmenus.trans
+new file mode 100644
+index 00000000..0ef742a1
+--- /dev/null
++++ b/t/fmt/texinfo/partialmenus.trans
+@@ -0,0 +1,21 @@
++\input texinfo
++@c ===========================================================================
++@c
++@c This file was generated with po4a. Translate the source file.
++@c
++@c ===========================================================================
++
++
++@c These menus do not contain a description, which used to cause a
++@c Texinfo menu entry to not be translated.
++@menu
++* A MENU ENTRY WITHOUT ANY DESCRIPTION::  A MENU ENTRY WITHOUT ANY 
++                                            DESCRIPTION
++* OPTIONAL MENU NAME: THE MENU NODE::  OPTIONAL MENU NAME: THE MENU NODE
++@end menu
++
++@node A MENU ENTRY WITHOUT ANY DESCRIPTION
++@chapter A MENU ENTRY WITHOUT ANY DESCRIPTION
++
++@node THE MENU NODE
++@chapter OPTIONAL MENU NAME
+
+base-commit: 5b1cd768afdf4e9445812c5d43428495a0fde3c6
+-- 
+2.41.0
+
diff --git a/gnu/packages/patches/rust-nettle-sys-disable-vendor.patch b/gnu/packages/patches/rust-nettle-sys-disable-vendor.patch
deleted file mode 100644
index ae5ef5ebe0..0000000000
--- a/gnu/packages/patches/rust-nettle-sys-disable-vendor.patch
+++ /dev/null
@@ -1,48 +0,0 @@
-Subject: nettle-sys: clear out "vendored" feature cruft from build.rs
-From: Daniel Kahn Gillmor's avatarDaniel Kahn Gillmor <dkg@fifthhorseman.net>
-
-https://salsa.debian.org/rust-team/debcargo-conf/-/commit/0c71150ad26bb66a8396dcdab055181af232ddc5
-https://sources.debian.org/src/rust-nettle-sys/2.0.4-3/debian/patches/disable-vendor.diff/
---- a/Cargo.toml	2019-10-23 13:08:07.000000000 -0400
-+++ b/Cargo.toml	2019-10-23 14:08:46.644064014 -0400
-@@ -29,12 +29,9 @@
- version = "0.51.1"
- default-features = false
- 
--[build-dependencies.nettle-src]
--version = "3.5.1-0"
--optional = true
--
- [build-dependencies.pkg-config]
- version = "0.3"
- 
- [features]
- vendored = ["nettle-src"]
-+nettle-src = []
-diff --git a/build.rs b/build.rs
-index 44f7af3..ede4b2f 100644
---- a/build.rs
-+++ b/build.rs
-@@ -1,7 +1,5 @@
- extern crate bindgen;
- extern crate pkg_config;
--#[cfg(feature = "vendored")]
--extern crate nettle_src;
- 
- use std::env;
- use std::fs;
-@@ -36,14 +34,6 @@ fn main() {
-     println!("cargo:rerun-if-env-changed=NETTLE_STATIC");
-     println!("cargo:rerun-if-env-changed={}", NETTLE_PREGENERATED_BINDINGS);
- 
--    #[cfg(feature = "vendored")]
--    {
--        let artifacts = nettle_src::Build::new().build();
--        println!("cargo:vendored=1");
--        env::set_var("PKG_CONFIG_PATH",
--                     artifacts.lib_dir().join("pkgconfig"));
--    }
--
-     let nettle = pkg_config::probe_library("nettle hogweed").unwrap();
- 
-     let mode = match env::var_os("NETTLE_STATIC") {
diff --git a/gnu/packages/patches/rust-ring-0.16-missing-files.patch b/gnu/packages/patches/rust-ring-0.16-missing-files.patch
new file mode 100644
index 0000000000..fa2f94a801
--- /dev/null
+++ b/gnu/packages/patches/rust-ring-0.16-missing-files.patch
@@ -0,0 +1,2293 @@
+These 4 files exist in the git repository for rust-ring, and are from
+the same commit that 0.16.20 is taken from. They were not added to the
+include list in Cargo.toml, so they were not added to the tarball.
+
+---
+ crypto/curve25519/make_curve25519_tables.py   | 222 +++++
+ crypto/fipsmodule/aes/asm/vpaes-armv7.pl      | 896 ++++++++++++++++++
+ crypto/fipsmodule/aes/asm/vpaes-armv8.pl      | 837 ++++++++++++++++
+ .../fipsmodule/modes/asm/ghash-neon-armv8.pl  | 294 ++++++
+ 4 files changed, 2249 insertions(+)
+ create mode 100755 crypto/curve25519/make_curve25519_tables.py
+ create mode 100644 crypto/fipsmodule/aes/asm/vpaes-armv7.pl
+ create mode 100755 crypto/fipsmodule/aes/asm/vpaes-armv8.pl
+ create mode 100644 crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl
+
+diff --git a/crypto/curve25519/make_curve25519_tables.py b/crypto/curve25519/make_curve25519_tables.py
+new file mode 100755
+index 0000000..50dee2a
+--- /dev/null
++++ b/crypto/curve25519/make_curve25519_tables.py
+@@ -0,0 +1,222 @@
++#!/usr/bin/env python
++# coding=utf-8
++# Copyright (c) 2020, Google Inc.
++#
++# Permission to use, copy, modify, and/or distribute this software for any
++# purpose with or without fee is hereby granted, provided that the above
++# copyright notice and this permission notice appear in all copies.
++#
++# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
++# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
++# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
++# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
++# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
++# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
++# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
++
++import StringIO
++import subprocess
++
++# Base field Z_p
++p = 2**255 - 19
++
++def modp_inv(x):
++    return pow(x, p-2, p)
++
++# Square root of -1
++modp_sqrt_m1 = pow(2, (p-1) // 4, p)
++
++# Compute corresponding x-coordinate, with low bit corresponding to
++# sign, or return None on failure
++def recover_x(y, sign):
++    if y >= p:
++        return None
++    x2 = (y*y-1) * modp_inv(d*y*y+1)
++    if x2 == 0:
++        if sign:
++            return None
++        else:
++            return 0
++
++    # Compute square root of x2
++    x = pow(x2, (p+3) // 8, p)
++    if (x*x - x2) % p != 0:
++        x = x * modp_sqrt_m1 % p
++    if (x*x - x2) % p != 0:
++        return None
++
++    if (x & 1) != sign:
++        x = p - x
++    return x
++
++# Curve constant
++d = -121665 * modp_inv(121666) % p
++
++# Base point
++g_y = 4 * modp_inv(5) % p
++g_x = recover_x(g_y, 0)
++
++# Points are represented as affine tuples (x, y).
++
++def point_add(P, Q):
++    x1, y1 = P
++    x2, y2 = Q
++    x3 = ((x1*y2 + y1*x2) * modp_inv(1 + d*x1*x2*y1*y2)) % p
++    y3 = ((y1*y2 + x1*x2) * modp_inv(1 - d*x1*x2*y1*y2)) % p
++    return (x3, y3)
++
++# Computes Q = s * P
++def point_mul(s, P):
++    Q = (0, 1)  # Neutral element
++    while s > 0:
++        if s & 1:
++            Q = point_add(Q, P)
++        P = point_add(P, P)
++        s >>= 1
++    return Q
++
++def to_bytes(x):
++    ret = bytearray(32)
++    for i in range(len(ret)):
++        ret[i] = x % 256
++        x >>= 8
++    assert x == 0
++    return ret
++
++def to_ge_precomp(P):
++    # typedef struct {
++    #   fe_loose yplusx;
++    #   fe_loose yminusx;
++    #   fe_loose xy2d;
++    # } ge_precomp;
++    x, y = P
++    return ((y + x) % p, (y - x) % p, (x * y * 2 * d) % p)
++
++def to_base_25_5(x):
++    limbs = (26, 25, 26, 25, 26, 25, 26, 25, 26, 25)
++    ret = []
++    for l in limbs:
++        ret.append(x & ((1<<l) - 1))
++        x >>= l
++    assert x == 0
++    return ret
++
++def to_base_51(x):
++    ret = []
++    for _ in range(5):
++        ret.append(x & ((1<<51) - 1))
++        x >>= 51
++    assert x == 0
++    return ret
++
++def to_literal(x):
++    ret = "{{\n#if defined(BORINGSSL_CURVE25519_64BIT)\n"
++    ret += ", ".join(map(str, to_base_51(x)))
++    ret += "\n#else\n"
++    ret += ", ".join(map(str, to_base_25_5(x)))
++    ret += "\n#endif\n}}"
++    return ret
++
++def main():
++    d2 = (2 * d) % p
++
++    small_precomp = bytearray()
++    for i in range(1, 16):
++        s = (i&1) | ((i&2) << (64-1)) | ((i&4) << (128-2)) | ((i&8) << (192-3))
++        P = point_mul(s, (g_x, g_y))
++        small_precomp += to_bytes(P[0])
++        small_precomp += to_bytes(P[1])
++
++    large_precomp = []
++    for i in range(32):
++        large_precomp.append([])
++        for j in range(8):
++            P = point_mul((j + 1) << (i * 8), (g_x, g_y))
++            large_precomp[-1].append(to_ge_precomp(P))
++
++    bi_precomp = []
++    for i in range(8):
++        P = point_mul(2*i + 1, (g_x, g_y))
++        bi_precomp.append(to_ge_precomp(P))
++
++
++    buf = StringIO.StringIO()
++    buf.write("""/* Copyright (c) 2020, Google Inc.
++ *
++ * Permission to use, copy, modify, and/or distribute this software for any
++ * purpose with or without fee is hereby granted, provided that the above
++ * copyright notice and this permission notice appear in all copies.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
++ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
++ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
++ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
++ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
++
++// This file is generated from
++//    ./make_curve25519_tables.py > curve25519_tables.h
++
++
++static const fe d = """)
++    buf.write(to_literal(d))
++    buf.write(""";
++
++static const fe sqrtm1 = """)
++    buf.write(to_literal(modp_sqrt_m1))
++    buf.write(""";
++
++static const fe d2 = """)
++    buf.write(to_literal(d2))
++    buf.write(""";
++
++#if defined(OPENSSL_SMALL)
++
++// This block of code replaces the standard base-point table with a much smaller
++// one. The standard table is 30,720 bytes while this one is just 960.
++//
++// This table contains 15 pairs of group elements, (x, y), where each field
++// element is serialised with |fe_tobytes|. If |i| is the index of the group
++// element then consider i+1 as a four-bit number: (i₀, i₁, i₂, i₃) (where i₀
++// is the most significant bit). The value of the group element is then:
++// (i₀×2^192 + i₁×2^128 + i₂×2^64 + i₃)G, where G is the generator.
++static const uint8_t k25519SmallPrecomp[15 * 2 * 32] = {""")
++    for i, b in enumerate(small_precomp):
++        buf.write("0x%02x, " % b)
++    buf.write("""
++};
++
++#else
++
++// k25519Precomp[i][j] = (j+1)*256^i*B
++static const ge_precomp k25519Precomp[32][8] = {
++""")
++    for child in large_precomp:
++        buf.write("{\n")
++        for val in child:
++            buf.write("{\n")
++            for term in val:
++                buf.write(to_literal(term) + ",\n")
++            buf.write("},\n")
++        buf.write("},\n")
++    buf.write("""};
++
++#endif  // OPENSSL_SMALL
++
++// Bi[i] = (2*i+1)*B
++static const ge_precomp Bi[8] = {
++""")
++    for val in bi_precomp:
++        buf.write("{\n")
++        for term in val:
++                buf.write(to_literal(term) + ",\n")
++        buf.write("},\n")
++    buf.write("""};
++""")
++
++    proc = subprocess.Popen(["clang-format"], stdin=subprocess.PIPE)
++    proc.communicate(buf.getvalue())
++
++if __name__ == "__main__":
++    main()
+diff --git a/crypto/fipsmodule/aes/asm/vpaes-armv7.pl b/crypto/fipsmodule/aes/asm/vpaes-armv7.pl
+new file mode 100644
+index 0000000..d36a97a
+--- /dev/null
++++ b/crypto/fipsmodule/aes/asm/vpaes-armv7.pl
+@@ -0,0 +1,896 @@
++#! /usr/bin/env perl
++# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved.
++#
++# Licensed under the OpenSSL license (the "License").  You may not use
++# this file except in compliance with the License.  You can obtain a copy
++# in the file LICENSE in the source distribution or at
++# https://www.openssl.org/source/license.html
++
++
++######################################################################
++## Constant-time SSSE3 AES core implementation.
++## version 0.1
++##
++## By Mike Hamburg (Stanford University), 2009
++## Public domain.
++##
++## For details see http://shiftleft.org/papers/vector_aes/ and
++## http://crypto.stanford.edu/vpaes/.
++##
++######################################################################
++# Adapted from the original x86_64 version and <appro@openssl.org>'s ARMv8
++# version.
++#
++# armv7, aarch64, and x86_64 differ in several ways:
++#
++# * x86_64 SSSE3 instructions are two-address (destination operand is also a
++#   source), while NEON is three-address (destination operand is separate from
++#   two sources).
++#
++# * aarch64 has 32 SIMD registers available, while x86_64 and armv7 have 16.
++#
++# * x86_64 instructions can take memory references, while ARM is a load/store
++#   architecture. This means we sometimes need a spare register.
++#
++# * aarch64 and x86_64 have 128-bit byte shuffle instructions (tbl and pshufb),
++#   while armv7 only has a 64-bit byte shuffle (vtbl).
++#
++# This means this armv7 version must be a mix of both aarch64 and x86_64
++# implementations. armv7 and aarch64 have analogous SIMD instructions, so we
++# base the instructions on aarch64. However, we cannot use aarch64's register
++# allocation. x86_64's register count matches, but x86_64 is two-address.
++# vpaes-armv8.pl already accounts for this in the comments, which use
++# three-address AVX instructions instead of the original SSSE3 ones. We base
++# register usage on these comments, which are preserved in this file.
++#
++# This means we do not use separate input and output registers as in aarch64 and
++# cannot pin as many constants in the preheat functions. However, the load/store
++# architecture means we must still deviate from x86_64 in places.
++#
++# Next, we account for the byte shuffle instructions. vtbl takes 64-bit source
++# and destination and 128-bit table. Fortunately, armv7 also allows addressing
++# upper and lower halves of each 128-bit register. The lower half of q{N} is
++# d{2*N}. The upper half is d{2*N+1}. Instead of the following non-existent
++# instruction,
++#
++#     vtbl.8 q0, q1, q2   @ Index each of q2's 16 bytes into q1. Store in q0.
++#
++# we write:
++#
++#     vtbl.8 d0, q1, d4   @ Index each of d4's 8 bytes into q1. Store in d0.
++#     vtbl.8 d1, q1, d5   @ Index each of d5's 8 bytes into q1. Store in d1.
++#
++# For readability, we write d0 and d1 as q0#lo and q0#hi, respectively and
++# post-process before outputting. (This is adapted from ghash-armv4.pl.) Note,
++# however, that destination (q0) and table (q1) registers may no longer match.
++# We adjust the register usage from x86_64 to avoid this. (Unfortunately, the
++# two-address pshufb always matched these operands, so this is common.)
++#
++# This file also runs against the limit of ARMv7's ADR pseudo-instruction. ADR
++# expands to an ADD or SUB of the pc register to find an address. That immediate
++# must fit in ARM's encoding scheme: 8 bits of constant and 4 bits of rotation.
++# This means larger values must be more aligned.
++#
++# ARM additionally has two encodings, ARM and Thumb mode. Our assembly files may
++# use either encoding (do we actually need to support this?). In ARM mode, the
++# distances get large enough to require 16-byte alignment. Moving constants
++# closer to their use resolves most of this, but common constants in
++# _vpaes_consts are used by the whole file. Affected ADR instructions must be
++# placed at 8 mod 16 (the pc register is 8 ahead). Instructions with this
++# constraint have been commented.
++#
++# For details on ARM's immediate value encoding scheme, see
++# https://alisdair.mcdiarmid.org/arm-immediate-value-encoding/
++#
++# Finally, a summary of armv7 and aarch64 SIMD syntax differences:
++#
++# * armv7 prefixes SIMD instructions with 'v', while aarch64 does not.
++#
++# * armv7 SIMD registers are named like q0 (and d0 for the half-width ones).
++#   aarch64 names registers like v0, and denotes half-width operations in an
++#   instruction suffix (see below).
++#
++# * aarch64 embeds size and lane information in register suffixes. v0.16b is
++#   16 bytes, v0.8h is eight u16s, v0.4s is four u32s, and v0.2d is two u64s.
++#   armv7 embeds the total size in the register name (see above) and the size of
++#   each element in an instruction suffix, which may look like vmov.i8,
++#   vshr.u8, or vtbl.8, depending on instruction.
++
++use strict;
++
++my $flavour = shift;
++my $output;
++while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
++
++$0 =~ m/(.*[\/\\])[^\/\\]+$/;
++my $dir=$1;
++my $xlate;
++( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
++( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
++die "can't locate arm-xlate.pl";
++
++open OUT,"| \"$^X\" $xlate $flavour $output";
++*STDOUT=*OUT;
++
++my $code = "";
++
++$code.=<<___;
++.syntax	unified
++
++.arch	armv7-a
++.fpu	neon
++
++#if defined(__thumb2__)
++.thumb
++#else
++.code	32
++#endif
++
++.text
++
++.type	_vpaes_consts,%object
++.align	7	@ totally strategic alignment
++_vpaes_consts:
++.Lk_mc_forward:	@ mc_forward
++	.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
++	.quad	0x080B0A0904070605, 0x000302010C0F0E0D
++	.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
++	.quad	0x000302010C0F0E0D, 0x080B0A0904070605
++.Lk_mc_backward:@ mc_backward
++	.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
++	.quad	0x020100030E0D0C0F, 0x0A09080B06050407
++	.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
++	.quad	0x0A09080B06050407, 0x020100030E0D0C0F
++.Lk_sr:		@ sr
++	.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
++	.quad	0x030E09040F0A0500, 0x0B06010C07020D08
++	.quad	0x0F060D040B020900, 0x070E050C030A0108
++	.quad	0x0B0E0104070A0D00, 0x0306090C0F020508
++
++@
++@ "Hot" constants
++@
++.Lk_inv:	@ inv, inva
++	.quad	0x0E05060F0D080180, 0x040703090A0B0C02
++	.quad	0x01040A060F0B0780, 0x030D0E0C02050809
++.Lk_ipt:	@ input transform (lo, hi)
++	.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
++	.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
++.Lk_sbo:	@ sbou, sbot
++	.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
++	.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
++.Lk_sb1:	@ sb1u, sb1t
++	.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
++	.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
++.Lk_sb2:	@ sb2u, sb2t
++	.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
++	.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
++
++.asciz  "Vector Permutation AES for ARMv7 NEON, Mike Hamburg (Stanford University)"
++.size	_vpaes_consts,.-_vpaes_consts
++.align	6
++___
++
++{
++my ($inp,$out,$key) = map("r$_", (0..2));
++
++my ($invlo,$invhi) = map("q$_", (10..11));
++my ($sb1u,$sb1t,$sb2u,$sb2t) = map("q$_", (12..15));
++
++$code.=<<___;
++@@
++@@  _vpaes_preheat
++@@
++@@  Fills q9-q15 as specified below.
++@@
++.type	_vpaes_preheat,%function
++.align	4
++_vpaes_preheat:
++	adr	r10, .Lk_inv
++	vmov.i8	q9, #0x0f		@ .Lk_s0F
++	vld1.64	{q10,q11}, [r10]!	@ .Lk_inv
++	add	r10, r10, #64		@ Skip .Lk_ipt, .Lk_sbo
++	vld1.64	{q12,q13}, [r10]!	@ .Lk_sb1
++	vld1.64	{q14,q15}, [r10]	@ .Lk_sb2
++	bx	lr
++
++@@
++@@  _vpaes_encrypt_core
++@@
++@@  AES-encrypt q0.
++@@
++@@  Inputs:
++@@     q0 = input
++@@     q9-q15 as in _vpaes_preheat
++@@    [$key] = scheduled keys
++@@
++@@  Output in q0
++@@  Clobbers  q1-q5, r8-r11
++@@  Preserves q6-q8 so you get some local vectors
++@@
++@@
++.type	_vpaes_encrypt_core,%function
++.align 4
++_vpaes_encrypt_core:
++	mov	r9, $key
++	ldr	r8, [$key,#240]		@ pull rounds
++	adr	r11, .Lk_ipt
++	@ vmovdqa	.Lk_ipt(%rip),	%xmm2	# iptlo
++	@ vmovdqa	.Lk_ipt+16(%rip), %xmm3	# ipthi
++	vld1.64	{q2, q3}, [r11]
++	adr	r11, .Lk_mc_forward+16
++	vld1.64	{q5}, [r9]!		@ vmovdqu	(%r9),	%xmm5		# round0 key
++	vand	q1, q0, q9		@ vpand	%xmm9,	%xmm0,	%xmm1
++	vshr.u8	q0, q0, #4		@ vpsrlb	\$4,	%xmm0,	%xmm0
++	vtbl.8	q1#lo, {q2}, q1#lo	@ vpshufb	%xmm1,	%xmm2,	%xmm1
++	vtbl.8	q1#hi, {q2}, q1#hi
++	vtbl.8	q2#lo, {q3}, q0#lo	@ vpshufb	%xmm0,	%xmm3,	%xmm2
++	vtbl.8	q2#hi, {q3}, q0#hi
++	veor	q0, q1, q5		@ vpxor	%xmm5,	%xmm1,	%xmm0
++	veor	q0, q0, q2		@ vpxor	%xmm2,	%xmm0,	%xmm0
++
++	@ .Lenc_entry ends with a bne instruction which is normally paired with
++	@ subs in .Lenc_loop.
++	tst	r8, r8
++	b	.Lenc_entry
++
++.align 4
++.Lenc_loop:
++	@ middle of middle round
++	add	r10, r11, #0x40
++	vtbl.8	q4#lo, {$sb1t}, q2#lo	@ vpshufb	%xmm2,	%xmm13,	%xmm4	# 4 = sb1u
++	vtbl.8	q4#hi, {$sb1t}, q2#hi
++	vld1.64	{q1}, [r11]!		@ vmovdqa	-0x40(%r11,%r10), %xmm1	# .Lk_mc_forward[]
++	vtbl.8	q0#lo, {$sb1u}, q3#lo	@ vpshufb	%xmm3,	%xmm12,	%xmm0	# 0 = sb1t
++	vtbl.8	q0#hi, {$sb1u}, q3#hi
++	veor	q4, q4, q5		@ vpxor		%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
++	vtbl.8	q5#lo, {$sb2t}, q2#lo	@ vpshufb	%xmm2,	%xmm15,	%xmm5	# 4 = sb2u
++	vtbl.8	q5#hi, {$sb2t}, q2#hi
++	veor	q0, q0, q4		@ vpxor		%xmm4,	%xmm0,	%xmm0	# 0 = A
++	vtbl.8	q2#lo, {$sb2u}, q3#lo	@ vpshufb	%xmm3,	%xmm14,	%xmm2	# 2 = sb2t
++	vtbl.8	q2#hi, {$sb2u}, q3#hi
++	vld1.64	{q4}, [r10]		@ vmovdqa	(%r11,%r10), %xmm4	# .Lk_mc_backward[]
++	vtbl.8	q3#lo, {q0}, q1#lo	@ vpshufb	%xmm1,	%xmm0,	%xmm3	# 0 = B
++	vtbl.8	q3#hi, {q0}, q1#hi
++	veor	q2, q2, q5		@ vpxor		%xmm5,	%xmm2,	%xmm2	# 2 = 2A
++	@ Write to q5 instead of q0, so the table and destination registers do
++	@ not overlap.
++	vtbl.8	q5#lo, {q0}, q4#lo	@ vpshufb	%xmm4,	%xmm0,	%xmm0	# 3 = D
++	vtbl.8	q5#hi, {q0}, q4#hi
++	veor	q3, q3, q2		@ vpxor		%xmm2,	%xmm3,	%xmm3	# 0 = 2A+B
++	vtbl.8	q4#lo, {q3}, q1#lo	@ vpshufb	%xmm1,	%xmm3,	%xmm4	# 0 = 2B+C
++	vtbl.8	q4#hi, {q3}, q1#hi
++	@ Here we restore the original q0/q5 usage.
++	veor	q0, q5, q3		@ vpxor		%xmm3,	%xmm0,	%xmm0	# 3 = 2A+B+D
++	and	r11, r11, #~(1<<6)	@ and		\$0x30,	%r11		# ... mod 4
++	veor	q0, q0, q4		@ vpxor		%xmm4,	%xmm0, %xmm0	# 0 = 2A+3B+C+D
++	subs	r8, r8, #1		@ nr--
++
++.Lenc_entry:
++	@ top of round
++	vand	q1, q0, q9		@ vpand		%xmm0,	%xmm9,	%xmm1   # 0 = k
++	vshr.u8	q0, q0, #4		@ vpsrlb	\$4,	%xmm0,	%xmm0	# 1 = i
++	vtbl.8	q5#lo, {$invhi}, q1#lo	@ vpshufb	%xmm1,	%xmm11,	%xmm5	# 2 = a/k
++	vtbl.8	q5#hi, {$invhi}, q1#hi
++	veor	q1, q1, q0		@ vpxor		%xmm0,	%xmm1,	%xmm1	# 0 = j
++	vtbl.8	q3#lo, {$invlo}, q0#lo	@ vpshufb	%xmm0, 	%xmm10,	%xmm3  	# 3 = 1/i
++	vtbl.8	q3#hi, {$invlo}, q0#hi
++	vtbl.8	q4#lo, {$invlo}, q1#lo	@ vpshufb	%xmm1, 	%xmm10,	%xmm4  	# 4 = 1/j
++	vtbl.8	q4#hi, {$invlo}, q1#hi
++	veor	q3, q3, q5		@ vpxor		%xmm5,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
++	veor	q4, q4, q5		@ vpxor		%xmm5,	%xmm4,	%xmm4  	# 4 = jak = 1/j + a/k
++	vtbl.8	q2#lo, {$invlo}, q3#lo	@ vpshufb	%xmm3,	%xmm10,	%xmm2  	# 2 = 1/iak
++	vtbl.8	q2#hi, {$invlo}, q3#hi
++	vtbl.8	q3#lo, {$invlo}, q4#lo	@ vpshufb	%xmm4,	%xmm10,	%xmm3	# 3 = 1/jak
++	vtbl.8	q3#hi, {$invlo}, q4#hi
++	veor	q2, q2, q1		@ vpxor		%xmm1,	%xmm2,	%xmm2  	# 2 = io
++	veor	q3, q3, q0		@ vpxor		%xmm0,	%xmm3,	%xmm3	# 3 = jo
++	vld1.64	{q5}, [r9]!		@ vmovdqu	(%r9),	%xmm5
++	bne	.Lenc_loop
++
++	@ middle of last round
++	add	r10, r11, #0x80
++
++	adr	r11, .Lk_sbo
++	@ Read to q1 instead of q4, so the vtbl.8 instruction below does not
++	@ overlap table and destination registers.
++	vld1.64 {q1}, [r11]!		@ vmovdqa	-0x60(%r10), %xmm4	# 3 : sbou
++	vld1.64 {q0}, [r11]		@ vmovdqa	-0x50(%r10), %xmm0	# 0 : sbot	.Lk_sbo+16
++	vtbl.8	q4#lo, {q1}, q2#lo	@ vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
++	vtbl.8	q4#hi, {q1}, q2#hi
++	vld1.64	{q1}, [r10]		@ vmovdqa	0x40(%r11,%r10), %xmm1	# .Lk_sr[]
++	@ Write to q2 instead of q0 below, to avoid overlapping table and
++	@ destination registers.
++	vtbl.8	q2#lo, {q0}, q3#lo	@ vpshufb	%xmm3,	%xmm0,	%xmm0	# 0 = sb1t
++	vtbl.8	q2#hi, {q0}, q3#hi
++	veor	q4, q4, q5		@ vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
++	veor	q2, q2, q4		@ vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
++	@ Here we restore the original q0/q2 usage.
++	vtbl.8	q0#lo, {q2}, q1#lo	@ vpshufb	%xmm1,	%xmm0,	%xmm0
++	vtbl.8	q0#hi, {q2}, q1#hi
++	bx	lr
++.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core
++
++.globl	GFp_vpaes_encrypt
++.type	GFp_vpaes_encrypt,%function
++.align	4
++GFp_vpaes_encrypt:
++	@ _vpaes_encrypt_core uses r8-r11. Round up to r7-r11 to maintain stack
++	@ alignment.
++	stmdb	sp!, {r7-r11,lr}
++	@ _vpaes_encrypt_core uses q4-q5 (d8-d11), which are callee-saved.
++	vstmdb	sp!, {d8-d11}
++
++	vld1.64	{q0}, [$inp]
++	bl	_vpaes_preheat
++	bl	_vpaes_encrypt_core
++	vst1.64	{q0}, [$out]
++
++	vldmia	sp!, {d8-d11}
++	ldmia	sp!, {r7-r11, pc}	@ return
++.size	GFp_vpaes_encrypt,.-GFp_vpaes_encrypt
++___
++}
++{
++my ($inp,$bits,$out,$dir)=("r0","r1","r2","r3");
++my ($rcon,$s0F,$invlo,$invhi,$s63) = map("q$_",(8..12));
++
++$code.=<<___;
++@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++@@                                                    @@
++@@                  AES key schedule                  @@
++@@                                                    @@
++@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++
++@ This function diverges from both x86_64 and aarch64 in which constants are
++@ pinned. x86_64 has a common preheat function for all operations. aarch64
++@ separates them because it has enough registers to pin nearly all constants.
++@ armv7 does not have enough registers, but needing explicit loads and stores
++@ also complicates using x86_64's register allocation directly.
++@
++@ We pin some constants for convenience and leave q14 and q15 free to load
++@ others on demand.
++
++@
++@  Key schedule constants
++@
++.type	_vpaes_key_consts,%object
++.align	4
++_vpaes_key_consts:
++.Lk_rcon:	@ rcon
++	.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
++
++.Lk_opt:	@ output transform
++	.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
++	.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
++.Lk_deskew:	@ deskew tables: inverts the sbox's "skew"
++	.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
++	.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
++.size	_vpaes_key_consts,.-_vpaes_key_consts
++
++.type	_vpaes_key_preheat,%function
++.align	4
++_vpaes_key_preheat:
++	adr	r11, .Lk_rcon
++	vmov.i8	$s63, #0x5b			@ .Lk_s63
++	adr	r10, .Lk_inv			@ Must be aligned to 8 mod 16.
++	vmov.i8	$s0F, #0x0f			@ .Lk_s0F
++	vld1.64	{$invlo,$invhi}, [r10]		@ .Lk_inv
++	vld1.64	{$rcon}, [r11]			@ .Lk_rcon
++	bx	lr
++.size	_vpaes_key_preheat,.-_vpaes_key_preheat
++
++.type	_vpaes_schedule_core,%function
++.align	4
++_vpaes_schedule_core:
++	@ We only need to save lr, but ARM requires an 8-byte stack alignment,
++	@ so save an extra register.
++	stmdb	sp!, {r3,lr}
++
++	bl	_vpaes_key_preheat	@ load the tables
++
++	adr	r11, .Lk_ipt		@ Must be aligned to 8 mod 16.
++	vld1.64	{q0}, [$inp]!		@ vmovdqu	(%rdi),	%xmm0		# load key (unaligned)
++
++	@ input transform
++	@ Use q4 here rather than q3 so .Lschedule_am_decrypting does not
++	@ overlap table and destination.
++	vmov	q4, q0			@ vmovdqa	%xmm0,	%xmm3
++	bl	_vpaes_schedule_transform
++	adr	r10, .Lk_sr		@ Must be aligned to 8 mod 16.
++	vmov	q7, q0			@ vmovdqa	%xmm0,	%xmm7
++
++	add	r8, r8, r10		@ r8 = caller-supplied offset + address of .Lk_sr
++
++	@ encrypting, output zeroth round key after transform
++	vst1.64	{q0}, [$out]		@ vmovdqu	%xmm0,	(%rdx)
++
++	@ *ring*: Decryption removed.
++
++.Lschedule_go:
++	cmp	$bits, #192		@ cmp	\$192,	%esi
++	bhi	.Lschedule_256
++	@ 128: fall through
++
++@@
++@@  .schedule_128
++@@
++@@  128-bit specific part of key schedule.
++@@
++@@  This schedule is really simple, because all its parts
++@@  are accomplished by the subroutines.
++@@
++.Lschedule_128:
++	mov	$inp, #10		@ mov	\$10, %esi
++
++.Loop_schedule_128:
++	bl 	_vpaes_schedule_round
++	subs	$inp, $inp, #1		@ dec	%esi
++	beq 	.Lschedule_mangle_last
++	bl	_vpaes_schedule_mangle	@ write output
++	b 	.Loop_schedule_128
++
++@@
++@@  .aes_schedule_256
++@@
++@@  256-bit specific part of key schedule.
++@@
++@@  The structure here is very similar to the 128-bit
++@@  schedule, but with an additional "low side" in
++@@  q6.  The low side's rounds are the same as the
++@@  high side's, except no rcon and no rotation.
++@@
++.align	4
++.Lschedule_256:
++	vld1.64	{q0}, [$inp]			@ vmovdqu	16(%rdi),%xmm0		# load key part 2 (unaligned)
++	bl	_vpaes_schedule_transform	@ input transform
++	mov	$inp, #7			@ mov	\$7, %esi
++
++.Loop_schedule_256:
++	bl	_vpaes_schedule_mangle		@ output low result
++	vmov	q6, q0				@ vmovdqa	%xmm0,	%xmm6		# save cur_lo in xmm6
++
++	@ high round
++	bl	_vpaes_schedule_round
++	subs	$inp, $inp, #1			@ dec	%esi
++	beq 	.Lschedule_mangle_last
++	bl	_vpaes_schedule_mangle
++
++	@ low round. swap q7 and q6 (xmm7 and xmm6 in x86_64), parking q7 in q5
++	vdup.32	q0, q0#hi[1]		@ vpshufd	\$0xFF,	%xmm0,	%xmm0
++	vmov.i8	q4, #0
++	vmov	q5, q7			@ vmovdqa	%xmm7,	%xmm5
++	vmov	q7, q6			@ vmovdqa	%xmm6,	%xmm7
++	bl	_vpaes_schedule_low_round
++	vmov	q7, q5			@ vmovdqa	%xmm5,	%xmm7
++
++	b	.Loop_schedule_256
++
++@@
++@@  .aes_schedule_mangle_last
++@@
++@@  Mangler for last round of key schedule
++@@  Mangles q0
++@@    when encrypting, outputs out(q0) ^ 63
++@@    when decrypting, outputs unskew(q0) (decryption is removed in this port)
++@@
++@@  Always called right before return... jumps to cleanup and exits
++@@
++.align	4
++.Lschedule_mangle_last:
++	@ schedule last round key from q0 (xmm0 in x86_64)
++	adr	r11, .Lk_deskew			@ lea	.Lk_deskew(%rip),%r11	# unused here: r11 is overwritten with .Lk_opt below
++
++	@ encrypting
++	vld1.64	{q1}, [r8]		@ vmovdqa	(%r8,%r10),%xmm1
++	adr	r11, .Lk_opt		@ lea		.Lk_opt(%rip),	%r11		# prepare to output transform
++	add	$out, $out, #32		@ add		\$32,	%rdx
++	vmov	q2, q0
++	vtbl.8	q0#lo, {q2}, q1#lo	@ vpshufb	%xmm1,	%xmm0,	%xmm0		# output permute
++	vtbl.8	q0#hi, {q2}, q1#hi
++
++.Lschedule_mangle_last_dec:
++	sub	$out, $out, #16			@ add	\$-16,	%rdx
++	veor	q0, q0, $s63			@ vpxor	.Lk_s63(%rip),	%xmm0,	%xmm0
++	bl	_vpaes_schedule_transform	@ output transform
++	vst1.64	{q0}, [$out]			@ vmovdqu	%xmm0,	(%rdx)		# save last key
++
++	@ cleanup: zero q0-q7 before returning
++	veor	q0, q0, q0		@ vpxor	%xmm0,	%xmm0,	%xmm0
++	veor	q1, q1, q1		@ vpxor	%xmm1,	%xmm1,	%xmm1
++	veor	q2, q2, q2		@ vpxor	%xmm2,	%xmm2,	%xmm2
++	veor	q3, q3, q3		@ vpxor	%xmm3,	%xmm3,	%xmm3
++	veor	q4, q4, q4		@ vpxor	%xmm4,	%xmm4,	%xmm4
++	veor	q5, q5, q5		@ vpxor	%xmm5,	%xmm5,	%xmm5
++	veor	q6, q6, q6		@ vpxor	%xmm6,	%xmm6,	%xmm6
++	veor	q7, q7, q7		@ vpxor	%xmm7,	%xmm7,	%xmm7
++	ldmia	sp!, {r3,pc}		@ return
++.size	_vpaes_schedule_core,.-_vpaes_schedule_core
++
++@@
++@@  .aes_schedule_round
++@@
++@@  Runs one main round of the key schedule on q0, q7
++@@
++@@  Specifically, runs subbytes on the high dword of q0
++@@  then rotates it by one byte and xors into the low dword of
++@@  q7.
++@@
++@@  Adds rcon from low byte of q8, then rotates q8 for
++@@  next rcon.
++@@
++@@  Smears the dwords of q7 by xoring the low into the
++@@  second low, result into third, result into highest.
++@@
++@@  Returns results in q7 = q0.
++@@  Clobbers q1-q4, q14-q15, r11.
++@@
++.type	_vpaes_schedule_round,%function
++.align	4
++_vpaes_schedule_round:
++	@ extract rcon from xmm8
++	vmov.i8	q4, #0				@ vpxor		%xmm4,	%xmm4,	%xmm4
++	vext.8	q1, $rcon, q4, #15		@ vpalignr	\$15,	%xmm8,	%xmm4,	%xmm1
++	vext.8	$rcon, $rcon, $rcon, #15	@ vpalignr	\$15,	%xmm8,	%xmm8,	%xmm8
++	veor	q7, q7, q1			@ vpxor		%xmm1,	%xmm7,	%xmm7
++
++	@ rotate
++	vdup.32	q0, q0#hi[1]			@ vpshufd	\$0xFF,	%xmm0,	%xmm0
++	vext.8	q0, q0, q0, #1			@ vpalignr	\$1,	%xmm0,	%xmm0,	%xmm0
++
++	@ fall through...
++
++	@ low round: same as high round, but no rotation and no rcon.
++_vpaes_schedule_low_round:
++	@ The x86_64 version pins .Lk_sb1 in %xmm13 and .Lk_sb1+16 in %xmm12.
++	@ We pin other values in _vpaes_key_preheat, so load them now.
++	adr	r11, .Lk_sb1
++	vld1.64	{q14,q15}, [r11]
++
++	@ smear xmm7
++	vext.8	q1, q4, q7, #12			@ vpslldq	\$4,	%xmm7,	%xmm1
++	veor	q7, q7, q1			@ vpxor	%xmm1,	%xmm7,	%xmm7
++	vext.8	q4, q4, q7, #8			@ vpslldq	\$8,	%xmm7,	%xmm4
++
++	@ subbytes
++	vand	q1, q0, $s0F			@ vpand		%xmm9,	%xmm0,	%xmm1		# 0 = k
++	vshr.u8	q0, q0, #4			@ vpsrlb	\$4,	%xmm0,	%xmm0		# 1 = i
++	 veor	q7, q7, q4			@ vpxor		%xmm4,	%xmm7,	%xmm7
++	vtbl.8	q2#lo, {$invhi}, q1#lo		@ vpshufb	%xmm1,	%xmm11,	%xmm2		# 2 = a/k
++	vtbl.8	q2#hi, {$invhi}, q1#hi
++	veor	q1, q1, q0			@ vpxor		%xmm0,	%xmm1,	%xmm1		# 0 = j
++	vtbl.8	q3#lo, {$invlo}, q0#lo		@ vpshufb	%xmm0, 	%xmm10,	%xmm3		# 3 = 1/i
++	vtbl.8	q3#hi, {$invlo}, q0#hi
++	veor	q3, q3, q2			@ vpxor		%xmm2,	%xmm3,	%xmm3		# 3 = iak = 1/i + a/k
++	vtbl.8	q4#lo, {$invlo}, q1#lo		@ vpshufb	%xmm1,	%xmm10,	%xmm4		# 4 = 1/j
++	vtbl.8	q4#hi, {$invlo}, q1#hi
++	 veor	q7, q7, $s63			@ vpxor		.Lk_s63(%rip),	%xmm7,	%xmm7
++	vtbl.8	q3#lo, {$invlo}, q3#lo		@ vpshufb	%xmm3,	%xmm10,	%xmm3		# 2 = 1/iak
++	vtbl.8	q3#hi, {$invlo}, q3#hi
++	veor	q4, q4, q2			@ vpxor		%xmm2,	%xmm4,	%xmm4		# 4 = jak = 1/j + a/k
++	vtbl.8	q2#lo, {$invlo}, q4#lo		@ vpshufb	%xmm4,	%xmm10,	%xmm2		# 3 = 1/jak
++	vtbl.8	q2#hi, {$invlo}, q4#hi
++	veor	q3, q3, q1			@ vpxor		%xmm1,	%xmm3,	%xmm3		# 2 = io
++	veor	q2, q2, q0			@ vpxor		%xmm0,	%xmm2,	%xmm2		# 3 = jo
++	vtbl.8	q4#lo, {q15}, q3#lo		@ vpshufb	%xmm3,	%xmm13,	%xmm4		# 4 = sbou
++	vtbl.8	q4#hi, {q15}, q3#hi
++	vtbl.8	q1#lo, {q14}, q2#lo		@ vpshufb	%xmm2,	%xmm12,	%xmm1		# 0 = sb1t
++	vtbl.8	q1#hi, {q14}, q2#hi
++	veor	q1, q1, q4			@ vpxor		%xmm4,	%xmm1,	%xmm1		# 0 = sbox output
++
++	@ add in smeared stuff
++	veor	q0, q1, q7			@ vpxor	%xmm7,	%xmm1,	%xmm0
++	veor	q7, q1, q7			@ vmovdqa	%xmm0,	%xmm7
++	bx	lr
++.size	_vpaes_schedule_round,.-_vpaes_schedule_round
++
++@@
++@@  .aes_schedule_transform
++@@
++@@  Linear-transform q0 according to tables at [r11]
++@@
++@@  Requires that q9 = 0x0F0F... as in preheat
++@@  Output in q0
++@@  Clobbers q1, q2, q14, q15
++@@
++.type	_vpaes_schedule_transform,%function
++.align	4
++_vpaes_schedule_transform:
++	vld1.64	{q14,q15}, [r11]	@ vmovdqa	(%r11),	%xmm2 	# lo
++					@ vmovdqa	16(%r11),	%xmm1 # hi
++	vand	q1, q0, $s0F		@ vpand	%xmm9,	%xmm0,	%xmm1
++	vshr.u8	q0, q0, #4		@ vpsrlb	\$4,	%xmm0,	%xmm0
++	vtbl.8	q2#lo, {q14}, q1#lo	@ vpshufb	%xmm1,	%xmm2,	%xmm2
++	vtbl.8	q2#hi, {q14}, q1#hi
++	vtbl.8	q0#lo, {q15}, q0#lo	@ vpshufb	%xmm0,	%xmm1,	%xmm0
++	vtbl.8	q0#hi, {q15}, q0#hi
++	veor	q0, q0, q2		@ vpxor	%xmm2,	%xmm0,	%xmm0
++	bx	lr
++.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform
++
++@@
++@@  .aes_schedule_mangle
++@@
++@@  Mangles q0 from (basis-transformed) standard version
++@@  to our version.
++@@
++@@  On encrypt,
++@@    xor with 0x63
++@@    multiply by circulant 0,1,1,1
++@@    apply shiftrows transform
++@@
++@@  On decrypt (x86_64 original; this function has no decrypt path),
++@@    xor with 0x63
++@@    multiply by "inverse mixcolumns" circulant E,B,D,9
++@@    deskew
++@@    apply shiftrows transform
++@@
++@@
++@@  Advances r2 by 16, then writes the mangled key to [r2]
++@@  Keeps track of round number mod 4 in r8
++@@  Preserves q0
++@@  Clobbers q1-q5, r11
++@@
++.type	_vpaes_schedule_mangle,%function
++.align	4
++_vpaes_schedule_mangle:
++	tst	$dir, $dir		@ flags are not consumed: no conditional instruction follows in this function
++	vmov	q4, q0			@ vmovdqa	%xmm0,	%xmm4	# save xmm0 for later
++	adr	r11, .Lk_mc_forward	@ Must be aligned to 8 mod 16.
++	vld1.64	{q5}, [r11]		@ vmovdqa	.Lk_mc_forward(%rip),%xmm5
++
++	@ encrypting
++	@ Write to q2 so we do not overlap table and destination below.
++	veor	q2, q0, $s63		@ vpxor		.Lk_s63(%rip),	%xmm0,	%xmm4
++	add	$out, $out, #16		@ add		\$16,	%rdx
++	vtbl.8	q4#lo, {q2}, q5#lo	@ vpshufb	%xmm5,	%xmm4,	%xmm4
++	vtbl.8	q4#hi, {q2}, q5#hi
++	vtbl.8	q1#lo, {q4}, q5#lo	@ vpshufb	%xmm5,	%xmm4,	%xmm1
++	vtbl.8	q1#hi, {q4}, q5#hi
++	vtbl.8	q3#lo, {q1}, q5#lo	@ vpshufb	%xmm5,	%xmm1,	%xmm3
++	vtbl.8	q3#hi, {q1}, q5#hi
++	veor	q4, q4, q1		@ vpxor		%xmm1,	%xmm4,	%xmm4
++	vld1.64	{q1}, [r8]		@ vmovdqa	(%r8,%r10),	%xmm1
++	veor	q3, q3, q4		@ vpxor		%xmm4,	%xmm3,	%xmm3
++
++.Lschedule_mangle_both:
++	@ Write to q2 so table and destination do not overlap.
++	vtbl.8	q2#lo, {q3}, q1#lo	@ vpshufb	%xmm1,	%xmm3,	%xmm3
++	vtbl.8	q2#hi, {q3}, q1#hi
++	add	r8, r8, #64-16		@ add	\$-16,	%r8 (adds 48 here; bit 6 is cleared on the next line)
++	and	r8, r8, #~(1<<6)	@ and	\$0x30,	%r8
++	vst1.64	{q2}, [$out]		@ vmovdqu	%xmm3,	(%rdx)
++	bx	lr
++.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle
++
++.globl	GFp_vpaes_set_encrypt_key
++.type	GFp_vpaes_set_encrypt_key,%function
++.align	4
++GFp_vpaes_set_encrypt_key:
++	stmdb	sp!, {r7-r11, lr}
++	vstmdb	sp!, {d8-d15}		@ save d8-d15 (q4-q7); restored before returning
++
++	lsr	r9, $bits, #5		@ shr	\$5,%eax
++	add	r9, r9, #5		@ add	\$5,%eax
++	str	r9, [$out,#240]		@ mov	%eax,240(%rdx)	# AES_KEY->rounds = nbits/32+5;
++
++	mov	$dir, #0		@ mov	\$0,%ecx
++	mov	r8, #0x30		@ mov	\$0x30,%r8d
++	bl	_vpaes_schedule_core
++	eor	r0, r0, r0		@ return value 0
++
++	vldmia	sp!, {d8-d15}
++	ldmia	sp!, {r7-r11, pc}	@ return
++.size	GFp_vpaes_set_encrypt_key,.-GFp_vpaes_set_encrypt_key
++___
++}
++
++{
++my ($out, $inp) = map("r$_", (0..1));
++my ($s0F, $s63, $s63_raw, $mc_forward) = map("q$_", (9..12));
++
++$code .= <<___;
++
++@ Additional constants for converting to bsaes.
++.type	_vpaes_convert_consts,%object
++.align	4
++_vpaes_convert_consts:
++@ .Lk_opt_then_skew applies skew(opt(x)) XOR 0x63, where skew is the linear
++@ transform in the AES S-box. 0x63 is incorporated into the low half of the
++@ table. This was computed with the following script:
++@
++@   def u64s_to_u128(x, y):
++@       return x | (y << 64)
++@   def u128_to_u64s(w):
++@       return w & ((1<<64)-1), w >> 64
++@   def get_byte(w, i):
++@       return (w >> (i*8)) & 0xff
++@   def apply_table(table, b):
++@       lo = b & 0xf
++@       hi = b >> 4
++@       return get_byte(table[0], lo) ^ get_byte(table[1], hi)
++@   def opt(b):
++@       table = [
++@           u64s_to_u128(0xFF9F4929D6B66000, 0xF7974121DEBE6808),
++@           u64s_to_u128(0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0),
++@       ]
++@       return apply_table(table, b)
++@   def rot_byte(b, n):
++@       return 0xff & ((b << n) | (b >> (8-n)))
++@   def skew(x):
++@       return (x ^ rot_byte(x, 1) ^ rot_byte(x, 2) ^ rot_byte(x, 3) ^
++@               rot_byte(x, 4))
++@   table = [0, 0]
++@   for i in range(16):
++@       table[0] |= (skew(opt(i)) ^ 0x63) << (i*8)
++@       table[1] |= skew(opt(i<<4)) << (i*8)
++@   print("\t.quad\t0x%016x, 0x%016x" % u128_to_u64s(table[0]))
++@   print("\t.quad\t0x%016x, 0x%016x" % u128_to_u64s(table[1]))
++.Lk_opt_then_skew:
++	.quad	0x9cb8436798bc4763, 0x6440bb9f6044bf9b
++	.quad	0x1f30062936192f00, 0xb49bad829db284ab
++
++@ void GFp_vpaes_encrypt_key_to_bsaes(AES_KEY *bsaes, const AES_KEY *vpaes);
++.globl	GFp_vpaes_encrypt_key_to_bsaes
++.type	GFp_vpaes_encrypt_key_to_bsaes,%function
++.align	4
++GFp_vpaes_encrypt_key_to_bsaes:
++	stmdb	sp!, {r11, lr}
++
++	@ See _vpaes_schedule_core for the key schedule logic. In particular,
++	@ _vpaes_schedule_transform(.Lk_ipt) (section 2.2 of the paper),
++	@ _vpaes_schedule_mangle (section 4.3), and .Lschedule_mangle_last
++	@ contain the transformations not in the bsaes representation. This
++	@ function inverts those transforms.
++	@
++	@ Note also that bsaes-armv7.pl expects aes-armv4.pl's key
++	@ representation, which does not match the other aes_nohw_*
++	@ implementations. The ARM aes_nohw_* stores each 32-bit word
++	@ byteswapped, as a convenience for (unsupported) big-endian ARM, at the
++	@ cost of extra REV and VREV32 operations in little-endian ARM.
++
++	vmov.i8	$s0F, #0x0f		@ Required by _vpaes_schedule_transform
++	adr	r2, .Lk_mc_forward	@ Must be aligned to 8 mod 16.
++	add	r3, r2, 0x90		@ .Lk_sr+0x10-.Lk_mc_forward = 0x90 (Apple's toolchain doesn't support the expression)
++
++	vld1.64	{$mc_forward}, [r2]
++	vmov.i8	$s63, #0x5b		@ .Lk_s63 from vpaes-x86_64
++	adr	r11, .Lk_opt		@ Must be aligned to 8 mod 16.
++	vmov.i8	$s63_raw, #0x63		@ .LK_s63 without .Lk_ipt applied
++
++	@ vpaes stores one fewer round count than bsaes, but the number of keys
++	@ is the same.
++	ldr	r2, [$inp,#240]
++	add	r2, r2, #1
++	str	r2, [$out,#240]
++
++	@ The first key is transformed with _vpaes_schedule_transform(.Lk_ipt).
++	@ Invert this with .Lk_opt.
++	vld1.64	{q0}, [$inp]!
++	bl	_vpaes_schedule_transform
++	vrev32.8	q0, q0
++	vst1.64	{q0}, [$out]!
++
++	@ The middle keys have _vpaes_schedule_transform(.Lk_ipt) applied,
++	@ followed by _vpaes_schedule_mangle. _vpaes_schedule_mangle XORs 0x63,
++	@ multiplies by the circulant 0,1,1,1, then applies ShiftRows.
++.Loop_enc_key_to_bsaes:
++	vld1.64	{q0}, [$inp]!
++
++	@ Invert the ShiftRows step (see .Lschedule_mangle_both). Note we cycle
++	@ r3 in the opposite direction and start at .Lk_sr+0x10 instead of 0x30.
++	@ We use r3 rather than r8 to avoid a callee-saved register.
++	vld1.64	{q1}, [r3]
++	vtbl.8  q2#lo, {q0}, q1#lo
++	vtbl.8  q2#hi, {q0}, q1#hi
++	add	r3, r3, #16
++	and	r3, r3, #~(1<<6)
++	vmov	q0, q2
++
++	@ Handle the last key differently.
++	subs	r2, r2, #1
++	beq	.Loop_enc_key_to_bsaes_last
++
++	@ Multiply by the circulant. This is its own inverse.
++	vtbl.8	q1#lo, {q0}, $mc_forward#lo
++	vtbl.8	q1#hi, {q0}, $mc_forward#hi
++	vmov	q0, q1
++	vtbl.8	q2#lo, {q1}, $mc_forward#lo
++	vtbl.8	q2#hi, {q1}, $mc_forward#hi
++	veor	q0, q0, q2
++	vtbl.8	q1#lo, {q2}, $mc_forward#lo
++	vtbl.8	q1#hi, {q2}, $mc_forward#hi
++	veor	q0, q0, q1
++
++	@ XOR and finish.
++	veor	q0, q0, $s63
++	bl	_vpaes_schedule_transform
++	vrev32.8	q0, q0
++	vst1.64	{q0}, [$out]!
++	b	.Loop_enc_key_to_bsaes
++
++.Loop_enc_key_to_bsaes_last:
++	@ The final key does not have a basis transform (note
++	@ .Lschedule_mangle_last inverts the original transform). It only XORs
++	@ 0x63 and applies ShiftRows. The latter was already inverted in the
++	@ loop. Note that, because we act on the original representation, we use
++	@ $s63_raw, not $s63.
++	veor	q0, q0, $s63_raw
++	vrev32.8	q0, q0
++	vst1.64	{q0}, [$out]
++
++	@ Wipe registers which contained key material.
++	veor	q0, q0, q0
++	veor	q1, q1, q1
++	veor	q2, q2, q2
++
++	ldmia	sp!, {r11, pc}	@ return
++.size	GFp_vpaes_encrypt_key_to_bsaes,.-GFp_vpaes_encrypt_key_to_bsaes
++___
++}
++
++{
++# Register-passed parameters.
++my ($inp, $out, $len, $key) = map("r$_", 0..3);
++# Temporaries. _vpaes_encrypt_core already uses r8..r11, so overlap $ivec and
++# $tmp. $ctr is r7 because it must be preserved across calls.
++my ($ctr, $ivec, $tmp) = map("r$_", 7..9);
++
++# void GFp_vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
++#                                     const AES_KEY *key, const uint8_t ivec[16]);
++$code .= <<___;
++.globl	GFp_vpaes_ctr32_encrypt_blocks
++.type	GFp_vpaes_ctr32_encrypt_blocks,%function
++.align	4
++GFp_vpaes_ctr32_encrypt_blocks:
++	mov	ip, sp
++	stmdb	sp!, {r7-r11, lr}
++	@ This function uses q4-q7 (d8-d15), which are callee-saved.
++	vstmdb	sp!, {d8-d15}
++
++	cmp	$len, #0
++	@ $ivec is passed on the stack.
++	ldr	$ivec, [ip]
++	beq	.Lctr32_done
++
++	@ _vpaes_encrypt_core expects the key in r2, so swap $len and $key.
++	mov	$tmp, $key
++	mov	$key, $len
++	mov	$len, $tmp
++___
++my ($len, $key) = ($key, $len);
++$code .= <<___;
++
++	@ Load the IV and counter portion.
++	ldr	$ctr, [$ivec, #12]
++	vld1.8	{q7}, [$ivec]
++
++	bl	_vpaes_preheat
++	rev	$ctr, $ctr		@ The counter is big-endian.
++
++.Lctr32_loop:
++	vmov	q0, q7
++	vld1.8	{q6}, [$inp]!		@ Load input ahead of time
++	bl	_vpaes_encrypt_core
++	veor	q0, q0, q6		@ XOR input and result
++	vst1.8	{q0}, [$out]!
++	subs	$len, $len, #1
++	@ Update the counter.
++	add	$ctr, $ctr, #1
++	rev	$tmp, $ctr
++	vmov.32	q7#hi[1], $tmp
++	bne	.Lctr32_loop
++
++.Lctr32_done:
++	vldmia	sp!, {d8-d15}
++	ldmia	sp!, {r7-r11, pc}	@ return
++.size	GFp_vpaes_ctr32_encrypt_blocks,.-GFp_vpaes_ctr32_encrypt_blocks
++___
++}
++
++foreach (split("\n",$code)) {
++	s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo;	# qN#lo -> d(2N), qN#hi -> d(2N+1)
++	print $_,"\n";
++}
++# Buffered write errors only surface at close, so fail loudly rather than leave truncated output.
++close STDOUT or die "error closing STDOUT: $!";
+diff --git a/crypto/fipsmodule/aes/asm/vpaes-armv8.pl b/crypto/fipsmodule/aes/asm/vpaes-armv8.pl
+new file mode 100755
+index 0000000..b31bbb8
+--- /dev/null
++++ b/crypto/fipsmodule/aes/asm/vpaes-armv8.pl
+@@ -0,0 +1,837 @@
++#! /usr/bin/env perl
++# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved.
++#
++# Licensed under the OpenSSL license (the "License").  You may not use
++# this file except in compliance with the License.  You can obtain a copy
++# in the file LICENSE in the source distribution or at
++# https://www.openssl.org/source/license.html
++
++
++######################################################################
++## Constant-time SSSE3 AES core implementation.
++## version 0.1
++##
++## By Mike Hamburg (Stanford University), 2009
++## Public domain.
++##
++## For details see http://shiftleft.org/papers/vector_aes/ and
++## http://crypto.stanford.edu/vpaes/.
++##
++######################################################################
++# ARMv8 NEON adaptation by <appro@openssl.org>
++#
++# Reason for undertaken effort is that there is at least one popular
++# SoC based on Cortex-A53 that doesn't have crypto extensions.
++#
++#                   CBC enc     ECB enc/dec(*)   [bit-sliced enc/dec]
++# Cortex-A53        21.5        18.1/20.6        [17.5/19.8         ]
++# Cortex-A57        36.0(**)    20.4/24.9(**)    [14.4/16.6         ]
++# X-Gene            45.9(**)    45.8/57.7(**)    [33.1/37.6(**)     ]
++# Denver(***)       16.6(**)    15.1/17.8(**)    [8.80/9.93         ]
++# Apple A7(***)     22.7(**)    10.9/14.3        [8.45/10.0         ]
++# Mongoose(***)     26.3(**)    21.0/25.0(**)    [13.3/16.8         ]
++#
++# (*)	ECB denotes approximate result for parallelizable modes
++#	such as CBC decrypt, CTR, etc.;
++# (**)	these results are worse than scalar compiler-generated
++#	code, but it's constant-time and therefore preferred;
++# (***)	presented for reference/comparison purposes;
++
++$flavour = shift;	# first argument: flavour, passed through to arm-xlate.pl
++while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}	# skip arguments until one looks like a file name
++# Look for arm-xlate.pl next to this script, then in ../../../perlasm/ relative to it.
++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
++( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
++die "can't locate arm-xlate.pl";
++# Pipe everything printed to STDOUT through arm-xlate.pl; quote its path and fail if the pipe cannot be opened.
++open OUT,"| \"$^X\" \"$xlate\" $flavour $output" or die "can't call $xlate: $!";
++*STDOUT=*OUT;
++
++$code.=<<___;
++#include <GFp/arm_arch.h>
++
++.section	.rodata
++
++.type	_vpaes_consts,%object
++.align	7	// totally strategic alignment
++_vpaes_consts:
++.Lk_mc_forward:	// mc_forward
++	.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
++	.quad	0x080B0A0904070605, 0x000302010C0F0E0D
++	.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
++	.quad	0x000302010C0F0E0D, 0x080B0A0904070605
++.Lk_mc_backward:// mc_backward
++	.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
++	.quad	0x020100030E0D0C0F, 0x0A09080B06050407
++	.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
++	.quad	0x0A09080B06050407, 0x020100030E0D0C0F
++.Lk_sr:		// sr
++	.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
++	.quad	0x030E09040F0A0500, 0x0B06010C07020D08
++	.quad	0x0F060D040B020900, 0x070E050C030A0108
++	.quad	0x0B0E0104070A0D00, 0x0306090C0F020508
++
++//
++// "Hot" constants
++//
++.Lk_inv:	// inv, inva
++	.quad	0x0E05060F0D080180, 0x040703090A0B0C02
++	.quad	0x01040A060F0B0780, 0x030D0E0C02050809
++.Lk_ipt:	// input transform (lo, hi)
++	.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
++	.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
++.Lk_sbo:	// sbou, sbot
++	.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
++	.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
++.Lk_sb1:	// sb1u, sb1t
++	.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
++	.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
++.Lk_sb2:	// sb2u, sb2t
++	.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
++	.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
++
++//
++//  Key schedule constants
++//
++.Lk_dksd:	// decryption key schedule: invskew x*D
++	.quad	0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
++	.quad	0x41C277F4B5368300, 0x5FDC69EAAB289D1E
++.Lk_dksb:	// decryption key schedule: invskew x*B
++	.quad	0x9A4FCA1F8550D500, 0x03D653861CC94C99
++	.quad	0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
++.Lk_dkse:	// decryption key schedule: invskew x*E + 0x63
++	.quad	0xD5031CCA1FC9D600, 0x53859A4C994F5086
++	.quad	0xA23196054FDC7BE8, 0xCD5EF96A20B31487
++.Lk_dks9:	// decryption key schedule: invskew x*9
++	.quad	0xB6116FC87ED9A700, 0x4AED933482255BFC
++	.quad	0x4576516227143300, 0x8BB89FACE9DAFDCE
++
++.Lk_rcon:	// rcon
++	.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
++
++.Lk_opt:	// output transform
++	.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
++	.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
++.Lk_deskew:	// deskew tables: inverts the sbox's "skew"
++	.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
++	.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
++
++.asciz  "Vector Permutation AES for ARMv8, Mike Hamburg (Stanford University)"
++.size	_vpaes_consts,.-_vpaes_consts
++.align	6
++
++.text
++___
++
++{
++my ($inp,$out,$key) = map("x$_",(0..2));
++
++my ($invlo,$invhi,$iptlo,$ipthi,$sbou,$sbot) = map("v$_.16b",(18..23));
++my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_.16b",(24..27));
++my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_.16b",(24..31));
++
++$code.=<<___;
++##
++##  _vpaes_encrypt_preheat
++##
++##  Points x10 at .Lk_inv and loads v17 = 0x0f in every byte and
++##  v18-v27 = .Lk_inv, .Lk_ipt, .Lk_sbo, .Lk_sb1, .Lk_sb2; x10 is advanced.
++##
++.type	_vpaes_encrypt_preheat,%function
++.align	4
++_vpaes_encrypt_preheat:
++	adrp	x10, :pg_hi21:.Lk_inv
++	add	x10, x10, :lo12:.Lk_inv
++	movi	v17.16b, #0x0f			// nibble mask used by _vpaes_encrypt_core
++	ld1	{v18.2d-v19.2d}, [x10],#32	// .Lk_inv
++	ld1	{v20.2d-v23.2d}, [x10],#64	// .Lk_ipt, .Lk_sbo
++	ld1	{v24.2d-v27.2d}, [x10]		// .Lk_sb1, .Lk_sb2
++	ret
++.size	_vpaes_encrypt_preheat,.-_vpaes_encrypt_preheat
++
++##
++##  _vpaes_encrypt_core
++##
++##  AES-encrypt v7.
++##
++##  Inputs:
++##     v7 = input
++##     v17-v27 as in _vpaes_encrypt_preheat
++##     x2 = scheduled keys (round count at offset 240)
++##
++##  Output in v0
++##  Clobbers  v1-v5, v16, x8-x11
++##  Leaves    v6 and v7 untouched
++##
++##
++.type	_vpaes_encrypt_core,%function
++.align 4
++_vpaes_encrypt_core:
++	mov	x9, $key
++	ldr	w8, [$key,#240]			// pull rounds
++	adrp	x11, :pg_hi21:.Lk_mc_forward+16
++	add	x11, x11, :lo12:.Lk_mc_forward+16
++						// vmovdqa	.Lk_ipt(%rip),	%xmm2	# iptlo
++	ld1	{v16.2d}, [x9], #16		// vmovdqu	(%r9),	%xmm5		# round0 key
++	and	v1.16b, v7.16b, v17.16b		// vpand	%xmm9,	%xmm0,	%xmm1
++	ushr	v0.16b, v7.16b, #4		// vpsrlb	\$4,	%xmm0,	%xmm0
++	tbl	v1.16b, {$iptlo}, v1.16b	// vpshufb	%xmm1,	%xmm2,	%xmm1
++						// vmovdqa	.Lk_ipt+16(%rip), %xmm3	# ipthi
++	tbl	v2.16b, {$ipthi}, v0.16b	// vpshufb	%xmm0,	%xmm3,	%xmm2
++	eor	v0.16b, v1.16b, v16.16b		// vpxor	%xmm5,	%xmm1,	%xmm0
++	eor	v0.16b, v0.16b, v2.16b		// vpxor	%xmm2,	%xmm0,	%xmm0
++	b	.Lenc_entry
++
++.align 4
++.Lenc_loop:
++	// middle of middle round
++	add	x10, x11, #0x40
++	tbl	v4.16b, {$sb1t}, v2.16b		// vpshufb	%xmm2,	%xmm13,	%xmm4	# 4 = sb1u
++	ld1	{v1.2d}, [x11], #16		// vmovdqa	-0x40(%r11,%r10), %xmm1	# .Lk_mc_forward[]
++	tbl	v0.16b, {$sb1u}, v3.16b		// vpshufb	%xmm3,	%xmm12,	%xmm0	# 0 = sb1t
++	eor	v4.16b, v4.16b, v16.16b		// vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
++	tbl	v5.16b,	{$sb2t}, v2.16b		// vpshufb	%xmm2,	%xmm15,	%xmm5	# 4 = sb2u
++	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
++	tbl	v2.16b, {$sb2u}, v3.16b		// vpshufb	%xmm3,	%xmm14,	%xmm2	# 2 = sb2t
++	ld1	{v4.2d}, [x10]			// vmovdqa	(%r11,%r10), %xmm4	# .Lk_mc_backward[]
++	tbl	v3.16b, {v0.16b}, v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm3	# 0 = B
++	eor	v2.16b, v2.16b, v5.16b		// vpxor	%xmm5,	%xmm2,	%xmm2	# 2 = 2A
++	tbl	v0.16b, {v0.16b}, v4.16b	// vpshufb	%xmm4,	%xmm0,	%xmm0	# 3 = D
++	eor	v3.16b, v3.16b, v2.16b		// vpxor	%xmm2,	%xmm3,	%xmm3	# 0 = 2A+B
++	tbl	v4.16b, {v3.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm4	# 0 = 2B+C
++	eor	v0.16b, v0.16b, v3.16b		// vpxor	%xmm3,	%xmm0,	%xmm0	# 3 = 2A+B+D
++	and	x11, x11, #~(1<<6)		// and		\$0x30,	%r11		# ... mod 4
++	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0, %xmm0	# 0 = 2A+3B+C+D
++	sub	w8, w8, #1			// nr--
++
++.Lenc_entry:
++	// top of round
++	and	v1.16b, v0.16b, v17.16b		// vpand	%xmm0,	%xmm9,	%xmm1   # 0 = k
++	ushr	v0.16b, v0.16b, #4		// vpsrlb	\$4,	%xmm0,	%xmm0	# 1 = i
++	tbl	v5.16b, {$invhi}, v1.16b	// vpshufb	%xmm1,	%xmm11,	%xmm5	# 2 = a/k
++	eor	v1.16b, v1.16b, v0.16b		// vpxor	%xmm0,	%xmm1,	%xmm1	# 0 = j
++	tbl	v3.16b, {$invlo}, v0.16b	// vpshufb	%xmm0, 	%xmm10,	%xmm3  	# 3 = 1/i
++	tbl	v4.16b, {$invlo}, v1.16b	// vpshufb	%xmm1, 	%xmm10,	%xmm4  	# 4 = 1/j
++	eor	v3.16b, v3.16b, v5.16b		// vpxor	%xmm5,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
++	eor	v4.16b, v4.16b, v5.16b		// vpxor	%xmm5,	%xmm4,	%xmm4  	# 4 = jak = 1/j + a/k
++	tbl	v2.16b, {$invlo}, v3.16b	// vpshufb	%xmm3,	%xmm10,	%xmm2  	# 2 = 1/iak
++	tbl	v3.16b, {$invlo}, v4.16b	// vpshufb	%xmm4,	%xmm10,	%xmm3	# 3 = 1/jak
++	eor	v2.16b, v2.16b, v1.16b		// vpxor	%xmm1,	%xmm2,	%xmm2  	# 2 = io
++	eor	v3.16b, v3.16b, v0.16b		// vpxor	%xmm0,	%xmm3,	%xmm3	# 3 = jo
++	ld1	{v16.2d}, [x9],#16		// vmovdqu	(%r9),	%xmm5
++	cbnz	w8, .Lenc_loop
++
++	// middle of last round
++	add	x10, x11, #0x80
++						// vmovdqa	-0x60(%r10), %xmm4	# 3 : sbou	.Lk_sbo
++						// vmovdqa	-0x50(%r10), %xmm0	# 0 : sbot	.Lk_sbo+16
++	tbl	v4.16b, {$sbou}, v2.16b		// vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
++	ld1	{v1.2d}, [x10]			// vmovdqa	0x40(%r11,%r10), %xmm1	# .Lk_sr[]
++	tbl	v0.16b, {$sbot}, v3.16b		// vpshufb	%xmm3,	%xmm0,	%xmm0	# 0 = sb1t
++	eor	v4.16b, v4.16b, v16.16b		// vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
++	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
++	tbl	v0.16b, {v0.16b}, v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm0
++	ret
++.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core
++
++.globl	GFp_vpaes_encrypt
++.type	GFp_vpaes_encrypt,%function
++.align	4
++GFp_vpaes_encrypt:
++	AARCH64_SIGN_LINK_REGISTER
++	stp	x29,x30,[sp,#-16]!		// push x29 and x30
++	add	x29,sp,#0			// x29 = sp
++
++	ld1	{v7.16b}, [$inp]		// v7 = input block, as read by _vpaes_encrypt_core
++	bl	_vpaes_encrypt_preheat		// load the v17-v27 constants
++	bl	_vpaes_encrypt_core		// leaves its result in v0
++	st1	{v0.16b}, [$out]		// store the result block
++
++	ldp	x29,x30,[sp],#16		// pop x29 and x30
++	AARCH64_VALIDATE_LINK_REGISTER
++	ret
++.size	GFp_vpaes_encrypt,.-GFp_vpaes_encrypt
++
++.type	_vpaes_encrypt_2x,%function
++.align 4
++_vpaes_encrypt_2x:
++	mov	x9, $key
++	ldr	w8, [$key,#240]			// pull rounds
++	adrp	x11, :pg_hi21:.Lk_mc_forward+16
++	add	x11, x11, :lo12:.Lk_mc_forward+16
++						// vmovdqa	.Lk_ipt(%rip),	%xmm2	# iptlo
++	ld1	{v16.2d}, [x9], #16		// vmovdqu	(%r9),	%xmm5		# round0 key
++	and	v1.16b,  v14.16b,  v17.16b	// vpand	%xmm9,	%xmm0,	%xmm1
++	ushr	v0.16b,  v14.16b,  #4		// vpsrlb	\$4,	%xmm0,	%xmm0
++	 and	v9.16b,  v15.16b,  v17.16b
++	 ushr	v8.16b,  v15.16b,  #4
++	tbl	v1.16b,  {$iptlo}, v1.16b	// vpshufb	%xmm1,	%xmm2,	%xmm1
++	 tbl	v9.16b,  {$iptlo}, v9.16b
++						// vmovdqa	.Lk_ipt+16(%rip), %xmm3	# ipthi
++	tbl	v2.16b,  {$ipthi}, v0.16b	// vpshufb	%xmm0,	%xmm3,	%xmm2
++	 tbl	v10.16b, {$ipthi}, v8.16b
++	eor	v0.16b,  v1.16b,   v16.16b	// vpxor	%xmm5,	%xmm1,	%xmm0
++	 eor	v8.16b,  v9.16b,   v16.16b
++	eor	v0.16b,  v0.16b,   v2.16b	// vpxor	%xmm2,	%xmm0,	%xmm0
++	 eor	v8.16b,  v8.16b,   v10.16b
++	b	.Lenc_2x_entry
++
++.align 4
++.Lenc_2x_loop:
++	// middle of middle round
++	add	x10, x11, #0x40
++	tbl	v4.16b,  {$sb1t}, v2.16b	// vpshufb	%xmm2,	%xmm13,	%xmm4	# 4 = sb1u
++	 tbl	v12.16b, {$sb1t}, v10.16b
++	ld1	{v1.2d}, [x11], #16		// vmovdqa	-0x40(%r11,%r10), %xmm1	# .Lk_mc_forward[]
++	tbl	v0.16b,  {$sb1u}, v3.16b	// vpshufb	%xmm3,	%xmm12,	%xmm0	# 0 = sb1t
++	 tbl	v8.16b,  {$sb1u}, v11.16b
++	eor	v4.16b,  v4.16b,  v16.16b	// vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
++	 eor	v12.16b, v12.16b, v16.16b
++	tbl	v5.16b,	 {$sb2t}, v2.16b	// vpshufb	%xmm2,	%xmm15,	%xmm5	# 4 = sb2u
++	 tbl	v13.16b, {$sb2t}, v10.16b
++	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
++	 eor	v8.16b,  v8.16b,  v12.16b
++	tbl	v2.16b,  {$sb2u}, v3.16b	// vpshufb	%xmm3,	%xmm14,	%xmm2	# 2 = sb2t
++	 tbl	v10.16b, {$sb2u}, v11.16b
++	ld1	{v4.2d}, [x10]			// vmovdqa	(%r11,%r10), %xmm4	# .Lk_mc_backward[]
++	tbl	v3.16b,  {v0.16b}, v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm3	# 0 = B
++	 tbl	v11.16b, {v8.16b}, v1.16b
++	eor	v2.16b,  v2.16b,  v5.16b	// vpxor	%xmm5,	%xmm2,	%xmm2	# 2 = 2A
++	 eor	v10.16b, v10.16b, v13.16b
++	tbl	v0.16b,  {v0.16b}, v4.16b	// vpshufb	%xmm4,	%xmm0,	%xmm0	# 3 = D
++	 tbl	v8.16b,  {v8.16b}, v4.16b
++	eor	v3.16b,  v3.16b,  v2.16b	// vpxor	%xmm2,	%xmm3,	%xmm3	# 0 = 2A+B
++	 eor	v11.16b, v11.16b, v10.16b
++	tbl	v4.16b,  {v3.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm4	# 0 = 2B+C
++	 tbl	v12.16b, {v11.16b},v1.16b
++	eor	v0.16b,  v0.16b,  v3.16b	// vpxor	%xmm3,	%xmm0,	%xmm0	# 3 = 2A+B+D
++	 eor	v8.16b,  v8.16b,  v11.16b
++	and	x11, x11, #~(1<<6)		// and		\$0x30,	%r11		# ... mod 4
++	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0, %xmm0	# 0 = 2A+3B+C+D
++	 eor	v8.16b,  v8.16b,  v12.16b
++	sub	w8, w8, #1			// nr--
++
++.Lenc_2x_entry:
++	// top of round
++	and	v1.16b,  v0.16b, v17.16b	// vpand	%xmm0,	%xmm9,	%xmm1   # 0 = k
++	ushr	v0.16b,  v0.16b, #4		// vpsrlb	\$4,	%xmm0,	%xmm0	# 1 = i
++	 and	v9.16b,  v8.16b, v17.16b
++	 ushr	v8.16b,  v8.16b, #4
++	tbl	v5.16b,  {$invhi},v1.16b	// vpshufb	%xmm1,	%xmm11,	%xmm5	# 2 = a/k
++	 tbl	v13.16b, {$invhi},v9.16b
++	eor	v1.16b,  v1.16b,  v0.16b	// vpxor	%xmm0,	%xmm1,	%xmm1	# 0 = j
++	 eor	v9.16b,  v9.16b,  v8.16b
++	tbl	v3.16b,  {$invlo},v0.16b	// vpshufb	%xmm0, 	%xmm10,	%xmm3  	# 3 = 1/i
++	 tbl	v11.16b, {$invlo},v8.16b
++	tbl	v4.16b,  {$invlo},v1.16b	// vpshufb	%xmm1, 	%xmm10,	%xmm4  	# 4 = 1/j
++	 tbl	v12.16b, {$invlo},v9.16b
++	eor	v3.16b,  v3.16b,  v5.16b	// vpxor	%xmm5,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
++	 eor	v11.16b, v11.16b, v13.16b
++	eor	v4.16b,  v4.16b,  v5.16b	// vpxor	%xmm5,	%xmm4,	%xmm4  	# 4 = jak = 1/j + a/k
++	 eor	v12.16b, v12.16b, v13.16b
++	tbl	v2.16b,  {$invlo},v3.16b	// vpshufb	%xmm3,	%xmm10,	%xmm2  	# 2 = 1/iak
++	 tbl	v10.16b, {$invlo},v11.16b
++	tbl	v3.16b,  {$invlo},v4.16b	// vpshufb	%xmm4,	%xmm10,	%xmm3	# 3 = 1/jak
++	 tbl	v11.16b, {$invlo},v12.16b
++	eor	v2.16b,  v2.16b,  v1.16b	// vpxor	%xmm1,	%xmm2,	%xmm2  	# 2 = io
++	 eor	v10.16b, v10.16b, v9.16b
++	eor	v3.16b,  v3.16b,  v0.16b	// vpxor	%xmm0,	%xmm3,	%xmm3	# 3 = jo
++	 eor	v11.16b, v11.16b, v8.16b
++	ld1	{v16.2d}, [x9],#16		// vmovdqu	(%r9),	%xmm5
++	cbnz	w8, .Lenc_2x_loop
++
++	// middle of last round
++	add	x10, x11, #0x80
++						// vmovdqa	-0x60(%r10), %xmm4	# 3 : sbou	.Lk_sbo
++						// vmovdqa	-0x50(%r10), %xmm0	# 0 : sbot	.Lk_sbo+16
++	tbl	v4.16b,  {$sbou}, v2.16b	// vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
++	 tbl	v12.16b, {$sbou}, v10.16b
++	ld1	{v1.2d}, [x10]			// vmovdqa	0x40(%r11,%r10), %xmm1	# .Lk_sr[]
++	tbl	v0.16b,  {$sbot}, v3.16b	// vpshufb	%xmm3,	%xmm0,	%xmm0	# 0 = sb1t
++	 tbl	v8.16b,  {$sbot}, v11.16b
++	eor	v4.16b,  v4.16b,  v16.16b	// vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
++	 eor	v12.16b, v12.16b, v16.16b
++	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
++	 eor	v8.16b,  v8.16b,  v12.16b
++	tbl	v0.16b,  {v0.16b},v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm0
++	 tbl	v1.16b,  {v8.16b},v1.16b
++	ret
++.size	_vpaes_encrypt_2x,.-_vpaes_encrypt_2x
++___
++}
++{
++my ($inp,$bits,$out,$dir)=("x0","w1","x2","w3");
++my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_.16b",(18..21,8));
++
++$code.=<<___;
++########################################################
++##                                                    ##
++##                  AES key schedule                  ##
++##                                                    ##
++########################################################
++.type	_vpaes_key_preheat,%function
++.align	4
++_vpaes_key_preheat:
++	adrp	x10, :pg_hi21:.Lk_inv
++	add	x10, x10, :lo12:.Lk_inv
++	movi	v16.16b, #0x5b			// .Lk_s63
++	adrp	x11, :pg_hi21:.Lk_sb1
++	add	x11, x11, :lo12:.Lk_sb1
++	movi	v17.16b, #0x0f			// .Lk_s0F
++	ld1	{v18.2d-v21.2d}, [x10]		// .Lk_inv, .Lk_ipt
++	adrp	x10, :pg_hi21:.Lk_dksd
++	add	x10, x10, :lo12:.Lk_dksd
++	ld1	{v22.2d-v23.2d}, [x11]		// .Lk_sb1
++	adrp	x11, :pg_hi21:.Lk_mc_forward
++	add	x11, x11, :lo12:.Lk_mc_forward
++	ld1	{v24.2d-v27.2d}, [x10],#64	// .Lk_dksd, .Lk_dksb
++	ld1	{v28.2d-v31.2d}, [x10],#64	// .Lk_dkse, .Lk_dks9
++	ld1	{v8.2d}, [x10]			// .Lk_rcon
++	ld1	{v9.2d}, [x11]			// .Lk_mc_forward[0]
++	ret
++.size	_vpaes_key_preheat,.-_vpaes_key_preheat
++
++.type	_vpaes_schedule_core,%function
++.align	4
++_vpaes_schedule_core:
++	AARCH64_SIGN_LINK_REGISTER
++	stp	x29, x30, [sp,#-16]!
++	add	x29,sp,#0
++
++	bl	_vpaes_key_preheat		// load the tables
++
++	ld1	{v0.16b}, [$inp],#16		// vmovdqu	(%rdi),	%xmm0		# load key (unaligned)
++
++	// input transform
++	mov	v3.16b, v0.16b			// vmovdqa	%xmm0,	%xmm3
++	bl	_vpaes_schedule_transform
++	mov	v7.16b, v0.16b			// vmovdqa	%xmm0,	%xmm7
++
++	adrp	x10, :pg_hi21:.Lk_sr		// lea	.Lk_sr(%rip),%r10
++	add	x10, x10, :lo12:.Lk_sr
++
++	add	x8, x8, x10
++
++	// encrypting, output zeroth round key after transform
++	st1	{v0.2d}, [$out]			// vmovdqu	%xmm0,	(%rdx)
++
++	cmp	$bits, #192			// cmp	\$192,	%esi
++	b.hi	.Lschedule_256
++	b.eq	.Lschedule_192
++	// 128: fall through
++
++##
++##  .schedule_128
++##
++##  128-bit specific part of key schedule.
++##
++##  This schedule is really simple, because all its parts
++##  are accomplished by the subroutines.
++##
++.Lschedule_128:
++	mov	$inp, #10			// mov	\$10, %esi
++
++.Loop_schedule_128:
++	sub	$inp, $inp, #1			// dec	%esi
++	bl 	_vpaes_schedule_round
++	cbz 	$inp, .Lschedule_mangle_last
++	bl	_vpaes_schedule_mangle		// write output
++	b 	.Loop_schedule_128
++
++##
++##  .aes_schedule_192
++##
++##  192-bit specific part of key schedule.
++##
++##  The main body of this schedule is the same as the 128-bit
++##  schedule, but with more smearing.  The long, high side is
++##  stored in %xmm7 as before, and the short, low side is in
++##  the high bits of %xmm6.
++##
++##  This schedule is somewhat nastier, however, because each
++##  round produces 192 bits of key material, or 1.5 round keys.
++##  Therefore, on each cycle we do 2 rounds and produce 3 round
++##  keys.
++##
++.align	4
++.Lschedule_192:
++	sub	$inp, $inp, #8
++	ld1	{v0.16b}, [$inp]		// vmovdqu	8(%rdi),%xmm0		# load key part 2 (very unaligned)
++	bl	_vpaes_schedule_transform	// input transform
++	mov	v6.16b, v0.16b			// vmovdqa	%xmm0,	%xmm6		# save short part
++	eor	v4.16b, v4.16b, v4.16b		// vpxor	%xmm4,	%xmm4, %xmm4	# clear 4
++	ins	v6.d[0], v4.d[0]		// vmovhlps	%xmm4,	%xmm6,	%xmm6		# clobber low side with zeros
++	mov	$inp, #4			// mov	\$4,	%esi
++
++.Loop_schedule_192:
++	sub	$inp, $inp, #1			// dec	%esi
++	bl	_vpaes_schedule_round
++	ext	v0.16b, v6.16b, v0.16b, #8	// vpalignr	\$8,%xmm6,%xmm0,%xmm0
++	bl	_vpaes_schedule_mangle		// save key n
++	bl	_vpaes_schedule_192_smear
++	bl	_vpaes_schedule_mangle		// save key n+1
++	bl	_vpaes_schedule_round
++	cbz 	$inp, .Lschedule_mangle_last
++	bl	_vpaes_schedule_mangle		// save key n+2
++	bl	_vpaes_schedule_192_smear
++	b	.Loop_schedule_192
++
++##
++##  .aes_schedule_256
++##
++##  256-bit specific part of key schedule.
++##
++##  The structure here is very similar to the 128-bit
++##  schedule, but with an additional "low side" in
++##  %xmm6.  The low side's rounds are the same as the
++##  high side's, except no rcon and no rotation.
++##
++.align	4
++.Lschedule_256:
++	ld1	{v0.16b}, [$inp]		// vmovdqu	16(%rdi),%xmm0		# load key part 2 (unaligned)
++	bl	_vpaes_schedule_transform	// input transform
++	mov	$inp, #7			// mov	\$7, %esi
++
++.Loop_schedule_256:
++	sub	$inp, $inp, #1			// dec	%esi
++	bl	_vpaes_schedule_mangle		// output low result
++	mov	v6.16b, v0.16b			// vmovdqa	%xmm0,	%xmm6		# save cur_lo in xmm6
++
++	// high round
++	bl	_vpaes_schedule_round
++	cbz 	$inp, .Lschedule_mangle_last
++	bl	_vpaes_schedule_mangle
++
++	// low round. swap xmm7 and xmm6
++	dup	v0.4s, v0.s[3]			// vpshufd	\$0xFF,	%xmm0,	%xmm0
++	movi	v4.16b, #0
++	mov	v5.16b, v7.16b			// vmovdqa	%xmm7,	%xmm5
++	mov	v7.16b, v6.16b			// vmovdqa	%xmm6,	%xmm7
++	bl	_vpaes_schedule_low_round
++	mov	v7.16b, v5.16b			// vmovdqa	%xmm5,	%xmm7
++
++	b	.Loop_schedule_256
++
++##
++##  .aes_schedule_mangle_last
++##
++##  Mangler for last round of key schedule
++##  Mangles %xmm0
++##    when encrypting, outputs out(%xmm0) ^ 63
++##    when decrypting, outputs unskew(%xmm0)
++##
++##  Always called right before return... jumps to cleanup and exits
++##
++.align	4
++.Lschedule_mangle_last:
++	// schedule last round key from xmm0
++	adrp	x11, :pg_hi21:.Lk_deskew	// lea	.Lk_deskew(%rip),%r11	# prepare to deskew
++	add	x11, x11, :lo12:.Lk_deskew
++
++	cbnz	$dir, .Lschedule_mangle_last_dec
++
++	// encrypting
++	ld1	{v1.2d}, [x8]			// vmovdqa	(%r8,%r10),%xmm1
++	adrp	x11, :pg_hi21:.Lk_opt		// lea	.Lk_opt(%rip),	%r11		# prepare to output transform
++	add	x11, x11, :lo12:.Lk_opt
++	add	$out, $out, #32			// add	\$32,	%rdx
++	tbl	v0.16b, {v0.16b}, v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm0		# output permute
++
++.Lschedule_mangle_last_dec:
++	ld1	{v20.2d-v21.2d}, [x11]		// reload constants
++	sub	$out, $out, #16			// add	\$-16,	%rdx
++	eor	v0.16b, v0.16b, v16.16b		// vpxor	.Lk_s63(%rip),	%xmm0,	%xmm0
++	bl	_vpaes_schedule_transform	// output transform
++	st1	{v0.2d}, [$out]			// vmovdqu	%xmm0,	(%rdx)		# save last key
++
++	// cleanup
++	eor	v0.16b, v0.16b, v0.16b		// vpxor	%xmm0,	%xmm0,	%xmm0
++	eor	v1.16b, v1.16b, v1.16b		// vpxor	%xmm1,	%xmm1,	%xmm1
++	eor	v2.16b, v2.16b, v2.16b		// vpxor	%xmm2,	%xmm2,	%xmm2
++	eor	v3.16b, v3.16b, v3.16b		// vpxor	%xmm3,	%xmm3,	%xmm3
++	eor	v4.16b, v4.16b, v4.16b		// vpxor	%xmm4,	%xmm4,	%xmm4
++	eor	v5.16b, v5.16b, v5.16b		// vpxor	%xmm5,	%xmm5,	%xmm5
++	eor	v6.16b, v6.16b, v6.16b		// vpxor	%xmm6,	%xmm6,	%xmm6
++	eor	v7.16b, v7.16b, v7.16b		// vpxor	%xmm7,	%xmm7,	%xmm7
++	ldp	x29, x30, [sp],#16
++	AARCH64_VALIDATE_LINK_REGISTER
++	ret
++.size	_vpaes_schedule_core,.-_vpaes_schedule_core
++
++##
++##  .aes_schedule_192_smear
++##
++##  Smear the short, low side in the 192-bit key schedule.
++##
++##  Inputs:
++##    %xmm7: high side, b  a  x  y
++##    %xmm6:  low side, d  c  0  0
++##    %xmm13: 0
++##
++##  Outputs:
++##    %xmm6: b+c+d  b+c  0  0
++##    %xmm0: b+c+d  b+c  b  a
++##
++.type	_vpaes_schedule_192_smear,%function
++.align	4
++_vpaes_schedule_192_smear:
++	movi	v1.16b, #0
++	dup	v0.4s, v7.s[3]
++	ins	v1.s[3], v6.s[2]	// vpshufd	\$0x80,	%xmm6,	%xmm1	# d c 0 0 -> c 0 0 0
++	ins	v0.s[0], v7.s[2]	// vpshufd	\$0xFE,	%xmm7,	%xmm0	# b a _ _ -> b b b a
++	eor	v6.16b, v6.16b, v1.16b	// vpxor	%xmm1,	%xmm6,	%xmm6	# -> c+d c 0 0
++	eor	v1.16b, v1.16b, v1.16b	// vpxor	%xmm1,	%xmm1,	%xmm1
++	eor	v6.16b, v6.16b, v0.16b	// vpxor	%xmm0,	%xmm6,	%xmm6	# -> b+c+d b+c b a
++	mov	v0.16b, v6.16b		// vmovdqa	%xmm6,	%xmm0
++	ins	v6.d[0], v1.d[0]	// vmovhlps	%xmm1,	%xmm6,	%xmm6	# clobber low side with zeros
++	ret
++.size	_vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
++
++##
++##  .aes_schedule_round
++##
++##  Runs one main round of the key schedule on %xmm0, %xmm7
++##
++##  Specifically, runs subbytes on the high dword of %xmm0
++##  then rotates it by one byte and xors into the low dword of
++##  %xmm7.
++##
++##  Adds rcon from low byte of %xmm8, then rotates %xmm8 for
++##  next rcon.
++##
++##  Smears the dwords of %xmm7 by xoring the low into the
++##  second low, result into third, result into highest.
++##
++##  Returns results in %xmm7 = %xmm0.
++##  Clobbers %xmm1-%xmm4, %r11.
++##
++.type	_vpaes_schedule_round,%function
++.align	4
++_vpaes_schedule_round:
++	// extract rcon from xmm8
++	movi	v4.16b, #0			// vpxor	%xmm4,	%xmm4,	%xmm4
++	ext	v1.16b, $rcon, v4.16b, #15	// vpalignr	\$15,	%xmm8,	%xmm4,	%xmm1
++	ext	$rcon, $rcon, $rcon, #15	// vpalignr	\$15,	%xmm8,	%xmm8,	%xmm8
++	eor	v7.16b, v7.16b, v1.16b		// vpxor	%xmm1,	%xmm7,	%xmm7
++
++	// rotate
++	dup	v0.4s, v0.s[3]			// vpshufd	\$0xFF,	%xmm0,	%xmm0
++	ext	v0.16b, v0.16b, v0.16b, #1	// vpalignr	\$1,	%xmm0,	%xmm0,	%xmm0
++
++	// fall through...
++
++	// low round: same as high round, but no rotation and no rcon.
++_vpaes_schedule_low_round:
++	// smear xmm7
++	ext	v1.16b, v4.16b, v7.16b, #12	// vpslldq	\$4,	%xmm7,	%xmm1
++	eor	v7.16b, v7.16b, v1.16b		// vpxor	%xmm1,	%xmm7,	%xmm7
++	ext	v4.16b, v4.16b, v7.16b, #8	// vpslldq	\$8,	%xmm7,	%xmm4
++
++	// subbytes
++	and	v1.16b, v0.16b, v17.16b		// vpand	%xmm9,	%xmm0,	%xmm1		# 0 = k
++	ushr	v0.16b, v0.16b, #4		// vpsrlb	\$4,	%xmm0,	%xmm0		# 1 = i
++	 eor	v7.16b, v7.16b, v4.16b		// vpxor	%xmm4,	%xmm7,	%xmm7
++	tbl	v2.16b, {$invhi}, v1.16b	// vpshufb	%xmm1,	%xmm11,	%xmm2		# 2 = a/k
++	eor	v1.16b, v1.16b, v0.16b		// vpxor	%xmm0,	%xmm1,	%xmm1		# 0 = j
++	tbl	v3.16b, {$invlo}, v0.16b	// vpshufb	%xmm0, 	%xmm10,	%xmm3		# 3 = 1/i
++	eor	v3.16b, v3.16b, v2.16b		// vpxor	%xmm2,	%xmm3,	%xmm3		# 3 = iak = 1/i + a/k
++	tbl	v4.16b, {$invlo}, v1.16b	// vpshufb	%xmm1,	%xmm10,	%xmm4		# 4 = 1/j
++	 eor	v7.16b, v7.16b, v16.16b		// vpxor	.Lk_s63(%rip),	%xmm7,	%xmm7
++	tbl	v3.16b, {$invlo}, v3.16b	// vpshufb	%xmm3,	%xmm10,	%xmm3		# 2 = 1/iak
++	eor	v4.16b, v4.16b, v2.16b		// vpxor	%xmm2,	%xmm4,	%xmm4		# 4 = jak = 1/j + a/k
++	tbl	v2.16b, {$invlo}, v4.16b	// vpshufb	%xmm4,	%xmm10,	%xmm2		# 3 = 1/jak
++	eor	v3.16b, v3.16b, v1.16b		// vpxor	%xmm1,	%xmm3,	%xmm3		# 2 = io
++	eor	v2.16b, v2.16b, v0.16b		// vpxor	%xmm0,	%xmm2,	%xmm2		# 3 = jo
++	tbl	v4.16b, {v23.16b}, v3.16b	// vpshufb	%xmm3,	%xmm13,	%xmm4		# 4 = sbou
++	tbl	v1.16b, {v22.16b}, v2.16b	// vpshufb	%xmm2,	%xmm12,	%xmm1		# 0 = sb1t
++	eor	v1.16b, v1.16b, v4.16b		// vpxor	%xmm4,	%xmm1,	%xmm1		# 0 = sbox output
++
++	// add in smeared stuff
++	eor	v0.16b, v1.16b, v7.16b		// vpxor	%xmm7,	%xmm1,	%xmm0
++	eor	v7.16b, v1.16b, v7.16b		// vmovdqa	%xmm0,	%xmm7
++	ret
++.size	_vpaes_schedule_round,.-_vpaes_schedule_round
++
++##
++##  .aes_schedule_transform
++##
++##  Linear-transform %xmm0 according to tables at (%r11)
++##
++##  Requires that %xmm9 = 0x0F0F... as in preheat
++##  Output in %xmm0
++##  Clobbers %xmm1, %xmm2
++##
++.type	_vpaes_schedule_transform,%function
++.align	4
++_vpaes_schedule_transform:
++	and	v1.16b, v0.16b, v17.16b		// vpand	%xmm9,	%xmm0,	%xmm1
++	ushr	v0.16b, v0.16b, #4		// vpsrlb	\$4,	%xmm0,	%xmm0
++						// vmovdqa	(%r11),	%xmm2 	# lo
++	tbl	v2.16b, {$iptlo}, v1.16b	// vpshufb	%xmm1,	%xmm2,	%xmm2
++						// vmovdqa	16(%r11),	%xmm1 # hi
++	tbl	v0.16b, {$ipthi}, v0.16b	// vpshufb	%xmm0,	%xmm1,	%xmm0
++	eor	v0.16b, v0.16b, v2.16b		// vpxor	%xmm2,	%xmm0,	%xmm0
++	ret
++.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform
++
++##
++##  .aes_schedule_mangle
++##
++##  Mangle xmm0 from (basis-transformed) standard version
++##  to our version.
++##
++##  On encrypt,
++##    xor with 0x63
++##    multiply by circulant 0,1,1,1
++##    apply shiftrows transform
++##
++##  On decrypt,
++##    xor with 0x63
++##    multiply by "inverse mixcolumns" circulant E,B,D,9
++##    deskew
++##    apply shiftrows transform
++##
++##
++##  Writes out to (%rdx), and increments or decrements it
++##  Keeps track of round number mod 4 in %r8
++##  Preserves xmm0
++##  Clobbers xmm1-xmm5
++##
++.type	_vpaes_schedule_mangle,%function
++.align	4
++_vpaes_schedule_mangle:
++	mov	v4.16b, v0.16b			// vmovdqa	%xmm0,	%xmm4	# save xmm0 for later
++						// vmovdqa	.Lk_mc_forward(%rip),%xmm5
++
++	// encrypting
++	eor	v4.16b, v0.16b, v16.16b		// vpxor	.Lk_s63(%rip),	%xmm0,	%xmm4
++	add	$out, $out, #16			// add	\$16,	%rdx
++	tbl	v4.16b, {v4.16b}, v9.16b	// vpshufb	%xmm5,	%xmm4,	%xmm4
++	tbl	v1.16b, {v4.16b}, v9.16b	// vpshufb	%xmm5,	%xmm4,	%xmm1
++	tbl	v3.16b, {v1.16b}, v9.16b	// vpshufb	%xmm5,	%xmm1,	%xmm3
++	eor	v4.16b, v4.16b, v1.16b		// vpxor	%xmm1,	%xmm4,	%xmm4
++	ld1	{v1.2d}, [x8]			// vmovdqa	(%r8,%r10),	%xmm1
++	eor	v3.16b, v3.16b, v4.16b		// vpxor	%xmm4,	%xmm3,	%xmm3
++
++.Lschedule_mangle_both:
++	tbl	v3.16b, {v3.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm3
++	add	x8, x8, #64-16			// add	\$-16,	%r8
++	and	x8, x8, #~(1<<6)		// and	\$0x30,	%r8
++	st1	{v3.2d}, [$out]			// vmovdqu	%xmm3,	(%rdx)
++	ret
++.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle
++
++.globl	GFp_vpaes_set_encrypt_key
++.type	GFp_vpaes_set_encrypt_key,%function
++.align	4
++GFp_vpaes_set_encrypt_key:
++	AARCH64_SIGN_LINK_REGISTER
++	stp	x29,x30,[sp,#-16]!
++	add	x29,sp,#0
++	stp	d8,d9,[sp,#-16]!	// ABI spec says so
++
++	lsr	w9, $bits, #5		// shr	\$5,%eax
++	add	w9, w9, #5		// add	\$5,%eax
++	str	w9, [$out,#240]		// mov	%eax,240(%rdx)	# AES_KEY->rounds = nbits/32+5;
++
++	mov	$dir, #0		// mov	\$0,%ecx
++	mov	x8, #0x30		// mov	\$0x30,%r8d
++	bl	_vpaes_schedule_core
++	eor	x0, x0, x0
++
++	ldp	d8,d9,[sp],#16
++	ldp	x29,x30,[sp],#16
++	AARCH64_VALIDATE_LINK_REGISTER
++	ret
++.size	GFp_vpaes_set_encrypt_key,.-GFp_vpaes_set_encrypt_key
++___
++}
++{
++my ($inp,$out,$len,$key,$ivec) = map("x$_",(0..4));
++my ($ctr, $ctr_tmp) = ("w6", "w7");
++
++# void GFp_vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
++#                                     const AES_KEY *key, const uint8_t ivec[16]);
++$code.=<<___;
++.globl	GFp_vpaes_ctr32_encrypt_blocks
++.type	GFp_vpaes_ctr32_encrypt_blocks,%function
++.align	4
++GFp_vpaes_ctr32_encrypt_blocks:
++	AARCH64_SIGN_LINK_REGISTER
++	stp	x29,x30,[sp,#-16]!
++	add	x29,sp,#0
++	stp	d8,d9,[sp,#-16]!	// ABI spec says so
++	stp	d10,d11,[sp,#-16]!
++	stp	d12,d13,[sp,#-16]!
++	stp	d14,d15,[sp,#-16]!
++
++	cbz	$len, .Lctr32_done
++
++	// Note, unlike the other functions, $len here is measured in blocks,
++	// not bytes.
++	mov	x17, $len
++	mov	x2,  $key
++
++	// Load the IV and counter portion.
++	ldr	$ctr, [$ivec, #12]
++	ld1	{v7.16b}, [$ivec]
++
++	bl	_vpaes_encrypt_preheat
++	tst	x17, #1
++	rev	$ctr, $ctr		// The counter is big-endian.
++	b.eq	.Lctr32_prep_loop
++
++	// Handle one block so the remaining block count is even for
++	// _vpaes_encrypt_2x.
++	ld1	{v6.16b}, [$inp], #16	// Load input ahead of time
++	bl	_vpaes_encrypt_core
++	eor	v0.16b, v0.16b, v6.16b	// XOR input and result
++	st1	{v0.16b}, [$out], #16
++	subs	x17, x17, #1
++	// Update the counter.
++	add	$ctr, $ctr, #1
++	rev	$ctr_tmp, $ctr
++	mov	v7.s[3], $ctr_tmp
++	b.ls	.Lctr32_done
++
++.Lctr32_prep_loop:
++	// _vpaes_encrypt_core takes its input from v7, while _vpaes_encrypt_2x
++	// uses v14 and v15.
++	mov	v15.16b, v7.16b
++	mov	v14.16b, v7.16b
++	add	$ctr, $ctr, #1
++	rev	$ctr_tmp, $ctr
++	mov	v15.s[3], $ctr_tmp
++
++.Lctr32_loop:
++	ld1	{v6.16b,v7.16b}, [$inp], #32	// Load input ahead of time
++	bl	_vpaes_encrypt_2x
++	eor	v0.16b, v0.16b, v6.16b		// XOR input and result
++	eor	v1.16b, v1.16b, v7.16b		// XOR input and result (#2)
++	st1	{v0.16b,v1.16b}, [$out], #32
++	subs	x17, x17, #2
++	// Update the counter.
++	add	$ctr_tmp, $ctr, #1
++	add	$ctr, $ctr, #2
++	rev	$ctr_tmp, $ctr_tmp
++	mov	v14.s[3], $ctr_tmp
++	rev	$ctr_tmp, $ctr
++	mov	v15.s[3], $ctr_tmp
++	b.hi	.Lctr32_loop
++
++.Lctr32_done:
++	ldp	d14,d15,[sp],#16
++	ldp	d12,d13,[sp],#16
++	ldp	d10,d11,[sp],#16
++	ldp	d8,d9,[sp],#16
++	ldp	x29,x30,[sp],#16
++	AARCH64_VALIDATE_LINK_REGISTER
++	ret
++.size	GFp_vpaes_ctr32_encrypt_blocks,.-GFp_vpaes_ctr32_encrypt_blocks
++___
++}
++
++print $code;
++
++close STDOUT or die "error closing STDOUT";
+diff --git a/crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl b/crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl
+new file mode 100644
+index 0000000..7e52ad6
+--- /dev/null
++++ b/crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl
+@@ -0,0 +1,294 @@
++#! /usr/bin/env perl
++# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
++#
++# Licensed under the OpenSSL license (the "License").  You may not use
++# this file except in compliance with the License.  You can obtain a copy
++# in the file LICENSE in the source distribution or at
++# https://www.openssl.org/source/license.html
++
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++
++# This file was adapted to AArch64 from the 32-bit version in ghash-armv4.pl. It
++# implements the multiplication algorithm described in:
++#
++# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
++# Polynomial Multiplication on ARM Processors using the NEON Engine.
++#
++# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
++#
++# The main distinction to keep in mind between 32-bit NEON and AArch64 SIMD is
++# AArch64 cannot compute over the upper halves of SIMD registers. In 32-bit
++# NEON, the low and high halves of the 128-bit register q0 are accessible as
++# 64-bit registers d0 and d1, respectively. In AArch64, dN is the lower half of
++# vN. Where the 32-bit version would use the upper half, this file must keep
++# halves in separate registers.
++#
++# The other distinction is in syntax. 32-bit NEON embeds lane information in the
++# instruction name, while AArch64 uses suffixes on the registers. For instance,
++# left-shifting 64-bit lanes of a SIMD register in 32-bit would be written:
++#
++#     vshl.i64 q0, q0, #1
++#
++# in 64-bit, it would be written:
++#
++#     shl v0.2d, v0.2d, #1
++#
++# See Programmer's Guide for ARMv8-A, section 7 for details.
++# http://infocenter.arm.com/help/topic/com.arm.doc.den0024a/DEN0024A_v8_architecture_PG.pdf
++#
++# Finally, note the 8-bit and 64-bit polynomial multipliers in AArch64 differ
++# only by suffix. pmull vR.8h, vA.8b, vB.8b multiplies eight 8-bit polynomials
++# and is always available. pmull vR.1q, vA.1d, vB.1d multiplies a 64-bit
++# polynomial and is conditioned on the PMULL extension. This file emulates the
++# latter with the former.
++
++use strict;
++
++my $flavour = shift;
++my $output;
++if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
++else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
++
++if ($flavour && $flavour ne "void") {
++    $0 =~ m/(.*[\/\\])[^\/\\]+$/;	# $1 = directory part of this script's path
++    my $dir = $1;
++    my $xlate;
++    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
++    ( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
++    die "can't locate arm-xlate.pl";
++    # Pipe the output through arm-xlate.pl; abort if the pipe cannot be opened.
++    open OUT,"| \"$^X\" $xlate $flavour $output" or die "can't call $xlate: $!";
++    *STDOUT=*OUT;
++} else {
++    open OUT,">$output" or die "can't open $output: $!";	# no translator: write straight to $output
++    *STDOUT=*OUT;
++}
++
++my ($Xi, $Htbl, $inp, $len) = map("x$_", (0..3));	# argument block
++my ($Xl, $Xm, $Xh, $INlo, $INhi) = map("v$_", (0..4));
++my ($Hlo, $Hhi, $Hhl) = map("v$_", (5..7));
++# d8-d15 are callee-saved, so avoid v8-v15. AArch64 SIMD has plenty of registers
++# to spare.
++my ($t0, $t1, $t2, $t3) = map("v$_", (16..19));
++my ($t0l_t1l, $t0h_t1h, $t2l_t3l, $t2h_t3h) = map("v$_", (20..23));
++my ($k48_k32, $k16_k0) = map("v$_", (24..25));
++
++my $code = "";
++
++# clmul64x64 emits code which emulates pmull $r.1q, $a.1d, $b.1d. $r, $a, and $b
++# must be distinct from $t* and $k*. $t* are clobbered by the emitted code.
++sub clmul64x64 {
++my ($r, $a, $b) = @_;
++$code .= <<___;
++	ext	$t0.8b, $a.8b, $a.8b, #1	// A1
++	pmull	$t0.8h, $t0.8b, $b.8b		// F = A1*B
++	ext	$r.8b, $b.8b, $b.8b, #1		// B1
++	pmull	$r.8h, $a.8b, $r.8b		// E = A*B1
++	ext	$t1.8b, $a.8b, $a.8b, #2	// A2
++	pmull	$t1.8h, $t1.8b, $b.8b		// H = A2*B
++	ext	$t3.8b, $b.8b, $b.8b, #2	// B2
++	pmull	$t3.8h, $a.8b, $t3.8b		// G = A*B2
++	ext	$t2.8b, $a.8b, $a.8b, #3	// A3
++	eor	$t0.16b, $t0.16b, $r.16b	// L = E + F
++	pmull	$t2.8h, $t2.8b, $b.8b		// J = A3*B
++	ext	$r.8b, $b.8b, $b.8b, #3		// B3
++	eor	$t1.16b, $t1.16b, $t3.16b	// M = G + H
++	pmull	$r.8h, $a.8b, $r.8b		// I = A*B3
++
++	// Here we diverge from the 32-bit version. It computes the following
++	// (instructions reordered for clarity):
++	//
++	//     veor	\$t0#lo, \$t0#lo, \$t0#hi	@ t0 = P0 + P1 (L)
++	//     vand	\$t0#hi, \$t0#hi, \$k48
++	//     veor	\$t0#lo, \$t0#lo, \$t0#hi
++	//
++	//     veor	\$t1#lo, \$t1#lo, \$t1#hi	@ t1 = P2 + P3 (M)
++	//     vand	\$t1#hi, \$t1#hi, \$k32
++	//     veor	\$t1#lo, \$t1#lo, \$t1#hi
++	//
++	//     veor	\$t2#lo, \$t2#lo, \$t2#hi	@ t2 = P4 + P5 (N)
++	//     vand	\$t2#hi, \$t2#hi, \$k16
++	//     veor	\$t2#lo, \$t2#lo, \$t2#hi
++	//
++	//     veor	\$t3#lo, \$t3#lo, \$t3#hi	@ t3 = P6 + P7 (K)
++	//     vmov.i64	\$t3#hi, #0
++	//
++	// \$kN is a mask with the bottom N bits set. AArch64 cannot compute on
++	// upper halves of SIMD registers, so we must split each half into
++	// separate registers. To compensate, we pair computations up and
++	// parallelize.
++
++	ext	$t3.8b, $b.8b, $b.8b, #4	// B4
++	eor	$t2.16b, $t2.16b, $r.16b	// N = I + J
++	pmull	$t3.8h, $a.8b, $t3.8b		// K = A*B4
++
++	// This can probably be scheduled more efficiently. For now, we just
++	// pair up independent instructions.
++	zip1	$t0l_t1l.2d, $t0.2d, $t1.2d
++	zip1	$t2l_t3l.2d, $t2.2d, $t3.2d
++	zip2	$t0h_t1h.2d, $t0.2d, $t1.2d
++	zip2	$t2h_t3h.2d, $t2.2d, $t3.2d
++	eor	$t0l_t1l.16b, $t0l_t1l.16b, $t0h_t1h.16b
++	eor	$t2l_t3l.16b, $t2l_t3l.16b, $t2h_t3h.16b
++	and	$t0h_t1h.16b, $t0h_t1h.16b, $k48_k32.16b
++	and	$t2h_t3h.16b, $t2h_t3h.16b, $k16_k0.16b
++	eor	$t0l_t1l.16b, $t0l_t1l.16b, $t0h_t1h.16b
++	eor	$t2l_t3l.16b, $t2l_t3l.16b, $t2h_t3h.16b
++	zip1	$t0.2d, $t0l_t1l.2d, $t0h_t1h.2d
++	zip1	$t2.2d, $t2l_t3l.2d, $t2h_t3h.2d
++	zip2	$t1.2d, $t0l_t1l.2d, $t0h_t1h.2d
++	zip2	$t3.2d, $t2l_t3l.2d, $t2h_t3h.2d
++
++	ext	$t0.16b, $t0.16b, $t0.16b, #15	// t0 = t0 << 8
++	ext	$t1.16b, $t1.16b, $t1.16b, #14	// t1 = t1 << 16
++	pmull	$r.8h, $a.8b, $b.8b		// D = A*B
++	ext	$t3.16b, $t3.16b, $t3.16b, #12	// t3 = t3 << 32
++	ext	$t2.16b, $t2.16b, $t2.16b, #13	// t2 = t2 << 24
++	eor	$t0.16b, $t0.16b, $t1.16b
++	eor	$t2.16b, $t2.16b, $t3.16b
++	eor	$r.16b, $r.16b, $t0.16b
++	eor	$r.16b, $r.16b, $t2.16b
++___
++}
++
++$code .= <<___;
++#include <GFp/arm_arch.h>
++
++.text
++
++.global	GFp_gcm_init_neon
++.type	GFp_gcm_init_neon,%function
++.align	4
++GFp_gcm_init_neon:
++	AARCH64_VALID_CALL_TARGET
++	// This function is adapted from gcm_init_v8. xC2 is t3.
++	ld1	{$t1.2d}, [x1]			// load H
++	movi	$t3.16b, #0xe1
++	shl	$t3.2d, $t3.2d, #57		// 0xc2.0
++	ext	$INlo.16b, $t1.16b, $t1.16b, #8
++	ushr	$t2.2d, $t3.2d, #63
++	dup	$t1.4s, $t1.s[1]
++	ext	$t0.16b, $t2.16b, $t3.16b, #8	// t0=0xc2....01
++	ushr	$t2.2d, $INlo.2d, #63
++	sshr	$t1.4s, $t1.4s, #31		// broadcast carry bit
++	and	$t2.16b, $t2.16b, $t0.16b
++	shl	$INlo.2d, $INlo.2d, #1
++	ext	$t2.16b, $t2.16b, $t2.16b, #8
++	and	$t0.16b, $t0.16b, $t1.16b
++	orr	$INlo.16b, $INlo.16b, $t2.16b	// H<<<=1
++	eor	$Hlo.16b, $INlo.16b, $t0.16b	// twisted H
++	st1	{$Hlo.2d}, [x0]			// store Htable[0]
++	ret
++.size	GFp_gcm_init_neon,.-GFp_gcm_init_neon
++
++.global	GFp_gcm_gmult_neon
++.type	GFp_gcm_gmult_neon,%function
++.align	4
++GFp_gcm_gmult_neon:
++	AARCH64_VALID_CALL_TARGET
++	ld1	{$INlo.16b}, [$Xi]		// load Xi
++	ld1	{$Hlo.1d}, [$Htbl], #8		// load twisted H
++	ld1	{$Hhi.1d}, [$Htbl]
++	adrp	x9, :pg_hi21:.Lmasks		// load constants
++	add	x9, x9, :lo12:.Lmasks
++	ld1	{$k48_k32.2d, $k16_k0.2d}, [x9]
++	rev64	$INlo.16b, $INlo.16b		// byteswap Xi
++	ext	$INlo.16b, $INlo.16b, $INlo.16b, #8
++	eor	$Hhl.8b, $Hlo.8b, $Hhi.8b	// Karatsuba pre-processing
++
++	mov	$len, #16
++	b	.Lgmult_neon
++.size	GFp_gcm_gmult_neon,.-GFp_gcm_gmult_neon
++
++.global	GFp_gcm_ghash_neon
++.type	GFp_gcm_ghash_neon,%function
++.align	4
++GFp_gcm_ghash_neon:
++	AARCH64_VALID_CALL_TARGET
++	ld1	{$Xl.16b}, [$Xi]		// load Xi
++	ld1	{$Hlo.1d}, [$Htbl], #8		// load twisted H
++	ld1	{$Hhi.1d}, [$Htbl]
++	adrp	x9, :pg_hi21:.Lmasks		// load constants
++	add	x9, x9, :lo12:.Lmasks
++	ld1	{$k48_k32.2d, $k16_k0.2d}, [x9]
++	rev64	$Xl.16b, $Xl.16b		// byteswap Xi
++	ext	$Xl.16b, $Xl.16b, $Xl.16b, #8
++	eor	$Hhl.8b, $Hlo.8b, $Hhi.8b	// Karatsuba pre-processing
++
++.Loop_neon:
++	ld1	{$INlo.16b}, [$inp], #16	// load inp
++	rev64	$INlo.16b, $INlo.16b		// byteswap inp
++	ext	$INlo.16b, $INlo.16b, $INlo.16b, #8
++	eor	$INlo.16b, $INlo.16b, $Xl.16b	// inp ^= Xi
++
++.Lgmult_neon:
++	// Split the input into $INlo and $INhi. (The upper halves are unused,
++	// so it is okay to leave them alone.)
++	ins	$INhi.d[0], $INlo.d[1]
++___
++&clmul64x64	($Xl, $Hlo, $INlo);		# H.lo·Xi.lo
++$code .= <<___;
++	eor	$INlo.8b, $INlo.8b, $INhi.8b	// Karatsuba pre-processing
++___
++&clmul64x64	($Xm, $Hhl, $INlo);		# (H.lo+H.hi)·(Xi.lo+Xi.hi)
++&clmul64x64	($Xh, $Hhi, $INhi);		# H.hi·Xi.hi
++$code .= <<___;
++	ext	$t0.16b, $Xl.16b, $Xh.16b, #8
++	eor	$Xm.16b, $Xm.16b, $Xl.16b	// Karatsuba post-processing
++	eor	$Xm.16b, $Xm.16b, $Xh.16b
++	eor	$Xm.16b, $Xm.16b, $t0.16b	// Xm overlaps Xh.lo and Xl.hi
++	ins	$Xl.d[1], $Xm.d[0]		// Xh|Xl - 256-bit result
++	// This is a no-op due to the ins instruction below.
++	// ins	$Xh.d[0], $Xm.d[1]
++
++	// equivalent of reduction_avx from ghash-x86_64.pl
++	shl	$t1.2d, $Xl.2d, #57		// 1st phase
++	shl	$t2.2d, $Xl.2d, #62
++	eor	$t2.16b, $t2.16b, $t1.16b	//
++	shl	$t1.2d, $Xl.2d, #63
++	eor	$t2.16b, $t2.16b, $t1.16b	//
++	// Note Xm contains {Xl.d[1], Xh.d[0]}.
++	eor	$t2.16b, $t2.16b, $Xm.16b
++	ins	$Xl.d[1], $t2.d[0]		// Xl.d[1] ^= t2.d[0]
++	ins	$Xh.d[0], $t2.d[1]		// Xh.d[0] ^= t2.d[1]
++
++	ushr	$t2.2d, $Xl.2d, #1		// 2nd phase
++	eor	$Xh.16b, $Xh.16b,$Xl.16b
++	eor	$Xl.16b, $Xl.16b,$t2.16b	//
++	ushr	$t2.2d, $t2.2d, #6
++	ushr	$Xl.2d, $Xl.2d, #1		//
++	eor	$Xl.16b, $Xl.16b, $Xh.16b	//
++	eor	$Xl.16b, $Xl.16b, $t2.16b	//
++
++	subs	$len, $len, #16
++	bne	.Loop_neon
++
++	rev64	$Xl.16b, $Xl.16b		// byteswap Xi and write
++	ext	$Xl.16b, $Xl.16b, $Xl.16b, #8
++	st1	{$Xl.16b}, [$Xi]
++
++	ret
++.size	GFp_gcm_ghash_neon,.-GFp_gcm_ghash_neon
++
++.section	.rodata
++.align	4
++.Lmasks:
++.quad	0x0000ffffffffffff	// k48
++.quad	0x00000000ffffffff	// k32
++.quad	0x000000000000ffff	// k16
++.quad	0x0000000000000000	// k0
++.asciz  "GHASH for ARMv8, derived from ARMv4 version by <appro\@openssl.org>"
++.align  2
++___
++
++foreach (split("\n",$code)) {
++	s/\`([^\`]*)\`/eval $1/geo;
++
++	print $_,"\n";
++}
++close STDOUT or die "error closing STDOUT"; # enforce flush
+-- 
+Efraim Flashner   <efraim@flashner.co.il>   רנשלפ םירפא
+GPG key = A28B F40C 3E55 1372 662D  14F7 41AA E7DC CA3D 8351
+Confidentiality cannot be guaranteed on emails sent or received unencrypted
+
diff --git a/gnu/packages/patches/rust-ring-0.16-test-files.patch b/gnu/packages/patches/rust-ring-0.16-test-files.patch
new file mode 100644
index 0000000000..dbe5c0f4ee
--- /dev/null
+++ b/gnu/packages/patches/rust-ring-0.16-test-files.patch
@@ -0,0 +1,54 @@
+This file exists in the upstream repository at the commit which
+corresponds to the ring-0.16.20 release, but was excluded from the
+release tarball.
+
+---
+ tests/ed25519_verify_tests.txt | 34 ++++++++++++++++++++++++++++++++++
+ 1 file changed, 34 insertions(+)
+ create mode 100644 tests/ed25519_verify_tests.txt
+
+diff --git a/tests/ed25519_verify_tests.txt b/tests/ed25519_verify_tests.txt
+new file mode 100644
+index 0000000..74c94b3
+--- /dev/null
++++ b/tests/ed25519_verify_tests.txt
+@@ -0,0 +1,34 @@
++# BoringSSL TEST(Ed25519Test Malleability)
++
++# Control; S is in range.
++MESSAGE = 54657374
++SIG = 7c38e026f29e14aabd059a0f2db8b0cd783040609a8be684db12f82a27774ab07a9155711ecfaf7f99f277bad0c6ae7e39d4eef676573336a5c51eb6f946b30d
++PUB = 7d4d0e7f6153a69b6242b522abbee685fda4420f8834b108c3bdae369ef549fa
++Result = P
++
++# Same as above, but with the order L added to S so it is out of range.
++# BoringSSL commit 472ba2c2dd52d06a657a63b7fbf02732a6649d21
++MESSAGE = 54657374
++SIG = 7c38e026f29e14aabd059a0f2db8b0cd783040609a8be684db12f82a27774ab067654bce3832c2d76f8f6f5dafc08d9339d4eef676573336a5c51eb6f946b31d
++PUB = 7d4d0e7f6153a69b6242b522abbee685fda4420f8834b108c3bdae369ef549fa
++Result = F
++
++
++# BoringSSL commit 3094902fcdc2db2cc832fa854b9a6a8be383926c
++MESSAGE = 124e583f8b8eca58bb29c271b41d36986bbc45541f8e51f9cb0133eca447601e
++SIG = dac119d6ca87fc59ae611c157048f4d4fc932a149dbe20ec6effd1436abf83ea05c7df0fef06147241259113909bc71bd3c53ba4464ffcad3c0968f2ffffff0f
++PUB = 100fdf47fb94f1536a4f7c3fda27383fa03375a8f527c537e6f1703c47f94f86
++Result = P
++
++# Control. Same key as above; same message and signature as below, except S is in range.
++PUB = 100fdf47fb94f1536a4f7c3fda27383fa03375a8f527c537e6f1703c47f94f86
++MESSAGE = 6a0bc2b0057cedfc0fa2e3f7f7d39279b30f454a69dfd1117c758d86b19d85e0
++SIG = 0971f86d2c9c78582524a103cb9cf949522ae528f8054dc20107d999be673ff4f58ac9d20ec563133cabc6230b1db8625f8446639ede46ad4df4053000000000
++Result = P
++
++# Same key as above, but S is out of range.
++# BoringSSL commit 472ba2c2dd52d06a657a63b7fbf02732a6649d21
++PUB = 100fdf47fb94f1536a4f7c3fda27383fa03375a8f527c537e6f1703c47f94f86
++MESSAGE = 6a0bc2b0057cedfc0fa2e3f7f7d39279b30f454a69dfd1117c758d86b19d85e0
++SIG = 0971f86d2c9c78582524a103cb9cf949522ae528f8054dc20107d999be673ff4e25ebf2f2928766b1248bec6e91697775f8446639ede46ad4df4053000000010
++Result = F
+-- 
+Efraim Flashner   <efraim@flashner.co.il>   רנשלפ םירפא
+GPG key = A28B F40C 3E55 1372 662D  14F7 41AA E7DC CA3D 8351
+Confidentiality cannot be guaranteed on emails sent or received unencrypted
+