crypto: aesni - Use unaligned loads from gcm_context_data

author Dave Watson <davejwatson@fb.com>

Wed, 15 Aug 2018 17:29:42 +0000 (10:29 -0700)

committer Herbert Xu <herbert@gondor.apana.org.au>

Sat, 25 Aug 2018 11:50:42 +0000 (19:50 +0800)
author Dave Watson <davejwatson@fb.com>
Wed, 15 Aug 2018 17:29:42 +0000 (10:29 -0700)
committer Herbert Xu <herbert@gondor.apana.org.au>
Sat, 25 Aug 2018 11:50:42 +0000 (19:50 +0800)
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S

index e762ef417562ff96ba769a555a36f546ff5d7d92..d27a50656aa1f720c124e06036da624cb199d0f1 100644 (file)
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -223,34 +223,34 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
         pcmpeqd TWOONE(%rip), \TMP2
         pand    POLY(%rip), \TMP2
         pxor    \TMP2, \TMP3
         pcmpeqd TWOONE(%rip), \TMP2
         pand    POLY(%rip), \TMP2
         pxor    \TMP2, \TMP3
-       movdqa  \TMP3, HashKey(%arg2)
+       movdqu  \TMP3, HashKey(%arg2)
  
         movdqa     \TMP3, \TMP5
         pshufd     $78, \TMP3, \TMP1
         pxor       \TMP3, \TMP1
  
         movdqa     \TMP3, \TMP5
         pshufd     $78, \TMP3, \TMP1
         pxor       \TMP3, \TMP1
-       movdqa     \TMP1, HashKey_k(%arg2)
+       movdqu     \TMP1, HashKey_k(%arg2)
  
         GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
  # TMP5 = HashKey^2<<1 (mod poly)
  
         GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
  # TMP5 = HashKey^2<<1 (mod poly)
-       movdqa     \TMP5, HashKey_2(%arg2)
+       movdqu     \TMP5, HashKey_2(%arg2)
  # HashKey_2 = HashKey^2<<1 (mod poly)
         pshufd     $78, \TMP5, \TMP1
         pxor       \TMP5, \TMP1
  # HashKey_2 = HashKey^2<<1 (mod poly)
         pshufd     $78, \TMP5, \TMP1
         pxor       \TMP5, \TMP1
-       movdqa     \TMP1, HashKey_2_k(%arg2)
+       movdqu     \TMP1, HashKey_2_k(%arg2)
  
         GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
  # TMP5 = HashKey^3<<1 (mod poly)
  
         GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
  # TMP5 = HashKey^3<<1 (mod poly)
-       movdqa     \TMP5, HashKey_3(%arg2)
+       movdqu     \TMP5, HashKey_3(%arg2)
         pshufd     $78, \TMP5, \TMP1
         pxor       \TMP5, \TMP1
         pshufd     $78, \TMP5, \TMP1
         pxor       \TMP5, \TMP1
-       movdqa     \TMP1, HashKey_3_k(%arg2)
+       movdqu     \TMP1, HashKey_3_k(%arg2)
  
         GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
  # TMP5 = HashKey^4<<1 (mod poly)
  
         GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
  # TMP5 = HashKey^4<<1 (mod poly)
-       movdqa     \TMP5, HashKey_4(%arg2)
+       movdqu     \TMP5, HashKey_4(%arg2)
         pshufd     $78, \TMP5, \TMP1
         pxor       \TMP5, \TMP1
         pshufd     $78, \TMP5, \TMP1
         pxor       \TMP5, \TMP1
-       movdqa     \TMP1, HashKey_4_k(%arg2)
+       movdqu     \TMP1, HashKey_4_k(%arg2)
  .endm
  
  # GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding.
  .endm
  
  # GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding.
@@ -271,7 +271,7 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
         movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
  
         PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
         movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
  
         PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
-       movdqa HashKey(%arg2), %xmm13
+       movdqu HashKey(%arg2), %xmm13
  
         CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
         %xmm4, %xmm5, %xmm6
  
         CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
         %xmm4, %xmm5, %xmm6
@@ -997,7 +997,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
         pshufd    $78, \XMM5, \TMP6
         pxor      \XMM5, \TMP6
         paddd     ONE(%rip), \XMM0              # INCR CNT
         pshufd    $78, \XMM5, \TMP6
         pxor      \XMM5, \TMP6
         paddd     ONE(%rip), \XMM0              # INCR CNT
-       movdqa    HashKey_4(%arg2), \TMP5
+       movdqu    HashKey_4(%arg2), \TMP5
         PCLMULQDQ 0x11, \TMP5, \TMP4           # TMP4 = a1*b1
         movdqa    \XMM0, \XMM1
         paddd     ONE(%rip), \XMM0              # INCR CNT
         PCLMULQDQ 0x11, \TMP5, \TMP4           # TMP4 = a1*b1
         movdqa    \XMM0, \XMM1
         paddd     ONE(%rip), \XMM0              # INCR CNT
@@ -1016,7 +1016,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
         pxor      (%arg1), \XMM2
         pxor      (%arg1), \XMM3
         pxor      (%arg1), \XMM4
         pxor      (%arg1), \XMM2
         pxor      (%arg1), \XMM3
         pxor      (%arg1), \XMM4
-       movdqa    HashKey_4_k(%arg2), \TMP5
+       movdqu    HashKey_4_k(%arg2), \TMP5
         PCLMULQDQ 0x00, \TMP5, \TMP6           # TMP6 = (a1+a0)*(b1+b0)
         movaps 0x10(%arg1), \TMP1
         AESENC    \TMP1, \XMM1              # Round 1
         PCLMULQDQ 0x00, \TMP5, \TMP6           # TMP6 = (a1+a0)*(b1+b0)
         movaps 0x10(%arg1), \TMP1
         AESENC    \TMP1, \XMM1              # Round 1
@@ -1031,7 +1031,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
         movdqa    \XMM6, \TMP1
         pshufd    $78, \XMM6, \TMP2
         pxor      \XMM6, \TMP2
         movdqa    \XMM6, \TMP1
         pshufd    $78, \XMM6, \TMP2
         pxor      \XMM6, \TMP2
-       movdqa    HashKey_3(%arg2), \TMP5
+       movdqu    HashKey_3(%arg2), \TMP5
         PCLMULQDQ 0x11, \TMP5, \TMP1           # TMP1 = a1 * b1
         movaps 0x30(%arg1), \TMP3
         AESENC    \TMP3, \XMM1              # Round 3
         PCLMULQDQ 0x11, \TMP5, \TMP1           # TMP1 = a1 * b1
         movaps 0x30(%arg1), \TMP3
         AESENC    \TMP3, \XMM1              # Round 3
@@ -1044,7 +1044,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
         AESENC    \TMP3, \XMM2
         AESENC    \TMP3, \XMM3
         AESENC    \TMP3, \XMM4
         AESENC    \TMP3, \XMM2
         AESENC    \TMP3, \XMM3
         AESENC    \TMP3, \XMM4
-       movdqa    HashKey_3_k(%arg2), \TMP5
+       movdqu    HashKey_3_k(%arg2), \TMP5
         PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
         movaps 0x50(%arg1), \TMP3
         AESENC    \TMP3, \XMM1              # Round 5
         PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
         movaps 0x50(%arg1), \TMP3
         AESENC    \TMP3, \XMM1              # Round 5
@@ -1058,7 +1058,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
         movdqa    \XMM7, \TMP1
         pshufd    $78, \XMM7, \TMP2
         pxor      \XMM7, \TMP2
         movdqa    \XMM7, \TMP1
         pshufd    $78, \XMM7, \TMP2
         pxor      \XMM7, \TMP2
-       movdqa    HashKey_2(%arg2), \TMP5
+       movdqu    HashKey_2(%arg2), \TMP5
  
          # Multiply TMP5 * HashKey using karatsuba
  
  
          # Multiply TMP5 * HashKey using karatsuba
  
@@ -1074,7 +1074,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
         AESENC    \TMP3, \XMM2
         AESENC    \TMP3, \XMM3
         AESENC    \TMP3, \XMM4
         AESENC    \TMP3, \XMM2
         AESENC    \TMP3, \XMM3
         AESENC    \TMP3, \XMM4
-       movdqa    HashKey_2_k(%arg2), \TMP5
+       movdqu    HashKey_2_k(%arg2), \TMP5
         PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
         movaps 0x80(%arg1), \TMP3
         AESENC    \TMP3, \XMM1             # Round 8
         PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
         movaps 0x80(%arg1), \TMP3
         AESENC    \TMP3, \XMM1             # Round 8
@@ -1092,7 +1092,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
         movdqa    \XMM8, \TMP1
         pshufd    $78, \XMM8, \TMP2
         pxor      \XMM8, \TMP2
         movdqa    \XMM8, \TMP1
         pshufd    $78, \XMM8, \TMP2
         pxor      \XMM8, \TMP2
-       movdqa    HashKey(%arg2), \TMP5
+       movdqu    HashKey(%arg2), \TMP5
         PCLMULQDQ 0x11, \TMP5, \TMP1          # TMP1 = a1*b1
         movaps 0x90(%arg1), \TMP3
         AESENC    \TMP3, \XMM1            # Round 9
         PCLMULQDQ 0x11, \TMP5, \TMP1          # TMP1 = a1*b1
         movaps 0x90(%arg1), \TMP3
         AESENC    \TMP3, \XMM1            # Round 9
@@ -1121,7 +1121,7 @@ aes_loop_par_enc_done\@:
         AESENCLAST \TMP3, \XMM2
         AESENCLAST \TMP3, \XMM3
         AESENCLAST \TMP3, \XMM4
         AESENCLAST \TMP3, \XMM2
         AESENCLAST \TMP3, \XMM3
         AESENCLAST \TMP3, \XMM4
-       movdqa    HashKey_k(%arg2), \TMP5
+       movdqu    HashKey_k(%arg2), \TMP5
         PCLMULQDQ 0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
         movdqu    (%arg4,%r11,1), \TMP3
         pxor      \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
         PCLMULQDQ 0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
         movdqu    (%arg4,%r11,1), \TMP3
         pxor      \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
@@ -1205,7 +1205,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
         pshufd    $78, \XMM5, \TMP6
         pxor      \XMM5, \TMP6
         paddd     ONE(%rip), \XMM0              # INCR CNT
         pshufd    $78, \XMM5, \TMP6
         pxor      \XMM5, \TMP6
         paddd     ONE(%rip), \XMM0              # INCR CNT
-       movdqa    HashKey_4(%arg2), \TMP5
+       movdqu    HashKey_4(%arg2), \TMP5
         PCLMULQDQ 0x11, \TMP5, \TMP4           # TMP4 = a1*b1
         movdqa    \XMM0, \XMM1
         paddd     ONE(%rip), \XMM0              # INCR CNT
         PCLMULQDQ 0x11, \TMP5, \TMP4           # TMP4 = a1*b1
         movdqa    \XMM0, \XMM1
         paddd     ONE(%rip), \XMM0              # INCR CNT
@@ -1224,7 +1224,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
         pxor      (%arg1), \XMM2
         pxor      (%arg1), \XMM3
         pxor      (%arg1), \XMM4
         pxor      (%arg1), \XMM2
         pxor      (%arg1), \XMM3
         pxor      (%arg1), \XMM4
-       movdqa    HashKey_4_k(%arg2), \TMP5
+       movdqu    HashKey_4_k(%arg2), \TMP5
         PCLMULQDQ 0x00, \TMP5, \TMP6           # TMP6 = (a1+a0)*(b1+b0)
         movaps 0x10(%arg1), \TMP1
         AESENC    \TMP1, \XMM1              # Round 1
         PCLMULQDQ 0x00, \TMP5, \TMP6           # TMP6 = (a1+a0)*(b1+b0)
         movaps 0x10(%arg1), \TMP1
         AESENC    \TMP1, \XMM1              # Round 1
@@ -1239,7 +1239,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
         movdqa    \XMM6, \TMP1
         pshufd    $78, \XMM6, \TMP2
         pxor      \XMM6, \TMP2
         movdqa    \XMM6, \TMP1
         pshufd    $78, \XMM6, \TMP2
         pxor      \XMM6, \TMP2
-       movdqa    HashKey_3(%arg2), \TMP5
+       movdqu    HashKey_3(%arg2), \TMP5
         PCLMULQDQ 0x11, \TMP5, \TMP1           # TMP1 = a1 * b1
         movaps 0x30(%arg1), \TMP3
         AESENC    \TMP3, \XMM1              # Round 3
         PCLMULQDQ 0x11, \TMP5, \TMP1           # TMP1 = a1 * b1
         movaps 0x30(%arg1), \TMP3
         AESENC    \TMP3, \XMM1              # Round 3
@@ -1252,7 +1252,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
         AESENC    \TMP3, \XMM2
         AESENC    \TMP3, \XMM3
         AESENC    \TMP3, \XMM4
         AESENC    \TMP3, \XMM2
         AESENC    \TMP3, \XMM3
         AESENC    \TMP3, \XMM4
-       movdqa    HashKey_3_k(%arg2), \TMP5
+       movdqu    HashKey_3_k(%arg2), \TMP5
         PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
         movaps 0x50(%arg1), \TMP3
         AESENC    \TMP3, \XMM1              # Round 5
         PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
         movaps 0x50(%arg1), \TMP3
         AESENC    \TMP3, \XMM1              # Round 5
@@ -1266,7 +1266,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
         movdqa    \XMM7, \TMP1
         pshufd    $78, \XMM7, \TMP2
         pxor      \XMM7, \TMP2
         movdqa    \XMM7, \TMP1
         pshufd    $78, \XMM7, \TMP2
         pxor      \XMM7, \TMP2
-       movdqa    HashKey_2(%arg2), \TMP5
+       movdqu    HashKey_2(%arg2), \TMP5
  
          # Multiply TMP5 * HashKey using karatsuba
  
  
          # Multiply TMP5 * HashKey using karatsuba
  
@@ -1282,7 +1282,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
         AESENC    \TMP3, \XMM2
         AESENC    \TMP3, \XMM3
         AESENC    \TMP3, \XMM4
         AESENC    \TMP3, \XMM2
         AESENC    \TMP3, \XMM3
         AESENC    \TMP3, \XMM4
-       movdqa    HashKey_2_k(%arg2), \TMP5
+       movdqu    HashKey_2_k(%arg2), \TMP5
         PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
         movaps 0x80(%arg1), \TMP3
         AESENC    \TMP3, \XMM1             # Round 8
         PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
         movaps 0x80(%arg1), \TMP3
         AESENC    \TMP3, \XMM1             # Round 8
@@ -1300,7 +1300,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
         movdqa    \XMM8, \TMP1
         pshufd    $78, \XMM8, \TMP2
         pxor      \XMM8, \TMP2
         movdqa    \XMM8, \TMP1
         pshufd    $78, \XMM8, \TMP2
         pxor      \XMM8, \TMP2
-       movdqa    HashKey(%arg2), \TMP5
+       movdqu    HashKey(%arg2), \TMP5
         PCLMULQDQ 0x11, \TMP5, \TMP1          # TMP1 = a1*b1
         movaps 0x90(%arg1), \TMP3
         AESENC    \TMP3, \XMM1            # Round 9
         PCLMULQDQ 0x11, \TMP5, \TMP1          # TMP1 = a1*b1
         movaps 0x90(%arg1), \TMP3
         AESENC    \TMP3, \XMM1            # Round 9
@@ -1329,7 +1329,7 @@ aes_loop_par_dec_done\@:
         AESENCLAST \TMP3, \XMM2
         AESENCLAST \TMP3, \XMM3
         AESENCLAST \TMP3, \XMM4
         AESENCLAST \TMP3, \XMM2
         AESENCLAST \TMP3, \XMM3
         AESENCLAST \TMP3, \XMM4
-       movdqa    HashKey_k(%arg2), \TMP5
+       movdqu    HashKey_k(%arg2), \TMP5
         PCLMULQDQ 0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
         movdqu    (%arg4,%r11,1), \TMP3
         pxor      \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
         PCLMULQDQ 0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
         movdqu    (%arg4,%r11,1), \TMP3
         pxor      \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
@@ -1405,10 +1405,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
         movdqa    \XMM1, \TMP6
         pshufd    $78, \XMM1, \TMP2
         pxor      \XMM1, \TMP2
         movdqa    \XMM1, \TMP6
         pshufd    $78, \XMM1, \TMP2
         pxor      \XMM1, \TMP2
-       movdqa    HashKey_4(%arg2), \TMP5
+       movdqu    HashKey_4(%arg2), \TMP5
         PCLMULQDQ 0x11, \TMP5, \TMP6       # TMP6 = a1*b1
         PCLMULQDQ 0x00, \TMP5, \XMM1       # XMM1 = a0*b0
         PCLMULQDQ 0x11, \TMP5, \TMP6       # TMP6 = a1*b1
         PCLMULQDQ 0x00, \TMP5, \XMM1       # XMM1 = a0*b0
-       movdqa    HashKey_4_k(%arg2), \TMP4
+       movdqu    HashKey_4_k(%arg2), \TMP4
         PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
         movdqa    \XMM1, \XMMDst
         movdqa    \TMP2, \XMM1              # result in TMP6, XMMDst, XMM1
         PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
         movdqa    \XMM1, \XMMDst
         movdqa    \TMP2, \XMM1              # result in TMP6, XMMDst, XMM1
@@ -1418,10 +1418,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
         movdqa    \XMM2, \TMP1
         pshufd    $78, \XMM2, \TMP2
         pxor      \XMM2, \TMP2
         movdqa    \XMM2, \TMP1
         pshufd    $78, \XMM2, \TMP2
         pxor      \XMM2, \TMP2
-       movdqa    HashKey_3(%arg2), \TMP5
+       movdqu    HashKey_3(%arg2), \TMP5
         PCLMULQDQ 0x11, \TMP5, \TMP1       # TMP1 = a1*b1
         PCLMULQDQ 0x00, \TMP5, \XMM2       # XMM2 = a0*b0
         PCLMULQDQ 0x11, \TMP5, \TMP1       # TMP1 = a1*b1
         PCLMULQDQ 0x00, \TMP5, \XMM2       # XMM2 = a0*b0
-       movdqa    HashKey_3_k(%arg2), \TMP4
+       movdqu    HashKey_3_k(%arg2), \TMP4
         PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
         pxor      \TMP1, \TMP6
         pxor      \XMM2, \XMMDst
         PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
         pxor      \TMP1, \TMP6
         pxor      \XMM2, \XMMDst
@@ -1433,10 +1433,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
         movdqa    \XMM3, \TMP1
         pshufd    $78, \XMM3, \TMP2
         pxor      \XMM3, \TMP2
         movdqa    \XMM3, \TMP1
         pshufd    $78, \XMM3, \TMP2
         pxor      \XMM3, \TMP2
-       movdqa    HashKey_2(%arg2), \TMP5
+       movdqu    HashKey_2(%arg2), \TMP5
         PCLMULQDQ 0x11, \TMP5, \TMP1       # TMP1 = a1*b1
         PCLMULQDQ 0x00, \TMP5, \XMM3       # XMM3 = a0*b0
         PCLMULQDQ 0x11, \TMP5, \TMP1       # TMP1 = a1*b1
         PCLMULQDQ 0x00, \TMP5, \XMM3       # XMM3 = a0*b0
-       movdqa    HashKey_2_k(%arg2), \TMP4
+       movdqu    HashKey_2_k(%arg2), \TMP4
         PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
         pxor      \TMP1, \TMP6
         pxor      \XMM3, \XMMDst
         PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
         pxor      \TMP1, \TMP6
         pxor      \XMM3, \XMMDst
@@ -1446,10 +1446,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
         movdqa    \XMM4, \TMP1
         pshufd    $78, \XMM4, \TMP2
         pxor      \XMM4, \TMP2
         movdqa    \XMM4, \TMP1
         pshufd    $78, \XMM4, \TMP2
         pxor      \XMM4, \TMP2
-       movdqa    HashKey(%arg2), \TMP5
+       movdqu    HashKey(%arg2), \TMP5
         PCLMULQDQ 0x11, \TMP5, \TMP1        # TMP1 = a1*b1
         PCLMULQDQ 0x00, \TMP5, \XMM4       # XMM4 = a0*b0
         PCLMULQDQ 0x11, \TMP5, \TMP1        # TMP1 = a1*b1
         PCLMULQDQ 0x00, \TMP5, \XMM4       # XMM4 = a0*b0
-       movdqa    HashKey_k(%arg2), \TMP4
+       movdqu    HashKey_k(%arg2), \TMP4
         PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
         pxor      \TMP1, \TMP6
         pxor      \XMM4, \XMMDst
         PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
         pxor      \TMP1, \TMP6
         pxor      \XMM4, \XMMDst
author	Dave Watson <davejwatson@fb.com>
	Wed, 15 Aug 2018 17:29:42 +0000 (10:29 -0700)
committer	Herbert Xu <herbert@gondor.apana.org.au>
	Sat, 25 Aug 2018 11:50:42 +0000 (19:50 +0800)