I'm very pleased to announce the release of our new website and documentation
using the new toolchain with Hugo and AsciiDoctor. To get more information
about the new toolchain, please read the FreeBSD Documentation Project
Primer[1], Hugo docs[2], and AsciiDoctor docs[3].

Acknowledgments:

Benedict Reuschling <bcr@>
Glen Barber <gjb@>
Hiroki Sato <hrs@>
Li-Wen Hsu <lwhsu@>
Sean Chittenden <seanc@>
The FreeBSD Foundation

[1] https://docs.FreeBSD.org/en/books/fdp-primer/
[2] https://gohugo.io/documentation/
[3] https://docs.asciidoctor.org/home/

Approved by:	doceng, core
--- crypto/openssl/crypto/bn/asm/rsaz-avx2.pl.orig
+++ crypto/openssl/crypto/bn/asm/rsaz-avx2.pl
@@ -239,7 +239,7 @@
 vmovdqu 32*8-128($ap), $ACC8

 lea 192(%rsp), $tp0 # 64+128=192
- vpbroadcastq .Land_mask(%rip), $AND_MASK
+ vmovdqu .Land_mask(%rip), $AND_MASK
 jmp .LOOP_GRANDE_SQR_1024

 .align 32
@@ -1070,10 +1070,10 @@
 vpmuludq 32*6-128($np),$Yi,$TEMP1
 vpaddq $TEMP1,$ACC6,$ACC6
 vpmuludq 32*7-128($np),$Yi,$TEMP2
- vpblendd \$3, $ZERO, $ACC9, $ACC9 # correct $ACC3
+ vpblendd \$3, $ZERO, $ACC9, $TEMP1 # correct $ACC3
 vpaddq $TEMP2,$ACC7,$ACC7
 vpmuludq 32*8-128($np),$Yi,$TEMP0
- vpaddq $ACC9, $ACC3, $ACC3 # correct $ACC3
+ vpaddq $TEMP1, $ACC3, $ACC3 # correct $ACC3
 vpaddq $TEMP0,$ACC8,$ACC8

 mov %rbx, %rax
@@ -1086,7 +1086,9 @@
 vmovdqu -8+32*2-128($ap),$TEMP2

 mov $r1, %rax
+ vpblendd \$0xfc, $ZERO, $ACC9, $ACC9 # correct $ACC3
 imull $n0, %eax
+ vpaddq $ACC9,$ACC4,$ACC4 # correct $ACC3
 and \$0x1fffffff, %eax

 imulq 16-128($ap),%rbx
@@ -1322,15 +1324,12 @@
 # But as we underutilize resources, it's possible to correct in
 # each iteration with marginal performance loss. But then, as
 # we do it in each iteration, we can correct less digits, and
-# avoid performance penalties completely. Also note that we
-# correct only three digits out of four. This works because
-# most significant digit is subjected to less additions.
+# avoid performance penalties completely.

 $TEMP0 = $ACC9;
 $TEMP3 = $Bi;
 $TEMP4 = $Yi;
 $code.=<<___;
- vpermq \$0, $AND_MASK, $AND_MASK
 vpaddq (%rsp), $TEMP1, $ACC0

 vpsrlq \$29, $ACC0, $TEMP1
@@ -1763,7 +1762,7 @@

 .align 64
 .Land_mask:
- .quad 0x1fffffff,0x1fffffff,0x1fffffff,-1
+ .quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
 .Lscatter_permd:
 .long 0,2,4,6,7,7,7,7
 .Lgather_permd:
--- crypto/openssl/ssl/ssl.h.orig
+++ crypto/openssl/ssl/ssl.h
@@ -1727,7 +1727,7 @@
 # define SSL_ST_BEFORE 0x4000
 # define SSL_ST_OK 0x03
 # define SSL_ST_RENEGOTIATE (0x04|SSL_ST_INIT)
-# define SSL_ST_ERR 0x05
+# define SSL_ST_ERR (0x05|SSL_ST_INIT)

 # define SSL_CB_LOOP 0x01
 # define SSL_CB_EXIT 0x02
--- secure/lib/libcrypto/amd64/rsaz-avx2.S.orig
+++ secure/lib/libcrypto/amd64/rsaz-avx2.S
@@ -68,7 +68,7 @@
 vmovdqu 256-128(%rsi),%ymm8

 leaq 192(%rsp),%rbx
- vpbroadcastq .Land_mask(%rip),%ymm15
+ vmovdqu .Land_mask(%rip),%ymm15
 jmp .LOOP_GRANDE_SQR_1024

 .align 32
@@ -801,10 +801,10 @@
 vpmuludq 192-128(%rcx),%ymm11,%ymm12
 vpaddq %ymm12,%ymm6,%ymm6
 vpmuludq 224-128(%rcx),%ymm11,%ymm13
- vpblendd $3,%ymm14,%ymm9,%ymm9
+ vpblendd $3,%ymm14,%ymm9,%ymm12
 vpaddq %ymm13,%ymm7,%ymm7
 vpmuludq 256-128(%rcx),%ymm11,%ymm0
- vpaddq %ymm9,%ymm3,%ymm3
+ vpaddq %ymm12,%ymm3,%ymm3
 vpaddq %ymm0,%ymm8,%ymm8

 movq %rbx,%rax
@@ -817,7 +817,9 @@
 vmovdqu -8+64-128(%rsi),%ymm13

 movq %r10,%rax
+ vpblendd $0xfc,%ymm14,%ymm9,%ymm9
 imull %r8d,%eax
+ vpaddq %ymm9,%ymm4,%ymm4
 andl $0x1fffffff,%eax

 imulq 16-128(%rsi),%rbx
@@ -1046,7 +1048,6 @@

 decl %r14d
 jnz .Loop_mul_1024
- vpermq $0,%ymm15,%ymm15
 vpaddq (%rsp),%ymm12,%ymm0

 vpsrlq $29,%ymm0,%ymm12
@@ -1686,7 +1687,7 @@

 .align 64
 .Land_mask:
-.quad 0x1fffffff,0x1fffffff,0x1fffffff,-1
+.quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
 .Lscatter_permd:
 .long 0,2,4,6,7,7,7,7
 .Lgather_permd:
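For readers unfamiliar with the code being patched: the hunk comments describe a redundant representation in which each 29-bit digit is held in a 64-bit (or vector) lane, and a periodic carry/normalization step masks each digit back to 29 bits with 0x1fffffff (the same constant seen in the `and \$0x1fffffff` instructions) while pushing the excess into the next digit. The .Land_mask change above makes that mask cover all four lanes instead of leaving the most significant one unmasked. The following C sketch is only an illustration of that general normalization idea under stated assumptions, not the OpenSSL implementation; the names NDIGITS, DIGIT_MASK, normalize(), and the sample digit values are invented for the example.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative sketch only (not the OpenSSL code): digits are 29-bit
 * values kept in 64-bit words, so a digit may temporarily grow past
 * 29 bits after additions.  normalize() masks every digit back to
 * 29 bits and propagates the excess into the next digit, the same kind
 * of carry step the all-lanes 0x1fffffff mask applies in the patch.
 */
#define NDIGITS    4
#define DIGIT_BITS 29
#define DIGIT_MASK 0x1fffffffULL

static void normalize(uint64_t d[NDIGITS + 1])
{
    for (int i = 0; i < NDIGITS; i++) {
        uint64_t carry = d[i] >> DIGIT_BITS;  /* excess above 29 bits */
        d[i] &= DIGIT_MASK;                   /* mask this digit      */
        d[i + 1] += carry;                    /* push excess upward   */
    }
}

int main(void)
{
    /* hypothetical digits after a few unnormalized additions */
    uint64_t d[NDIGITS + 1] = { 0x40000005ULL, 0x3fffffffULL,
                                0x20000001ULL, 0x5fffffffULL, 0 };

    normalize(d);
    for (int i = 0; i <= NDIGITS; i++)
        printf("digit %d: 0x%llx\n", i, (unsigned long long)d[i]);
    return 0;
}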