summaryrefslogtreecommitdiffstats
path: root/crypto/sha/asm/sha256-mips.S
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/sha/asm/sha256-mips.S')
-rw-r--r--crypto/sha/asm/sha256-mips.S1998
1 files changed, 1998 insertions, 0 deletions
diff --git a/crypto/sha/asm/sha256-mips.S b/crypto/sha/asm/sha256-mips.S
new file mode 100644
index 0000000..5fc697f
--- /dev/null
+++ b/crypto/sha/asm/sha256-mips.S
@@ -0,0 +1,1998 @@
+#ifdef OPENSSL_FIPSCANISTER
+# include <openssl/fipssyms.h>
+#endif
+
+.text
+.set noat
+#if !defined(__vxworks) || defined(__pic__)
+.option pic2
+#endif
+
+.align 5
+.globl sha256_block_data_order
+.ent sha256_block_data_order
+sha256_block_data_order:
+ .frame $29,128,$31
+ .mask 3237937152,-4
+ .set noreorder
+ .cpload $25
+ sub $29,128
+ sw $31,128-1*4($29)
+ sw $30,128-2*4($29)
+ sw $23,128-3*4($29)
+ sw $22,128-4*4($29)
+ sw $21,128-5*4($29)
+ sw $20,128-6*4($29)
+ sw $19,128-7*4($29)
+ sw $18,128-8*4($29)
+ sw $17,128-9*4($29)
+ sw $16,128-10*4($29)
+ sll $23,$6,6
+ .set reorder
+ la $6,K256 # PIC-ified 'load address'
+
+ lw $1,0*4($4) # load context
+ lw $2,1*4($4)
+ lw $3,2*4($4)
+ lw $7,3*4($4)
+ lw $24,4*4($4)
+ lw $25,5*4($4)
+ lw $30,6*4($4)
+ lw $31,7*4($4)
+
+ add $23,$5 # pointer to the end of input
+ sw $23,16*4($29)
+ b .Loop
+
+.align 5
+.Loop:
+ lwl $8,3($5)
+ lwr $8,0($5)
+ lwl $9,7($5)
+ lwr $9,4($5)
+ srl $13,$8,24 # byte swap(0)
+ srl $14,$8,8
+ andi $15,$8,0xFF00
+ sll $8,$8,24
+ andi $14,0xFF00
+ sll $15,$15,8
+ or $8,$13
+ or $14,$15
+ or $8,$14
+ addu $12,$8,$31 # 0
+ srl $31,$24,6
+ xor $15,$25,$30
+ sll $14,$24,7
+ and $15,$24
+ srl $13,$24,11
+ xor $31,$14
+ sll $14,$24,21
+ xor $31,$13
+ srl $13,$24,25
+ xor $31,$14
+ sll $14,$24,26
+ xor $31,$13
+ xor $15,$30 # Ch(e,f,g)
+ xor $13,$14,$31 # Sigma1(e)
+
+ srl $31,$1,2
+ addu $12,$15
+ lw $15,0($6) # K[0]
+ sll $14,$1,10
+ addu $12,$13
+ srl $13,$1,13
+ xor $31,$14
+ sll $14,$1,19
+ xor $31,$13
+ srl $13,$1,22
+ xor $31,$14
+ sll $14,$1,30
+ xor $31,$13
+ sw $8,0($29) # offload to ring buffer
+ xor $31,$14 # Sigma0(a)
+
+ or $13,$1,$2
+ and $14,$1,$2
+ and $13,$3
+ or $14,$13 # Maj(a,b,c)
+ addu $12,$15 # +=K[0]
+ addu $31,$14
+
+ addu $7,$12
+ addu $31,$12
+ lwl $10,11($5)
+ lwr $10,8($5)
+ srl $14,$9,24 # byte swap(1)
+ srl $15,$9,8
+ andi $16,$9,0xFF00
+ sll $9,$9,24
+ andi $15,0xFF00
+ sll $16,$16,8
+ or $9,$14
+ or $15,$16
+ or $9,$15
+ addu $13,$9,$30 # 1
+ srl $30,$7,6
+ xor $16,$24,$25
+ sll $15,$7,7
+ and $16,$7
+ srl $14,$7,11
+ xor $30,$15
+ sll $15,$7,21
+ xor $30,$14
+ srl $14,$7,25
+ xor $30,$15
+ sll $15,$7,26
+ xor $30,$14
+ xor $16,$25 # Ch(e,f,g)
+ xor $14,$15,$30 # Sigma1(e)
+
+ srl $30,$31,2
+ addu $13,$16
+ lw $16,4($6) # K[1]
+ sll $15,$31,10
+ addu $13,$14
+ srl $14,$31,13
+ xor $30,$15
+ sll $15,$31,19
+ xor $30,$14
+ srl $14,$31,22
+ xor $30,$15
+ sll $15,$31,30
+ xor $30,$14
+ sw $9,4($29) # offload to ring buffer
+ xor $30,$15 # Sigma0(a)
+
+ or $14,$31,$1
+ and $15,$31,$1
+ and $14,$2
+ or $15,$14 # Maj(a,b,c)
+ addu $13,$16 # +=K[1]
+ addu $30,$15
+
+ addu $3,$13
+ addu $30,$13
+ lwl $11,15($5)
+ lwr $11,12($5)
+ srl $15,$10,24 # byte swap(2)
+ srl $16,$10,8
+ andi $17,$10,0xFF00
+ sll $10,$10,24
+ andi $16,0xFF00
+ sll $17,$17,8
+ or $10,$15
+ or $16,$17
+ or $10,$16
+ addu $14,$10,$25 # 2
+ srl $25,$3,6
+ xor $17,$7,$24
+ sll $16,$3,7
+ and $17,$3
+ srl $15,$3,11
+ xor $25,$16
+ sll $16,$3,21
+ xor $25,$15
+ srl $15,$3,25
+ xor $25,$16
+ sll $16,$3,26
+ xor $25,$15
+ xor $17,$24 # Ch(e,f,g)
+ xor $15,$16,$25 # Sigma1(e)
+
+ srl $25,$30,2
+ addu $14,$17
+ lw $17,8($6) # K[2]
+ sll $16,$30,10
+ addu $14,$15
+ srl $15,$30,13
+ xor $25,$16
+ sll $16,$30,19
+ xor $25,$15
+ srl $15,$30,22
+ xor $25,$16
+ sll $16,$30,30
+ xor $25,$15
+ sw $10,8($29) # offload to ring buffer
+ xor $25,$16 # Sigma0(a)
+
+ or $15,$30,$31
+ and $16,$30,$31
+ and $15,$1
+ or $16,$15 # Maj(a,b,c)
+ addu $14,$17 # +=K[2]
+ addu $25,$16
+
+ addu $2,$14
+ addu $25,$14
+ lwl $12,19($5)
+ lwr $12,16($5)
+ srl $16,$11,24 # byte swap(3)
+ srl $17,$11,8
+ andi $18,$11,0xFF00
+ sll $11,$11,24
+ andi $17,0xFF00
+ sll $18,$18,8
+ or $11,$16
+ or $17,$18
+ or $11,$17
+ addu $15,$11,$24 # 3
+ srl $24,$2,6
+ xor $18,$3,$7
+ sll $17,$2,7
+ and $18,$2
+ srl $16,$2,11
+ xor $24,$17
+ sll $17,$2,21
+ xor $24,$16
+ srl $16,$2,25
+ xor $24,$17
+ sll $17,$2,26
+ xor $24,$16
+ xor $18,$7 # Ch(e,f,g)
+ xor $16,$17,$24 # Sigma1(e)
+
+ srl $24,$25,2
+ addu $15,$18
+ lw $18,12($6) # K[3]
+ sll $17,$25,10
+ addu $15,$16
+ srl $16,$25,13
+ xor $24,$17
+ sll $17,$25,19
+ xor $24,$16
+ srl $16,$25,22
+ xor $24,$17
+ sll $17,$25,30
+ xor $24,$16
+ sw $11,12($29) # offload to ring buffer
+ xor $24,$17 # Sigma0(a)
+
+ or $16,$25,$30
+ and $17,$25,$30
+ and $16,$31
+ or $17,$16 # Maj(a,b,c)
+ addu $15,$18 # +=K[3]
+ addu $24,$17
+
+ addu $1,$15
+ addu $24,$15
+ lwl $13,23($5)
+ lwr $13,20($5)
+ srl $17,$12,24 # byte swap(4)
+ srl $18,$12,8
+ andi $19,$12,0xFF00
+ sll $12,$12,24
+ andi $18,0xFF00
+ sll $19,$19,8
+ or $12,$17
+ or $18,$19
+ or $12,$18
+ addu $16,$12,$7 # 4
+ srl $7,$1,6
+ xor $19,$2,$3
+ sll $18,$1,7
+ and $19,$1
+ srl $17,$1,11
+ xor $7,$18
+ sll $18,$1,21
+ xor $7,$17
+ srl $17,$1,25
+ xor $7,$18
+ sll $18,$1,26
+ xor $7,$17
+ xor $19,$3 # Ch(e,f,g)
+ xor $17,$18,$7 # Sigma1(e)
+
+ srl $7,$24,2
+ addu $16,$19
+ lw $19,16($6) # K[4]
+ sll $18,$24,10
+ addu $16,$17
+ srl $17,$24,13
+ xor $7,$18
+ sll $18,$24,19
+ xor $7,$17
+ srl $17,$24,22
+ xor $7,$18
+ sll $18,$24,30
+ xor $7,$17
+ sw $12,16($29) # offload to ring buffer
+ xor $7,$18 # Sigma0(a)
+
+ or $17,$24,$25
+ and $18,$24,$25
+ and $17,$30
+ or $18,$17 # Maj(a,b,c)
+ addu $16,$19 # +=K[4]
+ addu $7,$18
+
+ addu $31,$16
+ addu $7,$16
+ lwl $14,27($5)
+ lwr $14,24($5)
+ srl $18,$13,24 # byte swap(5)
+ srl $19,$13,8
+ andi $20,$13,0xFF00
+ sll $13,$13,24
+ andi $19,0xFF00
+ sll $20,$20,8
+ or $13,$18
+ or $19,$20
+ or $13,$19
+ addu $17,$13,$3 # 5
+ srl $3,$31,6
+ xor $20,$1,$2
+ sll $19,$31,7
+ and $20,$31
+ srl $18,$31,11
+ xor $3,$19
+ sll $19,$31,21
+ xor $3,$18
+ srl $18,$31,25
+ xor $3,$19
+ sll $19,$31,26
+ xor $3,$18
+ xor $20,$2 # Ch(e,f,g)
+ xor $18,$19,$3 # Sigma1(e)
+
+ srl $3,$7,2
+ addu $17,$20
+ lw $20,20($6) # K[5]
+ sll $19,$7,10
+ addu $17,$18
+ srl $18,$7,13
+ xor $3,$19
+ sll $19,$7,19
+ xor $3,$18
+ srl $18,$7,22
+ xor $3,$19
+ sll $19,$7,30
+ xor $3,$18
+ sw $13,20($29) # offload to ring buffer
+ xor $3,$19 # Sigma0(a)
+
+ or $18,$7,$24
+ and $19,$7,$24
+ and $18,$25
+ or $19,$18 # Maj(a,b,c)
+ addu $17,$20 # +=K[5]
+ addu $3,$19
+
+ addu $30,$17
+ addu $3,$17
+ lwl $15,31($5)
+ lwr $15,28($5)
+ srl $19,$14,24 # byte swap(6)
+ srl $20,$14,8
+ andi $21,$14,0xFF00
+ sll $14,$14,24
+ andi $20,0xFF00
+ sll $21,$21,8
+ or $14,$19
+ or $20,$21
+ or $14,$20
+ addu $18,$14,$2 # 6
+ srl $2,$30,6
+ xor $21,$31,$1
+ sll $20,$30,7
+ and $21,$30
+ srl $19,$30,11
+ xor $2,$20
+ sll $20,$30,21
+ xor $2,$19
+ srl $19,$30,25
+ xor $2,$20
+ sll $20,$30,26
+ xor $2,$19
+ xor $21,$1 # Ch(e,f,g)
+ xor $19,$20,$2 # Sigma1(e)
+
+ srl $2,$3,2
+ addu $18,$21
+ lw $21,24($6) # K[6]
+ sll $20,$3,10
+ addu $18,$19
+ srl $19,$3,13
+ xor $2,$20
+ sll $20,$3,19
+ xor $2,$19
+ srl $19,$3,22
+ xor $2,$20
+ sll $20,$3,30
+ xor $2,$19
+ sw $14,24($29) # offload to ring buffer
+ xor $2,$20 # Sigma0(a)
+
+ or $19,$3,$7
+ and $20,$3,$7
+ and $19,$24
+ or $20,$19 # Maj(a,b,c)
+ addu $18,$21 # +=K[6]
+ addu $2,$20
+
+ addu $25,$18
+ addu $2,$18
+ lwl $16,35($5)
+ lwr $16,32($5)
+ srl $20,$15,24 # byte swap(7)
+ srl $21,$15,8
+ andi $22,$15,0xFF00
+ sll $15,$15,24
+ andi $21,0xFF00
+ sll $22,$22,8
+ or $15,$20
+ or $21,$22
+ or $15,$21
+ addu $19,$15,$1 # 7
+ srl $1,$25,6
+ xor $22,$30,$31
+ sll $21,$25,7
+ and $22,$25
+ srl $20,$25,11
+ xor $1,$21
+ sll $21,$25,21
+ xor $1,$20
+ srl $20,$25,25
+ xor $1,$21
+ sll $21,$25,26
+ xor $1,$20
+ xor $22,$31 # Ch(e,f,g)
+ xor $20,$21,$1 # Sigma1(e)
+
+ srl $1,$2,2
+ addu $19,$22
+ lw $22,28($6) # K[7]
+ sll $21,$2,10
+ addu $19,$20
+ srl $20,$2,13
+ xor $1,$21
+ sll $21,$2,19
+ xor $1,$20
+ srl $20,$2,22
+ xor $1,$21
+ sll $21,$2,30
+ xor $1,$20
+ sw $15,28($29) # offload to ring buffer
+ xor $1,$21 # Sigma0(a)
+
+ or $20,$2,$3
+ and $21,$2,$3
+ and $20,$7
+ or $21,$20 # Maj(a,b,c)
+ addu $19,$22 # +=K[7]
+ addu $1,$21
+
+ addu $24,$19
+ addu $1,$19
+ lwl $17,39($5)
+ lwr $17,36($5)
+ srl $21,$16,24 # byte swap(8)
+ srl $22,$16,8
+ andi $23,$16,0xFF00
+ sll $16,$16,24
+ andi $22,0xFF00
+ sll $23,$23,8
+ or $16,$21
+ or $22,$23
+ or $16,$22
+ addu $20,$16,$31 # 8
+ srl $31,$24,6
+ xor $23,$25,$30
+ sll $22,$24,7
+ and $23,$24
+ srl $21,$24,11
+ xor $31,$22
+ sll $22,$24,21
+ xor $31,$21
+ srl $21,$24,25
+ xor $31,$22
+ sll $22,$24,26
+ xor $31,$21
+ xor $23,$30 # Ch(e,f,g)
+ xor $21,$22,$31 # Sigma1(e)
+
+ srl $31,$1,2
+ addu $20,$23
+ lw $23,32($6) # K[8]
+ sll $22,$1,10
+ addu $20,$21
+ srl $21,$1,13
+ xor $31,$22
+ sll $22,$1,19
+ xor $31,$21
+ srl $21,$1,22
+ xor $31,$22
+ sll $22,$1,30
+ xor $31,$21
+ sw $16,32($29) # offload to ring buffer
+ xor $31,$22 # Sigma0(a)
+
+ or $21,$1,$2
+ and $22,$1,$2
+ and $21,$3
+ or $22,$21 # Maj(a,b,c)
+ addu $20,$23 # +=K[8]
+ addu $31,$22
+
+ addu $7,$20
+ addu $31,$20
+ lwl $18,43($5)
+ lwr $18,40($5)
+ srl $22,$17,24 # byte swap(9)
+ srl $23,$17,8
+ andi $8,$17,0xFF00
+ sll $17,$17,24
+ andi $23,0xFF00
+ sll $8,$8,8
+ or $17,$22
+ or $23,$8
+ or $17,$23
+ addu $21,$17,$30 # 9
+ srl $30,$7,6
+ xor $8,$24,$25
+ sll $23,$7,7
+ and $8,$7
+ srl $22,$7,11
+ xor $30,$23
+ sll $23,$7,21
+ xor $30,$22
+ srl $22,$7,25
+ xor $30,$23
+ sll $23,$7,26
+ xor $30,$22
+ xor $8,$25 # Ch(e,f,g)
+ xor $22,$23,$30 # Sigma1(e)
+
+ srl $30,$31,2
+ addu $21,$8
+ lw $8,36($6) # K[9]
+ sll $23,$31,10
+ addu $21,$22
+ srl $22,$31,13
+ xor $30,$23
+ sll $23,$31,19
+ xor $30,$22
+ srl $22,$31,22
+ xor $30,$23
+ sll $23,$31,30
+ xor $30,$22
+ sw $17,36($29) # offload to ring buffer
+ xor $30,$23 # Sigma0(a)
+
+ or $22,$31,$1
+ and $23,$31,$1
+ and $22,$2
+ or $23,$22 # Maj(a,b,c)
+ addu $21,$8 # +=K[9]
+ addu $30,$23
+
+ addu $3,$21
+ addu $30,$21
+ lwl $19,47($5)
+ lwr $19,44($5)
+ srl $23,$18,24 # byte swap(10)
+ srl $8,$18,8
+ andi $9,$18,0xFF00
+ sll $18,$18,24
+ andi $8,0xFF00
+ sll $9,$9,8
+ or $18,$23
+ or $8,$9
+ or $18,$8
+ addu $22,$18,$25 # 10
+ srl $25,$3,6
+ xor $9,$7,$24
+ sll $8,$3,7
+ and $9,$3
+ srl $23,$3,11
+ xor $25,$8
+ sll $8,$3,21
+ xor $25,$23
+ srl $23,$3,25
+ xor $25,$8
+ sll $8,$3,26
+ xor $25,$23
+ xor $9,$24 # Ch(e,f,g)
+ xor $23,$8,$25 # Sigma1(e)
+
+ srl $25,$30,2
+ addu $22,$9
+ lw $9,40($6) # K[10]
+ sll $8,$30,10
+ addu $22,$23
+ srl $23,$30,13
+ xor $25,$8
+ sll $8,$30,19
+ xor $25,$23
+ srl $23,$30,22
+ xor $25,$8
+ sll $8,$30,30
+ xor $25,$23
+ sw $18,40($29) # offload to ring buffer
+ xor $25,$8 # Sigma0(a)
+
+ or $23,$30,$31
+ and $8,$30,$31
+ and $23,$1
+ or $8,$23 # Maj(a,b,c)
+ addu $22,$9 # +=K[10]
+ addu $25,$8
+
+ addu $2,$22
+ addu $25,$22
+ lwl $20,51($5)
+ lwr $20,48($5)
+ srl $8,$19,24 # byte swap(11)
+ srl $9,$19,8
+ andi $10,$19,0xFF00
+ sll $19,$19,24
+ andi $9,0xFF00
+ sll $10,$10,8
+ or $19,$8
+ or $9,$10
+ or $19,$9
+ addu $23,$19,$24 # 11
+ srl $24,$2,6
+ xor $10,$3,$7
+ sll $9,$2,7
+ and $10,$2
+ srl $8,$2,11
+ xor $24,$9
+ sll $9,$2,21
+ xor $24,$8
+ srl $8,$2,25
+ xor $24,$9
+ sll $9,$2,26
+ xor $24,$8
+ xor $10,$7 # Ch(e,f,g)
+ xor $8,$9,$24 # Sigma1(e)
+
+ srl $24,$25,2
+ addu $23,$10
+ lw $10,44($6) # K[11]
+ sll $9,$25,10
+ addu $23,$8
+ srl $8,$25,13
+ xor $24,$9
+ sll $9,$25,19
+ xor $24,$8
+ srl $8,$25,22
+ xor $24,$9
+ sll $9,$25,30
+ xor $24,$8
+ sw $19,44($29) # offload to ring buffer
+ xor $24,$9 # Sigma0(a)
+
+ or $8,$25,$30
+ and $9,$25,$30
+ and $8,$31
+ or $9,$8 # Maj(a,b,c)
+ addu $23,$10 # +=K[11]
+ addu $24,$9
+
+ addu $1,$23
+ addu $24,$23
+ lwl $21,55($5)
+ lwr $21,52($5)
+ srl $9,$20,24 # byte swap(12)
+ srl $10,$20,8
+ andi $11,$20,0xFF00
+ sll $20,$20,24
+ andi $10,0xFF00
+ sll $11,$11,8
+ or $20,$9
+ or $10,$11
+ or $20,$10
+ addu $8,$20,$7 # 12
+ srl $7,$1,6
+ xor $11,$2,$3
+ sll $10,$1,7
+ and $11,$1
+ srl $9,$1,11
+ xor $7,$10
+ sll $10,$1,21
+ xor $7,$9
+ srl $9,$1,25
+ xor $7,$10
+ sll $10,$1,26
+ xor $7,$9
+ xor $11,$3 # Ch(e,f,g)
+ xor $9,$10,$7 # Sigma1(e)
+
+ srl $7,$24,2
+ addu $8,$11
+ lw $11,48($6) # K[12]
+ sll $10,$24,10
+ addu $8,$9
+ srl $9,$24,13
+ xor $7,$10
+ sll $10,$24,19
+ xor $7,$9
+ srl $9,$24,22
+ xor $7,$10
+ sll $10,$24,30
+ xor $7,$9
+ sw $20,48($29) # offload to ring buffer
+ xor $7,$10 # Sigma0(a)
+
+ or $9,$24,$25
+ and $10,$24,$25
+ and $9,$30
+ or $10,$9 # Maj(a,b,c)
+ addu $8,$11 # +=K[12]
+ addu $7,$10
+
+ addu $31,$8
+ addu $7,$8
+ lwl $22,59($5)
+ lwr $22,56($5)
+ srl $10,$21,24 # byte swap(13)
+ srl $11,$21,8
+ andi $12,$21,0xFF00
+ sll $21,$21,24
+ andi $11,0xFF00
+ sll $12,$12,8
+ or $21,$10
+ or $11,$12
+ or $21,$11
+ addu $9,$21,$3 # 13
+ srl $3,$31,6
+ xor $12,$1,$2
+ sll $11,$31,7
+ and $12,$31
+ srl $10,$31,11
+ xor $3,$11
+ sll $11,$31,21
+ xor $3,$10
+ srl $10,$31,25
+ xor $3,$11
+ sll $11,$31,26
+ xor $3,$10
+ xor $12,$2 # Ch(e,f,g)
+ xor $10,$11,$3 # Sigma1(e)
+
+ srl $3,$7,2
+ addu $9,$12
+ lw $12,52($6) # K[13]
+ sll $11,$7,10
+ addu $9,$10
+ srl $10,$7,13
+ xor $3,$11
+ sll $11,$7,19
+ xor $3,$10
+ srl $10,$7,22
+ xor $3,$11
+ sll $11,$7,30
+ xor $3,$10
+ sw $21,52($29) # offload to ring buffer
+ xor $3,$11 # Sigma0(a)
+
+ or $10,$7,$24
+ and $11,$7,$24
+ and $10,$25
+ or $11,$10 # Maj(a,b,c)
+ addu $9,$12 # +=K[13]
+ addu $3,$11
+
+ addu $30,$9
+ addu $3,$9
+ lw $8,0($29) # prefetch from ring buffer
+ lwl $23,63($5)
+ lwr $23,60($5)
+ srl $11,$22,24 # byte swap(14)
+ srl $12,$22,8
+ andi $13,$22,0xFF00
+ sll $22,$22,24
+ andi $12,0xFF00
+ sll $13,$13,8
+ or $22,$11
+ or $12,$13
+ or $22,$12
+ addu $10,$22,$2 # 14
+ srl $2,$30,6
+ xor $13,$31,$1
+ sll $12,$30,7
+ and $13,$30
+ srl $11,$30,11
+ xor $2,$12
+ sll $12,$30,21
+ xor $2,$11
+ srl $11,$30,25
+ xor $2,$12
+ sll $12,$30,26
+ xor $2,$11
+ xor $13,$1 # Ch(e,f,g)
+ xor $11,$12,$2 # Sigma1(e)
+
+ srl $2,$3,2
+ addu $10,$13
+ lw $13,56($6) # K[14]
+ sll $12,$3,10
+ addu $10,$11
+ srl $11,$3,13
+ xor $2,$12
+ sll $12,$3,19
+ xor $2,$11
+ srl $11,$3,22
+ xor $2,$12
+ sll $12,$3,30
+ xor $2,$11
+ sw $22,56($29) # offload to ring buffer
+ xor $2,$12 # Sigma0(a)
+
+ or $11,$3,$7
+ and $12,$3,$7
+ and $11,$24
+ or $12,$11 # Maj(a,b,c)
+ addu $10,$13 # +=K[14]
+ addu $2,$12
+
+ addu $25,$10
+ addu $2,$10
+ lw $9,4($29) # prefetch from ring buffer
+ srl $12,$23,24 # byte swap(15)
+ srl $13,$23,8
+ andi $14,$23,0xFF00
+ sll $23,$23,24
+ andi $13,0xFF00
+ sll $14,$14,8
+ or $23,$12
+ or $13,$14
+ or $23,$13
+ addu $11,$23,$1 # 15
+ srl $1,$25,6
+ xor $14,$30,$31
+ sll $13,$25,7
+ and $14,$25
+ srl $12,$25,11
+ xor $1,$13
+ sll $13,$25,21
+ xor $1,$12
+ srl $12,$25,25
+ xor $1,$13
+ sll $13,$25,26
+ xor $1,$12
+ xor $14,$31 # Ch(e,f,g)
+ xor $12,$13,$1 # Sigma1(e)
+
+ srl $1,$2,2
+ addu $11,$14
+ lw $14,60($6) # K[15]
+ sll $13,$2,10
+ addu $11,$12
+ srl $12,$2,13
+ xor $1,$13
+ sll $13,$2,19
+ xor $1,$12
+ srl $12,$2,22
+ xor $1,$13
+ sll $13,$2,30
+ xor $1,$12
+ sw $23,60($29) # offload to ring buffer
+ xor $1,$13 # Sigma0(a)
+
+ or $12,$2,$3
+ and $13,$2,$3
+ and $12,$7
+ or $13,$12 # Maj(a,b,c)
+ addu $11,$14 # +=K[15]
+ addu $1,$13
+
+ addu $24,$11
+ addu $1,$11
+ lw $10,8($29) # prefetch from ring buffer
+ b .L16_xx
+.align 4
+.L16_xx:
+ srl $14,$9,3 # Xupdate(16)
+ addu $8,$17 # +=X[i+9]
+ sll $13,$9,14
+ srl $12,$9,7
+ xor $14,$13
+ sll $13,11
+ xor $14,$12
+ srl $12,$9,18
+ xor $14,$13
+
+ srl $15,$22,10
+ xor $14,$12 # sigma0(X[i+1])
+ sll $13,$22,13
+ addu $8,$14
+ srl $12,$22,17
+ xor $15,$13
+ sll $13,2
+ xor $15,$12
+ srl $12,$22,19
+ xor $15,$13
+
+ xor $15,$12 # sigma1(X[i+14])
+ addu $8,$15
+ addu $12,$8,$31 # 16
+ srl $31,$24,6
+ xor $15,$25,$30
+ sll $14,$24,7
+ and $15,$24
+ srl $13,$24,11
+ xor $31,$14
+ sll $14,$24,21
+ xor $31,$13
+ srl $13,$24,25
+ xor $31,$14
+ sll $14,$24,26
+ xor $31,$13
+ xor $15,$30 # Ch(e,f,g)
+ xor $13,$14,$31 # Sigma1(e)
+
+ srl $31,$1,2
+ addu $12,$15
+ lw $15,64($6) # K[16]
+ sll $14,$1,10
+ addu $12,$13
+ srl $13,$1,13
+ xor $31,$14
+ sll $14,$1,19
+ xor $31,$13
+ srl $13,$1,22
+ xor $31,$14
+ sll $14,$1,30
+ xor $31,$13
+ sw $8,0($29) # offload to ring buffer
+ xor $31,$14 # Sigma0(a)
+
+ or $13,$1,$2
+ and $14,$1,$2
+ and $13,$3
+ or $14,$13 # Maj(a,b,c)
+ addu $12,$15 # +=K[16]
+ addu $31,$14
+
+ addu $7,$12
+ addu $31,$12
+ lw $11,12($29) # prefetch from ring buffer
+ srl $15,$10,3 # Xupdate(17)
+ addu $9,$18 # +=X[i+9]
+ sll $14,$10,14
+ srl $13,$10,7
+ xor $15,$14
+ sll $14,11
+ xor $15,$13
+ srl $13,$10,18
+ xor $15,$14
+
+ srl $16,$23,10
+ xor $15,$13 # sigma0(X[i+1])
+ sll $14,$23,13
+ addu $9,$15
+ srl $13,$23,17
+ xor $16,$14
+ sll $14,2
+ xor $16,$13
+ srl $13,$23,19
+ xor $16,$14
+
+ xor $16,$13 # sigma1(X[i+14])
+ addu $9,$16
+ addu $13,$9,$30 # 17
+ srl $30,$7,6
+ xor $16,$24,$25
+ sll $15,$7,7
+ and $16,$7
+ srl $14,$7,11
+ xor $30,$15
+ sll $15,$7,21
+ xor $30,$14
+ srl $14,$7,25
+ xor $30,$15
+ sll $15,$7,26
+ xor $30,$14
+ xor $16,$25 # Ch(e,f,g)
+ xor $14,$15,$30 # Sigma1(e)
+
+ srl $30,$31,2
+ addu $13,$16
+ lw $16,68($6) # K[17]
+ sll $15,$31,10
+ addu $13,$14
+ srl $14,$31,13
+ xor $30,$15
+ sll $15,$31,19
+ xor $30,$14
+ srl $14,$31,22
+ xor $30,$15
+ sll $15,$31,30
+ xor $30,$14
+ sw $9,4($29) # offload to ring buffer
+ xor $30,$15 # Sigma0(a)
+
+ or $14,$31,$1
+ and $15,$31,$1
+ and $14,$2
+ or $15,$14 # Maj(a,b,c)
+ addu $13,$16 # +=K[17]
+ addu $30,$15
+
+ addu $3,$13
+ addu $30,$13
+ lw $12,16($29) # prefetch from ring buffer
+ srl $16,$11,3 # Xupdate(18)
+ addu $10,$19 # +=X[i+9]
+ sll $15,$11,14
+ srl $14,$11,7
+ xor $16,$15
+ sll $15,11
+ xor $16,$14
+ srl $14,$11,18
+ xor $16,$15
+
+ srl $17,$8,10
+ xor $16,$14 # sigma0(X[i+1])
+ sll $15,$8,13
+ addu $10,$16
+ srl $14,$8,17
+ xor $17,$15
+ sll $15,2
+ xor $17,$14
+ srl $14,$8,19
+ xor $17,$15
+
+ xor $17,$14 # sigma1(X[i+14])
+ addu $10,$17
+ addu $14,$10,$25 # 18
+ srl $25,$3,6
+ xor $17,$7,$24
+ sll $16,$3,7
+ and $17,$3
+ srl $15,$3,11
+ xor $25,$16
+ sll $16,$3,21
+ xor $25,$15
+ srl $15,$3,25
+ xor $25,$16
+ sll $16,$3,26
+ xor $25,$15
+ xor $17,$24 # Ch(e,f,g)
+ xor $15,$16,$25 # Sigma1(e)
+
+ srl $25,$30,2
+ addu $14,$17
+ lw $17,72($6) # K[18]
+ sll $16,$30,10
+ addu $14,$15
+ srl $15,$30,13
+ xor $25,$16
+ sll $16,$30,19
+ xor $25,$15
+ srl $15,$30,22
+ xor $25,$16
+ sll $16,$30,30
+ xor $25,$15
+ sw $10,8($29) # offload to ring buffer
+ xor $25,$16 # Sigma0(a)
+
+ or $15,$30,$31
+ and $16,$30,$31
+ and $15,$1
+ or $16,$15 # Maj(a,b,c)
+ addu $14,$17 # +=K[18]
+ addu $25,$16
+
+ addu $2,$14
+ addu $25,$14
+ lw $13,20($29) # prefetch from ring buffer
+ srl $17,$12,3 # Xupdate(19)
+ addu $11,$20 # +=X[i+9]
+ sll $16,$12,14
+ srl $15,$12,7
+ xor $17,$16
+ sll $16,11
+ xor $17,$15
+ srl $15,$12,18
+ xor $17,$16
+
+ srl $18,$9,10
+ xor $17,$15 # sigma0(X[i+1])
+ sll $16,$9,13
+ addu $11,$17
+ srl $15,$9,17
+ xor $18,$16
+ sll $16,2
+ xor $18,$15
+ srl $15,$9,19
+ xor $18,$16
+
+ xor $18,$15 # sigma1(X[i+14])
+ addu $11,$18
+ addu $15,$11,$24 # 19
+ srl $24,$2,6
+ xor $18,$3,$7
+ sll $17,$2,7
+ and $18,$2
+ srl $16,$2,11
+ xor $24,$17
+ sll $17,$2,21
+ xor $24,$16
+ srl $16,$2,25
+ xor $24,$17
+ sll $17,$2,26
+ xor $24,$16
+ xor $18,$7 # Ch(e,f,g)
+ xor $16,$17,$24 # Sigma1(e)
+
+ srl $24,$25,2
+ addu $15,$18
+ lw $18,76($6) # K[19]
+ sll $17,$25,10
+ addu $15,$16
+ srl $16,$25,13
+ xor $24,$17
+ sll $17,$25,19
+ xor $24,$16
+ srl $16,$25,22
+ xor $24,$17
+ sll $17,$25,30
+ xor $24,$16
+ sw $11,12($29) # offload to ring buffer
+ xor $24,$17 # Sigma0(a)
+
+ or $16,$25,$30
+ and $17,$25,$30
+ and $16,$31
+ or $17,$16 # Maj(a,b,c)
+ addu $15,$18 # +=K[19]
+ addu $24,$17
+
+ addu $1,$15
+ addu $24,$15
+ lw $14,24($29) # prefetch from ring buffer
+ srl $18,$13,3 # Xupdate(20)
+ addu $12,$21 # +=X[i+9]
+ sll $17,$13,14
+ srl $16,$13,7
+ xor $18,$17
+ sll $17,11
+ xor $18,$16
+ srl $16,$13,18
+ xor $18,$17
+
+ srl $19,$10,10
+ xor $18,$16 # sigma0(X[i+1])
+ sll $17,$10,13
+ addu $12,$18
+ srl $16,$10,17
+ xor $19,$17
+ sll $17,2
+ xor $19,$16
+ srl $16,$10,19
+ xor $19,$17
+
+ xor $19,$16 # sigma1(X[i+14])
+ addu $12,$19
+ addu $16,$12,$7 # 20
+ srl $7,$1,6
+ xor $19,$2,$3
+ sll $18,$1,7
+ and $19,$1
+ srl $17,$1,11
+ xor $7,$18
+ sll $18,$1,21
+ xor $7,$17
+ srl $17,$1,25
+ xor $7,$18
+ sll $18,$1,26
+ xor $7,$17
+ xor $19,$3 # Ch(e,f,g)
+ xor $17,$18,$7 # Sigma1(e)
+
+ srl $7,$24,2
+ addu $16,$19
+ lw $19,80($6) # K[20]
+ sll $18,$24,10
+ addu $16,$17
+ srl $17,$24,13
+ xor $7,$18
+ sll $18,$24,19
+ xor $7,$17
+ srl $17,$24,22
+ xor $7,$18
+ sll $18,$24,30
+ xor $7,$17
+ sw $12,16($29) # offload to ring buffer
+ xor $7,$18 # Sigma0(a)
+
+ or $17,$24,$25
+ and $18,$24,$25
+ and $17,$30
+ or $18,$17 # Maj(a,b,c)
+ addu $16,$19 # +=K[20]
+ addu $7,$18
+
+ addu $31,$16
+ addu $7,$16
+ lw $15,28($29) # prefetch from ring buffer
+ srl $19,$14,3 # Xupdate(21)
+ addu $13,$22 # +=X[i+9]
+ sll $18,$14,14
+ srl $17,$14,7
+ xor $19,$18
+ sll $18,11
+ xor $19,$17
+ srl $17,$14,18
+ xor $19,$18
+
+ srl $20,$11,10
+ xor $19,$17 # sigma0(X[i+1])
+ sll $18,$11,13
+ addu $13,$19
+ srl $17,$11,17
+ xor $20,$18
+ sll $18,2
+ xor $20,$17
+ srl $17,$11,19
+ xor $20,$18
+
+ xor $20,$17 # sigma1(X[i+14])
+ addu $13,$20
+ addu $17,$13,$3 # 21
+ srl $3,$31,6
+ xor $20,$1,$2
+ sll $19,$31,7
+ and $20,$31
+ srl $18,$31,11
+ xor $3,$19
+ sll $19,$31,21
+ xor $3,$18
+ srl $18,$31,25
+ xor $3,$19
+ sll $19,$31,26
+ xor $3,$18
+ xor $20,$2 # Ch(e,f,g)
+ xor $18,$19,$3 # Sigma1(e)
+
+ srl $3,$7,2
+ addu $17,$20
+ lw $20,84($6) # K[21]
+ sll $19,$7,10
+ addu $17,$18
+ srl $18,$7,13
+ xor $3,$19
+ sll $19,$7,19
+ xor $3,$18
+ srl $18,$7,22
+ xor $3,$19
+ sll $19,$7,30
+ xor $3,$18
+ sw $13,20($29) # offload to ring buffer
+ xor $3,$19 # Sigma0(a)
+
+ or $18,$7,$24
+ and $19,$7,$24
+ and $18,$25
+ or $19,$18 # Maj(a,b,c)
+ addu $17,$20 # +=K[21]
+ addu $3,$19
+
+ addu $30,$17
+ addu $3,$17
+ lw $16,32($29) # prefetch from ring buffer
+ srl $20,$15,3 # Xupdate(22)
+ addu $14,$23 # +=X[i+9]
+ sll $19,$15,14
+ srl $18,$15,7
+ xor $20,$19
+ sll $19,11
+ xor $20,$18
+ srl $18,$15,18
+ xor $20,$19
+
+ srl $21,$12,10
+ xor $20,$18 # sigma0(X[i+1])
+ sll $19,$12,13
+ addu $14,$20
+ srl $18,$12,17
+ xor $21,$19
+ sll $19,2
+ xor $21,$18
+ srl $18,$12,19
+ xor $21,$19
+
+ xor $21,$18 # sigma1(X[i+14])
+ addu $14,$21
+ addu $18,$14,$2 # 22
+ srl $2,$30,6
+ xor $21,$31,$1
+ sll $20,$30,7
+ and $21,$30
+ srl $19,$30,11
+ xor $2,$20
+ sll $20,$30,21
+ xor $2,$19
+ srl $19,$30,25
+ xor $2,$20
+ sll $20,$30,26
+ xor $2,$19
+ xor $21,$1 # Ch(e,f,g)
+ xor $19,$20,$2 # Sigma1(e)
+
+ srl $2,$3,2
+ addu $18,$21
+ lw $21,88($6) # K[22]
+ sll $20,$3,10
+ addu $18,$19
+ srl $19,$3,13
+ xor $2,$20
+ sll $20,$3,19
+ xor $2,$19
+ srl $19,$3,22
+ xor $2,$20
+ sll $20,$3,30
+ xor $2,$19
+ sw $14,24($29) # offload to ring buffer
+ xor $2,$20 # Sigma0(a)
+
+ or $19,$3,$7
+ and $20,$3,$7
+ and $19,$24
+ or $20,$19 # Maj(a,b,c)
+ addu $18,$21 # +=K[22]
+ addu $2,$20
+
+ addu $25,$18
+ addu $2,$18
+ lw $17,36($29) # prefetch from ring buffer
+ srl $21,$16,3 # Xupdate(23)
+ addu $15,$8 # +=X[i+9]
+ sll $20,$16,14
+ srl $19,$16,7
+ xor $21,$20
+ sll $20,11
+ xor $21,$19
+ srl $19,$16,18
+ xor $21,$20
+
+ srl $22,$13,10
+ xor $21,$19 # sigma0(X[i+1])
+ sll $20,$13,13
+ addu $15,$21
+ srl $19,$13,17
+ xor $22,$20
+ sll $20,2
+ xor $22,$19
+ srl $19,$13,19
+ xor $22,$20
+
+ xor $22,$19 # sigma1(X[i+14])
+ addu $15,$22
+ addu $19,$15,$1 # 23
+ srl $1,$25,6
+ xor $22,$30,$31
+ sll $21,$25,7
+ and $22,$25
+ srl $20,$25,11
+ xor $1,$21
+ sll $21,$25,21
+ xor $1,$20
+ srl $20,$25,25
+ xor $1,$21
+ sll $21,$25,26
+ xor $1,$20
+ xor $22,$31 # Ch(e,f,g)
+ xor $20,$21,$1 # Sigma1(e)
+
+ srl $1,$2,2
+ addu $19,$22
+ lw $22,92($6) # K[23]
+ sll $21,$2,10
+ addu $19,$20
+ srl $20,$2,13
+ xor $1,$21
+ sll $21,$2,19
+ xor $1,$20
+ srl $20,$2,22
+ xor $1,$21
+ sll $21,$2,30
+ xor $1,$20
+ sw $15,28($29) # offload to ring buffer
+ xor $1,$21 # Sigma0(a)
+
+ or $20,$2,$3
+ and $21,$2,$3
+ and $20,$7
+ or $21,$20 # Maj(a,b,c)
+ addu $19,$22 # +=K[23]
+ addu $1,$21
+
+ addu $24,$19
+ addu $1,$19
+ lw $18,40($29) # prefetch from ring buffer
+ srl $22,$17,3 # Xupdate(24)
+ addu $16,$9 # +=X[i+9]
+ sll $21,$17,14
+ srl $20,$17,7
+ xor $22,$21
+ sll $21,11
+ xor $22,$20
+ srl $20,$17,18
+ xor $22,$21
+
+ srl $23,$14,10
+ xor $22,$20 # sigma0(X[i+1])
+ sll $21,$14,13
+ addu $16,$22
+ srl $20,$14,17
+ xor $23,$21
+ sll $21,2
+ xor $23,$20
+ srl $20,$14,19
+ xor $23,$21
+
+ xor $23,$20 # sigma1(X[i+14])
+ addu $16,$23
+ addu $20,$16,$31 # 24
+ srl $31,$24,6
+ xor $23,$25,$30
+ sll $22,$24,7
+ and $23,$24
+ srl $21,$24,11
+ xor $31,$22
+ sll $22,$24,21
+ xor $31,$21
+ srl $21,$24,25
+ xor $31,$22
+ sll $22,$24,26
+ xor $31,$21
+ xor $23,$30 # Ch(e,f,g)
+ xor $21,$22,$31 # Sigma1(e)
+
+ srl $31,$1,2
+ addu $20,$23
+ lw $23,96($6) # K[24]
+ sll $22,$1,10
+ addu $20,$21
+ srl $21,$1,13
+ xor $31,$22
+ sll $22,$1,19
+ xor $31,$21
+ srl $21,$1,22
+ xor $31,$22
+ sll $22,$1,30
+ xor $31,$21
+ sw $16,32($29) # offload to ring buffer
+ xor $31,$22 # Sigma0(a)
+
+ or $21,$1,$2
+ and $22,$1,$2
+ and $21,$3
+ or $22,$21 # Maj(a,b,c)
+ addu $20,$23 # +=K[24]
+ addu $31,$22
+
+ addu $7,$20
+ addu $31,$20
+ lw $19,44($29) # prefetch from ring buffer
+ srl $23,$18,3 # Xupdate(25)
+ addu $17,$10 # +=X[i+9]
+ sll $22,$18,14
+ srl $21,$18,7
+ xor $23,$22
+ sll $22,11
+ xor $23,$21
+ srl $21,$18,18
+ xor $23,$22
+
+ srl $8,$15,10
+ xor $23,$21 # sigma0(X[i+1])
+ sll $22,$15,13
+ addu $17,$23
+ srl $21,$15,17
+ xor $8,$22
+ sll $22,2
+ xor $8,$21
+ srl $21,$15,19
+ xor $8,$22
+
+ xor $8,$21 # sigma1(X[i+14])
+ addu $17,$8
+ addu $21,$17,$30 # 25
+ srl $30,$7,6
+ xor $8,$24,$25
+ sll $23,$7,7
+ and $8,$7
+ srl $22,$7,11
+ xor $30,$23
+ sll $23,$7,21
+ xor $30,$22
+ srl $22,$7,25
+ xor $30,$23
+ sll $23,$7,26
+ xor $30,$22
+ xor $8,$25 # Ch(e,f,g)
+ xor $22,$23,$30 # Sigma1(e)
+
+ srl $30,$31,2
+ addu $21,$8
+ lw $8,100($6) # K[25]
+ sll $23,$31,10
+ addu $21,$22
+ srl $22,$31,13
+ xor $30,$23
+ sll $23,$31,19
+ xor $30,$22
+ srl $22,$31,22
+ xor $30,$23
+ sll $23,$31,30
+ xor $30,$22
+ sw $17,36($29) # offload to ring buffer
+ xor $30,$23 # Sigma0(a)
+
+ or $22,$31,$1
+ and $23,$31,$1
+ and $22,$2
+ or $23,$22 # Maj(a,b,c)
+ addu $21,$8 # +=K[25]
+ addu $30,$23
+
+ addu $3,$21
+ addu $30,$21
+ lw $20,48($29) # prefetch from ring buffer
+ srl $8,$19,3 # Xupdate(26)
+ addu $18,$11 # +=X[i+9]
+ sll $23,$19,14
+ srl $22,$19,7
+ xor $8,$23
+ sll $23,11
+ xor $8,$22
+ srl $22,$19,18
+ xor $8,$23
+
+ srl $9,$16,10
+ xor $8,$22 # sigma0(X[i+1])
+ sll $23,$16,13
+ addu $18,$8
+ srl $22,$16,17
+ xor $9,$23
+ sll $23,2
+ xor $9,$22
+ srl $22,$16,19
+ xor $9,$23
+
+ xor $9,$22 # sigma1(X[i+14])
+ addu $18,$9
+ addu $22,$18,$25 # 26
+ srl $25,$3,6
+ xor $9,$7,$24
+ sll $8,$3,7
+ and $9,$3
+ srl $23,$3,11
+ xor $25,$8
+ sll $8,$3,21
+ xor $25,$23
+ srl $23,$3,25
+ xor $25,$8
+ sll $8,$3,26
+ xor $25,$23
+ xor $9,$24 # Ch(e,f,g)
+ xor $23,$8,$25 # Sigma1(e)
+
+ srl $25,$30,2
+ addu $22,$9
+ lw $9,104($6) # K[26]
+ sll $8,$30,10
+ addu $22,$23
+ srl $23,$30,13
+ xor $25,$8
+ sll $8,$30,19
+ xor $25,$23
+ srl $23,$30,22
+ xor $25,$8
+ sll $8,$30,30
+ xor $25,$23
+ sw $18,40($29) # offload to ring buffer
+ xor $25,$8 # Sigma0(a)
+
+ or $23,$30,$31
+ and $8,$30,$31
+ and $23,$1
+ or $8,$23 # Maj(a,b,c)
+ addu $22,$9 # +=K[26]
+ addu $25,$8
+
+ addu $2,$22
+ addu $25,$22
+ lw $21,52($29) # prefetch from ring buffer
+ srl $9,$20,3 # Xupdate(27)
+ addu $19,$12 # +=X[i+9]
+ sll $8,$20,14
+ srl $23,$20,7
+ xor $9,$8
+ sll $8,11
+ xor $9,$23
+ srl $23,$20,18
+ xor $9,$8
+
+ srl $10,$17,10
+ xor $9,$23 # sigma0(X[i+1])
+ sll $8,$17,13
+ addu $19,$9
+ srl $23,$17,17
+ xor $10,$8
+ sll $8,2
+ xor $10,$23
+ srl $23,$17,19
+ xor $10,$8
+
+ xor $10,$23 # sigma1(X[i+14])
+ addu $19,$10
+ addu $23,$19,$24 # 27
+ srl $24,$2,6
+ xor $10,$3,$7
+ sll $9,$2,7
+ and $10,$2
+ srl $8,$2,11
+ xor $24,$9
+ sll $9,$2,21
+ xor $24,$8
+ srl $8,$2,25
+ xor $24,$9
+ sll $9,$2,26
+ xor $24,$8
+ xor $10,$7 # Ch(e,f,g)
+ xor $8,$9,$24 # Sigma1(e)
+
+ srl $24,$25,2
+ addu $23,$10
+ lw $10,108($6) # K[27]
+ sll $9,$25,10
+ addu $23,$8
+ srl $8,$25,13
+ xor $24,$9
+ sll $9,$25,19
+ xor $24,$8
+ srl $8,$25,22
+ xor $24,$9
+ sll $9,$25,30
+ xor $24,$8
+ sw $19,44($29) # offload to ring buffer
+ xor $24,$9 # Sigma0(a)
+
+ or $8,$25,$30
+ and $9,$25,$30
+ and $8,$31
+ or $9,$8 # Maj(a,b,c)
+ addu $23,$10 # +=K[27]
+ addu $24,$9
+
+ addu $1,$23
+ addu $24,$23
+ lw $22,56($29) # prefetch from ring buffer
+ srl $10,$21,3 # Xupdate(28)
+ addu $20,$13 # +=X[i+9]
+ sll $9,$21,14
+ srl $8,$21,7
+ xor $10,$9
+ sll $9,11
+ xor $10,$8
+ srl $8,$21,18
+ xor $10,$9
+
+ srl $11,$18,10
+ xor $10,$8 # sigma0(X[i+1])
+ sll $9,$18,13
+ addu $20,$10
+ srl $8,$18,17
+ xor $11,$9
+ sll $9,2
+ xor $11,$8
+ srl $8,$18,19
+ xor $11,$9
+
+ xor $11,$8 # sigma1(X[i+14])
+ addu $20,$11
+ addu $8,$20,$7 # 28
+ srl $7,$1,6
+ xor $11,$2,$3
+ sll $10,$1,7
+ and $11,$1
+ srl $9,$1,11
+ xor $7,$10
+ sll $10,$1,21
+ xor $7,$9
+ srl $9,$1,25
+ xor $7,$10
+ sll $10,$1,26
+ xor $7,$9
+ xor $11,$3 # Ch(e,f,g)
+ xor $9,$10,$7 # Sigma1(e)
+
+ srl $7,$24,2
+ addu $8,$11
+ lw $11,112($6) # K[28]
+ sll $10,$24,10
+ addu $8,$9
+ srl $9,$24,13
+ xor $7,$10
+ sll $10,$24,19
+ xor $7,$9
+ srl $9,$24,22
+ xor $7,$10
+ sll $10,$24,30
+ xor $7,$9
+ sw $20,48($29) # offload to ring buffer
+ xor $7,$10 # Sigma0(a)
+
+ or $9,$24,$25
+ and $10,$24,$25
+ and $9,$30
+ or $10,$9 # Maj(a,b,c)
+ addu $8,$11 # +=K[28]
+ addu $7,$10
+
+ addu $31,$8
+ addu $7,$8
+ lw $23,60($29) # prefetch from ring buffer
+ srl $11,$22,3 # Xupdate(29)
+ addu $21,$14 # +=X[i+9]
+ sll $10,$22,14
+ srl $9,$22,7
+ xor $11,$10
+ sll $10,11
+ xor $11,$9
+ srl $9,$22,18
+ xor $11,$10
+
+ srl $12,$19,10
+ xor $11,$9 # sigma0(X[i+1])
+ sll $10,$19,13
+ addu $21,$11
+ srl $9,$19,17
+ xor $12,$10
+ sll $10,2
+ xor $12,$9
+ srl $9,$19,19
+ xor $12,$10
+
+ xor $12,$9 # sigma1(X[i+14])
+ addu $21,$12
+ addu $9,$21,$3 # 29
+ srl $3,$31,6
+ xor $12,$1,$2
+ sll $11,$31,7
+ and $12,$31
+ srl $10,$31,11
+ xor $3,$11
+ sll $11,$31,21
+ xor $3,$10
+ srl $10,$31,25
+ xor $3,$11
+ sll $11,$31,26
+ xor $3,$10
+ xor $12,$2 # Ch(e,f,g)
+ xor $10,$11,$3 # Sigma1(e)
+
+ srl $3,$7,2
+ addu $9,$12
+ lw $12,116($6) # K[29]
+ sll $11,$7,10
+ addu $9,$10
+ srl $10,$7,13
+ xor $3,$11
+ sll $11,$7,19
+ xor $3,$10
+ srl $10,$7,22
+ xor $3,$11
+ sll $11,$7,30
+ xor $3,$10
+ sw $21,52($29) # offload to ring buffer
+ xor $3,$11 # Sigma0(a)
+
+ or $10,$7,$24
+ and $11,$7,$24
+ and $10,$25
+ or $11,$10 # Maj(a,b,c)
+ addu $9,$12 # +=K[29]
+ addu $3,$11
+
+ addu $30,$9
+ addu $3,$9
+ lw $8,0($29) # prefetch from ring buffer
+ srl $12,$23,3 # Xupdate(30)
+ addu $22,$15 # +=X[i+9]
+ sll $11,$23,14
+ srl $10,$23,7
+ xor $12,$11
+ sll $11,11
+ xor $12,$10
+ srl $10,$23,18
+ xor $12,$11
+
+ srl $13,$20,10
+ xor $12,$10 # sigma0(X[i+1])
+ sll $11,$20,13
+ addu $22,$12
+ srl $10,$20,17
+ xor $13,$11
+ sll $11,2
+ xor $13,$10
+ srl $10,$20,19
+ xor $13,$11
+
+ xor $13,$10 # sigma1(X[i+14])
+ addu $22,$13
+ addu $10,$22,$2 # 30
+ srl $2,$30,6
+ xor $13,$31,$1
+ sll $12,$30,7
+ and $13,$30
+ srl $11,$30,11
+ xor $2,$12
+ sll $12,$30,21
+ xor $2,$11
+ srl $11,$30,25
+ xor $2,$12
+ sll $12,$30,26
+ xor $2,$11
+ xor $13,$1 # Ch(e,f,g)
+ xor $11,$12,$2 # Sigma1(e)
+
+ srl $2,$3,2
+ addu $10,$13
+ lw $13,120($6) # K[30]
+ sll $12,$3,10
+ addu $10,$11
+ srl $11,$3,13
+ xor $2,$12
+ sll $12,$3,19
+ xor $2,$11
+ srl $11,$3,22
+ xor $2,$12
+ sll $12,$3,30
+ xor $2,$11
+ sw $22,56($29) # offload to ring buffer
+ xor $2,$12 # Sigma0(a)
+
+ or $11,$3,$7
+ and $12,$3,$7
+ and $11,$24
+ or $12,$11 # Maj(a,b,c)
+ addu $10,$13 # +=K[30]
+ addu $2,$12
+
+ addu $25,$10
+ addu $2,$10
+ lw $9,4($29) # prefetch from ring buffer
+ srl $13,$8,3 # Xupdate(31)
+ addu $23,$16 # +=X[i+9]
+ sll $12,$8,14
+ srl $11,$8,7
+ xor $13,$12
+ sll $12,11
+ xor $13,$11
+ srl $11,$8,18
+ xor $13,$12
+
+ srl $14,$21,10
+ xor $13,$11 # sigma0(X[i+1])
+ sll $12,$21,13
+ addu $23,$13
+ srl $11,$21,17
+ xor $14,$12
+ sll $12,2
+ xor $14,$11
+ srl $11,$21,19
+ xor $14,$12
+
+ xor $14,$11 # sigma1(X[i+14])
+ addu $23,$14
+ addu $11,$23,$1 # 31
+ srl $1,$25,6
+ xor $14,$30,$31
+ sll $13,$25,7
+ and $14,$25
+ srl $12,$25,11
+ xor $1,$13
+ sll $13,$25,21
+ xor $1,$12
+ srl $12,$25,25
+ xor $1,$13
+ sll $13,$25,26
+ xor $1,$12
+ xor $14,$31 # Ch(e,f,g)
+ xor $12,$13,$1 # Sigma1(e)
+
+ srl $1,$2,2
+ addu $11,$14
+ lw $14,124($6) # K[31]
+ sll $13,$2,10
+ addu $11,$12
+ srl $12,$2,13
+ xor $1,$13
+ sll $13,$2,19
+ xor $1,$12
+ srl $12,$2,22
+ xor $1,$13
+ sll $13,$2,30
+ xor $1,$12
+ sw $23,60($29) # offload to ring buffer
+ xor $1,$13 # Sigma0(a)
+
+ or $12,$2,$3
+ and $13,$2,$3
+ and $12,$7
+ or $13,$12 # Maj(a,b,c)
+ addu $11,$14 # +=K[31]
+ addu $1,$13
+
+ addu $24,$11
+ addu $1,$11
+ lw $10,8($29) # prefetch from ring buffer
+ and $14,0xfff
+ li $15,2290
+ .set noreorder
+ bne $14,$15,.L16_xx
+ add $6,16*4 # Ktbl+=16
+
+ lw $23,16*4($29) # restore pointer to the end of input
+ lw $8,0*4($4)
+ lw $9,1*4($4)
+ lw $10,2*4($4)
+ add $5,16*4
+ lw $11,3*4($4)
+ addu $1,$8
+ lw $12,4*4($4)
+ addu $2,$9
+ lw $13,5*4($4)
+ addu $3,$10
+ lw $14,6*4($4)
+ addu $7,$11
+ lw $15,7*4($4)
+ addu $24,$12
+ sw $1,0*4($4)
+ addu $25,$13
+ sw $2,1*4($4)
+ addu $30,$14
+ sw $3,2*4($4)
+ addu $31,$15
+ sw $7,3*4($4)
+ sw $24,4*4($4)
+ sw $25,5*4($4)
+ sw $30,6*4($4)
+ sw $31,7*4($4)
+
+ bnel $5,$23,.Loop
+ sub $6,192 # rewind $6
+
+ lw $31,128-1*4($29)
+ lw $30,128-2*4($29)
+ lw $23,128-3*4($29)
+ lw $22,128-4*4($29)
+ lw $21,128-5*4($29)
+ lw $20,128-6*4($29)
+ lw $19,128-7*4($29)
+ lw $18,128-8*4($29)
+ lw $17,128-9*4($29)
+ lw $16,128-10*4($29)
+ jr $31
+ add $29,128
+.end sha256_block_data_order
+
+.rdata
+.align 5
+K256:
+ .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+ .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+ .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+ .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+ .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+ .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+ .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+ .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+ .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+ .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+ .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+ .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+ .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+ .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+ .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+ .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+.asciiz "SHA256 for MIPS, CRYPTOGAMS by <appro@openssl.org>"
+.align 5
+