mirror of
https://github.com/v2fly/v2ray-core.git
synced 2025-01-03 07:56:42 -05:00
Merge pull request #215 from aead/master
Replace ChaCha20 implementation with an optimized version
This commit is contained in:
commit
08526a32ff
@ -7,6 +7,7 @@ before_install:
|
||||
- go get golang.org/x/tools/cmd/cover
|
||||
- go get github.com/onsi/gomega
|
||||
- go get github.com/onsi/ginkgo
|
||||
- go get github.com/aead/chacha20
|
||||
|
||||
script:
|
||||
- go test -tags json github.com/v2ray/v2ray-core/...
|
||||
|
@ -3,9 +3,27 @@ package crypto
|
||||
import (
|
||||
"crypto/cipher"
|
||||
|
||||
"github.com/v2ray/v2ray-core/common/crypto/internal"
|
||||
"github.com/aead/chacha20"
|
||||
)
|
||||
|
||||
func NewChaCha20Stream(key []byte, iv []byte) cipher.Stream {
|
||||
return internal.NewChaCha20Stream(key, iv, 20)
|
||||
func makeNonce(nonce *[chacha20.NonceSize]byte, iv []byte) {
|
||||
switch len(iv) {
|
||||
case 8:
|
||||
copy(nonce[4:], iv)
|
||||
case 12:
|
||||
copy(nonce[:], iv)
|
||||
default:
|
||||
panic("bad nonce length")
|
||||
}
|
||||
}
|
||||
|
||||
func NewChaCha20Stream(key []byte, iv []byte) cipher.Stream {
|
||||
var Key [32]byte
|
||||
var Nonce [12]byte
|
||||
if len(key) != 32 {
|
||||
panic("bad key length")
|
||||
}
|
||||
copy(Key[:], key)
|
||||
makeNonce(&Nonce, iv)
|
||||
return chacha20.NewCipher(&Nonce, &Key)
|
||||
}
|
||||
|
@ -1,80 +0,0 @@
|
||||
package internal
|
||||
|
||||
//go:generate go run chacha_core_gen.go
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
// wordSize is the size of one ChaCha20 word, in bytes.
const wordSize = 4

// stateSize is the size of the ChaCha20 state, in words.
const stateSize = 16

// blockSize is the size of one ChaCha20 keystream block, in bytes.
const blockSize = wordSize * stateSize
|
||||
|
||||
type ChaCha20Stream struct {
|
||||
state [stateSize]uint32 // the state as an array of 16 32-bit words
|
||||
block [blockSize]byte // the keystream as an array of 64 bytes
|
||||
offset int // the offset of used bytes in block
|
||||
rounds int
|
||||
}
|
||||
|
||||
func NewChaCha20Stream(key []byte, nonce []byte, rounds int) *ChaCha20Stream {
|
||||
s := new(ChaCha20Stream)
|
||||
// the magic constants for 256-bit keys
|
||||
s.state[0] = 0x61707865
|
||||
s.state[1] = 0x3320646e
|
||||
s.state[2] = 0x79622d32
|
||||
s.state[3] = 0x6b206574
|
||||
|
||||
for i := 0; i < 8; i++ {
|
||||
s.state[i+4] = binary.LittleEndian.Uint32(key[i*4 : i*4+4])
|
||||
}
|
||||
|
||||
switch len(nonce) {
|
||||
case 8:
|
||||
s.state[14] = binary.LittleEndian.Uint32(nonce[0:])
|
||||
s.state[15] = binary.LittleEndian.Uint32(nonce[4:])
|
||||
case 12:
|
||||
s.state[13] = binary.LittleEndian.Uint32(nonce[0:4])
|
||||
s.state[14] = binary.LittleEndian.Uint32(nonce[4:8])
|
||||
s.state[15] = binary.LittleEndian.Uint32(nonce[8:12])
|
||||
default:
|
||||
panic("bad nonce length")
|
||||
}
|
||||
|
||||
s.rounds = rounds
|
||||
ChaCha20Block(&s.state, s.block[:], s.rounds)
|
||||
return s
|
||||
}
|
||||
|
||||
func (s *ChaCha20Stream) XORKeyStream(dst, src []byte) {
|
||||
// Stride over the input in 64-byte blocks, minus the amount of keystream
|
||||
// previously used. This will produce best results when processing blocks
|
||||
// of a size evenly divisible by 64.
|
||||
i := 0
|
||||
max := len(src)
|
||||
for i < max {
|
||||
gap := blockSize - s.offset
|
||||
|
||||
limit := i + gap
|
||||
if limit > max {
|
||||
limit = max
|
||||
}
|
||||
|
||||
o := s.offset
|
||||
for j := i; j < limit; j++ {
|
||||
dst[j] = src[j] ^ s.block[o]
|
||||
o++
|
||||
}
|
||||
|
||||
i += gap
|
||||
s.offset = o
|
||||
|
||||
if o == blockSize {
|
||||
s.offset = 0
|
||||
s.state[12]++
|
||||
ChaCha20Block(&s.state, s.block[:], s.rounds)
|
||||
}
|
||||
}
|
||||
}
|
@ -1,124 +0,0 @@
|
||||
// GENERATED CODE. DO NOT MODIFY!
|
||||
package internal
|
||||
|
||||
import "encoding/binary"
|
||||
|
||||
// ChaCha20Block computes one 64-byte keystream block from the state s and
// writes it to out.
//
// The 16 state words are copied into locals x0..x15 and mixed in a loop that
// advances i by 2 per iteration; each iteration runs eight quarter rounds.
// Afterwards each mixed word is added to the matching input word and stored
// little-endian into out[4*k : 4*k+4]. s itself is only read, never written.
// out needs room for 64 bytes, since the final stores slice up to out[60:64].
func ChaCha20Block(s *[16]uint32, out []byte, rounds int) {
|
||||
var x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 = s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[8], s[9], s[10], s[11], s[12], s[13], s[14], s[15]
|
||||
for i := 0; i < rounds; i += 2 {
|
||||
// x is scratch space for the xor result before it is rotated.
var x uint32
|
||||
|
||||
// quarter round on x0, x4, x8, x12
x0 += x4
|
||||
x = x12 ^ x0
|
||||
x12 = (x << 16) | (x >> (32 - 16))
|
||||
x8 += x12
|
||||
x = x4 ^ x8
|
||||
x4 = (x << 12) | (x >> (32 - 12))
|
||||
x0 += x4
|
||||
x = x12 ^ x0
|
||||
x12 = (x << 8) | (x >> (32 - 8))
|
||||
x8 += x12
|
||||
x = x4 ^ x8
|
||||
x4 = (x << 7) | (x >> (32 - 7))
|
||||
// quarter round on x1, x5, x9, x13
x1 += x5
|
||||
x = x13 ^ x1
|
||||
x13 = (x << 16) | (x >> (32 - 16))
|
||||
x9 += x13
|
||||
x = x5 ^ x9
|
||||
x5 = (x << 12) | (x >> (32 - 12))
|
||||
x1 += x5
|
||||
x = x13 ^ x1
|
||||
x13 = (x << 8) | (x >> (32 - 8))
|
||||
x9 += x13
|
||||
x = x5 ^ x9
|
||||
x5 = (x << 7) | (x >> (32 - 7))
|
||||
// quarter round on x2, x6, x10, x14
x2 += x6
|
||||
x = x14 ^ x2
|
||||
x14 = (x << 16) | (x >> (32 - 16))
|
||||
x10 += x14
|
||||
x = x6 ^ x10
|
||||
x6 = (x << 12) | (x >> (32 - 12))
|
||||
x2 += x6
|
||||
x = x14 ^ x2
|
||||
x14 = (x << 8) | (x >> (32 - 8))
|
||||
x10 += x14
|
||||
x = x6 ^ x10
|
||||
x6 = (x << 7) | (x >> (32 - 7))
|
||||
// quarter round on x3, x7, x11, x15
x3 += x7
|
||||
x = x15 ^ x3
|
||||
x15 = (x << 16) | (x >> (32 - 16))
|
||||
x11 += x15
|
||||
x = x7 ^ x11
|
||||
x7 = (x << 12) | (x >> (32 - 12))
|
||||
x3 += x7
|
||||
x = x15 ^ x3
|
||||
x15 = (x << 8) | (x >> (32 - 8))
|
||||
x11 += x15
|
||||
x = x7 ^ x11
|
||||
x7 = (x << 7) | (x >> (32 - 7))
|
||||
// quarter round on x0, x5, x10, x15
x0 += x5
|
||||
x = x15 ^ x0
|
||||
x15 = (x << 16) | (x >> (32 - 16))
|
||||
x10 += x15
|
||||
x = x5 ^ x10
|
||||
x5 = (x << 12) | (x >> (32 - 12))
|
||||
x0 += x5
|
||||
x = x15 ^ x0
|
||||
x15 = (x << 8) | (x >> (32 - 8))
|
||||
x10 += x15
|
||||
x = x5 ^ x10
|
||||
x5 = (x << 7) | (x >> (32 - 7))
|
||||
// quarter round on x1, x6, x11, x12
x1 += x6
|
||||
x = x12 ^ x1
|
||||
x12 = (x << 16) | (x >> (32 - 16))
|
||||
x11 += x12
|
||||
x = x6 ^ x11
|
||||
x6 = (x << 12) | (x >> (32 - 12))
|
||||
x1 += x6
|
||||
x = x12 ^ x1
|
||||
x12 = (x << 8) | (x >> (32 - 8))
|
||||
x11 += x12
|
||||
x = x6 ^ x11
|
||||
x6 = (x << 7) | (x >> (32 - 7))
|
||||
// quarter round on x2, x7, x8, x13
x2 += x7
|
||||
x = x13 ^ x2
|
||||
x13 = (x << 16) | (x >> (32 - 16))
|
||||
x8 += x13
|
||||
x = x7 ^ x8
|
||||
x7 = (x << 12) | (x >> (32 - 12))
|
||||
x2 += x7
|
||||
x = x13 ^ x2
|
||||
x13 = (x << 8) | (x >> (32 - 8))
|
||||
x8 += x13
|
||||
x = x7 ^ x8
|
||||
x7 = (x << 7) | (x >> (32 - 7))
|
||||
// quarter round on x3, x4, x9, x14
x3 += x4
|
||||
x = x14 ^ x3
|
||||
x14 = (x << 16) | (x >> (32 - 16))
|
||||
x9 += x14
|
||||
x = x4 ^ x9
|
||||
x4 = (x << 12) | (x >> (32 - 12))
|
||||
x3 += x4
|
||||
x = x14 ^ x3
|
||||
x14 = (x << 8) | (x >> (32 - 8))
|
||||
x9 += x14
|
||||
x = x4 ^ x9
|
||||
x4 = (x << 7) | (x >> (32 - 7))
|
||||
}
|
||||
// Add each original state word to its mixed counterpart and serialise the
// sums as little-endian 32-bit words.
binary.LittleEndian.PutUint32(out[0:4], s[0]+x0)
|
||||
binary.LittleEndian.PutUint32(out[4:8], s[1]+x1)
|
||||
binary.LittleEndian.PutUint32(out[8:12], s[2]+x2)
|
||||
binary.LittleEndian.PutUint32(out[12:16], s[3]+x3)
|
||||
binary.LittleEndian.PutUint32(out[16:20], s[4]+x4)
|
||||
binary.LittleEndian.PutUint32(out[20:24], s[5]+x5)
|
||||
binary.LittleEndian.PutUint32(out[24:28], s[6]+x6)
|
||||
binary.LittleEndian.PutUint32(out[28:32], s[7]+x7)
|
||||
binary.LittleEndian.PutUint32(out[32:36], s[8]+x8)
|
||||
binary.LittleEndian.PutUint32(out[36:40], s[9]+x9)
|
||||
binary.LittleEndian.PutUint32(out[40:44], s[10]+x10)
|
||||
binary.LittleEndian.PutUint32(out[44:48], s[11]+x11)
|
||||
binary.LittleEndian.PutUint32(out[48:52], s[12]+x12)
|
||||
binary.LittleEndian.PutUint32(out[52:56], s[13]+x13)
|
||||
binary.LittleEndian.PutUint32(out[56:60], s[14]+x14)
|
||||
binary.LittleEndian.PutUint32(out[60:64], s[15]+x15)
|
||||
}
|
@ -1,70 +0,0 @@
|
||||
// +build generate
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
)
|
||||
|
||||
// writeQuarterRound writes to file the Go statements for one quarter round
// over the generated variables x<a>, x<b>, x<c> and x<d>, using the generated
// variable x as scratch.
//
// Four add/xor/rotate triples are emitted, with rotation amounts 16, 12, 8
// and 7 in that order. Errors returned by the writes are not checked.
func writeQuarterRound(file *os.File, a, b, c, d int) {
	const (
		addFmt = "x%d+=x%d\n"
		xorFmt = "x=x%d^x%d\n"
		rotFmt = "x%d=(x << %d) | (x >> (32 - %d))\n"
	)

	// Each step adds addend into sum, xors sum into target, then rotates
	// target left by shift bits.
	steps := [...]struct{ sum, addend, target, shift int }{
		{a, b, d, 16},
		{c, d, b, 12},
		{a, b, d, 8},
		{c, d, b, 7},
	}

	for _, st := range steps {
		fmt.Fprintf(file, addFmt, st.sum, st.addend)
		fmt.Fprintf(file, xorFmt, st.target, st.sum)
		fmt.Fprintf(file, rotFmt, st.target, st.shift, st.shift)
	}
}
|
||||
|
||||
// writeChacha20Block writes the complete source of the ChaCha20Block function
// to file: the function header and loop opening, eight quarter rounds, the
// loop's closing brace, sixteen little-endian output stores, and the
// function's closing brace followed by an empty line.
// Errors returned by the writes are not checked.
func writeChacha20Block(file *os.File) {
|
||||
fmt.Fprintln(file, `
|
||||
func ChaCha20Block(s *[16]uint32, out []byte, rounds int) {
|
||||
var x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15 = s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7],s[8],s[9],s[10],s[11],s[12],s[13],s[14],s[15]
|
||||
for i := 0; i < rounds; i+=2 {
|
||||
var x uint32
|
||||
`)
|
||||
|
||||
// First four quarter rounds: index sets {k, k+4, k+8, k+12} for k = 0..3.
writeQuarterRound(file, 0, 4, 8, 12)
|
||||
writeQuarterRound(file, 1, 5, 9, 13)
|
||||
writeQuarterRound(file, 2, 6, 10, 14)
|
||||
writeQuarterRound(file, 3, 7, 11, 15)
|
||||
// Last four quarter rounds: the same rows with the b, c, d indices shifted.
writeQuarterRound(file, 0, 5, 10, 15)
|
||||
writeQuarterRound(file, 1, 6, 11, 12)
|
||||
writeQuarterRound(file, 2, 7, 8, 13)
|
||||
writeQuarterRound(file, 3, 4, 9, 14)
|
||||
// Close the generated for loop.
fmt.Fprintln(file, "}")
|
||||
// One output store per state word: out[4i:4i+4] receives s[i]+x<i>.
for i := 0; i < 16; i++ {
|
||||
fmt.Fprintf(file, "binary.LittleEndian.PutUint32(out[%d:%d], s[%d]+x%d)\n", i*4, i*4+4, i, i)
|
||||
}
|
||||
// Close the generated function and finish with an empty line.
fmt.Fprintln(file, "}")
|
||||
fmt.Fprintln(file)
|
||||
}
|
||||
|
||||
func main() {
|
||||
file, err := os.OpenFile("chacha_core.go", os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to generate chacha_core.go: %v", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
fmt.Fprintln(file, "// GENERATED CODE. DO NOT MODIFY!")
|
||||
fmt.Fprintln(file, "package internal")
|
||||
fmt.Fprintln(file)
|
||||
fmt.Fprintln(file, "import \"encoding/binary\"")
|
||||
fmt.Fprintln(file)
|
||||
writeChacha20Block(file)
|
||||
}
|
Loading…
Reference in New Issue
Block a user