1
0
mirror of https://github.com/v2fly/v2ray-core.git synced 2025-01-03 07:56:42 -05:00

Merge pull request #215 from aead/master

Replace ChaCha20 implementation with an optimized version
This commit is contained in:
Darien Raymond 2016-07-23 12:53:07 +02:00 committed by GitHub
commit 08526a32ff
5 changed files with 22 additions and 277 deletions

View File

@ -7,6 +7,7 @@ before_install:
- go get golang.org/x/tools/cmd/cover
- go get github.com/onsi/gomega
- go get github.com/onsi/ginkgo
- go get github.com/aead/chacha20
script:
- go test -tags json github.com/v2ray/v2ray-core/...

View File

@ -3,9 +3,27 @@ package crypto
import (
"crypto/cipher"
"github.com/v2ray/v2ray-core/common/crypto/internal"
"github.com/aead/chacha20"
)
func NewChaCha20Stream(key []byte, iv []byte) cipher.Stream {
return internal.NewChaCha20Stream(key, iv, 20)
// makeNonce copies iv into nonce. A 12-byte iv fills the whole nonce; an
// 8-byte iv is written starting at byte 4, so the first four bytes keep
// whatever value the caller left in them. Any other iv length panics.
func makeNonce(nonce *[chacha20.NonceSize]byte, iv []byte) {
	var offset int
	switch n := len(iv); {
	case n == 12:
		offset = 0
	case n == 8:
		offset = 4
	default:
		panic("bad nonce length")
	}
	copy(nonce[offset:], iv)
}
// NewChaCha20Stream returns a cipher.Stream produced by chacha20.NewCipher
// for the given key and iv.
//
// key must be exactly 32 bytes, otherwise the function panics with
// "bad key length". iv is converted to a 12-byte nonce by makeNonce.
func NewChaCha20Stream(key []byte, iv []byte) cipher.Stream {
	if len(key) != 32 {
		panic("bad key length")
	}

	// chacha20.NewCipher takes pointers to fixed-size arrays, so the slices
	// are copied into local buffers first.
	var (
		keyBuf   [32]byte
		nonceBuf [12]byte
	)
	copy(keyBuf[:], key)
	makeNonce(&nonceBuf, iv)

	return chacha20.NewCipher(&nonceBuf, &keyBuf)
}

View File

@ -1,80 +0,0 @@
package internal
//go:generate go run chacha_core_gen.go
import (
"encoding/binary"
)
// Sizes that describe the layout of the ChaCha20 state and keystream block.
const (
wordSize = 4 // the size of one ChaCha20 word, in bytes
stateSize = 16 // the size of ChaCha20's state, in words
blockSize = stateSize * wordSize // the size of one ChaCha20 keystream block, in bytes
)
// ChaCha20Stream holds the cipher state together with the most recently
// generated keystream block. Its XORKeyStream method consumes that block
// byte by byte and regenerates it when it is used up.
type ChaCha20Stream struct {
state [stateSize]uint32 // the state as an array of 16 32-bit words
block [blockSize]byte // the keystream as an array of 64 bytes
offset int // the number of bytes of block already consumed; the next keystream byte is block[offset]
rounds int // the round count handed to ChaCha20Block each time a block is generated
}
// NewChaCha20Stream creates a ChaCha20Stream for the given key, nonce and
// round count, and generates the first keystream block.
//
// The first 32 bytes of key are read as eight little-endian words; a shorter
// key causes a slice-bounds panic. nonce must be 8 or 12 bytes long,
// otherwise the function panics with "bad nonce length".
func NewChaCha20Stream(key []byte, nonce []byte, rounds int) *ChaCha20Stream {
	stream := &ChaCha20Stream{rounds: rounds}
	st := &stream.state

	// Words 0-3: the constants for 256-bit keys (ASCII "expand 32-byte k").
	st[0], st[1], st[2], st[3] = 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574

	// Words 4-11: the key.
	for w := 0; w < 8; w++ {
		st[4+w] = binary.LittleEndian.Uint32(key[4*w : 4*w+4])
	}

	// The nonce occupies the trailing words of the state: two words for an
	// 8-byte nonce, three for a 12-byte one. Word 12 (and word 13 for the
	// short nonce) stays zero.
	var first int
	switch len(nonce) {
	case 8:
		first = 14
	case 12:
		first = 13
	default:
		panic("bad nonce length")
	}
	for w := first; w < stateSize; w++ {
		off := 4 * (w - first)
		st[w] = binary.LittleEndian.Uint32(nonce[off : off+4])
	}

	// Fill the keystream buffer so XORKeyStream can start right away.
	ChaCha20Block(st, stream.block[:], rounds)
	return stream
}
// XORKeyStream XORs every byte of src with the next keystream byte and
// stores the result in dst at the same index. Keystream position is kept
// between calls, so consecutive calls continue where the previous one ended.
//
// Whenever the buffered block is exhausted, state word 12 is incremented and
// a fresh block is generated with ChaCha20Block.
func (s *ChaCha20Stream) XORKeyStream(dst, src []byte) {
	total := len(src)
	for pos := 0; pos < total; {
		// Process up to the end of the buffered keystream block, or the end
		// of the input, whichever comes first.
		end := pos + (blockSize - s.offset)
		if end > total {
			end = total
		}

		keystream := s.block[s.offset:]
		for k := pos; k < end; k++ {
			dst[k] = src[k] ^ keystream[k-pos]
		}
		s.offset += end - pos
		pos = end

		if s.offset == blockSize {
			// Block used up: advance the counter and refill.
			s.offset = 0
			s.state[12]++
			ChaCha20Block(&s.state, s.block[:], s.rounds)
		}
	}
}

View File

@ -1,124 +0,0 @@
// GENERATED CODE. DO NOT MODIFY!
package internal
import "encoding/binary"
// ChaCha20Block computes one keystream block from the state s and writes it
// to out as sixteen little-endian 32-bit words (64 bytes).
//
// The loop runs one column round plus one diagonal round per iteration and
// advances its counter by two, so rounds is the total number of rounds and
// an odd value is effectively rounded up. s itself is not modified.
func ChaCha20Block(s *[16]uint32, out []byte, rounds int) {
	// Mix a working copy; the untouched input is added back at the end.
	w := *s

	// quarter applies the quarter-round (add, xor, rotate by 16/12/8/7) to
	// the four working words selected by the indices a, b, c and d.
	quarter := func(a, b, c, d int) {
		w[a] += w[b]
		w[d] ^= w[a]
		w[d] = w[d]<<16 | w[d]>>(32-16)

		w[c] += w[d]
		w[b] ^= w[c]
		w[b] = w[b]<<12 | w[b]>>(32-12)

		w[a] += w[b]
		w[d] ^= w[a]
		w[d] = w[d]<<8 | w[d]>>(32-8)

		w[c] += w[d]
		w[b] ^= w[c]
		w[b] = w[b]<<7 | w[b]>>(32-7)
	}

	for done := 0; done < rounds; done += 2 {
		// Columns of the state viewed as a 4x4 matrix.
		quarter(0, 4, 8, 12)
		quarter(1, 5, 9, 13)
		quarter(2, 6, 10, 14)
		quarter(3, 7, 11, 15)

		// Diagonals of the same matrix.
		quarter(0, 5, 10, 15)
		quarter(1, 6, 11, 12)
		quarter(2, 7, 8, 13)
		quarter(3, 4, 9, 14)
	}

	for i, mixed := range w {
		binary.LittleEndian.PutUint32(out[4*i:4*i+4], s[i]+mixed)
	}
}

View File

@ -1,70 +0,0 @@
// +build generate
package main
import (
"fmt"
"log"
"os"
)
// writeQuarterRound emits the Go statements for one quarter-round on the
// variables xa, xb, xc and xd to file. The round is four add/xor/rotate
// steps with rotation amounts 16, 12, 8 and 7; the generated code uses a
// scratch variable named x for the xor result.
func writeQuarterRound(file *os.File, a, b, c, d int) {
	const (
		addFmt    = "x%d+=x%d\n"
		xorFmt    = "x=x%d^x%d\n"
		rotateFmt = "x%d=(x << %d) | (x >> (32 - %d))\n"
	)

	// Each step adds src into sum, xors sum into dst and rotates dst left.
	steps := [...]struct{ sum, src, dst, shift int }{
		{a, b, d, 16},
		{c, d, b, 12},
		{a, b, d, 8},
		{c, d, b, 7},
	}
	for _, st := range steps {
		fmt.Fprintf(file, addFmt, st.sum, st.src)
		fmt.Fprintf(file, xorFmt, st.dst, st.sum)
		fmt.Fprintf(file, rotateFmt, st.dst, st.shift, st.shift)
	}
}
// writeChacha20Block emits the source of the ChaCha20Block function to file:
// the function header and loop opening, eight quarter-rounds for the loop
// body, and sixteen PutUint32 statements that write s[i]+xi to out.
func writeChacha20Block(file *os.File) {
	fmt.Fprintln(file, `
func ChaCha20Block(s *[16]uint32, out []byte, rounds int) {
var x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15 = s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7],s[8],s[9],s[10],s[11],s[12],s[13],s[14],s[15]
for i := 0; i < rounds; i+=2 {
var x uint32
`)

	// Variable indices for each quarter-round, in emission order.
	quarterRounds := [...][4]int{
		{0, 4, 8, 12},
		{1, 5, 9, 13},
		{2, 6, 10, 14},
		{3, 7, 11, 15},
		{0, 5, 10, 15},
		{1, 6, 11, 12},
		{2, 7, 8, 13},
		{3, 4, 9, 14},
	}
	for _, q := range quarterRounds {
		writeQuarterRound(file, q[0], q[1], q[2], q[3])
	}
	fmt.Fprintln(file, "}") // closes the generated for loop

	for word := 0; word < 16; word++ {
		lo := word * 4
		fmt.Fprintf(file, "binary.LittleEndian.PutUint32(out[%d:%d], s[%d]+x%d)\n", lo, lo+4, word, word)
	}
	fmt.Fprintln(file, "}") // closes the generated function
	fmt.Fprintln(file)
}
// main creates or truncates chacha_core.go in the current directory and
// writes the generated file to it: a header comment, the package clause,
// the encoding/binary import and the ChaCha20Block function.
func main() {
	out, err := os.OpenFile("chacha_core.go", os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
	if err != nil {
		log.Fatalf("Failed to generate chacha_core.go: %v", err)
	}
	defer out.Close()

	// File preamble; an empty entry produces a blank line.
	preamble := []string{
		"// GENERATED CODE. DO NOT MODIFY!",
		"package internal",
		"",
		"import \"encoding/binary\"",
		"",
	}
	for _, line := range preamble {
		fmt.Fprintln(out, line)
	}

	writeChacha20Block(out)
}