go-ethereum/crypto/sha3/sha3.go

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package sha3 implements the SHA3 hash algorithm (formerly called Keccak) chosen by NIST in 2012.
// This file provides a SHA3 implementation which implements the standard hash.Hash interface.
// Writing input data, including padding, and reading output data are computed in this file.
// Note that the current implementation can compute the hash of an integral number of bytes only.
// This is a consequence of the hash interface in which a buffer of bytes is passed in.
// The internals of the Keccak-f function are computed in keccakf.go.
// For the detailed specification, refer to the Keccak web site (http://keccak.noekeon.org/).
package sha3

import (
	"encoding/binary"
	"hash"
)

// laneSize is the size in bytes of each "lane" of the internal state of SHA3 (5 * 5 * 8).
// Note that changing this size would requires using a type other than uint64 to store each lane.
const laneSize = 8

// sliceSize represents the dimensions of the internal state, a square matrix of
// sliceSize ** 2 lanes. This is the size of both the "rows" and "columns" dimensions in the
// terminology of the SHA3 specification.
const sliceSize = 5

// numLanes represents the total number of lanes in the state.
const numLanes = sliceSize * sliceSize

// stateSize is the size in bytes of the internal state of SHA3 (5 * 5 * WSize).
const stateSize = laneSize * numLanes

// digest represents the partial evaluation of a checksum.
// Note that capacity, and not outputSize, is the critical security parameter, as SHA3 can output
// an arbitrary number of bytes for any given capacity. The Keccak proposal recommends that
// capacity = 2*outputSize to ensure that finding a collision of size outputSize requires
// O(2^{outputSize/2}) computations (the birthday lower bound). Future standards may modify the
// capacity/outputSize ratio to allow for more output with lower cryptographic security.
type digest struct {
	a          [numLanes]uint64  // main state of the hash
	b          [numLanes]uint64  // intermediate states
	c          [sliceSize]uint64 // intermediate states
	d          [sliceSize]uint64 // intermediate states
	outputSize int               // desired output size in bytes
	capacity   int               // number of bytes to leave untouched during squeeze/absorb
	absorbed   int               // number of bytes absorbed thus far
}

// minInt returns the lesser of two integer arguments, to simplify the absorption routine.
func minInt(v1, v2 int) int {
	if v1 <= v2 {
		return v1
	}
	return v2
}

// rate returns the number of bytes of the internal state which can be absorbed or squeezed
// in between calls to the permutation function.
func (d *digest) rate() int {
	return stateSize - d.capacity
}

// Reset clears the internal state by zeroing bytes in the state buffer.
// This can be skipped for a newly-created hash state; the default zero-allocated state is correct.
func (d *digest) Reset() {
	d.absorbed = 0
	for i := range d.a {
		d.a[i] = 0
	}
}

// BlockSize, required by the hash.Hash interface, does not have a standard intepretation
// for a sponge-based construction like SHA3. We return the data rate: the number of bytes which
// can be absorbed per invocation of the permutation function. For Merkle-Damgård based hashes
// (ie SHA1, SHA2, MD5) the output size of the internal compression function is returned.
// We consider this to be roughly equivalent because it represents the number of bytes of output
// produced per cryptographic operation.
func (d *digest) BlockSize() int { return d.rate() }

// Size returns the output size of the hash function in bytes.
func (d *digest) Size() int {
	return d.outputSize
}

// unalignedAbsorb is a helper function for Write, which absorbs data that isn't aligned with an
// 8-byte lane. This requires shifting the individual bytes into position in a uint64.
func (d *digest) unalignedAbsorb(p []byte) {
	var t uint64
	for i := len(p) - 1; i >= 0; i-- {
		t <<= 8
		t |= uint64(p[i])
	}
	offset := (d.absorbed) % d.rate()
	t <<= 8 * uint(offset%laneSize)
	d.a[offset/laneSize] ^= t
	d.absorbed += len(p)
}

// Write "absorbs" bytes into the state of the SHA3 hash, updating as needed when the sponge
// "fills up" with rate() bytes. Since lanes are stored internally as type uint64, this requires
// converting the incoming bytes into uint64s using a little endian interpretation. This
// implementation is optimized for large, aligned writes of multiples of 8 bytes (laneSize).
// Non-aligned or uneven numbers of bytes require shifting and are slower.
func (d *digest) Write(p []byte) (int, error) {
	// An initial offset is needed if the we aren't absorbing to the first lane initially.
	offset := d.absorbed % d.rate()
	toWrite := len(p)

	// The first lane may need to absorb unaligned and/or incomplete data.
	if (offset%laneSize != 0 || len(p) < 8) && len(p) > 0 {
		toAbsorb := minInt(laneSize-(offset%laneSize), len(p))
		d.unalignedAbsorb(p[:toAbsorb])
		p = p[toAbsorb:]
		offset = (d.absorbed) % d.rate()

		// For every rate() bytes absorbed, the state must be permuted via the F Function.
		if (d.absorbed)%d.rate() == 0 {
			d.keccakF()
		}
	}

	// This loop should absorb the bulk of the data into full, aligned lanes.
	// It will call the update function as necessary.
	for len(p) > 7 {
		firstLane := offset / laneSize
		lastLane := minInt(d.rate()/laneSize, firstLane+len(p)/laneSize)

		// This inner loop absorbs input bytes into the state in groups of 8, converted to uint64s.
		for lane := firstLane; lane < lastLane; lane++ {
			d.a[lane] ^= binary.LittleEndian.Uint64(p[:laneSize])
			p = p[laneSize:]
		}
		d.absorbed += (lastLane - firstLane) * laneSize
		// For every rate() bytes absorbed, the state must be permuted via the F Function.
		if (d.absorbed)%d.rate() == 0 {
			d.keccakF()
		}

		offset = 0
	}

	// If there are insufficient bytes to fill the final lane, an unaligned absorption.
	// This should always start at a correct lane boundary though, or else it would be caught
	// by the uneven opening lane case above.
	if len(p) > 0 {
		d.unalignedAbsorb(p)
	}

	return toWrite, nil
}

// pad computes the SHA3 padding scheme based on the number of bytes absorbed.
// The padding is a 1 bit, followed by an arbitrary number of 0s and then a final 1 bit, such that
// the input bits plus padding bits are a multiple of rate(). Adding the padding simply requires
// xoring an opening and closing bit into the appropriate lanes.
func (d *digest) pad() {
	offset := d.absorbed % d.rate()
	// The opening pad bit must be shifted into position based on the number of bytes absorbed
	padOpenLane := offset / laneSize
	d.a[padOpenLane] ^= 0x0000000000000001 << uint(8*(offset%laneSize))
	// The closing padding bit is always in the last position
	padCloseLane := (d.rate() / laneSize) - 1
	d.a[padCloseLane] ^= 0x8000000000000000
}

// finalize prepares the hash to output data by padding and one final permutation of the state.
func (d *digest) finalize() {
	d.pad()
	d.keccakF()
}

// squeeze outputs an arbitrary number of bytes from the hash state.
// Squeezing can require multiple calls to the F function (one per rate() bytes squeezed),
// although this is not the case for standard SHA3 parameters. This implementation only supports
// squeezing a single time, subsequent squeezes may lose alignment. Future implementations
// may wish to support multiple squeeze calls, for example to support use as a PRNG.
func (d *digest) squeeze(in []byte, toSqueeze int) []byte {
	// Because we read in blocks of laneSize, we need enough room to read
	// an integral number of lanes
	needed := toSqueeze + (laneSize-toSqueeze%laneSize)%laneSize
	if cap(in)-len(in) < needed {
		newIn := make([]byte, len(in), len(in)+needed)
		copy(newIn, in)
		in = newIn
	}
	out := in[len(in) : len(in)+needed]

	for len(out) > 0 {
		for i := 0; i < d.rate() && len(out) > 0; i += laneSize {
			binary.LittleEndian.PutUint64(out[:], d.a[i/laneSize])
			out = out[laneSize:]
		}
		if len(out) > 0 {
			d.keccakF()
		}
	}
	return in[:len(in)+toSqueeze] // Re-slice in case we wrote extra data.
}

// Sum applies padding to the hash state and then squeezes out the desired nubmer of output bytes.
func (d *digest) Sum(in []byte) []byte {
	// Make a copy of the original hash so that caller can keep writing and summing.
	dup := *d
	dup.finalize()
	return dup.squeeze(in, dup.outputSize)
}

// The NewKeccakX constructors enable initializing a hash in any of the four recommend sizes
// from the Keccak specification, all of which set capacity=2*outputSize. Note that the final
// NIST standard for SHA3 may specify different input/output lengths.
// The output size is indicated in bits but converted into bytes internally.
func NewKeccak224() hash.Hash { return &digest{outputSize: 224 / 8, capacity: 2 * 224 / 8} }
func NewKeccak256() hash.Hash { return &digest{outputSize: 256 / 8, capacity: 2 * 256 / 8} }
func NewKeccak384() hash.Hash { return &digest{outputSize: 384 / 8, capacity: 2 * 384 / 8} }
func NewKeccak512() hash.Hash { return &digest{outputSize: 512 / 8, capacity: 2 * 512 / 8} }
Moved sha3 from `obscuren` 2015-01-22 00:25:00 +01:00			`// Copyright 2013 The Go Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style`
			`// license that can be found in the LICENSE file.`

			`// Package sha3 implements the SHA3 hash algorithm (formerly called Keccak) chosen by NIST in 2012.`
			`// This file provides a SHA3 implementation which implements the standard hash.Hash interface.`
			`// Writing input data, including padding, and reading output data are computed in this file.`
			`// Note that the current implementation can compute the hash of an integral number of bytes only.`
			`// This is a consequence of the hash interface in which a buffer of bytes is passed in.`
			`// The internals of the Keccak-f function are computed in keccakf.go.`
			`// For the detailed specification, refer to the Keccak web site (http://keccak.noekeon.org/).`
			`package sha3`

			`import (`
			`"encoding/binary"`
			`"hash"`
			`)`

			`// laneSize is the size in bytes of each "lane" of the internal state of SHA3 (5 * 5 * 8).`
			`// Note that changing this size would requires using a type other than uint64 to store each lane.`
			`const laneSize = 8`

			`// sliceSize represents the dimensions of the internal state, a square matrix of`
			`// sliceSize ** 2 lanes. This is the size of both the "rows" and "columns" dimensions in the`
			`// terminology of the SHA3 specification.`
			`const sliceSize = 5`

			`// numLanes represents the total number of lanes in the state.`
			`const numLanes = sliceSize * sliceSize`

			`// stateSize is the size in bytes of the internal state of SHA3 (5 * 5 * WSize).`
			`const stateSize = laneSize * numLanes`

			`// digest represents the partial evaluation of a checksum.`
			`// Note that capacity, and not outputSize, is the critical security parameter, as SHA3 can output`
			`// an arbitrary number of bytes for any given capacity. The Keccak proposal recommends that`
			`// capacity = 2*outputSize to ensure that finding a collision of size outputSize requires`
			`// O(2^{outputSize/2}) computations (the birthday lower bound). Future standards may modify the`
			`// capacity/outputSize ratio to allow for more output with lower cryptographic security.`
			`type digest struct {`
			`a [numLanes]uint64 // main state of the hash`
			`b [numLanes]uint64 // intermediate states`
			`c [sliceSize]uint64 // intermediate states`
			`d [sliceSize]uint64 // intermediate states`
			`outputSize int // desired output size in bytes`
			`capacity int // number of bytes to leave untouched during squeeze/absorb`
			`absorbed int // number of bytes absorbed thus far`
			`}`

			`// minInt returns the lesser of two integer arguments, to simplify the absorption routine.`
			`func minInt(v1, v2 int) int {`
			`if v1 <= v2 {`
			`return v1`
			`}`
			`return v2`
			`}`

			`// rate returns the number of bytes of the internal state which can be absorbed or squeezed`
			`// in between calls to the permutation function.`
			`func (d *digest) rate() int {`
			`return stateSize - d.capacity`
			`}`

			`// Reset clears the internal state by zeroing bytes in the state buffer.`
			`// This can be skipped for a newly-created hash state; the default zero-allocated state is correct.`
			`func (d *digest) Reset() {`
			`d.absorbed = 0`
			`for i := range d.a {`
			`d.a[i] = 0`
			`}`
			`}`

			`// BlockSize, required by the hash.Hash interface, does not have a standard intepretation`
			`// for a sponge-based construction like SHA3. We return the data rate: the number of bytes which`
			`// can be absorbed per invocation of the permutation function. For Merkle-Damgård based hashes`
			`// (ie SHA1, SHA2, MD5) the output size of the internal compression function is returned.`
			`// We consider this to be roughly equivalent because it represents the number of bytes of output`
			`// produced per cryptographic operation.`
			`func (d *digest) BlockSize() int { return d.rate() }`

			`// Size returns the output size of the hash function in bytes.`
			`func (d *digest) Size() int {`
			`return d.outputSize`
			`}`

			`// unalignedAbsorb is a helper function for Write, which absorbs data that isn't aligned with an`
			`// 8-byte lane. This requires shifting the individual bytes into position in a uint64.`
			`func (d *digest) unalignedAbsorb(p []byte) {`
			`var t uint64`
			`for i := len(p) - 1; i >= 0; i-- {`
			`t <<= 8`
			`t \|= uint64(p[i])`
			`}`
			`offset := (d.absorbed) % d.rate()`
			`t <<= 8 * uint(offset%laneSize)`
			`d.a[offset/laneSize] ^= t`
			`d.absorbed += len(p)`
			`}`

			`// Write "absorbs" bytes into the state of the SHA3 hash, updating as needed when the sponge`
			`// "fills up" with rate() bytes. Since lanes are stored internally as type uint64, this requires`
			`// converting the incoming bytes into uint64s using a little endian interpretation. This`
			`// implementation is optimized for large, aligned writes of multiples of 8 bytes (laneSize).`
			`// Non-aligned or uneven numbers of bytes require shifting and are slower.`
			`func (d *digest) Write(p []byte) (int, error) {`
			`// An initial offset is needed if the we aren't absorbing to the first lane initially.`
			`offset := d.absorbed % d.rate()`
			`toWrite := len(p)`

			`// The first lane may need to absorb unaligned and/or incomplete data.`
			`if (offset%laneSize != 0 \|\| len(p) < 8) && len(p) > 0 {`
			`toAbsorb := minInt(laneSize-(offset%laneSize), len(p))`
			`d.unalignedAbsorb(p[:toAbsorb])`
			`p = p[toAbsorb:]`
			`offset = (d.absorbed) % d.rate()`

			`// For every rate() bytes absorbed, the state must be permuted via the F Function.`
			`if (d.absorbed)%d.rate() == 0 {`
			`d.keccakF()`
			`}`
			`}`

			`// This loop should absorb the bulk of the data into full, aligned lanes.`
			`// It will call the update function as necessary.`
			`for len(p) > 7 {`
			`firstLane := offset / laneSize`
			`lastLane := minInt(d.rate()/laneSize, firstLane+len(p)/laneSize)`

			`// This inner loop absorbs input bytes into the state in groups of 8, converted to uint64s.`
			`for lane := firstLane; lane < lastLane; lane++ {`
			`d.a[lane] ^= binary.LittleEndian.Uint64(p[:laneSize])`
			`p = p[laneSize:]`
			`}`
			`d.absorbed += (lastLane - firstLane) * laneSize`
			`// For every rate() bytes absorbed, the state must be permuted via the F Function.`
			`if (d.absorbed)%d.rate() == 0 {`
			`d.keccakF()`
			`}`

			`offset = 0`
			`}`

			`// If there are insufficient bytes to fill the final lane, an unaligned absorption.`
			`// This should always start at a correct lane boundary though, or else it would be caught`
			`// by the uneven opening lane case above.`
			`if len(p) > 0 {`
			`d.unalignedAbsorb(p)`
			`}`

			`return toWrite, nil`
			`}`

			`// pad computes the SHA3 padding scheme based on the number of bytes absorbed.`
			`// The padding is a 1 bit, followed by an arbitrary number of 0s and then a final 1 bit, such that`
			`// the input bits plus padding bits are a multiple of rate(). Adding the padding simply requires`
			`// xoring an opening and closing bit into the appropriate lanes.`
			`func (d *digest) pad() {`
			`offset := d.absorbed % d.rate()`
			`// The opening pad bit must be shifted into position based on the number of bytes absorbed`
			`padOpenLane := offset / laneSize`
			`d.a[padOpenLane] ^= 0x0000000000000001 << uint(8*(offset%laneSize))`
			`// The closing padding bit is always in the last position`
			`padCloseLane := (d.rate() / laneSize) - 1`
			`d.a[padCloseLane] ^= 0x8000000000000000`
			`}`

			`// finalize prepares the hash to output data by padding and one final permutation of the state.`
			`func (d *digest) finalize() {`
			`d.pad()`
			`d.keccakF()`
			`}`

			`// squeeze outputs an arbitrary number of bytes from the hash state.`
			`// Squeezing can require multiple calls to the F function (one per rate() bytes squeezed),`
			`// although this is not the case for standard SHA3 parameters. This implementation only supports`
			`// squeezing a single time, subsequent squeezes may lose alignment. Future implementations`
			`// may wish to support multiple squeeze calls, for example to support use as a PRNG.`
			`func (d *digest) squeeze(in []byte, toSqueeze int) []byte {`
			`// Because we read in blocks of laneSize, we need enough room to read`
			`// an integral number of lanes`
			`needed := toSqueeze + (laneSize-toSqueeze%laneSize)%laneSize`
			`if cap(in)-len(in) < needed {`
			`newIn := make([]byte, len(in), len(in)+needed)`
			`copy(newIn, in)`
			`in = newIn`
			`}`
			`out := in[len(in) : len(in)+needed]`

			`for len(out) > 0 {`
			`for i := 0; i < d.rate() && len(out) > 0; i += laneSize {`
			`binary.LittleEndian.PutUint64(out[:], d.a[i/laneSize])`
			`out = out[laneSize:]`
			`}`
			`if len(out) > 0 {`
			`d.keccakF()`
			`}`
			`}`
			`return in[:len(in)+toSqueeze] // Re-slice in case we wrote extra data.`
			`}`

			`// Sum applies padding to the hash state and then squeezes out the desired nubmer of output bytes.`
			`func (d *digest) Sum(in []byte) []byte {`
			`// Make a copy of the original hash so that caller can keep writing and summing.`
			`dup := *d`
			`dup.finalize()`
			`return dup.squeeze(in, dup.outputSize)`
			`}`

			`// The NewKeccakX constructors enable initializing a hash in any of the four recommend sizes`
			`// from the Keccak specification, all of which set capacity=2*outputSize. Note that the final`
			`// NIST standard for SHA3 may specify different input/output lengths.`
			`// The output size is indicated in bits but converted into bytes internally.`
			`func NewKeccak224() hash.Hash { return &digest{outputSize: 224 / 8, capacity: 2 * 224 / 8} }`
			`func NewKeccak256() hash.Hash { return &digest{outputSize: 256 / 8, capacity: 2 * 256 / 8} }`
			`func NewKeccak384() hash.Hash { return &digest{outputSize: 384 / 8, capacity: 2 * 384 / 8} }`
			`func NewKeccak512() hash.Hash { return &digest{outputSize: 512 / 8, capacity: 2 * 512 / 8} }`