| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | // Copyright 2017 The go-ethereum Authors | 
					
						
							|  |  |  | // This file is part of the go-ethereum library. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // The go-ethereum library is free software: you can redistribute it and/or modify | 
					
						
							|  |  |  | // it under the terms of the GNU Lesser General Public License as published by | 
					
						
							|  |  |  | // the Free Software Foundation, either version 3 of the License, or | 
					
						
							|  |  |  | // (at your option) any later version. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // The go-ethereum library is distributed in the hope that it will be useful, | 
					
						
							|  |  |  | // but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
					
						
							|  |  |  | // GNU Lesser General Public License for more details. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // You should have received a copy of the GNU Lesser General Public License | 
					
						
							|  |  |  | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | package bitutil | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | import "errors" | 
					
						
							| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | var ( | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | 	// errMissingData is returned from decompression if the byte referenced by | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 	// the bitset header overflows the input data. | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | 	errMissingData = errors.New("missing bytes on input") | 
					
						
							| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | 	// errUnreferencedData is returned from decompression if not all bytes were used | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 	// up from the input data after decompressing it. | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | 	errUnreferencedData = errors.New("extra bytes on input") | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | 	// errExceededTarget is returned from decompression if the bitset header has | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 	// more bits defined than the number of target buffer space available. | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | 	errExceededTarget = errors.New("target data size exceeded") | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | 	// errZeroContent is returned from decompression if a data byte referenced in | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 	// the bitset header is actually a zero byte. | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | 	errZeroContent = errors.New("zero byte in input content") | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // The compression algorithm implemented by CompressBytes and DecompressBytes is | 
					
						
							|  |  |  | // optimized for sparse input data which contains a lot of zero bytes. Decompression | 
					
						
							|  |  |  | // requires knowledge of the decompressed data length. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // Compression works as follows: | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | //   if data only contains zeroes, | 
					
						
							|  |  |  | //       CompressBytes(data) == nil | 
					
						
							|  |  |  | //   otherwise if len(data) <= 1, | 
					
						
							|  |  |  | //       CompressBytes(data) == data | 
					
						
							|  |  |  | //   otherwise: | 
					
						
							|  |  |  | //       CompressBytes(data) == append(CompressBytes(nonZeroBitset(data)), nonZeroBytes(data)...) | 
					
						
							|  |  |  | //       where | 
					
						
							|  |  |  | //         nonZeroBitset(data) is a bit vector with len(data) bits (MSB first): | 
					
						
							|  |  |  | //             nonZeroBitset(data)[i/8] & (1 << (7-i%8)) != 0  if data[i] != 0 | 
					
						
							|  |  |  | //             len(nonZeroBitset(data)) == (len(data)+7)/8 | 
					
						
							|  |  |  | //         nonZeroBytes(data) contains the non-zero bytes of data in the same order | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // CompressBytes compresses the input byte slice according to the sparse bitset | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | // representation algorithm. If the result is bigger than the original input, no | 
					
						
							|  |  |  | // compression is done. | 
					
						
							| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | func CompressBytes(data []byte) []byte { | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | 	if out := bitsetEncodeBytes(data); len(out) < len(data) { | 
					
						
							|  |  |  | 		return out | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	cpy := make([]byte, len(data)) | 
					
						
							|  |  |  | 	copy(cpy, data) | 
					
						
							|  |  |  | 	return cpy | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // bitsetEncodeBytes compresses the input byte slice according to the sparse | 
					
						
							|  |  |  | // bitset representation algorithm. | 
					
						
							|  |  |  | func bitsetEncodeBytes(data []byte) []byte { | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 	// Empty slices get compressed to nil | 
					
						
							| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | 	if len(data) == 0 { | 
					
						
							|  |  |  | 		return nil | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 	// One byte slices compress to nil or retain the single byte | 
					
						
							| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | 	if len(data) == 1 { | 
					
						
							|  |  |  | 		if data[0] == 0 { | 
					
						
							|  |  |  | 			return nil | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 		return data | 
					
						
							| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 	// Calculate the bitset of set bytes, and gather the non-zero bytes | 
					
						
							|  |  |  | 	nonZeroBitset := make([]byte, (len(data)+7)/8) | 
					
						
							| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | 	nonZeroBytes := make([]byte, 0, len(data)) | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | 	for i, b := range data { | 
					
						
							|  |  |  | 		if b != 0 { | 
					
						
							|  |  |  | 			nonZeroBytes = append(nonZeroBytes, b) | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 			nonZeroBitset[i/8] |= 1 << byte(7-i%8) | 
					
						
							| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if len(nonZeroBytes) == 0 { | 
					
						
							|  |  |  | 		return nil | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | 	return append(bitsetEncodeBytes(nonZeroBitset), nonZeroBytes...) | 
					
						
							| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | // DecompressBytes decompresses data with a known target size. If the input data | 
					
						
							|  |  |  | // matches the size of the target, it means no compression was done in the first | 
					
						
							|  |  |  | // place. | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | func DecompressBytes(data []byte, target int) ([]byte, error) { | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | 	if len(data) > target { | 
					
						
							|  |  |  | 		return nil, errExceededTarget | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if len(data) == target { | 
					
						
							|  |  |  | 		cpy := make([]byte, len(data)) | 
					
						
							|  |  |  | 		copy(cpy, data) | 
					
						
							|  |  |  | 		return cpy, nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return bitsetDecodeBytes(data, target) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // bitsetDecodeBytes decompresses data with a known target size. | 
					
						
							|  |  |  | func bitsetDecodeBytes(data []byte, target int) ([]byte, error) { | 
					
						
							|  |  |  | 	out, size, err := bitsetDecodePartialBytes(data, target) | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 	if size != len(data) { | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | 		return nil, errUnreferencedData | 
					
						
							| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 	return out, nil | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | // bitsetDecodePartialBytes decompresses data with a known target size, but does | 
					
						
							|  |  |  | // not enforce consuming all the input bytes. In addition to the decompressed | 
					
						
							|  |  |  | // output, the function returns the length of compressed input data corresponding | 
					
						
							|  |  |  | // to the output as the input slice may be longer. | 
					
						
							|  |  |  | func bitsetDecodePartialBytes(data []byte, target int) ([]byte, int, error) { | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 	// Sanity check 0 targets to avoid infinite recursion | 
					
						
							|  |  |  | 	if target == 0 { | 
					
						
							|  |  |  | 		return nil, 0, nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	// Handle the zero and single byte corner cases | 
					
						
							|  |  |  | 	decomp := make([]byte, target) | 
					
						
							|  |  |  | 	if len(data) == 0 { | 
					
						
							|  |  |  | 		return decomp, 0, nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if target == 1 { | 
					
						
							|  |  |  | 		decomp[0] = data[0] // copy to avoid referencing the input slice | 
					
						
							|  |  |  | 		if data[0] != 0 { | 
					
						
							|  |  |  | 			return decomp, 1, nil | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		return decomp, 0, nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	// Decompress the bitset of set bytes and distribute the non zero bytes | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | 	nonZeroBitset, ptr, err := bitsetDecodePartialBytes(data, (target+7)/8) | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return nil, ptr, err | 
					
						
							| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 	for i := 0; i < 8*len(nonZeroBitset); i++ { | 
					
						
							|  |  |  | 		if nonZeroBitset[i/8]&(1<<byte(7-i%8)) != 0 { | 
					
						
							|  |  |  | 			// Make sure we have enough data to push into the correct slot | 
					
						
							|  |  |  | 			if ptr >= len(data) { | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | 				return nil, 0, errMissingData | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 			} | 
					
						
							|  |  |  | 			if i >= len(decomp) { | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | 				return nil, 0, errExceededTarget | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 			} | 
					
						
							|  |  |  | 			// Make sure the data is valid and push into the slot | 
					
						
							|  |  |  | 			if data[ptr] == 0 { | 
					
						
							| 
									
										
										
										
											2017-05-08 10:40:48 +03:00
										 |  |  | 				return nil, 0, errZeroContent | 
					
						
							| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | 			} | 
					
						
							|  |  |  | 			decomp[i] = data[ptr] | 
					
						
							|  |  |  | 			ptr++ | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2017-05-06 18:35:59 +03:00
										 |  |  | 	return decomp, ptr, nil | 
					
						
							| 
									
										
										
										
											2017-05-05 20:11:55 +02:00
										 |  |  | } |