Crypto++  8.7
Free C++ class library of cryptographic schemes
lsh512_sse.cpp
1 // lsh.cpp - written and placed in the public domain by Jeffrey Walton
2 // Based on the specification and source code provided by
3 // Korea Internet & Security Agency (KISA) website. Also
4 // see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
5 // and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.
6 
7 // We are hitting some sort of GCC bug in the LSH AVX2 code path.
8 // Clang is OK on the AVX2 code path. We believe it is GCC Issue
9 // 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
10 // makes using zeroupper a little tricky.
11 
12 #include "pch.h"
13 #include "config.h"
14 
15 #include "lsh.h"
16 #include "misc.h"
17 
18 // Squash MS LNK4221 and libtool warnings
19 extern const char LSH512_SSE_FNAME[] = __FILE__;
20 
21 #if defined(CRYPTOPP_SSSE3_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)
22 
23 #if defined(CRYPTOPP_SSSE3_AVAILABLE)
24 # include <emmintrin.h>
25 # include <tmmintrin.h>
26 #endif
27 
28 #if defined(CRYPTOPP_XOP_AVAILABLE)
29 # include <ammintrin.h>
30 #endif
31 
32 // GCC at 4.5. Clang is unknown. Also see https://stackoverflow.com/a/42493893.
33 #if (CRYPTOPP_GCC_VERSION >= 40500)
34 # include <x86intrin.h>
35 #endif
36 
37 ANONYMOUS_NAMESPACE_BEGIN
38 
/* LSH Constants */

const unsigned int LSH512_MSG_BLK_BYTE_LEN = 256;   // one LSH-512 message block is 256 bytes
// const unsigned int LSH512_MSG_BLK_BIT_LEN = 2048;
// const unsigned int LSH512_CV_BYTE_LEN = 128;
const unsigned int LSH512_HASH_VAL_MAX_BYTE_LEN = 64;   // largest digest (LSH-512-512)

// const unsigned int MSG_BLK_WORD_LEN = 32;
const unsigned int CV_WORD_LEN = 16;      // chaining variable length in 64-bit words (cv_l + cv_r)
const unsigned int CONST_WORD_LEN = 8;    // step constants consumed per step
// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
const unsigned int NUM_STEPS = 28;        // number of step functions in the compression function

// Rotation amounts for the even and odd step mix functions
const unsigned int ROT_EVEN_ALPHA = 23;
const unsigned int ROT_EVEN_BETA = 59;
const unsigned int ROT_ODD_ALPHA = 7;
const unsigned int ROT_ODD_BETA = 3;

// Algorithm type codes: low 16 bits encode the digest byte length
const unsigned int LSH_TYPE_512_512 = 0x0010040;
const unsigned int LSH_TYPE_512_384 = 0x0010030;
const unsigned int LSH_TYPE_512_256 = 0x0010020;
const unsigned int LSH_TYPE_512_224 = 0x001001C;

// const unsigned int LSH_TYPE_384 = LSH_TYPE_512_384;
// const unsigned int LSH_TYPE_512 = LSH_TYPE_512_512;

/* Error Code */

const unsigned int LSH_SUCCESS = 0x0;
// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
const unsigned int LSH_ERR_INVALID_STATE = 0x2404;

/* Index into our state array */

const unsigned int AlgorithmType = 80;    // state[80] holds the LSH_TYPE_* code
const unsigned int RemainingBits = 81;    // state[81] holds the buffered bit count
77 
78 NAMESPACE_END
79 
80 NAMESPACE_BEGIN(CryptoPP)
81 NAMESPACE_BEGIN(LSH)
82 
// lsh512.cpp
// Initialization vectors and step constants are defined in lsh512.cpp
// and shared by all LSH-512 implementations (C++, SSSE3, AVX2).
extern const word64 LSH512_IV224[CV_WORD_LEN];
extern const word64 LSH512_IV256[CV_WORD_LEN];
extern const word64 LSH512_IV384[CV_WORD_LEN];
extern const word64 LSH512_IV512[CV_WORD_LEN];
extern const word64 LSH512_StepConstants[CONST_WORD_LEN * NUM_STEPS];
89 
90 NAMESPACE_END // LSH
91 NAMESPACE_END // Crypto++
92 
93 ANONYMOUS_NAMESPACE_BEGIN
94 
95 using CryptoPP::byte;
96 using CryptoPP::word32;
97 using CryptoPP::word64;
100 
101 using CryptoPP::GetBlock;
105 
106 using CryptoPP::LSH::LSH512_IV224;
107 using CryptoPP::LSH::LSH512_IV256;
108 using CryptoPP::LSH::LSH512_IV384;
109 using CryptoPP::LSH::LSH512_IV512;
110 using CryptoPP::LSH::LSH512_StepConstants;
111 
// Fixed-width aliases matching the KISA reference implementation's names.
typedef byte lsh_u8;
typedef word32 lsh_u32;
typedef word64 lsh_u64;
typedef word32 lsh_uint;
typedef word32 lsh_err;     // error/status code (LSH_SUCCESS, LSH_ERR_*)
typedef word32 lsh_type;    // algorithm type code (LSH_TYPE_512_*)
118 
// Maps the flat word64 state array owned by the LSH512_Base wrapper onto
// the named fields used by the reference implementation.
// Layout: state[0..7] = cv_l, state[8..15] = cv_r, state[16..47] = sub_msgs,
// state[48..] = last (partial) message block, viewed as bytes.
struct LSH512_SSSE3_Context
{
	LSH512_SSSE3_Context(word64* state, word64 algType, word64& remainingBitLength) :
		cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
		last_block(reinterpret_cast<byte*>(state+48)),
		remain_databitlen(remainingBitLength),
		alg_type(static_cast<lsh_type>(algType)) {}

	lsh_u64* cv_l;  // start of our state block
	lsh_u64* cv_r;
	lsh_u64* sub_msgs;
	lsh_u8* last_block;            // buffered partial message block
	lsh_u64& remain_databitlen;    // reference into caller's state so updates persist
	lsh_type alg_type;
};
134 
// Views the sub_msgs region of the state (state[16..47]) as the four
// 8-word sub-message arrays used by message expansion.
struct LSH512_SSSE3_Internal
{
	LSH512_SSSE3_Internal(word64* state) :
		submsg_e_l(state+16), submsg_e_r(state+24),
		submsg_o_l(state+32), submsg_o_r(state+40) { }

	lsh_u64* submsg_e_l; /* even left sub-message */
	lsh_u64* submsg_e_r; /* even right sub-message */
	lsh_u64* submsg_o_l; /* odd left sub-message */
	lsh_u64* submsg_o_r; /* odd right sub-message */
};
146 
147 // const lsh_u32 g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
148 
149 /* LSH AlgType Macro */
150 
151 inline bool LSH_IS_LSH512(lsh_uint val) {
152  return (val & 0xf0000) == 0x10000;
153 }
154 
155 inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
156  return val >> 24;
157 }
158 
159 inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
160  return val & 0xffff;
161 }
162 
163 inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
164  return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
165 }
166 
167 inline lsh_u64 loadLE64(lsh_u64 v) {
169 }
170 
171 lsh_u64 ROTL64(lsh_u64 x, lsh_u32 r) {
172  return rotlFixed(x, r);
173 }
174 
// Original code relied upon unaligned lsh_u64 buffer
// Copy the 256-byte message block into the four 8-word sub-message arrays:
// bytes 0..63 -> even-left, 64..127 -> even-right, 128..191 -> odd-left,
// 192..255 -> odd-right. Unaligned loads/stores because the message pointer
// may not be 16-byte aligned.
inline void load_msg_blk(LSH512_SSSE3_Internal* i_state, const lsh_u8 msgblk[LSH512_MSG_BLK_BYTE_LEN])
{
	lsh_u64* submsg_e_l = i_state->submsg_e_l;
	lsh_u64* submsg_e_r = i_state->submsg_e_r;
	lsh_u64* submsg_o_l = i_state->submsg_o_l;
	lsh_u64* submsg_o_r = i_state->submsg_o_r;

	_mm_storeu_si128(M128_CAST(submsg_e_l+0),
		_mm_loadu_si128(CONST_M128_CAST(msgblk+0)));
	_mm_storeu_si128(M128_CAST(submsg_e_l+2),
		_mm_loadu_si128(CONST_M128_CAST(msgblk+16)));
	_mm_storeu_si128(M128_CAST(submsg_e_l+4),
		_mm_loadu_si128(CONST_M128_CAST(msgblk+32)));
	_mm_storeu_si128(M128_CAST(submsg_e_l+6),
		_mm_loadu_si128(CONST_M128_CAST(msgblk+48)));

	_mm_storeu_si128(M128_CAST(submsg_e_r+0),
		_mm_loadu_si128(CONST_M128_CAST(msgblk+64)));
	_mm_storeu_si128(M128_CAST(submsg_e_r+2),
		_mm_loadu_si128(CONST_M128_CAST(msgblk+80)));
	_mm_storeu_si128(M128_CAST(submsg_e_r+4),
		_mm_loadu_si128(CONST_M128_CAST(msgblk+96)));
	_mm_storeu_si128(M128_CAST(submsg_e_r+6),
		_mm_loadu_si128(CONST_M128_CAST(msgblk+112)));

	_mm_storeu_si128(M128_CAST(submsg_o_l+0),
		_mm_loadu_si128(CONST_M128_CAST(msgblk+128)));
	_mm_storeu_si128(M128_CAST(submsg_o_l+2),
		_mm_loadu_si128(CONST_M128_CAST(msgblk+144)));
	_mm_storeu_si128(M128_CAST(submsg_o_l+4),
		_mm_loadu_si128(CONST_M128_CAST(msgblk+160)));
	_mm_storeu_si128(M128_CAST(submsg_o_l+6),
		_mm_loadu_si128(CONST_M128_CAST(msgblk+176)));

	_mm_storeu_si128(M128_CAST(submsg_o_r+0),
		_mm_loadu_si128(CONST_M128_CAST(msgblk+192)));
	_mm_storeu_si128(M128_CAST(submsg_o_r+2),
		_mm_loadu_si128(CONST_M128_CAST(msgblk+208)));
	_mm_storeu_si128(M128_CAST(submsg_o_r+4),
		_mm_loadu_si128(CONST_M128_CAST(msgblk+224)));
	_mm_storeu_si128(M128_CAST(submsg_o_r+6),
		_mm_loadu_si128(CONST_M128_CAST(msgblk+240)));
}
219 
// Message expansion for the even steps: permute the 64-bit words of the
// even sub-messages in place (lane shuffles/swaps below), then add the odd
// sub-messages word-wise. Statement order is significant; each store feeds
// later loads of the same array.
inline void msg_exp_even(LSH512_SSSE3_Internal* i_state)
{
	CRYPTOPP_ASSERT(i_state != NULLPTR);

	lsh_u64* submsg_e_l = i_state->submsg_e_l;
	lsh_u64* submsg_e_r = i_state->submsg_e_r;
	lsh_u64* submsg_o_l = i_state->submsg_o_l;
	lsh_u64* submsg_o_r = i_state->submsg_o_r;

	// Word permutation of the even-left sub-message
	__m128i temp;
	_mm_storeu_si128(M128_CAST(submsg_e_l+2), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)), _MM_SHUFFLE(1,0,3,2)));

	temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0));
	_mm_storeu_si128(M128_CAST(submsg_e_l+0),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)));
	_mm_storeu_si128(M128_CAST(submsg_e_l+2), temp);
	_mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)), _MM_SHUFFLE(1,0,3,2)));

	temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4));
	_mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_unpacklo_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
	_mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_unpackhi_epi64(
		temp, _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));
	// Word permutation of the even-right sub-message
	_mm_storeu_si128(M128_CAST(submsg_e_r+2), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)), _MM_SHUFFLE(1,0,3,2)));

	temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0));
	_mm_storeu_si128(M128_CAST(submsg_e_r+0),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)));
	_mm_storeu_si128(M128_CAST(submsg_e_r+2), temp);
	_mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)), _MM_SHUFFLE(1,0,3,2)));

	temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4));
	_mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_unpacklo_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
	_mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_unpackhi_epi64(
		temp, _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));

	// Even sub-messages += odd sub-messages (word-wise, mod 2^64)
	_mm_storeu_si128(M128_CAST(submsg_e_l+0), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0))));
	_mm_storeu_si128(M128_CAST(submsg_e_l+2), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2))));
	_mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
	_mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));

	_mm_storeu_si128(M128_CAST(submsg_e_r+0), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0))));
	_mm_storeu_si128(M128_CAST(submsg_e_r+2), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2))));
	_mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
	_mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));
}
289 
// Message expansion for the odd steps: mirror of msg_exp_even with the
// roles of the even and odd sub-messages swapped — permute the odd
// sub-messages, then add the even sub-messages word-wise.
inline void msg_exp_odd(LSH512_SSSE3_Internal* i_state)
{
	CRYPTOPP_ASSERT(i_state != NULLPTR);

	lsh_u64* submsg_e_l = i_state->submsg_e_l;
	lsh_u64* submsg_e_r = i_state->submsg_e_r;
	lsh_u64* submsg_o_l = i_state->submsg_o_l;
	lsh_u64* submsg_o_r = i_state->submsg_o_r;

	// Word permutation of the odd-left sub-message
	__m128i temp;
	_mm_storeu_si128(M128_CAST(submsg_o_l+2), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)), _MM_SHUFFLE(1,0,3,2)));

	temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0));
	_mm_storeu_si128(M128_CAST(submsg_o_l+0),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)));
	_mm_storeu_si128(M128_CAST(submsg_o_l+2), temp);
	_mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)), _MM_SHUFFLE(1,0,3,2)));

	temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4));
	_mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_unpacklo_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
	_mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_unpackhi_epi64(
		temp, _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));
	// Word permutation of the odd-right sub-message
	_mm_storeu_si128(M128_CAST(submsg_o_r+2), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)), _MM_SHUFFLE(1,0,3,2)));

	temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0));
	_mm_storeu_si128(M128_CAST(submsg_o_r+0),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)));
	_mm_storeu_si128(M128_CAST(submsg_o_r+2), temp);
	_mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)), _MM_SHUFFLE(1,0,3,2)));

	temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4));
	_mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_unpacklo_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
	_mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_unpackhi_epi64(
		temp, _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));

	// Odd sub-messages += even sub-messages (word-wise, mod 2^64)
	_mm_storeu_si128(M128_CAST(submsg_o_l+0), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0))));
	_mm_storeu_si128(M128_CAST(submsg_o_l+2), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2))));
	_mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
	_mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));

	_mm_storeu_si128(M128_CAST(submsg_o_r+0), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0))));
	_mm_storeu_si128(M128_CAST(submsg_o_r+2), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2))));
	_mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
	_mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));
}
359 
360 inline void load_sc(const lsh_u64** p_const_v, size_t i)
361 {
362  *p_const_v = &LSH512_StepConstants[i];
363 }
364 
// XOR the even sub-messages into the left and right chaining variables,
// two 64-bit words per 128-bit operation.
inline void msg_add_even(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_SSSE3_Internal* i_state)
{
	CRYPTOPP_ASSERT(i_state != NULLPTR);

	lsh_u64* submsg_e_l = i_state->submsg_e_l;
	lsh_u64* submsg_e_r = i_state->submsg_e_r;

	_mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_l)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l))));
	_mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_r)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r))));
	_mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2))));
	_mm_storeu_si128(M128_CAST(cv_r+2), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2))));
	_mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
	_mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
	_mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));
	_mm_storeu_si128(M128_CAST(cv_r+6), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));
}
397 
// XOR the odd sub-messages into the left and right chaining variables,
// two 64-bit words per 128-bit operation.
inline void msg_add_odd(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_SSSE3_Internal* i_state)
{
	CRYPTOPP_ASSERT(i_state != NULLPTR);

	lsh_u64* submsg_o_l = i_state->submsg_o_l;
	lsh_u64* submsg_o_r = i_state->submsg_o_r;

	_mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_l)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l))));
	_mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_r)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r))));
	_mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2))));
	_mm_storeu_si128(M128_CAST(cv_r+2), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2))));
	_mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
	_mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
	_mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));
	_mm_storeu_si128(M128_CAST(cv_r+6), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));
}
430 
// cv_l += cv_r, word-wise modulo 2^64 (two words per 128-bit add).
inline void add_blk(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
{
	_mm_storeu_si128(M128_CAST(cv_l), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(cv_l)),
		_mm_loadu_si128(CONST_M128_CAST(cv_r))));
	_mm_storeu_si128(M128_CAST(cv_l+2), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
		_mm_loadu_si128(CONST_M128_CAST(cv_r+2))));
	_mm_storeu_si128(M128_CAST(cv_l+4), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
		_mm_loadu_si128(CONST_M128_CAST(cv_r+4))));
	_mm_storeu_si128(M128_CAST(cv_l+6), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
		_mm_loadu_si128(CONST_M128_CAST(cv_r+6))));
}
446 
// Rotate each 64-bit word of cv left by the compile-time amount R.
// The XOP path uses the single-instruction rotate; the portable SSE path
// composes the rotate from a left shift, right shift and OR.
template <unsigned int R>
inline void rotate_blk(lsh_u64 cv[8])
{
#if defined(CRYPTOPP_XOP_AVAILABLE)
	_mm_storeu_si128(M128_CAST(cv),
		_mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), R));
	_mm_storeu_si128(M128_CAST(cv+2),
		_mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), R));
	_mm_storeu_si128(M128_CAST(cv+4),
		_mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R));
	_mm_storeu_si128(M128_CAST(cv+6),
		_mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), R));

#else
	_mm_storeu_si128(M128_CAST(cv), _mm_or_si128(
		_mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), R),
		_mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), 64-R)));
	_mm_storeu_si128(M128_CAST(cv+2), _mm_or_si128(
		_mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), R),
		_mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), 64-R)));
	_mm_storeu_si128(M128_CAST(cv+4), _mm_or_si128(
		_mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R),
		_mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), 64-R)));
	_mm_storeu_si128(M128_CAST(cv+6), _mm_or_si128(
		_mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), R),
		_mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), 64-R)));
#endif
}
475 
// cv_l ^= const_v, word-wise (mixes the step constants into the left CV).
inline void xor_with_const(lsh_u64 cv_l[8], const lsh_u64 const_v[8])
{
	_mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_l)),
		_mm_loadu_si128(CONST_M128_CAST(const_v))));
	_mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
		_mm_loadu_si128(CONST_M128_CAST(const_v+2))));
	_mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
		_mm_loadu_si128(CONST_M128_CAST(const_v+4))));
	_mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
		_mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
		_mm_loadu_si128(CONST_M128_CAST(const_v+6))));
}
491 
// Gamma rotation: rotate the j-th 64-bit word of cv_r left by
// g_gamma512[j] bits. All rotation amounts are multiples of 8, so each
// rotation is a byte permutation realized with a single pshufb per pair
// of words (SSSE3 requirement of this file).
inline void rotate_msg_gamma(lsh_u64 cv_r[8])
{
	// g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
	_mm_storeu_si128(M128_CAST(cv_r+0),
		_mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
			_mm_set_epi8(13,12,11,10, 9,8,15,14, 7,6,5,4, 3,2,1,0)));
	_mm_storeu_si128(M128_CAST(cv_r+2),
		_mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
			_mm_set_epi8(9,8,15,14, 13,12,11,10, 3,2,1,0, 7,6,5,4)));

	_mm_storeu_si128(M128_CAST(cv_r+4),
		_mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
			_mm_set_epi8(12,11,10,9, 8,15,14,13, 6,5,4,3, 2,1,0,7)));
	_mm_storeu_si128(M128_CAST(cv_r+6),
		_mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
			_mm_set_epi8(8,15,14,13, 12,11,10,9, 2,1,0,7, 6,5,4,3)));
}
509 
510 inline void word_perm(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
511 {
512  __m128i temp[2];
513  temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
514  _mm_storeu_si128(M128_CAST(cv_l+0), _mm_unpacklo_epi64(
515  _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
516  _mm_loadu_si128(CONST_M128_CAST(cv_l+0))));
517  _mm_storeu_si128(M128_CAST(cv_l+2), _mm_unpackhi_epi64(
518  temp[0], _mm_loadu_si128(CONST_M128_CAST(cv_l+2))));
519 
520  temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+4));
521  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_unpacklo_epi64(
522  _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
523  _mm_loadu_si128(CONST_M128_CAST(cv_l+4))));
524  _mm_storeu_si128(M128_CAST(cv_l+6), _mm_unpackhi_epi64(
525  temp[0], _mm_loadu_si128(CONST_M128_CAST(cv_l+6))));
526  _mm_storeu_si128(M128_CAST(cv_r+2), _mm_shuffle_epi32(
527  _mm_loadu_si128(CONST_M128_CAST(cv_r+2)), _MM_SHUFFLE(1,0,3,2)));
528 
529  temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_r+0));
530  _mm_storeu_si128(M128_CAST(cv_r+0), _mm_unpacklo_epi64(
531  _mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
532  _mm_loadu_si128(CONST_M128_CAST(cv_r+2))));
533  _mm_storeu_si128(M128_CAST(cv_r+2), _mm_unpackhi_epi64(
534  _mm_loadu_si128(CONST_M128_CAST(cv_r+2)), temp[0]));
535  _mm_storeu_si128(M128_CAST(cv_r+6), _mm_shuffle_epi32(
536  _mm_loadu_si128(CONST_M128_CAST(cv_r+6)), _MM_SHUFFLE(1,0,3,2)));
537 
538  temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_r+4));
539  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_unpacklo_epi64(
540  _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
541  _mm_loadu_si128(CONST_M128_CAST(cv_r+6))));
542  _mm_storeu_si128(M128_CAST(cv_r+6), _mm_unpackhi_epi64(
543  _mm_loadu_si128(CONST_M128_CAST(cv_r+6)), temp[0]));
544 
545  temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
546  temp[1] = _mm_loadu_si128(CONST_M128_CAST(cv_l+2));
547 
548  _mm_storeu_si128(M128_CAST(cv_l+0),
549  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)));
550  _mm_storeu_si128(M128_CAST(cv_l+2),
551  _mm_loadu_si128(CONST_M128_CAST(cv_l+6)));
552  _mm_storeu_si128(M128_CAST(cv_l+4),
553  _mm_loadu_si128(CONST_M128_CAST(cv_r+4)));
554  _mm_storeu_si128(M128_CAST(cv_l+6),
555  _mm_loadu_si128(CONST_M128_CAST(cv_r+6)));
556  _mm_storeu_si128(M128_CAST(cv_r+4),
557  _mm_loadu_si128(CONST_M128_CAST(cv_r+0)));
558  _mm_storeu_si128(M128_CAST(cv_r+6),
559  _mm_loadu_si128(CONST_M128_CAST(cv_r+2)));
560 
561  _mm_storeu_si128(M128_CAST(cv_r+0), temp[0]);
562  _mm_storeu_si128(M128_CAST(cv_r+2), temp[1]);
563 };
564 
565 /* -------------------------------------------------------- *
566 * step function
567 * -------------------------------------------------------- */
568 
// One LSH mix step. Alpha/Beta select the even- or odd-step rotation
// amounts (ROT_EVEN_* or ROT_ODD_*). Sequence: add, rotate-left by Alpha,
// XOR in step constants, add back, rotate-left by Beta, add, then the
// byte-granular gamma rotation of cv_r.
template <unsigned int Alpha, unsigned int Beta>
inline void mix(lsh_u64 cv_l[8], lsh_u64 cv_r[8], const lsh_u64 const_v[8])
{
	add_blk(cv_l, cv_r);
	rotate_blk<Alpha>(cv_l);
	xor_with_const(cv_l, const_v);
	add_blk(cv_r, cv_l);
	rotate_blk<Beta>(cv_r);
	add_blk(cv_l, cv_r);
	rotate_msg_gamma(cv_r);
}
580 
581 /* -------------------------------------------------------- *
582 * compression function
583 * -------------------------------------------------------- */
584 
// Compression function: absorbs one 256-byte message block into the
// chaining variables. Runs NUM_STEPS (28) steps as alternating even/odd
// pairs; the first pair uses the freshly loaded sub-messages, subsequent
// pairs expand them first.
inline void compress(LSH512_SSSE3_Context* ctx, const lsh_u8 pdMsgBlk[LSH512_MSG_BLK_BYTE_LEN])
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	// Sub-message views over ctx state (s_state aliases ctx->cv_l's array)
	LSH512_SSSE3_Internal s_state(ctx->cv_l);
	LSH512_SSSE3_Internal* i_state = &s_state;

	const lsh_u64* const_v = NULL;
	lsh_u64 *cv_l = ctx->cv_l;
	lsh_u64 *cv_r = ctx->cv_r;

	load_msg_blk(i_state, pdMsgBlk);

	// Steps 0 and 1 use the unexpanded sub-messages
	msg_add_even(cv_l, cv_r, i_state);
	load_sc(&const_v, 0);
	mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
	word_perm(cv_l, cv_r);

	msg_add_odd(cv_l, cv_r, i_state);
	load_sc(&const_v, 8);
	mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
	word_perm(cv_l, cv_r);

	// Steps 2..27: expand sub-messages, then even/odd step pair.
	// Each step consumes 8 constants, hence the stride of 16 per pair.
	for (size_t i = 1; i < NUM_STEPS / 2; i++)
	{
		msg_exp_even(i_state);
		msg_add_even(cv_l, cv_r, i_state);
		load_sc(&const_v, 16 * i);
		mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
		word_perm(cv_l, cv_r);

		msg_exp_odd(i_state);
		msg_add_odd(cv_l, cv_r, i_state);
		load_sc(&const_v, 16 * i + 8);
		mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
		word_perm(cv_l, cv_r);
	}

	// Final message addition
	msg_exp_even(i_state);
	msg_add_even(cv_l, cv_r, i_state);
}
626 
627 /* -------------------------------------------------------- */
628 
// Copy a 16-word initialization vector into the chaining variables:
// iv[0..7] -> cv_l, iv[8..15] -> cv_r.
inline void load_iv(word64 cv_l[8], word64 cv_r[8], const word64 iv[16])
{
	// The IV's are 32-byte aligned so we can use aligned loads.
	_mm_storeu_si128(M128_CAST(cv_l+0),
		_mm_load_si128(CONST_M128_CAST(iv+0)));
	_mm_storeu_si128(M128_CAST(cv_l+2),
		_mm_load_si128(CONST_M128_CAST(iv+2)));
	_mm_storeu_si128(M128_CAST(cv_l+4),
		_mm_load_si128(CONST_M128_CAST(iv+4)));
	_mm_storeu_si128(M128_CAST(cv_l+6),
		_mm_load_si128(CONST_M128_CAST(iv+6)));
	_mm_storeu_si128(M128_CAST(cv_r+0),
		_mm_load_si128(CONST_M128_CAST(iv+8)));
	_mm_storeu_si128(M128_CAST(cv_r+2),
		_mm_load_si128(CONST_M128_CAST(iv+10)));
	_mm_storeu_si128(M128_CAST(cv_r+4),
		_mm_load_si128(CONST_M128_CAST(iv+12)));
	_mm_storeu_si128(M128_CAST(cv_r+6),
		_mm_load_si128(CONST_M128_CAST(iv+14)));
}
649 
650 inline void zero_iv(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
651 {
652  _mm_storeu_si128(M128_CAST(cv_l+0), _mm_setzero_si128());
653  _mm_storeu_si128(M128_CAST(cv_l+2), _mm_setzero_si128());
654  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_setzero_si128());
655  _mm_storeu_si128(M128_CAST(cv_l+6), _mm_setzero_si128());
656  _mm_storeu_si128(M128_CAST(cv_r+0), _mm_setzero_si128());
657  _mm_storeu_si128(M128_CAST(cv_r+2), _mm_setzero_si128());
658  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_setzero_si128());
659  _mm_storeu_si128(M128_CAST(cv_r+6), _mm_setzero_si128());
660 }
661 
662 inline void zero_submsgs(LSH512_SSSE3_Context* ctx)
663 {
664  lsh_u64* sub_msgs = ctx->sub_msgs;
665 
666  _mm_storeu_si128(M128_CAST(sub_msgs+ 0),
667  _mm_setzero_si128());
668  _mm_storeu_si128(M128_CAST(sub_msgs+ 2),
669  _mm_setzero_si128());
670  _mm_storeu_si128(M128_CAST(sub_msgs+ 4),
671  _mm_setzero_si128());
672  _mm_storeu_si128(M128_CAST(sub_msgs+ 6),
673  _mm_setzero_si128());
674  _mm_storeu_si128(M128_CAST(sub_msgs+ 8),
675  _mm_setzero_si128());
676  _mm_storeu_si128(M128_CAST(sub_msgs+10),
677  _mm_setzero_si128());
678  _mm_storeu_si128(M128_CAST(sub_msgs+12),
679  _mm_setzero_si128());
680  _mm_storeu_si128(M128_CAST(sub_msgs+14),
681  _mm_setzero_si128());
682 }
683 
// Reset state for LSH-512-224: zero the sub-messages and load the 224-bit IV.
inline void init224(LSH512_SSSE3_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	zero_submsgs(ctx);
	load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV224);
}
691 
// Reset state for LSH-512-256: zero the sub-messages and load the 256-bit IV.
inline void init256(LSH512_SSSE3_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	zero_submsgs(ctx);
	load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV256);
}
699 
// Reset state for LSH-512-384: zero the sub-messages and load the 384-bit IV.
inline void init384(LSH512_SSSE3_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	zero_submsgs(ctx);
	load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV384);
}
707 
// Reset state for LSH-512-512: zero the sub-messages and load the 512-bit IV.
inline void init512(LSH512_SSSE3_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	zero_submsgs(ctx);
	load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV512);
}
715 
716 /* -------------------------------------------------------- */
717 
718 inline void fin(LSH512_SSSE3_Context* ctx)
719 {
720  CRYPTOPP_ASSERT(ctx != NULLPTR);
721 
722  _mm_storeu_si128(M128_CAST(ctx->cv_l+0), _mm_xor_si128(
723  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+0)),
724  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+0))));
725  _mm_storeu_si128(M128_CAST(ctx->cv_l+2), _mm_xor_si128(
726  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+2)),
727  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+2))));
728  _mm_storeu_si128(M128_CAST(ctx->cv_l+4), _mm_xor_si128(
729  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+4)),
730  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+4))));
731  _mm_storeu_si128(M128_CAST(ctx->cv_l+6), _mm_xor_si128(
732  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+6)),
733  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+6))));
734 }
735 
736 /* -------------------------------------------------------- */
737 
// Extract the digest from cv_l. The digest length is decoded from the
// algorithm type code; if the bit length is not a whole number of bytes,
// the trailing bits of the last byte are masked off.
inline void get_hash(LSH512_SSSE3_Context* ctx, lsh_u8* pbHashVal)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);
	CRYPTOPP_ASSERT(ctx->alg_type != 0);
	CRYPTOPP_ASSERT(pbHashVal != NULLPTR);

	lsh_uint alg_type = ctx->alg_type;
	lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
	lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);

	// Copy the leading hash_val_byte_len bytes of cv_l as the digest
	memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
	if (hash_val_bit_len){
		// Keep only the top hash_val_bit_len bits of the final byte
		pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
	}
}
754 
755 /* -------------------------------------------------------- */
756 
// Initialize the context for the algorithm type stored in ctx->alg_type.
// Standard digest sizes use precomputed IVs; any other (truncated) size
// derives its IV by running the step functions over a seeded zero state.
// Returns LSH_SUCCESS.
lsh_err lsh512_init_ssse3(LSH512_SSSE3_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);
	CRYPTOPP_ASSERT(ctx->alg_type != 0);

	lsh_u32 alg_type = ctx->alg_type;
	const lsh_u64* const_v = NULL;
	ctx->remain_databitlen = 0;

	// Fast path: precomputed IV for the standard digest sizes
	switch (alg_type){
	case LSH_TYPE_512_512:
		init512(ctx);
		return LSH_SUCCESS;
	case LSH_TYPE_512_384:
		init384(ctx);
		return LSH_SUCCESS;
	case LSH_TYPE_512_256:
		init256(ctx);
		return LSH_SUCCESS;
	case LSH_TYPE_512_224:
		init224(ctx);
		return LSH_SUCCESS;
	default:
		break;
	}

	// Non-standard digest size: derive the IV from the block byte length
	// and the requested hash bit length.
	lsh_u64* cv_l = ctx->cv_l;
	lsh_u64* cv_r = ctx->cv_r;

	zero_iv(cv_l, cv_r);
	cv_l[0] = LSH512_HASH_VAL_MAX_BYTE_LEN;
	cv_l[1] = LSH_GET_HASHBIT(alg_type);

	for (size_t i = 0; i < NUM_STEPS / 2; i++)
	{
		//Mix
		load_sc(&const_v, i * 16);
		mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
		word_perm(cv_l, cv_r);

		load_sc(&const_v, i * 16 + 8);
		mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
		word_perm(cv_l, cv_r);
	}

	return LSH_SUCCESS;
}
804 
// Absorb databitlen bits of input. This path is byte oriented (databitlen
// is always a multiple of 8); partial blocks are buffered in
// ctx->last_block and compressed once a full 256-byte block accumulates.
// Returns LSH_SUCCESS, or LSH_ERR_INVALID_STATE / LSH_ERR_INVALID_DATABITLEN
// if the buffered state is inconsistent.
lsh_err lsh512_update_ssse3(LSH512_SSSE3_Context* ctx, const lsh_u8* data, size_t databitlen)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);
	CRYPTOPP_ASSERT(data != NULLPTR);
	CRYPTOPP_ASSERT(databitlen % 8 == 0);
	CRYPTOPP_ASSERT(ctx->alg_type != 0);

	if (databitlen == 0){
		return LSH_SUCCESS;
	}

	// We are byte oriented. tail bits will always be 0.
	size_t databytelen = databitlen >> 3;
	// lsh_uint pos2 = databitlen & 0x7;
	const size_t pos2 = 0;

	size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
	// lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
	const size_t remain_msg_bit = 0;

	if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
		return LSH_ERR_INVALID_STATE;
	}
	if (remain_msg_bit > 0){
		return LSH_ERR_INVALID_DATABITLEN;
	}

	// Not enough for a full block yet: buffer and return
	if (databytelen + remain_msg_byte < LSH512_MSG_BLK_BYTE_LEN){
		memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
		ctx->remain_databitlen += (lsh_uint)databitlen;
		remain_msg_byte += (lsh_uint)databytelen;
		if (pos2){
			// Dead in this byte-oriented build (pos2 is always 0)
			ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
		}
		return LSH_SUCCESS;
	}

	// Complete and compress the buffered partial block first
	if (remain_msg_byte > 0){
		size_t more_byte = LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte;
		memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
		compress(ctx, ctx->last_block);
		data += more_byte;
		databytelen -= more_byte;
		remain_msg_byte = 0;
		ctx->remain_databitlen = 0;
	}

	// Compress full blocks directly from the caller's buffer
	while (databytelen >= LSH512_MSG_BLK_BYTE_LEN)
	{
		// This call to compress caused some trouble.
		// The data pointer can become unaligned in the
		// previous block.
		compress(ctx, data);
		data += LSH512_MSG_BLK_BYTE_LEN;
		databytelen -= LSH512_MSG_BLK_BYTE_LEN;
	}

	// Buffer whatever is left for the next call or finalization
	if (databytelen > 0){
		memcpy(ctx->last_block, data, databytelen);
		ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
	}

	if (pos2){
		// Dead in this byte-oriented build (pos2 is always 0)
		ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
		ctx->remain_databitlen += pos2;
	}
	return LSH_SUCCESS;
}
873 
// Finalize: pad the buffered partial block (0x80 then zeros), compress it,
// fold the chaining variables and write the digest to hashval.
// Returns LSH_SUCCESS, or LSH_ERR_INVALID_STATE on inconsistent state.
lsh_err lsh512_final_ssse3(LSH512_SSSE3_Context* ctx, lsh_u8* hashval)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);
	CRYPTOPP_ASSERT(hashval != NULLPTR);

	// We are byte oriented. tail bits will always be 0.
	size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
	// lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
	const size_t remain_msg_bit = 0;

	if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
		return LSH_ERR_INVALID_STATE;
	}

	// Padding: set the bit following the message; remain_msg_bit is
	// always 0 here, so the 0x80 branch is taken.
	if (remain_msg_bit){
		ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
	}
	else{
		ctx->last_block[remain_msg_byte] = 0x80;
	}
	// Zero-fill the remainder of the final block
	memset(ctx->last_block + remain_msg_byte + 1, 0, LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);

	compress(ctx, ctx->last_block);

	fin(ctx);
	get_hash(ctx, hashval);

	return LSH_SUCCESS;
}
903 
904 ANONYMOUS_NAMESPACE_END
905 
906 NAMESPACE_BEGIN(CryptoPP)
907 
// Public entry point: reset the hash state (SSSE3 path). Clears the
// buffered bit count before building the context view so the context's
// reference sees a clean value.
extern
void LSH512_Base_Restart_SSSE3(word64* state)
{
	state[RemainingBits] = 0;
	LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
	lsh_err err = lsh512_init_ssse3(&ctx);

	if (err != LSH_SUCCESS)
		throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_init_ssse3 failed");
}
918 
// Public entry point: absorb 'size' bytes of input (SSSE3 path).
// The core routine is bit oriented, hence the 8*size conversion.
extern
void LSH512_Base_Update_SSSE3(word64* state, const byte *input, size_t size)
{
	LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
	lsh_err err = lsh512_update_ssse3(&ctx, input, 8*size);

	if (err != LSH_SUCCESS)
		throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_update_ssse3 failed");
}
928 
// Public entry point: finalize and write the digest (SSSE3 path).
// The size parameter is unused; the digest length comes from the
// algorithm type stored in the state.
extern
void LSH512_Base_TruncatedFinal_SSSE3(word64* state, byte *hash, size_t)
{
	LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
	lsh_err err = lsh512_final_ssse3(&ctx, hash);

	if (err != LSH_SUCCESS)
		throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_final_ssse3 failed");
}
938 
939 NAMESPACE_END
940 
941 #endif // CRYPTOPP_SSSE3_AVAILABLE
#define M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:609
#define CONST_M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:614
Base class for all exceptions thrown by the library.
Definition: cryptlib.h:159
@ OTHER_ERROR
Some other error occurred not belonging to other categories.
Definition: cryptlib.h:177
Library configuration file.
unsigned char byte
8-bit unsigned datatype
Definition: config_int.h:56
unsigned int word32
32-bit unsigned datatype
Definition: config_int.h:62
unsigned long long word64
64-bit unsigned datatype
Definition: config_int.h:91
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition: cryptlib.h:145
EnumToType< ByteOrder, LITTLE_ENDIAN_ORDER > LittleEndian
Provides a constant for LittleEndian.
Definition: cryptlib.h:150
Classes for the LSH hash functions.
Utility functions for the Crypto++ library.
T rotlConstant(T x)
Performs a left rotate.
Definition: misc.h:1548
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition: misc.h:2208
T rotlFixed(T x, unsigned int y)
Performs a left rotate.
Definition: misc.h:1599
Crypto++ library namespace.
Precompiled header file.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:68