Crypto++  8.7
Free C++ class library of cryptographic schemes
lsh256_avx.cpp
// lsh256_avx.cpp - written and placed in the public domain by Jeffrey Walton
// Based on the specification and source code provided by
// Korea Internet & Security Agency (KISA) website. Also
// see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
// and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.

// We are hitting some sort of GCC bug in the LSH AVX2 code path.
// Clang is OK on the AVX2 code path. We believe it is GCC Issue
// 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
// makes using zeroupper a little tricky.

#include "pch.h"
#include "config.h"

#include "lsh.h"
#include "misc.h"

// Squash MS LNK4221 and libtool warnings
extern const char LSH256_AVX_FNAME[] = __FILE__;

#if defined(CRYPTOPP_AVX2_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)

#if defined(CRYPTOPP_AVX2_AVAILABLE)
# include <emmintrin.h>
# include <immintrin.h>
#endif

// GCC at 4.5. Clang is unknown. Also see https://stackoverflow.com/a/42493893.
#if (CRYPTOPP_GCC_VERSION >= 40500)
# include <x86intrin.h>
#endif

ANONYMOUS_NAMESPACE_BEGIN

/* LSH Constants */

const unsigned int LSH256_MSG_BLK_BYTE_LEN = 128;
// const unsigned int LSH256_MSG_BLK_BIT_LEN = 1024;
// const unsigned int LSH256_CV_BYTE_LEN = 64;
const unsigned int LSH256_HASH_VAL_MAX_BYTE_LEN = 32;

// const unsigned int MSG_BLK_WORD_LEN = 32;
const unsigned int CV_WORD_LEN = 16;
const unsigned int CONST_WORD_LEN = 8;
// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
// const unsigned int WORD_BIT_LEN = 32;
const unsigned int NUM_STEPS = 26;

const unsigned int ROT_EVEN_ALPHA = 29;
const unsigned int ROT_EVEN_BETA = 1;
const unsigned int ROT_ODD_ALPHA = 5;
const unsigned int ROT_ODD_BETA = 17;

const unsigned int LSH_TYPE_256_256 = 0x0000020;
const unsigned int LSH_TYPE_256_224 = 0x000001C;

// const unsigned int LSH_TYPE_224 = LSH_TYPE_256_224;
// const unsigned int LSH_TYPE_256 = LSH_TYPE_256_256;

/* Error Code */

const unsigned int LSH_SUCCESS = 0x0;
// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
const unsigned int LSH_ERR_INVALID_STATE = 0x2404;

/* Index into our state array */

const unsigned int AlgorithmType = 80;
const unsigned int RemainingBits = 81;

NAMESPACE_END

NAMESPACE_BEGIN(CryptoPP)
NAMESPACE_BEGIN(LSH)

// lsh256.cpp
extern const word32 LSH256_IV224[CV_WORD_LEN];
extern const word32 LSH256_IV256[CV_WORD_LEN];
extern const word32 LSH256_StepConstants[CONST_WORD_LEN * NUM_STEPS];

NAMESPACE_END // LSH
NAMESPACE_END // Crypto++

ANONYMOUS_NAMESPACE_BEGIN

using CryptoPP::byte;
using CryptoPP::word32;
using CryptoPP::rotlFixed;
using CryptoPP::rotlConstant;

using CryptoPP::GetBlock;
using CryptoPP::LittleEndian;
using CryptoPP::ConditionalByteReverse;
using CryptoPP::LITTLE_ENDIAN_ORDER;

typedef byte lsh_u8;
typedef word32 lsh_u32;
typedef word32 lsh_uint;
typedef word32 lsh_err;
typedef word32 lsh_type;

using CryptoPP::LSH::LSH256_IV224;
using CryptoPP::LSH::LSH256_IV256;
using CryptoPP::LSH::LSH256_StepConstants;

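// Layout of the caller's word32 state array as used by this code path:
// words [0,8) hold cv_l, [8,16) cv_r, [16,48) the four sub-message
// buffers, [48,80) the 128-byte last_block, word 80 the algorithm type
// and word 81 the remaining bit count.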
struct LSH256_AVX2_Context
{
    LSH256_AVX2_Context(word32* state, word32 algType, word32& remainingBitLength) :
        cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
        last_block(reinterpret_cast<byte*>(state+48)),
        remain_databitlen(remainingBitLength),
        alg_type(static_cast<lsh_type>(algType)) {}

    lsh_u32* cv_l;  // start of our state block
    lsh_u32* cv_r;
    lsh_u32* sub_msgs;
    lsh_u8* last_block;
    lsh_u32& remain_databitlen;
    lsh_type alg_type;
};

struct LSH256_AVX2_Internal
{
    LSH256_AVX2_Internal(word32* state) :
        submsg_e_l(state+16), submsg_e_r(state+24),
        submsg_o_l(state+32), submsg_o_r(state+40) { }

    lsh_u32* submsg_e_l; /* even left sub-message */
    lsh_u32* submsg_e_r; /* even right sub-message */
    lsh_u32* submsg_o_l; /* odd left sub-message */
    lsh_u32* submsg_o_r; /* odd right sub-message */
};

// Zero the upper 128 bits of all YMM registers on exit.
// It avoids AVX state transition penalties when saving state.
// GCC Issue 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735,
// makes using zeroupper a little tricky.

struct AVX_Cleanup
{
    ~AVX_Cleanup() {
        _mm256_zeroupper();
    }
};

// const word32 g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };

/* LSH AlgType Macro */

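// The algorithm type encodes the digest size: the low 16 bits are the
// digest length in bytes (0x20 = 32 for LSH-256-256, 0x1C = 28 for
// LSH-256-224), and the top 8 bits give the number of unused bits in
// the final byte when the digest is not a whole number of bytes.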
inline bool LSH_IS_LSH512(lsh_uint val) {
    return (val & 0xf0000) == 0;
}

inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
    return val >> 24;
}

inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
    return val & 0xffff;
}

inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
    return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
}

inline lsh_u32 loadLE32(lsh_u32 v) {
    return ConditionalByteReverse(LITTLE_ENDIAN_ORDER, v);
}

lsh_u32 ROTL(lsh_u32 x, lsh_u32 r) {
    return rotlFixed(x, r);
}

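// load_msg_blk copies the 128-byte message block into the four 8-word
// sub-message buffers (even/odd, left/right), 32 bytes each.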
// Original code relied upon unaligned lsh_u32 buffer
inline void load_msg_blk(LSH256_AVX2_Internal* i_state, const lsh_u8 msgblk[LSH256_MSG_BLK_BYTE_LEN])
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u32* submsg_e_l = i_state->submsg_e_l;
    lsh_u32* submsg_e_r = i_state->submsg_e_r;
    lsh_u32* submsg_o_l = i_state->submsg_o_l;
    lsh_u32* submsg_o_r = i_state->submsg_o_r;

    _mm256_storeu_si256(M256_CAST(submsg_e_l+0),
        _mm256_loadu_si256(CONST_M256_CAST(msgblk+0)));
    _mm256_storeu_si256(M256_CAST(submsg_e_r+0),
        _mm256_loadu_si256(CONST_M256_CAST(msgblk+32)));
    _mm256_storeu_si256(M256_CAST(submsg_o_l+0),
        _mm256_loadu_si256(CONST_M256_CAST(msgblk+64)));
    _mm256_storeu_si256(M256_CAST(submsg_o_r+0),
        _mm256_loadu_si256(CONST_M256_CAST(msgblk+96)));
}

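// Message expansion: each new even (odd) sub-message is the previous odd
// (even) sub-message plus a word-permuted copy of the previous even (odd)
// sub-message. The byte shuffle below implements that word permutation.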
inline void msg_exp_even(LSH256_AVX2_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u32* submsg_e_l = i_state->submsg_e_l;
    lsh_u32* submsg_e_r = i_state->submsg_e_r;
    lsh_u32* submsg_o_l = i_state->submsg_o_l;
    lsh_u32* submsg_o_r = i_state->submsg_o_r;

    const __m256i mask = _mm256_set_epi32(0x1b1a1918, 0x17161514,
        0x13121110, 0x1f1e1d1c, 0x07060504, 0x03020100, 0x0b0a0908, 0x0f0e0d0c);

    _mm256_storeu_si256(M256_CAST(submsg_e_l+0), _mm256_add_epi32(
        _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)),
        _mm256_shuffle_epi8(
            _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)), mask)));
    _mm256_storeu_si256(M256_CAST(submsg_e_r+0), _mm256_add_epi32(
        _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)),
        _mm256_shuffle_epi8(
            _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)), mask)));
}

inline void msg_exp_odd(LSH256_AVX2_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u32* submsg_e_l = i_state->submsg_e_l;
    lsh_u32* submsg_e_r = i_state->submsg_e_r;
    lsh_u32* submsg_o_l = i_state->submsg_o_l;
    lsh_u32* submsg_o_r = i_state->submsg_o_r;

    const __m256i mask = _mm256_set_epi32(0x1b1a1918, 0x17161514,
        0x13121110, 0x1f1e1d1c, 0x07060504, 0x03020100, 0x0b0a0908, 0x0f0e0d0c);

    _mm256_storeu_si256(M256_CAST(submsg_o_l+0), _mm256_add_epi32(
        _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)),
        _mm256_shuffle_epi8(
            _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)), mask)));
    _mm256_storeu_si256(M256_CAST(submsg_o_r+0), _mm256_add_epi32(
        _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)),
        _mm256_shuffle_epi8(
            _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)), mask)));
}

inline void load_sc(const lsh_u32** p_const_v, size_t i)
{
    CRYPTOPP_ASSERT(p_const_v != NULLPTR);

    *p_const_v = &LSH256_StepConstants[i];
}

inline void msg_add_even(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_AVX2_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u32* submsg_e_l = i_state->submsg_e_l;
    lsh_u32* submsg_e_r = i_state->submsg_e_r;

    _mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_xor_si256(
        _mm256_loadu_si256(CONST_M256_CAST(cv_l+0)),
        _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0))));
    _mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_xor_si256(
        _mm256_loadu_si256(CONST_M256_CAST(cv_r+0)),
        _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0))));
}

inline void msg_add_odd(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_AVX2_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u32* submsg_o_l = i_state->submsg_o_l;
    lsh_u32* submsg_o_r = i_state->submsg_o_r;

    _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
        _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
        _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l))));
    _mm256_storeu_si256(M256_CAST(cv_r), _mm256_xor_si256(
        _mm256_loadu_si256(CONST_M256_CAST(cv_r)),
        _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r))));
}

inline void add_blk(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
{
    _mm256_storeu_si256(M256_CAST(cv_l), _mm256_add_epi32(
        _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
        _mm256_loadu_si256(CONST_M256_CAST(cv_r))));
}

template <unsigned int R>
inline void rotate_blk(lsh_u32 cv[8])
{
    _mm256_storeu_si256(M256_CAST(cv), _mm256_or_si256(
        _mm256_slli_epi32(_mm256_loadu_si256(CONST_M256_CAST(cv)), R),
        _mm256_srli_epi32(_mm256_loadu_si256(CONST_M256_CAST(cv)), 32-R)));
}

inline void xor_with_const(lsh_u32 cv_l[8], const lsh_u32 const_v[8])
{
    _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
        _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
        _mm256_loadu_si256(CONST_M256_CAST(const_v))));
}

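// The gamma rotation amounts for LSH-256 are all multiples of 8 bits
// (0, 8, 16, 24, 24, 16, 8, 0), so the word-wise rotation of cv_r can
// be performed with a single byte shuffle instead of shifts.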
inline void rotate_msg_gamma(lsh_u32 cv_r[8])
{
    // g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };
    _mm256_storeu_si256(M256_CAST(cv_r+0),
        _mm256_shuffle_epi8(_mm256_loadu_si256(CONST_M256_CAST(cv_r+0)),
            _mm256_set_epi8(
                /* hi lane */ 15,14,13,12, 10,9,8,11, 5,4,7,6, 0,3,2,1,
                /* lo lane */ 12,15,14,13, 9,8,11,10, 6,5,4,7, 3,2,1,0)));
}

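// word_perm applies the word permutation that follows each mix step,
// using in-lane 32-bit shuffles plus a cross-lane 128-bit permute.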
inline void word_perm(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
{
    __m256i temp = _mm256_shuffle_epi32(
        _mm256_loadu_si256(CONST_M256_CAST(cv_l)), _MM_SHUFFLE(3,1,0,2));
    _mm256_storeu_si256(M256_CAST(cv_r),
        _mm256_shuffle_epi32(
            _mm256_loadu_si256(CONST_M256_CAST(cv_r)), _MM_SHUFFLE(1,2,3,0)));
    _mm256_storeu_si256(M256_CAST(cv_l),
        _mm256_permute2x128_si256(temp,
            _mm256_loadu_si256(CONST_M256_CAST(cv_r)), _MM_SHUFFLE(0,3,0,1)));
    _mm256_storeu_si256(M256_CAST(cv_r),
        _mm256_permute2x128_si256(temp,
            _mm256_loadu_si256(CONST_M256_CAST(cv_r)), _MM_SHUFFLE(0,2,0,0)));
}

/* -------------------------------------------------------- *
 * step function
 * -------------------------------------------------------- */

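// One mix step: cv_l += cv_r, rotate cv_l left by Alpha, xor the step
// constant into cv_l, cv_r += cv_l, rotate cv_r left by Beta, cv_l += cv_r,
// then rotate the words of cv_r by the gamma amounts.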
template <unsigned int Alpha, unsigned int Beta>
inline void mix(lsh_u32 cv_l[8], lsh_u32 cv_r[8], const lsh_u32 const_v[8])
{
    add_blk(cv_l, cv_r);
    rotate_blk<Alpha>(cv_l);
    xor_with_const(cv_l, const_v);
    add_blk(cv_r, cv_l);
    rotate_blk<Beta>(cv_r);
    add_blk(cv_l, cv_r);
    rotate_msg_gamma(cv_r);
}

/* -------------------------------------------------------- *
 * compression function
 * -------------------------------------------------------- */

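// compress() absorbs one 128-byte block: the even message words are added
// into the chaining values, then NUM_STEPS (26) mix/word_perm steps run,
// alternating even and odd message words and expanding the sub-messages
// from the second pair of steps onward; a final message addition finishes
// the block.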
inline void compress(LSH256_AVX2_Context* ctx, const lsh_u8 pdMsgBlk[LSH256_MSG_BLK_BYTE_LEN])
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    LSH256_AVX2_Internal s_state(ctx->cv_l);
    LSH256_AVX2_Internal* i_state = &s_state;

    const lsh_u32* const_v = NULL;
    lsh_u32* cv_l = ctx->cv_l;
    lsh_u32* cv_r = ctx->cv_r;

    load_msg_blk(i_state, pdMsgBlk);

    msg_add_even(cv_l, cv_r, i_state);
    load_sc(&const_v, 0);
    mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);

    msg_add_odd(cv_l, cv_r, i_state);
    load_sc(&const_v, 8);
    mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);

    for (size_t i = 1; i < NUM_STEPS / 2; i++)
    {
        msg_exp_even(i_state);
        msg_add_even(cv_l, cv_r, i_state);
        load_sc(&const_v, 16 * i);
        mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);

        msg_exp_odd(i_state);
        msg_add_odd(cv_l, cv_r, i_state);
        load_sc(&const_v, 16 * i + 8);
        mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);
    }

    msg_exp_even(i_state);
    msg_add_even(cv_l, cv_r, i_state);
}

/* -------------------------------------------------------- */

inline void load_iv(word32 cv_l[8], word32 cv_r[8], const word32 iv[16])
{
    // The IVs are 32-byte aligned so we can use aligned loads.
    _mm256_storeu_si256(M256_CAST(cv_l+0),
        _mm256_load_si256(CONST_M256_CAST(iv+0)));
    _mm256_storeu_si256(M256_CAST(cv_r+0),
        _mm256_load_si256(CONST_M256_CAST(iv+8)));
}

inline void zero_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
{
    _mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_setzero_si256());
    _mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_setzero_si256());
}

inline void zero_submsgs(LSH256_AVX2_Context* ctx)
{
    lsh_u32* sub_msgs = ctx->sub_msgs;

    _mm256_storeu_si256(M256_CAST(sub_msgs+ 0), _mm256_setzero_si256());
    _mm256_storeu_si256(M256_CAST(sub_msgs+ 8), _mm256_setzero_si256());
    _mm256_storeu_si256(M256_CAST(sub_msgs+16), _mm256_setzero_si256());
    _mm256_storeu_si256(M256_CAST(sub_msgs+24), _mm256_setzero_si256());
}

inline void init224(LSH256_AVX2_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV224);
}

inline void init256(LSH256_AVX2_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV256);
}

/* -------------------------------------------------------- */

inline void fin(LSH256_AVX2_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    _mm256_storeu_si256(M256_CAST(ctx->cv_l+0), _mm256_xor_si256(
        _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_l+0)),
        _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_r+0))));
}

/* -------------------------------------------------------- */

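// get_hash() copies LSH_GET_HASHBYTE(alg_type) bytes of cv_l as the digest
// and masks the trailing bits of the final byte when the digest length is
// not a whole number of bytes.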
inline void get_hash(LSH256_AVX2_Context* ctx, lsh_u8* pbHashVal)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);
    CRYPTOPP_ASSERT(pbHashVal != NULLPTR);

    lsh_uint alg_type = ctx->alg_type;
    lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
    lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);

    // Multiplying by sizeof(lsh_u8) looks odd...
    memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
    if (hash_val_bit_len){
        pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
    }
}

/* -------------------------------------------------------- */

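// lsh256_init_avx2() loads the precomputed IV for the two standard types.
// For any other algType it derives an IV by encoding the digest parameters
// into a zeroed state and running the step function, though only the two
// standard types are used here.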
lsh_err lsh256_init_avx2(LSH256_AVX2_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);

    lsh_u32 alg_type = ctx->alg_type;
    const lsh_u32* const_v = NULL;
    ctx->remain_databitlen = 0;

    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
    AVX_Cleanup cleanup;

    switch (alg_type)
    {
    case LSH_TYPE_256_256:
        init256(ctx);
        return LSH_SUCCESS;
    case LSH_TYPE_256_224:
        init224(ctx);
        return LSH_SUCCESS;
    default:
        break;
    }

    lsh_u32* cv_l = ctx->cv_l;
    lsh_u32* cv_r = ctx->cv_r;

    zero_iv(cv_l, cv_r);
    cv_l[0] = LSH256_HASH_VAL_MAX_BYTE_LEN;
    cv_l[1] = LSH_GET_HASHBIT(alg_type);

    for (size_t i = 0; i < NUM_STEPS / 2; i++)
    {
        // Mix
        load_sc(&const_v, i * 16);
        mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);

        load_sc(&const_v, i * 16 + 8);
        mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);
    }

    return LSH_SUCCESS;
}

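// lsh256_update_avx2() buffers partial input in last_block. Once a full
// 128-byte block is available it is compressed; whole blocks are then
// compressed straight from the caller's buffer and any tail is saved for
// the next call. Input is processed in whole bytes, so pos2 is always 0.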
lsh_err lsh256_update_avx2(LSH256_AVX2_Context* ctx, const lsh_u8* data, size_t databitlen)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(data != NULLPTR);
    CRYPTOPP_ASSERT(databitlen % 8 == 0);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);

    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
    AVX_Cleanup cleanup;

    if (databitlen == 0){
        return LSH_SUCCESS;
    }

    // We are byte oriented. Tail bits will always be 0.
    size_t databytelen = databitlen >> 3;
    // lsh_uint pos2 = databitlen & 0x7;
    const size_t pos2 = 0;

    size_t remain_msg_byte = ctx->remain_databitlen >> 3;
    // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
    const size_t remain_msg_bit = 0;

    if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
        return LSH_ERR_INVALID_STATE;
    }
    if (remain_msg_bit > 0){
        return LSH_ERR_INVALID_DATABITLEN;
    }

    if (databytelen + remain_msg_byte < LSH256_MSG_BLK_BYTE_LEN)
    {
        memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
        ctx->remain_databitlen += (lsh_uint)databitlen;
        remain_msg_byte += (lsh_uint)databytelen;
        if (pos2){
            ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
        }
        return LSH_SUCCESS;
    }

    if (remain_msg_byte > 0){
        size_t more_byte = LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte;
        memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
        compress(ctx, ctx->last_block);
        data += more_byte;
        databytelen -= more_byte;
        remain_msg_byte = 0;
        ctx->remain_databitlen = 0;
    }

    while (databytelen >= LSH256_MSG_BLK_BYTE_LEN)
    {
        // This call to compress caused some trouble.
        // The data pointer can become unaligned in the
        // previous block.
        compress(ctx, data);
        data += LSH256_MSG_BLK_BYTE_LEN;
        databytelen -= LSH256_MSG_BLK_BYTE_LEN;
    }

    if (databytelen > 0){
        memcpy(ctx->last_block, data, databytelen);
        ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
    }

    if (pos2){
        ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
        ctx->remain_databitlen += pos2;
    }

    return LSH_SUCCESS;
}

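// lsh256_final_avx2() appends the 0x80 padding byte, zero-fills the rest of
// the block, compresses it, folds cv_r into cv_l (fin), and writes the digest.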
lsh_err lsh256_final_avx2(LSH256_AVX2_Context* ctx, lsh_u8* hashval)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(hashval != NULLPTR);

    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
    AVX_Cleanup cleanup;

    // We are byte oriented. Tail bits will always be 0.
    size_t remain_msg_byte = ctx->remain_databitlen >> 3;
    // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
    const size_t remain_msg_bit = 0;

    if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
        return LSH_ERR_INVALID_STATE;
    }

    if (remain_msg_bit){
        ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
    }
    else{
        ctx->last_block[remain_msg_byte] = 0x80;
    }
    memset(ctx->last_block + remain_msg_byte + 1, 0, LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);

    compress(ctx, ctx->last_block);

    fin(ctx);
    get_hash(ctx, hashval);

    return LSH_SUCCESS;
}

ANONYMOUS_NAMESPACE_END

NAMESPACE_BEGIN(CryptoPP)

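// AVX2 entry points used by LSH256_Base (lsh256.cpp) when AVX2 is available.
// Each wraps the caller's state array in an LSH256_AVX2_Context and reports
// failures by throwing.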
extern
void LSH256_Base_Restart_AVX2(word32* state)
{
    state[RemainingBits] = 0;
    LSH256_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
    lsh_err err = lsh256_init_avx2(&ctx);

    if (err != LSH_SUCCESS)
        throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_init_avx2 failed");
}

extern
void LSH256_Base_Update_AVX2(word32* state, const byte *input, size_t size)
{
    LSH256_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
    lsh_err err = lsh256_update_avx2(&ctx, input, 8*size);

    if (err != LSH_SUCCESS)
        throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_update_avx2 failed");
}

extern
void LSH256_Base_TruncatedFinal_AVX2(word32* state, byte *hash, size_t)
{
    LSH256_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
    lsh_err err = lsh256_final_avx2(&ctx, hash);

    if (err != LSH_SUCCESS)
        throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_final_avx2 failed");
}

NAMESPACE_END

#endif // CRYPTOPP_AVX2_AVAILABLE