Crypto++
tiger.cpp
1 // tiger.cpp - written and placed in the public domain by Wei Dai
2 
3 #include "pch.h"
4 #include "tiger.h"
5 #include "misc.h"
6 #include "cpu.h"
7 
8 NAMESPACE_BEGIN(CryptoPP)
9 
10 void Tiger::InitState(HashWordType *state)
11 {
12  state[0] = W64LIT(0x0123456789ABCDEF);
13  state[1] = W64LIT(0xFEDCBA9876543210);
14  state[2] = W64LIT(0xF096A5B4C3B2E187);
15 }
16 
17 void Tiger::TruncatedFinal(byte *hash, size_t size)
18 {
19  ThrowIfInvalidTruncatedSize(size);
20 
21  PadLastBlock(56, 0x01);
22  CorrectEndianess(m_data, m_data, 56);
23 
24  m_data[7] = GetBitCountLo();
25 
26  Transform(m_state, m_data);
27  CorrectEndianess(m_state, m_state, DigestSize());
28  memcpy(hash, m_state, size);
29 
30  Restart(); // reinit for next use
31 }
32 
// Compression function: mixes the eight 64-bit words at X into the three
// 64-bit state words at digest, updating digest in place.
//
// Structure (identical in both code paths below):
//   pass with multiplier 5 over X,
//   key schedule, pass with multiplier 7,
//   key schedule, pass with multiplier 9,
//   then combine with the incoming state:
//     digest[0] ^= a;  digest[1] = b - digest[1];  digest[2] += c.
//
// digest: three-word state, read and written.
// X:      eight-word message block, read only.
33 void Tiger::Transform (word64 *digest, const word64 *X)
34 {
// Hand-written MMX/SSE2 path, compiled only for 32-bit x86 builds with SSE2
// assembly enabled and selected at run time when HasSSE2() returns true.
// Otherwise the portable C++ path further down is used.
35 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
36  if (HasSSE2())
37  {
// GNU-style inline assembly: eax/esi/edx are bound to digest/X/table by the
// operand constraints at the end of the statement. ebx is used as the loop
// offset inside SSE2_pass and is not in the clobber list, so it is saved
// here and restored before the statement ends.
38 #ifdef __GNUC__
39  __asm__ __volatile__
40  (
41  ".intel_syntax noprefix;"
42  AS1( push ebx)
43 #else
// Other compilers: load edx = table, eax = digest, esi = X explicitly.
// For _MSC_VER < 1300 the table address is taken through a local pointer.
44  #if _MSC_VER < 1300
45  const word64 *t = table;
46  AS2( mov edx, t)
47  #else
48  AS2( lea edx, [table])
49  #endif
50  AS2( mov eax, digest)
51  AS2( mov esi, X)
52 #endif
// mm0, mm1, mm2 hold the three state words. mm5 keeps a copy of state word 1
// for the final subtraction. mm7 and mm6 are loaded from the two 64-bit
// entries at byte offset 4*2048 in the table and are used as constants by
// SSE2_key_schedule.
53  AS2( movq mm0, [eax])
54  AS2( movq mm1, [eax+1*8])
55  AS2( movq mm5, mm1)
56  AS2( movq mm2, [eax+2*8])
57  AS2( movq mm7, [edx+4*2048+0*8])
58  AS2( movq mm6, [edx+4*2048+1*8])
// Remember the original esp in ecx, round esp down to a 16-byte boundary,
// reserve 8*8 bytes of scratch space, then push the saved esp so that the
// `pop esp` at the end restores it. After the push the scratch area starts
// at esp+4.
59  AS2( mov ecx, esp)
60  AS2( and esp, 0xfffffff0)
61  AS2( sub esp, 8*8)
62  AS1( push ecx)
63 
// SSE2_round(a,b,c,x,mul): one round, the assembly counterpart of the
// portable `round` macro below.
//   c ^= [x];
//   a -= T0[byte0(c)] ^ T1[byte2(c)] ^ T2[byte4(c)] ^ T3[byte6(c)];
//   b += T3[byte1(c)] ^ T2[byte3(c)] ^ T1[byte5(c)] ^ T0[byte7(c)];
//   b *= mul;
// where Tk is the sub-table at edx + k*2048. The bytes of c are pulled out
// through ecx (movd for the low 32 bits, pextrw for words 2 and 3) and edi.
// mm3 and mm4 accumulate the two lookup sums.
64 #define SSE2_round(a,b,c,x,mul) \
65  AS2( pxor c, [x])\
66  AS2( movd ecx, c)\
67  AS2( movzx edi, cl)\
68  AS2( movq mm3, [edx+0*2048+edi*8])\
69  AS2( movzx edi, ch)\
70  AS2( movq mm4, [edx+3*2048+edi*8])\
71  AS2( shr ecx, 16)\
72  AS2( movzx edi, cl)\
73  AS2( pxor mm3, [edx+1*2048+edi*8])\
74  AS2( movzx edi, ch)\
75  AS2( pxor mm4, [edx+2*2048+edi*8])\
76  AS3( pextrw ecx, c, 2)\
77  AS2( movzx edi, cl)\
78  AS2( pxor mm3, [edx+2*2048+edi*8])\
79  AS2( movzx edi, ch)\
80  AS2( pxor mm4, [edx+1*2048+edi*8])\
81  AS3( pextrw ecx, c, 3)\
82  AS2( movzx edi, cl)\
83  AS2( pxor mm3, [edx+3*2048+edi*8])\
84  AS2( psubq a, mm3)\
85  AS2( movzx edi, ch)\
86  AS2( pxor mm4, [edx+0*2048+edi*8])\
87  AS2( paddq b, mm4)\
88  SSE2_mul_##mul(b)
89 
// SSE2_mul_N(b): multiply b by the constant N with one shift and one
// add/subtract, using mm3 as scratch.  5*b = (b<<2) + b.
90 #define SSE2_mul_5(b) \
91  AS2( movq mm3, b)\
92  AS2( psllq b, 2)\
93  AS2( paddq b, mm3)
94 
// 7*b = (b<<3) - b.
95 #define SSE2_mul_7(b) \
96  AS2( movq mm3, b)\
97  AS2( psllq b, 3)\
98  AS2( psubq b, mm3)
99 
// 9*b = (b<<3) + b.
100 #define SSE2_mul_9(b) \
101  AS2( movq mm3, b)\
102  AS2( psllq b, 3)\
103  AS2( paddq b, mm3)
104 
// Label numbers for the exit point of each pass's loop, one per multiplier.
// The loop-start label of a pass is the multiplier value itself (see
// ASL(mul) in SSE2_pass). ASL/ASJ are defined elsewhere; presumably they
// emit numeric local labels and jumps to them - confirm against cpu.h.
105 #define label2_5 1
106 #define label2_7 2
107 #define label2_9 3
108 
// SSE2_pass(A,B,C,mul,X): eight rounds over the eight words at X.
// ebx is the byte offset into X and advances by 3*8 per loop iteration;
// each iteration runs three rounds with the roles of A, B, C rotated.
// When ebx reaches 6*8 the loop leaves after the second round, so the
// words at offsets 0..7 are each consumed exactly once.
109 #define SSE2_pass(A,B,C,mul,X) \
110  AS2( xor ebx, ebx)\
111  ASL(mul)\
112  SSE2_round(A,B,C,X+0*8+ebx,mul)\
113  SSE2_round(B,C,A,X+1*8+ebx,mul)\
114  AS2( cmp ebx, 6*8)\
115  ASJ( je, label2_##mul, f)\
116  SSE2_round(C,A,B,X+2*8+ebx,mul)\
117  AS2( add ebx, 3*8)\
118  ASJ( jmp, mul, b)\
119  ASL(label2_##mul)
120 
// SSE2_key_schedule(Y,X): derives the next eight message words at Y from
// the eight words at X; the assembly counterpart of the portable
// `key_schedule` macro below. It is also invoked with Y == X.
// mm3/mm4 are scratch. mm6 is XORed into X[7] in the first step, and
// `pxor mm3, mm7` precedes each shift where the portable version applies
// `~` - so mm7 is presumably all ones and mm6 the 0xA5A5... constant;
// confirm against the table contents. The last step XORs in the table
// entry at edx+4*2048+2*8.
121 #define SSE2_key_schedule(Y,X) \
122  AS2( movq mm3, [X+7*8])\
123  AS2( pxor mm3, mm6)\
124  AS2( movq mm4, [X+0*8])\
125  AS2( psubq mm4, mm3)\
126  AS2( movq [Y+0*8], mm4)\
127  AS2( pxor mm4, [X+1*8])\
128  AS2( movq mm3, mm4)\
129  AS2( movq [Y+1*8], mm4)\
130  AS2( paddq mm4, [X+2*8])\
131  AS2( pxor mm3, mm7)\
132  AS2( psllq mm3, 19)\
133  AS2( movq [Y+2*8], mm4)\
134  AS2( pxor mm3, mm4)\
135  AS2( movq mm4, [X+3*8])\
136  AS2( psubq mm4, mm3)\
137  AS2( movq [Y+3*8], mm4)\
138  AS2( pxor mm4, [X+4*8])\
139  AS2( movq mm3, mm4)\
140  AS2( movq [Y+4*8], mm4)\
141  AS2( paddq mm4, [X+5*8])\
142  AS2( pxor mm3, mm7)\
143  AS2( psrlq mm3, 23)\
144  AS2( movq [Y+5*8], mm4)\
145  AS2( pxor mm3, mm4)\
146  AS2( movq mm4, [X+6*8])\
147  AS2( psubq mm4, mm3)\
148  AS2( movq [Y+6*8], mm4)\
149  AS2( pxor mm4, [X+7*8])\
150  AS2( movq mm3, mm4)\
151  AS2( movq [Y+7*8], mm4)\
152  AS2( paddq mm4, [Y+0*8])\
153  AS2( pxor mm3, mm7)\
154  AS2( psllq mm3, 19)\
155  AS2( movq [Y+0*8], mm4)\
156  AS2( pxor mm3, mm4)\
157  AS2( movq mm4, [Y+1*8])\
158  AS2( psubq mm4, mm3)\
159  AS2( movq [Y+1*8], mm4)\
160  AS2( pxor mm4, [Y+2*8])\
161  AS2( movq mm3, mm4)\
162  AS2( movq [Y+2*8], mm4)\
163  AS2( paddq mm4, [Y+3*8])\
164  AS2( pxor mm3, mm7)\
165  AS2( psrlq mm3, 23)\
166  AS2( movq [Y+3*8], mm4)\
167  AS2( pxor mm3, mm4)\
168  AS2( movq mm4, [Y+4*8])\
169  AS2( psubq mm4, mm3)\
170  AS2( movq [Y+4*8], mm4)\
171  AS2( pxor mm4, [Y+5*8])\
172  AS2( movq [Y+5*8], mm4)\
173  AS2( paddq mm4, [Y+6*8])\
174  AS2( movq [Y+6*8], mm4)\
175  AS2( pxor mm4, [edx+4*2048+2*8])\
176  AS2( movq mm3, [Y+7*8])\
177  AS2( psubq mm3, mm4)\
178  AS2( movq [Y+7*8], mm3)
179 
// Three passes with the state registers rotated each time. The first pass
// reads the message directly from esi; the key schedule then writes the
// derived words to the stack scratch area (esp+4), which feeds the second
// and third passes and is rewritten in place in between.
180  SSE2_pass(mm0, mm1, mm2, 5, esi)
181  SSE2_key_schedule(esp+4, esi)
182  SSE2_pass(mm2, mm0, mm1, 7, esp+4)
183  SSE2_key_schedule(esp+4, esp+4)
184  SSE2_pass(mm1, mm2, mm0, 9, esp+4)
185 
// Combine with the incoming state and store back: word 0 is XORed with the
// old value, word 1 has the old value (kept in mm5) subtracted, word 2 has
// the old value added.
186  AS2( pxor mm0, [eax+0*8])
187  AS2( movq [eax+0*8], mm0)
188  AS2( psubq mm1, mm5)
189  AS2( movq [eax+1*8], mm1)
190  AS2( paddq mm2, [eax+2*8])
191  AS2( movq [eax+2*8], mm2)
192 
// Pop the original stack pointer saved by `push ecx` above, then issue emms
// now that the MMX registers are no longer in use.
193  AS1( pop esp)
194  AS1( emms)
195 #ifdef __GNUC__
196  AS1( pop ebx)
197  ".att_syntax prefix;"
// No outputs. Inputs: digest in eax, X in esi, table in edx.
// Declared clobbers: ecx, edi, memory and the condition codes.
198  :
199  : "a" (digest), "S" (X), "d" (table)
200  : "%ecx", "%edi", "memory", "cc"
201  );
202 #endif
203  }
204  else
205 #endif
// Portable C++ path, used when the assembly path is not compiled in or
// HasSSE2() returns false.
206  {
// a, b, c are working copies of the state; Y receives the derived message
// words produced by key_schedule.
207  word64 a = digest[0];
208  word64 b = digest[1];
209  word64 c = digest[2];
210  word64 Y[8];
211 
// t1..t4 address four consecutive 256-entry sub-tables of `table`.
212 #define t1 (table)
213 #define t2 (table+256)
214 #define t3 (table+256*2)
215 #define t4 (table+256*3)
216 
// round(a,b,c,x,mul): XOR message word x into c, subtract the XOR of the
// lookups on the even bytes of c from a, add the XOR of the lookups on the
// odd bytes of c to b, then multiply b by mul.
217 #define round(a,b,c,x,mul) \
218  c ^= x; \
219  a -= t1[GETBYTE(c,0)] ^ t2[GETBYTE(c,2)] ^ t3[GETBYTE(c,4)] ^ t4[GETBYTE(c,6)]; \
220  b += t4[GETBYTE(c,1)] ^ t3[GETBYTE(c,3)] ^ t2[GETBYTE(c,5)] ^ t1[GETBYTE(c,7)]; \
221  b *= mul
222 
// pass(a,b,c,mul,X): eight rounds over X[0..7]. Each loop iteration runs
// three rounds with the roles of a, b, c rotated and advances i by 3; when
// i == 6 the loop exits after the second round (X[6] and X[7]).
223 #define pass(a,b,c,mul,X) {\
224  int i=0;\
225  while (true)\
226  {\
227  round(a,b,c,X[i+0],mul); \
228  round(b,c,a,X[i+1],mul); \
229  if (i==6)\
230  break;\
231  round(c,a,b,X[i+2],mul); \
232  i+=3;\
233  }}
234 
// key_schedule(Y,X): derives eight new message words Y from X. The first
// eight statements compute Y[0..7] from X, the last eight update Y in
// place. It is also invoked with Y and X naming the same array; each X[k]
// is read no later than the statement that first assigns Y[k].
235 #define key_schedule(Y,X) \
236  Y[0] = X[0] - (X[7]^W64LIT(0xA5A5A5A5A5A5A5A5)); \
237  Y[1] = X[1] ^ Y[0]; \
238  Y[2] = X[2] + Y[1]; \
239  Y[3] = X[3] - (Y[2] ^ ((~Y[1])<<19)); \
240  Y[4] = X[4] ^ Y[3]; \
241  Y[5] = X[5] + Y[4]; \
242  Y[6] = X[6] - (Y[5] ^ ((~Y[4])>>23)); \
243  Y[7] = X[7] ^ Y[6]; \
244  Y[0] += Y[7]; \
245  Y[1] -= Y[0] ^ ((~Y[7])<<19); \
246  Y[2] ^= Y[1]; \
247  Y[3] += Y[2]; \
248  Y[4] -= Y[3] ^ ((~Y[2])>>23); \
249  Y[5] ^= Y[4]; \
250  Y[6] += Y[5]; \
251  Y[7] -= Y[6] ^ W64LIT(0x0123456789ABCDEF)
252 
// Three passes with multipliers 5, 7, 9 and the state variables rotated
// each time; the message words are re-derived between passes.
253  pass(a,b,c,5,X);
254  key_schedule(Y,X);
255  pass(c,a,b,7,Y);
256  key_schedule(Y,Y);
257  pass(b,c,a,9,Y);
258 
// Combine the result with the incoming state (XOR, subtract, add).
259  digest[0] = a ^ digest[0];
260  digest[1] = b - digest[1];
261  digest[2] = c + digest[2];
262  }
263 }
264 
265 NAMESPACE_END
Tiger
Definition: tiger.h:10
void TruncatedFinal(byte *hash, size_t size)
truncated version of Final()
Definition: tiger.cpp:17