diff options
Diffstat (limited to 'crypto/rc4/rc4_enc.c')
-rw-r--r-- | crypto/rc4/rc4_enc.c | 116 |
1 files changed, 58 insertions, 58 deletions
diff --git a/crypto/rc4/rc4_enc.c b/crypto/rc4/rc4_enc.c index 8dba10dacd..72cc8f6543 100644 --- a/crypto/rc4/rc4_enc.c +++ b/crypto/rc4/rc4_enc.c @@ -80,36 +80,36 @@ void RC4(RC4_KEY *key, unsigned long len, const unsigned char *indata, d = key->data; #if defined(RC4_CHUNK) - /*- - * The original reason for implementing this(*) was the fact that - * pre-21164a Alpha CPUs don't have byte load/store instructions - * and e.g. a byte store has to be done with 64-bit load, shift, - * and, or and finally 64-bit store. Peaking data and operating - * at natural word size made it possible to reduce amount of - * instructions as well as to perform early read-ahead without - * suffering from RAW (read-after-write) hazard. This resulted - * in ~40%(**) performance improvement on 21064 box with gcc. - * But it's not only Alpha users who win here:-) Thanks to the - * early-n-wide read-ahead this implementation also exhibits - * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending - * on sizeof(RC4_INT)). - * - * (*) "this" means code which recognizes the case when input - * and output pointers appear to be aligned at natural CPU - * word boundary - * (**) i.e. according to 'apps/openssl speed rc4' benchmark, - * crypto/rc4/rc4speed.c exhibits almost 70% speed-up... - * - * Cavets. - * - * - RC4_CHUNK="unsigned long long" should be a #1 choice for - * UltraSPARC. Unfortunately gcc generates very slow code - * (2.5-3 times slower than one generated by Sun's WorkShop - * C) and therefore gcc (at least 2.95 and earlier) should - * always be told that RC4_CHUNK="unsigned long". - * - * <appro@fy.chalmers.se> - */ + /*- + * The original reason for implementing this(*) was the fact that + * pre-21164a Alpha CPUs don't have byte load/store instructions + * and e.g. a byte store has to be done with 64-bit load, shift, + * and, or and finally 64-bit store. Peaking data and operating + * at natural word size made it possible to reduce amount of + * instructions as well as to perform early read-ahead without + * suffering from RAW (read-after-write) hazard. This resulted + * in ~40%(**) performance improvement on 21064 box with gcc. + * But it's not only Alpha users who win here:-) Thanks to the + * early-n-wide read-ahead this implementation also exhibits + * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending + * on sizeof(RC4_INT)). + * + * (*) "this" means code which recognizes the case when input + * and output pointers appear to be aligned at natural CPU + * word boundary + * (**) i.e. according to 'apps/openssl speed rc4' benchmark, + * crypto/rc4/rc4speed.c exhibits almost 70% speed-up... + * + * Cavets. + * + * - RC4_CHUNK="unsigned long long" should be a #1 choice for + * UltraSPARC. Unfortunately gcc generates very slow code + * (2.5-3 times slower than one generated by Sun's WorkShop + * C) and therefore gcc (at least 2.95 and earlier) should + * always be told that RC4_CHUNK="unsigned long". + * + * <appro@fy.chalmers.se> + */ # define RC4_STEP ( \ x=(x+1) &0xff, \ @@ -131,34 +131,34 @@ void RC4(RC4_KEY *key, unsigned long len, const unsigned char *indata, 1 }; - /*- - * I reckon we can afford to implement both endian - * cases and to decide which way to take at run-time - * because the machine code appears to be very compact - * and redundant 1-2KB is perfectly tolerable (i.e. - * in case the compiler fails to eliminate it:-). By - * suggestion from Terrel Larson <terr@terralogic.net> - * who also stands for the is_endian union:-) - * - * Special notes. - * - * - is_endian is declared automatic as doing otherwise - * (declaring static) prevents gcc from eliminating - * the redundant code; - * - compilers (those I've tried) don't seem to have - * problems eliminating either the operators guarded - * by "if (sizeof(RC4_CHUNK)==8)" or the condition - * expressions themselves so I've got 'em to replace - * corresponding #ifdefs from the previous version; - * - I chose to let the redundant switch cases when - * sizeof(RC4_CHUNK)!=8 be (were also #ifdefed - * before); - * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in - * [LB]ESHFT guards against "shift is out of range" - * warnings when sizeof(RC4_CHUNK)!=8 - * - * <appro@fy.chalmers.se> - */ + /*- + * I reckon we can afford to implement both endian + * cases and to decide which way to take at run-time + * because the machine code appears to be very compact + * and redundant 1-2KB is perfectly tolerable (i.e. + * in case the compiler fails to eliminate it:-). By + * suggestion from Terrel Larson <terr@terralogic.net> + * who also stands for the is_endian union:-) + * + * Special notes. + * + * - is_endian is declared automatic as doing otherwise + * (declaring static) prevents gcc from eliminating + * the redundant code; + * - compilers (those I've tried) don't seem to have + * problems eliminating either the operators guarded + * by "if (sizeof(RC4_CHUNK)==8)" or the condition + * expressions themselves so I've got 'em to replace + * corresponding #ifdefs from the previous version; + * - I chose to let the redundant switch cases when + * sizeof(RC4_CHUNK)!=8 be (were also #ifdefed + * before); + * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in + * [LB]ESHFT guards against "shift is out of range" + * warnings when sizeof(RC4_CHUNK)!=8 + * + * <appro@fy.chalmers.se> + */ if (!is_endian.little) { /* BIG-ENDIAN CASE */ # define BESHFT(c) (((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1)) for (; len & ~(sizeof(RC4_CHUNK) - 1); len -= sizeof(RC4_CHUNK)) { |