very very strange behaviour

Hi all,
I am printing a variable twice without any intermediate code and i am getting different output.

Serial.println(state->counter[0],DEC);
Serial.println(state->counter[0],DEC);

By the way, when i compiled it using gcc through command prompt, then it behaves correctly.

By the way this is my code (in 2 parts)

PART 1

/*
 * 32-bit right rotations by 16, 12, 8 and 7 bits.
 *
 * The argument is treated as a 32-bit word regardless of the width of the
 * type that carries it: the right-shifted half is masked before shifting and
 * the combined result is masked afterwards.  Where the word type is exactly
 * 32 bits wide (e.g. unsigned long on AVR) the masks change nothing; where it
 * is wider (e.g. unsigned long on LP64 hosts) they stop bits above bit 31
 * from leaking into the result.
 *
 * Note: the argument is evaluated twice, so it must have no side effects.
 */
#define ROTATE_DOWN16(a) ((((a) << 16) | (((a) & 0xFFFFFFFFUL) >> 16)) & 0xFFFFFFFFUL)
#define ROTATE_DOWN12(a) ((((a) << 20) | (((a) & 0xFFFFFFFFUL) >> 12)) & 0xFFFFFFFFUL)
#define ROTATE_DOWN8(a)  ((((a) << 24) | (((a) & 0xFFFFFFFFUL) >> 8))  & 0xFFFFFFFFUL)
#define ROTATE_DOWN7(a)  ((((a) << 25) | (((a) & 0xFFFFFFFFUL) >> 7))  & 0xFFFFFFFFUL)
/* Initial values to hash: Hash() copies these eight words into
 * state->chainvalue before the first block is compressed. */
static const myuint IV[8] = {
    0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
    0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};

/* Sixteen round constants.  G() XORs them with message words; compress()
 * XORs the first eight with the salt and the bit counter. */
static const myuint c_blake[16] = {
    0x243F6A88UL, 0x85A308D3UL, 0x13198A2EUL, 0x03707344UL,
    0xA4093822UL, 0x299F31D0UL, 0x082EFA98UL, 0xEC4E6C89UL,
    0x452821E6UL, 0x38D01377UL, 0xBE5466CFUL, 0x34E90C6CUL,
    0xC0AC29B7UL, 0xC97C50DDUL, 0x3F84D5B5UL, 0xB5470917UL
};

// 10 permutations of 0..15, one row per round; G() uses the entries as
// indices into the message block and into c_blake.
// Every entry fits in a byte, so the table is stored as unsigned char:
// 160 bytes instead of 640 when the element type is a 4-byte word. That
// matters on a part with only 2 KB of RAM.
static const unsigned char sigma[10][16]={
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}
};
myuint  U8_to_U32(const BitSequence *d){
     return ((( myuint)(*d ))<< 24 | ((myuint)(*(d+1)) )<< 16 | ((myuint)(*(d+2))) << 8 | (myuint)(*(d+3)) );
}

/* Store the low 32 bits of s into h[0..3], most significant byte first. */
void U32_to_U8(BitSequence *h, myuint s){
     int k;

     for (k = 0; k < 4; k++) {
          /* shift the wanted byte down to bits 0..7; the cast drops the rest */
          h[k] = (BitSequence)(s >> (24 - 8 * k));
     }
}
/*
 * Mix four state words in place.
 *
 *   i       selects the pair of sigma entries (2*i, 2*i + 1) used by this call
 *   r       selects the row of sigma (the round); must be a valid row index
 *   m       the 16-word message block
 *   a,b,c,d the four state words to update; callers pass distinct words
 */
void G(int i,int r,myuint m[16],myuint *a,myuint *b ,myuint *c,myuint *d){
   const myuint first  = sigma[r][2*i];
   const myuint second = sigma[r][2*i + 1];

   /* first half: message word `first`, constant `second` */
   *a += *b + (m[first] ^ c_blake[second]);
   *d  = ROTATE_DOWN16((*d ^ *a));
   *c += *d;
   *b  = ROTATE_DOWN12((*b ^ *c));

   /* second half: the two indices swap roles */
   *a += *b + (m[second] ^ c_blake[first]);
   *d  = ROTATE_DOWN8((*d ^ *a));
   *c += *d;
   *b  = ROTATE_DOWN7((*b ^ *c));
}

/*
 * Compress one 16-word message block m into state->chainvalue.
 *
 * Reads state->chainvalue, state->salt and state->counter, uses
 * state->int_state as the 16-word working state, and folds the result back
 * into state->chainvalue.  Also prints both counter words on Serial.
 */
void compress(hashstate *state, myuint m[16])
{
    myuint *v = state->int_state;
    int k;
    int r;

    Serial.println(state->counter[0]);
    Serial.println(state->counter[1]);

    /* words 0..7: current chain value */
    for (k = 0; k < 8; k++) {
        v[k] = state->chainvalue[k];
    }

    /* words 8..11: salt XOR constants */
    for (k = 0; k < 4; k++) {
        v[8 + k] = state->salt[k] ^ c_blake[k];
    }

    /* words 12..15: each counter word is used twice, with different constants */
    v[12] = state->counter[0] ^ c_blake[4];
    v[13] = state->counter[0] ^ c_blake[5];
    v[14] = state->counter[1] ^ c_blake[6];
    v[15] = state->counter[1] ^ c_blake[7];

    /* ten rounds */
    for (r = 0; r < 10; r++) {
        /* column step: (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15) */
        for (k = 0; k < 4; k++) {
            G(k, r, m, &v[k], &v[4 + k], &v[8 + k], &v[12 + k]);
        }
        /* diagonal step: (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14) */
        for (k = 0; k < 4; k++) {
            G(4 + k, r, m,
              &v[k],
              &v[4 + (k + 1) % 4],
              &v[8 + (k + 2) % 4],
              &v[12 + (k + 3) % 4]);
        }
    }

    /* finalization: fold both halves of the working state and the salt
       (salt word k % 4) into the chain value */
    for (k = 0; k < 8; k++) {
        state->chainvalue[k] ^= state->salt[k % 4] ^ v[k] ^ v[k + 8];
    }
}

PART 2

HashReturn 
Hash(int hashbitlen,const BitSequence *data, DataLength databitlen, BitSequence *hashval)
{

         hashstate *state = (hashstate *)malloc(sizeof(hashstate));
         state->salt[0]=0;
         state->salt[1]=0;
         state->salt[2]=0;
         state->salt[3]=0;
         
         state->counter[0]=0;
         state->counter[1]=0;
         int i;
         for(i=0;i<8;++i)//initialize hash
         {
               state->chainvalue[i]=IV[i];
         }
    
        for(i=0;i<16;i++)state->int_state[i]=0;
         DataLength d=databitlen;
         
         myuint m[16];//msgblock consisiting of 16 words
         
         while(d >= 512)
         {
                 //extract bits from data to m
                 for(i=0;i<16;i++)m[i] = U8_to_U32(data+4*i);
                 
                 //increment data pointer
                 data +=64;
                 
                 
                 
                 //increment counter
                 state->counter[0] = (myuint)(state->counter[0]+ 512);
                 if(state->counter[0] == 0)state->counter[1]++;
                
                 compress(state,m);
                 //decremnt d
                 d = d- 512;
                
                
         }
         
         //2 cases 
         if(d <= 512-64-2){
              
              for(i=0 ; i<((d+31)/32)-1;i++)m[i]=U8_to_U32(data+4*i);
            
                     
              data+= i*4;
              
              int j=d-(i)*32;
              unsigned char pow_2[] = {128,64,32,16,8,4,2,1};
              if(j< 8)
              {
                   m[i]= ((myuint)(*data | pow_2[j]))<< 24;
              }
              else if(j<16)
              {
                    m[i]=((myuint)(*data)) << 24 | ((myuint)(*(data+1) | pow_2[j-8]))<< 16;
              }
              else if(j < 24)
              {
                   m[i] = ((myuint)(*data)) << 24 | ((myuint)*(data+1)) << 16 | ((myuint)(*(data+2) | pow_2[j-16]))<< 8;
              }
               else if(j < 32)
              {
                     m[i]= ((myuint) (*data) )<< 24 | ((myuint)(*(data+1))) << 16 | ((myuint)(*(data+2))) << 8 |((myuint)(*(data+3) | pow_2[j-24]));
              }
              if(j == 32){
                    m[i]= ((myuint) (*data) )<< 24 | ((myuint)(*(data+1))) << 16 | ((myuint)(*(data+2))) << 8 |((myuint)(*(data+3) ));
                    i++;
                    m[i] =0X80000000;
              }
               i++;
               
              if(i != 13  && i !=14){
                  for(;i<13;i++)m[i]=0;
              }
              
               
                
                    if(i == 13)m[13]=(myuint)1;
                    if(i==14) m[13] = m[13] | 0x00000001;
                    /*
                    if(i<=14){
                          if(i<=12)m[13]=(myuint)1;
                          else{m[13] = m[13] | 0x00000001;}                        

                    }  */
                          
                    m[14]=(myuint)(databitlen >> 32);
        
             m[15]=(myuint)databitlen;
             state->counter[0] += d;
              if(state->counter[0] == 0)state->counter[1]++;
      


              compress(state,m);
              
             
         }
              
         else{//d > 512-64-2 , in this case the last block will contain only padded bits
              for(i=0 ; i<((d+31)/32)-1;i++)m[i]=U8_to_U32(data+4*i);
              data+= i*4;
              int j=d-(i)*32;
              
              unsigned char pow_2[] = {128,64,32,16,8,4,2,1};
              
              if(j< 8)
              {
                   m[i]= ((myuint)(*data | pow_2[j]))<< 24;
              }
              else if(j<16)
              {
                    m[i]=((myuint)(*data)) << 24 | ((myuint)(*(data+1) | pow_2[j-8]))<< 16;
              }
              else if(j < 24)
              {
                   m[i] = ((myuint)(*data)) << 24 | ((myuint)*(data+1)) << 16 | ((myuint)(*(data+2) | pow_2[j-16]))<< 8;
              }
             else if(j < 32)
              {
                     m[i]= ((myuint) (*data) )<< 24 | ((myuint)(*(data+1))) << 16 | ((myuint)(*(data+2))) << 8 |((myuint)(*(data+3) | pow_2[j-24]));
              }
              if(j == 32){
                    m[i]= ((myuint) (*data) )<< 24 | ((myuint)(*(data+1))) << 16 | ((myuint)(*(data+2))) << 8 |((myuint)(*(data+3) ));
                    i++;
                    m[i] =0X80000000;
              }
               
              i++;
              for(;i<16;i++)m[i]=0;
              state->counter[0] += d;
              if(state->counter[0] == 0)state->counter[1]++;
                
              compress(state,m);

              for(i=0;i<13;i++)m[i]=0;
              
              m[13]=1;
              m[14]=(myuint)(databitlen >> 32);
              m[15]=(myuint)databitlen;
                state->counter[0]=0;
                state->counter[1]=0;
              compress(state,m);
                              
              
              
              
                 
         }
         
         //finalization
         
         U32_to_U8(hashval     , state->chainvalue[0]);
        
         U32_to_U8((hashval+4) , state->chainvalue[1]);
         U32_to_U8((hashval+8) , state->chainvalue[2]);
         U32_to_U8((hashval+12), state->chainvalue[3]);
         U32_to_U8((hashval+16), state->chainvalue[4]);
         U32_to_U8((hashval+20), state->chainvalue[5]);
         U32_to_U8((hashval+24), state->chainvalue[6]);
         U32_to_U8((hashval+28), state->chainvalue[7]);
         free(state);
         return SUCCESS;

}

I'm having a difficult time finding setup and loop. Is setup in part 1 or part 2? Is loop in part 1 or part 2?

I'm guessing it's a memory overflow thing, but without seeing the whole thing (like the "myuint" typedef - probably "unsigned long"), it's very hard to say.

@Coding Badly

Hi ,
I am sorry. I didn't post that part because I thought it would not be relevant.
well it is here :

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <avr/pgmspace.h>
#include "blake.h"


// micros() returns unsigned long; keeping the timestamps unsigned means the
// difference "now - start" stays correct across counter wrap-around and never
// shows up as a negative number.
unsigned long startTime ;           // start time for stop watch
unsigned long elapsedTime ;         // elapsed time for stop watch
int fractional;                     // variable used to store fractional part of time



// Runs once at start-up: opens the serial port at 115200 baud, prints a
// start marker and runs the single hashing test.
void setup(){
    int status;

    Serial.begin(115200);
    Serial.println("start");

    status = genShortMsg(256);   // result is not acted upon
    (void)status;
}





// Nothing to do repeatedly: all work happens once in setup().
void loop() { }

int
genShortMsg(int hashbitlen)
{

      int                  msglen, msgbytelen;
      BitSequence      Msg[753], MD[32];
                  msglen = 6017;
            //msglen=8083;      
            msgbytelen = (msglen+7)/8;
      


      int            ch, started;
      BitSequence      ich;
      int i;
      if ( msgbytelen == 0 ) {
            Msg[0] = 0x00;
            
      }
        Serial.println(msgbytelen);
        Serial.println("reached 1");
      memset(Msg, 0x00, msgbytelen);
       
      
      started = 0;
     // static char str1[] PROGMEM= "EEBCC18057252CBF3F9C070F1A73213356D5D4BC19AC2A411EC8CDEEE7A571E2E20EAF61FD0C33A0FFEB297DDB77A97F0A415347DB66BCAF";
             static char str1[] PROGMEM="B6E69511FFA9AC74535A28F0E2A0A6D11FD2A3D21CD8A514FC6D6BC063AB51D4BB89ACF599E46D6AF1F137E063F1050E6F51F2A5F887E8959533D37EB578422A3E4AFA37ED6037D4C69ABA1414DBCBF7EBECAD42C3A81FFE73E73135DCF90B496C937B4E20DCD73A3F0E235C249609391A508B50ED941436E8DF4F8CA884755EA301B82865688CEBAC2DB8B0F257A481A6382622C7908C319675F81191A5AEAC8A3DE42335581545A7E908DEC6D92636F1B00517BA2F718A97831C6B0172C68605E3D910E523A77C11900CE2E1204F3BC459A1B85310F7D6223009FABA3E51A940EBC63C0FDF02F03FA17BB35A918F501514F2B6A8FED85A2DEEDC39D2684351F49BBE5BA2ABC4655C89842B56DF401FF0A8956E41DB417B2D9CB0BAC162B4EA4E1CBD315371DF86436E2920221C19CC75602BD1E4B60C312110824C1EE49CA91375C7AF0D2D3CDA4";       
      static char str2[] PROGMEM="8CC4DE41204CE9814DD4AB05B6A627485BB49D2B0FB237F57CF543D2EEF06C60A6236DCB54F145F9FB2E9356D71867DB80E2D74F93BC7EE55AC5A0DEEADC397A6B5F8C826A69F9CDC081D1781DF7ABEF448BD4B4B1EF7100F0115D11571C27C6EDB";
      static char str3[] PROGMEM="BCF2E5B167AF3D336D8BA43306E6E6EE8E685C0ABA72A16B902BDB7FD777962AC9B83DF9A8A084EEA33EBB287588ECAF426BE987C9D12153C3EFC6F0FE6F7635972B1990B8639EBE073B6665C575ADD66EB79177FC0E7B29FD429E63ED33AE99282E2DC7671417A15F4F1C53E09F36CB959948B972EF6970E32E606EBC1FE27C532746481A840096284E3953D6272552535B03253E34A9609B4E8AA8A091538514DAD89B5111C3B135C0F36282180D90434BC8DA064F10D27D574F8C5F15D1BD5CF9D5F5E02B3FDB8BCC2DE688B8BC79FD5387CC6E0DCACAA8B5DB53528E398BA1C7D7F691EC16EB31B765EB42AA9193AFC1D0A0702903A36BEC2E4F806A5CBD755C7EB38A273E1F77C1E9D4540F6B4E00167899DF77D00F5B35431CA5FCD398167D21F9BC504A0EE354C22122D11EB49F5A71B98E91B968909528F0AA6CA0D58EF7E429283300";
  
      //static char str1[] PROGMEM="CA7E3E3807E14D73348FAF2202F1D5EF730B4780427CE2FFC4D83FFDDAD3AF74953D52E5E0669F4FE6A6CFB7338C3562263CA32DDADC6CC3CDF59E6B93F1E5A001A3C9C6CE5E286F61E880F451EC7C1BFAFD40D32634E1ABA13854BF95D4F7241D5B86C6432E8A86783F55502C77506618D6135B6568213CA728E0DA8D953687ADC64F7A8864DA957B0A5E0CEC00F5F1E8C151ED120AF087078C24533B11E4B7283C5111320F7E6156CBE1AC8CB2E8FB816C1983BE5D72F2F0DD54727D3221BB2CF950AF4434468E3D822E68F983161E0EC51F63E6C9F5890E368E2F81E18979A0C06246847AD3D04C2ECC51D011DF80CB0BCDB7AB107C28B19E04BE76252E3BC64E009DC3E082ABAE4D04BD6E5B252DDB750DF844CCB5364A6410558D195B76983AE8C9FB950458DD6FBE12D31AB94E28CDEED63637049FDA288F9E0ABA6827B118CCC5B9A2F02BD73EB0C08650E97D189F7BE7618823B0E1D077A099AADC1A80DC8B";
        //static char str2[] PROGMEM="8CF7B227CA962B4A985E599E8DA29C8303726E11FF6FE5813BE136B638B1BEBD7AE70FE6F3FAACE693930727A5EEBFBF0F634CAD2AA72D3007ED46948D6E495F9D34C2F42987E7F93D30199290EDA898575D50ECAED74A4881F5A7FE76D732E0A99A6E73F97F858B3F2A06FFB6E565B8E8F65B823A65926CD0C97220E438C4425F0D35C26A2415DB94937CE87651DD5DEF8F82AD2EF00FE51AB23D9A6115D3A768C62F2B9038A5AB36783DEEF2222DDBB3D7B33D92CEF0E3B75D523D72E2A6C7A0F817C9849648046DB4B930DAB03074BCAB362A227A4AC28B3040E0FCF1169C99FCEDBE615447DF44C93F5CF52664BAF4329574B11A96A9D0D58FBE56A6B350289920DF53366C118F3DDD9F3015A888E70BDDAC2F951E92DF36D0834640185BBD793647DCCA03FA27FE46579D8C1F7F01EFDF1A0287742935C5061CA7AFAF5B0E683F868D93E9DB01FFA7C0F2EAB53570ABD0FA65B1F93E47D4DEE953CD67B5EE2737";
        //static char str3[] PROGMEM="F921F8BA0B17FCF7EDD18CB0487917F8C40F43BBD718E3630B7BAE92CF85F431B9FEDCEF0C80DF424D7C0759452CAA657D394D3DD24BE5D2A208C25B0584DE2503461D2D0A3EBFEF7FC7B3F8C2CC1B19632BAEAB8D585013B61C40856798C4D3351DDF690FC9773CFDE7427480FA2E53CA3E01BA47A0B295A17AAF79CBCC4F014A5846FC8ACAF3C01D619BB5AA1680D5BDDC3A018EC5D5AA761C04C746CAFEF44FBCA0DC3D57EAA01D00EDB157E54AB5AF551985DC39025359E18FBAD185E6D7DD9DDAF69589F56259B196C7609BDA940484DA2FF925AB4AD8EFFCDDD79D539AF82DC9AE26DB69172D8B6942253880C9D69406E4C5A75C4A3E8650ABA4A99D49AF318683F4771356C056EFD06B04D0B22060A0788E4E311D8B1F11C6812DC44BB293429C7D3880";

      int j;
 
      for(j=0;j<strlen(str1);j++){
            ch=pgm_read_byte(&(str1[j]));
             
                  if ( (ch >= '0') && (ch <= '9') )
                        ich = ch - '0';
                  else if ( (ch >= 'A') && (ch <= 'F') )
                        ich = ch - 'A' + 10;
                  
                  else if ( (ch >= 'a') && (ch <= 'f') )
                        ich = ch - 'a' + 10;
                  
                  for ( i=0; i<msgbytelen-1; i++ )Msg[i] = (Msg[i] << 4) | (Msg[i+1] >> 4);
                  Msg[msgbytelen-1] = (Msg[msgbytelen-1] << 4) | ich;

            
      }


      for(j=0;j<strlen(str2);j++){
            ch=pgm_read_byte(&(str2[j]));
               
                  if ( (ch >= '0') && (ch <= '9') )
                        ich = ch - '0';
                  else if ( (ch >= 'A') && (ch <= 'F') )
                        ich = ch - 'A' + 10;
                  
                  else if ( (ch >= 'a') && (ch <= 'f') )
                        ich = ch - 'a' + 10;
                  
                  for ( i=0; i<msgbytelen-1; i++ )Msg[i] = (Msg[i] << 4) | (Msg[i+1] >> 4);
                  Msg[msgbytelen-1] = (Msg[msgbytelen-1] << 4) | ich;

            
      }


      for(j=0;j<strlen(str3);j++){
            ch=pgm_read_byte(&(str3[j]));
                       if ( (ch >= '0') && (ch <= '9') )
                        ich = ch - '0';
                  else if ( (ch >= 'A') && (ch <= 'F') )
                        ich = ch - 'A' + 10;
                  
                  else if ( (ch >= 'a') && (ch <= 'f') )
                        ich = ch - 'a' + 10;
                  
                  for ( i=0; i<msgbytelen-1; i++ )Msg[i] = (Msg[i] << 4) | (Msg[i+1] >> 4);
                  Msg[msgbytelen-1] = (Msg[msgbytelen-1] << 4) | ich;

            
      }
     
     
    
    
      startTime = micros();
   
      Hash(hashbitlen, Msg, msglen, MD);
     
         elapsedTime =   micros() - startTime; 
     Serial.print(elapsedTime);
      Serial.println("");
            
                for(i=0;i<hashbitlen/8;++i){
                  if(MD[i] < 16){
                       Serial.print(0,HEX);
                      Serial.print(MD[i],HEX);
                  }
                  else{
                     Serial.print(MD[i],HEX);
                  }
                  
                }
                Serial.println("");
               
          
      return 0;
}



////////////////////////////////////////////

@Coding Badly
Here is "blake.h"

#ifndef blake_h
#define blake_h

#include <stdint.h>

/* One hash word.  The algorithm relies on wrap-around at exactly 32 bits, so
   the type is fixed-width rather than "unsigned long" (which is 32 bits on
   AVR but 64 bits on many desktop compilers). */
typedef uint32_t myuint;
/* Message length in bits. */
typedef unsigned long  DataLength;
/* One byte of message or digest data. */
typedef unsigned char BitSequence;
typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHBITLEN = 2 } HashReturn;

typedef struct
{
        myuint int_state[16];//4 X 4 matrix of words
        myuint chainvalue[8];//hash
        myuint counter[2];//#bits hashed so far
        myuint salt[4];//provided by user------default is 0
}hashstate;

/* Hash databitlen bits of data and write the digest to hashval. */
HashReturn Hash(int hashbitlen,const BitSequence *data, DataLength databitlen, BitSequence *hashval);

/* Compress one 16-word message block m into state->chainvalue. */
void compress (hashstate *state,myuint m[16]);

/* Mix the four state words a, b, c, d in place, using message block m,
   round r and position i. */
void G(int i,int r,myuint m[16],myuint *a,myuint *b ,myuint *c,myuint *d);

#endif

By the way, the code works correctly for smaller msglen say 600B

the code works correctly for smaller msglen say 600B

So, memory overflow it is.

Even though Groove is very likely correct, I'm curious to know which board you are using?

@Groove

so can i fix it(using some other memory or like that, by the way i am having an atmega328p board)

Difficult to say without analysing the code - some of what you have as RAM could be put into progmem, but the simplest would be to try a Mega, with 4K bytes RAM.

[edit]genShortMsg has some very large automatics - try them as statics.[/edit]

/Applications/arduino/arduino-0018/hardware/tools/avr/bin/avr-size *.elf
   text    data     bss     dec     hex filename
  10580     786     182   12323    3023 blake1.cpp.elf

that's normal compilation: 968 bytes of data. Plus 785 bytes for msg and md, plus 120 bytes for the malloc'ed hashstate. It's not surprising that you exceed 2K total when you throw in another couple local variables and stack frames...

so can i fix it(using some other memory or like that,

How far do you have to go? I see:
You can reduced or eliminate the 128 byte serial input buffer; you're not using it.
Sigma does not appear to require the myuint datasize: unsigned char would work fine. (saves 160*3 = 480 bytes) (easiest thing to try...)
sigma, c_blake, and IV could be in progmem. 184*4 = 736 bytes saved.

This is just a core function, right? There's still code to be built around it? that makes things look awfully tight; a message that exceeds 1/3 the size of all available memory doesn't seem well-sized...

@westwf

At Groove's and your suggestion, I stored sigma, c_blake and IV in flash, and I have got my bigger message working.

Thanks.

I am using microcontrollers for the first time, and often get stuck when things work properly on the command line but do not give the same output on the board. Never mind :slight_smile:

Should I Post the following as a new post

Anyways, can anybody give me some reference regarding microcontroller specific implementation. I am implementing
SHA-3 candidates and i want to optimize them as much as I can.
Right now i can do only software level optimization(as i have not done microcontroller specific coding before)

This sort of thing is useful to run through a desktop C compiler, where it is easily instrumented to show exactly how much memory is in use...