// Reads a source file one byte at a time, groups the bytes into tokens by
// character class, and hands each token to push().  push() keeps a linked
// stack of token strings plus a parallel array of type codes, and prints
// assembly-like lines ("li", "jal") when it recognises certain patterns.
// NOTE(review): the emitted mnemonics look like RISC-V -- confirm.

typedef unsigned char byte;

// NOTE(review): the header names were missing from the reviewed text; this
// list is reconstructed from the library functions the file actually calls.
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

// Token type codes stored in types[].  NIL doubles as "cleared slot".
// BKT and IDX are declared but never used in this file.
enum { NIL, NUM, LTR, OPS, QUO, ETC, SPC, REG, PRN, PRM, BKT, IDX, CLN };

// Table sizes.  push() may read up to two slots beyond tdx and sis()/w() may
// append two types in one step, hence the headroom kept free by sis().
enum { TYPES_MAX = 1024, TOKEN_MAX = 1024, TYPES_HEADROOM = 4 };

int tdx = 0;                 // number of entries in types[]
byte types[ TYPES_MAX ];     // type code per token, oldest first

// Stack of token strings; `top` is the most recent entry.
struct tok
{
    byte* item;
    struct tok* prev;
}* top = NULL;

// True when the NUL-terminated token `x` equals the string `y`.
bool cmp_net( byte* x, const char* y )
{
    return strcmp( ( const char* )x, y ) == 0;
}

// Copies the NUL-terminated string `y` into `x`.  The caller must provide
// len_dat( y ) bytes.  Always true, because strcpy returns its destination.
bool cp_net( byte* x, byte* y )
{
    char* dst = ( char* )x;
    return strcpy( dst, ( const char* )y ) == dst;
}

// Number of bytes needed to store `item` including its terminator.
// (Previously declared `bool`, which collapsed every size to 1 and made each
// allocation sized with it one byte long.)
size_t len_dat( byte* item )
{
    return strlen( ( const char* )item ) + 1;
}

// Parses `token` as a base-10 integer and truncates the result to a byte.
byte num_dat( byte* token )
{
    return ( byte )strtol( ( const char* )token, NULL, 10 );
}

// Prints ": " followed by the token and a newline.
bool put_app( byte* token )
{
    printf( ": " );
    puts( ( char* )token );
    return true;
}

// Pops the newest stack entry and its type slot.  Returns false when either
// the type table or the stack is already empty.
bool drop_dat()
{
    if( tdx == 0 || top == NULL )
        return false;

    struct tok* gone = top;
    top = gone -> prev;
    delete[] gone -> item;
    delete gone;

    --tdx;
    types[ tdx ] = NIL;
    return true;
}

// Never set to true in this file; sis() only ever clears it.
bool ws = false;

// Debug helper: prints every stack entry (newest first) with the type code it
// lines up with, then the whole type table.
bool dump_app()
{
    int depth = tdx;

    for( struct tok* node = top; node != NULL; node = node -> prev )
    {
        // The stack can hold more entries than types[]; never index below 0.
        if( depth > 0 )
            printf( "x%d=", types[ depth - 1 ] );
        puts( ( char* )( node -> item ) );
        depth -= 1;
    }

    printf( "%d so", depth - 1 );
    for( int i = 0; i < tdx; ++i )
        printf( " %d", types[ i ] );
    return true;
}

// True for the register indices "0" .. "11" (exact spelling only).
static bool is_reg_index( byte* token )
{
    static const char* const indices[] =
        { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11" };

    for( size_t i = 0; i < sizeof indices / sizeof indices[ 0 ]; ++i )
        if( cmp_net( token, indices[ i ] ) )
            return true;
    return false;
}

// True for the register names that need no numeric suffix.
static bool is_named_reg( byte* token )
{
    static const char* const names[] = { "zero", "ra", "sp", "gp", "tp", "fp" };

    for( size_t i = 0; i < sizeof names / sizeof names[ 0 ]; ++i )
        if( cmp_net( token, names[ i ] ) )
            return true;
    return false;
}

// Processes one token whose type code the caller has already appended to
// types[] (so types[ tdx - 1 ] describes `token`).
//
// Depending on that type the token is either consumed by an operator rule
// (which may print output and pop entries) or copied onto the stack.
// Returns false for token types or operators that have no rule, and for
// malformed sequences; in those cases nothing is pushed.
bool push( byte* token )
{
    bool discard = false;   // true when `token` was replaced by a heap string
    const byte kind = ( tdx > 0 ) ? types[ tdx - 1 ] : ( byte )NIL;

    printf( "[%d %d]", tdx, kind );
    put_app( token );

    switch( kind )
    {
    case OPS:
        if( cmp_net( token, "=" ) )
        {
            assert( tdx >= 3 );
            fflush( stdout );
            if( types[ tdx - 2 ] != REG )
                return false;
            if( types[ tdx - 3 ] == NUM )
            {
                if( top == NULL || top -> prev == NULL )
                    return false;
                // "<num> <reg> =" loads an immediate into the register.
                printf( "\t\tli\t%s,%s\n", top -> item, top -> prev -> item );
                --tdx;
                drop_dat();
                drop_dat();
            }
            // REG preceded by QUO or REG has no rule yet.
            return true;
        }
        else if( cmp_net( token, "," ) )
        {
            // Duplicate the top of the stack.
            if( top == NULL )
                return false;
            struct tok* dup = new struct tok;
            dup -> item = new byte[ len_dat( top -> item ) ];
            cp_net( dup -> item, top -> item );
            dup -> prev = top;
            top = dup;
            return true;
        }
        else if( cmp_net( token, "." ) )
        {
            // Discard the top of the stack.
            --tdx;
            drop_dat();
            return true;
        }
        else if( cmp_net( token, "+" ) )
        {
            break;
        }
        else if( cmp_net( token, ")" ) )
        {
            // "( )" with nothing in between: just remove the "(".
            if( tdx >= 2 && types[ tdx - 2 ] == PRN )
            {
                --tdx;
                drop_dat();
                return true;
            }

            int begin = tdx - 2; // count out the postincrement and present token.
            struct tok* node = top;
            for( ; begin > 1 && node != NULL; --begin, node = node -> prev )
                if( types[ begin - 1 ] == PRN )
                    break;
            if( begin < 1 || ( begin == 1 && types[ 0 ] != PRN ) )
            {
                printf( "missing paren\n" );
                return false;
            }
            // The stack may be shorter than types[]; nothing to unlink then.
            if( node == NULL || node -> prev == NULL )
                return false;

            const int toks = tdx - 1 - begin;
            byte* prm = new byte[ 6 ]; // "65536\0"
            sprintf( ( char* )prm, "%d", toks );

            // Unlink the entry below `node`, in case there is any doubt "(# ( ))"
            struct tok* del = node -> prev;
            node -> prev = del -> prev;
            delete[] del -> item;
            printf( "%d\n", begin );
            delete del;
            --tdx; // drop del
            for( --begin; begin < tdx + 1; ++begin )
                types[ begin ] = types[ begin + 1 ];
            --tdx; // yeah drop the ) ops

            // The ")" itself is replaced by the enclosed token count.
            types[ tdx++ ] = PRM;
            token = prm;
            discard = true;
            break;
        }
        else if( cmp_net( token, ";" ) )
        {
            int begin = tdx - 2; // count out the postincrement and present token.
            struct tok* node = top;
            for( ; begin > 1 && node != NULL; --begin, node = node -> prev )
                if( types[ begin - 1 ] == CLN )
                    break;
            if( begin < 1 || ( begin == 1 && types[ 0 ] != CLN ) )
            {
                printf( "missing colon\n" );
                return false;
            }
            // The stack may be shorter than types[]; nothing to unlink then.
            if( node == NULL || node -> prev == NULL )
                return false;

            const int toks = tdx - 1 - begin;
            byte* prm = new byte[ 6 ]; // "65536\0"
            sprintf( ( char* )prm, "%d", toks );

            // Unlink the entry below `node`, in case there is any doubt "(# : ;)"
            struct tok* del = node -> prev;
            node -> prev = del -> prev;
            delete[] del -> item;
            delete del;
            --tdx; // drop del
            for( --begin; begin < tdx + 2; ++begin )
                types[ begin ] = types[ begin + 1 ];
            --tdx; // yeah drop the ops field

            if( types[ tdx ] == PRM && top != NULL && top -> prev != NULL )
            {
                const int params = num_dat( top -> prev -> item );
                printf( "params=%d\n", params );
                --tdx;
                drop_dat();
            }

            // The ";" itself is replaced by the enclosed token count.
            types[ tdx++ ] = PRM;
            token = prm;
            discard = true;
            break;
        }
        else
            return false;
        break;

    case NUM:
    {
        const byte below = ( tdx >= 2 ) ? types[ tdx - 2 ] : ( byte )NIL;

        // "a" / "s" / "t" followed by an index fuse into one register name.
        if( is_reg_index( token ) && below == LTR && top != NULL
            && ( cmp_net( top -> item, "a" )       // 7
                 || cmp_net( top -> item, "s" )    // 11
                 || cmp_net( top -> item, "t" ) ) ) // 6
        {
            const byte num = num_dat( token );
            if( top -> item[ 0 ] == 'a' )
                assert( num <= 7 );
            if( top -> item[ 0 ] == 't' )
                assert( num <= 6 );

            // syntax error not presently possible where types[ tdx - 3 ] == PRM
            byte* reg = new byte[ len_dat( top -> item ) + len_dat( token ) - 1 ];
            sprintf( ( char* )reg, "%s%s", top -> item, token );
            --tdx;
            drop_dat();
            types[ tdx++ ] = REG;
            token = reg;
            discard = true;
            break;
        }
        if( below == PRM )
        {
            printf( "syntax: ) #\n" );
            return false;
        }
    }
    // Deliberate fall through: returning here instead "breaks a lot".
    case LTR:
    {
        const byte below = ( tdx >= 2 ) ? types[ tdx - 2 ] : ( byte )NIL;

        if( is_named_reg( token ) )
        {
            types[ tdx - 1 ] = REG;
            if( below == PRM )
            {
                printf( "syntax: ) reg\n" );
                return false;
            }
            break;
        }
        if( below == PRM )
        {
            // An identifier right after a parameter count is a call.
            printf( "\t\tjal\tra,%s\n", token );
            --tdx;
            drop_dat();
            return true;
        }
        break;
    }

    case PRN:
        types[ tdx - 1 ] = PRN;
        break;

    case CLN:
        types[ tdx - 1 ] = CLN;
        break;

    default:
        return false;
    }

    // Copy the (possibly rewritten) token onto the stack.
    struct tok* node = new struct tok;
    node -> item = new byte[ len_dat( token ) ];
    cp_net( node -> item, token );
    node -> prev = top;
    top = node;
    if( discard )
        delete[] token;
    return true;
}

// Classifies one input byte.  Upper-case 'A'..'F' are not letters here and
// fall into ETC together with every other unlisted byte.
byte type( byte x )
{
    if( x >= '0' && x <= '9' )
        return NUM;
    if( ( x >= 'G' && x <= 'Z' ) || ( x >= 'a' && x <= 'z' ) || x == '_' )
        return LTR;

    switch( x )
    {
    case '-': case '=': case '+': case '.': case '<': case '>':
    case ',': case ';': case '/': case '?': case '\\': case '|':
    case '!': case '%': case '^': case '&': case '*': case ')':
        return OPS;
    case '\'': case '"':
        return QUO;
    case '\t': case ' ': case '\n': case '\r':
        return SPC;
    case '(':
        return PRN;
    case ':':
        return CLN;
    default:
        return ETC;
    }
}

bool eof = false;   // set by r() once the input is exhausted
int file = 0;       // descriptor r() reads from
int read_dat = 0;   // total bytes read so far

// Returns the next input byte.  At end of input, or on a read error, sets
// `eof` and returns '\n' so the token in progress is still terminated.
byte r()
{
    byte input = 0;
    const ssize_t got = read( file, &input, 1 );

    if( got <= 0 )
    {
        eof = true;
        return '\n';
    }
    read_dat += ( int )got;
    return input;
}

byte token[ TOKEN_MAX ];   // token currently being assembled by sis()

// If the byte before position `idx` in token[] has type `sis`, terminates the
// token at `idx`, echoes it, and pushes it as a QUO token.  Returns 1 when
// the token was pushed and 0 otherwise.
byte w( int idx, byte sis )
{
    if( idx <= 0 )
        return 0;
    if( type( token[ idx - 1 ] ) != sis )
        return 0;

    token[ idx ] = '\0';
    puts( ( char* )token );
    types[ tdx++ ] = QUO;
    push( token );
    return 1;
}

// Tokenises the whole input.  Consecutive bytes of the same class form one
// token; whitespace and quote characters end the current token.  Returns
// false on a byte of class ETC or when a table would overflow, and true once
// the input is exhausted.
bool sis()
{
    int idx = 0;
    bool quoting = false;
    byte x, y; // this and next char

    x = token[ 0 ] = r(); // read one
    for( ; !eof; x = y )
    {
        if( idx + 1 >= TOKEN_MAX || tdx + TYPES_HEADROOM > TYPES_MAX )
        {
            fprintf( stderr, "input too long for token/type tables\n" );
            return false;
        }

        // idx is -1 right after an empty pair of quotes; that byte is not
        // part of any token, so it is simply not stored.
        if( idx >= 0 )
            token[ idx ] = x;
        y = r();

        const byte kind = type( x );

        if( quoting )
        {
            const bool closing = ( type( y ) == QUO );

            token[ ++idx ] = y;
            if( closing )
                quoting = false;
            else
                --idx;

            if( closing && idx == 1 )
            {
                // One byte between the quotes.
                token[ 0 ] = x;
                token[ 1 ] = '\0';
                types[ tdx++ ] = QUO;
                push( token );
                continue;
            }
            else if( closing )
            {
                w( idx, kind );
                continue;
            }
            else if( kind == QUO )
            {
                // Nothing between the quotes: push an empty string.
                --idx;
                token[ 0 ] = '\0';
                types[ tdx++ ] = QUO;
                push( token );
                continue;
            }
            else if( w( idx, QUO ) )
            {
                printf( "^QUO\n" );
            }
        }

        if( type( y ) != SPC && type( y ) != QUO )
        {
            if( kind == type( y ) && !ws )
            {
                // Same class: keep growing the current token.
                token[ ++idx ] = y;
                continue;
            }
            if( ws && kind != SPC )
                ws = false;
        }
        else // following character is space
        {
            if( ws )
            {
                y = x;
                continue;
            }
        }

        switch( kind )
        {
        case NUM:
        case LTR:
        case OPS:
        case PRN:
        case CLN:
            token[ ++idx ] = '\0';
            types[ tdx++ ] = kind;
            push( token );
            ws = false;
            break;

        case QUO:
            if( quoting ) // not executed
            {
                printf( "QUO Z\n" );
                quoting = false;
                token[ ++idx ] = '\0';
                types[ tdx++ ] = QUO;
                push( token );
            }
            else
            {
                quoting = true;
            }
            ws = false;
            continue;   // idx is deliberately not reset here

        case ETC:
            ws = false;
            return false;
        }
        idx = 0;
    }
    return true;
}

// Prints and frees whatever is left on the stack, newest entry first.
bool leave()
{
    while( top != NULL )
    {
        if( tdx > 0 )
            printf( ":%d=", types[ tdx - 1 ] );
        puts( ( char* )( top -> item ) );

        struct tok* gone = top;
        top = gone -> prev;
        delete[] gone -> item;
        delete gone;
        tdx -= 1;
    }
    return true;
}

// Translates the file named by argv[1], or "sample.e" when no argument is
// given.  Exits with EXIT_FAILURE when the file cannot be opened.
int main( int argc, char* argv[] )
{
    const char* path = ( argc > 1 ) ? argv[ 1 ] : "sample.e";

    file = open( path, O_RDONLY );
    if( file < 0 )
    {
        perror( path );
        return EXIT_FAILURE;
    }

    sis();
    leave();
    close( file );
    return EXIT_SUCCESS;
}