sbml2matlab
1.01
SBML to MATLAB translator
|
// **************************************************************************
// *                                                                        *
// *    u S C A N N E R   General purpose C++ tokenizer                     *
// *                                                                        *
// *    Usage:                                                              *
// *       uScanner::TScanner sc;                                           *
// *       sc.setStream (&someIstream);                                     *
// *       sc.startScanner();                                               *
// *       sc.nextToken();                                                  *
// *       if (sc.getToken() == uScanner::tEndOfStreamToken)                *
// *          return;                                                       *
// *                                                                        *
// *    Errors are reported by throwing a heap allocated                    *
// *    EScannerException, i.e. catch (uScanner::EScannerException* e)      *
// *    and delete e when done.                                             *
// *                                                                        *
// *    Copyright (c) 2004 by Herbert Sauro                                 *
// *    Licenced under the Artistic open source licence                     *
// *                                                                        *
// **************************************************************************

#ifndef USCANNER_H_INCLUDED
#define USCANNER_H_INCLUDED

//using System;
//using System.IO;
//using System.Collections;
#include <climits>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <istream>
#include <map>
#include <streambuf>
#include <string>
// Kept for backward compatibility: code including this file may rely on it.
using namespace std;

namespace uScanner {

    // Declare a Scanner exception type.
    // The scanner throws these by pointer (throw new EScannerException), so
    // handlers must catch EScannerException* and are responsible for deleting it.
    class EScannerException {
    public:
        string eMessage;   // Human readable description of the error

        EScannerException (string message) : eMessage (message) {
        }
    };

    // Different types of recognised tokens
    enum TTokenCode
    {
        tEmptyToken,     tEndOfStreamToken,     tIntToken,          tDoubleToken,    tComplexToken,
        tStringToken,    tWordToken,            tEolToken,          tSemiColonToken, tCommaToken,
        tEqualsToken,    tPlusToken,            tMinusToken,        tMultToken,      tDivToken,
        tLParenToken,    tRParenToken,          tLBracToken,        tRBracToken,     tLCBracToken,
        tRCBracToken,    tOrToken,              tAndToken,          tNotToken,       tXorToken,
        tPowerToken,     tLessThanToken,        tLessThanOrEqualToken, tMoreThanToken, tMoreThanOrEqualToken,
        tNotEqualToken,  tReversibleArrow,      tIrreversibleArrow, tStartComment,   tInternalToken,
        tExternalToken,  tParameterToken,       tIfToken,           tWhileToken,     tModelToken,
        tEndToken
    };


    /* // This class is Experimental....
    public class tokenClass
    {
        private string description;
        public TTokenCode tokenCode;

        public override string ToString () { return description; }

        public tokenClass (TTokenCode token, string value)
        {
            this.tokenCode   = token;
            this.description = value;
        }
    }*/

    // This is currently used to store the previous token and support simple look ahead
    class TToken
    {
    public:
        enum TTokenCode tokenCode;
        string tokenString;
        int    tokenInteger;
        double tokenDouble;
        double tokenValue;   // Used to retrieve int or double

        TToken()
            : tokenCode (tEmptyToken),
              tokenString (""),
              tokenInteger (0),
              tokenDouble (0.0),
              tokenValue (0.0)
        {
        }
    };


    // -------------------------------------------------------------------
    //     Start of TScanner Class
    // -------------------------------------------------------------------

    class TScanner
    {
    private:
        // Defined in-class so that including this file from several
        // translation units does not produce duplicate definitions.
        static const char EOFCHAR = '\x7F';   // Deemed end of string marker, used internally
        static const char CR      = (char)10; // Line terminator recognised by the scanner
        //SRI const static char LF;

        // One table entry per possible byte value; size of the read buffer.
        enum { FCharTableSize = 256, bufferCapacity = 255 };

        enum TCharCode {
            cLETTER,
            cDIGIT,
            cPOINT,
            cDOUBLEQUOTE,
            cUNDERSCORE,
            cSPECIAL,
            cWHITESPACE,
            cETX
        };

        // private variables
        TCharCode FCharTable[FCharTableSize];
        char buffer[bufferCapacity];  // Input buffer
        int  bufferPtr;               // Index of position in buffer containing current char
        int  bufferLength;
        int  yylineno;                // Current line number
        istream* myFStream;
        TTokenCode ftoken;
        map<string, int> wordTable;

        // Maps a (possibly signed) char onto a valid FCharTable index, 0..255.
        static int charIndex (char ch) {
            return (int)(unsigned char)ch;
        }

        // Returns the character class of ch.
        TCharCode charClass (char ch) const {
            return FCharTable[charIndex (ch)];
        }

        // Fills in the character classification table and the keyword table.
        void initScanner()
        {
            int ch;
            for (ch = 0; ch < FCharTableSize; ch++)
                FCharTable[ch] = cSPECIAL;
            for (ch = (int)'0'; ch <= (int)'9'; ch++)
                FCharTable[ch] = cDIGIT;
            for (ch = (int)'A'; ch <= (int)'Z'; ch++)
                FCharTable[ch] = cLETTER;
            for (ch = (int)'a'; ch <= (int)'z'; ch++)
                FCharTable[ch] = cLETTER;

            FCharTable[charIndex ('.')]     = cPOINT;
            FCharTable[charIndex ('"')]     = cDOUBLEQUOTE;
            FCharTable[charIndex ('_')]     = cUNDERSCORE;
            FCharTable[charIndex ('\t')]    = cWHITESPACE;
            FCharTable[charIndex (' ')]     = cWHITESPACE;
            FCharTable[charIndex (EOFCHAR)] = cETX;

            //wordTable["and"] = tAndToken;
            //wordTable["or"]  = tOrToken;
            //wordTable["not"] = tNotToken;
            //wordTable["xor"] = tXorToken;

            wordTable["model"]     = tModelToken;
            wordTable["if"]        = tIfToken;
            wordTable["while"]     = tWhileToken;
            wordTable["end"]       = tEndToken;
            wordTable["internal"]  = tInternalToken;
            wordTable["external"]  = tExternalToken;
            wordTable["parameter"] = tParameterToken;
        }

        // Copies every member except the two owned TToken objects from other.
        void copyStateFrom (const TScanner& other)
        {
            memcpy (FCharTable, other.FCharTable, sizeof (FCharTable));
            memcpy (buffer, other.buffer, sizeof (buffer));
            bufferPtr      = other.bufferPtr;
            bufferLength   = other.bufferLength;
            yylineno       = other.yylineno;
            myFStream      = other.myFStream;
            ftoken         = other.ftoken;
            wordTable      = other.wordTable;
            IgnoreNewLines = other.IgnoreNewLines;
            tokenString    = other.tokenString;
            tokenInteger   = other.tokenInteger;
            tokenDouble    = other.tokenDouble;
            tokenScalar    = other.tokenScalar;
            fch            = other.fch;
        }

        // Returns the next raw character of the input, refilling the buffer
        // from the stream when it has been used up. Returns EOFCHAR when no
        // stream has been set or the stream has no more data.
        char getCharFromBuffer()
        {
            // bufferPtr == 0 means the buffer is empty: read a new chunk of
            // text from the input stream, this might be a stream or console
            if (bufferPtr == 0)
            {
                if (myFStream == 0)
                    return EOFCHAR;

                myFStream->read (buffer, bufferCapacity);
                bufferLength = (int) myFStream->gcount();

                if (bufferLength == 0)
                    return EOFCHAR;
            }

            char ch = buffer[bufferPtr];
            bufferPtr++;
            if (bufferPtr >= bufferLength)
                bufferPtr = 0;  // Indicates the buffer is empty

            return ch;
        }

        // -------------------------------------------------------------------
        // Scan for a word, words start with letter or underscore then continue
        // with letters, digits or underscore. Words found in wordTable are
        // reported with their keyword token, everything else as tWordToken.
        // -------------------------------------------------------------------

        void getWord()
        {
            TCharCode code = charClass (fch);
            while ((code == cLETTER) || (code == cDIGIT) || (code == cUNDERSCORE))
            {
                tokenString += fch;
                nextChar();
                code = charClass (fch);
            }

            map<string, int>::const_iterator entry = wordTable.find (tokenString);
            if (entry != wordTable.end())
                ftoken = (TTokenCode) entry->second;
            else
                ftoken = tWordToken;
        }

        // -------------------------------------------------------------------
        // Scan for a number, integer, double or complex.
        // Sets ftoken to tIntToken, tDoubleToken or tComplexToken and stores
        // the value in tokenInteger / tokenDouble / tokenScalar.
        // Throws EScannerException* on overflow or malformed numbers.
        // -------------------------------------------------------------------

        void getNumber()
        {
            const int MAX_DIGIT_COUNT = 3;  // Max number of digits in exponent

            int    single_digit;
            double scale;
            double evalue        = 0.0;
            int    exponent_sign = 1;
            int    digit_count;

            tokenInteger = 0;
            tokenDouble  = 0.0;
            tokenScalar  = 0.0;

            // Assume first it's an integer
            ftoken = tIntToken;

            // Pick up number before any decimal place
            if (fch != '.')
            {
                do
                {
                    single_digit = fch - '0';
                    // Signed overflow is undefined behaviour and is never
                    // reported as an exception, so test the range explicitly.
                    if (tokenInteger > (INT_MAX - single_digit) / 10)
                        throw new EScannerException ("Integer Overflow - constant value too large to read");
                    tokenInteger = 10*tokenInteger + single_digit;
                    tokenScalar  = tokenInteger;
                    nextChar();
                } while (charClass (fch) == cDIGIT);
            }

            scale = 1;
            if (fch == '.')
            {
                // Then it's a float. Start collecting fractional part
                ftoken      = tDoubleToken;
                tokenDouble = tokenInteger;
                nextChar();
                if (charClass (fch) != cDIGIT)
                    throw new EScannerException ("Syntax error: expecting number after decimal point");

                while (charClass (fch) == cDIGIT)
                {
                    scale        = scale * 0.1;
                    single_digit = fch - '0';
                    tokenDouble  = tokenDouble + (single_digit * scale);
                    tokenScalar  = tokenDouble;
                    nextChar();
                }
            }

            // Next check for scientific notation
            if ((fch == 'e') || (fch == 'E'))
            {
                // Then it's a float. Start collecting exponent part
                if (ftoken == tIntToken)
                {
                    ftoken      = tDoubleToken;
                    tokenDouble = tokenInteger;
                    tokenScalar = tokenInteger;
                }
                nextChar();
                if ((fch == '-') || (fch == '+'))
                {
                    if (fch == '-') exponent_sign = -1;
                    nextChar();
                }
                // accumulate exponent, check that first ch is a digit
                if (charClass (fch) != cDIGIT)
                    throw new EScannerException ("Syntax error: number expected in exponent");

                // Reads at most one digit more than allowed so that an over
                // long exponent can be detected and reported below.
                digit_count = 0;
                do {
                    digit_count++;
                    single_digit = fch - '0';
                    evalue       = 10*evalue + single_digit;
                    nextChar();
                } while ((charClass (fch) == cDIGIT) && (digit_count <= MAX_DIGIT_COUNT));

                if (digit_count > MAX_DIGIT_COUNT)
                    throw new EScannerException ("Syntax error: too many digits in exponent");

                evalue = evalue * exponent_sign;
                if (evalue > 300)
                    throw new EScannerException ("Exponent overflow while parsing floating point number");
                evalue      = pow (10.0, evalue);
                tokenDouble = tokenDouble * evalue;
                tokenScalar = tokenDouble;
            }

            // Check for complex number
            if ((fch == 'i') || (fch == 'j'))
            {
                if (ftoken == tIntToken)
                    tokenDouble = tokenInteger;
                ftoken = tComplexToken;
                nextChar();
            }
        }


        // Returns true if ch is the double quote character.
        bool IsDoubleQuote (char ch) {
            return charClass (ch) == cDOUBLEQUOTE;
        }

        // -------------------------------------------------------------------
        // Scan for string, "abc"
        // A doubled quote ("") inside the string stands for one quote char.
        // Recognised escapes are \\ \n \r and \t.
        // Throws EScannerException* on an unknown escape or a missing
        // terminating quote.
        // -------------------------------------------------------------------

        void getString() {
            bool OldIgnoreNewLines;
            tokenString = "";
            nextChar();

            ftoken = tStringToken;
            while (fch != EOFCHAR)
            {
                // Check for escape characters
                if (fch == '\\')
                {
                    nextChar();
                    switch (fch)
                    {
                        case '\\' : tokenString = tokenString + '\\';
                                    break;
                        case 'n'  : tokenString = tokenString + CR;
                                    break;
                        //SRI case 'n'  : tokenString = tokenString + CR + LF;
                        //SRI             break;
                        case 'r'  : tokenString = tokenString + CR;
                                    break;
                        //SRI case 'f'  : tokenString = tokenString + LF;
                        //SRI             break;
                        // NOTE(review): \t is expanded to a space, not a tab
                        // character - confirm this is intended.
                        case 't'  : tokenString = tokenString + " ";
                                    break;
                        default:
                            throw new EScannerException ("Syntax error: Unrecognised control code in string");
                    }
                    nextChar();
                }
                else
                {
                    OldIgnoreNewLines = IgnoreNewLines;
                    if (IsDoubleQuote (fch))
                    {
                        // Just in case the double quote is at the end of a line and another string
                        // start immediately in the next line, if we ignore newlines we'll
                        // pick up a double quote rather than the end of a string
                        IgnoreNewLines = false;
                        nextChar();
                        if (IsDoubleQuote (fch))
                        {
                            tokenString = tokenString + fch;
                            nextChar();
                        }
                        else
                        {
                            if (OldIgnoreNewLines)
                            {
                                while (fch == CR)
                                {
                                    nextChar();
                                    //SRI while (fch == LF)
                                    //SRI    nextChar();
                                }
                            }
                            IgnoreNewLines = OldIgnoreNewLines;
                            return;
                        }
                    }
                    else
                    {
                        tokenString = tokenString + fch;
                        nextChar();
                    }
                    IgnoreNewLines = OldIgnoreNewLines;
                }
            }
            // The loop above only ends when the stream ran out before the
            // closing quote was seen.
            if (fch == EOFCHAR)
                throw new EScannerException ("Syntax error: String without terminating quotation mark");
        }


        // -------------------------------------------------------------------
        // Scan for special characters (operators, brackets, end of line).
        // Throws EScannerException* for a character that starts no token.
        // -------------------------------------------------------------------

        void getSpecial() {
            switch (fch) {
                // Both the scanner's line terminator (char 10) and a carriage
                // return (char 13) are reported as end of line.
                case CR:
                case 13:  ftoken = tEolToken;
                          nextChar();
                          break;

                case ';': ftoken = tSemiColonToken;
                          nextChar();
                          break;

                case ',': ftoken = tCommaToken;
                          nextChar();
                          break;

                case '=': nextChar();
                          if (fch == '>')
                          {
                              ftoken = tReversibleArrow;
                              nextChar();
                          }
                          else
                              ftoken = tEqualsToken;
                          break;

                case '+': ftoken = tPlusToken;
                          nextChar();
                          break;

                case '-': nextChar();
                          if (fch == '>')
                          {
                              ftoken = tIrreversibleArrow;
                              nextChar();
                          }
                          else
                              ftoken = tMinusToken;
                          break;

                case '*': nextChar();
                          ftoken = tMultToken;
                          break;

                case '/': // look ahead at next ch
                          if (nextChar() == '/')
                          {
                              ftoken = tStartComment;
                              // Fetch the first comment character unfiltered:
                              // nextChar() would turn a newline into a space
                              // and nextToken() would then fail to see the end
                              // of an empty comment.
                              fch = getCharFromBuffer();
                          }
                          else
                              ftoken = tDivToken;
                          break;

                case '(': nextChar();
                          ftoken = tLParenToken;
                          break;

                case ')': nextChar();
                          ftoken = tRParenToken;
                          break;

                case '[': nextChar();
                          ftoken = tLBracToken;
                          break;

                case ']': nextChar();
                          ftoken = tRBracToken;
                          break;

                case '{': nextChar();
                          ftoken = tLCBracToken;
                          break;

                case '}': nextChar();
                          ftoken = tRCBracToken;
                          break;

                case '^': nextChar();
                          ftoken = tPowerToken;
                          break;

                case '<': nextChar();
                          if (fch == '=')
                          {
                              ftoken = tLessThanOrEqualToken;
                              nextChar();
                          }
                          else
                              ftoken = tLessThanToken;
                          break;

                case '>': nextChar();
                          if (fch == '=')
                          {
                              ftoken = tMoreThanOrEqualToken;
                              nextChar();
                          }
                          else
                              ftoken = tMoreThanToken;
                          break;

                case '!': nextChar();
                          if (fch == '=')
                          {
                              ftoken = tNotEqualToken;
                              nextChar();
                          }
                          else
                              // A lone '!' must still produce a token of its
                              // own instead of leaving the previous one in place.
                              ftoken = tNotToken;
                          break;

                default:
                {
                    // Build the message as a std::string: adding a char to a
                    // string literal would do pointer arithmetic instead.
                    string message = "Syntax error: Unknown special token [";
                    message += fch;
                    message += "]";
                    throw new EScannerException (message);
                }
            }
        }


        // Fetches the next token, comments included (reported as tStartComment).
        void nextTokenInternal() {
            // check if a token has been pushed back into the token stream, if so use it first
            if (previousToken->tokenCode != tEmptyToken) {
                ftoken       = previousToken->tokenCode;
                tokenString  = previousToken->tokenString;
                tokenDouble  = previousToken->tokenDouble;
                tokenInteger = previousToken->tokenInteger;
                tokenScalar  = previousToken->tokenValue;
                previousToken->tokenCode = tEmptyToken;
                return;
            }

            skipBlanks();
            tokenString = "";

            switch (charClass (fch)) {
                case cLETTER      :
                case cUNDERSCORE  : getWord();
                                    break;
                case cDIGIT       : getNumber();
                                    break;
                case cDOUBLEQUOTE : getString();
                                    break;
                case cETX         : ftoken = tEndOfStreamToken;
                                    break;
                default           : getSpecial();
                                    break;
            }
        }


    // Public Variables

    public:
        TToken* currentToken;    // Owned by the scanner; allocated but not updated by it
        TToken* previousToken;   // Owned by the scanner; holds a token pushed back by UnGetToken

        bool IgnoreNewLines;     // When true nextChar() turns line terminators into spaces

        string tokenString;      // Text of the current word or string token
        int    tokenInteger;     // Value of the current integer token
        double tokenDouble;      // Value of the current double or complex token
        double tokenScalar;      // Used to retrieve int or double
        char   fch;              // Current (look ahead) character

        // -------------------------------------------------------------------
        //      Constructor
        // -------------------------------------------------------------------
        TScanner()
            : bufferPtr (0),
              bufferLength (0),
              yylineno (1),
              myFStream (0),
              ftoken (tEmptyToken),
              currentToken (0),
              previousToken (0),
              IgnoreNewLines (true),
              tokenString (""),
              tokenInteger (0),
              tokenDouble (0.0),
              tokenScalar (0.0),
              fch (EOFCHAR)
        {
            memset (buffer, 0, sizeof (buffer));
            previousToken = new TToken();
            currentToken  = new TToken();
            initScanner();
        }

        // Copies get their own token objects so that every scanner can
        // release its tokens independently.
        TScanner (const TScanner& other)
            : currentToken (0),
              previousToken (0)
        {
            previousToken = new TToken (*other.previousToken);
            currentToken  = new TToken (*other.currentToken);
            copyStateFrom (other);
        }

        TScanner& operator= (const TScanner& other)
        {
            if (this != &other)
            {
                *previousToken = *other.previousToken;
                *currentToken  = *other.currentToken;
                copyStateFrom (other);
            }
            return *this;
        }

        // -------------------------------------------------------------------
        //      Destructor
        // -------------------------------------------------------------------
        ~TScanner()
        {
            delete previousToken;
            delete currentToken;
        }

        // Create a readonly property for the current line number
        int getLineNumber() const {
            return yylineno;
        }

        // writeonly stream property
        // The scanner does not take ownership of inputStream.
        void setStream (istream* inputStream) {
            //istream inputStream(sb);
            myFStream = inputStream;
        }

        // readonly current token property
        TTokenCode getToken() const {
            return ftoken;
        }

        // Must be called before using nextToken()
        void startScanner() {
            yylineno  = 1;
            bufferPtr = 0;
            nextChar();
        }

        // -------------------------------------------------------------------
        // Fetches next character from input stream and filters NL if required.
        // Every line terminator read here increments the line number.
        // -------------------------------------------------------------------

        char nextChar() {
            fch = getCharFromBuffer();
            if (fch == CR)
            {
                yylineno++;
                // Turn line terminators into space characters
                if (IgnoreNewLines)
                    fch = ' ';
                //SRI if (fch == LF)
                //SRI    fch = ' ';
                //SRI return fch;
            }
            return fch;
        }


        // -------------------------------------------------------------------
        // Skips any blanks, ie TAB, ' '
        // -------------------------------------------------------------------

        void skipBlanks() {
            //SRI if ((fch == LF) || (fch == CR))
            while (charClass (fch) == cWHITESPACE)
                nextChar();
        }

        // -------------------------------------------------------------------
        // Retrieve next token in stream, returns tEndOfStreamToken
        // if it reaches the end of the stream.
        // Comments (// up to the end of the line) are skipped.
        // Throws EScannerException* on a syntax error.
        // -------------------------------------------------------------------

        void nextToken()
        {
            nextTokenInternal();
            while (ftoken == tStartComment)
            {
                // Comment ends with an end of line char
                while ((fch != CR) && (fch != EOFCHAR))
                    fch = getCharFromBuffer();
                //SRI while ((fch == LF) && (fch != EOFCHAR))
                //SRI    fch = getCharFromBuffer();
                if (fch == CR)
                {
                    // The terminator was read unfiltered, so count it here.
                    yylineno++;
                    nextChar();
                    // Any further terminators have been counted by nextChar().
                    while (fch == CR)
                        nextChar();
                    //SRI while (fch == LF)
                    //SRI    nextChar();  // Dump the linefeed
                }
                nextTokenInternal();  // get the real next token
            }
        }


        // -------------------------------------------------------------------
        // Allows one token look ahead
        // Push token back into token stream
        // -------------------------------------------------------------------

        void UnGetToken()
        {
            previousToken->tokenCode    = ftoken;
            previousToken->tokenString  = tokenString;
            previousToken->tokenInteger = tokenInteger;
            previousToken->tokenDouble  = tokenDouble;
            previousToken->tokenValue   = tokenScalar;
        }

        // -------------------------------------------------------------------
        // Given a token, this function returns the string equivalent.
        // Value carrying tokens are rendered from the current token values.
        // -------------------------------------------------------------------

        string tokenToString (TTokenCode code) const {
            // Large enough for the longest "%lf" rendering of a double
            // (about 310 digits plus sign, point and six decimals).
            char text[512];

            switch (code) {
                case tIntToken :
                    sprintf (text, "%d", tokenInteger);
                    return "<Integer: " + string (text) + ">";
                case tDoubleToken :
                    sprintf (text, "%lf", tokenDouble);
                    return "<Double: " + string (text) + ">";
                case tComplexToken :
                    sprintf (text, "%lf", tokenDouble);
                    return "<Complex: " + string (text) + "i>";
                case tStringToken  : return "<String: " + tokenString + ">";
                case tWordToken    : //return "<Identifier: " + tokenString + ">";
                                     return "(" + tokenString + ")";
                case tEndOfStreamToken : return "<end of stream>";
                case tEolToken         : return "<EOLN>";
                case tSemiColonToken   : return "<;>";
                case tCommaToken       : return "<,>";
                case tEqualsToken      : return "<=>";
                case tPlusToken        : return "<+>";
                case tMinusToken       : return "<->";
                case tMultToken        : return "<*>";
                case tDivToken         : return "</>";
                case tPowerToken       : return "<^>";
                case tLParenToken      : return "<(>";
                case tRParenToken      : return "<)>";
                case tLBracToken       : return "<[>";
                case tRBracToken       : return "<]>";
                case tLCBracToken      : return "<{>";
                case tRCBracToken      : return "<}>";
                /*case tOrToken          : return "<or>";
                case tAndToken         : return "<and>";
                case tNotToken         : return "<not>";
                case tXorToken         : return "<xor>";*/
                case tLessThanToken        : return "[<]";
                case tLessThanOrEqualToken : return "[<=]";
                case tMoreThanToken        : return "[>]";
                case tMoreThanOrEqualToken : return "[>=]";
                case tNotEqualToken        : return "!=";
                case tReversibleArrow      : return "[=>]";
                case tIrreversibleArrow    : return "[->]";
                case tIfToken              : return "<if>";
                case tWhileToken           : return "<while>";
                case tModelToken           : return "<model>";
                case tEndToken             : return "<end>";
                case tInternalToken        : return "<Internal>";
                case tExternalToken        : return "<External>";
                case tParameterToken       : return "<Parameter>";

                default:
                    return "<unknown>";
            }
        }
    }; // end of TScanner class
    //SRI const char TScanner::CR = (char)13;
    //SRI const char TScanner::LF = (char)10;

} // end of uScanner namespace

#endif // USCANNER_H_INCLUDED