123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350 |
- /* Copyright 2009, UCAR/Unidata and OPeNDAP, Inc.
- See the COPYRIGHT file for more information. */
- #include "config.h"
- #include <strings.h>
- #include "dapparselex.h"
- #undef URLCVT /* NEVER turn this on */
- #define DAP2ENCODE
- /* Forward */
- static void dumptoken(DAPlexstate* lexstate);
- static void dapaddyytext(DAPlexstate* lex, int c);
- #ifndef DAP2ENCODE
- static int tohex(int c);
- #endif
/****************************************************/
#if 0 /* Reference only: these definitions are never compiled */
/* The printable ASCII characters other than letters and digits */
static char ascii[] =
    " !\"#$%&'()*+,-./:;<=>?@[]\\^_`|{}~";
/* The legal non-alphanumeric characters as specified in the DAP2 spec. */
static char* daplegal =
    "_!~*'-\"";
#endif

/* Single-character tokens: each of these is returned as its own token */
static char* ddsworddelims = "{}[]:;=,";

/* Characters accepted by every word class below:
   letters, digits and the punctuation -+_/%\ */
#define DAPWORD_BASE \
    "abcdefghijklmnopqrstuvwxyz" \
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
    "0123456789" \
    "-+_/%\\"

/* Legal first character (chars1) and following characters (charsn) of a
   word; the DDS, DAS and constraint-expression (CE) variants differ only
   in the punctuation added to the common base. */
static char* ddswordchars1 = DAPWORD_BASE ".*";
static char* ddswordcharsn = DAPWORD_BASE ".*#";
static char* daswordcharsn = DAPWORD_BASE ".*#:";
static char* cewordchars1 = DAPWORD_BASE;
static char* cewordcharsn = DAPWORD_BASE;

#undef DAPWORD_BASE
- /* Current sets of legal characters */
- /*
- static char* wordchars1 = NULL;
- static char* wordcharsn = NULL;
- static char* worddelims = NULL;
- */
/* Reserved words recognised by daplex() (compared case-insensitively).
   Entry i here pairs with entry i of keytokens[]: keep both tables in the
   same order when adding or removing a keyword. */
static char* keywords[] = {
    "alias",    "array",    "attributes",   "byte",
    "dataset",  "error",    "float32",      "float64",
    "grid",     "int16",    "int32",        "maps",
    "sequence", "string",   "structure",    "uint16",
    "uint32",   "url",      "code",         "message",
    "program_type",         "program",
    NULL /* mark end of the keywords list */
};
- static int keytokens[] = {
- SCAN_ALIAS,
- SCAN_ARRAY,
- SCAN_ATTR,
- SCAN_BYTE,
- SCAN_DATASET,
- SCAN_ERROR,
- SCAN_FLOAT32,
- SCAN_FLOAT64,
- SCAN_GRID,
- SCAN_INT16,
- SCAN_INT32,
- SCAN_MAPS,
- SCAN_SEQUENCE,
- SCAN_STRING,
- SCAN_STRUCTURE,
- SCAN_UINT16,
- SCAN_UINT32,
- SCAN_URL,
- SCAN_CODE,
- SCAN_MESSAGE,
- SCAN_PTYPE,
- SCAN_PROG
- };
- /**************************************************/
- int
- daplex(YYSTYPE* lvalp, DAPparsestate* state)
- {
- DAPlexstate* lexstate = state->lexstate;
- int token;
- int c;
- unsigned int i;
- char* p;
- char* tmp;
- token = 0;
- ocbytesclear(lexstate->yytext);
- /* invariant: p always points to current char */
- for(p=lexstate->next;token==0&&(c=*p);p++) {
- if(c == '\n') {
- lexstate->lineno++;
- } else if(c <= ' ' || c == '\177') {
- /* whitespace: ignore */
- } else if(c == '#') {
- /* single line comment */
- while((c=*(++p))) {if(c == '\n') break;}
- } else if(strchr(lexstate->worddelims,c) != NULL) {
- /* don't put in lexstate->yytext to avoid memory leak */
- token = c;
- } else if(c == '"') {
- int more = 1;
- /* We have a string token; will be reported as WORD_STRING */
- while(more && (c=*(++p))) {
- #ifdef DAP2ENCODE
- if(c == '"')
- more = 0;
- else if(c == '\\') {
- /* Remove spec ambiguity by convering \c to c
- for any character c */
- c=*(++p);
- if(c == '\0') more = 0;
- }
- #else /*Non-standard*/
- switch (c) {
- case '"': more=0; break;
- case '\\':
- c=*(++p);
- switch (c) {
- case 'r': c = '\r'; break;
- case 'n': c = '\n'; break;
- case 'f': c = '\f'; break;
- case 't': c = '\t'; break;
- case 'x': {
- int d1,d2;
- c = '?';
- ++p;
- d1 = tohex(*p++);
- if(d1 < 0) {
- daperror(state,"Illegal \\xDD in TOKEN_STRING");
- } else {
- d2 = tohex(*p++);
- if(d2 < 0) {
- daperror(state,"Illegal \\xDD in TOKEN_STRING");
- } else {
- c=(((unsigned int)d1)<<4) | (unsigned int)d2;
- }
- }
- } break;
- default: break;
- }
- break;
- default: break;
- }
- #endif /*!DAP2ENCODE*/
- if(more) dapaddyytext(lexstate,c);
- }
- token=WORD_STRING;
- } else if(strchr(lexstate->wordchars1,c) != NULL) {
- int isdatamark = 0;
- /* we have a WORD_WORD */
- dapaddyytext(lexstate,c);
- while((c=*(++p))) {
- #ifdef URLCVT
- if(c == '%' && p[1] != 0 && p[2] != 0
- && strchr(hexdigits,p[1]) != NULL
- && strchr(hexdigits,p[2]) != NULL) {
- int d1,d2;
- d1 = tohex(p[1]);
- d2 = tohex(p[2]);
- if(d1 >= 0 || d2 >= 0) {
- c=(((unsigned int)d1)<<4) | (unsigned int)d2;
- p+=2;
- }
- } else {
- if(strchr(lexstate->wordcharsn,c) == NULL) {p--; break;}
- }
- dapaddyytext(lexstate,c);
- #else
- if(strchr(lexstate->wordcharsn,c) == NULL) {p--; break;}
- dapaddyytext(lexstate,c);
- #endif
- }
- /* Special check for Data: */
- tmp = ocbytescontents(lexstate->yytext);
- if(strcmp(tmp,"Data")==0 && *p == ':') {
- dapaddyytext(lexstate,*p); p++;
- if(p[0] == '\n') {
- token = SCAN_DATA;
- isdatamark = 1;
- p++;
- } else if(p[0] == '\r' && p[1] == '\n') {
- token = SCAN_DATA;
- isdatamark = 1;
- p+=2;
- }
- }
- if(!isdatamark) {
- /* check for keyword */
- token=WORD_WORD; /* assume */
- for(i=0;;i++) {
- if(keywords[i] == NULL) break;
- if(strcasecmp(keywords[i],tmp)==0) {
- token=keytokens[i];
- break;
- }
- }
- }
- } else { /* illegal */
- }
- }
- lexstate->next = p;
- strncpy(lexstate->lasttokentext,ocbytescontents(lexstate->yytext),MAX_TOKEN_LENGTH);
- lexstate->lasttoken = token;
- if(ocdebug >= 2)
- dumptoken(lexstate);
- /*Put return value onto Bison stack*/
- if(ocbyteslength(lexstate->yytext) == 0)
- *lvalp = NULL;
- else {
- *lvalp = ocbytesdup(lexstate->yytext);
- oclistpush(lexstate->reclaim,(ocelem)*lvalp);
- }
- return token; /* Return the type of the token. */
- }
- static void
- dapaddyytext(DAPlexstate* lex, int c)
- {
- ocbytesappend(lex->yytext,(char)c);
- }
#ifndef DAP2ENCODE
/* Convert one hexadecimal digit character (0-9, a-f, A-F) to its value
   0..15; any other character yields -1. */
static int
tohex(int c)
{
    int value = -1;

    if(c >= '0' && c <= '9')
        value = c - '0';
    else if(c >= 'a' && c <= 'f')
        value = 0xa + (c - 'a');
    else if(c >= 'A' && c <= 'F')
        value = 0xa + (c - 'A');
    return value;
}
#endif
- static void
- dumptoken(DAPlexstate* lexstate)
- {
- fprintf(stderr,"TOKEN = |%s|\n",ocbytescontents(lexstate->yytext));
- }
- /*
- Simple lexer
- */
- void
- dapsetwordchars(DAPlexstate* lexstate, int kind)
- {
- switch (kind) {
- case 0:
- lexstate->worddelims = ddsworddelims;
- lexstate->wordchars1 = ddswordchars1;
- lexstate->wordcharsn = ddswordcharsn;
- break;
- case 1:
- lexstate->worddelims = ddsworddelims;
- lexstate->wordchars1 = ddswordchars1;
- lexstate->wordcharsn = daswordcharsn;
- break;
- case 2:
- lexstate->worddelims = ddsworddelims;
- lexstate->wordchars1 = cewordchars1;
- lexstate->wordcharsn = cewordcharsn;
- break;
- default: break;
- }
- }
- void
- daplexinit(char* input, DAPlexstate** lexstatep)
- {
- DAPlexstate* lexstate = (DAPlexstate*)malloc(sizeof(DAPlexstate));
- if(lexstatep) *lexstatep = lexstate;
- if(lexstate == NULL) return;
- memset((void*)lexstate,0,sizeof(DAPlexstate));
- lexstate->input = strdup(input);
- lexstate->next = lexstate->input;
- lexstate->yytext = ocbytesnew();
- lexstate->reclaim = oclistnew();
- dapsetwordchars(lexstate,0); /* Assume DDS */
- }
- void
- daplexcleanup(DAPlexstate** lexstatep)
- {
- DAPlexstate* lexstate = *lexstatep;
- if(lexstate == NULL) return;
- if(lexstate->input != NULL) ocfree(lexstate->input);
- if(lexstate->reclaim != NULL) {
- while(oclistlength(lexstate->reclaim) > 0) {
- char* word = (char*)oclistpop(lexstate->reclaim);
- if(word) free(word);
- }
- oclistfree(lexstate->reclaim);
- }
- ocbytesfree(lexstate->yytext);
- free(lexstate);
- *lexstatep = NULL;
- }
/* DAP identifiers reach us with some characters escaped in the URL
   notation %HH. The rule applied here is that an escaped character stays
   escaped, with one exception: if it decodes to a legal DAP2 character
   (an alphanumeric or one of _!~*'-") it is decoded.

   decodelist enumerates those decodable characters; it is only needed
   when DECODE_IDENTIFIERS is not defined.
*/
#ifndef DECODE_IDENTIFIERS
static char* decodelist =
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    "_!~*'-\"";
#endif
- char*
- dapdecode(DAPlexstate* lexstate, char* name)
- {
- char* decoded;
- #ifdef DECODE_IDENTIFIERS
- decoded = ocuridecode(name);
- #else
- decoded = ocuridecodeonly(name,decodelist);
- #endif
- oclistpush(lexstate->reclaim,(ocelem)decoded);
- return decoded;
- }
|