git:andrei/script_vars: script parsing: fix line/column counting in strings - sr-dev

12 Dec 2008

Module: sip-router
Branch: andrei/script_vars
Commit: 9c674bfa6e1b60082beb57bd571995ff1cd8208b
URL:    http://git.sip-router.org/cgi-bin/gitweb.cgi/sip-router/?a=commit;h=9c674bfa6e1b60082beb57bd571995ff1cd8208b
Author: Andrei Pelinescu-Onciul andrei@iptel.org
Committer: Andrei Pelinescu-Onciul andrei@iptel.org
Date:   Fri Dec 12 12:55:02 2008 +0100
script parsing: fix line/column counting in strings
- added startline, which holds the line on which the current token
  starts (some tokens can span multiple lines).
- the position of string tokens is now reported correctly (startline
  and startcolumn always point at the beginning of the token)
---
cfg.lex |  108 ++++++++++++++++++++++++++++++++++++++++++++++----------------
 1 files changed, 80 insertions(+), 28 deletions(-)

diff --git a/cfg.lex b/cfg.lex
index 7578837..6e9d991 100644
--- a/cfg.lex
+++ b/cfg.lex
@@ -119,10 +119,15 @@
    int line=1;
    int column=1;
    int startcolumn=1;
+	int startline=1;
+	static int ign_lines=0;
+	static int ign_columns=0;
static char* addchar(struct str_buf *, char);
    static char* addstr(struct str_buf *, char*, int);
    static void count();
+	static void count_more();
+	static void count_ignore();
%}
@@ -806,7 +811,7 @@ EAT_ABLE	[\ \t\b\r]
 <PVARID>{ID}|'.'			{yymore(); }
 <PVARID>{LPAREN}			{	state = PVAR_P_S; BEGIN(PVAR_P);
    							p_nest=1; yymore(); }
-<PVARID>.					{ count(); state=INITIAL_S; BEGIN(INITIAL);
+<PVARID>.					{ yyless(0); state=INITIAL_S; BEGIN(INITIAL);
    							return PVAR;
    						}
@@ -872,7 +877,7 @@ EAT_ABLE	[\ \t\b\r]
 <INITIAL>{COMMA}		{ count(); return COMMA; }
 <INITIAL>{SEMICOLON}	{ count(); return SEMICOLON; }
 <INITIAL>{COLON}	{ count(); return COLON; }
-<INITIAL>{STAR}	{ count(); return STAR; }
+<INITIAL>{STAR}		{ count(); return STAR; }
 <INITIAL>{RPAREN}	{ count(); return RPAREN; }
 <INITIAL>{LPAREN}	{ count(); return LPAREN; }
 <INITIAL>{LBRACE}	{ count(); return LBRACE; }
@@ -885,16 +890,19 @@ EAT_ABLE	[\ \t\b\r]
 <INITIAL>{CR}		{ count();/* return CR;*/ }
-<INITIAL,SELECT>{QUOTES} { count(); old_initial = YY_START; old_state = state; state=STRING_S; BEGIN(STRING1); }
-<INITIAL>{TICK} { count(); old_initial = YY_START; old_state = state; state=STRING_S; BEGIN(STRING2); }
+<INITIAL,SELECT>{QUOTES} { count(); old_initial = YY_START; 
+							old_state = state; state=STRING_S;
+							BEGIN(STRING1); }
+<INITIAL>{TICK} { count(); old_initial = YY_START; old_state = state;
+					state=STRING_S; BEGIN(STRING2); }
-<STRING1>{QUOTES} { count(); 
+<STRING1>{QUOTES} { count_more(); 
    					yytext[yyleng-1]=0; yyleng--;
    					addstr(&s_buf, yytext, yyleng);
    					BEGIN(STR_BETWEEN);
    				}
-<STRING2>{TICK}  { count(); state=old_state; BEGIN(old_initial);
+<STRING2>{TICK}  { count_more(); state=old_state; BEGIN(old_initial);
    					yytext[yyleng-1]=0; yyleng--;
    					addstr(&s_buf, yytext, yyleng);
    					yylval.strval=s_buf.s;
@@ -903,24 +911,28 @@ EAT_ABLE	[\ \t\b\r]
    				}
 <STRING2>.|{EAT_ABLE}|{CR}	{ yymore(); }
-<STRING1>\n		{ count(); addchar(&s_buf, '\n'); }
-<STRING1>\r		{ count(); addchar(&s_buf, '\r'); }
-<STRING1>\a		{ count(); addchar(&s_buf, '\a'); }
-<STRING1>\t		{ count(); addchar(&s_buf, '\t'); }
-<STRING1>\{QUOTES}	{ count(); addchar(&s_buf, '"');  }
-<STRING1>\\		{ count(); addchar(&s_buf, '\'); }
-<STRING1>\x{HEX}{1,2}	{ count(); addchar(&s_buf,
+<STRING1>\n		{ count_more(); addchar(&s_buf, '\n'); }
+<STRING1>\r		{ count_more(); addchar(&s_buf, '\r'); }
+<STRING1>\a		{ count_more(); addchar(&s_buf, '\a'); }
+<STRING1>\t		{ count_more(); addchar(&s_buf, '\t'); }
+<STRING1>\{QUOTES}	{ count_more(); addchar(&s_buf, '"');  }
+<STRING1>\\		{ count_more(); addchar(&s_buf, '\'); }
+<STRING1>\x{HEX}{1,2}	{ count_more(); addchar(&s_buf,
    										(char)strtol(yytext+2, 0, 16)); }
  /* don't allow [0-7]{1}, it will eat the backreferences from
     subst_uri if allowed (although everybody should use '' in subt_uri) */
-<STRING1>\[0-7]{2,3}	{ count(); addchar(&s_buf,
+<STRING1>\[0-7]{2,3}	{ count_more(); addchar(&s_buf,
    										(char)strtol(yytext+1, 0, 8));  }
-<STRING1>\{CR}		{ count(); } /* eat escaped CRs */
-<STRING1>.|{EAT_ABLE}|{CR}	{ addchar(&s_buf, *yytext); }
-
-<STR_BETWEEN>{EAT_ABLE}|{CR}	{ count(); /* eat whitespace */ }
-<STR_BETWEEN>{QUOTES}			{ count(); state=STRING_S; BEGIN(STRING1);}
-<STR_BETWEEN>.					{	yyless(0);
+<STRING1>\{CR}		{ count_more(); } /* eat escaped CRs */
+<STRING1>.|{EAT_ABLE}|{CR}	{ count_more(); addchar(&s_buf, *yytext); }
+
+<STR_BETWEEN>{EAT_ABLE}|{CR}	{ count_ignore(); }
+<STR_BETWEEN>{QUOTES}			{ count_more(); state=STRING_S;
+								  BEGIN(STRING1);}
+<STR_BETWEEN>.					{	
+									yyless(0); /* reparse it */
+									/* ignore the whitespace now that is
+									  counted, return saved string value */
    								state=old_state; BEGIN(old_initial);
    								yylval.strval=s_buf.s;
    								memset(&s_buf, 0, sizeof(s_buf));
@@ -1043,26 +1055,66 @@ error:
-static void count()
+/** helper function for count_*(). */
+static void count_lc(int* l, int* c)
 {
    int i;
-
-	startcolumn=column;
    for (i=0; i<yyleng;i++){
    	if (yytext[i]=='\n'){
-			line++;
-			column=startcolumn=1;
+			(*l)++;
+			(*c)=1;
    	}else if (yytext[i]=='\t'){
-			column++;
-			/*column+=8 -(column%8);*/
+			(*c)++;
+			/*(*c)+=8 -((*c)%8);*/
    	}else{
-			column++;
+			(*c)++;
    	}
    }
 }
+/* helper function */
+static void count_restore_ignored()
+{
+	if (ign_lines) /* ignored line(s) => column has changed */
+		column=ign_columns;
+	else
+		column+=ign_columns;
+	line+=ign_lines;
+	ign_lines=ign_columns=0;
+}
+
+
+
+/** count/record position for stuff added to the current token. */
+static void count_more()
+{
+	count_restore_ignored();
+	count_lc(&line, &column);
+}
+
+
+
+/** count/record position for a new token. */
+static void count()
+{
+	count_restore_ignored();
+	startline=line;
+	startcolumn=column;
+	count_more();
+}
+
+
+
+/** record discarded stuff (not contained in the token) so that
+    the next token position can be adjusted properly*/
+static void count_ignore()
+{
+	count_lc(&ign_lines, &ign_columns);
+}
+
+
 /* replacement yywrap, removes libfl dependency */
 int yywrap()
 {