Error Catching Newline in Lex - lex

I have been debugging for a couple of days now and can't seem to fix this bug. The following code is a Lex file for C language grammar.
I have two problems I haven't been able to spot yet.
The first is newline detection. The scanner seems to leave the newline character in the output and doesn't increment the line variable (used to keep track of line and column for error feedback).
I have tried every combination of newline escape sequence but can't seem to catch it.
The second is detecting a single quote followed by a newline, which should produce an
illegal character error.
What am I not seeing? Thanks in advance!
%{
/* Scanner position state, updated by the rule actions and printed in error messages. */
int col = 1;     /* current column; advanced by yyleng (or by 1) as input is consumed */
int line = 1;    /* current line; incremented by the newline rules */
int aux_col = 0; /* columns counted while inside a block comment */
int err = 0;     /* set while an error rule has temporarily bumped col from 0 to 1 */
%}
/* Exclusive start condition that is active between the comment opener and closer. */
%x C_COMMENT
/* Identifiers: a letter or underscore followed by letters, digits or underscores */
ids [a-zA-Z_]([a-zA-Z0-9_])*
/* Integers: a lone 0, or a nonzero digit followed by any digits */
inteiros 0|[1-9]([0-9])*
/* Char literals: at most one plain character, or a backslash plus one character, between single quotes */
chrlit (\'[^\n\'\\]?\')|(\'\\.\')
/* String literals: characters other than newline, double quote and backslash between double quotes */
strlit (\"[^\n\"\\]?+\")
/* Char Error: Multi Char Constant (two or more characters between single quotes) */
chr_multi (\'[^\n\']{2,}\')
/* Char Error: Non-terminated Char Constant (opening quote with no closing quote on the line) */
chr_nonterm (\'[^\n\']{1,})
/* String Error: Non-terminated String Constant (opening double quote with no closing one) */
strerr (\"[^\n\"]?+)
%%
"/*" {/* Enter the comment state and start counting its columns in aux_col. */ BEGIN(C_COMMENT); if(col==0) {aux_col=3;} else {aux_col=2;}}
<C_COMMENT>"*/" {/* Leave the comment: fold the counted columns plus the closer into col. */ col+=aux_col+2; aux_col=0; BEGIN(INITIAL);}
<C_COMMENT><<EOF>> {printf("Line %d, col %d: unterminated comment\n",line, col); BEGIN(INITIAL);}
<C_COMMENT>. {/* NOTE(review): the dot pattern does not match a newline, so line breaks inside a comment are not handled by this rule. */ aux_col++;}
"do"|"struct"|"auto"|"long"|"switch"|"break"|"enum"|"register"|"typedef"
"case"|"extern"|"union"|"float"|"short"|"unsigned"|"const"|"for"|"signed" "void"|"continue"|"goto"|"sizeof"|"volatile"|"default"|"static"
{/* NOTE(review): the keyword alternatives on the two lines above are not joined to this action by a bar or kept on one line - confirm this parses as a single rule. */ col+=yyleng; printf("RESERVED\n");}
return {col+=yyleng; printf("RETURN\n");}
while {col+=yyleng; printf("WHILE\n");}
printf {col+=yyleng; printf("PRINTF\n");}
atoi {col+=yyleng; printf("ATOI\n");}
if {col+=yyleng; printf("IF\n");}
int {col+=yyleng; printf("INT\n");}
itoa {col+=yyleng; printf("ITOA\n");}
char {col+=yyleng; printf("CHAR\n");}
"&&" {col+=yyleng; printf("AND\n");}
"&" {col+=yyleng; printf("AMP\n");}
"=" {col+=yyleng; printf("ASSIGN\n");}
"*" {col+=yyleng; printf("AST\n");}
"," {col+=yyleng; printf("COMMA\n");}
"/" {col+=yyleng; printf("DIV\n");}
"==" {col+=yyleng; printf("EQ\n");}
">=" {col+=yyleng; printf("GE\n");}
">" {col+=yyleng; printf("GT\n");}
"{" {col+=yyleng; printf("LBRACE\n");}
"<=" {col+=yyleng; printf("LE\n");}
"(" {col+=yyleng; printf("LPAR\n");}
"[" {col+=yyleng; printf("LSQ\n");}
"<" {col+=yyleng; printf("LT\n");}
"-" {col+=yyleng; printf("MINUS\n");}
"%" {col+=yyleng; printf("MOD\n");}
"!=" {col+=yyleng; printf("NE\n");}
"!" {col+=yyleng; printf("NOT\n");}
"+" {col+=yyleng; printf("PLUS\n");}
"}" {col+=yyleng; printf("RBRACE\n");}
")" {col+=yyleng; printf("RPAR\n");}
"]" {col+=yyleng; printf("RSQ\n");}
";" {col+=yyleng; printf("SEMI\n");}
\|\| {col+=yyleng; printf("OR\n");}
{ids} {col+=yyleng; printf("ID(%s)\n",yytext);}
{inteiros} {col+=yyleng; printf("INTLIT(%s)\n", yytext);}
{strlit} {col+=yyleng; printf("STRLIT(%s)\n", yytext);}
{chrlit} {col+=yyleng; printf("CHRLIT(%s)\n", yytext);}
{chr_multi} {/* A col of 0 is bumped to 1 for the message and restored afterwards; err remembers the bump. */
if(col==0) {col++;err=1;}
printf("Line %d, col %d: multi-character char constant\n", line, col);
col+=yyleng;
if(err == 1) {col--;err=0;}}
{chr_nonterm} {/* Same col adjustment as the multi-character rule. */
if(col==0) {col++;err=1;}
printf("Line %d, col %d: unterminated char constant\n", line, col);
col+=yyleng;if(err == 1) {col--;err=0;}}
{strerr} {/* Same col adjustment as the multi-character rule. */
if(col==0) {col++;err=1;}
printf("Line %d, col %d: unterminated string constant\n", line,col);
col+=yyleng;
if(err == 1) {col--;err=0;}}
" " {col++;};
'\n' {/* NOTE(review): single quotes are ordinary characters in a lex pattern, so this presumably matches quote, newline, quote and not a bare newline - confirm. */ col=1;line++;printf("BAR N\n");}
"\\n" {/* NOTE(review): inside double quotes this looks like a literal backslash followed by the letter n - confirm. */ col=1;line++;printf("BAR N2\n");}
\n {/* Bare newline: restart the column count and advance the line number. */ col=1;line++;printf("BAR N3\n");}
.|\' {/* Fallback for any single character no other rule accepted. */ printf("Line %d, col %d: illegal character ('%s')\n", line, col, yytext); col++;}
%%
/* Program entry point: run the generated scanner to completion, then exit successfully. */
int main ()
{
    int exit_status = 0;

    yylex();
    return exit_status;
}
/* End-of-input hook called by the scanner: a nonzero result means there is no further input. */
int yywrap()
{
    const int no_more_input = 1;

    return no_more_input;
}

You are not matching newlines in comments. . matches any character but newline.
After fixing the rule to match keywords (adding a couple of | and putting it all on one line), the program did report an illegal character on a single quote followed by a newline. So please give the exact input that gives unexpected results.
Best regards from a fellow Bryan.

Related

How to divide a character with a string in Ada

I'm trying to figure out how to divide a character with a string and get a float as the quota. I've done the following in my code:
-- Reads one character and a three-character string, then prints both
-- together with their difference as computed by the local "-" operator.
Procedure extwo is
   -- Returns the digit value of Char (its position minus 48, the position
   -- of '0') minus the numeric value of Strang.
   -- NOTE(review): the operands are declared as (String, Character) while the
   -- call below is written Char - Strang, i.e. (Character, String); confirm
   -- the intended operand order.
   function "-"(Strang: in String;
                Char: in Character) return Float is
   begin
      return Float(Character'Pos(Char)-48) - Float'Value(Strang);
   end "-";
   Differensen: Float;
   Char: Character;
   Strang: String(1..3); -- the prompt below asks for exactly 3 characters
Begin
   Put("Mata in ett tecken: ");
   Get(Char);
   Put("Mata in en sträng med exakt 3 tecken: ");
   Get(Strang);
   Differensen:= Char-Strang;
   Put("Du matade in tecknet: ");
   Put(Char);
   Put(" och strängen: ");
   Put(Strang);
   Put(" och differensen blev ");
   Put(Differensen, Fore=> 0);
end extwo;
With this I get the error messages: " invalid operand types for operator "-" ", " left operand has type "Standard.Character" " and " right operand has subtype of "Standard.String" defined at line 59 "
all on line 95:22 which is the line where it says "Differensen:= Char-Strang;"
It’s easier with operators to use the names L for the parameter that goes on the left of the operator, R for the parameter that goes on the right.
Which, in order to match your usage (Char - Strang) and your implementation would be
function "-" (L : Character; R : String) return Float;

How can I solve "lex program for the pattern that starts with vowel, ends with consonant and might have digits too"

I have tried to solve this question "Write a lex program for the pattern that starts with vowel, ends with consonant and might have digits too."
Here is my code:
%{
#include<stdio.h>
#include<string.h>
/* Counters updated by the rule actions below and printed by main(). */
int cno=0, wno=0, lno=o;
%}
character [a-zA-z]
digit [0-9]
word({character}|{digit})+
line\n
%%
{line}{cno++;lno++;}
{line}{wno++; cno+=strlen(yytext);}{cno++;}
%%
/*
 * Run the scanner, then print the three counters it accumulated.
 * Always returns 0.
 */
int main(void)
{
    yylex();
    /* The C library function is printf; there is no print(). */
    printf("Count alphanumeric pattern: %d;", cno);
    printf("Length of string: %d;", wno);
    printf("Line numbers: %d\n", lno);
    return 0;
}
Here is the output:
line 23: unrecognized rule
line 23: fatal parse error
I think the following code will solve your problem.
%{
/* Totals for the current input line; both are reset after each report. */
int valid_patterns = 0, invalid_patterns =0;
%}
/* Zero or more repetitions of: a vowel, any alphanumerics, then a consonant. */
PATTERN ([aeiouAEIOU][A-Za-z0-9]*[b-df-hj-np-tv-zB-DF-HJ-NP-TV-Z])*
%%
{PATTERN} {/* Echo and count a word accepted by PATTERN. */ printf("\n\t Pattern Matched: %s", yytext); valid_patterns++;}
[A-Za-z0-9]+ {/* Any other alphanumeric run counts as unmatched. */ invalid_patterns++;}
"\n" {
/* End of line: print both totals, then reset them for the next line. */
printf("\n\n\t Total Matched Patterns : %d", valid_patterns);
printf("\n\t Total Unmatched Patterns: %d\n", invalid_patterns);
valid_patterns = 0; invalid_patterns = 0;
}
%%
/*** User code section***/
/* End-of-input hook: return nonzero so the scanner stops at end of file.
   The function is declared int, so it must return a value. */
int yywrap(){ return 1; }
/*
 * Print a prompt and run the scanner.
 * argc and argv are accepted for the standard main signature but are not used.
 * Always returns 0.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    printf("\n Enter your inputs: \n\n");
    yylex();
    return 0;
}
Explanation of the first rule for PATTERN as per your questions requirements:
[aeiouAEIOU] make sure a pattern starts with a vowel.
[A-Za-z0-9]* indicates that any alphanumeric character can occur in middle.
[b-df-hj-np-tv-zB-DF-HJ-NP-TV-Z] make sure a pattern ends with a consonant.
(...)* indicates there might be 0 or more occurrences of the desired pattern.
The second rule, [A-Za-z0-9]+, catches any other input that doesn't match the requirements. The final rule, "\n", takes action when you input a new line: it prints information about your inputs.
An I/O example---
Input: Hello amazing people around the world
Output:
Pattern Matched: amazing
Pattern Matched: around
Total Matched Patterns : 2
Total Unmatched Patterns: 4

Alternative of ChrW function

Is there any alternative function/solution to ChrW() which accepts a value outside the range -32768–65535, such as the character code 1048711 (written &amp;#1048711; in HTML), which should produce "􀂇"? Using ChrW() gives the error
"Invalid procedure call or argument"
So I want an alternative solution to convert the charactercode to the actual character.
Code:
' Decode the HTML entities in sText and return the decoded string.
' Handles the named entities &quot; &lt; &gt; &amp; &nbsp; and decimal
' numeric escapes of the form &#NNN;.
Function HTMLDecode(sText)
    Dim regEx
    Dim matches
    Dim match

    ' The search strings must be the entity text itself; a decoded character
    ' here would either be a syntax error or replace a character with itself.
    sText = Replace(sText, "&quot;", Chr(34))
    sText = Replace(sText, "&lt;", Chr(60))
    sText = Replace(sText, "&gt;", Chr(62))
    sText = Replace(sText, "&amp;", Chr(38))
    sText = Replace(sText, "&nbsp;", Chr(32))

    Set regEx = New RegExp
    With regEx
        .Pattern = "&#(\d+);" 'Match html unicode escapes
        .Global = True
    End With
    Set matches = regEx.Execute(sText)

    'Iterate over matches
    For Each match In matches
        'For each unicode match, replace the whole match with the ChrW of the digits.
        'NOTE(review): ChrW only accepts -32768 to 65535, so larger code points
        'fail here with "Invalid procedure call or argument".
        sText = Replace(sText, match.Value, ChrW(match.SubMatches(0)))
    Next

    HTMLDecode = sText
End Function
' https://en.wikipedia.org/wiki/UTF-16#Description
' Return the UTF-16 string for a Unicode code point: one ChrW unit when the
' value is in 0..&hD7FF or &hE000..&hFFFF, otherwise a two-unit surrogate pair
' built from the value minus &h10000.
Function Utf16Encode(ByVal unicode_code_point)
    Dim fitsOneUnit, offset, highUnit, lowUnit

    fitsOneUnit = (unicode_code_point >= 0 And unicode_code_point <= &hD7FF&) _
               Or (unicode_code_point >= &hE000& And unicode_code_point <= &hFFFF&)

    If fitsOneUnit Then
        Utf16Encode = ChrW(unicode_code_point)
        Exit Function
    End If

    offset = unicode_code_point - &h10000&
    highUnit = &hD800 Or (offset \ &h400&)   ' upper bits select the first unit
    lowUnit = &hDC00 Or (offset And &h3FF&)  ' low 10 bits select the second unit
    Utf16Encode = ChrW(highUnit) & ChrW(lowUnit)
End Function
For Each match In matches
'For each numeric escape, replace the whole match with the result of Utf16Encode applied to its digits (converted with CLng).
sText = Replace(sText, match.Value, Utf16Encode(CLng(match.SubMatches(0))))
Next

How to search a specific word in lex given a input file?

I am very new to lex. I am trying to develop a parser to search a count of specific word in an given input file...
My code is
%{
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Counters updated by the rules: lnum and fresult by the result rule, nc per digit run,
   wc per word, cc characters in words, tc tabs, bc blanks, lc newlines, sc any other character. */
int lnum = 1, fresult = 0, cc=0, wc=0, lc=0, bc=0, sc=0, nc=0, tc=0, result;
/* str holds the search word and fname the file name, both copied from argv by main(). */
char temp[20], str[20], fname[20];
FILE *fp;
#undef yywrap
%}
digit[0-9]+
word [a-zA-Z]+
eol [\n]
blank [ ]
tab [\t]
/* NOTE(review): [word] is a character class matching one of the letters w, o, r, d - confirm this is intended. */
result [word]
%%
{result} {
/* NOTE(review): temp is never written in the visible code, so strstr searches an empty buffer - confirm. */
if((strstr(temp, str)) != 0)
{
printf(" A match found on line: %d\n", lnum);
fresult++;
wc++;
cc+=yyleng;
}
lnum++;
if(fresult == 0)
{
printf(" Match not found\n");
}
}
{digit} {nc++;}
{word} {wc++; cc+=yyleng;}
{tab} {tc++;}
{blank} {bc++;}
{eol} {lc++;}
. sc++;
%%
/*
 * Scan the file named by argv[1] with the search word taken from argv[2],
 * then print the totals collected by the scanner.
 *
 * Returns 0 on success and 1 when an argument is missing, the search word
 * does not fit in str, or the file cannot be opened.
 */
int main(int argc, char *argv[])
{
    int written;

    if (argc < 3) {
        fprintf(stderr, "usage: %s <file> <word>\n", argc > 0 ? argv[0] : "scanner");
        return 1;
    }

    /* fname keeps a (possibly truncated) copy of the name; the file itself is
       opened through argv[1], so long paths still work. */
    snprintf(fname, sizeof fname, "%s", argv[1]);

    /* Refuse a search word that does not fit rather than overflow str or
       search for a truncated word. */
    written = snprintf(str, sizeof str, "%s", argv[2]);
    if (written < 0 || (size_t)written >= sizeof str) {
        fprintf(stderr, "search word is too long (max %d characters)\n", (int)sizeof str - 1);
        return 1;
    }

    /* The scanner only reads, so "r" suffices and also works on read-only files. */
    fp = fopen(argv[1], "r");
    if (fp == NULL) {
        perror(argv[1]);
        return 1;
    }
    yyin = fp;
    yylex();

    printf(" Total count of the word is :%d\n", fresult);
    printf(" Character Count = %d\n", cc);
    printf(" Number Count = %d\n", nc);
    printf(" Word Count = %d\n", wc);
    printf(" Line Count = %d\n", lc);
    printf(" Special Character Count = %d\n", sc);
    printf(" Blank Count = %d\n", bc);
    printf(" Tab Count = %d\n", tc);

    fclose(fp);
    return(0);
}
/* End-of-input hook called by the scanner; this version reports -1 (nonzero). */
int yywrap()
{
    const int wrap_result = -1;

    return wrap_result;
}
The word count and others are working perfectly.... But the word search is taking the input but not given the specific count...... How can I improve the code?
Should I need to add anything?
Thanks in Advance...... :)
I have made some changes to your code to help you in the right direction. First, I created a variable to keep track of whether a match is found or not.
Secondly, I am not using strstr() anymore and instead I am using strcmp() because you want to match a word to a word not a word within a sentence and we do not need a pointer returned. strcmp() is nice because we just get an integer.
I see what you were trying to do with result [word] however, as you found out, this will not work. This section of the Flex file is known as the rules section. Here you use the regular expressions that you defined in the above section (definitions) to tell Flex what to do when a rule is matched.
As you can see, I have deleted all occurrences of result[word] - as this will not work. In the rules section, I also deleted the result definition because we no longer have a rule to match it. However, I keep the code for the result definitions and simply apply it to the word definition.
The last major change is adding the <<EOF>> rule which is a special rule that tells Flex what to do when it has encountered the end of the file. In our case, if the match variable is not 1, then we have not found a match and we would like to print this to the screen. We also need to call yyterminate() (definition at the bottom of the page) to stop the lexical analyzer.
Below is the updated code. I hope that helps!
%{
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Counters updated by the rules: fresult matches found, nc digit runs, wc words,
   cc characters, tc tabs, bc blanks, lc newlines, sc any other character. */
int lnum = 1, fresult = 0, cc=0, wc=0, lc=0, bc=0, sc=0, nc=0, tc=0, result;
/* str holds the search word and fname the file name, both copied from argv by main(). */
char temp[20], str[20], fname[20];
FILE *fp;
int match = 0;/* Set to 1 once any word equals str; checked by the end-of-file rule */
#undef yywrap
%}
/* Definitions: named patterns referenced by the rules */
digit [0-9]+
word [a-zA-Z]+
eol [\n]
blank [ ]
tab [\t]
/* The rules section begins after the separator below */
%%
{digit} {
    /* One run of digits counts as one number. */
    nc++;
}
{tab} {
    tc++;
}
{blank} {
    bc++;
}
{eol} {
    /* Count the line and advance lnum so each match reports the line it is on. */
    lc++;
    lnum++;
}
{word} {
    /* Every word adds to the word and character totals, whether or not it matches. */
    wc++;
    cc+=yyleng;
    if(strcmp(yytext, str) == 0)
    {
        /* The whole word equals the search word. */
        printf("\n A match found on line: %d\n", lnum);
        fresult++;
        match = 1;
    }
}
. {
    /* Anything not claimed by the rules above is a special character. */
    sc++;
}
<<EOF>> {
    /* Report a miss only once, after the whole input has been read. */
    if(!match)
    {
        printf(" Match not found\n");
    }
    yyterminate();
}
%%
/*
 * Scan the file named by argv[1] for the word given in argv[2], then print
 * the totals collected by the scanner.
 *
 * Returns 0 on success and 1 when an argument is missing, the search word
 * does not fit in str, or the file cannot be opened.
 */
int main(int argc, char *argv[])
{
    int written;

    if (argc < 3) {
        fprintf(stderr, "usage: %s <file> <word>\n", argc > 0 ? argv[0] : "scanner");
        return 1;
    }

    /* fname keeps a (possibly truncated) copy of the name; the file itself is
       opened through argv[1], so long paths still work. */
    snprintf(fname, sizeof fname, "%s", argv[1]);

    /* A truncated search word could never equal the word the user asked for,
       so refuse it instead of overflowing str. */
    written = snprintf(str, sizeof str, "%s", argv[2]);
    if (written < 0 || (size_t)written >= sizeof str) {
        fprintf(stderr, "search word is too long (max %d characters)\n", (int)sizeof str - 1);
        return 1;
    }

    /* The scanner only reads, so "r" suffices and also works on read-only files. */
    fp = fopen(argv[1], "r");
    if (fp == NULL) {
        perror(argv[1]);
        return 1;
    }
    yyin = fp;
    yylex();

    printf("\n\n Total count of the word is :%d\n", fresult);
    printf(" Character Count = %d\n", cc);
    printf(" Number Count = %d\n", nc);
    printf(" Word Count = %d\n", wc);
    printf(" Line Count = %d\n", lc);
    printf(" Special Character Count = %d\n", sc);
    printf(" Blank Count = %d\n", bc);
    printf(" Tab Count = %d\n", tc);

    fclose(fp);
    return(0);
}
/* End-of-input hook called by the scanner: a nonzero result means there is no further input. */
int yywrap()
{
    const int no_more_input = 1;

    return no_more_input;
}
{result} {
if((strstr(temp, str)) != 0)
result [word]
Result is a regex for the characters 'w', 'o', 'r', 'd', which is not what you want. You probably want to match on {word}. In addition, temp will always be null - I think you want to use yytext instead.

lex program: error: expected ';', ',' or ')' before numeric constant?

I've checked other similar posts but I think I just need a second set of eyes. This file is for the lex Unix utility.
I created a makefile and this is the error I receive:
gcc -g -c lex.yy.c
cxref.l:57: error: expected ‘;’, ‘,’ or ‘)’ before numeric constant
make: *** [lex.yy.o] Error 1
Line 57 is just inside the void insertId() function near the top.
Here is the code:
%{
#include <stdio.h>
#include <string.h>
/* Cross-reference table filled in by insertId(). */
char identifier[1000][82]; /* one row per recorded identifier */
char linesFound[100][100]; /* text listing the line numbers recorded for the same row */
void insertId(char*, int);
int i = 0;                 /* number of rows in use */
int lineNum = 1;           /* current input line, advanced by the newline rules */
%}
/* NOTE(review): start-condition names reportedly become macros in the generated scanner,
   so a lowercase name such as str can collide with C identifiers used later - confirm. */
%x comment
%s str
%%
"/*" BEGIN(comment);
<comment>[^*\n]* /* eat anything that's not a '*' */
<comment>"*"+[^*/\n]* /* eat up '*'s not followed by '/'s */
<comment>\n ++lineNum; /* keep counting lines inside comments */
<comment>"*"+"/" BEGIN(INITIAL);
"\n" ++lineNum; /* count lines outside comments */
auto ;
break ;
case ;
char ;
continue ;
default ;
do ;
double ;
else ;
extern ;
float ;
for ;
goto ;
if ;
int ;
long ;
register ;
return ;
short ;
sizeof ;
static ;
struct ;
switch ;
typedef ;
union ;
unsigned ;
void ;
while ;
[*]?[a-zA-Z][a-zA-Z0-9_]* insertId(yytext, lineNum); /* record the name with its line */
[^a-zA-Z0-9_]+ ;
[0-9]+ ;
%%
/*
 * Record that identifier `str` was seen on line `nLine`.
 *
 * Names are stored in identifier[] with a ": " suffix and their line numbers
 * are kept as text in the matching linesFound[] row. A name seen before gets
 * ", <line>" appended to its row; a new name gets a new row. When a table or
 * a row is full the occurrence is dropped instead of writing out of bounds.
 */
void insertId(char* str, int nLine)
{
    char num[16]; /* large enough for any int in decimal plus the terminator */
    size_t len = strlen(str);
    size_t max_rows = sizeof linesFound / sizeof linesFound[0];
    int iter;

    if (sizeof identifier / sizeof identifier[0] < max_rows) {
        max_rows = sizeof identifier / sizeof identifier[0];
    }

    snprintf(num, sizeof num, "%d", nLine);

    /* Only rows below i are in use. Stored names end in ": ", so compare the
       name part and then require exactly that suffix. */
    for (iter = 0; iter < i; iter++) {
        if (strncmp(identifier[iter], str, len) == 0 &&
            strcmp(identifier[iter] + len, ": ") == 0) {
            size_t used = strlen(linesFound[iter]);

            if (used + 2 + strlen(num) < sizeof linesFound[iter]) {
                strcat(linesFound[iter], ", ");
                strcat(linesFound[iter], num);
            }
            return;
        }
    }

    /* New name: make sure both tables have room for it. */
    if ((size_t)i >= max_rows || len + 2 >= sizeof identifier[0]) {
        return;
    }

    snprintf(identifier[i], sizeof identifier[i], "%s: ", str);
    snprintf(linesFound[i], sizeof linesFound[i], "%s", num);
    i++;
}
Your problem is:
%s str
There is a reason that it's normal to write condition names in CAPS: it makes them look like macros, which is exactly what they are.
So
void insertId(char* str, int nLine)
gets macro-expanded to something like:
void insertId(char* 2, int nLine)
and the compiler complains that 2 is not really expected at that point in the declaration.