Antlr4 can't recognize a single number and bracket. I don't know what the problem is? - sql-parser

lexer grammar TransformLexer;
// ANTLR4 named actions are introduced with '@'; '#header' / '#members' are not valid grammar syntax.
@header { package com.abc.g4.gen; }
channels { DPCOMMENT, ERRORCHANNEL }
@members {
    /**
     * Verify whether the current token is a valid decimal token (one which contains a dot).
     * Returns true if the character that follows the token is not a digit, an upper-case
     * letter ('A'..'Z') or an underscore. Lower-case letters are NOT treated as letters here.
     *
     * For example:
     * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
     * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
     * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
     * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed
     * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+'
     * which is not a digit or letter or underscore.
     */
    public boolean isValidDecimal() {
        // Peek at the character right after the text matched so far, without consuming it.
        int nextChar = _input.LA(1);
        boolean upperLetter = nextChar >= 'A' && nextChar <= 'Z';
        boolean digit = nextChar >= '0' && nextChar <= '9';
        return !(upperLetter || digit || nextChar == '_');
    }
}
// SKIP
// Whitespace is routed to the HIDDEN channel, so the parser never sees it.
SPACE: [ \t\r\n]+ -> channel(HIDDEN);
// '/*! ... */' comments get their own DPCOMMENT channel. Declared before COMMENT_INPUT so it
// wins when both rules match the same text.
SPEC_MYSQL_COMMENT: '/*!' .+? '*/' -> channel(DPCOMMENT);
COMMENT_INPUT: '/*' .*? '*/' -> channel(HIDDEN);
// Line comments: '--' followed by a space/tab, or '#', up to end of line or EOF;
// a bare '--' directly followed by end of line / EOF also counts.
LINE_COMMENT: (
('--' [ \t] | '#') ~[\r\n]* ('\r'? '\n' | EOF)
| '--' ('\r'? '\n' | EOF)
) -> channel(HIDDEN);
// STRING covers both quote styles accepted by DQUOTA_STRING (double-quoted and single-quoted).
STRING
: DQUOTA_STRING
;
// Comparison operators.
EQ : '==';
NEQ : '<>';
NEQJ: '!=';
LT : '<';
LTE : '<=';
GT : '>';
GTE : '>=';
// Arithmetic and shift operators.
PLUS: '+';
MINUS: '-';
ASTERISK: '*';
SLASH: '/' ;
PERCENT: '%';
RSHIFT: '>>';
LSHIFT: '<<';
// Keywords: only the all-upper-case and all-lower-case spellings are accepted.
// OR, AND and NOT additionally accept the symbolic forms listed in their rules.
IS: 'IS' | 'is';
NULL: 'NULL' | 'null';
TRUE: 'TRUE' | 'true';
FALSE: 'FALSE' | 'false';
LIKE: 'LIKE' | 'like';
OR: 'OR' | 'or' | '|';
AND: 'AND' | '&&' | 'and' | '&';
IN: 'IN' | 'in';
NOT: 'NOT' | '!' | 'not';
CASE: 'CASE' | 'case';
WHEN: 'WHEN' | 'when';
THEN: 'THEN' | 'then';
ELSE: 'ELSE' | 'else';
END: 'END' | 'end';
JOIN: '||';
// '#'-prefixed identifier.
ID: [#]ID_LITERAL+;
// DOUBLE_QUOTE_ID: '"' ~'"'+ '"';
// Back-quoted identifier with at least one character between the quotes.
REVERSE_QUOTE_ID: '`' ~'`'+ '`';
// Bare name; declared after the keyword rules, so a keyword spelling of equal length lexes as the keyword.
NAME: ID_LITERAL+;
// The middle part requires at least one non-digit character, so a pure digit string never matches ID_LITERAL.
fragment ID_LITERAL: [a-zA-Z_0-9\u0080-\uFFFF]*?[a-zA-Z_$\u0080-\uFFFF]+?[a-zA-Z_$0-9\u0080-\uFFFF]*;
// Double-quoted (with backslash escapes and doubled '""') or single-quoted (with backslash escapes) text.
fragment DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"' | '\'' ( ~('\''|'\\') | ('\\' .) )* '\'';
// One or more digits (the '+' is already part of the fragment).
fragment DEC_DIGIT: '0' .. '9'+;
NEWLINE:'\r'? '\n' ;
// Digits followed by a size suffix: B, K, M or G.
BYTELENGTH_LITERAL
    : DEC_DIGIT+ ('B' | 'K' | 'M' | 'G')
    ;
// NOTE(review): the optional leading '-' makes input such as "1-1" lex as two INTEGER_VALUE
// tokens; the parser's number rule already accepts MINUS?, so the sign may belong there only.
INTEGER_VALUE
    : [-]*DEC_DIGIT+
    ;
// Exponent form without a dot, or dotted digits that pass the isValidDecimal() look-ahead check.
DECIMAL_VALUE
    : DEC_DIGIT+ EXPONENT
    | DECIMAL_DIGITS EXPONENT? {isValidDecimal()}?
    ;
IDENTIFIER
    : (LETTER | DEC_DIGIT | '_')+
    ;
// Back-quoted identifier; a doubled back-quote inside stands for a literal back-quote.
BACKQUOTED_IDENTIFIER
    : '`' ( ~'`' | '``' )* '`'
    ;
COMMA: ',' ;
// Grouping brackets are the two-character sequences '((' / '))' and '{{' / '}}'.
LEFT_BRACKET
    : '(('
    ;
RGIHT_BRACKET
    : '))'
    ;
LEFT_BRACKET1
    : '{{'
    ;
RGIHT_BRACKET1
    : '}}'
    ;
START
    : '$'
    ;
fragment DECIMAL_DIGITS
    : DEC_DIGIT+ '.' DEC_DIGIT+
    | '.' DEC_DIGIT+
    ;
fragment EXPONENT
    : 'E' [+-]? DEC_DIGIT+
    ;
fragment LETTER
    : [A-Z]
    ;
SIMPLE_COMMENT
    : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
    ;
BRACKETED_COMMENT
    : '/*' .*? '*/' -> channel(HIDDEN)
    ;
WS
    : [ \r\n\t]+ -> channel(HIDDEN)
    ;
// Last tokens must generate Errors
// Catch-all: matches any single character and sends it to ERRORCHANNEL. It must be the LAST
// rule of the lexer: when two rules match text of the same length, the rule declared first
// wins. Declared earlier, it swallowed every one-character token defined after it
// (a single digit, ',', '$', ...), while longer matches such as "12" still worked.
ERROR_RECONGNIGION: . -> channel(ERRORCHANNEL);
parser grammar TransformParser;
// Token types come from the separately defined TransformLexer.
options { tokenVocab=TransformLexer; }
// ANTLR4 named actions are introduced with '@'; '#header' is not valid grammar syntax.
@header { package com.abc.g4.gen; }
// Entry rule: exactly one expression of any supported kind, followed by end of input.
finalExpression:
(booleanExpression | caseExpression | resultExpression | function) EOF
;
// CASE ... END with one or more WHEN clauses and an optional ELSE, or a bare constant.
caseExpression
: CASE whenClause+ (ELSE (elseExpression=resultExpression | caseExpression))? END #whenExpression
| constant #constantDefault
;
// A leaf value: a constant or an ID token.
values:
constant #constantValue
| ID #idValue
;
// Arithmetic over values. Alternatives are listed from tightest to loosest binding:
// bracketed group, then * / %, then + -, then shifts.
valueCalc:
LEFT_BRACKET valueCalc RGIHT_BRACKET
| valueCalc ('*'|'/'|'%') valueCalc
| valueCalc ('+'|'-') valueCalc
| valueCalc ('<<'|'>>') valueCalc
| values
;
// Boolean expressions; the AND alternative is listed before OR.
booleanExpression
: left=booleanExpression operator=AND right=booleanExpression #logicalBinary1
| left=booleanExpression operator=OR right=booleanExpression #logicalBinary
| NOT booleanExpression #logicalNot
| predicated #predicatedExpression
| left=valueCalc operator=comparisonOperator right=valueCalc #comparison4
| booleanValue #booleanValueTag
;
// IN test: the right-hand side is one or more values with no separator or bracket tokens.
// NOTE(review): valueCalc can itself reduce to values, so the two left-hand alternatives overlap.
predicated
: (values | valueCalc) IN values (values)*
;
// WHEN <condition> THEN <result or nested case expression>.
whenClause:
WHEN condition=booleanExpression THEN (result=resultExpression | caseExpression);
resultExpression:
predicated | values | valueCalc;
// NOTE(review): a single STRING matches both #typeConstructor and #stringLiteral;
// the earlier alternative is presumably the one chosen — confirm this is intended.
constant
: NULL #nullLiteral
| STRING #typeConstructor
| number #numericLiteral
| booleanValue #booleanLiteral
| STRING+ #stringLiteral
;
comparisonOperator
: EQ | NEQ | NEQJ | LT | LTE | GT | GTE | IS
;
booleanValue
: TRUE | FALSE
;
// Numeric literal with an optional MINUS token in front.
number
: MINUS? DECIMAL_VALUE #decimalLiteral
| MINUS? INTEGER_VALUE #integerLiteral
;
qualifiedName
: NAME
;
// A name followed directly by its parameters; the parentheses below are grammar grouping, not tokens.
function
: qualifiedName (params) #functionCall
;
param:
valueCalc | values | function | booleanExpression
;
// One or more parameters with no separator token between them.
params:
param (param)*
;
The lexer recognizes multi-digit numbers, but it fails to recognize single-digit numbers.
enter image description here
enter image description here
Also, parentheses do not change the evaluation priority of an expression. What is wrong with my grammar?
enter image description here
If I replace '(' and ')' with '((' and '))' or '{{' and '}}', it works.
enter image description here

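Resolved: after deleting the 'ERROR_RECONGNIGION' rule, it works. That rule matches any single character and is declared before the single-character token rules, so it won every one-character match.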

Related

How to apply an empty condition to sql select by using "and" in Spark?

I have a UuidConditionSet. When the if condition is false, I want to apply an empty string to my select statement (or just ignore this UuidConditionSet), but I get the error below. How can I solve this problem?
mismatched input 'FROM' expecting <EOF>(line 10, pos 3)
This is the select
(SELECT
item,
amount,
date
from my_table
where record_type = 'myType'
and ( date_format(date, "yyyy-MM-dd") >= '2020-02-27'
and date_format(date, "yyyy-MM-dd") <= '2020-02-28' )
and ()
// Both accumulators stay empty when UuidList produces no text at all.
var UuidConditionSet = ""
var UuidCondition = Seq.empty[String]
if (UuidList.mkString.nonEmpty) {
  // One "<struct>.<uuid> = '<value>'" predicate per element of UuidList.
  // NOTE(review): the interpolated value is `eventUuid`, not the element being iterated — confirm intended.
  UuidCondition = UuidList.map { _ =>
    s"${SQLColumnHelper.EVENT_INFO_STRUCT_NAME}.${SQLColumnHelper.UUID} = '".concat(eventUuid).concat("'")
  }
  // Join the predicates with " or " between them.
  UuidConditionSet = UuidCondition.mkString(" or ")
}
s"""SELECT
| ${SQLColumnHelper.STRUCT_NAME_ITEM},
| ${SQLColumnHelper.STRUCT_NAME_AMOUNT},
| ${SQLColumnHelper.DATE}
| from ${sqlTableHelper.TABLE}
| where ${SQLColumnHelper.EVENT_INFO_STRUCT_NAME} = '${RECORD_TYPE}'
| and ( date_format(${SQLColumnHelper.DATE}, "${Constant.STAY_DATE_FORMAT}") >= '${stayDateRangeTuple._1}'
| and date_format(${SQLColumnHelper.DATE}, "${Constant.STAY_DATE_FORMAT}") <= '${stayDateRangeTuple._2}' )
| and ($UuidConditionSet)
You can use pattern matching on the list UuidList to check the size and return an empty string if the list is empty. Also, you can use IN instead of multiple ORs here.
Try this:
// Optional "and <struct>.<uuid> in ('a','b',...)" clause; an empty string when UuidList has no elements.
val UuidCondition =
  if (UuidList.nonEmpty) {
    val inPrefix = s"and ${SQLColumnHelper.EVENT_INFO_STRUCT_NAME}.${SQLColumnHelper.UUID} in ("
    // Quote every UUID and join them with commas between the prefix and the closing bracket.
    UuidList.map(u => s"'$u'").mkString(inPrefix, ",", ")")
  } else {
    ""
  }
s"""SELECT
| ${SQLColumnHelper.STRUCT_NAME_ITEM},
| ${SQLColumnHelper.STRUCT_NAME_AMOUNT},
| ${SQLColumnHelper.DATE}
| from ${sqlTableHelper.TABLE}
| where ${SQLColumnHelper.EVENT_INFO_STRUCT_NAME} = '${RECORD_TYPE}'
| and date_format(${SQLColumnHelper.DATE}, "${Constant.STAY_DATE_FORMAT}") >= '${stayDateRangeTuple._1}'
| and date_format(${SQLColumnHelper.DATE}, "${Constant.STAY_DATE_FORMAT}") <= '${stayDateRangeTuple._2}'
| $UuidCondition
"""

Returning boolean values in sparql

From the rdf file, I need to return true for the person if their age is even, false if their age is odd. I wrote the query to display persons with even age, but need to modify to display the results in boolean values.
# Selects every ?x with an h:age value and keeps the rows whose age ends in 0, 2, 4, 6 or 8.
# NOTE(review): strends is given bare numbers here; presumably it expects string arguments
# (e.g. str(?age) and "0") — confirm against the SPARQL function reference.
select * where { ?x h:age ?age .
filter( strends(?age, 0) || strends(?age, 2) || strends(?age, 4) || strends(?age, 6) || strends(?age, 8) )
}
A test for evenness is ?X/2 = FLOOR(?X/2).
So if ?age has a numeric datatype:
where { ?x h:age ?age .
BIND( (?age/2 = FLOOR(?age/2)) AS ?isEven)
}
will add ?isEven as true/false.
If ?age is a string, then replace ?age with xsd:integer(?age).
I had a similar case. I needed to return in the results true/false if the optional relationship exists.
# Returns each skos:Concept together with the string "true" or "false",
# depending on whether a skos:narrowMatch triple exists for it.
SELECT ?c ?hasNarrowMatch
WHERE {
?c a skos:Concept.
# NOTE(review): this OPTIONAL binds ?nm once per match, so a concept with several
# narrow matches presumably yields several rows — confirm.
OPTIONAL {?c skos:narrowMatch ?nm}
# ?y holds the boolean result of the existence test.
BIND (exists{?c skos:narrowMatch ?nm} AS ?y)
# Map the boolean to the strings "true" / "false".
BIND (IF(?y, "true", "false") AS ?hasNarrowMatch)
}
The results will look like this:
+-----+----------------+
| c | hasNarrowMatch |
+-----+----------------+
| c1 | true |
| c2 | false |
| c3 | true |
+-----+----------------+

Why isn't "NOT ( <search_condition> )" listed in definition?

Based on testing, WHERE NOT is valid SQL syntax
where not ( x = 10 and y > 5 )
However, I don't follow that from the definition:
To me, the T-SQL definition of a search condition doesn't seem to allow producing NOT ( <search_condition> ).
The spec/docs makes it seem like the only legitimate target for a unary negation is a predicate. To produce not ( <search_condition> ) it looks like this requires the statement to be the right hand side of a conjunction or disjunction...Is the spec incorrect, or am I missing a way to produce this?
< search_condition > ::=
{ [ NOT ] <predicate> | ( <search_condition> ) }
[ { AND | OR } [ NOT ] { <predicate> | ( <search_condition> ) } ]
[ ,...n ]
<predicate> ::=
{ expression { = | < > | ! = | > | > = | ! > | < | < = | ! < } expression
| string_expression [ NOT ] LIKE string_expression
[ ESCAPE 'escape_character' ]
| expression [ NOT ] BETWEEN expression AND expression
| expression IS [ NOT ] NULL
| CONTAINS
( { column | * } ,'< contains_search_condition >')
| FREETEXT ( { column | * } ,'freetext_string')
| expression [ NOT ] IN (subquery | expression [ ,...n ] )
| expression { = | < > | ! = | > | > = | ! > | < | < = | ! < }
{ ALL | SOME | ANY} (subquery)
| EXISTS (subquery) }
NOT is listed in square brackets in both parts, meaning it's optional.
If we expand this:
< search_condition > ::=
{ [ NOT ] <predicate> | ( <search_condition> ) }
[ { AND | OR } [ NOT ] { <predicate> | ( <search_condition> ) } ]
[ ,...n ]
, removing some of the square brackets and possibly their contents, we get this:
< some_search_condition > ::= { NOT ( <search_condition> ) }
which your string matches.
Transact-SQL Syntax Conventions
[ ] (brackets) Optional syntax items. Do not type the brackets.
That syntax link is on the page with the definition
Search Condition (Transact-SQL)
Transact-SQL Syntax Conventions
What do you mean it does not support not?
Post a query that fails.
This runs just fine
-- NOT applied to a parenthesized search condition in the WHERE clause.
SELECT nativeMD5
FROM docSVsys
INNER JOIN docSVtext
    ON docSVtext.fieldID = 110
    AND docSVtext.sID = docSVsys.sID
WHERE NOT (docSVsys.sID = 10 OR docSVsys.sID = 11)

ANTLR4 matching a string with a start cha and end character

I am trying to write antlr grammar so that I can create a match on a certain ID.
I need to match a string that starts with the character 'n' and ends with 'd'.
And this ID can have space.
Everywhere else I want to ignore the whitespace
// lexer/terminal rules start with an upper case letter
// ID: one or more letters, digits, the listed operator/punctuation characters, or NA sequences.
// Because '=' and NA are both part of ID, input such as "A1=not" is consumed as a single ID.
ID
:
(
'a'..'z'
| 'A'..'Z'
| '0'..'9'
| ('+'|'-'|'*'|'/'|'_')
| '='
| '~'
| '{'
| '}'
| ','
| NA
)+
;
// NOTE(review): inside [...] the text 'a'..'z' is presumably read as individual characters
// rather than a range — confirm against the ANTLR4 lexer-rule documentation.
// NA is declared after ID, so ID takes priority when both match the same text.
NA : 'n'[ ]['a'..'z']'d' ;
// Spaces, tabs and newlines are discarded.
WS : [ \t\n]+ -> skip;
I tested this with an expression A1=not attempted
It considers A1=not as an ID and attempted as an error node
Can you have a grammar that ignore white spaces but makes an exception for a certain string as "not attempted"
You should try to separate ID ("A1") from the rest. Furthermore, you need to take care of the priority of lexer rules. Your "n...d" rule should have higher priority, so make it one of your first lexer rules.
A working grammar (only tested for your example "A1=not attempted") is:
// Start rule: an ID followed by an expression.
statement : ID expr;
// An expression is an operator followed by another expression, or a single NA / ID / OP token.
expr : OP expr
| (NA | ID | OP)
;
// 'n', then any mix of letters and spaces, then 'd'. Declared first so it has priority over ID.
NA : 'n'[a-zA-Z ]*'d' ;
// Identifiers: letters, digits and the listed arithmetic/underscore characters.
ID
: (
'a'..'z'
| 'A'..'Z'
| '0'..'9'
| ('+'|'-'|'*'|'/'|'_')
)+ ;
// Punctuation that was part of ID in the question's grammar is now its own token.
OP : '='
| '~'
| '{'
| '}'
| ','
;
// Whitespace is discarded.
WS : [ \t\r\n]+ -> skip;
Try it with the start rule statement. I changed the NA rule so that it matches zero or more characters from a to z, A to Z, and whitespace, in any order.
Good Luck with ANTLR, its a nice tool.

How do I display all pronouns in a sentence and their persons using antlr

EDITED according to WayneH's grammar
Here's what i have in my grammar file.
grammar pfinder;
options {
    language = Java;
}
// A sentence: words or pronouns separated by single spaces, ending with '.' or '?'.
sentence
    : ((words | pronoun) SPACE)* ((words | pronoun) ('.' | '?'))
    ;
// Prints every ordinary word that is matched.
words
    : WORDS {System.out.println($text);};
// Matches one pronoun, returns its person code and prints "<text>(<code>)".
pronoun returns [String value]
    : sfirst {$value = $sfirst.value; System.out.println($sfirst.text + '(' + $sfirst.value + ')');}
    | ssecond {$value = $ssecond.value; System.out.println($ssecond.text + '(' + $ssecond.value + ')');}
    | sthird {$value = $sthird.value; System.out.println($sthird.text + '(' + $sthird.value + ')');}
    | pfirst {$value = $pfirst.value; System.out.println($pfirst.text + '(' + $pfirst.value + ')');}
    | psecond {$value = $psecond.value; System.out.println($psecond.text + '(' + $psecond.value + ')');}
    | pthird{$value = $pthird.value; System.out.println($pthird.text + '(' + $pthird.value + ')');};
// Person codes: s1/s2/s3 = singular first/second/third, p1/p2/p3 = plural first/second/third.
sfirst returns [String value] : ('i' | 'me' | 'my' | 'mine') {$value = "s1";};
ssecond returns [String value] : ('you' | 'your'| 'yours'| 'yourself') {$value = "s2";};
sthird returns [String value] : ('he' | 'she' | 'it' | 'his' | 'hers' | 'its' | 'him' | 'her' | 'himself' | 'herself') {$value = "s3";};
pfirst returns [String value] : ('we' | 'us' | 'our' | 'ours') {$value = "p1";};
psecond returns [String value] : ('yourselves') {$value = "p2";};
pthird returns [String value] : ('they'| 'them'| 'their'| 'theirs' | 'themselves') {$value = "p3";};
// A token rule must consume at least one character: 'LETTER*' and "(' ')?" could both match
// the empty string, so the lexer could emit empty tokens without advancing.
WORDS : LETTER+;// {$channel=HIDDEN;};
SPACE : ' ';
fragment LETTER : ('a'..'z' | 'A'..'Z');
And here's what I have in a Java test class:
import java.util.Scanner;
import org.antlr.runtime.*;
import org.antlr.runtime.tree.*;
import java.util.List;
public class test2 {
    /**
     * Reads one line from standard input, lower-cases it and parses it with the grammar's
     * {@code sentence} rule, so the print actions attached to words and pronouns run for
     * the whole line.
     *
     * @param args unused
     * @throws RecognitionException if the parser reports a recognition error
     */
    public static void main(String[] args) throws RecognitionException {
        Scanner input = new Scanner(System.in);
        System.out.println("Eter a Sentence: ");
        // The grammar's pronoun literals are lower-case, so normalize the input first.
        String s = input.nextLine().toLowerCase();
        ANTLRStringStream in = new ANTLRStringStream(s);
        pfinderLexer lexer = new pfinderLexer(in);
        TokenStream tokenStream = new CommonTokenStream(lexer);
        pfinderParser parser = new pfinderParser(tokenStream);
        // Start at `sentence`: the `pronoun` rule matches a single pronoun only, so calling it
        // directly never walks the rest of the input.
        parser.sentence();
    }
}
what do I need to put in the test file so that the it will display all the pronouns in a sentence and their respective values(s1,s2,...)?
In case you are trying to do some sort of high-level analysis of spoken/written language, you might consider using some sort of natural language processing tool. For example, TagHelper Tools will tell you which elements are pronouns (and verbs, and nouns, and adverbs, and other esoteric grammatical constructs). (THT is the only tool of that sort that I'm familiar with, so don't take that as a particular endorsement of awesomeness).
fragments don't create tokens, and placing them in parser rules will not give desirable results.
On my test box, this produced (I think!) the desired result:
// Start rule: one or more PRONOUN tokens (everything else is hidden by the lexer rules below).
program :
PRONOUN+
;
// All pronouns as a single lexer token; matching is case-sensitive (lower-case only).
PRONOUN :
'i' | 'me' | 'my' | 'mine'
| 'you' | 'your'| 'yours'| 'yourself'
| 'he' | 'she' | 'it' | 'his' | 'hers' | 'its' | 'him' | 'her' | 'himself' | 'herself'
| 'we' | 'us' | 'our' | 'ours'
| 'yourselves'
| 'they'| 'them'| 'their'| 'theirs' | 'themselves'
;
// Single spaces and all other words are sent to the hidden channel, so the parser only sees pronouns.
WS : ' ' { $channel = HIDDEN; };
WORD : ('A'..'Z'|'a'..'z')+ { $channel = HIDDEN; };
In Antlrworks, a sample "i kicked you" returned the tree structure: program -> [i, you].
I feel compelled to point out that ANTLR is overkill for stripping the pronouns out of a sentence. Consider using a regular expression. This grammar is not case-insensitive. Expanding WORD to consume everything except your dictionary of PRONOUNs (such as punctuation, etc.) may be a bit tedious, and the input will require sanitization.
--- Edit: In response to the second OP:
I have altered the original grammar to make ease of parsing. The new grammar is:
grammar pfinder;
options {
// Backtracking is enabled for the program rule; the parser builds an AST as output.
backtrack=true;
output = AST;
}
// PROGRAM is an imaginary token used only as the root of the rewritten tree.
tokens {
PROGRAM;
}
// Collects every matched PRONOUN into the list $p and rewrites the result as ^(PROGRAM <pronouns>).
program :
(WORD* p+=PRONOUN+ WORD*)*
-> ^(PROGRAM $p*)
;
// All pronouns as a single lexer token; matching is case-sensitive (lower-case only).
PRONOUN :
'i' | 'me' | 'my' | 'mine'
| 'you' | 'your'| 'yours'| 'yourself'
| 'he' | 'she' | 'it' | 'his' | 'hers' | 'its' | 'him' | 'her' | 'himself' | 'herself'
| 'we' | 'us' | 'our' | 'ours' | 'yourselves'
| 'they'| 'them'| 'their'| 'theirs' | 'themselves'
;
// Single spaces are hidden; WORD stays visible so the parser rule can skip over ordinary words.
WS : ' ' { $channel = HIDDEN; };
WORD : ('A'..'Z'|'a'..'z')+;
I'll explain the changes:
Backtracking is now required to solve the parser rule program. Perhaps there's a better way to write it which doesn't require backtracking but this is the first thing that popped in to my mind.
An imaginary token PROGRAM has been defined to group our pronouns.
Each matched pronoun is added to the ANTLR variable $p and is rewritten in the AST under the imaginary token.
The interpreter code may now use a CommonTree to collect matched pronouns
The following is written in C# (I don't know Java) but I wrote it with the intent that you'll be able to read and understand it.
/// <summary>
/// Parses <paramref name="text"/> with the pfinder grammar and returns the text of every
/// child of the resulting PROGRAM tree node (the collected pronouns).
/// </summary>
/// <param name="text">Sentence to scan.</param>
/// <returns>
/// The pronouns as strings, an empty array when the tree has no children,
/// or null when the parser produced no CommonTree.
/// </returns>
static object[] ReadTokens( string text )
{
    ArrayList results = new ArrayList();
    pfinderLexer Lexer = new pfinderLexer(new Antlr.Runtime.ANTLRStringStream(text));
    pfinderParser Parser = new pfinderParser(new CommonTokenStream(Lexer));
    // syntaxTree is imaginary token {PROGRAM},
    // its children are the pronouns collected by $p in grammar.
    CommonTree syntaxTree = Parser.program().Tree as CommonTree;
    if ( syntaxTree == null ) return null;
    // A node without children may expose a null Children list (e.g. no pronoun was matched);
    // treat that as "no results" instead of failing inside foreach.
    if ( syntaxTree.Children != null )
    {
        foreach ( object pronoun in syntaxTree.Children )
        {
            results.Add(pronoun.ToString());
        }
    }
    return results.ToArray();
}
Calling ReadTokens("i kicked you and them") returns array ["i", "you", "them"]
I think you need to learn more about lexer rules within ANTLR, lexer rules start with uppercase letter and generate tokens for the stream the parser will look at. Lexer fragment rules will not generate a token for the stream but will help other lexer rules generate tokens, look at lexer rules WORDS and LETTER (LETTER is not a token but does help WORDS create a token).
Now, when a text literal is put into a parser rule (rule name will start with a lowercase letter) that text literal is also a valid token that the lexer will identify and pass (at least when you use ANTLR - I have not used any other tools similar to ANTLR to answer for them).
The next thing I was noticing is that your 's' and 'pronoun' rules appear to be the same thing. I commented out the 's' rule and put everything into the 'pronoun' rule
And then the last thing is to learn how to put actions into the grammer, you have some in the 's' rule setting the return value. I made the pronoun rule return a string value so that if you wanted the actions in your 'sentence' rule you would easily be able to accomplish your "-i pronoun" comment/answer.
Since I do not know exactly what results you want, I played with your grammar, made some slight modifications, and reorganized it (moving what I thought were parser rules to the top and keeping all lexer rules at the bottom). I also put in some actions that I think will show you what you need. There could be several different ways to accomplish this, and I don't think my solution is perfect for any of your possible desired results, but here is a grammar I was able to get working in ANTLRWorks:
grammar pfinder;
options {
    language = Java;
}
// A sentence: words or pronouns separated by single spaces, ending with '.' or '?'.
sentence
    : ((words | pronoun) SPACE)* ((words | pronoun) ('.' | '?'))
    ;
// Prints every ordinary word that is matched.
words
    : WORDS {System.out.println($text);};
// Matches one pronoun, returns its person code and prints "<text>(<code>)".
pronoun returns [String value]
    : sfirst {$value = $sfirst.value; System.out.println($sfirst.text + '(' + $sfirst.value + ')');}
    | ssecond {$value = $ssecond.value; System.out.println($ssecond.text + '(' + $ssecond.value + ')');}
    | sthird {$value = $sthird.value; System.out.println($sthird.text + '(' + $sthird.value + ')');}
    | pfirst {$value = $pfirst.value; System.out.println($pfirst.text + '(' + $pfirst.value + ')');}
    | psecond {$value = $psecond.value; System.out.println($psecond.text + '(' + $psecond.value + ')');}
    | pthird{$value = $pthird.value; System.out.println($pthird.text + '(' + $pthird.value + ')');};
//s returns [String value]
// : exp=sfirst {$value = "s1";}
// | exp=ssecond {$value = "s2";}
// | exp=sthird {$value = "s3";}
// | exp=pfirst {$value = "p1";}
// | exp=psecond {$value = "p2";}
// | exp=pthird {$value = "p3";}
// ;
// Person codes: s1/s2/s3 = singular first/second/third, p1/p2/p3 = plural first/second/third.
sfirst returns [String value] : ('i' | 'me' | 'my' | 'mine') {$value = "s1";};
ssecond returns [String value] : ('you' | 'your'| 'yours'| 'yourself') {$value = "s2";};
sthird returns [String value] : ('he' | 'she' | 'it' | 'his' | 'hers' | 'its' | 'him' | 'her' | 'himself' | 'herself') {$value = "s3";};
pfirst returns [String value] : ('we' | 'us' | 'our' | 'ours') {$value = "p1";};
psecond returns [String value] : ('yourselves') {$value = "p2";};
pthird returns [String value] : ('they'| 'them'| 'their'| 'theirs' | 'themselves') {$value = "p3";};
// A token rule must consume at least one character: 'LETTER*' and "(' ')?" could both match
// the empty string, so the lexer could emit empty tokens without advancing.
WORDS : LETTER+;// {$channel=HIDDEN;};
SPACE : ' ';
fragment LETTER : ('a'..'z' | 'A'..'Z');
I think the end result is that this grammar will show you how to accomplish what you are trying to do, though it will require modification no matter what your final goal is.
Good luck.
I think you only have to change one line in your test class,
parser.pronoun();
to:
parser.sentence();
You might want to change a few other things in the grammar as well:
SPACE : ' ';
sentence: (words | pronoun) (SPACE (words | pronoun))* ('.' | '?'); // then you might want to put a rule between sentence and words/pronoun.