使用lex和yacc打印令牌

printing tokens using lex and yacc

本文关键字:打印 令牌 yacc lex 使用      更新时间:2023-10-16

我有一个lex文件,一个yacc文件和main.cpp文件。

我的main.cpp看起来像

// Entry point (the snippet ends before main's closing brace).
//
// With no command-line arguments the parser reads standard input; otherwise
// every named file is opened and parsed in turn. A file that cannot be opened
// is reported and skipped.
//
// The error total is accumulated after each yyparse() call instead of reading
// yynerrs once at the end: a generated parser may reset yynerrs whenever
// yyparse() starts, which would hide errors found in all but the last file.
int main(int argc, char **argv)
{
    int error_count = 0;

    if (argc == 1)
    {
        curr_filename = "<stdin>";
        yyin = stdin;
        yyparse();
        error_count += yynerrs;
    }
    else
    {
        for (int i = 1; i < argc; ++i)
        {
            curr_filename = argv[i];
            yyin = std::fopen(argv[i], "r");
            if (yyin)
            {
                yyparse();
                error_count += yynerrs;
                std::fclose(yyin);
            }
            else
            {
                utility::print_error(argv[i], "cannot be opened");
            }
        }
    }

    // Stop before any later phase if the scanner or parser reported a problem.
    if (error_count > 0)
    {
        std::cerr << "Compilation halted due to lexical or syntax errors.\n";
        exit(1);
    }

这段代码可以正常完成解析,但现在我想把 lex 文件生成的 token 打印出来。于是我做了一点修改,在代码中直接调用 yylex(),如下所示:

    // Modified entry point from the question (fragment): when no arguments are
    // given it calls yylex() directly and prints a label for each token.
    int main(int argc, char **argv)
    {
        if (argc == 1)
        {   int token;
            curr_filename = "<stdin>";
       yyin = stdin;
// Pull tokens straight from the scanner; the loop stops when yylex() returns 0.
     while(token= yylex())
     {
        switch(token){
        // 258 is the code given to CLASS on the grammar's %token line.
        case 258 : 
        std::cout << "class" ;
        // There is no break above, so a CLASS token also runs this branch.
        default : 
        std::cout << "token " ;
                 }

            // NOTE(review): by brace matching, this call sits inside the while
            // body (the brace below closes the loop), so the parser is started
            // after the first token has already been taken from the scanner.
            yyparse();
        }
//rest of the code same

但是没有任何内容被打印到输出。

如何把令牌打印到标准输出或文件中?

flex file

/* Scanner options: no yywrap() is required, and flex keeps yylineno updated. */
%option noyywrap
%option yylineno
%{
#include "flexbison.hpp"
#include "tokentable.hpp"
#include "symboltable.hpp"
#include "y.tab.h"
#include <stdio.h>
// Executed before each matched rule's action: record the current line as the
// token's location.
#define YY_USER_ACTION yylloc.first_line = yylloc.last_line = yylineno; 
// Capacity of string_buf in bytes.
static const int MAX_STR_CONST = 1025;
char string_buf[MAX_STR_CONST];  // buffer to store string constants encountered in source file
// Write cursor into string_buf; the string rules advance it per stored character.
char *string_buf_ptr;

int num_comment = 0;      // nesting depth: number of "(*" openers not yet closed
std::size_t curr_lineno = 0;      // line counter maintained by the rules (separate from yylineno)
bool str_too_long = false;   // set when a string constant overflows string_buf, cleared during recovery

%}
/* Exclusive start conditions for block comments, "--" line comments and string constants. */
%x COMMENT
%x LINECOMMENT
%x STRING
/* Named pattern for "=>". */
DARROW =>
%%
 /* Comment handling: "(* ... *)" block comments may nest; "--" starts a
    comment that runs to the end of the line. */
"(*" {
    /* Enter a block comment and record one level of nesting. */
    BEGIN(COMMENT);
    num_comment++;
}
"*)" {
    /* A closer met outside COMMENT state has no matching opener. */
    if (num_comment <= 0) {
        yylval.error_msg = "Unmatched *)";
        return ERROR;
    }
}
<COMMENT>"*)" {
    /* Leave one nesting level; return to normal scanning at depth zero. */
    num_comment--;
    if (num_comment < 0) {
        yylval.error_msg = "Unmatched *)";
        return ERROR;
    }
    if (num_comment == 0) {
        BEGIN(INITIAL);
    }
}
<COMMENT>"(*" {
    num_comment++;
}
<COMMENT>[^\n] {
    // eat everything within comments
}
<COMMENT>\n {
    ++curr_lineno;
}
"--"[^\n]* {
    /* The pattern already consumed the comment text; LINECOMMENT only waits
       for the terminating newline. */
    BEGIN(LINECOMMENT);
}
<LINECOMMENT>\n {
    ++curr_lineno;
    BEGIN(INITIAL);
}
<LINECOMMENT><<EOF>> {
    /* Input ended without a trailing newline: reset the start condition so
       the next input file is not scanned in LINECOMMENT state. */
    BEGIN(INITIAL);
    yyterminate();
}
<COMMENT><<EOF>> {
    /* Drop the nesting depth as well, so an unterminated comment in one file
       cannot keep the scanner inside COMMENT for the next file. */
    BEGIN(INITIAL);
    num_comment = 0;
    yylval.error_msg = "EOF in comment";
    return ERROR;
}
"=>" {
    return DARROW; 
}
(?i:class)      { /* keywords are matched case-insensitively */ return CLASS; }
(?i:else)       { return ELSE; }
(?i:in)         { return IN; }
(?i:then)       { return THEN; }
(?i:fi)         { return FI; }
(?i:if)         { return IF; }
(?i:inherits)   { return INHERITS; }
(?i:let)        { return LET; }
(?i:loop)       { return LOOP; }
(?i:pool)       { return POOL; }
(?i:while)      { return WHILE; }
(?i:case)       { return CASE; }
(?i:esac)       { return ESAC; }
(?i:of)         { return OF; }
(?i:new)        { return NEW; }
(?i:isvoid)     { return ISVOID; }
(?i:not)        { return NOT; }
t(?i:rue)       { /* first letter must be lower case */ yylval.boolean = true; return BOOL_CONST; }
f(?i:alse)      { yylval.boolean = false; return BOOL_CONST; }
[0-9]+          { /* integer literal: intern its text */ yylval.symbol = inttable().add(yytext); return INT_CONST; }
"<="            { return LE; }
"<-"            { return ASSIGN; }

[A-Z][a-zA-Z0-9_]* {
    /* Identifier starting with an upper-case letter: a type name. */
    yylval.symbol = idtable().add(yytext);
    return TYPEID;
}

[a-z][a-zA-Z0-9_]* {
    /* Identifier starting with a lower-case letter: an object name. */
    yylval.symbol = idtable().add(yytext);
    return OBJECTID;
}
";"|","|"{"|"}"|":"|"("|")"|"+"|"-"|"*"|"/"|"="|"~"|"<"|"."|"@" { 
    /* Single-character tokens use their own character code as token number. */
    return *yytext;
}
\n {
    ++curr_lineno;
}
[ \f\r\t\v] {
    // eat whitespace
}
 /*
  *  String constants (C syntax)
  *  Escape sequence \c is accepted for all characters c. Except for
  *  \n \t \b \f, the result is c.
  *
  */
\" {
    /* Opening quote: start collecting into an empty, zero-filled buffer. */
    BEGIN(STRING);
    string_buf_ptr = string_buf;
    memset(string_buf, 0, MAX_STR_CONST);
}
<STRING>\" {
    /* Closing quote: intern the collected text and hand it to the parser. */
    BEGIN(INITIAL);
    yylval.symbol = stringtable().add(string_buf);
    return STR_CONST;
}
<STRING>\0[^\n]*\" {
    /* A NUL inside a string, followed by the rest of the string up to a
       closing quote. The overflow paths push a NUL back with unput('\0') to
       get here, so when str_too_long is set this simply discards the
       remainder of the over-long string; otherwise the NUL came from the
       input itself.
       NOTE(review): the leading \0 in this pattern was reconstructed; confirm
       against the original scanner. */
    BEGIN(INITIAL);
    if (str_too_long) {
        str_too_long = false;
    }
    else {
        yylval.error_msg = "String contains null character";
        return ERROR;
    }
}
<STRING>\0[^"]*\n {
    /* Same situation, but the line ends before any closing quote appears.
       NOTE(review): the leading \0 and the '\0' comparison below were
       reconstructed; confirm against the original scanner. */
    if (str_too_long) {
        yyinput(); /* eat quote */
        BEGIN(INITIAL);
        str_too_long = false;
    }
    else {
        if (yytext[yyleng - 1] != '\0') {
            BEGIN(INITIAL);
            yylval.error_msg = "String contains null character";
            return ERROR;
        }
    }
}
<STRING><<EOF>> {
    BEGIN(INITIAL);
    yylval.error_msg = "EOF in string constant";
    return ERROR;
}
<STRING>\\ {
    /* Backslash escape. If the buffer is already full, flag the overflow and
       push a NUL back so the NUL rules can skip the rest of the string. */
    if (strlen(string_buf) >= MAX_STR_CONST - 1) {
        str_too_long = true;
        unput('\0');
        yylval.error_msg = "String constant too long";
        return ERROR;
    }
    /* Read the escaped character and store its translation. */
    char ahead = yyinput();
    switch (ahead) {
        case 'b':
            *string_buf_ptr++ = '\b';
            break;
        case 't':
            *string_buf_ptr++ = '\t';
            break;
        case 'n':
            *string_buf_ptr++ = '\n';
            break;
        case 'f':
            *string_buf_ptr++ = '\f';
            break;
        case '\n':
            /* Escaped line break: keeps the newline and counts the line. */
            ++curr_lineno;
            *string_buf_ptr++ = '\n';
            break;
        case '\0':
            /* Leave the NUL in the input for the NUL rules to report. */
            unput(ahead);
            break;
        default:
            *string_buf_ptr++ = ahead;
    }
}
<STRING>\n {
    /* An unescaped line break ends the string with an error. */
    ++curr_lineno;
    BEGIN(INITIAL);
    yylval.error_msg = "Unterminated string constant";
    return ERROR;
}
<STRING>. {
    /* Ordinary character. If the buffer is already full, flag the overflow
       and push a NUL back so the NUL rules can skip the rest of the string. */
    if (strlen(string_buf) >= MAX_STR_CONST - 1) {
        str_too_long = true;
        unput('\0');
        yylval.error_msg = "String constant too long";
        return ERROR;
    }
    *string_buf_ptr++ = *yytext;
}
. {
    /* error for invalid tokens */
    yylval.error_msg = std::string(yytext) + " is not a valid character in the current context.";
    return ERROR;
}
%%

bison file

%{
#include "flexbison.hpp"
#include "symboltable.hpp"
#include "tokentable.hpp"
#include "ast.hpp"
#include <iostream>
// Convenience macro: stamp an AST node with the first line of a location
// value and the name of the file currently being parsed.
#define SETLOC(lval,node) (lval)->setloc((node).first_line, curr_filename)
// both defined in main.cpp
extern ProgramPtr ast_root;
extern std::string curr_filename;
// both defined in lexer
extern int yylex();
extern int yylineno;
// Error callback invoked by the generated parser; defined after the grammar.
// NOTE(review): Bison documents the parameter as `char const *` and passes
// string literals; confirm that `char *` is accepted by the compiler in use.
void yyerror(char *);        
%}
/* Terminal symbols. Token numbers are pinned explicitly (258-283) instead of
   being assigned by Bison. */
%token CLASS 258 ELSE 259 FI 260 IF 261 IN 262 
%token INHERITS 263 LET 264 LOOP 265 POOL 266 THEN 267 WHILE 268
%token CASE 269 ESAC 270 OF 271 DARROW 272 NEW 273 ISVOID 274
/* Tokens that carry a semantic value name the yylval member they use. */
%token <symbol>  STR_CONST 275 INT_CONST 276 
%token <boolean> BOOL_CONST 277
%token <symbol>  TYPEID 278 OBJECTID 279 
%token ASSIGN 280 NOT 281 LE 282 ERROR 283
/* Semantic-value member produced by each nonterminal. */
%type <program> program
%type <clazz> class
%type <classes> class_list
%type <attribute> attribute
%type <attributes> attribute_list
%type <method> method
%type <methods> method_list
%type <expression> expression
%type <expression> let_expr 
%type <expressions> expression_list
%type <expressions> method_expr_list
%type <formal> formal
%type <formals> formal_list
%type <branch> case
%type <cases> case_list
/* Operator precedence: Bison gives later lines higher precedence.
   NOTE(review): as written, LE and '<' bind tighter than '.' and the
   arithmetic operators, while '=' binds loosest of all; confirm that this
   ordering is intended. */
%nonassoc '='
%left LET
%right ASSIGN
%left NOT
%left '+' '-'
%left '*' '/' 
%left ISVOID
%left '~'
%left '@'
%left '.'
%nonassoc LE '<'
%%
/* Start symbol: a non-empty list of classes. The finished tree is stored in
   ast_root. */
program : class_list    { @$ = @1; ast_root = std::make_shared<Program>($1); }
;
/* One or more classes. The recursive alternative appends to $$ without
   assigning it first, relying on Bison pre-setting $$ to $1. */
class_list : class { $$ = Classes(); $$.push_back($1); }
            | class_list class { $$.push_back($2); }
;
/* Class definition. Without INHERITS the parent is the symbol "Object". The
   body is attribute_list followed by method_list, both non-empty as written.
   The error alternative skips ahead to the next ';' and resumes parsing. */
class : CLASS TYPEID '{' attribute_list method_list '}' ';' { $$ = std::make_shared<Class>($2, idtable().add("Object"), $4, $5); SETLOC($$, @1); }
        | CLASS TYPEID INHERITS TYPEID '{' attribute_list method_list '}' ';' { $$ = std::make_shared<Class>($2, $4, $6, $7); SETLOC($$, @1); }
        | error ';' { yyerrok; } 
;
/* One or more attributes, each terminated by ';'. */
attribute_list : attribute ';' { $$ = Attributes(); $$.push_back($1); }
               | attribute_list attribute ';' { $$.push_back($2); }
               | error ';' { yyerrok; }
;
/* Attribute declaration; a missing initializer is represented by NoExpr. */
attribute : OBJECTID ':' TYPEID { $$ = std::make_shared<Attribute>($1, $3, std::make_shared<NoExpr>()); SETLOC($$, @1); }
          | OBJECTID ':' TYPEID ASSIGN expression { $$ = std::make_shared<Attribute>($1, $3, $5); SETLOC($$, @5); }
;
/* One or more methods, each terminated by ';'. */
method_list : method ';' { $$ = Methods(); $$.push_back($1); }
            | method_list method ';' { $$.push_back($2); }
            | error ';' { yyerrok; }
;
/* Method definition, with or without formal parameters. */
method : OBJECTID '(' formal_list ')' ':' TYPEID '{' expression '}' { $$ = std::make_shared<Method>($1, $6, $3, $8); SETLOC($$, @1); }
       | OBJECTID '(' ')' ':' TYPEID '{' expression '}' { $$ = std::make_shared<Method>($1, $5, Formals(), $7); SETLOC($$, @1); }
;
/* Comma-separated, non-empty list of formal parameters. */
formal_list : formal { $$ = Formals(); $$.push_back($1); }
            | formal_list ',' formal { $$.push_back($3); } 
;
/* A single "name : Type" formal parameter. */
formal : OBJECTID ':' TYPEID { $$ = std::make_shared<Formal>($1, $3); SETLOC($$, @1); }
;
/* One or more case branches. */
case_list : case { $$ = Cases(); $$.push_back($1); }
            | case_list case { $$.push_back($2); }
;
/* A single "name : Type => expression ;" case branch. */
case : OBJECTID ':' TYPEID DARROW expression ';' { $$ = std::make_shared<CaseBranch>($1, $3, $5); SETLOC($$, @5); }
;
/* Comma-separated, non-empty argument list of a dispatch. */
method_expr_list : expression { $$ = Expressions(); $$.push_back($1); }
                    | method_expr_list ',' expression { $$.push_back($3); }
;
/* One or more expressions, each terminated by ';' (the body of a block). */
expression_list : expression ';' { $$ = Expressions(); $$.push_back($1); }
                | expression_list expression ';' { $$.push_back($2); }
                | error ';' { yyerrok; }
;
/* Bindings that follow LET. A ',' continues with another binding, which
   becomes the body of the enclosing Let node; IN introduces the final body. */
let_expr : OBJECTID ':' TYPEID IN expression %prec LET { $$ = std::make_shared<Let>($1, $3, std::make_shared<NoExpr>(), $5); SETLOC($$, @5); }
            | OBJECTID ':' TYPEID ASSIGN expression IN expression %prec LET { $$ = std::make_shared<Let>($1, $3, $5, $7); SETLOC($$, @5); }
            | OBJECTID ':' TYPEID ',' let_expr { $$ = std::make_shared<Let>($1, $3, std::make_shared<NoExpr>(), $5); SETLOC($$, @5); }
            | OBJECTID ':' TYPEID ASSIGN expression ',' let_expr { $$ = std::make_shared<Let>($1, $3, $5, $7); SETLOC($$, @4); }
            | error ',' let_expr { yyerrok; }
;

/* Expressions. A dispatch written without a receiver is built as a dynamic
   dispatch on an Object node for the symbol "self". */
expression : OBJECTID ASSIGN expression { $$ = std::make_shared<Assign>($1, $3); SETLOC($$, @3); }
            | expression '.' OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<DynamicDispatch>($1, $3, $5); SETLOC($$, @1); }
            | expression '.' OBJECTID '(' ')' { $$ = std::make_shared<DynamicDispatch>($1, $3, Expressions()); SETLOC($$, @1); }
            | expression '@' TYPEID '.' OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<StaticDispatch>($1, $3, $5, $7); SETLOC($$, @1); }
            | expression '@' TYPEID '.' OBJECTID '(' ')' { $$ = std::make_shared<StaticDispatch>($1, $3, $5, Expressions()); SETLOC($$, @1);}
            | OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<DynamicDispatch>(std::make_shared<Object>(idtable().add("self")), $1, $3); 
                                                  SETLOC($$, @1); } 
            | OBJECTID '(' ')' { $$ = std::make_shared<DynamicDispatch>(std::make_shared<Object>(idtable().add("self")), $1, Expressions()); 
                                 SETLOC($$, @1); } 
            | IF expression THEN expression ELSE expression FI { $$ = std::make_shared<If>($2, $4, $6); SETLOC($$, @2); }
            | WHILE expression LOOP expression POOL { $$ = std::make_shared<While>($2, $4); SETLOC($$, @2); }
            | '{' expression_list '}' { $$ = std::make_shared<Block>($2); SETLOC($$, @2); }
            | LET let_expr { $$ = $2; SETLOC($$, @2); }
            | CASE expression OF case_list ESAC { $$ = std::make_shared<Case>($2, $4); SETLOC($$, @2); }
            | NEW TYPEID { $$ = std::make_shared<New>($2); SETLOC($$, @2); }
            | ISVOID expression { $$ = std::make_shared<IsVoid>($2); SETLOC($$, @2); }
            | expression '+' expression { $$ = std::make_shared<Plus>($1, $3); SETLOC($$, @1); }
            | expression '-' expression { $$ = std::make_shared<Sub>($1, $3); SETLOC($$, @1); }
            | expression '*' expression { $$ = std::make_shared<Mul>($1, $3); SETLOC($$, @1); }
            | expression '/' expression { $$ = std::make_shared<Div>($1, $3); SETLOC($$, @1); }
            | '~' expression { $$ = std::make_shared<Complement>($2); SETLOC($$, @2); }
            | expression '<' expression { $$ = std::make_shared<LessThan>($1, $3); SETLOC($$, @1); }
            | expression LE expression { $$ = std::make_shared<LessThanEqualTo>($1, $3); SETLOC($$, @1); }
            | expression '=' expression { $$ = std::make_shared<EqualTo>($1, $3); SETLOC($$, @1); }
            | NOT expression { $$ = std::make_shared<Not>($2); SETLOC($$, @2); }
            | '(' expression ')' { $$ = $2; SETLOC($$, @2); } 
            | OBJECTID { $$ = std::make_shared<Object>($1); SETLOC($$, @1); }
            | INT_CONST { $$ = std::make_shared<IntConst>($1); SETLOC($$, @1); }
            | STR_CONST { $$ = std::make_shared<StringConst>($1); SETLOC($$, @1); }
            | BOOL_CONST { $$ = std::make_shared<BoolConst>($1); SETLOC($$, @1); } 
;
%%
// utility function for converting bison tokens to its string representation
// for better error reporting
std::string convert_token(int token)
{
    std::string rep;
    switch (token)
    {
        case CLASS: rep = "class"; break;
        case ELSE: rep = "else"; break;
        case FI: rep = "fi"; break;
        case IF: rep = "if"; break;
        case IN: rep = "in"; break;
        case INHERITS: rep = "inherits"; break;
        case LET: rep = "let"; break;
        case LOOP: rep = "loop"; break;
        case POOL: rep = "pool"; break;
        case THEN: rep = "then"; break;
        case WHILE: rep = "while"; break;
        case CASE: rep = "case"; break;
        case ESAC: rep = "esac"; break;
        case OF: rep = "of"; break;
        case DARROW: rep = "=>"; break;
        case NEW: rep = "new"; break;
        case ISVOID: rep = "isvoid"; break;
        case ASSIGN: rep = "<-"; break;
        case NOT: rep = "not"; break;
        case LE: rep = "<="; break;
        case STR_CONST: rep = "STR_CONST = " + yylval.symbol.get_val(); break;
        case INT_CONST: rep = "INT_CONST = " + yylval.symbol.get_val(); break;
        case BOOL_CONST: rep = "BOOL_CONST = " + yylval.boolean; break;
        case TYPEID: rep = "TYPEID = " + yylval.symbol.get_val(); break;
        case OBJECTID: rep = "OBJECTID = " + yylval.symbol.get_val(); break;
        default: rep = (char) token;
    }     
    return rep;
}
void yyerror(char *)
{
    if (yylval.error_msg.length() <= 0)
        std::cerr << curr_filename << ":" << yylineno << ": " << "error: " <<  "syntax error near or at character or token '" << convert_token(yychar) << "'n";
    else
        std::cerr << curr_filename << ":" << yylineno << ": " << "error: " << yylval.error_msg << "n";
}

我不知道为什么完全看不到任何输出,不过我也没有查看全部代码。如果从 main 中调用 yylex,它会读取一个令牌,而这个令牌实际上就被丢弃了。随后调用 yyparse 时,yyparse 会自己反复调用 yylex,直到 yylex 返回 0。据推测(但不能确定),当 main 中的 while 循环下一次调用 yylex 时,它会再次返回 0,于是循环结束。最终结果应该是:while 循环只打印出一个单词,接着是 yyparse 产生的输出(如果有的话);其中很可能包含一条语法错误信息,因为解析器始终没有看到输入的第一个标记。

我猜这并不是你想要的效果,不过你的意图也不是十分清楚。

如果您想在标记被识别出来时就看到它们,可以在每条词法规则的动作中插入打印该标记的语句。或者让 flex 给扫描函数换一个名字(比如 yylex_internal),然后自己编写一个 yylex() 函数:它调用 yylex_internal,并在返回结果之前把结果打印出来。

如果您只是为了调试,那么最好使用 flex 的 -d 命令行选项,它会自动生成调试输出。输出格式可能不完全符合您的期望,但这种方式更容易启用和撤销 :)

要更改flex生成的yylex函数的名称,请在.l文件顶部的代码块中插入以下内容:

#define YY_DECL int yylex_internal()

flex生成的文件如下声明扫描函数:

YY_DECL {
  /* body of function */
}

因此,您可以重命名函数或添加参数,甚至可以通过定义YY_DECL宏来更改返回类型。请参阅flex手册中的生成扫描器部分。

顺便说一下,手动为所有终结符令牌编号通常不被认为是好的风格,尽管 bison 允许这样做。您应该让 bison 自行编号,并通过 #include "y.tab.h"(或者您为 bison 头文件起的其他名字;用 -o 选项可以轻松更改文件名)把这些定义引入到源文件中。