Skip to content
Snippets Groups Projects
lv6lexer.mll 13.45 KiB
{

(*
la gestion des no de ligne est faite dans Lxm 
pour viter les dpendances boucles entre Lexer et Parser
*)
open Lv6parser

(* rcupration d'erreur avec correction line/col *)
exception Lexical_error of string * Lxm.t

let handle_lexical_error fn lexbuf = (
        let lxm = Lxm.make (lexbuf ) in
        try
                fn lexbuf
        with Lexical_error(msg, _) ->
                raise(Lexical_error(msg, lxm))
)
let unget_lexbuf lb =
  lb.Lexing.lex_curr_pos <- lb.Lexing.lex_curr_pos - 1

(* table des mots-cl *)
let keywords = Hashtbl.create 50 ;;
Hashtbl.add keywords "extern"     (function x -> TK_EXTERN x) ;;
Hashtbl.add keywords "unsafe"     (function x -> TK_UNSAFE x) ;;

Hashtbl.add keywords "and"        (function x -> TK_AND x) ;;
Hashtbl.add keywords "assert"     (function x -> TK_ASSERT x) ;;
Hashtbl.add keywords "bool"       (function x -> TK_BOOL x) ;;
Hashtbl.add keywords "const"      (function x -> TK_CONST x) ;;
Hashtbl.add keywords "current"    (function x -> TK_CURRENT x) ;;
Hashtbl.add keywords "div"        (function x -> TK_DIV x) ;;
Hashtbl.add keywords "else"       (function x -> TK_ELSE x) ;;
Hashtbl.add keywords "enum"       (function x -> TK_ENUM x) ;;
Hashtbl.add keywords "function"   (function x -> TK_FUNCTION x) ;;
Hashtbl.add keywords "false"      (function x -> TK_FALSE x) ;;
Hashtbl.add keywords "if"         (function x -> TK_IF x) ;;
Hashtbl.add keywords "int"        (function x -> TK_INT x) ;;
Hashtbl.add keywords "let"        (function x -> TK_LET x) ;;
Hashtbl.add keywords "mod"        (function x -> TK_MOD x) ;;
Hashtbl.add keywords "node"       (function x -> TK_NODE x) ;;
Hashtbl.add keywords "not"        (function x -> TK_NOT x) ;;
Hashtbl.add keywords "operator"   (function x -> TK_OPERATOR x) ;;
Hashtbl.add keywords "or"         (function x -> TK_OR x) ;;
Hashtbl.add keywords "nor"        (function x -> TK_NOR x) ;;
Hashtbl.add keywords "fby"        (function x -> TK_FBY x) ;;
Hashtbl.add keywords "pre"        (function x -> TK_PRE x) ;;
Hashtbl.add keywords "real"       (function x -> TK_REAL x) ;;
Hashtbl.add keywords "returns"    (function x -> TK_RETURNS x) ;;
Hashtbl.add keywords "step"       (function x -> TK_STEP x) ;;
Hashtbl.add keywords "struct"     (function x -> TK_STRUCT x) ;;
Hashtbl.add keywords "tel"        (function x -> TK_TEL x) ;;
Hashtbl.add keywords "type"       (function x -> TK_TYPE x) ;;
Hashtbl.add keywords "then"       (function x -> TK_THEN x) ;;
Hashtbl.add keywords "true"       (function x -> TK_TRUE x) ;;
Hashtbl.add keywords "var"        (function x -> TK_VAR x) ;;
Hashtbl.add keywords "when"       (function x -> TK_WHEN x) ;;
Hashtbl.add keywords "with"       (function x -> TK_WITH x) ;;
Hashtbl.add keywords "xor"        (function x -> TK_XOR x) ;;
Hashtbl.add keywords "model"      (function x -> TK_MODEL x) ;;
Hashtbl.add keywords "package"    (function x -> TK_PACKAGE x) ;;
Hashtbl.add keywords "needs"      (function x -> TK_NEEDS x) ;;
Hashtbl.add keywords "provides"   (function x -> TK_PROVIDES x) ;;
Hashtbl.add keywords "uses"       (function x -> TK_USES x) ;;
Hashtbl.add keywords "is"         (function x -> TK_IS x) ;;
Hashtbl.add keywords "body"       (function x -> TK_BODY x) ;;
Hashtbl.add keywords "end"        (function x -> TK_END x) ;;
Hashtbl.add keywords "include"    (function x -> TK_INCLUDE x) ;;
Hashtbl.add keywords "merge"      (function x -> TK_MERGE x) ;;
let is_a_keyword ( s: string ) = (
        try
                let res = Hashtbl.find keywords s in (Some res)
        with Not_found -> ( None )
)

let token_code tk = (
        match tk with
                  TK_EOF           -> ("TK_EOF" , Lxm.dummy "eof")
                | TK_ERROR     lxm -> ("TK_ERROR" , lxm)
                | TK_EXTERN    lxm -> ("TK_EXTERN" , lxm)
                | TK_UNSAFE    lxm -> ("TK_UNSAFE" , lxm)
                | TK_AND       lxm -> ("TK_AND" , lxm)
                | TK_ARROW     lxm -> ("TK_ARROW" , lxm)
                | TK_ASSERT    lxm -> ("TK_ASSERT" , lxm)
                | TK_BAR       lxm -> ("TK_BAR" , lxm)
                | TK_BOOL      lxm -> ("TK_BOOL" , lxm)
                | TK_CDOTS     lxm -> ("TK_CDOTS" , lxm)
                | TK_CLOSE_BRACKET lxm -> ("TK_CLOSE_BRACKET" , lxm)
                | TK_CLOSE_BRACE   lxm -> ("TK_CLOSE_BRACE" , lxm)
                | TK_CLOSE_PAR     lxm -> ("TK_CLOSE_PAR" , lxm)
                | TK_CLOSE_STATIC_PAR  lxm -> ("TK_CLOSE_STATIC_PAR" , lxm)
                | TK_COLON     lxm -> ("TK_COLON" , lxm)
                | TK_COMA      lxm -> ("TK_COMA" , lxm)
                | TK_CONST     lxm -> ("TK_CONST" , lxm)
                | TK_CURRENT   lxm -> ("TK_CURRENT" , lxm)
                | TK_DIV       lxm -> ("TK_DIV" , lxm)
                | TK_DIESE     lxm -> ("TK_DIESE" , lxm)
                | TK_DOT       lxm -> ("TK_DOT" , lxm)
                | TK_ELSE      lxm -> ("TK_ELSE" , lxm)
                | TK_EQ        lxm -> ("TK_EQ" , lxm)
                | TK_ENUM      lxm -> ("TK_ENUM" , lxm)
                | TK_FALSE     lxm -> ("TK_FALSE" , lxm)
                | TK_FUNCTION  lxm -> ("TK_FUNCTION" , lxm)
                | TK_GT        lxm -> ("TK_GT" , lxm)
                | TK_GTE       lxm -> ("TK_GTE" , lxm)
                | TK_HAT       lxm -> ("TK_HAT" , lxm)
                | TK_ICONST    lxm -> ("TK_ICONT" , lxm)
                | TK_IDENT     lxm -> ("TK_IDENT" , lxm)
                | TK_LONGIDENT lxm -> ("TK_LONGIDENT" , lxm)
                | TK_STRING  lxm -> ("TK_STRING" , lxm)
                | TK_IF        lxm -> ("TK_IF" , lxm)
                | TK_IMPL      lxm -> ("TK_IMPL" , lxm)
                | TK_INT       lxm -> ("TK_INT" , lxm)
                | TK_LET       lxm -> ("TK_LET" , lxm)
                | TK_LT        lxm -> ("TK_LT" , lxm)
                | TK_LTE       lxm -> ("TK_LTE" , lxm)
                | TK_MINUS     lxm -> ("TK_MINUS" , lxm)
                | TK_MOD       lxm -> ("TK_MOD" , lxm)
                | TK_NEQ       lxm -> ("TK_NEQ" , lxm)
                | TK_NODE      lxm -> ("TK_NODE" , lxm)
                | TK_NOR       lxm -> ("TK_NOR" , lxm)
                | TK_NOT       lxm -> ("TK_NOT" , lxm)
                | TK_OPEN_BRACKET lxm -> ("TK_OPEN_BRACKET" , lxm)
                | TK_OPEN_BRACE   lxm -> ("TK_OPEN_BRACE" , lxm)
                | TK_OPEN_PAR     lxm -> ("TK_OPEN_PAR" , lxm)
                | TK_OPEN_STATIC_PAR  lxm -> ("TK_OPEN_STATIC_PAR" , lxm)
                | TK_OPERATOR        lxm -> ("TK_OPERATOR" , lxm)
                | TK_OR        lxm -> ("TK_OR" , lxm)
                | TK_PCENT     lxm -> ("TK_PCENT" , lxm)
                | TK_PLUS      lxm -> ("TK_PLUS" , lxm)
                | TK_POWER     lxm -> ("TK_POWER" , lxm)
                | TK_FBY       lxm -> ("TK_FBY" , lxm)
                | TK_PRE       lxm -> ("TK_PRE" , lxm)
                | TK_RCONST    lxm -> ("TK_RCONST" , lxm)
                | TK_REAL      lxm -> ("TK_REAL" , lxm)
                | TK_RETURNS   lxm -> ("TK_RETURNS" , lxm)
                | TK_SEMICOL   lxm -> ("TK_SEMICOL" , lxm)
                | TK_STAR      lxm -> ("TK_STAR" , lxm)
                | TK_SLASH     lxm -> ("TK_SLASH" , lxm)
                | TK_STEP      lxm -> ("TK_STEP" , lxm)
                | TK_STRUCT    lxm -> ("TK_STRUCT" , lxm)
                | TK_TEL       lxm -> ("TK_TEL" , lxm)
                | TK_THEN      lxm -> ("TK_THEN" , lxm)
                | TK_TRUE      lxm -> ("TK_TRUE" , lxm)
                | TK_TYPE      lxm -> ("TK_TYPE" , lxm)
                | TK_VAR       lxm -> ("TK_VAR" , lxm)
                | TK_WHEN      lxm -> ("TK_WHEN" , lxm)
                | TK_MERGE      lxm -> ("TK_MERGE" , lxm)
                | TK_WITH      lxm -> ("TK_WITH" , lxm)
                | TK_XOR       lxm -> ("TK_XOR" , lxm)
                | TK_MODEL     lxm -> ("TK_MODEL" , lxm)
                | TK_PACKAGE   lxm -> ("TK_PACKAGE" , lxm)
                | TK_NEEDS     lxm -> ("TK_NEEDS" , lxm)
                | TK_PROVIDES  lxm -> ("TK_PROVIDES" , lxm)
                | TK_USES      lxm -> ("TK_USES" , lxm)
                | TK_IS        lxm -> ("TK_IS" , lxm)
                | TK_BODY      lxm -> ("TK_BODY" , lxm)
                | TK_END       lxm -> ("TK_END" , lxm)
                | TK_INCLUDE   lxm -> ("TK_INCLUDE" , lxm)
)

}

(* Pour simplifier les rgles des constantes numriques *)

let chiffre = ['0'-'9']
let chiffres = ['0'-'9'] +
let exposant = ( 'e' | 'E' ) ( '+' | '-' )? chiffres

rule lexer = parse
     eof
                { TK_EOF }
(* saute les blancs *)
(* saute les blancs *)
  | [' ' '\013' '\009' '\012'] +
                { lexer lexbuf }
(* retour  la ligne *)
        | '\n'
                {
                        Lxm.new_line ( lexbuf );
                        lexer lexbuf    
                }
(* commentaire parenths *)
        | "(*"
                {
                        handle_lexical_error comment_par lexbuf;
                        lexer lexbuf
                }
(* commentaire parenths bis *)
        | "/*"
                {
                        handle_lexical_error comment_par_bis lexbuf;
                        lexer lexbuf
                }
(* commentaire en ligne *)
        | "--"
                {
                        handle_lexical_error comment_line lexbuf;
                        lexer lexbuf
                }

(* mots-cl dbutant par un sparateur (prioritaires) *)
        | "->" { TK_ARROW ( Lxm.make lexbuf ) }
        | "=>" { TK_IMPL ( Lxm.make lexbuf ) }
        | "<=" { TK_LTE ( Lxm.make lexbuf ) }
        | "<>" { TK_NEQ ( Lxm.make lexbuf ) }
        | ">=" { TK_GTE ( Lxm.make lexbuf ) }
        | ".." { TK_CDOTS ( Lxm.make lexbuf ) }
        | "**" { TK_POWER ( Lxm.make lexbuf ) }
        (* parentheses des params statiques ... bof *)
        | "<<" { TK_OPEN_STATIC_PAR  ( Lxm.make lexbuf ) }
        | ">>" { TK_CLOSE_STATIC_PAR ( Lxm.make lexbuf ) }
(* sparateurs simples *)
        | "+"  { TK_PLUS ( Lxm.make lexbuf ) }
        | "^"  { TK_HAT ( Lxm.make lexbuf ) }
        | "#"  { TK_DIESE ( Lxm.make lexbuf ) }
        | "-"  { TK_MINUS ( Lxm.make lexbuf ) }
        | "/"  { TK_SLASH ( Lxm.make lexbuf ) }
        | "%"  { TK_PCENT ( Lxm.make lexbuf ) }
        | "*"  { TK_STAR ( Lxm.make lexbuf ) }
        | "|"  { TK_BAR ( Lxm.make lexbuf ) }
        | "="  { TK_EQ ( Lxm.make lexbuf ) }

        | "."  { TK_DOT ( Lxm.make lexbuf ) }
(*      | "\"" { TK_QUOTE ( Lxm.make lexbuf ) } *)
        | ","  { TK_COMA ( Lxm.make lexbuf ) }
        | ";"  { TK_SEMICOL ( Lxm.make lexbuf ) }
        | ":"  { TK_COLON ( Lxm.make lexbuf ) }
        | "("  { TK_OPEN_PAR ( Lxm.make lexbuf ) }
        | ")"  { TK_CLOSE_PAR ( Lxm.make lexbuf ) }
        | "{"  { TK_OPEN_BRACE ( Lxm.make lexbuf ) }
        | "}"  { TK_CLOSE_BRACE ( Lxm.make lexbuf ) }
        | "["  { TK_OPEN_BRACKET ( Lxm.make lexbuf ) }
        | "]"  { TK_CLOSE_BRACKET ( Lxm.make lexbuf ) }
        | "<"  { TK_LT ( Lxm.make lexbuf ) }
        | ">"  { TK_GT ( Lxm.make lexbuf ) }
(* identificateur point *)
        | ['_' 'A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '\'' '_' '0'-'9'] *
                ':' ':'
          ['_' 'A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '\'' '_' '0'-'9'] *
                {
                        let lxm = Lxm.make lexbuf in
                        TK_LONGIDENT (lxm)
                }

(* une chaine quelconque *)
         | "\""  [^ '\"']* "\""
            { 
                        let lxm = Lxm.make_string lexbuf in
                          TK_STRING (lxm)
            }
(* constantes entires *)
        |  chiffres  { TK_ICONST (Lxm.make lexbuf ) }


(* constantes relles *)
        |  chiffres (exposant) { TK_RCONST (Lxm.make lexbuf ) }

        |  chiffres '.' chiffres (exposant)? 
            { TK_RCONST (Lxm.make lexbuf ) }

        |  chiffres '.'  (exposant)
            { TK_RCONST (Lxm.make lexbuf ) }

        |  '.' chiffres (exposant)? { TK_RCONST (Lxm.make lexbuf ) }

(* Pour dsambiguer le .. (slice)  *)
        |  chiffres  '.'[^'.'] {
             unget_lexbuf lexbuf;
             TK_RCONST (Lxm.make lexbuf)
           }

(* mot-cl ou identificateur *)
        | ['_' 'A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '\'' '_' '0'-'9'] *
                {
                        let lxm = Lxm.make lexbuf in
                        let x = is_a_keyword ( Lxm.str lxm ) in
                        match x with
                                  None -> TK_IDENT ( lxm )
                                | Some keyw -> keyw ( lxm ) 
                }
        | _    { TK_ERROR ( Lxm.make lexbuf ) }

and comment_par = parse
          "*)" 
                { }
        | "\n" 
                {
                        Lxm.new_line ( lexbuf );
                        comment_par lexbuf      
                }
        | eof
                {
                        raise(Lexical_error("unterminated comment", 
                                            Lxm.dummy "unterminated comment"))
                }
        |       _
                { comment_par lexbuf }

and comment_par_bis = parse
          "*/" 
                { }
        | "\n" 
                {
                        Lxm.new_line ( lexbuf );
                        comment_par_bis lexbuf  
                }
        | eof
                {
                        raise(Lexical_error("unterminated comment", 
                                            Lxm.dummy "unterminated comment"))
                }
        |       _
                { comment_par_bis lexbuf }

and comment_line = parse
        '\n'
                {
                        Lxm.new_line ( lexbuf );
                }
        | eof
                { }
        |       _
                { comment_line lexbuf }