Skip to content
Snippets Groups Projects
lexer.mll 13.6 KiB
Newer Older
Erwan Jahier's avatar
Erwan Jahier committed
{

(*
la gestion des no de ligne est faite dans Lxm 
pour viter les dpendances boucles entre Lexer et Parser
*)
open Lxm
open Parser

(* rcupration d'erreur avec correction line/col *)
exception Lexical_error of string * Lxm.t

let handle_lexical_error fn lexbuf = (
Erwan Jahier's avatar
Erwan Jahier committed
        let lxm = Lxm.make (lexbuf ) in
        try
                fn lexbuf
        with Lexical_error(msg, _) ->
                raise(Lexical_error(msg, lxm))
Erwan Jahier's avatar
Erwan Jahier committed
)

(* table des mots-cl *)
let keywords = Hashtbl.create 50 ;;
Hashtbl.add keywords "extern"     (function x -> TK_EXTERN x) ;;

Erwan Jahier's avatar
Erwan Jahier committed
Hashtbl.add keywords "and"        (function x -> TK_AND x) ;;
Hashtbl.add keywords "assert"     (function x -> TK_ASSERT x) ;;
Hashtbl.add keywords "bool"       (function x -> TK_BOOL x) ;;
Hashtbl.add keywords "const"      (function x -> TK_CONST x) ;;
Hashtbl.add keywords "current"    (function x -> TK_CURRENT x) ;;
Hashtbl.add keywords "div"        (function x -> TK_DIV x) ;;
Hashtbl.add keywords "else"       (function x -> TK_ELSE x) ;;
Hashtbl.add keywords "enum"       (function x -> TK_ENUM x) ;;
Hashtbl.add keywords "function"   (function x -> TK_FUNCTION x) ;;
Hashtbl.add keywords "false"      (function x -> TK_FALSE x) ;;
Hashtbl.add keywords "if"         (function x -> TK_IF x) ;;
Hashtbl.add keywords "int"        (function x -> TK_INT x) ;;
Hashtbl.add keywords "let"        (function x -> TK_LET x) ;;
Hashtbl.add keywords "mod"        (function x -> TK_MOD x) ;;
Hashtbl.add keywords "node"       (function x -> TK_NODE x) ;;
Hashtbl.add keywords "not"        (function x -> TK_NOT x) ;;
Hashtbl.add keywords "operator"   (function x -> TK_OPERATOR x) ;;
Hashtbl.add keywords "or"         (function x -> TK_OR x) ;;
Hashtbl.add keywords "nor"        (function x -> TK_NOR x) ;;
Erwan Jahier's avatar
Erwan Jahier committed
Hashtbl.add keywords "fby"        (function x -> TK_FBY x) ;;
Erwan Jahier's avatar
Erwan Jahier committed
Hashtbl.add keywords "pre"        (function x -> TK_PRE x) ;;
Hashtbl.add keywords "real"       (function x -> TK_REAL x) ;;
Hashtbl.add keywords "returns"    (function x -> TK_RETURNS x) ;;
Hashtbl.add keywords "step"       (function x -> TK_STEP x) ;;
Hashtbl.add keywords "struct"     (function x -> TK_STRUCT x) ;;
Hashtbl.add keywords "tel"        (function x -> TK_TEL x) ;;
Hashtbl.add keywords "type"       (function x -> TK_TYPE x) ;;
Hashtbl.add keywords "then"       (function x -> TK_THEN x) ;;
Hashtbl.add keywords "true"       (function x -> TK_TRUE x) ;;
Hashtbl.add keywords "var"        (function x -> TK_VAR x) ;;
Hashtbl.add keywords "when"       (function x -> TK_WHEN x) ;;
Hashtbl.add keywords "with"       (function x -> TK_WITH x) ;;
Hashtbl.add keywords "xor"        (function x -> TK_XOR x) ;;
Hashtbl.add keywords "model"      (function x -> TK_MODEL x) ;;
Hashtbl.add keywords "package"    (function x -> TK_PACKAGE x) ;;
Hashtbl.add keywords "needs"      (function x -> TK_NEEDS x) ;;
Hashtbl.add keywords "provides"   (function x -> TK_PROVIDES x) ;;
Hashtbl.add keywords "uses"       (function x -> TK_USES x) ;;
Hashtbl.add keywords "is"         (function x -> TK_IS x) ;;
Hashtbl.add keywords "body"       (function x -> TK_BODY x) ;;
Hashtbl.add keywords "end"        (function x -> TK_END x) ;;
Hashtbl.add keywords "include"    (function x -> TK_INCLUDE x) ;;
Erwan Jahier's avatar
Erwan Jahier committed

let is_a_keyword ( s: string ) = (
Erwan Jahier's avatar
Erwan Jahier committed
        try
                let res = Hashtbl.find keywords s in (Some res)
        with Not_found -> ( None )
Erwan Jahier's avatar
Erwan Jahier committed
)

let token_code tk = (
Erwan Jahier's avatar
Erwan Jahier committed
        match tk with
                  TK_EOF           -> ("TK_EOF" , Lxm.dummy "eof")
                | TK_ERROR     lxm -> ("TK_ERROR" , lxm)
                | TK_EXTERN    lxm -> ("TK_EXTERN" , lxm)
                | TK_AND       lxm -> ("TK_AND" , lxm)
                | TK_ARROW     lxm -> ("TK_ARROW" , lxm)
                | TK_ASSERT    lxm -> ("TK_ASSERT" , lxm)
                | TK_BAR       lxm -> ("TK_BAR" , lxm)
                | TK_BOOL      lxm -> ("TK_BOOL" , lxm)
                | TK_CDOTS     lxm -> ("TK_CDOTS" , lxm)
                | TK_CLOSE_BRACKET lxm -> ("TK_CLOSE_BRACKET" , lxm)
                | TK_CLOSE_BRACE   lxm -> ("TK_CLOSE_BRACE" , lxm)
                | TK_CLOSE_PAR     lxm -> ("TK_CLOSE_PAR" , lxm)
                | TK_CLOSE_STATIC_PAR  lxm -> ("TK_CLOSE_STATIC_PAR" , lxm)
                | TK_COLON     lxm -> ("TK_COLON" , lxm)
                | TK_COMA      lxm -> ("TK_COMA" , lxm)
                | TK_CONST     lxm -> ("TK_CONST" , lxm)
                | TK_CURRENT   lxm -> ("TK_CURRENT" , lxm)
                | TK_DIV       lxm -> ("TK_DIV" , lxm)
                | TK_DIESE     lxm -> ("TK_DIESE" , lxm)
                | TK_DOT       lxm -> ("TK_DOT" , lxm)
                | TK_ELSE      lxm -> ("TK_ELSE" , lxm)
                | TK_EQ        lxm -> ("TK_EQ" , lxm)
                | TK_ENUM      lxm -> ("TK_ENUM" , lxm)
                | TK_FALSE     lxm -> ("TK_FALSE" , lxm)
                | TK_FIELD     lxm -> ("TK_FIELD" , lxm)
                | TK_FUNCTION  lxm -> ("TK_FUNCTION" , lxm)
                | TK_GT        lxm -> ("TK_GT" , lxm)
                | TK_GTE       lxm -> ("TK_GTE" , lxm)
                | TK_HAT       lxm -> ("TK_HAT" , lxm)
                | TK_ICONST    lxm -> ("TK_ICONT" , lxm)
                | TK_IDENT     lxm -> ("TK_IDENT" , lxm)
                | TK_LONGIDENT lxm -> ("TK_LONGIDENT" , lxm)
                | TK_STRING    lxm -> ("TK_STRING" , lxm)
                | TK_IF        lxm -> ("TK_IF" , lxm)
                | TK_IMPL      lxm -> ("TK_IMPL" , lxm)
                | TK_INT       lxm -> ("TK_INT" , lxm)
                | TK_LET       lxm -> ("TK_LET" , lxm)
                | TK_LT        lxm -> ("TK_LT" , lxm)
                | TK_LTE       lxm -> ("TK_LTE" , lxm)
                | TK_MINUS     lxm -> ("TK_MINUS" , lxm)
                | TK_MOD       lxm -> ("TK_MOD" , lxm)
                | TK_NEQ       lxm -> ("TK_NEQ" , lxm)
                | TK_NODE      lxm -> ("TK_NODE" , lxm)
                | TK_NOR       lxm -> ("TK_NOR" , lxm)
                | TK_NOT       lxm -> ("TK_NOT" , lxm)
                | TK_OPEN_BRACKET lxm -> ("TK_OPEN_BRACKET" , lxm)
                | TK_OPEN_BRACE   lxm -> ("TK_OPEN_BRACE" , lxm)
                | TK_OPEN_PAR     lxm -> ("TK_OPEN_PAR" , lxm)
                | TK_OPEN_STATIC_PAR  lxm -> ("TK_OPEN_STATIC_PAR" , lxm)
                | TK_OPERATOR        lxm -> ("TK_OPERATOR" , lxm)
                | TK_OR        lxm -> ("TK_OR" , lxm)
                | TK_PCENT     lxm -> ("TK_PCENT" , lxm)
                | TK_PLUS      lxm -> ("TK_PLUS" , lxm)
                | TK_POWER     lxm -> ("TK_POWER" , lxm)
                | TK_FBY       lxm -> ("TK_FBY" , lxm)
                | TK_PRE       lxm -> ("TK_PRE" , lxm)
                | TK_RCONST    lxm -> ("TK_RCONST" , lxm)
                | TK_REAL      lxm -> ("TK_REAL" , lxm)
                | TK_RETURNS   lxm -> ("TK_RETURNS" , lxm)
                | TK_SEMICOL   lxm -> ("TK_SEMICOL" , lxm)
                | TK_STAR      lxm -> ("TK_STAR" , lxm)
                | TK_SLASH     lxm -> ("TK_SLASH" , lxm)
                | TK_STEP      lxm -> ("TK_STEP" , lxm)
                | TK_STRUCT    lxm -> ("TK_STRUCT" , lxm)
                | TK_TEL       lxm -> ("TK_TEL" , lxm)
                | TK_THEN      lxm -> ("TK_THEN" , lxm)
                | TK_TRUE      lxm -> ("TK_TRUE" , lxm)
                | TK_TYPE      lxm -> ("TK_TYPE" , lxm)
                | TK_VAR       lxm -> ("TK_VAR" , lxm)
                | TK_WHEN      lxm -> ("TK_WHEN" , lxm)
                | TK_MERGE      lxm -> ("TK_MERGE" , lxm)
Erwan Jahier's avatar
Erwan Jahier committed
                | TK_WITH      lxm -> ("TK_WITH" , lxm)
                | TK_XOR       lxm -> ("TK_XOR" , lxm)
                | TK_MODEL     lxm -> ("TK_MODEL" , lxm)
                | TK_PACKAGE   lxm -> ("TK_PACKAGE" , lxm)
                | TK_NEEDS     lxm -> ("TK_NEEDS" , lxm)
                | TK_PROVIDES  lxm -> ("TK_PROVIDES" , lxm)
                | TK_USES      lxm -> ("TK_USES" , lxm)
                | TK_IS        lxm -> ("TK_IS" , lxm)
                | TK_BODY      lxm -> ("TK_BODY" , lxm)
                | TK_END       lxm -> ("TK_END" , lxm)
                | TK_INCLUDE   lxm -> ("TK_INCLUDE" , lxm)
                | TK_SLICE_START   lxm -> ("TK_SLICE_START" , lxm)
Erwan Jahier's avatar
Erwan Jahier committed
)

}

(* Pour simplifier les rgles des constantes numriques *)

let chiffre = ['0'-'9']
let chiffres = ['0'-'9'] +
let exposant = ( 'e' | 'E' ) ( '+' | '-' )? chiffres

rule lexer = parse
     eof
Erwan Jahier's avatar
Erwan Jahier committed
                { TK_EOF }
Erwan Jahier's avatar
Erwan Jahier committed
(* saute les blancs *)
(* saute les blancs *)
  | [' ' '\013' '\009' '\012'] +
Erwan Jahier's avatar
Erwan Jahier committed
                { lexer lexbuf }
Erwan Jahier's avatar
Erwan Jahier committed
(* retour  la ligne *)
Erwan Jahier's avatar
Erwan Jahier committed
        | '\n'
                {
                        Lxm.new_line ( lexbuf );
                        lexer lexbuf    
                }
Erwan Jahier's avatar
Erwan Jahier committed
(* commentaire parenths *)
Erwan Jahier's avatar
Erwan Jahier committed
        | "(*"
                {
                        handle_lexical_error comment_par lexbuf;
                        lexer lexbuf
                }
(* commentaire parenths bis *)
Erwan Jahier's avatar
Erwan Jahier committed
        | "/*"
                {
                        handle_lexical_error comment_par_bis lexbuf;
                        lexer lexbuf
                }
Erwan Jahier's avatar
Erwan Jahier committed
(* commentaire en ligne *)
Erwan Jahier's avatar
Erwan Jahier committed
        | "--"
                {
                        handle_lexical_error comment_line lexbuf;
                        lexer lexbuf
                }
Erwan Jahier's avatar
Erwan Jahier committed

(* mots-cl dbutant par un sparateur (prioritaires) *)
Erwan Jahier's avatar
Erwan Jahier committed
        | "->" { TK_ARROW ( Lxm.make lexbuf ) }
        | "=>" { TK_IMPL ( Lxm.make lexbuf ) }
        | "<=" { TK_LTE ( Lxm.make lexbuf ) }
        | "<>" { TK_NEQ ( Lxm.make lexbuf ) }
        | ">=" { TK_GTE ( Lxm.make lexbuf ) }
        | ".%" { TK_FIELD ( Lxm.make lexbuf ) }
        | ".." { TK_CDOTS ( Lxm.make lexbuf ) }
        | "**" { TK_POWER ( Lxm.make lexbuf ) }
        (* parentheses des params statiques ... bof *)
        | "<<" { TK_OPEN_STATIC_PAR  ( Lxm.make lexbuf ) }
        | ">>" { TK_CLOSE_STATIC_PAR ( Lxm.make lexbuf ) }
Erwan Jahier's avatar
Erwan Jahier committed
(* sparateurs simples *)
Erwan Jahier's avatar
Erwan Jahier committed
        | "+"  { TK_PLUS ( Lxm.make lexbuf ) }
        | "^"  { TK_HAT ( Lxm.make lexbuf ) }
        | "#"  { TK_DIESE ( Lxm.make lexbuf ) }
        | "-"  { TK_MINUS ( Lxm.make lexbuf ) }
        | "/"  { TK_SLASH ( Lxm.make lexbuf ) }
        | "%"  { TK_PCENT ( Lxm.make lexbuf ) }
        | "*"  { TK_STAR ( Lxm.make lexbuf ) }
        | "|"  { TK_BAR ( Lxm.make lexbuf ) }
        | "="  { TK_EQ ( Lxm.make lexbuf ) }
Erwan Jahier's avatar
Erwan Jahier committed

Erwan Jahier's avatar
Erwan Jahier committed
        | "."  { TK_DOT ( Lxm.make lexbuf ) }
(*      | "\"" { TK_QUOTE ( Lxm.make lexbuf ) } *)
        | ","  { TK_COMA ( Lxm.make lexbuf ) }
        | ";"  { TK_SEMICOL ( Lxm.make lexbuf ) }
        | ":"  { TK_COLON ( Lxm.make lexbuf ) }
        | "("  { TK_OPEN_PAR ( Lxm.make lexbuf ) }
        | ")"  { TK_CLOSE_PAR ( Lxm.make lexbuf ) }
        | "{"  { TK_OPEN_BRACE ( Lxm.make lexbuf ) }
        | "}"  { TK_CLOSE_BRACE ( Lxm.make lexbuf ) }
        | "["  { TK_OPEN_BRACKET ( Lxm.make lexbuf ) }
        | "]"  { TK_CLOSE_BRACKET ( Lxm.make lexbuf ) }
        | "<"  { TK_LT ( Lxm.make lexbuf ) }
        | ">"  { TK_GT ( Lxm.make lexbuf ) }
Erwan Jahier's avatar
Erwan Jahier committed
(* identificateur point *)
Erwan Jahier's avatar
Erwan Jahier committed
        | ['_' 'A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '\'' '_' '0'-'9'] *
                ':' ':'
          ['_' 'A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '\'' '_' '0'-'9'] *
                {
                        let lxm = Lxm.make lexbuf in
                        TK_LONGIDENT (lxm)
                }

(* Une grosse bidouille pour feinter lex  qui on arrive pas  faire
   comprendre que "[expr_min..expr_max]" est une tranche de tableau,
   et pas 2 reels qui se suivent ("1..3"), ou bien l'acces  une
   structure ("0..max").
*)
Erwan Jahier's avatar
Erwan Jahier committed
        |  ['A'-'Z' 'a'-'z' '\'' '_' '0'-'9' ':'] * '.' '.'
            {
              let lxm = Lxm.make lexbuf in
                TK_SLICE_START (lxm)
            }
            (* une chaine quelconque *)
        | "\"" ['_' 'A'-'Z' 'a'-'z']  ['A'-'Z' '(' ')' '$' '/' 'a'-'z' '.' '-' '_'] * "\""
            { 
                        let lxm = Lxm.make_string lexbuf in
                          TK_STRING (lxm)
            }
Erwan Jahier's avatar
Erwan Jahier committed
(* constantes entires et relles *)
Erwan Jahier's avatar
Erwan Jahier committed
        |  chiffres  { TK_ICONST (Lxm.make lexbuf ) }
Erwan Jahier's avatar
Erwan Jahier committed

        |  ( '-' )? chiffres (exposant) { TK_RCONST (Lxm.make lexbuf ) }
Erwan Jahier's avatar
Erwan Jahier committed

        |  ( '-' )? chiffres '.' (chiffres)? (exposant)? 
            { TK_RCONST (Lxm.make lexbuf ) }
Erwan Jahier's avatar
Erwan Jahier committed

        |  ( '-' )? '.' chiffres (exposant)? { TK_RCONST (Lxm.make lexbuf ) }
Erwan Jahier's avatar
Erwan Jahier committed

Erwan Jahier's avatar
Erwan Jahier committed
(* mot-cl ou identificateur *)
Erwan Jahier's avatar
Erwan Jahier committed
        | ['_' 'A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '\'' '_' '0'-'9'] *
                {
                        let lxm = Lxm.make lexbuf in
                        let x = is_a_keyword ( Lxm.str lxm ) in
                        match x with
                                  None -> TK_IDENT ( lxm )
                                | Some keyw -> keyw ( lxm ) 
                }
        | _    { TK_ERROR ( Lxm.make lexbuf ) }
Erwan Jahier's avatar
Erwan Jahier committed

and comment_par = parse
Erwan Jahier's avatar
Erwan Jahier committed
          "*)" 
                { }
        | "\n" 
                {
                        Lxm.new_line ( lexbuf );
                        comment_par lexbuf      
                }
        | eof
                {
                        raise(Lexical_error("unterminated comment", 
                                            Lxm.dummy "unterminated comment"))
                }
        |       _
                { comment_par lexbuf }
Erwan Jahier's avatar
Erwan Jahier committed

and comment_par_bis = parse
Erwan Jahier's avatar
Erwan Jahier committed
          "*/" 
                { }
        | "\n" 
                {
                        Lxm.new_line ( lexbuf );
                        comment_par_bis lexbuf  
                }
        | eof
                {
                        raise(Lexical_error("unterminated comment", 
                                            Lxm.dummy "unterminated comment"))
                }
        |       _
                { comment_par_bis lexbuf }
Erwan Jahier's avatar
Erwan Jahier committed
and comment_line = parse
Erwan Jahier's avatar
Erwan Jahier committed
        '\n'
                {
                        Lxm.new_line ( lexbuf );
                }
        | eof
                { }
        |       _
                { comment_line lexbuf }