compilateur-projet-prog-1/clex.mll

298 lines
13 KiB
OCaml

{
(*
* Copyright (c) 2005 by Laboratoire Spécification et Vérification (LSV),
* UMR 8643 CNRS & ENS Cachan.
* Written by Jean Goubault-Larrecq. Derived from the csur project.
*
* Permission is granted to anyone to use this software for any
* purpose on any computer system, and to redistribute it freely,
* subject to the following restrictions:
*
* 1. Neither the author nor its employer is responsible for the consequences of use of
* this software, no matter how awful, even if they arise
* from defects in it.
*
* 2. The origin of this software must not be misrepresented, either
* by explicit claim or by omission.
*
* 3. Altered versions must be plainly marked as such, and must not
* be misrepresented as being the original software.
*
* 4. This software is restricted to non-commercial use only. Commercial
* use is subject to a specific license, obtainable from LSV.
*
*)
(* Analyse lexicale d'un sous-ensemble (tres) reduit de C.
*)
open Cparse
open Error
open Ctab
let string_buf = Buffer.create 256
let string_iter f s = (* = String.iter; pas present en OCaml 2.04. *)
let n = String.length s
in for i=0 to n-1 do f (s.[i]) done
let count yytext =
(oldcline := !cline; oldccol := !ccol;
string_iter (fun c -> match c with
'\n' -> (cline := !cline+1; ccol := 0)
(* | '\t' -> (ccol := !ccol + 8 - (!ccol mod 8)) *)
| _ -> ccol := !ccol+1) yytext)
let parse_hex yytext tend =
let n = ref 0
in let len = String.length yytext-tend
in ((for i=2 to len-1 do
let c = yytext.[i] in
match c with
'0'..'9' -> n := 16 * !n + (int_of_char c - int_of_char '0')
| 'a'..'f' -> n := 16 * !n + (int_of_char c + 10 - int_of_char 'a')
| 'A'..'F' -> n := 16 * !n + (int_of_char c + 10 - int_of_char 'A')
| _ -> fatal (Some (!cfile, !cline, !ccol-len, !cline, !ccol))
("invalid hexadecimal number " ^ yytext)
done);
!n)
let parse_oct yytext start tend =
let n = ref 0
in let len = String.length yytext-tend
in ((for i=start to len-1 do
let c = yytext.[i] in
match c with
'0'..'7' -> n := 8 * !n + (int_of_char c - int_of_char '0')
| _ -> fatal (Some (!cfile, !cline, !ccol-len, !cline, !ccol))
("invalid octal number " ^ yytext)
done);
!n)
let parse_dec yytext tend =
let n = ref 0
in let len = String.length yytext-tend
in ((for i=0 to len-1 do
let c = yytext.[i] in
match c with
'0'..'9' -> n := 10 * !n + (int_of_char c - int_of_char '0')
| _ -> fatal (Some (!cfile, !cline, !ccol-len, !cline, !ccol))
("invalid number " ^ yytext)
done);
!n)
}
let digit = ['0'-'9']
let letter = ['a'-'z' 'A'-'Z' '_']
let hex = ['a'-'f' 'A'-'F' '0'-'9']
let expo = ['E' 'e'] ['+' '-']? digit+
let fs = ['f' 'F' 'l' 'L']
let is = ['u' 'U' 'l' 'L']*
rule ctoken = parse
"/*" { count (Lexing.lexeme lexbuf); comment lexbuf; ctoken lexbuf }
| "//" { count (Lexing.lexeme lexbuf); inlcomment lexbuf; ctoken lexbuf }
| "auto" { count (Lexing.lexeme lexbuf); AUTO }
| "break" { count (Lexing.lexeme lexbuf); BREAK }
| "case" { count (Lexing.lexeme lexbuf); CASE }
| "char" { count (Lexing.lexeme lexbuf); CHAR }
| "const" { count (Lexing.lexeme lexbuf); CONST }
| "continue" { count (Lexing.lexeme lexbuf); CONTINUE }
| "default" { count (Lexing.lexeme lexbuf); DEFAULT }
| "do" { count (Lexing.lexeme lexbuf); DO }
| "double" { count (Lexing.lexeme lexbuf); DOUBLE }
| "else" { count (Lexing.lexeme lexbuf); ELSE }
| "enum" { count (Lexing.lexeme lexbuf); ENUM }
| "extern" { count (Lexing.lexeme lexbuf); EXTERN }
| "float" { count (Lexing.lexeme lexbuf); FLOATING }
| "for" { count (Lexing.lexeme lexbuf); FOR }
| "goto" { count (Lexing.lexeme lexbuf); GOTO }
| "if" { count (Lexing.lexeme lexbuf); IF }
| "int" { count (Lexing.lexeme lexbuf); INTEGER }
| "long" { count (Lexing.lexeme lexbuf); LONG }
| "register" { count (Lexing.lexeme lexbuf); REGISTER }
| "return" { count (Lexing.lexeme lexbuf); RETURN }
| "short" { count (Lexing.lexeme lexbuf); SHORT }
| "signed" { count (Lexing.lexeme lexbuf); SIGNED }
| "sizeof" { count (Lexing.lexeme lexbuf); SIZEOF }
| "static" { count (Lexing.lexeme lexbuf); STATIC }
| "struct" { count (Lexing.lexeme lexbuf); STRUCT }
| "switch" { count (Lexing.lexeme lexbuf); SWITCH }
| "typedef" { count (Lexing.lexeme lexbuf); TYPEDEF }
| "union" { count (Lexing.lexeme lexbuf); UNION }
| "unsigned" { count (Lexing.lexeme lexbuf); UNSIGNED }
| "void" { count (Lexing.lexeme lexbuf); VOID }
| "volatile" { count (Lexing.lexeme lexbuf); VOLATILE }
| "while" { count (Lexing.lexeme lexbuf); WHILE }
| letter (letter | digit)* { count (Lexing.lexeme lexbuf);
let yytext = Lexing.lexeme lexbuf in
IDENTIFIER yytext
}
| '0' ['x' 'X'] hex+ { count (Lexing.lexeme lexbuf);
CONSTANT (parse_hex (Lexing.lexeme lexbuf) 0) }
| '0' ['x' 'X'] hex+ ['u' 'U'] { count (Lexing.lexeme lexbuf);
CONSTANT (parse_hex (Lexing.lexeme lexbuf) 1) }
| '0' ['x' 'X'] hex+ ['l' 'L'] { count (Lexing.lexeme lexbuf);
CONSTANT (parse_hex (Lexing.lexeme lexbuf) 1) }
| '0' ['x' 'X'] hex+ ['u' 'U'] ['l' 'L'] { count (Lexing.lexeme lexbuf);
CONSTANT (parse_hex (Lexing.lexeme lexbuf) 2) }
| '0' ['x' 'X'] hex+ ['u' 'U'] ['l' 'L'] ['l' 'L'] { count (Lexing.lexeme lexbuf);
CONSTANT (parse_hex (Lexing.lexeme lexbuf) 3) }
| '0' ['0'-'7']+ { count (Lexing.lexeme lexbuf);
CONSTANT (parse_oct (Lexing.lexeme lexbuf) 1 0) }
| '0' ['0'-'7']+ ['u' 'U'] { count (Lexing.lexeme lexbuf);
CONSTANT (parse_oct (Lexing.lexeme lexbuf) 1 1) }
| '0' ['0'-'7']+ ['l' 'L'] { count (Lexing.lexeme lexbuf);
CONSTANT (parse_oct (Lexing.lexeme lexbuf) 1 1) }
| '0' ['0'-'7']+ ['u' 'U'] ['l' 'L'] { count (Lexing.lexeme lexbuf);
CONSTANT (parse_oct (Lexing.lexeme lexbuf) 1 2) }
| '0' ['0'-'7']+ ['u' 'U'] ['l' 'L'] ['l' 'L'] { count (Lexing.lexeme lexbuf);
CONSTANT (parse_oct (Lexing.lexeme lexbuf) 1 3) }
| digit+ { count (Lexing.lexeme lexbuf);
CONSTANT (parse_dec (Lexing.lexeme lexbuf) 0) }
| digit+ ['u' 'U'] { count (Lexing.lexeme lexbuf);
CONSTANT (parse_dec (Lexing.lexeme lexbuf) 1) }
| digit+ ['l' 'L'] { count (Lexing.lexeme lexbuf);
CONSTANT (parse_dec (Lexing.lexeme lexbuf) 1) }
| digit+ ['l' 'L' ] ['l' 'L' ] { count (Lexing.lexeme lexbuf);
CONSTANT (parse_dec (Lexing.lexeme lexbuf) 1) }
| digit+ ['u' 'U'] ['l' 'L'] { count (Lexing.lexeme lexbuf);
CONSTANT (parse_dec (Lexing.lexeme lexbuf) 2) }
| digit+ ['u' 'U'] ['l' 'L'] ['l' 'L'] { count (Lexing.lexeme lexbuf);
CONSTANT (parse_dec (Lexing.lexeme lexbuf) 3) }
| '\'' [^ '\'' '\\'] '\'' { count (Lexing.lexeme lexbuf);
CONSTANT (int_of_char (Lexing.lexeme_char lexbuf 1)) }
| '\'' '\\' ['0'-'7'] ['0'-'7']? ['0'-'7']? '\'' { count (Lexing.lexeme lexbuf);
CONSTANT (parse_oct (Lexing.lexeme lexbuf) 2 1) }
| '\'' '\\' 'a' '\'' { count (Lexing.lexeme lexbuf);
CONSTANT 7 (* bell, ^G *) }
| '\'' '\\' 'b' '\'' { count (Lexing.lexeme lexbuf);
CONSTANT (int_of_char '\b') }
| '\'' '\\' 'f' '\'' { count (Lexing.lexeme lexbuf);
CONSTANT 12 (* form feed, ^L *) }
| '\'' '\\' 'n' '\'' { count (Lexing.lexeme lexbuf);
CONSTANT (int_of_char '\n') }
| '\'' '\\' 'r' '\'' { count (Lexing.lexeme lexbuf);
CONSTANT (int_of_char '\r') }
| '\'' '\\' 't' '\'' { count (Lexing.lexeme lexbuf);
CONSTANT (int_of_char '\t')
(* bell, ^G *) }
| '\'' '\\' 'v' '\'' { count (Lexing.lexeme lexbuf);
CONSTANT 11 (* vertical tab, ^K *) }
| '\'' '\\' _ '\'' { count (Lexing.lexeme lexbuf);
CONSTANT (int_of_char (Lexing.lexeme_char lexbuf 2)) }
| "\""
{
count (Lexing.lexeme lexbuf); Buffer.reset string_buf;
string lexbuf;
STRING_LITERAL (Buffer.contents string_buf)
}
| "..." { count (Lexing.lexeme lexbuf); ELLIPSIS }
| ">>=" { count (Lexing.lexeme lexbuf); RIGHT_ASSIGN }
| "<<=" { count (Lexing.lexeme lexbuf); LEFT_ASSIGN }
| "+=" { count (Lexing.lexeme lexbuf); ADD_ASSIGN }
| "-=" { count (Lexing.lexeme lexbuf); SUB_ASSIGN }
| "*=" { count (Lexing.lexeme lexbuf); MUL_ASSIGN }
| "/=" { count (Lexing.lexeme lexbuf); DIV_ASSIGN }
| "%=" { count (Lexing.lexeme lexbuf); MOD_ASSIGN }
| "&=" { count (Lexing.lexeme lexbuf); AND_ASSIGN }
| "^=" { count (Lexing.lexeme lexbuf); XOR_ASSIGN }
| "|=" { count (Lexing.lexeme lexbuf); OR_ASSIGN }
| ">>" { count (Lexing.lexeme lexbuf); RIGHT_OP }
| "<<" { count (Lexing.lexeme lexbuf); LEFT_OP }
| "++" { count (Lexing.lexeme lexbuf); INC_OP }
| "--" { count (Lexing.lexeme lexbuf); DEC_OP }
| "->" { count (Lexing.lexeme lexbuf); PTR_OP }
| "&&" { count (Lexing.lexeme lexbuf); AND_OP }
| "||" { count (Lexing.lexeme lexbuf); OR_OP }
| "<=" { count (Lexing.lexeme lexbuf); LE_OP }
| ">=" { count (Lexing.lexeme lexbuf); GE_OP }
| "==" { count (Lexing.lexeme lexbuf); EQ_OP }
| "!=" { count (Lexing.lexeme lexbuf); NE_OP }
| ";" { count (Lexing.lexeme lexbuf); SEMI_CHR }
| ("{" | "<%") { count (Lexing.lexeme lexbuf); OPEN_BRACE_CHR }
| ("}" | "%>") { count (Lexing.lexeme lexbuf); CLOSE_BRACE_CHR }
| "," { count (Lexing.lexeme lexbuf); COMMA_CHR }
| ":" { count (Lexing.lexeme lexbuf); COLON_CHR }
| "=" { count (Lexing.lexeme lexbuf); EQ_CHR }
| "(" { count (Lexing.lexeme lexbuf); OPEN_PAREN_CHR }
| ")" { count (Lexing.lexeme lexbuf); CLOSE_PAREN_CHR }
| ("[" | "<:") { count (Lexing.lexeme lexbuf); OPEN_BRACKET_CHR }
| ("]" | ":>") { count (Lexing.lexeme lexbuf); CLOSE_BRACKET_CHR }
| "." { count (Lexing.lexeme lexbuf); DOT_CHR }
| "&" { count (Lexing.lexeme lexbuf); AND_CHR }
| "|" { count (Lexing.lexeme lexbuf); OR_CHR }
| "^" { count (Lexing.lexeme lexbuf); XOR_CHR }
| "!" { count (Lexing.lexeme lexbuf); BANG_CHR }
| "~" { count (Lexing.lexeme lexbuf); TILDE_CHR }
| "+" { count (Lexing.lexeme lexbuf); ADD_CHR }
| "-" { count (Lexing.lexeme lexbuf); SUB_CHR }
| "*" { count (Lexing.lexeme lexbuf); STAR_CHR }
| "/" { count (Lexing.lexeme lexbuf); DIV_CHR }
| "%" { count (Lexing.lexeme lexbuf); MOD_CHR }
| "<" { count (Lexing.lexeme lexbuf); OPEN_ANGLE_CHR }
| ">" { count (Lexing.lexeme lexbuf); CLOSE_ANGLE_CHR }
| "?" { count (Lexing.lexeme lexbuf); QUES_CHR }
| '#' { count (Lexing.lexeme lexbuf); line lexbuf }
| [' ' '\t' '\012' '\013' '\n' '\014']+ { count (Lexing.lexeme lexbuf); ctoken lexbuf }
| _ { fatal (Some (!cfile, !cline, !ccol, !cline, !ccol+1))
("bad character '" ^ (Lexing.lexeme lexbuf) ^ "'") }
| eof { EOF }
and comment = parse
"*/" { count (Lexing.lexeme lexbuf) }
| [^ '*']* { count (Lexing.lexeme lexbuf); comment lexbuf }
| eof { fatal (Some (!cfile, !cline, !ccol, !cline, !ccol)) "end of file reached inside comment" }
and inlcomment = parse
"\n" { count (Lexing.lexeme lexbuf) }
| [^ '\n']* { count (Lexing.lexeme lexbuf); inlcomment lexbuf }
and string = parse
'"' { () }
| '\n'+ { string lexbuf }
| '\\' ['0'-'7'] ['0'-'7']? ['0'-'7']? { Buffer.add_char string_buf (Char.chr (parse_oct (Lexing.lexeme lexbuf) 1 0)); string lexbuf }
| '\\' 'a' { Buffer.add_char string_buf '\007'; string lexbuf }
| '\\' 'b' { Buffer.add_char string_buf '\b'; string lexbuf }
| '\\' 'f' { Buffer.add_char string_buf '\014'; string lexbuf }
| '\\' 'n' { Buffer.add_char string_buf '\n'; string lexbuf }
| '\\' 'r' { Buffer.add_char string_buf '\r'; string lexbuf }
| '\\' 't' { Buffer.add_char string_buf '\t'; string lexbuf }
| '\\' 'v' { Buffer.add_char string_buf '\013'; string lexbuf }
| '\\' _ { Buffer.add_char string_buf (Lexing.lexeme_char lexbuf 1); string lexbuf }
| [^ '\\' '\n' '"']+ { Buffer.add_string string_buf (Lexing.lexeme lexbuf); string lexbuf }
| _ { Buffer.add_char string_buf (Lexing.lexeme_char lexbuf 0); string lexbuf }
| eof { fatal (Some (!cfile, !cline, !ccol, !cline, !ccol)) "end of file reached inside string literal" }
and line = parse
['0'-'9']+ { cline := parse_dec (Lexing.lexeme lexbuf) 0 - 1; line2 lexbuf }
| [' ' '\t']+ { count (Lexing.lexeme lexbuf); line lexbuf }
| '\n' { count (Lexing.lexeme lexbuf); ctoken lexbuf }
| "\"" { count (Lexing.lexeme lexbuf); Buffer.reset string_buf;
string lexbuf;
cfile := Buffer.contents string_buf;
ctoken lexbuf
}
| eof { fatal (Some (!cfile, !cline, !ccol, !cline, !ccol)) "end of file reached inside # directive" }
and line2 = parse
[' ' '\t']+ { count (Lexing.lexeme lexbuf); line2 lexbuf }
| '\n' { count (Lexing.lexeme lexbuf); ctoken lexbuf }
| "\"" { count (Lexing.lexeme lexbuf); Buffer.reset string_buf;
string lexbuf;
cfile := Buffer.contents string_buf;
line3 lexbuf
}
| eof { fatal (Some (!cfile, !cline, !ccol, !cline, !ccol)) "end of file reached inside # directive" }
and line3 = parse
'\n' { count (Lexing.lexeme lexbuf); ctoken lexbuf }
| _ { count (Lexing.lexeme lexbuf); line3 lexbuf }
| eof { fatal (Some (!cfile, !cline, !ccol, !cline, !ccol)) "end of file reached inside # directive" }