From ba61dfd69504ec6263a9dee9931d93adeb6f3142 Mon Sep 17 00:00:00 2001 From: Rutger Broekhoff Date: Mon, 7 Jul 2025 21:52:08 +0200 Subject: Initialize repository --- lib/nix/dune | 15 +++ lib/nix/elaborator.ml | 208 +++++++++++++++++++++++++++++++++ lib/nix/lexer.mll | 315 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/nix/nix.ml | 20 ++++ lib/nix/parser.mly | 310 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/nix/printer.ml | 176 ++++++++++++++++++++++++++++ lib/nix/tokens.ml | 64 ++++++++++ lib/nix/types.ml | 112 ++++++++++++++++++ 8 files changed, 1220 insertions(+) create mode 100644 lib/nix/dune create mode 100644 lib/nix/elaborator.ml create mode 100644 lib/nix/lexer.mll create mode 100644 lib/nix/nix.ml create mode 100644 lib/nix/parser.mly create mode 100644 lib/nix/printer.ml create mode 100644 lib/nix/tokens.ml create mode 100644 lib/nix/types.ml (limited to 'lib/nix') diff --git a/lib/nix/dune b/lib/nix/dune new file mode 100644 index 0000000..3954c8a --- /dev/null +++ b/lib/nix/dune @@ -0,0 +1,15 @@ +(menhir + (modules parser) + (flags "--dump" "--strict" "--external-tokens" "Tokens") + (infer true)) + +(ocamllex + (modules lexer)) + +(library + (name nix) + (preprocess + (pps ppx_sexp_conv)) + (instrumentation + (backend bisect_ppx)) + (libraries core core_unix core_unix.filename_unix pprint ppx_sexp_conv str)) diff --git a/lib/nix/elaborator.ml b/lib/nix/elaborator.ml new file mode 100644 index 0000000..36ee0d4 --- /dev/null +++ b/lib/nix/elaborator.ml @@ -0,0 +1,208 @@ +open Core +open Types + +(* The Nix elaborator does a few things: + - Attribute paths are transformed into a simple list of expressions: + + Simple identifiers are rewritten to string values + + Antiquotations are rewritten to their component expressions + + Anything else, that is not a string value, is rejected + and raises an exception + - In 'inherit (...) x1 ... xn', x1 ... xn are checked for 'reasonably' being + identifiers, i.e., being one of x, "x" and ${"x"}. + - Nested attribute paths are unfolded and attribute sets are merged where + possible. (Where we mean 'what Nix does' with 'where possible'; see the + comment at the respective function.) + - Paths are turned into strings and made absolute w.r.t. the current + working directory. + - Indented strings are converted to their 'normal' counterpart. *) + +exception ElaborateError of string + +type attr_set = recursivity * binding list + +let set_expr (r, bs) = Val (AttSet (r, bs)) +let get_id = function Id x -> x | _ -> assert false + +let rec update_bnd (bs : binding list) (x : string) ~(f : expr option -> expr) = + match bs with + | [] -> [ AttrPath ([ Val (Str (x, [])) ], f None) ] + | AttrPath ([ Val (Str (y, [])) ], e) :: s' when String.(x = y) -> + AttrPath ([ Val (Str (y, [])) ], f (Some e)) :: s' + | Inherit (_, ids) :: _ + when List.exists ids ~f:(fun e -> String.(get_id e = x)) -> + raise (ElaborateError "Cannot update inherit") + | bnd :: s' -> bnd :: update_bnd s' x ~f + +let set_update_bnd (r, bs) x ~f = (r, update_bnd bs x ~f) + +let rec has_bnd (bs : binding list) (x : string) : bool = + match bs with + | [] -> false + | AttrPath ([ Val (Str (y, [])) ], _) :: _ when String.(x = y) -> true + | Inherit (_, ids) :: _ + when List.exists ids ~f:(fun e -> String.(get_id e = x)) -> + true + | _ :: bs' -> has_bnd bs' x + +let merge_bnds bs1 bs2 : binding list = + List.fold_left bs2 ~init:bs1 ~f:(fun bs1' b2 -> + match b2 with + | AttrPath ([ Val (Str (x, [])) ], e) -> + update_bnd bs1' x ~f:(function + | Some _ -> raise (ElaborateError "Duplicated attribute") + | None -> e) + | AttrPath ([ d ], e) -> AttrPath ([ d ], e) :: bs1' + | Inherit (md, xs) -> + if List.for_all xs ~f:(fun e -> not (has_bnd bs1' (get_id e))) then + Inherit (md, xs) :: bs1' + else raise (ElaborateError "Duplicated attribute") + | _ -> assert false) + +(* This function intentionally clobbers recursivity, because that is the way + that Nix likes to handle attribute insertion. See + (1) https://github.com/NixOS/nix/issues/9020 + (2) https://github.com/NixOS/nix/issues/11268 + (3) https://github.com/NixOS/nix/pull/11294 *) +let rec insert (bs : binding list) (path : expr list) (e : expr) = + match path with + | [] -> raise (ElaborateError "Cannot insert attribute with empty path") + | [ Val (Str (x, [])) ] -> + update_bnd bs x ~f:(function + | None -> e + | Some (Val (AttSet (r1, bs1))) -> ( + match e with + | Val (AttSet (_, bs2)) -> set_expr (r1, merge_bnds bs1 bs2) + | _ -> raise (ElaborateError "Duplicated attribute")) + | _ -> raise (ElaborateError "Duplicated attribute")) + | Val (Str (x, [])) :: rest -> + update_bnd bs x ~f:(function + | Some (Val (AttSet (r, bs))) -> Val (AttSet (r, insert bs rest e)) + | Some _ -> raise (ElaborateError "Duplicated attribute") + | None -> Val (AttSet (Nonrec, insert [] rest e))) + | [ part ] -> AttrPath ([ part ], e) :: bs + | part :: rest -> + AttrPath ([ part ], Val (AttSet (Nonrec, insert [] rest e))) :: bs + +let insert_inherit (bs : binding list) (from : expr option) (es : expr list) = + if List.for_all es ~f:(fun e -> not (has_bnd bs (get_id e))) then + Inherit (from, es) :: bs + else raise (ElaborateError "Duplicated attribute") + +let simplify_path_component = function + | Id x -> Val (Str (x, [])) + | Val (Str (s, ess)) -> Val (Str (s, ess)) + | Aquote e -> e + | _ -> raise (ElaborateError "Unexpected path component") + +let simplify_path = List.map ~f:simplify_path_component + +let simplify_bnd_paths = + List.map ~f:(fun bnd -> + match bnd with + | AttrPath (path, e) -> AttrPath (simplify_path path, e) + | Inherit (me, xs) -> Inherit (me, xs)) + +(* Law: concat_lines ∘ split_lines = id *) + +let rec split_lines s = + match String.lsplit2 s ~on:'\n' with + | Some (s1, s2) -> s1 :: split_lines s2 + | None -> [ s ] + +let rec concat_lines = function + | [] -> "" + | [ x ] -> x + | x :: xs -> x ^ "\n" ^ concat_lines xs + +let map_tail ~f = function [] -> [] | x :: xs -> x :: List.map ~f xs + +let unindent n s ~skip_first_line = + let map_op ~f = if skip_first_line then map_tail ~f else List.map ~f in + split_lines s + |> map_op ~f:(fun line -> + let expected_prefix = String.make n ' ' in + String.chop_prefix_if_exists ~prefix:expected_prefix line) + |> concat_lines + +let is_spaces l = String.(strip l ~drop:(Char.( = ) ' ') |> is_empty) + +let drop_first_empty_line s = + match String.lsplit2 s ~on:'\n' with + | Some (l, s') when is_spaces l -> s' + | _ -> s + +let rec process ?(dir = None) = function + | BinaryOp (op, e1, e2) -> BinaryOp (op, process ~dir e1, process ~dir e2) + | UnaryOp (op, e) -> UnaryOp (op, process ~dir e) + | Cond (e1, e2, e3) -> Cond (process ~dir e1, process ~dir e2, process ~dir e3) + | With (e1, e2) -> With (process ~dir e1, process ~dir e2) + | Assert (e1, e2) -> Assert (process ~dir e1, process ~dir e2) + | Test (e1, es) -> + Test (process ~dir e1, List.(simplify_path es >>| process ~dir)) + | SetLet bs -> SetLet (process_bnds ~dir bs) + | Let (bs, e) -> Let (process_bnds ~dir bs, process ~dir e) + | Val v -> Val (process_val ~dir v) + | Id x -> Id x + | Select (e, es, me) -> + Select + ( process ~dir e, + List.(simplify_path es >>| process ~dir), + Option.(me >>| process ~dir) ) + | Apply (e1, e2) -> Apply (process ~dir e1, process ~dir e2) + | Aquote e -> Aquote (process ~dir e) + +and process_val ~dir = function + | Str (s, ess) -> Str (s, List.(ess >>| fun (e, s) -> (process ~dir e, s))) + | IStr (n, s, ess) -> + let s' = drop_first_empty_line (unindent n s ~skip_first_line:false) + and ess' = + List.map ess ~f:(fun (e, s) -> + (process ~dir e, unindent n s ~skip_first_line:true)) + in + Str (s', ess') + | Lambda (p, e) -> Lambda (process_pattern ~dir p, process ~dir e) + | List es -> List List.(es >>| process ~dir) + | AttSet (r, bs) -> AttSet (r, process_bnds ~dir bs) + | Path p -> ( + if Filename.is_absolute p then Str (p, []) + else + match dir with + | Some dir when Filename.is_absolute dir -> + Str (Filename.concat dir p, []) + | Some _ -> + raise + (ElaborateError "Provided directory should be an absolute path") + | None -> raise (ElaborateError "Do not know how to resolve path")) + | v -> v + +and process_bnds ~dir bs = + bs + |> List.map ~f:(function + | AttrPath (es, e) -> + AttrPath (List.(es >>| process ~dir), process ~dir e) + | Inherit (me, xs) -> + Inherit (Option.(me >>| process ~dir), process_inherit_ids xs)) + |> simplify_bnd_paths + |> List.fold ~init:[] ~f:(fun bs' bnd -> + match bnd with + | AttrPath (path, e) -> insert bs' path e + | Inherit (from, es) -> insert_inherit bs' from es) + +and process_inherit_ids = + List.map ~f:(function + | Id x | Val (Str (x, [])) | Aquote (Val (Str (x, []))) -> Id x + | _ -> raise (ElaborateError "Unexpected expression in inherit")) + +and process_pattern ~dir = function + | Alias x -> Alias x + | ParamSet (mx, (ps, k)) -> ParamSet (mx, (process_param_set ~dir mx ps, k)) + +and process_param_set ~dir ?(seen = String.Set.empty) mx ps = + match ps with + | [] -> [] + | (y, me) :: ps' -> + if Set.mem seen y || Option.mem mx y ~equal:String.( = ) then + raise (ElaborateError "Duplicated function argument") + else + (y, Option.(me >>| process ~dir)) + :: process_param_set ~dir mx ps' ~seen:(Set.add seen y) diff --git a/lib/nix/lexer.mll b/lib/nix/lexer.mll new file mode 100644 index 0000000..023d888 --- /dev/null +++ b/lib/nix/lexer.mll @@ -0,0 +1,315 @@ +{ +open Core +open Tokens + +exception Error of string + +(* Types of curly braces. + AQUOTE corresponds to the braces for antiquotation, i.e. '${...}' + and SET to an attribute set '{...}'. + *) +type braces = + | AQUOTE + | SET + +let print_stack s = + let b = Buffer.create 100 in + Buffer.add_string b "[ "; + List.iter s ~f:(function + | AQUOTE -> Buffer.add_string b "AQUOTE; " + | SET -> Buffer.add_string b "SET; " + ); + Buffer.add_string b "]"; + Buffer.contents b + +let token_of_str state buf = + match state with + | `Start -> STR_START (Buffer.contents buf) + | `Mid -> STR_MID (Buffer.contents buf) + +let token_of_istr state buf = + match state with + | `Start -> ISTR_START (Buffer.contents buf) + | `Mid -> ISTR_MID (Buffer.contents buf) + +(* lookup table for one-character tokens *) +let char_table = Array.create ~len:94 EOF +let _ = + List.iter ~f:(fun (k, v) -> Array.set char_table ((int_of_char k) - 1) v) + [ + '.', SELECT; + '?', QMARK; + '!', NOT; + '=', ASSIGN; + '<', LT; + '>', GT; + '[', LBRACK; + ']', RBRACK; + '+', PLUS; + '-', MINUS; + '*', TIMES; + '/', SLASH; + '(', LPAREN; + ')', RPAREN; + ':', COLON; + ';', SEMICOLON; + ',', COMMA; + '@', AS + ] + +(* lookup table for two- and three-character tokens *) +let str_table = Hashtbl.create (module String) ~size:10 +let _ = + List.iter ~f:(fun (kwd, tok) -> Hashtbl.set str_table ~key:kwd ~data:tok) + [ + "//", MERGE; + "++", CONCAT; + "<=", LTE; + ">=", GTE; + "==", EQ; + "!=", NEQ; + "&&", AND; + "||", OR; + "->", IMPL; + "...", ELLIPSIS + ] + +(* lookup table for keywords *) +let keyword_table = Hashtbl.create (module String) ~size:10 +let _ = + List.iter ~f:(fun (kwd, tok) -> Hashtbl.set keyword_table ~key:kwd ~data:tok) + [ "with", WITH; + "rec", REC; + "let", LET; + "in", IN; + "inherit", INHERIT; + "if" , IF; + "then", THEN; + "else", ELSE; + "assert", ASSERT; + "or", ORDEF ] + +(* replace an escape sequence by the corresponding character(s) *) +let unescape = function + | "\\n" -> "\n" + | "\\r" -> "\r" + | "\\t" -> "\t" + | "\\\\" -> "\\" + | "\\${" -> "${" + | "''$" -> "$" + | "$$" -> "$" + | "'''" -> "''" + | "''\\t" -> "\t" + | "''\\r" -> "\r" + | "''\\n" -> "\n" + | x -> + failwith (Printf.sprintf "unescape unexpected arg %s" x) + +let collect_tokens lexer q lexbuf = + let stack = ref [] in + let queue = Stdlib.Queue.create () in + let rec go () = + match (try Some (Stdlib.Queue.take queue) with Stdlib.Queue.Empty -> None) with + | Some token -> + ( + match token, !stack with + | AQUOTE_CLOSE, [] -> + Stdlib.Queue.add AQUOTE_CLOSE q + | EOF, _ -> + Stdlib.Queue.add EOF q; + | _, _ -> + Stdlib.Queue.add token q; + go () + ) + | None -> + lexer queue stack lexbuf; + go () + in + Stdlib.Queue.add AQUOTE_OPEN q; + stack := [AQUOTE]; + lexer queue stack lexbuf; + go () + +(* utility functions *) +let print_position lexbuf = + let pos = Lexing.lexeme_start_p lexbuf in + Printf.sprintf "%s:%d:%d" pos.pos_fname + pos.pos_lnum (pos.pos_cnum - pos.pos_bol + 1) + + +let set_filename fname (lexbuf: Lexing.lexbuf) = + let pos = lexbuf.lex_curr_p in + lexbuf.lex_curr_p <- { pos with pos_fname = fname }; lexbuf + +} + +let nzdigit = ['1'-'9'] +let digit = nzdigit | '0' +let float = (nzdigit digit* '.' digit* | '0'? '.' digit+) (['E' 'e'] ['+' '-']? digit+)? +let alpha = ['a'-'z' 'A'-'Z'] +let alpha_digit = alpha | digit +let path_chr = alpha_digit | ['.' '_' '-' '+'] +let path = path_chr* ('/' path_chr+)+ +let spath = alpha_digit path_chr* ('/' path_chr+)* +let uri_chr = ['%' '/' '?' ':' '@' '&' '=' '+' '$' ',' '-' '_' '.' '!' '~' '*' '\''] +let scheme = alpha (alpha | ['+' '-' '.'])* +let uri = scheme ':' (alpha_digit | uri_chr)+ +let char_tokens = ['.' '?' '!' '=' '<' '>' '[' ']' '+' '-' '*' '/' '^' '(' ')' ':' ';' ',' '@'] + +rule get_tokens q s = parse +(* skip whitespeces *) +| [' ' '\t' '\r'] + { get_tokens q s lexbuf } +(* increase line count for new lines *) +| '\n' + { Lexing.new_line lexbuf; get_tokens q s lexbuf } +| char_tokens as c + { Stdlib.Queue.add (Array.get char_table ((int_of_char c) - 1)) q } +| ("//" | "++" | "<=" | ">=" | "==" | "!=" | "&&" | "||" | "->" | "...") as s + { Stdlib.Queue.add (Hashtbl.find_exn str_table s) q} +| digit+ as i + { Stdlib.Queue.add (INT i) q } +| float + { Stdlib.Queue.add (FLOAT (Lexing.lexeme lexbuf)) q } +| path + { Stdlib.Queue.add (PATH (Lexing.lexeme lexbuf)) q } +| '<' (spath as p) '>' + { Stdlib.Queue.add (SPATH p) q } +| '~' path as p + { Stdlib.Queue.add (HPATH p) q } +| uri + { Stdlib.Queue.add(URI (Lexing.lexeme lexbuf)) q } +(* keywords or identifiers *) +| ((alpha | '_')+ (alpha_digit | ['_' '\'' '-'])*) as id + { Stdlib.Queue.add (Hashtbl.find keyword_table id |> Option.value ~default:(ID id)) q} +(* comments *) +| '#' ([^ '\n']* as c) + { ignore c; get_tokens q s lexbuf} +| "/*" + { comment (Buffer.create 64) lexbuf; + get_tokens q s lexbuf + } +(* the following three tokens change the braces stack *) +| "${" + { Stdlib.Queue.add AQUOTE_OPEN q; s := AQUOTE :: !s } +| '{' + { Stdlib.Queue.add LBRACE q; s := SET :: !s } +| '}' + { + match !s with + | AQUOTE :: rest -> + Stdlib.Queue.add AQUOTE_CLOSE q; s := rest + | SET :: rest -> + Stdlib.Queue.add RBRACE q; s := rest + | _ -> + let pos = print_position lexbuf in + let err = Printf.sprintf "Unbalanced '}' at %s\n" pos in + raise (Error err) + } +(* a double-quoted string *) +| '"' + { string `Start (Buffer.create 64) q lexbuf } +(* an indented string *) +| "''" (' '+ as ws) + { istring `Start (Some (String.length ws)) (Buffer.create 64) q lexbuf } +| "''" + { istring `Start None (Buffer.create 64) q lexbuf } +(* End of input *) +| eof + { Stdlib.Queue.add EOF q } +(* any other character raises an exception *) +| _ + { + let pos = print_position lexbuf in + let tok = Lexing.lexeme lexbuf in + let err = Printf.sprintf "Unexpected character '%s' at %s\n" tok pos in + raise (Error err) + } + +(* Nix does not allow nested comments, but it is still handy to lex it + separately because we can properly increase line count. *) +and comment buf = parse + | '\n' + {Lexing.new_line lexbuf; Buffer.add_char buf '\n'; comment buf lexbuf} + | "*/" + { () } + | _ as c + { Buffer.add_char buf c; comment buf lexbuf } + +and string state buf q = parse + | '"' (* terminate when we hit '"' *) + { Stdlib.Queue.add (token_of_str state buf) q; Stdlib.Queue.add STR_END q } + | '\n' + { Lexing.new_line lexbuf; Buffer.add_char buf '\n'; string state buf q lexbuf } + | ("\\n" | "\\r" | "\\t" | "\\\\" | "\\${") as s + { Buffer.add_string buf (unescape s); string state buf q lexbuf } + | "\\" (_ as c) (* add the character verbatim *) + { Buffer.add_char buf c; string state buf q lexbuf } + | "${" (* collect all the tokens till we hit the matching '}' *) + { + Stdlib.Queue.add (token_of_str state buf) q; + collect_tokens get_tokens q lexbuf; + string `Mid (Buffer.create 64) q lexbuf + } + | _ as c (* otherwise just add the character to the buffer *) + { Buffer.add_char buf c; string state buf q lexbuf } + +and istring state imin buf q = parse + | ('\n' ' '* "''") + { + Lexing.new_line lexbuf; + Buffer.add_string buf "\n"; + let indent = match imin with | None -> 0 | Some i -> i in + Stdlib.Queue.add (token_of_istr state buf) q; + Stdlib.Queue.add (ISTR_END indent) q + } + | "''" + { + let indent = match imin with | None -> 0 | Some i -> i in + Stdlib.Queue.add (token_of_istr state buf) q; + Stdlib.Queue.add (ISTR_END indent) q + } + | ('\n' ' '* '\n') as s + { + Lexing.new_line lexbuf; + Lexing.new_line lexbuf; + Buffer.add_string buf s; + istring state imin buf q lexbuf + } + | ('\n' (' '* as ws)) as s + { + Lexing.new_line lexbuf; + Buffer.add_string buf s; + let ws_count = String.length ws in + match imin with + | None -> + istring state (Some ws_count) buf q lexbuf + | Some i -> + istring state (Some (min i ws_count)) buf q lexbuf + } + | ("''$" | "'''" | "''\\t" | "''\\r" | "''\\n") as s + { Buffer.add_string buf (unescape s); istring state imin buf q lexbuf } + | "''\\" (_ as c) + { Buffer.add_char buf c; istring state imin buf q lexbuf } + | "${" + { + Stdlib.Queue.add (token_of_istr state buf) q; + collect_tokens get_tokens q lexbuf; + istring `Mid imin (Buffer.create 64) q lexbuf + } + | _ as c + { Buffer.add_char buf c; istring state imin buf q lexbuf } +{ + +let rec next_token + (q: token Stdlib.Queue.t) + (s: braces list ref) + (lexbuf: Lexing.lexbuf) + : token = + match (try Some (Stdlib.Queue.take q) with | Stdlib.Queue.Empty -> None) with + | Some token -> + token + | None -> + get_tokens q s lexbuf; + next_token q s lexbuf +} diff --git a/lib/nix/nix.ml b/lib/nix/nix.ml new file mode 100644 index 0000000..39dc94c --- /dev/null +++ b/lib/nix/nix.ml @@ -0,0 +1,20 @@ +open Core +module Ast = Types +module Printer = Printer + +exception ParseError of string + +let parse ~filename (data : string) = + let lexbuf = Lexer.set_filename filename (Lexing.from_string data) + and q, s = (Stdlib.Queue.create (), ref []) in + try Parser.main (Lexer.next_token q s) lexbuf with + | Lexer.Error msg -> + let msg' = String.rstrip msg in + raise (ParseError (sprintf "Lexing error: %s" msg')) + | Parser.Error -> + let msg = sprintf "Parse error at %s" (Lexer.print_position lexbuf) in + raise (ParseError msg) + +let elaborate = Elaborator.process + +exception ElaborateError = Elaborator.ElaborateError diff --git a/lib/nix/parser.mly b/lib/nix/parser.mly new file mode 100644 index 0000000..dc1638d --- /dev/null +++ b/lib/nix/parser.mly @@ -0,0 +1,310 @@ +/* Tokens with data */ +%token INT +%token FLOAT +/* A path */ +%token PATH +/* Search path, enclosed in <> */ +%token SPATH +/* Home path, starts with ~ */ +%token HPATH +%token URI +%token STR_START +%token STR_MID +%token STR_END +%token ISTR_START +%token ISTR_MID +%token ISTR_END +%token ID +/* Tokens that stand for themselves */ +%token SELECT "." +%token QMARK "?" +%token CONCAT "++" +%token NOT "!" +%token MERGE "//" +%token ASSIGN "=" +%token LT "<" +%token LTE "<=" +%token GT ">" +%token GTE ">=" +%token EQ "==" +%token NEQ "!=" +%token AND "&&" +%token OR "||" +%token IMPL "->" +%token AQUOTE_OPEN "${" +%token AQUOTE_CLOSE "}$" +%token LBRACE "{" +%token RBRACE "}" +%token LBRACK "[" +%token RBRACK "]" +%token PLUS "+" +%token MINUS "-" +%token TIMES "*" +%token SLASH "/" +%token LPAREN "(" +%token RPAREN ")" +%token COLON ":" +%token SEMICOLON ";" +%token COMMA "," +%token ELLIPSIS "..." +%token AS "@" +/* Keywords */ +%token WITH "with" +%token REC "rec" +%token LET "let" +%token IN "in" +%token INHERIT "inherit" +%token IF "if" +%token THEN "then" +%token ELSE "else" +%token ASSERT "assert" +%token ORDEF "or" + +/* End of input */ +%token EOF + +%{ + open Types +%} + +%start main + +%% + +main: +| e = expr0 EOF + { e } + +expr0: +| "if"; e1 = expr0; "then"; e2 = expr0; "else"; e3 = expr0 + { Cond (e1, e2, e3) } +| "with"; e1 = expr0; ";"; e2 = expr0 + { With (e1, e2) } +| "assert"; e1 = expr0; ";"; e2 = expr0 + { Assert (e1, e2) } +| "let"; xs = delimited("{", list(binding), "}") + { SetLet xs } +| "let"; xs = list(binding); "in"; e = expr0 + { Let (xs, e) } +| l = lambda + { Val l } +| e = expr1 + { e } + +/* Rules expr1-expr14 are almost direct translation of the operator + precedence table: + https://nixos.org/nix/manual/#sec-language-operators */ + +%inline binary_expr(Lhs, Op, Rhs): +| lhs = Lhs; op = Op; rhs = Rhs + { BinaryOp (op, lhs, rhs) } + +expr1: +| e = binary_expr(expr2, "->" {Impl}, expr1) +| e = expr2 + { e } + +expr2: +| e = binary_expr(expr2, "||" {Or}, expr3) +| e = expr3 + { e } + +expr3: +| e = binary_expr(expr3, "&&" {And}, expr4) +| e = expr4 + { e } + +%inline expr4_ops: +| "==" { Eq } +| "!=" { Neq } + +expr4: +| e = binary_expr(expr5, expr4_ops, expr5) +| e = expr5 + { e } + +%inline expr5_ops: +| "<" { Lt } +| ">" { Gt } +| "<=" { Lte } +| ">=" { Gte } + +expr5: +| e = binary_expr(expr6, expr5_ops, expr6) +| e = expr6 + { e } + +expr6: +| e = binary_expr(expr7, "//" {Merge}, expr6) +| e = expr7 + { e } + +expr7: +| e = preceded("!", expr7) + { UnaryOp (Not, e) } +| e = expr8 + { e } + +%inline expr8_ops: +| "+" { Plus } +| "-" { Minus } + +expr8: +| e = binary_expr(expr8, expr8_ops, expr9) +| e = expr9 + { e } + +%inline expr9_ops: +| "*" { Mult } +| "/" { Div } + +expr9: +| e = binary_expr(expr9, expr9_ops, expr10) +| e = expr10 + { e } + +expr10: +| e = binary_expr(expr11, "++" {Concat}, expr10) +| e = expr11 + { e } + +expr11: +| e = expr12 "?" p = attr_path + { Test (e, p) } +| e = expr12 + { e } + +expr12: +| e = preceded("-", expr13) + { UnaryOp (Negate, e) } +| e = expr13 + { e } + +expr13: +| f = expr13; arg = expr14 + { Apply (f, arg) } +| e = expr14 + { e } + +%inline selectable: +| s = set + { Val s } +| id = ID + { Id id } +| e = delimited("(", expr0, ")") + { e } + +expr14: +| e = selectable; "."; p = attr_path; o = option(preceded("or", expr14)) + { Select (e, p, o) } +| e = atomic_expr; "or" + { Apply (e, Id "or") } +| e = atomic_expr + { e } + +atomic_expr: +| id = ID + { Id id } +| v = value + { Val v } +| e = delimited("(", expr0, ")") + { e } + +attr_path: +| p = separated_nonempty_list(".", attr_path_component) + { p } + +attr_path_component: +| "or" + { Id "or" } +| id = ID + { Id id } +| e = delimited("${", expr0, "}$") + { Aquote e } +| s = str + { Val s } + +value: +| s = str + { s } +| s = istr + { s } +| i = INT + {Int i} +| f = FLOAT + { Float f } +| p = PATH + { Path p } +| sp = SPATH + { SPath sp } +| hp = HPATH + { HPath hp } +| uri = URI + { Uri uri } +| l = nixlist + { l } +| s = set + { s } + +%inline str_mid(X): +| xs = list(pair(delimited("${", expr0, "}$"), X)) { xs } + +/* Double-quoted string */ +str: +| start = STR_START; mids = str_mid(STR_MID); STR_END + { Str (start, mids) } + +/* Indented string */ +istr: +| start = ISTR_START; mids = str_mid(ISTR_MID); i = ISTR_END + { IStr (i, start, mids) } + +/* Lists and sets */ +nixlist: +| xs = delimited("[", list(expr14), "]") + { List xs } + +empty_set: +| "{"; "}" {} + +set: +| empty_set + { AttSet (Nonrec, []) } +| xs = delimited("{", nonempty_list(binding), "}") + { AttSet (Nonrec, xs) } +| xs = preceded("rec", delimited("{", list(binding), "}")) + { AttSet (Rec, xs) } + +binding: +| kv = terminated(separated_pair(attr_path, "=", expr0), ";") + { let (k, v) = kv in AttrPath (k, v) } +| xs = delimited("inherit", pair(option(delimited("(", expr0, ")")), list(attr_path_component)), ";") + { let (prefix, ids) = xs in Inherit (prefix, ids) } + +lambda: +| id = ID; "@"; p = param_set; ":"; e = expr0 + { Lambda (ParamSet (Some id, p), e) } +| p = param_set; "@"; id = ID; ":"; e = expr0 + { Lambda (ParamSet (Some id, p), e) } +| p = param_set; ":"; e = expr0 + { Lambda (ParamSet (None, p), e) } +| id = ID; ":"; e = expr0 + { Lambda (Alias id, e) } + +%inline param_set: +| empty_set + { ([], Exact) } +| "{"; "..."; "}" + { ([], Loose) } +| ps = delimited("{", pair(pair(params, ","?), boption("...")), "}") + { let ((ps, _), ellipsis) = ps in (ps, if ellipsis then Loose else Exact) } + +params: +| p = param + { [p] } +| ps = params; ","; p = param + { ps @ [p] } + +%inline param: +p = pair(ID, option(preceded("?", expr0))) + { p } diff --git a/lib/nix/printer.ml b/lib/nix/printer.ml new file mode 100644 index 0000000..57e81f4 --- /dev/null +++ b/lib/nix/printer.ml @@ -0,0 +1,176 @@ +open Core +open Types +open PPrint + +let rec escape_chlist = function + | [] -> [] + | '$' :: '{' :: l' -> '\\' :: '$' :: '{' :: escape_chlist l' + | '\n' :: l' -> '\\' :: 'n' :: escape_chlist l' + | '\r' :: l' -> '\\' :: 'r' :: escape_chlist l' + | '\t' :: l' -> '\\' :: 't' :: escape_chlist l' + | '\\' :: l' -> '\\' :: '\\' :: escape_chlist l' + | '"' :: l' -> '\\' :: '"' :: escape_chlist l' + | c :: l' -> c :: escape_chlist l' + +let escape_string s = s |> String.to_list |> escape_chlist |> String.of_list +let out_width = ref 80 +let set_width i = out_width := i +let indent = ref 2 +let set_indent i = indent := i + +let rec doc_of_expr = function + | BinaryOp (op, lhs, rhs) -> + let lhs_doc = maybe_parens_bop op `Left lhs + and rhs_doc = maybe_parens_bop op `Right rhs in + infix !indent 1 (doc_of_bop op) lhs_doc rhs_doc + | UnaryOp (op, e) -> precede (doc_of_uop op) (maybe_parens (prec_of_uop op) e) + | Cond (e1, e2, e3) -> + surround !indent 1 + (soft_surround !indent 1 (string "if") (doc_of_expr e1) (string "then")) + (doc_of_expr e2) + (string "else" ^^ nest !indent (break 1 ^^ doc_of_expr e3)) + | With (e1, e2) -> + flow (break 1) [ string "with"; doc_of_expr e1 ^^ semi; doc_of_expr e2 ] + | Assert (e1, e2) -> + flow (break 1) [ string "assert"; doc_of_expr e1 ^^ semi; doc_of_expr e2 ] + | Test (e, path) -> + maybe_parens 4 e ^^ space ^^ string "?" + ^^ group (break 1 ^^ separate_map dot doc_of_expr path) + | SetLet bs -> + surround !indent 1 + (string "let " ^^ lbrace) + (group (separate_map (break 1) doc_of_binding bs)) + rbrace + | Let (bs, e) -> + surround !indent 1 (string "let") + (separate_map (break 1) doc_of_binding bs) + (prefix !indent 1 (string "in") (doc_of_expr e)) + | Val v -> doc_of_val v + | Id id -> string id + | Select (e, path, oe) -> + maybe_parens 1 e ^^ dot ^^ doc_of_attpath path + ^^ optional + (fun e -> + space ^^ string "or" ^^ nest !indent (break 1 ^^ maybe_parens 1 e)) + oe + | Apply (e1, e2) -> prefix !indent 1 (maybe_parens 2 e1) (maybe_parens 2 e2) + | Aquote e -> surround !indent 0 (string "${") (doc_of_expr e) (string "}") + +and maybe_parens lvl e = + if prec_of_expr e >= lvl then surround !indent 0 lparen (doc_of_expr e) rparen + else doc_of_expr e + +and maybe_parens_bop op (loc : [ `Left | `Right ]) e = + match (loc, assoc_of_bop op) with + | (`Left, Some Left | `Right, Some Right) + when prec_of_expr e >= prec_of_bop op -> + doc_of_expr e + | _, _ -> maybe_parens (prec_of_bop op) e + +and doc_of_attpath path = separate_map dot doc_of_expr path + +and doc_of_paramset (params, kind) = + let ps = + List.map ~f:doc_of_param params + @ if Poly.(kind = Loose) then [ string "..." ] else [] + in + surround !indent 0 lbrace (separate (comma ^^ break 1) ps) rbrace + +and doc_of_param (id, oe) = + string id ^^ optional (fun e -> qmark ^^ space ^^ doc_of_expr e) oe + +and doc_of_binding = function + | AttrPath (path, e) -> + doc_of_attpath path ^^ space ^^ equals ^^ space ^^ doc_of_expr e ^^ semi + | Inherit (oe, ids) -> + let id_docs = + List.map + ~f:(function + | Id x | Val (Str (x, [])) -> string x | _ -> assert false) + ids + in + let xs = + flow (break 1) + (match oe with + | Some e -> parens (doc_of_expr e) :: id_docs + | None -> id_docs) + in + soft_surround !indent 0 (string "inherit" ^^ space) xs semi + +and doc_of_bop = function + | Plus -> plus + | Minus -> minus + | Mult -> star + | Div -> slash + | Gt -> rangle + | Lt -> langle + | Lte -> string "<=" + | Gte -> string ">=" + | Eq -> string "==" + | Neq -> string "!=" + | Or -> string "||" + | And -> string "&&" + | Impl -> string "->" + | Merge -> string "//" + | Concat -> string "++" + +and doc_of_uop = function Negate -> minus | Not -> bang + +and doc_of_val = function + | Str (start, xs) -> + dquotes + (string (escape_string start) + ^^ concat + (List.map + ~f:(fun (e, s) -> + surround !indent 0 (string "${") (doc_of_expr e) + (string "}" ^^ string (escape_string s))) + xs)) + | IStr (i, start, xs) -> + let qq = string "''" in + let str s = + String.split ~on:'\n' s + |> List.map ~f:(fun s -> + let len = String.length s in + let s' = + if len >= i then String.sub s ~pos:i ~len:(len - i) else s + in + string s') + |> separate hardline + in + enclose qq qq + (str start + ^^ concat + (List.map + ~f:(fun (e, s) -> + enclose (string "${") rbrace (doc_of_expr e) ^^ str s) + xs)) + | Int x | Float x | Path x | SPath x | HPath x | Uri x -> string x + | Lambda (pattern, body) -> + let pat = + match pattern with + | Alias id -> string id + | ParamSet (None, ps) -> doc_of_paramset ps + | ParamSet (Some id, ps) -> + doc_of_paramset ps ^^ group (break 1 ^^ at ^^ break 1 ^^ string id) + in + flow (break 1) [ pat ^^ colon; doc_of_expr body ] + | List [] -> lbracket ^^ space ^^ rbracket + | List es -> + surround !indent 1 lbracket + (separate_map (break 1) (maybe_parens 2) es) + rbracket + | AttSet (Nonrec, []) -> lbrace ^^ space ^^ rbrace + | AttSet (Nonrec, bs) -> + surround !indent 1 lbrace + (group (separate_map (break 1) doc_of_binding bs)) + rbrace + | AttSet (Rec, bs) -> + string "rec" ^^ space ^^ doc_of_val (AttSet (Nonrec, bs)) + +let print chan expr = ToChannel.pretty 0.7 !out_width chan (doc_of_expr expr) + +let to_string expr = + let buf = Stdlib.Buffer.create 0 in + ToBuffer.pretty 0.7 !out_width buf (doc_of_expr expr); + Stdlib.Buffer.contents buf diff --git a/lib/nix/tokens.ml b/lib/nix/tokens.ml new file mode 100644 index 0000000..4891d48 --- /dev/null +++ b/lib/nix/tokens.ml @@ -0,0 +1,64 @@ +type token = + (* Tokens with data *) + | INT of string + | FLOAT of string + (* A path (starting with / or ./) *) + | PATH of string + (* Search path, enclosed in < and > *) + | SPATH of string + (* Home path, starting with ~/ *) + | HPATH of string + | URI of string + | STR_START of string + | STR_MID of string + | STR_END + | ISTR_START of string + | ISTR_MID of string + | ISTR_END of int + | ID of string + (* Tokens that stand for themselves *) + | SELECT + | QMARK + | CONCAT + | NOT + | MERGE + | ASSIGN + | LT + | LTE + | GT + | GTE + | EQ + | NEQ + | AND + | OR + | IMPL + | AQUOTE_OPEN + | AQUOTE_CLOSE + | LBRACE + | RBRACE + | LBRACK + | RBRACK + | PLUS + | MINUS + | TIMES + | SLASH + | LPAREN + | RPAREN + | COLON + | SEMICOLON + | COMMA + | ELLIPSIS + | AS + (* Keywords *) + | WITH + | REC + | LET + | IN + | INHERIT + | IF + | THEN + | ELSE + | ASSERT + | ORDEF + (* End of input *) + | EOF diff --git a/lib/nix/types.ml b/lib/nix/types.ml new file mode 100644 index 0000000..8245406 --- /dev/null +++ b/lib/nix/types.ml @@ -0,0 +1,112 @@ +open Core + +(* Binary operators *) +type binary_op = + | Plus + | Minus + | Mult + | Div + | Gt + | Lt + | Lte + | Gte + | Eq + | Neq + | Or + | And + | Impl + | Merge + | Concat +[@@deriving sexp_of] + +(* Unary operators *) +type unary_op = Negate | Not [@@deriving sexp_of] + +(* The top-level expression type *) +type expr = + | BinaryOp of binary_op * expr * expr + | UnaryOp of unary_op * expr + | Cond of expr * expr * expr + | With of expr * expr + | Assert of expr * expr + | Test of expr * expr list + | SetLet of binding list + | Let of binding list * expr + | Val of value + | Id of id + | Select of expr * expr list * expr option + | Apply of expr * expr + | Aquote of expr +[@@deriving sexp_of] + +(* Possible values *) +and value = + (* Str is a string start, followed by arbitrary number of antiquotations and + strings that separate them *) + | Str of string * (expr * string) list + (* IStr is an indented string, so it has the extra integer component which + indicates the indentation *) + | IStr of int * string * (expr * string) list + | Int of string + | Float of string + | Path of string + | SPath of string + | HPath of string + | Uri of string + | Lambda of pattern * expr + | List of expr list + | AttSet of recursivity * binding list +[@@deriving sexp_of] + +(* Patterns in lambda definitions *) +and pattern = Alias of id | ParamSet of id option * param_set +[@@deriving sexp_of] + +and param_set = param list * match_kind [@@deriving sexp_of] +and param = id * expr option [@@deriving sexp_of] +and recursivity = Rec | Nonrec +and match_kind = Exact | Loose + +(* Bindings in attribute sets and let expressions *) +and binding = + (* The first expr should be attrpath, which is the same as in Select *) + | AttrPath of expr list * expr + | Inherit of expr option * expr list +[@@deriving sexp_of] + +(* Identifiers *) +and id = string + +(* Precedence levels of binary operators *) +let prec_of_bop = function + | Concat -> 5 + | Mult | Div -> 6 + | Plus | Minus -> 7 + | Merge -> 9 + | Gt | Lt | Lte | Gte -> 10 + | Eq | Neq -> 11 + | And -> 12 + | Or -> 13 + | Impl -> 14 + +type assoc = Left | Right + +let assoc_of_bop = function + | Mult | Div | Plus | Minus -> Some Left + | Concat | Merge | And | Or -> Some Right + | Gt | Lt | Lte | Gte | Eq | Neq | Impl -> None + +(* Precedence levels of unary operators *) +let prec_of_uop = function Negate -> 3 | Not -> 8 + +(* Precedence level of expressions + (assuming that the constituents have higher levels) *) +let prec_of_expr = function + | Val (Lambda _) -> 15 + | Val _ | Id _ | Aquote _ -> 0 + | BinaryOp (op, _, _) -> prec_of_bop op + | UnaryOp (op, _) -> prec_of_uop op + | Cond _ | With _ | Assert _ | Let _ | SetLet _ -> 15 + | Test _ -> 4 + | Select _ -> 1 + | Apply _ -> 2 -- cgit v1.2.3