diff options
Diffstat (limited to 'lib/nix')
| -rw-r--r-- | lib/nix/dune | 15 | ||||
| -rw-r--r-- | lib/nix/elaborator.ml | 208 | ||||
| -rw-r--r-- | lib/nix/lexer.mll | 315 | ||||
| -rw-r--r-- | lib/nix/nix.ml | 20 | ||||
| -rw-r--r-- | lib/nix/parser.mly | 310 | ||||
| -rw-r--r-- | lib/nix/printer.ml | 176 | ||||
| -rw-r--r-- | lib/nix/tokens.ml | 64 | ||||
| -rw-r--r-- | lib/nix/types.ml | 112 |
8 files changed, 1220 insertions, 0 deletions
diff --git a/lib/nix/dune b/lib/nix/dune new file mode 100644 index 0000000..3954c8a --- /dev/null +++ b/lib/nix/dune | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | (menhir | ||
| 2 | (modules parser) | ||
| 3 | (flags "--dump" "--strict" "--external-tokens" "Tokens") | ||
| 4 | (infer true)) | ||
| 5 | |||
| 6 | (ocamllex | ||
| 7 | (modules lexer)) | ||
| 8 | |||
| 9 | (library | ||
| 10 | (name nix) | ||
| 11 | (preprocess | ||
| 12 | (pps ppx_sexp_conv)) | ||
| 13 | (instrumentation | ||
| 14 | (backend bisect_ppx)) | ||
| 15 | (libraries core core_unix core_unix.filename_unix pprint ppx_sexp_conv str)) | ||
diff --git a/lib/nix/elaborator.ml b/lib/nix/elaborator.ml new file mode 100644 index 0000000..36ee0d4 --- /dev/null +++ b/lib/nix/elaborator.ml | |||
| @@ -0,0 +1,208 @@ | |||
| 1 | open Core | ||
| 2 | open Types | ||
| 3 | |||
| 4 | (* The Nix elaborator does a few things: | ||
| 5 | - Attribute paths are transformed into a simple list of expressions: | ||
| 6 | + Simple identifiers are rewritten to string values | ||
| 7 | + Antiquotations are rewritten to their component expressions | ||
| 8 | + Anything else, that is not a string value, is rejected | ||
| 9 | and raises an exception | ||
| 10 | - In 'inherit (...) x1 ... xn', x1 ... xn are checked for 'reasonably' being | ||
| 11 | identifiers, i.e., being one of x, "x" and ${"x"}. | ||
| 12 | - Nested attribute paths are unfolded and attribute sets are merged where | ||
| 13 | possible. (Where we mean 'what Nix does' with 'where possible'; see the | ||
| 14 | comment at the respective function.) | ||
| 15 | - Paths are turned into strings and made absolute w.r.t. the current | ||
| 16 | working directory. | ||
| 17 | - Indented strings are converted to their 'normal' counterpart. *) | ||
| 18 | |||
| 19 | exception ElaborateError of string | ||
| 20 | |||
| 21 | type attr_set = recursivity * binding list | ||
| 22 | |||
| 23 | let set_expr (r, bs) = Val (AttSet (r, bs)) | ||
| 24 | let get_id = function Id x -> x | _ -> assert false | ||
| 25 | |||
| 26 | let rec update_bnd (bs : binding list) (x : string) ~(f : expr option -> expr) = | ||
| 27 | match bs with | ||
| 28 | | [] -> [ AttrPath ([ Val (Str (x, [])) ], f None) ] | ||
| 29 | | AttrPath ([ Val (Str (y, [])) ], e) :: s' when String.(x = y) -> | ||
| 30 | AttrPath ([ Val (Str (y, [])) ], f (Some e)) :: s' | ||
| 31 | | Inherit (_, ids) :: _ | ||
| 32 | when List.exists ids ~f:(fun e -> String.(get_id e = x)) -> | ||
| 33 | raise (ElaborateError "Cannot update inherit") | ||
| 34 | | bnd :: s' -> bnd :: update_bnd s' x ~f | ||
| 35 | |||
| 36 | let set_update_bnd (r, bs) x ~f = (r, update_bnd bs x ~f) | ||
| 37 | |||
| 38 | let rec has_bnd (bs : binding list) (x : string) : bool = | ||
| 39 | match bs with | ||
| 40 | | [] -> false | ||
| 41 | | AttrPath ([ Val (Str (y, [])) ], _) :: _ when String.(x = y) -> true | ||
| 42 | | Inherit (_, ids) :: _ | ||
| 43 | when List.exists ids ~f:(fun e -> String.(get_id e = x)) -> | ||
| 44 | true | ||
| 45 | | _ :: bs' -> has_bnd bs' x | ||
| 46 | |||
| 47 | let merge_bnds bs1 bs2 : binding list = | ||
| 48 | List.fold_left bs2 ~init:bs1 ~f:(fun bs1' b2 -> | ||
| 49 | match b2 with | ||
| 50 | | AttrPath ([ Val (Str (x, [])) ], e) -> | ||
| 51 | update_bnd bs1' x ~f:(function | ||
| 52 | | Some _ -> raise (ElaborateError "Duplicated attribute") | ||
| 53 | | None -> e) | ||
| 54 | | AttrPath ([ d ], e) -> AttrPath ([ d ], e) :: bs1' | ||
| 55 | | Inherit (md, xs) -> | ||
| 56 | if List.for_all xs ~f:(fun e -> not (has_bnd bs1' (get_id e))) then | ||
| 57 | Inherit (md, xs) :: bs1' | ||
| 58 | else raise (ElaborateError "Duplicated attribute") | ||
| 59 | | _ -> assert false) | ||
| 60 | |||
| 61 | (* This function intentionally clobbers recursivity, because that is the way | ||
| 62 | that Nix likes to handle attribute insertion. See | ||
| 63 | (1) https://github.com/NixOS/nix/issues/9020 | ||
| 64 | (2) https://github.com/NixOS/nix/issues/11268 | ||
| 65 | (3) https://github.com/NixOS/nix/pull/11294 *) | ||
| 66 | let rec insert (bs : binding list) (path : expr list) (e : expr) = | ||
| 67 | match path with | ||
| 68 | | [] -> raise (ElaborateError "Cannot insert attribute with empty path") | ||
| 69 | | [ Val (Str (x, [])) ] -> | ||
| 70 | update_bnd bs x ~f:(function | ||
| 71 | | None -> e | ||
| 72 | | Some (Val (AttSet (r1, bs1))) -> ( | ||
| 73 | match e with | ||
| 74 | | Val (AttSet (_, bs2)) -> set_expr (r1, merge_bnds bs1 bs2) | ||
| 75 | | _ -> raise (ElaborateError "Duplicated attribute")) | ||
| 76 | | _ -> raise (ElaborateError "Duplicated attribute")) | ||
| 77 | | Val (Str (x, [])) :: rest -> | ||
| 78 | update_bnd bs x ~f:(function | ||
| 79 | | Some (Val (AttSet (r, bs))) -> Val (AttSet (r, insert bs rest e)) | ||
| 80 | | Some _ -> raise (ElaborateError "Duplicated attribute") | ||
| 81 | | None -> Val (AttSet (Nonrec, insert [] rest e))) | ||
| 82 | | [ part ] -> AttrPath ([ part ], e) :: bs | ||
| 83 | | part :: rest -> | ||
| 84 | AttrPath ([ part ], Val (AttSet (Nonrec, insert [] rest e))) :: bs | ||
| 85 | |||
| 86 | let insert_inherit (bs : binding list) (from : expr option) (es : expr list) = | ||
| 87 | if List.for_all es ~f:(fun e -> not (has_bnd bs (get_id e))) then | ||
| 88 | Inherit (from, es) :: bs | ||
| 89 | else raise (ElaborateError "Duplicated attribute") | ||
| 90 | |||
| 91 | let simplify_path_component = function | ||
| 92 | | Id x -> Val (Str (x, [])) | ||
| 93 | | Val (Str (s, ess)) -> Val (Str (s, ess)) | ||
| 94 | | Aquote e -> e | ||
| 95 | | _ -> raise (ElaborateError "Unexpected path component") | ||
| 96 | |||
| 97 | let simplify_path = List.map ~f:simplify_path_component | ||
| 98 | |||
| 99 | let simplify_bnd_paths = | ||
| 100 | List.map ~f:(fun bnd -> | ||
| 101 | match bnd with | ||
| 102 | | AttrPath (path, e) -> AttrPath (simplify_path path, e) | ||
| 103 | | Inherit (me, xs) -> Inherit (me, xs)) | ||
| 104 | |||
| 105 | (* Law: concat_lines ∘ split_lines = id *) | ||
| 106 | |||
| 107 | let rec split_lines s = | ||
| 108 | match String.lsplit2 s ~on:'\n' with | ||
| 109 | | Some (s1, s2) -> s1 :: split_lines s2 | ||
| 110 | | None -> [ s ] | ||
| 111 | |||
| 112 | let rec concat_lines = function | ||
| 113 | | [] -> "" | ||
| 114 | | [ x ] -> x | ||
| 115 | | x :: xs -> x ^ "\n" ^ concat_lines xs | ||
| 116 | |||
| 117 | let map_tail ~f = function [] -> [] | x :: xs -> x :: List.map ~f xs | ||
| 118 | |||
| 119 | let unindent n s ~skip_first_line = | ||
| 120 | let map_op ~f = if skip_first_line then map_tail ~f else List.map ~f in | ||
| 121 | split_lines s | ||
| 122 | |> map_op ~f:(fun line -> | ||
| 123 | let expected_prefix = String.make n ' ' in | ||
| 124 | String.chop_prefix_if_exists ~prefix:expected_prefix line) | ||
| 125 | |> concat_lines | ||
| 126 | |||
| 127 | let is_spaces l = String.(strip l ~drop:(Char.( = ) ' ') |> is_empty) | ||
| 128 | |||
| 129 | let drop_first_empty_line s = | ||
| 130 | match String.lsplit2 s ~on:'\n' with | ||
| 131 | | Some (l, s') when is_spaces l -> s' | ||
| 132 | | _ -> s | ||
| 133 | |||
| 134 | let rec process ?(dir = None) = function | ||
| 135 | | BinaryOp (op, e1, e2) -> BinaryOp (op, process ~dir e1, process ~dir e2) | ||
| 136 | | UnaryOp (op, e) -> UnaryOp (op, process ~dir e) | ||
| 137 | | Cond (e1, e2, e3) -> Cond (process ~dir e1, process ~dir e2, process ~dir e3) | ||
| 138 | | With (e1, e2) -> With (process ~dir e1, process ~dir e2) | ||
| 139 | | Assert (e1, e2) -> Assert (process ~dir e1, process ~dir e2) | ||
| 140 | | Test (e1, es) -> | ||
| 141 | Test (process ~dir e1, List.(simplify_path es >>| process ~dir)) | ||
| 142 | | SetLet bs -> SetLet (process_bnds ~dir bs) | ||
| 143 | | Let (bs, e) -> Let (process_bnds ~dir bs, process ~dir e) | ||
| 144 | | Val v -> Val (process_val ~dir v) | ||
| 145 | | Id x -> Id x | ||
| 146 | | Select (e, es, me) -> | ||
| 147 | Select | ||
| 148 | ( process ~dir e, | ||
| 149 | List.(simplify_path es >>| process ~dir), | ||
| 150 | Option.(me >>| process ~dir) ) | ||
| 151 | | Apply (e1, e2) -> Apply (process ~dir e1, process ~dir e2) | ||
| 152 | | Aquote e -> Aquote (process ~dir e) | ||
| 153 | |||
| 154 | and process_val ~dir = function | ||
| 155 | | Str (s, ess) -> Str (s, List.(ess >>| fun (e, s) -> (process ~dir e, s))) | ||
| 156 | | IStr (n, s, ess) -> | ||
| 157 | let s' = drop_first_empty_line (unindent n s ~skip_first_line:false) | ||
| 158 | and ess' = | ||
| 159 | List.map ess ~f:(fun (e, s) -> | ||
| 160 | (process ~dir e, unindent n s ~skip_first_line:true)) | ||
| 161 | in | ||
| 162 | Str (s', ess') | ||
| 163 | | Lambda (p, e) -> Lambda (process_pattern ~dir p, process ~dir e) | ||
| 164 | | List es -> List List.(es >>| process ~dir) | ||
| 165 | | AttSet (r, bs) -> AttSet (r, process_bnds ~dir bs) | ||
| 166 | | Path p -> ( | ||
| 167 | if Filename.is_absolute p then Str (p, []) | ||
| 168 | else | ||
| 169 | match dir with | ||
| 170 | | Some dir when Filename.is_absolute dir -> | ||
| 171 | Str (Filename.concat dir p, []) | ||
| 172 | | Some _ -> | ||
| 173 | raise | ||
| 174 | (ElaborateError "Provided directory should be an absolute path") | ||
| 175 | | None -> raise (ElaborateError "Do not know how to resolve path")) | ||
| 176 | | v -> v | ||
| 177 | |||
| 178 | and process_bnds ~dir bs = | ||
| 179 | bs | ||
| 180 | |> List.map ~f:(function | ||
| 181 | | AttrPath (es, e) -> | ||
| 182 | AttrPath (List.(es >>| process ~dir), process ~dir e) | ||
| 183 | | Inherit (me, xs) -> | ||
| 184 | Inherit (Option.(me >>| process ~dir), process_inherit_ids xs)) | ||
| 185 | |> simplify_bnd_paths | ||
| 186 | |> List.fold ~init:[] ~f:(fun bs' bnd -> | ||
| 187 | match bnd with | ||
| 188 | | AttrPath (path, e) -> insert bs' path e | ||
| 189 | | Inherit (from, es) -> insert_inherit bs' from es) | ||
| 190 | |||
| 191 | and process_inherit_ids = | ||
| 192 | List.map ~f:(function | ||
| 193 | | Id x | Val (Str (x, [])) | Aquote (Val (Str (x, []))) -> Id x | ||
| 194 | | _ -> raise (ElaborateError "Unexpected expression in inherit")) | ||
| 195 | |||
| 196 | and process_pattern ~dir = function | ||
| 197 | | Alias x -> Alias x | ||
| 198 | | ParamSet (mx, (ps, k)) -> ParamSet (mx, (process_param_set ~dir mx ps, k)) | ||
| 199 | |||
| 200 | and process_param_set ~dir ?(seen = String.Set.empty) mx ps = | ||
| 201 | match ps with | ||
| 202 | | [] -> [] | ||
| 203 | | (y, me) :: ps' -> | ||
| 204 | if Set.mem seen y || Option.mem mx y ~equal:String.( = ) then | ||
| 205 | raise (ElaborateError "Duplicated function argument") | ||
| 206 | else | ||
| 207 | (y, Option.(me >>| process ~dir)) | ||
| 208 | :: process_param_set ~dir mx ps' ~seen:(Set.add seen y) | ||
diff --git a/lib/nix/lexer.mll b/lib/nix/lexer.mll new file mode 100644 index 0000000..023d888 --- /dev/null +++ b/lib/nix/lexer.mll | |||
| @@ -0,0 +1,315 @@ | |||
| 1 | { | ||
| 2 | open Core | ||
| 3 | open Tokens | ||
| 4 | |||
| 5 | exception Error of string | ||
| 6 | |||
| 7 | (* Types of curly braces. | ||
| 8 | AQUOTE corresponds to the braces for antiquotation, i.e. '${...}' | ||
| 9 | and SET to an attribute set '{...}'. | ||
| 10 | *) | ||
| 11 | type braces = | ||
| 12 | | AQUOTE | ||
| 13 | | SET | ||
| 14 | |||
| 15 | let print_stack s = | ||
| 16 | let b = Buffer.create 100 in | ||
| 17 | Buffer.add_string b "[ "; | ||
| 18 | List.iter s ~f:(function | ||
| 19 | | AQUOTE -> Buffer.add_string b "AQUOTE; " | ||
| 20 | | SET -> Buffer.add_string b "SET; " | ||
| 21 | ); | ||
| 22 | Buffer.add_string b "]"; | ||
| 23 | Buffer.contents b | ||
| 24 | |||
| 25 | let token_of_str state buf = | ||
| 26 | match state with | ||
| 27 | | `Start -> STR_START (Buffer.contents buf) | ||
| 28 | | `Mid -> STR_MID (Buffer.contents buf) | ||
| 29 | |||
| 30 | let token_of_istr state buf = | ||
| 31 | match state with | ||
| 32 | | `Start -> ISTR_START (Buffer.contents buf) | ||
| 33 | | `Mid -> ISTR_MID (Buffer.contents buf) | ||
| 34 | |||
| 35 | (* lookup table for one-character tokens *) | ||
| 36 | let char_table = Array.create ~len:94 EOF | ||
| 37 | let _ = | ||
| 38 | List.iter ~f:(fun (k, v) -> Array.set char_table ((int_of_char k) - 1) v) | ||
| 39 | [ | ||
| 40 | '.', SELECT; | ||
| 41 | '?', QMARK; | ||
| 42 | '!', NOT; | ||
| 43 | '=', ASSIGN; | ||
| 44 | '<', LT; | ||
| 45 | '>', GT; | ||
| 46 | '[', LBRACK; | ||
| 47 | ']', RBRACK; | ||
| 48 | '+', PLUS; | ||
| 49 | '-', MINUS; | ||
| 50 | '*', TIMES; | ||
| 51 | '/', SLASH; | ||
| 52 | '(', LPAREN; | ||
| 53 | ')', RPAREN; | ||
| 54 | ':', COLON; | ||
| 55 | ';', SEMICOLON; | ||
| 56 | ',', COMMA; | ||
| 57 | '@', AS | ||
| 58 | ] | ||
| 59 | |||
| 60 | (* lookup table for two- and three-character tokens *) | ||
| 61 | let str_table = Hashtbl.create (module String) ~size:10 | ||
| 62 | let _ = | ||
| 63 | List.iter ~f:(fun (kwd, tok) -> Hashtbl.set str_table ~key:kwd ~data:tok) | ||
| 64 | [ | ||
| 65 | "//", MERGE; | ||
| 66 | "++", CONCAT; | ||
| 67 | "<=", LTE; | ||
| 68 | ">=", GTE; | ||
| 69 | "==", EQ; | ||
| 70 | "!=", NEQ; | ||
| 71 | "&&", AND; | ||
| 72 | "||", OR; | ||
| 73 | "->", IMPL; | ||
| 74 | "...", ELLIPSIS | ||
| 75 | ] | ||
| 76 | |||
| 77 | (* lookup table for keywords *) | ||
| 78 | let keyword_table = Hashtbl.create (module String) ~size:10 | ||
| 79 | let _ = | ||
| 80 | List.iter ~f:(fun (kwd, tok) -> Hashtbl.set keyword_table ~key:kwd ~data:tok) | ||
| 81 | [ "with", WITH; | ||
| 82 | "rec", REC; | ||
| 83 | "let", LET; | ||
| 84 | "in", IN; | ||
| 85 | "inherit", INHERIT; | ||
| 86 | "if" , IF; | ||
| 87 | "then", THEN; | ||
| 88 | "else", ELSE; | ||
| 89 | "assert", ASSERT; | ||
| 90 | "or", ORDEF ] | ||
| 91 | |||
| 92 | (* replace an escape sequence by the corresponding character(s) *) | ||
| 93 | let unescape = function | ||
| 94 | | "\\n" -> "\n" | ||
| 95 | | "\\r" -> "\r" | ||
| 96 | | "\\t" -> "\t" | ||
| 97 | | "\\\\" -> "\\" | ||
| 98 | | "\\${" -> "${" | ||
| 99 | | "''$" -> "$" | ||
| 100 | | "$$" -> "$" | ||
| 101 | | "'''" -> "''" | ||
| 102 | | "''\\t" -> "\t" | ||
| 103 | | "''\\r" -> "\r" | ||
| 104 | | "''\\n" -> "\n" | ||
| 105 | | x -> | ||
| 106 | failwith (Printf.sprintf "unescape unexpected arg %s" x) | ||
| 107 | |||
| 108 | let collect_tokens lexer q lexbuf = | ||
| 109 | let stack = ref [] in | ||
| 110 | let queue = Stdlib.Queue.create () in | ||
| 111 | let rec go () = | ||
| 112 | match (try Some (Stdlib.Queue.take queue) with Stdlib.Queue.Empty -> None) with | ||
| 113 | | Some token -> | ||
| 114 | ( | ||
| 115 | match token, !stack with | ||
| 116 | | AQUOTE_CLOSE, [] -> | ||
| 117 | Stdlib.Queue.add AQUOTE_CLOSE q | ||
| 118 | | EOF, _ -> | ||
| 119 | Stdlib.Queue.add EOF q; | ||
| 120 | | _, _ -> | ||
| 121 | Stdlib.Queue.add token q; | ||
| 122 | go () | ||
| 123 | ) | ||
| 124 | | None -> | ||
| 125 | lexer queue stack lexbuf; | ||
| 126 | go () | ||
| 127 | in | ||
| 128 | Stdlib.Queue.add AQUOTE_OPEN q; | ||
| 129 | stack := [AQUOTE]; | ||
| 130 | lexer queue stack lexbuf; | ||
| 131 | go () | ||
| 132 | |||
| 133 | (* utility functions *) | ||
| 134 | let print_position lexbuf = | ||
| 135 | let pos = Lexing.lexeme_start_p lexbuf in | ||
| 136 | Printf.sprintf "%s:%d:%d" pos.pos_fname | ||
| 137 | pos.pos_lnum (pos.pos_cnum - pos.pos_bol + 1) | ||
| 138 | |||
| 139 | |||
| 140 | let set_filename fname (lexbuf: Lexing.lexbuf) = | ||
| 141 | let pos = lexbuf.lex_curr_p in | ||
| 142 | lexbuf.lex_curr_p <- { pos with pos_fname = fname }; lexbuf | ||
| 143 | |||
| 144 | } | ||
| 145 | |||
| 146 | let nzdigit = ['1'-'9'] | ||
| 147 | let digit = nzdigit | '0' | ||
| 148 | let float = (nzdigit digit* '.' digit* | '0'? '.' digit+) (['E' 'e'] ['+' '-']? digit+)? | ||
| 149 | let alpha = ['a'-'z' 'A'-'Z'] | ||
| 150 | let alpha_digit = alpha | digit | ||
| 151 | let path_chr = alpha_digit | ['.' '_' '-' '+'] | ||
| 152 | let path = path_chr* ('/' path_chr+)+ | ||
| 153 | let spath = alpha_digit path_chr* ('/' path_chr+)* | ||
| 154 | let uri_chr = ['%' '/' '?' ':' '@' '&' '=' '+' '$' ',' '-' '_' '.' '!' '~' '*' '\''] | ||
| 155 | let scheme = alpha (alpha | ['+' '-' '.'])* | ||
| 156 | let uri = scheme ':' (alpha_digit | uri_chr)+ | ||
| 157 | let char_tokens = ['.' '?' '!' '=' '<' '>' '[' ']' '+' '-' '*' '/' '^' '(' ')' ':' ';' ',' '@'] | ||
| 158 | |||
| 159 | rule get_tokens q s = parse | ||
| 160 | (* skip whitespeces *) | ||
| 161 | | [' ' '\t' '\r'] | ||
| 162 | { get_tokens q s lexbuf } | ||
| 163 | (* increase line count for new lines *) | ||
| 164 | | '\n' | ||
| 165 | { Lexing.new_line lexbuf; get_tokens q s lexbuf } | ||
| 166 | | char_tokens as c | ||
| 167 | { Stdlib.Queue.add (Array.get char_table ((int_of_char c) - 1)) q } | ||
| 168 | | ("//" | "++" | "<=" | ">=" | "==" | "!=" | "&&" | "||" | "->" | "...") as s | ||
| 169 | { Stdlib.Queue.add (Hashtbl.find_exn str_table s) q} | ||
| 170 | | digit+ as i | ||
| 171 | { Stdlib.Queue.add (INT i) q } | ||
| 172 | | float | ||
| 173 | { Stdlib.Queue.add (FLOAT (Lexing.lexeme lexbuf)) q } | ||
| 174 | | path | ||
| 175 | { Stdlib.Queue.add (PATH (Lexing.lexeme lexbuf)) q } | ||
| 176 | | '<' (spath as p) '>' | ||
| 177 | { Stdlib.Queue.add (SPATH p) q } | ||
| 178 | | '~' path as p | ||
| 179 | { Stdlib.Queue.add (HPATH p) q } | ||
| 180 | | uri | ||
| 181 | { Stdlib.Queue.add(URI (Lexing.lexeme lexbuf)) q } | ||
| 182 | (* keywords or identifiers *) | ||
| 183 | | ((alpha | '_')+ (alpha_digit | ['_' '\'' '-'])*) as id | ||
| 184 | { Stdlib.Queue.add (Hashtbl.find keyword_table id |> Option.value ~default:(ID id)) q} | ||
| 185 | (* comments *) | ||
| 186 | | '#' ([^ '\n']* as c) | ||
| 187 | { ignore c; get_tokens q s lexbuf} | ||
| 188 | | "/*" | ||
| 189 | { comment (Buffer.create 64) lexbuf; | ||
| 190 | get_tokens q s lexbuf | ||
| 191 | } | ||
| 192 | (* the following three tokens change the braces stack *) | ||
| 193 | | "${" | ||
| 194 | { Stdlib.Queue.add AQUOTE_OPEN q; s := AQUOTE :: !s } | ||
| 195 | | '{' | ||
| 196 | { Stdlib.Queue.add LBRACE q; s := SET :: !s } | ||
| 197 | | '}' | ||
| 198 | { | ||
| 199 | match !s with | ||
| 200 | | AQUOTE :: rest -> | ||
| 201 | Stdlib.Queue.add AQUOTE_CLOSE q; s := rest | ||
| 202 | | SET :: rest -> | ||
| 203 | Stdlib.Queue.add RBRACE q; s := rest | ||
| 204 | | _ -> | ||
| 205 | let pos = print_position lexbuf in | ||
| 206 | let err = Printf.sprintf "Unbalanced '}' at %s\n" pos in | ||
| 207 | raise (Error err) | ||
| 208 | } | ||
| 209 | (* a double-quoted string *) | ||
| 210 | | '"' | ||
| 211 | { string `Start (Buffer.create 64) q lexbuf } | ||
| 212 | (* an indented string *) | ||
| 213 | | "''" (' '+ as ws) | ||
| 214 | { istring `Start (Some (String.length ws)) (Buffer.create 64) q lexbuf } | ||
| 215 | | "''" | ||
| 216 | { istring `Start None (Buffer.create 64) q lexbuf } | ||
| 217 | (* End of input *) | ||
| 218 | | eof | ||
| 219 | { Stdlib.Queue.add EOF q } | ||
| 220 | (* any other character raises an exception *) | ||
| 221 | | _ | ||
| 222 | { | ||
| 223 | let pos = print_position lexbuf in | ||
| 224 | let tok = Lexing.lexeme lexbuf in | ||
| 225 | let err = Printf.sprintf "Unexpected character '%s' at %s\n" tok pos in | ||
| 226 | raise (Error err) | ||
| 227 | } | ||
| 228 | |||
| 229 | (* Nix does not allow nested comments, but it is still handy to lex it | ||
| 230 | separately because we can properly increase line count. *) | ||
| 231 | and comment buf = parse | ||
| 232 | | '\n' | ||
| 233 | {Lexing.new_line lexbuf; Buffer.add_char buf '\n'; comment buf lexbuf} | ||
| 234 | | "*/" | ||
| 235 | { () } | ||
| 236 | | _ as c | ||
| 237 | { Buffer.add_char buf c; comment buf lexbuf } | ||
| 238 | |||
| 239 | and string state buf q = parse | ||
| 240 | | '"' (* terminate when we hit '"' *) | ||
| 241 | { Stdlib.Queue.add (token_of_str state buf) q; Stdlib.Queue.add STR_END q } | ||
| 242 | | '\n' | ||
| 243 | { Lexing.new_line lexbuf; Buffer.add_char buf '\n'; string state buf q lexbuf } | ||
| 244 | | ("\\n" | "\\r" | "\\t" | "\\\\" | "\\${") as s | ||
| 245 | { Buffer.add_string buf (unescape s); string state buf q lexbuf } | ||
| 246 | | "\\" (_ as c) (* add the character verbatim *) | ||
| 247 | { Buffer.add_char buf c; string state buf q lexbuf } | ||
| 248 | | "${" (* collect all the tokens till we hit the matching '}' *) | ||
| 249 | { | ||
| 250 | Stdlib.Queue.add (token_of_str state buf) q; | ||
| 251 | collect_tokens get_tokens q lexbuf; | ||
| 252 | string `Mid (Buffer.create 64) q lexbuf | ||
| 253 | } | ||
| 254 | | _ as c (* otherwise just add the character to the buffer *) | ||
| 255 | { Buffer.add_char buf c; string state buf q lexbuf } | ||
| 256 | |||
| 257 | and istring state imin buf q = parse | ||
| 258 | | ('\n' ' '* "''") | ||
| 259 | { | ||
| 260 | Lexing.new_line lexbuf; | ||
| 261 | Buffer.add_string buf "\n"; | ||
| 262 | let indent = match imin with | None -> 0 | Some i -> i in | ||
| 263 | Stdlib.Queue.add (token_of_istr state buf) q; | ||
| 264 | Stdlib.Queue.add (ISTR_END indent) q | ||
| 265 | } | ||
| 266 | | "''" | ||
| 267 | { | ||
| 268 | let indent = match imin with | None -> 0 | Some i -> i in | ||
| 269 | Stdlib.Queue.add (token_of_istr state buf) q; | ||
| 270 | Stdlib.Queue.add (ISTR_END indent) q | ||
| 271 | } | ||
| 272 | | ('\n' ' '* '\n') as s | ||
| 273 | { | ||
| 274 | Lexing.new_line lexbuf; | ||
| 275 | Lexing.new_line lexbuf; | ||
| 276 | Buffer.add_string buf s; | ||
| 277 | istring state imin buf q lexbuf | ||
| 278 | } | ||
| 279 | | ('\n' (' '* as ws)) as s | ||
| 280 | { | ||
| 281 | Lexing.new_line lexbuf; | ||
| 282 | Buffer.add_string buf s; | ||
| 283 | let ws_count = String.length ws in | ||
| 284 | match imin with | ||
| 285 | | None -> | ||
| 286 | istring state (Some ws_count) buf q lexbuf | ||
| 287 | | Some i -> | ||
| 288 | istring state (Some (min i ws_count)) buf q lexbuf | ||
| 289 | } | ||
| 290 | | ("''$" | "'''" | "''\\t" | "''\\r" | "''\\n") as s | ||
| 291 | { Buffer.add_string buf (unescape s); istring state imin buf q lexbuf } | ||
| 292 | | "''\\" (_ as c) | ||
| 293 | { Buffer.add_char buf c; istring state imin buf q lexbuf } | ||
| 294 | | "${" | ||
| 295 | { | ||
| 296 | Stdlib.Queue.add (token_of_istr state buf) q; | ||
| 297 | collect_tokens get_tokens q lexbuf; | ||
| 298 | istring `Mid imin (Buffer.create 64) q lexbuf | ||
| 299 | } | ||
| 300 | | _ as c | ||
| 301 | { Buffer.add_char buf c; istring state imin buf q lexbuf } | ||
| 302 | { | ||
| 303 | |||
| 304 | let rec next_token | ||
| 305 | (q: token Stdlib.Queue.t) | ||
| 306 | (s: braces list ref) | ||
| 307 | (lexbuf: Lexing.lexbuf) | ||
| 308 | : token = | ||
| 309 | match (try Some (Stdlib.Queue.take q) with | Stdlib.Queue.Empty -> None) with | ||
| 310 | | Some token -> | ||
| 311 | token | ||
| 312 | | None -> | ||
| 313 | get_tokens q s lexbuf; | ||
| 314 | next_token q s lexbuf | ||
| 315 | } | ||
diff --git a/lib/nix/nix.ml b/lib/nix/nix.ml new file mode 100644 index 0000000..39dc94c --- /dev/null +++ b/lib/nix/nix.ml | |||
| @@ -0,0 +1,20 @@ | |||
| 1 | open Core | ||
| 2 | module Ast = Types | ||
| 3 | module Printer = Printer | ||
| 4 | |||
| 5 | exception ParseError of string | ||
| 6 | |||
| 7 | let parse ~filename (data : string) = | ||
| 8 | let lexbuf = Lexer.set_filename filename (Lexing.from_string data) | ||
| 9 | and q, s = (Stdlib.Queue.create (), ref []) in | ||
| 10 | try Parser.main (Lexer.next_token q s) lexbuf with | ||
| 11 | | Lexer.Error msg -> | ||
| 12 | let msg' = String.rstrip msg in | ||
| 13 | raise (ParseError (sprintf "Lexing error: %s" msg')) | ||
| 14 | | Parser.Error -> | ||
| 15 | let msg = sprintf "Parse error at %s" (Lexer.print_position lexbuf) in | ||
| 16 | raise (ParseError msg) | ||
| 17 | |||
| 18 | let elaborate = Elaborator.process | ||
| 19 | |||
| 20 | exception ElaborateError = Elaborator.ElaborateError | ||
diff --git a/lib/nix/parser.mly b/lib/nix/parser.mly new file mode 100644 index 0000000..dc1638d --- /dev/null +++ b/lib/nix/parser.mly | |||
| @@ -0,0 +1,310 @@ | |||
| 1 | /* Tokens with data */ | ||
| 2 | %token <string> INT | ||
| 3 | %token <string> FLOAT | ||
| 4 | /* A path */ | ||
| 5 | %token <string> PATH | ||
| 6 | /* Search path, enclosed in <> */ | ||
| 7 | %token <string> SPATH | ||
| 8 | /* Home path, starts with ~ */ | ||
| 9 | %token <string> HPATH | ||
| 10 | %token <string> URI | ||
| 11 | %token <string> STR_START | ||
| 12 | %token <string> STR_MID | ||
| 13 | %token STR_END | ||
| 14 | %token <string> ISTR_START | ||
| 15 | %token <string> ISTR_MID | ||
| 16 | %token <int> ISTR_END | ||
| 17 | %token <string> ID | ||
| 18 | /* Tokens that stand for themselves */ | ||
| 19 | %token SELECT "." | ||
| 20 | %token QMARK "?" | ||
| 21 | %token CONCAT "++" | ||
| 22 | %token NOT "!" | ||
| 23 | %token MERGE "//" | ||
| 24 | %token ASSIGN "=" | ||
| 25 | %token LT "<" | ||
| 26 | %token LTE "<=" | ||
| 27 | %token GT ">" | ||
| 28 | %token GTE ">=" | ||
| 29 | %token EQ "==" | ||
| 30 | %token NEQ "!=" | ||
| 31 | %token AND "&&" | ||
| 32 | %token OR "||" | ||
| 33 | %token IMPL "->" | ||
| 34 | %token AQUOTE_OPEN "${" | ||
| 35 | %token AQUOTE_CLOSE "}$" | ||
| 36 | %token LBRACE "{" | ||
| 37 | %token RBRACE "}" | ||
| 38 | %token LBRACK "[" | ||
| 39 | %token RBRACK "]" | ||
| 40 | %token PLUS "+" | ||
| 41 | %token MINUS "-" | ||
| 42 | %token TIMES "*" | ||
| 43 | %token SLASH "/" | ||
| 44 | %token LPAREN "(" | ||
| 45 | %token RPAREN ")" | ||
| 46 | %token COLON ":" | ||
| 47 | %token SEMICOLON ";" | ||
| 48 | %token COMMA "," | ||
| 49 | %token ELLIPSIS "..." | ||
| 50 | %token AS "@" | ||
| 51 | /* Keywords */ | ||
| 52 | %token WITH "with" | ||
| 53 | %token REC "rec" | ||
| 54 | %token LET "let" | ||
| 55 | %token IN "in" | ||
| 56 | %token INHERIT "inherit" | ||
| 57 | %token IF "if" | ||
| 58 | %token THEN "then" | ||
| 59 | %token ELSE "else" | ||
| 60 | %token ASSERT "assert" | ||
| 61 | %token ORDEF "or" | ||
| 62 | |||
| 63 | /* End of input */ | ||
| 64 | %token EOF | ||
| 65 | |||
| 66 | %{ | ||
| 67 | open Types | ||
| 68 | %} | ||
| 69 | |||
| 70 | %start <Types.expr> main | ||
| 71 | |||
| 72 | %% | ||
| 73 | |||
| 74 | main: | ||
| 75 | | e = expr0 EOF | ||
| 76 | { e } | ||
| 77 | |||
| 78 | expr0: | ||
| 79 | | "if"; e1 = expr0; "then"; e2 = expr0; "else"; e3 = expr0 | ||
| 80 | { Cond (e1, e2, e3) } | ||
| 81 | | "with"; e1 = expr0; ";"; e2 = expr0 | ||
| 82 | { With (e1, e2) } | ||
| 83 | | "assert"; e1 = expr0; ";"; e2 = expr0 | ||
| 84 | { Assert (e1, e2) } | ||
| 85 | | "let"; xs = delimited("{", list(binding), "}") | ||
| 86 | { SetLet xs } | ||
| 87 | | "let"; xs = list(binding); "in"; e = expr0 | ||
| 88 | { Let (xs, e) } | ||
| 89 | | l = lambda | ||
| 90 | { Val l } | ||
| 91 | | e = expr1 | ||
| 92 | { e } | ||
| 93 | |||
| 94 | /* Rules expr1-expr14 are almost direct translation of the operator | ||
| 95 | precedence table: | ||
| 96 | https://nixos.org/nix/manual/#sec-language-operators */ | ||
| 97 | |||
| 98 | %inline binary_expr(Lhs, Op, Rhs): | ||
| 99 | | lhs = Lhs; op = Op; rhs = Rhs | ||
| 100 | { BinaryOp (op, lhs, rhs) } | ||
| 101 | |||
| 102 | expr1: | ||
| 103 | | e = binary_expr(expr2, "->" {Impl}, expr1) | ||
| 104 | | e = expr2 | ||
| 105 | { e } | ||
| 106 | |||
| 107 | expr2: | ||
| 108 | | e = binary_expr(expr2, "||" {Or}, expr3) | ||
| 109 | | e = expr3 | ||
| 110 | { e } | ||
| 111 | |||
| 112 | expr3: | ||
| 113 | | e = binary_expr(expr3, "&&" {And}, expr4) | ||
| 114 | | e = expr4 | ||
| 115 | { e } | ||
| 116 | |||
| 117 | %inline expr4_ops: | ||
| 118 | | "==" { Eq } | ||
| 119 | | "!=" { Neq } | ||
| 120 | |||
| 121 | expr4: | ||
| 122 | | e = binary_expr(expr5, expr4_ops, expr5) | ||
| 123 | | e = expr5 | ||
| 124 | { e } | ||
| 125 | |||
| 126 | %inline expr5_ops: | ||
| 127 | | "<" { Lt } | ||
| 128 | | ">" { Gt } | ||
| 129 | | "<=" { Lte } | ||
| 130 | | ">=" { Gte } | ||
| 131 | |||
| 132 | expr5: | ||
| 133 | | e = binary_expr(expr6, expr5_ops, expr6) | ||
| 134 | | e = expr6 | ||
| 135 | { e } | ||
| 136 | |||
| 137 | expr6: | ||
| 138 | | e = binary_expr(expr7, "//" {Merge}, expr6) | ||
| 139 | | e = expr7 | ||
| 140 | { e } | ||
| 141 | |||
| 142 | expr7: | ||
| 143 | | e = preceded("!", expr7) | ||
| 144 | { UnaryOp (Not, e) } | ||
| 145 | | e = expr8 | ||
| 146 | { e } | ||
| 147 | |||
| 148 | %inline expr8_ops: | ||
| 149 | | "+" { Plus } | ||
| 150 | | "-" { Minus } | ||
| 151 | |||
| 152 | expr8: | ||
| 153 | | e = binary_expr(expr8, expr8_ops, expr9) | ||
| 154 | | e = expr9 | ||
| 155 | { e } | ||
| 156 | |||
| 157 | %inline expr9_ops: | ||
| 158 | | "*" { Mult } | ||
| 159 | | "/" { Div } | ||
| 160 | |||
| 161 | expr9: | ||
| 162 | | e = binary_expr(expr9, expr9_ops, expr10) | ||
| 163 | | e = expr10 | ||
| 164 | { e } | ||
| 165 | |||
| 166 | expr10: | ||
| 167 | | e = binary_expr(expr11, "++" {Concat}, expr10) | ||
| 168 | | e = expr11 | ||
| 169 | { e } | ||
| 170 | |||
| 171 | expr11: | ||
| 172 | | e = expr12 "?" p = attr_path | ||
| 173 | { Test (e, p) } | ||
| 174 | | e = expr12 | ||
| 175 | { e } | ||
| 176 | |||
| 177 | expr12: | ||
| 178 | | e = preceded("-", expr13) | ||
| 179 | { UnaryOp (Negate, e) } | ||
| 180 | | e = expr13 | ||
| 181 | { e } | ||
| 182 | |||
| 183 | expr13: | ||
| 184 | | f = expr13; arg = expr14 | ||
| 185 | { Apply (f, arg) } | ||
| 186 | | e = expr14 | ||
| 187 | { e } | ||
| 188 | |||
| 189 | %inline selectable: | ||
| 190 | | s = set | ||
| 191 | { Val s } | ||
| 192 | | id = ID | ||
| 193 | { Id id } | ||
| 194 | | e = delimited("(", expr0, ")") | ||
| 195 | { e } | ||
| 196 | |||
| 197 | expr14: | ||
| 198 | | e = selectable; "."; p = attr_path; o = option(preceded("or", expr14)) | ||
| 199 | { Select (e, p, o) } | ||
| 200 | | e = atomic_expr; "or" | ||
| 201 | { Apply (e, Id "or") } | ||
| 202 | | e = atomic_expr | ||
| 203 | { e } | ||
| 204 | |||
| 205 | atomic_expr: | ||
| 206 | | id = ID | ||
| 207 | { Id id } | ||
| 208 | | v = value | ||
| 209 | { Val v } | ||
| 210 | | e = delimited("(", expr0, ")") | ||
| 211 | { e } | ||
| 212 | |||
| 213 | attr_path: | ||
| 214 | | p = separated_nonempty_list(".", attr_path_component) | ||
| 215 | { p } | ||
| 216 | |||
| 217 | attr_path_component: | ||
| 218 | | "or" | ||
| 219 | { Id "or" } | ||
| 220 | | id = ID | ||
| 221 | { Id id } | ||
| 222 | | e = delimited("${", expr0, "}$") | ||
| 223 | { Aquote e } | ||
| 224 | | s = str | ||
| 225 | { Val s } | ||
| 226 | |||
| 227 | value: | ||
| 228 | | s = str | ||
| 229 | { s } | ||
| 230 | | s = istr | ||
| 231 | { s } | ||
| 232 | | i = INT | ||
| 233 | {Int i} | ||
| 234 | | f = FLOAT | ||
| 235 | { Float f } | ||
| 236 | | p = PATH | ||
| 237 | { Path p } | ||
| 238 | | sp = SPATH | ||
| 239 | { SPath sp } | ||
| 240 | | hp = HPATH | ||
| 241 | { HPath hp } | ||
| 242 | | uri = URI | ||
| 243 | { Uri uri } | ||
| 244 | | l = nixlist | ||
| 245 | { l } | ||
| 246 | | s = set | ||
| 247 | { s } | ||
| 248 | |||
| 249 | %inline str_mid(X): | ||
| 250 | | xs = list(pair(delimited("${", expr0, "}$"), X)) { xs } | ||
| 251 | |||
| 252 | /* Double-quoted string */ | ||
| 253 | str: | ||
| 254 | | start = STR_START; mids = str_mid(STR_MID); STR_END | ||
| 255 | { Str (start, mids) } | ||
| 256 | |||
| 257 | /* Indented string */ | ||
| 258 | istr: | ||
| 259 | | start = ISTR_START; mids = str_mid(ISTR_MID); i = ISTR_END | ||
| 260 | { IStr (i, start, mids) } | ||
| 261 | |||
| 262 | /* Lists and sets */ | ||
| 263 | nixlist: | ||
| 264 | | xs = delimited("[", list(expr14), "]") | ||
| 265 | { List xs } | ||
| 266 | |||
| 267 | empty_set: | ||
| 268 | | "{"; "}" {} | ||
| 269 | |||
| 270 | set: | ||
| 271 | | empty_set | ||
| 272 | { AttSet (Nonrec, []) } | ||
| 273 | | xs = delimited("{", nonempty_list(binding), "}") | ||
| 274 | { AttSet (Nonrec, xs) } | ||
| 275 | | xs = preceded("rec", delimited("{", list(binding), "}")) | ||
| 276 | { AttSet (Rec, xs) } | ||
| 277 | |||
| 278 | binding: | ||
| 279 | | kv = terminated(separated_pair(attr_path, "=", expr0), ";") | ||
| 280 | { let (k, v) = kv in AttrPath (k, v) } | ||
| 281 | | xs = delimited("inherit", pair(option(delimited("(", expr0, ")")), list(attr_path_component)), ";") | ||
| 282 | { let (prefix, ids) = xs in Inherit (prefix, ids) } | ||
| 283 | |||
| 284 | lambda: | ||
| 285 | | id = ID; "@"; p = param_set; ":"; e = expr0 | ||
| 286 | { Lambda (ParamSet (Some id, p), e) } | ||
| 287 | | p = param_set; "@"; id = ID; ":"; e = expr0 | ||
| 288 | { Lambda (ParamSet (Some id, p), e) } | ||
| 289 | | p = param_set; ":"; e = expr0 | ||
| 290 | { Lambda (ParamSet (None, p), e) } | ||
| 291 | | id = ID; ":"; e = expr0 | ||
| 292 | { Lambda (Alias id, e) } | ||
| 293 | |||
| 294 | %inline param_set: | ||
| 295 | | empty_set | ||
| 296 | { ([], Exact) } | ||
| 297 | | "{"; "..."; "}" | ||
| 298 | { ([], Loose) } | ||
| 299 | | ps = delimited("{", pair(pair(params, ","?), boption("...")), "}") | ||
| 300 | { let ((ps, _), ellipsis) = ps in (ps, if ellipsis then Loose else Exact) } | ||
| 301 | |||
| 302 | params: | ||
| 303 | | p = param | ||
| 304 | { [p] } | ||
| 305 | | ps = params; ","; p = param | ||
| 306 | { ps @ [p] } | ||
| 307 | |||
| 308 | %inline param: | ||
| 309 | p = pair(ID, option(preceded("?", expr0))) | ||
| 310 | { p } | ||
diff --git a/lib/nix/printer.ml b/lib/nix/printer.ml new file mode 100644 index 0000000..57e81f4 --- /dev/null +++ b/lib/nix/printer.ml | |||
| @@ -0,0 +1,176 @@ | |||
| 1 | open Core | ||
| 2 | open Types | ||
| 3 | open PPrint | ||
| 4 | |||
| 5 | let rec escape_chlist = function | ||
| 6 | | [] -> [] | ||
| 7 | | '$' :: '{' :: l' -> '\\' :: '$' :: '{' :: escape_chlist l' | ||
| 8 | | '\n' :: l' -> '\\' :: 'n' :: escape_chlist l' | ||
| 9 | | '\r' :: l' -> '\\' :: 'r' :: escape_chlist l' | ||
| 10 | | '\t' :: l' -> '\\' :: 't' :: escape_chlist l' | ||
| 11 | | '\\' :: l' -> '\\' :: '\\' :: escape_chlist l' | ||
| 12 | | '"' :: l' -> '\\' :: '"' :: escape_chlist l' | ||
| 13 | | c :: l' -> c :: escape_chlist l' | ||
| 14 | |||
| 15 | let escape_string s = s |> String.to_list |> escape_chlist |> String.of_list | ||
| 16 | let out_width = ref 80 | ||
| 17 | let set_width i = out_width := i | ||
| 18 | let indent = ref 2 | ||
| 19 | let set_indent i = indent := i | ||
| 20 | |||
| 21 | let rec doc_of_expr = function | ||
| 22 | | BinaryOp (op, lhs, rhs) -> | ||
| 23 | let lhs_doc = maybe_parens_bop op `Left lhs | ||
| 24 | and rhs_doc = maybe_parens_bop op `Right rhs in | ||
| 25 | infix !indent 1 (doc_of_bop op) lhs_doc rhs_doc | ||
| 26 | | UnaryOp (op, e) -> precede (doc_of_uop op) (maybe_parens (prec_of_uop op) e) | ||
| 27 | | Cond (e1, e2, e3) -> | ||
| 28 | surround !indent 1 | ||
| 29 | (soft_surround !indent 1 (string "if") (doc_of_expr e1) (string "then")) | ||
| 30 | (doc_of_expr e2) | ||
| 31 | (string "else" ^^ nest !indent (break 1 ^^ doc_of_expr e3)) | ||
| 32 | | With (e1, e2) -> | ||
| 33 | flow (break 1) [ string "with"; doc_of_expr e1 ^^ semi; doc_of_expr e2 ] | ||
| 34 | | Assert (e1, e2) -> | ||
| 35 | flow (break 1) [ string "assert"; doc_of_expr e1 ^^ semi; doc_of_expr e2 ] | ||
| 36 | | Test (e, path) -> | ||
| 37 | maybe_parens 4 e ^^ space ^^ string "?" | ||
| 38 | ^^ group (break 1 ^^ separate_map dot doc_of_expr path) | ||
| 39 | | SetLet bs -> | ||
| 40 | surround !indent 1 | ||
| 41 | (string "let " ^^ lbrace) | ||
| 42 | (group (separate_map (break 1) doc_of_binding bs)) | ||
| 43 | rbrace | ||
| 44 | | Let (bs, e) -> | ||
| 45 | surround !indent 1 (string "let") | ||
| 46 | (separate_map (break 1) doc_of_binding bs) | ||
| 47 | (prefix !indent 1 (string "in") (doc_of_expr e)) | ||
| 48 | | Val v -> doc_of_val v | ||
| 49 | | Id id -> string id | ||
| 50 | | Select (e, path, oe) -> | ||
| 51 | maybe_parens 1 e ^^ dot ^^ doc_of_attpath path | ||
| 52 | ^^ optional | ||
| 53 | (fun e -> | ||
| 54 | space ^^ string "or" ^^ nest !indent (break 1 ^^ maybe_parens 1 e)) | ||
| 55 | oe | ||
| 56 | | Apply (e1, e2) -> prefix !indent 1 (maybe_parens 2 e1) (maybe_parens 2 e2) | ||
| 57 | | Aquote e -> surround !indent 0 (string "${") (doc_of_expr e) (string "}") | ||
| 58 | |||
| 59 | and maybe_parens lvl e = | ||
| 60 | if prec_of_expr e >= lvl then surround !indent 0 lparen (doc_of_expr e) rparen | ||
| 61 | else doc_of_expr e | ||
| 62 | |||
| 63 | and maybe_parens_bop op (loc : [ `Left | `Right ]) e = | ||
| 64 | match (loc, assoc_of_bop op) with | ||
| 65 | | (`Left, Some Left | `Right, Some Right) | ||
| 66 | when prec_of_expr e >= prec_of_bop op -> | ||
| 67 | doc_of_expr e | ||
| 68 | | _, _ -> maybe_parens (prec_of_bop op) e | ||
| 69 | |||
| 70 | and doc_of_attpath path = separate_map dot doc_of_expr path | ||
| 71 | |||
| 72 | and doc_of_paramset (params, kind) = | ||
| 73 | let ps = | ||
| 74 | List.map ~f:doc_of_param params | ||
| 75 | @ if Poly.(kind = Loose) then [ string "..." ] else [] | ||
| 76 | in | ||
| 77 | surround !indent 0 lbrace (separate (comma ^^ break 1) ps) rbrace | ||
| 78 | |||
| 79 | and doc_of_param (id, oe) = | ||
| 80 | string id ^^ optional (fun e -> qmark ^^ space ^^ doc_of_expr e) oe | ||
| 81 | |||
| 82 | and doc_of_binding = function | ||
| 83 | | AttrPath (path, e) -> | ||
| 84 | doc_of_attpath path ^^ space ^^ equals ^^ space ^^ doc_of_expr e ^^ semi | ||
| 85 | | Inherit (oe, ids) -> | ||
| 86 | let id_docs = | ||
| 87 | List.map | ||
| 88 | ~f:(function | ||
| 89 | | Id x | Val (Str (x, [])) -> string x | _ -> assert false) | ||
| 90 | ids | ||
| 91 | in | ||
| 92 | let xs = | ||
| 93 | flow (break 1) | ||
| 94 | (match oe with | ||
| 95 | | Some e -> parens (doc_of_expr e) :: id_docs | ||
| 96 | | None -> id_docs) | ||
| 97 | in | ||
| 98 | soft_surround !indent 0 (string "inherit" ^^ space) xs semi | ||
| 99 | |||
| 100 | and doc_of_bop = function | ||
| 101 | | Plus -> plus | ||
| 102 | | Minus -> minus | ||
| 103 | | Mult -> star | ||
| 104 | | Div -> slash | ||
| 105 | | Gt -> rangle | ||
| 106 | | Lt -> langle | ||
| 107 | | Lte -> string "<=" | ||
| 108 | | Gte -> string ">=" | ||
| 109 | | Eq -> string "==" | ||
| 110 | | Neq -> string "!=" | ||
| 111 | | Or -> string "||" | ||
| 112 | | And -> string "&&" | ||
| 113 | | Impl -> string "->" | ||
| 114 | | Merge -> string "//" | ||
| 115 | | Concat -> string "++" | ||
| 116 | |||
| 117 | and doc_of_uop = function Negate -> minus | Not -> bang | ||
| 118 | |||
| 119 | and doc_of_val = function | ||
| 120 | | Str (start, xs) -> | ||
| 121 | dquotes | ||
| 122 | (string (escape_string start) | ||
| 123 | ^^ concat | ||
| 124 | (List.map | ||
| 125 | ~f:(fun (e, s) -> | ||
| 126 | surround !indent 0 (string "${") (doc_of_expr e) | ||
| 127 | (string "}" ^^ string (escape_string s))) | ||
| 128 | xs)) | ||
| 129 | | IStr (i, start, xs) -> | ||
| 130 | let qq = string "''" in | ||
| 131 | let str s = | ||
| 132 | String.split ~on:'\n' s | ||
| 133 | |> List.map ~f:(fun s -> | ||
| 134 | let len = String.length s in | ||
| 135 | let s' = | ||
| 136 | if len >= i then String.sub s ~pos:i ~len:(len - i) else s | ||
| 137 | in | ||
| 138 | string s') | ||
| 139 | |> separate hardline | ||
| 140 | in | ||
| 141 | enclose qq qq | ||
| 142 | (str start | ||
| 143 | ^^ concat | ||
| 144 | (List.map | ||
| 145 | ~f:(fun (e, s) -> | ||
| 146 | enclose (string "${") rbrace (doc_of_expr e) ^^ str s) | ||
| 147 | xs)) | ||
| 148 | | Int x | Float x | Path x | SPath x | HPath x | Uri x -> string x | ||
| 149 | | Lambda (pattern, body) -> | ||
| 150 | let pat = | ||
| 151 | match pattern with | ||
| 152 | | Alias id -> string id | ||
| 153 | | ParamSet (None, ps) -> doc_of_paramset ps | ||
| 154 | | ParamSet (Some id, ps) -> | ||
| 155 | doc_of_paramset ps ^^ group (break 1 ^^ at ^^ break 1 ^^ string id) | ||
| 156 | in | ||
| 157 | flow (break 1) [ pat ^^ colon; doc_of_expr body ] | ||
| 158 | | List [] -> lbracket ^^ space ^^ rbracket | ||
| 159 | | List es -> | ||
| 160 | surround !indent 1 lbracket | ||
| 161 | (separate_map (break 1) (maybe_parens 2) es) | ||
| 162 | rbracket | ||
| 163 | | AttSet (Nonrec, []) -> lbrace ^^ space ^^ rbrace | ||
| 164 | | AttSet (Nonrec, bs) -> | ||
| 165 | surround !indent 1 lbrace | ||
| 166 | (group (separate_map (break 1) doc_of_binding bs)) | ||
| 167 | rbrace | ||
| 168 | | AttSet (Rec, bs) -> | ||
| 169 | string "rec" ^^ space ^^ doc_of_val (AttSet (Nonrec, bs)) | ||
| 170 | |||
| 171 | let print chan expr = ToChannel.pretty 0.7 !out_width chan (doc_of_expr expr) | ||
| 172 | |||
| 173 | let to_string expr = | ||
| 174 | let buf = Stdlib.Buffer.create 0 in | ||
| 175 | ToBuffer.pretty 0.7 !out_width buf (doc_of_expr expr); | ||
| 176 | Stdlib.Buffer.contents buf | ||
diff --git a/lib/nix/tokens.ml b/lib/nix/tokens.ml new file mode 100644 index 0000000..4891d48 --- /dev/null +++ b/lib/nix/tokens.ml | |||
| @@ -0,0 +1,64 @@ | |||
| 1 | type token = | ||
| 2 | (* Tokens with data *) | ||
| 3 | | INT of string | ||
| 4 | | FLOAT of string | ||
| 5 | (* A path (starting with / or ./) *) | ||
| 6 | | PATH of string | ||
| 7 | (* Search path, enclosed in < and > *) | ||
| 8 | | SPATH of string | ||
| 9 | (* Home path, starting with ~/ *) | ||
| 10 | | HPATH of string | ||
| 11 | | URI of string | ||
| 12 | | STR_START of string | ||
| 13 | | STR_MID of string | ||
| 14 | | STR_END | ||
| 15 | | ISTR_START of string | ||
| 16 | | ISTR_MID of string | ||
| 17 | | ISTR_END of int | ||
| 18 | | ID of string | ||
| 19 | (* Tokens that stand for themselves *) | ||
| 20 | | SELECT | ||
| 21 | | QMARK | ||
| 22 | | CONCAT | ||
| 23 | | NOT | ||
| 24 | | MERGE | ||
| 25 | | ASSIGN | ||
| 26 | | LT | ||
| 27 | | LTE | ||
| 28 | | GT | ||
| 29 | | GTE | ||
| 30 | | EQ | ||
| 31 | | NEQ | ||
| 32 | | AND | ||
| 33 | | OR | ||
| 34 | | IMPL | ||
| 35 | | AQUOTE_OPEN | ||
| 36 | | AQUOTE_CLOSE | ||
| 37 | | LBRACE | ||
| 38 | | RBRACE | ||
| 39 | | LBRACK | ||
| 40 | | RBRACK | ||
| 41 | | PLUS | ||
| 42 | | MINUS | ||
| 43 | | TIMES | ||
| 44 | | SLASH | ||
| 45 | | LPAREN | ||
| 46 | | RPAREN | ||
| 47 | | COLON | ||
| 48 | | SEMICOLON | ||
| 49 | | COMMA | ||
| 50 | | ELLIPSIS | ||
| 51 | | AS | ||
| 52 | (* Keywords *) | ||
| 53 | | WITH | ||
| 54 | | REC | ||
| 55 | | LET | ||
| 56 | | IN | ||
| 57 | | INHERIT | ||
| 58 | | IF | ||
| 59 | | THEN | ||
| 60 | | ELSE | ||
| 61 | | ASSERT | ||
| 62 | | ORDEF | ||
| 63 | (* End of input *) | ||
| 64 | | EOF | ||
diff --git a/lib/nix/types.ml b/lib/nix/types.ml new file mode 100644 index 0000000..8245406 --- /dev/null +++ b/lib/nix/types.ml | |||
| @@ -0,0 +1,112 @@ | |||
| 1 | open Core | ||
| 2 | |||
| 3 | (* Binary operators *) | ||
| 4 | type binary_op = | ||
| 5 | | Plus | ||
| 6 | | Minus | ||
| 7 | | Mult | ||
| 8 | | Div | ||
| 9 | | Gt | ||
| 10 | | Lt | ||
| 11 | | Lte | ||
| 12 | | Gte | ||
| 13 | | Eq | ||
| 14 | | Neq | ||
| 15 | | Or | ||
| 16 | | And | ||
| 17 | | Impl | ||
| 18 | | Merge | ||
| 19 | | Concat | ||
| 20 | [@@deriving sexp_of] | ||
| 21 | |||
| 22 | (* Unary operators *) | ||
| 23 | type unary_op = Negate | Not [@@deriving sexp_of] | ||
| 24 | |||
| 25 | (* The top-level expression type *) | ||
| 26 | type expr = | ||
| 27 | | BinaryOp of binary_op * expr * expr | ||
| 28 | | UnaryOp of unary_op * expr | ||
| 29 | | Cond of expr * expr * expr | ||
| 30 | | With of expr * expr | ||
| 31 | | Assert of expr * expr | ||
| 32 | | Test of expr * expr list | ||
| 33 | | SetLet of binding list | ||
| 34 | | Let of binding list * expr | ||
| 35 | | Val of value | ||
| 36 | | Id of id | ||
| 37 | | Select of expr * expr list * expr option | ||
| 38 | | Apply of expr * expr | ||
| 39 | | Aquote of expr | ||
| 40 | [@@deriving sexp_of] | ||
| 41 | |||
| 42 | (* Possible values *) | ||
| 43 | and value = | ||
| 44 | (* Str is a string start, followed by arbitrary number of antiquotations and | ||
| 45 | strings that separate them *) | ||
| 46 | | Str of string * (expr * string) list | ||
| 47 | (* IStr is an indented string, so it has the extra integer component which | ||
| 48 | indicates the indentation *) | ||
| 49 | | IStr of int * string * (expr * string) list | ||
| 50 | | Int of string | ||
| 51 | | Float of string | ||
| 52 | | Path of string | ||
| 53 | | SPath of string | ||
| 54 | | HPath of string | ||
| 55 | | Uri of string | ||
| 56 | | Lambda of pattern * expr | ||
| 57 | | List of expr list | ||
| 58 | | AttSet of recursivity * binding list | ||
| 59 | [@@deriving sexp_of] | ||
| 60 | |||
| 61 | (* Patterns in lambda definitions *) | ||
| 62 | and pattern = Alias of id | ParamSet of id option * param_set | ||
| 63 | [@@deriving sexp_of] | ||
| 64 | |||
| 65 | and param_set = param list * match_kind [@@deriving sexp_of] | ||
| 66 | and param = id * expr option [@@deriving sexp_of] | ||
| 67 | and recursivity = Rec | Nonrec | ||
| 68 | and match_kind = Exact | Loose | ||
| 69 | |||
| 70 | (* Bindings in attribute sets and let expressions *) | ||
| 71 | and binding = | ||
| 72 | (* The first expr should be attrpath, which is the same as in Select *) | ||
| 73 | | AttrPath of expr list * expr | ||
| 74 | | Inherit of expr option * expr list | ||
| 75 | [@@deriving sexp_of] | ||
| 76 | |||
| 77 | (* Identifiers *) | ||
| 78 | and id = string | ||
| 79 | |||
| 80 | (* Precedence levels of binary operators *) | ||
| 81 | let prec_of_bop = function | ||
| 82 | | Concat -> 5 | ||
| 83 | | Mult | Div -> 6 | ||
| 84 | | Plus | Minus -> 7 | ||
| 85 | | Merge -> 9 | ||
| 86 | | Gt | Lt | Lte | Gte -> 10 | ||
| 87 | | Eq | Neq -> 11 | ||
| 88 | | And -> 12 | ||
| 89 | | Or -> 13 | ||
| 90 | | Impl -> 14 | ||
| 91 | |||
| 92 | type assoc = Left | Right | ||
| 93 | |||
| 94 | let assoc_of_bop = function | ||
| 95 | | Mult | Div | Plus | Minus -> Some Left | ||
| 96 | | Concat | Merge | And | Or -> Some Right | ||
| 97 | | Gt | Lt | Lte | Gte | Eq | Neq | Impl -> None | ||
| 98 | |||
| 99 | (* Precedence levels of unary operators *) | ||
| 100 | let prec_of_uop = function Negate -> 3 | Not -> 8 | ||
| 101 | |||
| 102 | (* Precedence level of expressions | ||
| 103 | (assuming that the constituents have higher levels) *) | ||
| 104 | let prec_of_expr = function | ||
| 105 | | Val (Lambda _) -> 15 | ||
| 106 | | Val _ | Id _ | Aquote _ -> 0 | ||
| 107 | | BinaryOp (op, _, _) -> prec_of_bop op | ||
| 108 | | UnaryOp (op, _) -> prec_of_uop op | ||
| 109 | | Cond _ | With _ | Assert _ | Let _ | SetLet _ -> 15 | ||
| 110 | | Test _ -> 4 | ||
| 111 | | Select _ -> 1 | ||
| 112 | | Apply _ -> 2 | ||