aboutsummaryrefslogtreecommitdiffstats
path: root/lib/nix
diff options
context:
space:
mode:
authorRutger Broekhoff2025-07-07 21:52:08 +0200
committerRutger Broekhoff2025-07-07 21:52:08 +0200
commitba61dfd69504ec6263a9dee9931d93adeb6f3142 (patch)
treed6c9b78e50eeab24e0c1c09ab45909a6ae3fd5db /lib/nix
downloadverified-dyn-lang-interp-ba61dfd69504ec6263a9dee9931d93adeb6f3142.tar.gz
verified-dyn-lang-interp-ba61dfd69504ec6263a9dee9931d93adeb6f3142.zip
Initialize repository
Diffstat (limited to 'lib/nix')
-rw-r--r--lib/nix/dune15
-rw-r--r--lib/nix/elaborator.ml208
-rw-r--r--lib/nix/lexer.mll315
-rw-r--r--lib/nix/nix.ml20
-rw-r--r--lib/nix/parser.mly310
-rw-r--r--lib/nix/printer.ml176
-rw-r--r--lib/nix/tokens.ml64
-rw-r--r--lib/nix/types.ml112
8 files changed, 1220 insertions, 0 deletions
diff --git a/lib/nix/dune b/lib/nix/dune
new file mode 100644
index 0000000..3954c8a
--- /dev/null
+++ b/lib/nix/dune
@@ -0,0 +1,15 @@
1(menhir
2 (modules parser)
3 (flags "--dump" "--strict" "--external-tokens" "Tokens")
4 (infer true))
5
6(ocamllex
7 (modules lexer))
8
9(library
10 (name nix)
11 (preprocess
12 (pps ppx_sexp_conv))
13 (instrumentation
14 (backend bisect_ppx))
15 (libraries core core_unix core_unix.filename_unix pprint ppx_sexp_conv str))
diff --git a/lib/nix/elaborator.ml b/lib/nix/elaborator.ml
new file mode 100644
index 0000000..36ee0d4
--- /dev/null
+++ b/lib/nix/elaborator.ml
@@ -0,0 +1,208 @@
1open Core
2open Types
3
4(* The Nix elaborator does a few things:
5 - Attribute paths are transformed into a simple list of expressions:
6 + Simple identifiers are rewritten to string values
7 + Antiquotations are rewritten to their component expressions
8 + Anything else, that is not a string value, is rejected
9 and raises an exception
10 - In 'inherit (...) x1 ... xn', x1 ... xn are checked for 'reasonably' being
11 identifiers, i.e., being one of x, "x" and ${"x"}.
12 - Nested attribute paths are unfolded and attribute sets are merged where
13 possible. (Where we mean 'what Nix does' with 'where possible'; see the
14 comment at the respective function.)
15 - Paths are turned into strings and made absolute w.r.t. the current
16 working directory.
17 - Indented strings are converted to their 'normal' counterpart. *)
18
19exception ElaborateError of string
20
21type attr_set = recursivity * binding list
22
23let set_expr (r, bs) = Val (AttSet (r, bs))
24let get_id = function Id x -> x | _ -> assert false
25
26let rec update_bnd (bs : binding list) (x : string) ~(f : expr option -> expr) =
27 match bs with
28 | [] -> [ AttrPath ([ Val (Str (x, [])) ], f None) ]
29 | AttrPath ([ Val (Str (y, [])) ], e) :: s' when String.(x = y) ->
30 AttrPath ([ Val (Str (y, [])) ], f (Some e)) :: s'
31 | Inherit (_, ids) :: _
32 when List.exists ids ~f:(fun e -> String.(get_id e = x)) ->
33 raise (ElaborateError "Cannot update inherit")
34 | bnd :: s' -> bnd :: update_bnd s' x ~f
35
36let set_update_bnd (r, bs) x ~f = (r, update_bnd bs x ~f)
37
38let rec has_bnd (bs : binding list) (x : string) : bool =
39 match bs with
40 | [] -> false
41 | AttrPath ([ Val (Str (y, [])) ], _) :: _ when String.(x = y) -> true
42 | Inherit (_, ids) :: _
43 when List.exists ids ~f:(fun e -> String.(get_id e = x)) ->
44 true
45 | _ :: bs' -> has_bnd bs' x
46
47let merge_bnds bs1 bs2 : binding list =
48 List.fold_left bs2 ~init:bs1 ~f:(fun bs1' b2 ->
49 match b2 with
50 | AttrPath ([ Val (Str (x, [])) ], e) ->
51 update_bnd bs1' x ~f:(function
52 | Some _ -> raise (ElaborateError "Duplicated attribute")
53 | None -> e)
54 | AttrPath ([ d ], e) -> AttrPath ([ d ], e) :: bs1'
55 | Inherit (md, xs) ->
56 if List.for_all xs ~f:(fun e -> not (has_bnd bs1' (get_id e))) then
57 Inherit (md, xs) :: bs1'
58 else raise (ElaborateError "Duplicated attribute")
59 | _ -> assert false)
60
61(* This function intentionally clobbers recursivity, because that is the way
62 that Nix likes to handle attribute insertion. See
63 (1) https://github.com/NixOS/nix/issues/9020
64 (2) https://github.com/NixOS/nix/issues/11268
65 (3) https://github.com/NixOS/nix/pull/11294 *)
66let rec insert (bs : binding list) (path : expr list) (e : expr) =
67 match path with
68 | [] -> raise (ElaborateError "Cannot insert attribute with empty path")
69 | [ Val (Str (x, [])) ] ->
70 update_bnd bs x ~f:(function
71 | None -> e
72 | Some (Val (AttSet (r1, bs1))) -> (
73 match e with
74 | Val (AttSet (_, bs2)) -> set_expr (r1, merge_bnds bs1 bs2)
75 | _ -> raise (ElaborateError "Duplicated attribute"))
76 | _ -> raise (ElaborateError "Duplicated attribute"))
77 | Val (Str (x, [])) :: rest ->
78 update_bnd bs x ~f:(function
79 | Some (Val (AttSet (r, bs))) -> Val (AttSet (r, insert bs rest e))
80 | Some _ -> raise (ElaborateError "Duplicated attribute")
81 | None -> Val (AttSet (Nonrec, insert [] rest e)))
82 | [ part ] -> AttrPath ([ part ], e) :: bs
83 | part :: rest ->
84 AttrPath ([ part ], Val (AttSet (Nonrec, insert [] rest e))) :: bs
85
86let insert_inherit (bs : binding list) (from : expr option) (es : expr list) =
87 if List.for_all es ~f:(fun e -> not (has_bnd bs (get_id e))) then
88 Inherit (from, es) :: bs
89 else raise (ElaborateError "Duplicated attribute")
90
91let simplify_path_component = function
92 | Id x -> Val (Str (x, []))
93 | Val (Str (s, ess)) -> Val (Str (s, ess))
94 | Aquote e -> e
95 | _ -> raise (ElaborateError "Unexpected path component")
96
97let simplify_path = List.map ~f:simplify_path_component
98
99let simplify_bnd_paths =
100 List.map ~f:(fun bnd ->
101 match bnd with
102 | AttrPath (path, e) -> AttrPath (simplify_path path, e)
103 | Inherit (me, xs) -> Inherit (me, xs))
104
105(* Law: concat_lines ∘ split_lines = id *)
106
107let rec split_lines s =
108 match String.lsplit2 s ~on:'\n' with
109 | Some (s1, s2) -> s1 :: split_lines s2
110 | None -> [ s ]
111
112let rec concat_lines = function
113 | [] -> ""
114 | [ x ] -> x
115 | x :: xs -> x ^ "\n" ^ concat_lines xs
116
117let map_tail ~f = function [] -> [] | x :: xs -> x :: List.map ~f xs
118
119let unindent n s ~skip_first_line =
120 let map_op ~f = if skip_first_line then map_tail ~f else List.map ~f in
121 split_lines s
122 |> map_op ~f:(fun line ->
123 let expected_prefix = String.make n ' ' in
124 String.chop_prefix_if_exists ~prefix:expected_prefix line)
125 |> concat_lines
126
127let is_spaces l = String.(strip l ~drop:(Char.( = ) ' ') |> is_empty)
128
129let drop_first_empty_line s =
130 match String.lsplit2 s ~on:'\n' with
131 | Some (l, s') when is_spaces l -> s'
132 | _ -> s
133
134let rec process ?(dir = None) = function
135 | BinaryOp (op, e1, e2) -> BinaryOp (op, process ~dir e1, process ~dir e2)
136 | UnaryOp (op, e) -> UnaryOp (op, process ~dir e)
137 | Cond (e1, e2, e3) -> Cond (process ~dir e1, process ~dir e2, process ~dir e3)
138 | With (e1, e2) -> With (process ~dir e1, process ~dir e2)
139 | Assert (e1, e2) -> Assert (process ~dir e1, process ~dir e2)
140 | Test (e1, es) ->
141 Test (process ~dir e1, List.(simplify_path es >>| process ~dir))
142 | SetLet bs -> SetLet (process_bnds ~dir bs)
143 | Let (bs, e) -> Let (process_bnds ~dir bs, process ~dir e)
144 | Val v -> Val (process_val ~dir v)
145 | Id x -> Id x
146 | Select (e, es, me) ->
147 Select
148 ( process ~dir e,
149 List.(simplify_path es >>| process ~dir),
150 Option.(me >>| process ~dir) )
151 | Apply (e1, e2) -> Apply (process ~dir e1, process ~dir e2)
152 | Aquote e -> Aquote (process ~dir e)
153
154and process_val ~dir = function
155 | Str (s, ess) -> Str (s, List.(ess >>| fun (e, s) -> (process ~dir e, s)))
156 | IStr (n, s, ess) ->
157 let s' = drop_first_empty_line (unindent n s ~skip_first_line:false)
158 and ess' =
159 List.map ess ~f:(fun (e, s) ->
160 (process ~dir e, unindent n s ~skip_first_line:true))
161 in
162 Str (s', ess')
163 | Lambda (p, e) -> Lambda (process_pattern ~dir p, process ~dir e)
164 | List es -> List List.(es >>| process ~dir)
165 | AttSet (r, bs) -> AttSet (r, process_bnds ~dir bs)
166 | Path p -> (
167 if Filename.is_absolute p then Str (p, [])
168 else
169 match dir with
170 | Some dir when Filename.is_absolute dir ->
171 Str (Filename.concat dir p, [])
172 | Some _ ->
173 raise
174 (ElaborateError "Provided directory should be an absolute path")
175 | None -> raise (ElaborateError "Do not know how to resolve path"))
176 | v -> v
177
178and process_bnds ~dir bs =
179 bs
180 |> List.map ~f:(function
181 | AttrPath (es, e) ->
182 AttrPath (List.(es >>| process ~dir), process ~dir e)
183 | Inherit (me, xs) ->
184 Inherit (Option.(me >>| process ~dir), process_inherit_ids xs))
185 |> simplify_bnd_paths
186 |> List.fold ~init:[] ~f:(fun bs' bnd ->
187 match bnd with
188 | AttrPath (path, e) -> insert bs' path e
189 | Inherit (from, es) -> insert_inherit bs' from es)
190
191and process_inherit_ids =
192 List.map ~f:(function
193 | Id x | Val (Str (x, [])) | Aquote (Val (Str (x, []))) -> Id x
194 | _ -> raise (ElaborateError "Unexpected expression in inherit"))
195
196and process_pattern ~dir = function
197 | Alias x -> Alias x
198 | ParamSet (mx, (ps, k)) -> ParamSet (mx, (process_param_set ~dir mx ps, k))
199
200and process_param_set ~dir ?(seen = String.Set.empty) mx ps =
201 match ps with
202 | [] -> []
203 | (y, me) :: ps' ->
204 if Set.mem seen y || Option.mem mx y ~equal:String.( = ) then
205 raise (ElaborateError "Duplicated function argument")
206 else
207 (y, Option.(me >>| process ~dir))
208 :: process_param_set ~dir mx ps' ~seen:(Set.add seen y)
diff --git a/lib/nix/lexer.mll b/lib/nix/lexer.mll
new file mode 100644
index 0000000..023d888
--- /dev/null
+++ b/lib/nix/lexer.mll
@@ -0,0 +1,315 @@
1{
2open Core
3open Tokens
4
5exception Error of string
6
7(* Types of curly braces.
8 AQUOTE corresponds to the braces for antiquotation, i.e. '${...}'
9 and SET to an attribute set '{...}'.
10 *)
11type braces =
12 | AQUOTE
13 | SET
14
15let print_stack s =
16 let b = Buffer.create 100 in
17 Buffer.add_string b "[ ";
18 List.iter s ~f:(function
19 | AQUOTE -> Buffer.add_string b "AQUOTE; "
20 | SET -> Buffer.add_string b "SET; "
21 );
22 Buffer.add_string b "]";
23 Buffer.contents b
24
25let token_of_str state buf =
26 match state with
27 | `Start -> STR_START (Buffer.contents buf)
28 | `Mid -> STR_MID (Buffer.contents buf)
29
30let token_of_istr state buf =
31 match state with
32 | `Start -> ISTR_START (Buffer.contents buf)
33 | `Mid -> ISTR_MID (Buffer.contents buf)
34
35(* lookup table for one-character tokens *)
36let char_table = Array.create ~len:94 EOF
37let _ =
38 List.iter ~f:(fun (k, v) -> Array.set char_table ((int_of_char k) - 1) v)
39 [
40 '.', SELECT;
41 '?', QMARK;
42 '!', NOT;
43 '=', ASSIGN;
44 '<', LT;
45 '>', GT;
46 '[', LBRACK;
47 ']', RBRACK;
48 '+', PLUS;
49 '-', MINUS;
50 '*', TIMES;
51 '/', SLASH;
52 '(', LPAREN;
53 ')', RPAREN;
54 ':', COLON;
55 ';', SEMICOLON;
56 ',', COMMA;
57 '@', AS
58 ]
59
60(* lookup table for two- and three-character tokens *)
61let str_table = Hashtbl.create (module String) ~size:10
62let _ =
63 List.iter ~f:(fun (kwd, tok) -> Hashtbl.set str_table ~key:kwd ~data:tok)
64 [
65 "//", MERGE;
66 "++", CONCAT;
67 "<=", LTE;
68 ">=", GTE;
69 "==", EQ;
70 "!=", NEQ;
71 "&&", AND;
72 "||", OR;
73 "->", IMPL;
74 "...", ELLIPSIS
75 ]
76
77(* lookup table for keywords *)
78let keyword_table = Hashtbl.create (module String) ~size:10
79let _ =
80 List.iter ~f:(fun (kwd, tok) -> Hashtbl.set keyword_table ~key:kwd ~data:tok)
81 [ "with", WITH;
82 "rec", REC;
83 "let", LET;
84 "in", IN;
85 "inherit", INHERIT;
86 "if" , IF;
87 "then", THEN;
88 "else", ELSE;
89 "assert", ASSERT;
90 "or", ORDEF ]
91
92(* replace an escape sequence by the corresponding character(s) *)
93let unescape = function
94 | "\\n" -> "\n"
95 | "\\r" -> "\r"
96 | "\\t" -> "\t"
97 | "\\\\" -> "\\"
98 | "\\${" -> "${"
99 | "''$" -> "$"
100 | "$$" -> "$"
101 | "'''" -> "''"
102 | "''\\t" -> "\t"
103 | "''\\r" -> "\r"
104 | "''\\n" -> "\n"
105 | x ->
106 failwith (Printf.sprintf "unescape unexpected arg %s" x)
107
108let collect_tokens lexer q lexbuf =
109 let stack = ref [] in
110 let queue = Stdlib.Queue.create () in
111 let rec go () =
112 match (try Some (Stdlib.Queue.take queue) with Stdlib.Queue.Empty -> None) with
113 | Some token ->
114 (
115 match token, !stack with
116 | AQUOTE_CLOSE, [] ->
117 Stdlib.Queue.add AQUOTE_CLOSE q
118 | EOF, _ ->
119 Stdlib.Queue.add EOF q;
120 | _, _ ->
121 Stdlib.Queue.add token q;
122 go ()
123 )
124 | None ->
125 lexer queue stack lexbuf;
126 go ()
127 in
128 Stdlib.Queue.add AQUOTE_OPEN q;
129 stack := [AQUOTE];
130 lexer queue stack lexbuf;
131 go ()
132
133(* utility functions *)
134let print_position lexbuf =
135 let pos = Lexing.lexeme_start_p lexbuf in
136 Printf.sprintf "%s:%d:%d" pos.pos_fname
137 pos.pos_lnum (pos.pos_cnum - pos.pos_bol + 1)
138
139
140let set_filename fname (lexbuf: Lexing.lexbuf) =
141 let pos = lexbuf.lex_curr_p in
142 lexbuf.lex_curr_p <- { pos with pos_fname = fname }; lexbuf
143
144}
145
146let nzdigit = ['1'-'9']
147let digit = nzdigit | '0'
148let float = (nzdigit digit* '.' digit* | '0'? '.' digit+) (['E' 'e'] ['+' '-']? digit+)?
149let alpha = ['a'-'z' 'A'-'Z']
150let alpha_digit = alpha | digit
151let path_chr = alpha_digit | ['.' '_' '-' '+']
152let path = path_chr* ('/' path_chr+)+
153let spath = alpha_digit path_chr* ('/' path_chr+)*
154let uri_chr = ['%' '/' '?' ':' '@' '&' '=' '+' '$' ',' '-' '_' '.' '!' '~' '*' '\'']
155let scheme = alpha (alpha | ['+' '-' '.'])*
156let uri = scheme ':' (alpha_digit | uri_chr)+
157let char_tokens = ['.' '?' '!' '=' '<' '>' '[' ']' '+' '-' '*' '/' '^' '(' ')' ':' ';' ',' '@']
158
159rule get_tokens q s = parse
160(* skip whitespeces *)
161| [' ' '\t' '\r']
162 { get_tokens q s lexbuf }
163(* increase line count for new lines *)
164| '\n'
165 { Lexing.new_line lexbuf; get_tokens q s lexbuf }
166| char_tokens as c
167 { Stdlib.Queue.add (Array.get char_table ((int_of_char c) - 1)) q }
168| ("//" | "++" | "<=" | ">=" | "==" | "!=" | "&&" | "||" | "->" | "...") as s
169 { Stdlib.Queue.add (Hashtbl.find_exn str_table s) q}
170| digit+ as i
171 { Stdlib.Queue.add (INT i) q }
172| float
173 { Stdlib.Queue.add (FLOAT (Lexing.lexeme lexbuf)) q }
174| path
175 { Stdlib.Queue.add (PATH (Lexing.lexeme lexbuf)) q }
176| '<' (spath as p) '>'
177 { Stdlib.Queue.add (SPATH p) q }
178| '~' path as p
179 { Stdlib.Queue.add (HPATH p) q }
180| uri
181 { Stdlib.Queue.add(URI (Lexing.lexeme lexbuf)) q }
182(* keywords or identifiers *)
183| ((alpha | '_')+ (alpha_digit | ['_' '\'' '-'])*) as id
184 { Stdlib.Queue.add (Hashtbl.find keyword_table id |> Option.value ~default:(ID id)) q}
185(* comments *)
186| '#' ([^ '\n']* as c)
187 { ignore c; get_tokens q s lexbuf}
188| "/*"
189 { comment (Buffer.create 64) lexbuf;
190 get_tokens q s lexbuf
191 }
192(* the following three tokens change the braces stack *)
193| "${"
194 { Stdlib.Queue.add AQUOTE_OPEN q; s := AQUOTE :: !s }
195| '{'
196 { Stdlib.Queue.add LBRACE q; s := SET :: !s }
197| '}'
198 {
199 match !s with
200 | AQUOTE :: rest ->
201 Stdlib.Queue.add AQUOTE_CLOSE q; s := rest
202 | SET :: rest ->
203 Stdlib.Queue.add RBRACE q; s := rest
204 | _ ->
205 let pos = print_position lexbuf in
206 let err = Printf.sprintf "Unbalanced '}' at %s\n" pos in
207 raise (Error err)
208 }
209(* a double-quoted string *)
210| '"'
211 { string `Start (Buffer.create 64) q lexbuf }
212(* an indented string *)
213| "''" (' '+ as ws)
214 { istring `Start (Some (String.length ws)) (Buffer.create 64) q lexbuf }
215| "''"
216 { istring `Start None (Buffer.create 64) q lexbuf }
217(* End of input *)
218| eof
219 { Stdlib.Queue.add EOF q }
220(* any other character raises an exception *)
221| _
222 {
223 let pos = print_position lexbuf in
224 let tok = Lexing.lexeme lexbuf in
225 let err = Printf.sprintf "Unexpected character '%s' at %s\n" tok pos in
226 raise (Error err)
227 }
228
229(* Nix does not allow nested comments, but it is still handy to lex it
230 separately because we can properly increase line count. *)
231and comment buf = parse
232 | '\n'
233 {Lexing.new_line lexbuf; Buffer.add_char buf '\n'; comment buf lexbuf}
234 | "*/"
235 { () }
236 | _ as c
237 { Buffer.add_char buf c; comment buf lexbuf }
238
239and string state buf q = parse
240 | '"' (* terminate when we hit '"' *)
241 { Stdlib.Queue.add (token_of_str state buf) q; Stdlib.Queue.add STR_END q }
242 | '\n'
243 { Lexing.new_line lexbuf; Buffer.add_char buf '\n'; string state buf q lexbuf }
244 | ("\\n" | "\\r" | "\\t" | "\\\\" | "\\${") as s
245 { Buffer.add_string buf (unescape s); string state buf q lexbuf }
246 | "\\" (_ as c) (* add the character verbatim *)
247 { Buffer.add_char buf c; string state buf q lexbuf }
248 | "${" (* collect all the tokens till we hit the matching '}' *)
249 {
250 Stdlib.Queue.add (token_of_str state buf) q;
251 collect_tokens get_tokens q lexbuf;
252 string `Mid (Buffer.create 64) q lexbuf
253 }
254 | _ as c (* otherwise just add the character to the buffer *)
255 { Buffer.add_char buf c; string state buf q lexbuf }
256
257and istring state imin buf q = parse
258 | ('\n' ' '* "''")
259 {
260 Lexing.new_line lexbuf;
261 Buffer.add_string buf "\n";
262 let indent = match imin with | None -> 0 | Some i -> i in
263 Stdlib.Queue.add (token_of_istr state buf) q;
264 Stdlib.Queue.add (ISTR_END indent) q
265 }
266 | "''"
267 {
268 let indent = match imin with | None -> 0 | Some i -> i in
269 Stdlib.Queue.add (token_of_istr state buf) q;
270 Stdlib.Queue.add (ISTR_END indent) q
271 }
272 | ('\n' ' '* '\n') as s
273 {
274 Lexing.new_line lexbuf;
275 Lexing.new_line lexbuf;
276 Buffer.add_string buf s;
277 istring state imin buf q lexbuf
278 }
279 | ('\n' (' '* as ws)) as s
280 {
281 Lexing.new_line lexbuf;
282 Buffer.add_string buf s;
283 let ws_count = String.length ws in
284 match imin with
285 | None ->
286 istring state (Some ws_count) buf q lexbuf
287 | Some i ->
288 istring state (Some (min i ws_count)) buf q lexbuf
289 }
290 | ("''$" | "'''" | "''\\t" | "''\\r" | "''\\n") as s
291 { Buffer.add_string buf (unescape s); istring state imin buf q lexbuf }
292 | "''\\" (_ as c)
293 { Buffer.add_char buf c; istring state imin buf q lexbuf }
294 | "${"
295 {
296 Stdlib.Queue.add (token_of_istr state buf) q;
297 collect_tokens get_tokens q lexbuf;
298 istring `Mid imin (Buffer.create 64) q lexbuf
299 }
300 | _ as c
301 { Buffer.add_char buf c; istring state imin buf q lexbuf }
302{
303
304let rec next_token
305 (q: token Stdlib.Queue.t)
306 (s: braces list ref)
307 (lexbuf: Lexing.lexbuf)
308 : token =
309 match (try Some (Stdlib.Queue.take q) with | Stdlib.Queue.Empty -> None) with
310 | Some token ->
311 token
312 | None ->
313 get_tokens q s lexbuf;
314 next_token q s lexbuf
315}
diff --git a/lib/nix/nix.ml b/lib/nix/nix.ml
new file mode 100644
index 0000000..39dc94c
--- /dev/null
+++ b/lib/nix/nix.ml
@@ -0,0 +1,20 @@
1open Core
2module Ast = Types
3module Printer = Printer
4
5exception ParseError of string
6
7let parse ~filename (data : string) =
8 let lexbuf = Lexer.set_filename filename (Lexing.from_string data)
9 and q, s = (Stdlib.Queue.create (), ref []) in
10 try Parser.main (Lexer.next_token q s) lexbuf with
11 | Lexer.Error msg ->
12 let msg' = String.rstrip msg in
13 raise (ParseError (sprintf "Lexing error: %s" msg'))
14 | Parser.Error ->
15 let msg = sprintf "Parse error at %s" (Lexer.print_position lexbuf) in
16 raise (ParseError msg)
17
18let elaborate = Elaborator.process
19
20exception ElaborateError = Elaborator.ElaborateError
diff --git a/lib/nix/parser.mly b/lib/nix/parser.mly
new file mode 100644
index 0000000..dc1638d
--- /dev/null
+++ b/lib/nix/parser.mly
@@ -0,0 +1,310 @@
1/* Tokens with data */
2%token <string> INT
3%token <string> FLOAT
4/* A path */
5%token <string> PATH
6/* Search path, enclosed in <> */
7%token <string> SPATH
8/* Home path, starts with ~ */
9%token <string> HPATH
10%token <string> URI
11%token <string> STR_START
12%token <string> STR_MID
13%token STR_END
14%token <string> ISTR_START
15%token <string> ISTR_MID
16%token <int> ISTR_END
17%token <string> ID
18/* Tokens that stand for themselves */
19%token SELECT "."
20%token QMARK "?"
21%token CONCAT "++"
22%token NOT "!"
23%token MERGE "//"
24%token ASSIGN "="
25%token LT "<"
26%token LTE "<="
27%token GT ">"
28%token GTE ">="
29%token EQ "=="
30%token NEQ "!="
31%token AND "&&"
32%token OR "||"
33%token IMPL "->"
34%token AQUOTE_OPEN "${"
35%token AQUOTE_CLOSE "}$"
36%token LBRACE "{"
37%token RBRACE "}"
38%token LBRACK "["
39%token RBRACK "]"
40%token PLUS "+"
41%token MINUS "-"
42%token TIMES "*"
43%token SLASH "/"
44%token LPAREN "("
45%token RPAREN ")"
46%token COLON ":"
47%token SEMICOLON ";"
48%token COMMA ","
49%token ELLIPSIS "..."
50%token AS "@"
51/* Keywords */
52%token WITH "with"
53%token REC "rec"
54%token LET "let"
55%token IN "in"
56%token INHERIT "inherit"
57%token IF "if"
58%token THEN "then"
59%token ELSE "else"
60%token ASSERT "assert"
61%token ORDEF "or"
62
63/* End of input */
64%token EOF
65
66%{
67 open Types
68%}
69
70%start <Types.expr> main
71
72%%
73
74main:
75| e = expr0 EOF
76 { e }
77
78expr0:
79| "if"; e1 = expr0; "then"; e2 = expr0; "else"; e3 = expr0
80 { Cond (e1, e2, e3) }
81| "with"; e1 = expr0; ";"; e2 = expr0
82 { With (e1, e2) }
83| "assert"; e1 = expr0; ";"; e2 = expr0
84 { Assert (e1, e2) }
85| "let"; xs = delimited("{", list(binding), "}")
86 { SetLet xs }
87| "let"; xs = list(binding); "in"; e = expr0
88 { Let (xs, e) }
89| l = lambda
90 { Val l }
91| e = expr1
92 { e }
93
94/* Rules expr1-expr14 are almost direct translation of the operator
95 precedence table:
96 https://nixos.org/nix/manual/#sec-language-operators */
97
98%inline binary_expr(Lhs, Op, Rhs):
99| lhs = Lhs; op = Op; rhs = Rhs
100 { BinaryOp (op, lhs, rhs) }
101
102expr1:
103| e = binary_expr(expr2, "->" {Impl}, expr1)
104| e = expr2
105 { e }
106
107expr2:
108| e = binary_expr(expr2, "||" {Or}, expr3)
109| e = expr3
110 { e }
111
112expr3:
113| e = binary_expr(expr3, "&&" {And}, expr4)
114| e = expr4
115 { e }
116
117%inline expr4_ops:
118| "==" { Eq }
119| "!=" { Neq }
120
121expr4:
122| e = binary_expr(expr5, expr4_ops, expr5)
123| e = expr5
124 { e }
125
126%inline expr5_ops:
127| "<" { Lt }
128| ">" { Gt }
129| "<=" { Lte }
130| ">=" { Gte }
131
132expr5:
133| e = binary_expr(expr6, expr5_ops, expr6)
134| e = expr6
135 { e }
136
137expr6:
138| e = binary_expr(expr7, "//" {Merge}, expr6)
139| e = expr7
140 { e }
141
142expr7:
143| e = preceded("!", expr7)
144 { UnaryOp (Not, e) }
145| e = expr8
146 { e }
147
148%inline expr8_ops:
149| "+" { Plus }
150| "-" { Minus }
151
152expr8:
153| e = binary_expr(expr8, expr8_ops, expr9)
154| e = expr9
155 { e }
156
157%inline expr9_ops:
158| "*" { Mult }
159| "/" { Div }
160
161expr9:
162| e = binary_expr(expr9, expr9_ops, expr10)
163| e = expr10
164 { e }
165
166expr10:
167| e = binary_expr(expr11, "++" {Concat}, expr10)
168| e = expr11
169 { e }
170
171expr11:
172| e = expr12 "?" p = attr_path
173 { Test (e, p) }
174| e = expr12
175 { e }
176
177expr12:
178| e = preceded("-", expr13)
179 { UnaryOp (Negate, e) }
180| e = expr13
181 { e }
182
183expr13:
184| f = expr13; arg = expr14
185 { Apply (f, arg) }
186| e = expr14
187 { e }
188
189%inline selectable:
190| s = set
191 { Val s }
192| id = ID
193 { Id id }
194| e = delimited("(", expr0, ")")
195 { e }
196
197expr14:
198| e = selectable; "."; p = attr_path; o = option(preceded("or", expr14))
199 { Select (e, p, o) }
200| e = atomic_expr; "or"
201 { Apply (e, Id "or") }
202| e = atomic_expr
203 { e }
204
205atomic_expr:
206| id = ID
207 { Id id }
208| v = value
209 { Val v }
210| e = delimited("(", expr0, ")")
211 { e }
212
213attr_path:
214| p = separated_nonempty_list(".", attr_path_component)
215 { p }
216
217attr_path_component:
218| "or"
219 { Id "or" }
220| id = ID
221 { Id id }
222| e = delimited("${", expr0, "}$")
223 { Aquote e }
224| s = str
225 { Val s }
226
227value:
228| s = str
229 { s }
230| s = istr
231 { s }
232| i = INT
233 {Int i}
234| f = FLOAT
235 { Float f }
236| p = PATH
237 { Path p }
238| sp = SPATH
239 { SPath sp }
240| hp = HPATH
241 { HPath hp }
242| uri = URI
243 { Uri uri }
244| l = nixlist
245 { l }
246| s = set
247 { s }
248
249%inline str_mid(X):
250| xs = list(pair(delimited("${", expr0, "}$"), X)) { xs }
251
252/* Double-quoted string */
253str:
254| start = STR_START; mids = str_mid(STR_MID); STR_END
255 { Str (start, mids) }
256
257/* Indented string */
258istr:
259| start = ISTR_START; mids = str_mid(ISTR_MID); i = ISTR_END
260 { IStr (i, start, mids) }
261
262/* Lists and sets */
263nixlist:
264| xs = delimited("[", list(expr14), "]")
265 { List xs }
266
267empty_set:
268| "{"; "}" {}
269
270set:
271| empty_set
272 { AttSet (Nonrec, []) }
273| xs = delimited("{", nonempty_list(binding), "}")
274 { AttSet (Nonrec, xs) }
275| xs = preceded("rec", delimited("{", list(binding), "}"))
276 { AttSet (Rec, xs) }
277
278binding:
279| kv = terminated(separated_pair(attr_path, "=", expr0), ";")
280 { let (k, v) = kv in AttrPath (k, v) }
281| xs = delimited("inherit", pair(option(delimited("(", expr0, ")")), list(attr_path_component)), ";")
282 { let (prefix, ids) = xs in Inherit (prefix, ids) }
283
284lambda:
285| id = ID; "@"; p = param_set; ":"; e = expr0
286 { Lambda (ParamSet (Some id, p), e) }
287| p = param_set; "@"; id = ID; ":"; e = expr0
288 { Lambda (ParamSet (Some id, p), e) }
289| p = param_set; ":"; e = expr0
290 { Lambda (ParamSet (None, p), e) }
291| id = ID; ":"; e = expr0
292 { Lambda (Alias id, e) }
293
294%inline param_set:
295| empty_set
296 { ([], Exact) }
297| "{"; "..."; "}"
298 { ([], Loose) }
299| ps = delimited("{", pair(pair(params, ","?), boption("...")), "}")
300 { let ((ps, _), ellipsis) = ps in (ps, if ellipsis then Loose else Exact) }
301
302params:
303| p = param
304 { [p] }
305| ps = params; ","; p = param
306 { ps @ [p] }
307
308%inline param:
309p = pair(ID, option(preceded("?", expr0)))
310 { p }
diff --git a/lib/nix/printer.ml b/lib/nix/printer.ml
new file mode 100644
index 0000000..57e81f4
--- /dev/null
+++ b/lib/nix/printer.ml
@@ -0,0 +1,176 @@
1open Core
2open Types
3open PPrint
4
5let rec escape_chlist = function
6 | [] -> []
7 | '$' :: '{' :: l' -> '\\' :: '$' :: '{' :: escape_chlist l'
8 | '\n' :: l' -> '\\' :: 'n' :: escape_chlist l'
9 | '\r' :: l' -> '\\' :: 'r' :: escape_chlist l'
10 | '\t' :: l' -> '\\' :: 't' :: escape_chlist l'
11 | '\\' :: l' -> '\\' :: '\\' :: escape_chlist l'
12 | '"' :: l' -> '\\' :: '"' :: escape_chlist l'
13 | c :: l' -> c :: escape_chlist l'
14
15let escape_string s = s |> String.to_list |> escape_chlist |> String.of_list
16let out_width = ref 80
17let set_width i = out_width := i
18let indent = ref 2
19let set_indent i = indent := i
20
21let rec doc_of_expr = function
22 | BinaryOp (op, lhs, rhs) ->
23 let lhs_doc = maybe_parens_bop op `Left lhs
24 and rhs_doc = maybe_parens_bop op `Right rhs in
25 infix !indent 1 (doc_of_bop op) lhs_doc rhs_doc
26 | UnaryOp (op, e) -> precede (doc_of_uop op) (maybe_parens (prec_of_uop op) e)
27 | Cond (e1, e2, e3) ->
28 surround !indent 1
29 (soft_surround !indent 1 (string "if") (doc_of_expr e1) (string "then"))
30 (doc_of_expr e2)
31 (string "else" ^^ nest !indent (break 1 ^^ doc_of_expr e3))
32 | With (e1, e2) ->
33 flow (break 1) [ string "with"; doc_of_expr e1 ^^ semi; doc_of_expr e2 ]
34 | Assert (e1, e2) ->
35 flow (break 1) [ string "assert"; doc_of_expr e1 ^^ semi; doc_of_expr e2 ]
36 | Test (e, path) ->
37 maybe_parens 4 e ^^ space ^^ string "?"
38 ^^ group (break 1 ^^ separate_map dot doc_of_expr path)
39 | SetLet bs ->
40 surround !indent 1
41 (string "let " ^^ lbrace)
42 (group (separate_map (break 1) doc_of_binding bs))
43 rbrace
44 | Let (bs, e) ->
45 surround !indent 1 (string "let")
46 (separate_map (break 1) doc_of_binding bs)
47 (prefix !indent 1 (string "in") (doc_of_expr e))
48 | Val v -> doc_of_val v
49 | Id id -> string id
50 | Select (e, path, oe) ->
51 maybe_parens 1 e ^^ dot ^^ doc_of_attpath path
52 ^^ optional
53 (fun e ->
54 space ^^ string "or" ^^ nest !indent (break 1 ^^ maybe_parens 1 e))
55 oe
56 | Apply (e1, e2) -> prefix !indent 1 (maybe_parens 2 e1) (maybe_parens 2 e2)
57 | Aquote e -> surround !indent 0 (string "${") (doc_of_expr e) (string "}")
58
59and maybe_parens lvl e =
60 if prec_of_expr e >= lvl then surround !indent 0 lparen (doc_of_expr e) rparen
61 else doc_of_expr e
62
63and maybe_parens_bop op (loc : [ `Left | `Right ]) e =
64 match (loc, assoc_of_bop op) with
65 | (`Left, Some Left | `Right, Some Right)
66 when prec_of_expr e >= prec_of_bop op ->
67 doc_of_expr e
68 | _, _ -> maybe_parens (prec_of_bop op) e
69
70and doc_of_attpath path = separate_map dot doc_of_expr path
71
72and doc_of_paramset (params, kind) =
73 let ps =
74 List.map ~f:doc_of_param params
75 @ if Poly.(kind = Loose) then [ string "..." ] else []
76 in
77 surround !indent 0 lbrace (separate (comma ^^ break 1) ps) rbrace
78
79and doc_of_param (id, oe) =
80 string id ^^ optional (fun e -> qmark ^^ space ^^ doc_of_expr e) oe
81
82and doc_of_binding = function
83 | AttrPath (path, e) ->
84 doc_of_attpath path ^^ space ^^ equals ^^ space ^^ doc_of_expr e ^^ semi
85 | Inherit (oe, ids) ->
86 let id_docs =
87 List.map
88 ~f:(function
89 | Id x | Val (Str (x, [])) -> string x | _ -> assert false)
90 ids
91 in
92 let xs =
93 flow (break 1)
94 (match oe with
95 | Some e -> parens (doc_of_expr e) :: id_docs
96 | None -> id_docs)
97 in
98 soft_surround !indent 0 (string "inherit" ^^ space) xs semi
99
100and doc_of_bop = function
101 | Plus -> plus
102 | Minus -> minus
103 | Mult -> star
104 | Div -> slash
105 | Gt -> rangle
106 | Lt -> langle
107 | Lte -> string "<="
108 | Gte -> string ">="
109 | Eq -> string "=="
110 | Neq -> string "!="
111 | Or -> string "||"
112 | And -> string "&&"
113 | Impl -> string "->"
114 | Merge -> string "//"
115 | Concat -> string "++"
116
117and doc_of_uop = function Negate -> minus | Not -> bang
118
119and doc_of_val = function
120 | Str (start, xs) ->
121 dquotes
122 (string (escape_string start)
123 ^^ concat
124 (List.map
125 ~f:(fun (e, s) ->
126 surround !indent 0 (string "${") (doc_of_expr e)
127 (string "}" ^^ string (escape_string s)))
128 xs))
129 | IStr (i, start, xs) ->
130 let qq = string "''" in
131 let str s =
132 String.split ~on:'\n' s
133 |> List.map ~f:(fun s ->
134 let len = String.length s in
135 let s' =
136 if len >= i then String.sub s ~pos:i ~len:(len - i) else s
137 in
138 string s')
139 |> separate hardline
140 in
141 enclose qq qq
142 (str start
143 ^^ concat
144 (List.map
145 ~f:(fun (e, s) ->
146 enclose (string "${") rbrace (doc_of_expr e) ^^ str s)
147 xs))
148 | Int x | Float x | Path x | SPath x | HPath x | Uri x -> string x
149 | Lambda (pattern, body) ->
150 let pat =
151 match pattern with
152 | Alias id -> string id
153 | ParamSet (None, ps) -> doc_of_paramset ps
154 | ParamSet (Some id, ps) ->
155 doc_of_paramset ps ^^ group (break 1 ^^ at ^^ break 1 ^^ string id)
156 in
157 flow (break 1) [ pat ^^ colon; doc_of_expr body ]
158 | List [] -> lbracket ^^ space ^^ rbracket
159 | List es ->
160 surround !indent 1 lbracket
161 (separate_map (break 1) (maybe_parens 2) es)
162 rbracket
163 | AttSet (Nonrec, []) -> lbrace ^^ space ^^ rbrace
164 | AttSet (Nonrec, bs) ->
165 surround !indent 1 lbrace
166 (group (separate_map (break 1) doc_of_binding bs))
167 rbrace
168 | AttSet (Rec, bs) ->
169 string "rec" ^^ space ^^ doc_of_val (AttSet (Nonrec, bs))
170
171let print chan expr = ToChannel.pretty 0.7 !out_width chan (doc_of_expr expr)
172
173let to_string expr =
174 let buf = Stdlib.Buffer.create 0 in
175 ToBuffer.pretty 0.7 !out_width buf (doc_of_expr expr);
176 Stdlib.Buffer.contents buf
diff --git a/lib/nix/tokens.ml b/lib/nix/tokens.ml
new file mode 100644
index 0000000..4891d48
--- /dev/null
+++ b/lib/nix/tokens.ml
@@ -0,0 +1,64 @@
1type token =
2 (* Tokens with data *)
3 | INT of string
4 | FLOAT of string
5 (* A path (starting with / or ./) *)
6 | PATH of string
7 (* Search path, enclosed in < and > *)
8 | SPATH of string
9 (* Home path, starting with ~/ *)
10 | HPATH of string
11 | URI of string
12 | STR_START of string
13 | STR_MID of string
14 | STR_END
15 | ISTR_START of string
16 | ISTR_MID of string
17 | ISTR_END of int
18 | ID of string
19 (* Tokens that stand for themselves *)
20 | SELECT
21 | QMARK
22 | CONCAT
23 | NOT
24 | MERGE
25 | ASSIGN
26 | LT
27 | LTE
28 | GT
29 | GTE
30 | EQ
31 | NEQ
32 | AND
33 | OR
34 | IMPL
35 | AQUOTE_OPEN
36 | AQUOTE_CLOSE
37 | LBRACE
38 | RBRACE
39 | LBRACK
40 | RBRACK
41 | PLUS
42 | MINUS
43 | TIMES
44 | SLASH
45 | LPAREN
46 | RPAREN
47 | COLON
48 | SEMICOLON
49 | COMMA
50 | ELLIPSIS
51 | AS
52 (* Keywords *)
53 | WITH
54 | REC
55 | LET
56 | IN
57 | INHERIT
58 | IF
59 | THEN
60 | ELSE
61 | ASSERT
62 | ORDEF
63 (* End of input *)
64 | EOF
diff --git a/lib/nix/types.ml b/lib/nix/types.ml
new file mode 100644
index 0000000..8245406
--- /dev/null
+++ b/lib/nix/types.ml
@@ -0,0 +1,112 @@
1open Core
2
3(* Binary operators *)
4type binary_op =
5 | Plus
6 | Minus
7 | Mult
8 | Div
9 | Gt
10 | Lt
11 | Lte
12 | Gte
13 | Eq
14 | Neq
15 | Or
16 | And
17 | Impl
18 | Merge
19 | Concat
20[@@deriving sexp_of]
21
22(* Unary operators *)
23type unary_op = Negate | Not [@@deriving sexp_of]
24
25(* The top-level expression type *)
26type expr =
27 | BinaryOp of binary_op * expr * expr
28 | UnaryOp of unary_op * expr
29 | Cond of expr * expr * expr
30 | With of expr * expr
31 | Assert of expr * expr
32 | Test of expr * expr list
33 | SetLet of binding list
34 | Let of binding list * expr
35 | Val of value
36 | Id of id
37 | Select of expr * expr list * expr option
38 | Apply of expr * expr
39 | Aquote of expr
40[@@deriving sexp_of]
41
42(* Possible values *)
43and value =
44 (* Str is a string start, followed by arbitrary number of antiquotations and
45 strings that separate them *)
46 | Str of string * (expr * string) list
47 (* IStr is an indented string, so it has the extra integer component which
48 indicates the indentation *)
49 | IStr of int * string * (expr * string) list
50 | Int of string
51 | Float of string
52 | Path of string
53 | SPath of string
54 | HPath of string
55 | Uri of string
56 | Lambda of pattern * expr
57 | List of expr list
58 | AttSet of recursivity * binding list
59[@@deriving sexp_of]
60
61(* Patterns in lambda definitions *)
62and pattern = Alias of id | ParamSet of id option * param_set
63[@@deriving sexp_of]
64
65and param_set = param list * match_kind [@@deriving sexp_of]
66and param = id * expr option [@@deriving sexp_of]
67and recursivity = Rec | Nonrec
68and match_kind = Exact | Loose
69
70(* Bindings in attribute sets and let expressions *)
71and binding =
72 (* The first expr should be attrpath, which is the same as in Select *)
73 | AttrPath of expr list * expr
74 | Inherit of expr option * expr list
75[@@deriving sexp_of]
76
77(* Identifiers *)
78and id = string
79
80(* Precedence levels of binary operators *)
81let prec_of_bop = function
82 | Concat -> 5
83 | Mult | Div -> 6
84 | Plus | Minus -> 7
85 | Merge -> 9
86 | Gt | Lt | Lte | Gte -> 10
87 | Eq | Neq -> 11
88 | And -> 12
89 | Or -> 13
90 | Impl -> 14
91
92type assoc = Left | Right
93
94let assoc_of_bop = function
95 | Mult | Div | Plus | Minus -> Some Left
96 | Concat | Merge | And | Or -> Some Right
97 | Gt | Lt | Lte | Gte | Eq | Neq | Impl -> None
98
99(* Precedence levels of unary operators *)
100let prec_of_uop = function Negate -> 3 | Not -> 8
101
102(* Precedence level of expressions
103 (assuming that the constituents have higher levels) *)
104let prec_of_expr = function
105 | Val (Lambda _) -> 15
106 | Val _ | Id _ | Aquote _ -> 0
107 | BinaryOp (op, _, _) -> prec_of_bop op
108 | UnaryOp (op, _) -> prec_of_uop op
109 | Cond _ | With _ | Assert _ | Let _ | SetLet _ -> 15
110 | Test _ -> 4
111 | Select _ -> 1
112 | Apply _ -> 2