diff options
author | Rutger Broekhoff | 2025-07-07 21:52:08 +0200 |
---|---|---|
committer | Rutger Broekhoff | 2025-07-07 21:52:08 +0200 |
commit | ba61dfd69504ec6263a9dee9931d93adeb6f3142 (patch) | |
tree | d6c9b78e50eeab24e0c1c09ab45909a6ae3fd5db /lib/nix | |
download | verified-dyn-lang-interp-ba61dfd69504ec6263a9dee9931d93adeb6f3142.tar.gz verified-dyn-lang-interp-ba61dfd69504ec6263a9dee9931d93adeb6f3142.zip |
Initialize repository
Diffstat (limited to 'lib/nix')
-rw-r--r-- | lib/nix/dune | 15 | ||||
-rw-r--r-- | lib/nix/elaborator.ml | 208 | ||||
-rw-r--r-- | lib/nix/lexer.mll | 315 | ||||
-rw-r--r-- | lib/nix/nix.ml | 20 | ||||
-rw-r--r-- | lib/nix/parser.mly | 310 | ||||
-rw-r--r-- | lib/nix/printer.ml | 176 | ||||
-rw-r--r-- | lib/nix/tokens.ml | 64 | ||||
-rw-r--r-- | lib/nix/types.ml | 112 |
8 files changed, 1220 insertions, 0 deletions
diff --git a/lib/nix/dune b/lib/nix/dune new file mode 100644 index 0000000..3954c8a --- /dev/null +++ b/lib/nix/dune | |||
@@ -0,0 +1,15 @@ | |||
1 | (menhir | ||
2 | (modules parser) | ||
3 | (flags "--dump" "--strict" "--external-tokens" "Tokens") | ||
4 | (infer true)) | ||
5 | |||
6 | (ocamllex | ||
7 | (modules lexer)) | ||
8 | |||
9 | (library | ||
10 | (name nix) | ||
11 | (preprocess | ||
12 | (pps ppx_sexp_conv)) | ||
13 | (instrumentation | ||
14 | (backend bisect_ppx)) | ||
15 | (libraries core core_unix core_unix.filename_unix pprint ppx_sexp_conv str)) | ||
diff --git a/lib/nix/elaborator.ml b/lib/nix/elaborator.ml new file mode 100644 index 0000000..36ee0d4 --- /dev/null +++ b/lib/nix/elaborator.ml | |||
@@ -0,0 +1,208 @@ | |||
1 | open Core | ||
2 | open Types | ||
3 | |||
4 | (* The Nix elaborator does a few things: | ||
5 | - Attribute paths are transformed into a simple list of expressions: | ||
6 | + Simple identifiers are rewritten to string values | ||
7 | + Antiquotations are rewritten to their component expressions | ||
8 | + Anything else, that is not a string value, is rejected | ||
9 | and raises an exception | ||
10 | - In 'inherit (...) x1 ... xn', x1 ... xn are checked for 'reasonably' being | ||
11 | identifiers, i.e., being one of x, "x" and ${"x"}. | ||
12 | - Nested attribute paths are unfolded and attribute sets are merged where | ||
13 | possible. (Where we mean 'what Nix does' with 'where possible'; see the | ||
14 | comment at the respective function.) | ||
15 | - Paths are turned into strings and made absolute w.r.t. the current | ||
16 | working directory. | ||
17 | - Indented strings are converted to their 'normal' counterpart. *) | ||
18 | |||
19 | exception ElaborateError of string | ||
20 | |||
21 | type attr_set = recursivity * binding list | ||
22 | |||
23 | let set_expr (r, bs) = Val (AttSet (r, bs)) | ||
24 | let get_id = function Id x -> x | _ -> assert false | ||
25 | |||
26 | let rec update_bnd (bs : binding list) (x : string) ~(f : expr option -> expr) = | ||
27 | match bs with | ||
28 | | [] -> [ AttrPath ([ Val (Str (x, [])) ], f None) ] | ||
29 | | AttrPath ([ Val (Str (y, [])) ], e) :: s' when String.(x = y) -> | ||
30 | AttrPath ([ Val (Str (y, [])) ], f (Some e)) :: s' | ||
31 | | Inherit (_, ids) :: _ | ||
32 | when List.exists ids ~f:(fun e -> String.(get_id e = x)) -> | ||
33 | raise (ElaborateError "Cannot update inherit") | ||
34 | | bnd :: s' -> bnd :: update_bnd s' x ~f | ||
35 | |||
36 | let set_update_bnd (r, bs) x ~f = (r, update_bnd bs x ~f) | ||
37 | |||
38 | let rec has_bnd (bs : binding list) (x : string) : bool = | ||
39 | match bs with | ||
40 | | [] -> false | ||
41 | | AttrPath ([ Val (Str (y, [])) ], _) :: _ when String.(x = y) -> true | ||
42 | | Inherit (_, ids) :: _ | ||
43 | when List.exists ids ~f:(fun e -> String.(get_id e = x)) -> | ||
44 | true | ||
45 | | _ :: bs' -> has_bnd bs' x | ||
46 | |||
47 | let merge_bnds bs1 bs2 : binding list = | ||
48 | List.fold_left bs2 ~init:bs1 ~f:(fun bs1' b2 -> | ||
49 | match b2 with | ||
50 | | AttrPath ([ Val (Str (x, [])) ], e) -> | ||
51 | update_bnd bs1' x ~f:(function | ||
52 | | Some _ -> raise (ElaborateError "Duplicated attribute") | ||
53 | | None -> e) | ||
54 | | AttrPath ([ d ], e) -> AttrPath ([ d ], e) :: bs1' | ||
55 | | Inherit (md, xs) -> | ||
56 | if List.for_all xs ~f:(fun e -> not (has_bnd bs1' (get_id e))) then | ||
57 | Inherit (md, xs) :: bs1' | ||
58 | else raise (ElaborateError "Duplicated attribute") | ||
59 | | _ -> assert false) | ||
60 | |||
61 | (* This function intentionally clobbers recursivity, because that is the way | ||
62 | that Nix likes to handle attribute insertion. See | ||
63 | (1) https://github.com/NixOS/nix/issues/9020 | ||
64 | (2) https://github.com/NixOS/nix/issues/11268 | ||
65 | (3) https://github.com/NixOS/nix/pull/11294 *) | ||
66 | let rec insert (bs : binding list) (path : expr list) (e : expr) = | ||
67 | match path with | ||
68 | | [] -> raise (ElaborateError "Cannot insert attribute with empty path") | ||
69 | | [ Val (Str (x, [])) ] -> | ||
70 | update_bnd bs x ~f:(function | ||
71 | | None -> e | ||
72 | | Some (Val (AttSet (r1, bs1))) -> ( | ||
73 | match e with | ||
74 | | Val (AttSet (_, bs2)) -> set_expr (r1, merge_bnds bs1 bs2) | ||
75 | | _ -> raise (ElaborateError "Duplicated attribute")) | ||
76 | | _ -> raise (ElaborateError "Duplicated attribute")) | ||
77 | | Val (Str (x, [])) :: rest -> | ||
78 | update_bnd bs x ~f:(function | ||
79 | | Some (Val (AttSet (r, bs))) -> Val (AttSet (r, insert bs rest e)) | ||
80 | | Some _ -> raise (ElaborateError "Duplicated attribute") | ||
81 | | None -> Val (AttSet (Nonrec, insert [] rest e))) | ||
82 | | [ part ] -> AttrPath ([ part ], e) :: bs | ||
83 | | part :: rest -> | ||
84 | AttrPath ([ part ], Val (AttSet (Nonrec, insert [] rest e))) :: bs | ||
85 | |||
86 | let insert_inherit (bs : binding list) (from : expr option) (es : expr list) = | ||
87 | if List.for_all es ~f:(fun e -> not (has_bnd bs (get_id e))) then | ||
88 | Inherit (from, es) :: bs | ||
89 | else raise (ElaborateError "Duplicated attribute") | ||
90 | |||
91 | let simplify_path_component = function | ||
92 | | Id x -> Val (Str (x, [])) | ||
93 | | Val (Str (s, ess)) -> Val (Str (s, ess)) | ||
94 | | Aquote e -> e | ||
95 | | _ -> raise (ElaborateError "Unexpected path component") | ||
96 | |||
97 | let simplify_path = List.map ~f:simplify_path_component | ||
98 | |||
99 | let simplify_bnd_paths = | ||
100 | List.map ~f:(fun bnd -> | ||
101 | match bnd with | ||
102 | | AttrPath (path, e) -> AttrPath (simplify_path path, e) | ||
103 | | Inherit (me, xs) -> Inherit (me, xs)) | ||
104 | |||
105 | (* Law: concat_lines ∘ split_lines = id *) | ||
106 | |||
107 | let rec split_lines s = | ||
108 | match String.lsplit2 s ~on:'\n' with | ||
109 | | Some (s1, s2) -> s1 :: split_lines s2 | ||
110 | | None -> [ s ] | ||
111 | |||
112 | let rec concat_lines = function | ||
113 | | [] -> "" | ||
114 | | [ x ] -> x | ||
115 | | x :: xs -> x ^ "\n" ^ concat_lines xs | ||
116 | |||
117 | let map_tail ~f = function [] -> [] | x :: xs -> x :: List.map ~f xs | ||
118 | |||
119 | let unindent n s ~skip_first_line = | ||
120 | let map_op ~f = if skip_first_line then map_tail ~f else List.map ~f in | ||
121 | split_lines s | ||
122 | |> map_op ~f:(fun line -> | ||
123 | let expected_prefix = String.make n ' ' in | ||
124 | String.chop_prefix_if_exists ~prefix:expected_prefix line) | ||
125 | |> concat_lines | ||
126 | |||
127 | let is_spaces l = String.(strip l ~drop:(Char.( = ) ' ') |> is_empty) | ||
128 | |||
129 | let drop_first_empty_line s = | ||
130 | match String.lsplit2 s ~on:'\n' with | ||
131 | | Some (l, s') when is_spaces l -> s' | ||
132 | | _ -> s | ||
133 | |||
134 | let rec process ?(dir = None) = function | ||
135 | | BinaryOp (op, e1, e2) -> BinaryOp (op, process ~dir e1, process ~dir e2) | ||
136 | | UnaryOp (op, e) -> UnaryOp (op, process ~dir e) | ||
137 | | Cond (e1, e2, e3) -> Cond (process ~dir e1, process ~dir e2, process ~dir e3) | ||
138 | | With (e1, e2) -> With (process ~dir e1, process ~dir e2) | ||
139 | | Assert (e1, e2) -> Assert (process ~dir e1, process ~dir e2) | ||
140 | | Test (e1, es) -> | ||
141 | Test (process ~dir e1, List.(simplify_path es >>| process ~dir)) | ||
142 | | SetLet bs -> SetLet (process_bnds ~dir bs) | ||
143 | | Let (bs, e) -> Let (process_bnds ~dir bs, process ~dir e) | ||
144 | | Val v -> Val (process_val ~dir v) | ||
145 | | Id x -> Id x | ||
146 | | Select (e, es, me) -> | ||
147 | Select | ||
148 | ( process ~dir e, | ||
149 | List.(simplify_path es >>| process ~dir), | ||
150 | Option.(me >>| process ~dir) ) | ||
151 | | Apply (e1, e2) -> Apply (process ~dir e1, process ~dir e2) | ||
152 | | Aquote e -> Aquote (process ~dir e) | ||
153 | |||
154 | and process_val ~dir = function | ||
155 | | Str (s, ess) -> Str (s, List.(ess >>| fun (e, s) -> (process ~dir e, s))) | ||
156 | | IStr (n, s, ess) -> | ||
157 | let s' = drop_first_empty_line (unindent n s ~skip_first_line:false) | ||
158 | and ess' = | ||
159 | List.map ess ~f:(fun (e, s) -> | ||
160 | (process ~dir e, unindent n s ~skip_first_line:true)) | ||
161 | in | ||
162 | Str (s', ess') | ||
163 | | Lambda (p, e) -> Lambda (process_pattern ~dir p, process ~dir e) | ||
164 | | List es -> List List.(es >>| process ~dir) | ||
165 | | AttSet (r, bs) -> AttSet (r, process_bnds ~dir bs) | ||
166 | | Path p -> ( | ||
167 | if Filename.is_absolute p then Str (p, []) | ||
168 | else | ||
169 | match dir with | ||
170 | | Some dir when Filename.is_absolute dir -> | ||
171 | Str (Filename.concat dir p, []) | ||
172 | | Some _ -> | ||
173 | raise | ||
174 | (ElaborateError "Provided directory should be an absolute path") | ||
175 | | None -> raise (ElaborateError "Do not know how to resolve path")) | ||
176 | | v -> v | ||
177 | |||
178 | and process_bnds ~dir bs = | ||
179 | bs | ||
180 | |> List.map ~f:(function | ||
181 | | AttrPath (es, e) -> | ||
182 | AttrPath (List.(es >>| process ~dir), process ~dir e) | ||
183 | | Inherit (me, xs) -> | ||
184 | Inherit (Option.(me >>| process ~dir), process_inherit_ids xs)) | ||
185 | |> simplify_bnd_paths | ||
186 | |> List.fold ~init:[] ~f:(fun bs' bnd -> | ||
187 | match bnd with | ||
188 | | AttrPath (path, e) -> insert bs' path e | ||
189 | | Inherit (from, es) -> insert_inherit bs' from es) | ||
190 | |||
191 | and process_inherit_ids = | ||
192 | List.map ~f:(function | ||
193 | | Id x | Val (Str (x, [])) | Aquote (Val (Str (x, []))) -> Id x | ||
194 | | _ -> raise (ElaborateError "Unexpected expression in inherit")) | ||
195 | |||
196 | and process_pattern ~dir = function | ||
197 | | Alias x -> Alias x | ||
198 | | ParamSet (mx, (ps, k)) -> ParamSet (mx, (process_param_set ~dir mx ps, k)) | ||
199 | |||
200 | and process_param_set ~dir ?(seen = String.Set.empty) mx ps = | ||
201 | match ps with | ||
202 | | [] -> [] | ||
203 | | (y, me) :: ps' -> | ||
204 | if Set.mem seen y || Option.mem mx y ~equal:String.( = ) then | ||
205 | raise (ElaborateError "Duplicated function argument") | ||
206 | else | ||
207 | (y, Option.(me >>| process ~dir)) | ||
208 | :: process_param_set ~dir mx ps' ~seen:(Set.add seen y) | ||
diff --git a/lib/nix/lexer.mll b/lib/nix/lexer.mll new file mode 100644 index 0000000..023d888 --- /dev/null +++ b/lib/nix/lexer.mll | |||
@@ -0,0 +1,315 @@ | |||
1 | { | ||
2 | open Core | ||
3 | open Tokens | ||
4 | |||
5 | exception Error of string | ||
6 | |||
7 | (* Types of curly braces. | ||
8 | AQUOTE corresponds to the braces for antiquotation, i.e. '${...}' | ||
9 | and SET to an attribute set '{...}'. | ||
10 | *) | ||
11 | type braces = | ||
12 | | AQUOTE | ||
13 | | SET | ||
14 | |||
15 | let print_stack s = | ||
16 | let b = Buffer.create 100 in | ||
17 | Buffer.add_string b "[ "; | ||
18 | List.iter s ~f:(function | ||
19 | | AQUOTE -> Buffer.add_string b "AQUOTE; " | ||
20 | | SET -> Buffer.add_string b "SET; " | ||
21 | ); | ||
22 | Buffer.add_string b "]"; | ||
23 | Buffer.contents b | ||
24 | |||
25 | let token_of_str state buf = | ||
26 | match state with | ||
27 | | `Start -> STR_START (Buffer.contents buf) | ||
28 | | `Mid -> STR_MID (Buffer.contents buf) | ||
29 | |||
30 | let token_of_istr state buf = | ||
31 | match state with | ||
32 | | `Start -> ISTR_START (Buffer.contents buf) | ||
33 | | `Mid -> ISTR_MID (Buffer.contents buf) | ||
34 | |||
35 | (* lookup table for one-character tokens *) | ||
36 | let char_table = Array.create ~len:94 EOF | ||
37 | let _ = | ||
38 | List.iter ~f:(fun (k, v) -> Array.set char_table ((int_of_char k) - 1) v) | ||
39 | [ | ||
40 | '.', SELECT; | ||
41 | '?', QMARK; | ||
42 | '!', NOT; | ||
43 | '=', ASSIGN; | ||
44 | '<', LT; | ||
45 | '>', GT; | ||
46 | '[', LBRACK; | ||
47 | ']', RBRACK; | ||
48 | '+', PLUS; | ||
49 | '-', MINUS; | ||
50 | '*', TIMES; | ||
51 | '/', SLASH; | ||
52 | '(', LPAREN; | ||
53 | ')', RPAREN; | ||
54 | ':', COLON; | ||
55 | ';', SEMICOLON; | ||
56 | ',', COMMA; | ||
57 | '@', AS | ||
58 | ] | ||
59 | |||
60 | (* lookup table for two- and three-character tokens *) | ||
61 | let str_table = Hashtbl.create (module String) ~size:10 | ||
62 | let _ = | ||
63 | List.iter ~f:(fun (kwd, tok) -> Hashtbl.set str_table ~key:kwd ~data:tok) | ||
64 | [ | ||
65 | "//", MERGE; | ||
66 | "++", CONCAT; | ||
67 | "<=", LTE; | ||
68 | ">=", GTE; | ||
69 | "==", EQ; | ||
70 | "!=", NEQ; | ||
71 | "&&", AND; | ||
72 | "||", OR; | ||
73 | "->", IMPL; | ||
74 | "...", ELLIPSIS | ||
75 | ] | ||
76 | |||
77 | (* lookup table for keywords *) | ||
78 | let keyword_table = Hashtbl.create (module String) ~size:10 | ||
79 | let _ = | ||
80 | List.iter ~f:(fun (kwd, tok) -> Hashtbl.set keyword_table ~key:kwd ~data:tok) | ||
81 | [ "with", WITH; | ||
82 | "rec", REC; | ||
83 | "let", LET; | ||
84 | "in", IN; | ||
85 | "inherit", INHERIT; | ||
86 | "if" , IF; | ||
87 | "then", THEN; | ||
88 | "else", ELSE; | ||
89 | "assert", ASSERT; | ||
90 | "or", ORDEF ] | ||
91 | |||
92 | (* replace an escape sequence by the corresponding character(s) *) | ||
93 | let unescape = function | ||
94 | | "\\n" -> "\n" | ||
95 | | "\\r" -> "\r" | ||
96 | | "\\t" -> "\t" | ||
97 | | "\\\\" -> "\\" | ||
98 | | "\\${" -> "${" | ||
99 | | "''$" -> "$" | ||
100 | | "$$" -> "$" | ||
101 | | "'''" -> "''" | ||
102 | | "''\\t" -> "\t" | ||
103 | | "''\\r" -> "\r" | ||
104 | | "''\\n" -> "\n" | ||
105 | | x -> | ||
106 | failwith (Printf.sprintf "unescape unexpected arg %s" x) | ||
107 | |||
108 | let collect_tokens lexer q lexbuf = | ||
109 | let stack = ref [] in | ||
110 | let queue = Stdlib.Queue.create () in | ||
111 | let rec go () = | ||
112 | match (try Some (Stdlib.Queue.take queue) with Stdlib.Queue.Empty -> None) with | ||
113 | | Some token -> | ||
114 | ( | ||
115 | match token, !stack with | ||
116 | | AQUOTE_CLOSE, [] -> | ||
117 | Stdlib.Queue.add AQUOTE_CLOSE q | ||
118 | | EOF, _ -> | ||
119 | Stdlib.Queue.add EOF q; | ||
120 | | _, _ -> | ||
121 | Stdlib.Queue.add token q; | ||
122 | go () | ||
123 | ) | ||
124 | | None -> | ||
125 | lexer queue stack lexbuf; | ||
126 | go () | ||
127 | in | ||
128 | Stdlib.Queue.add AQUOTE_OPEN q; | ||
129 | stack := [AQUOTE]; | ||
130 | lexer queue stack lexbuf; | ||
131 | go () | ||
132 | |||
133 | (* utility functions *) | ||
134 | let print_position lexbuf = | ||
135 | let pos = Lexing.lexeme_start_p lexbuf in | ||
136 | Printf.sprintf "%s:%d:%d" pos.pos_fname | ||
137 | pos.pos_lnum (pos.pos_cnum - pos.pos_bol + 1) | ||
138 | |||
139 | |||
140 | let set_filename fname (lexbuf: Lexing.lexbuf) = | ||
141 | let pos = lexbuf.lex_curr_p in | ||
142 | lexbuf.lex_curr_p <- { pos with pos_fname = fname }; lexbuf | ||
143 | |||
144 | } | ||
145 | |||
146 | let nzdigit = ['1'-'9'] | ||
147 | let digit = nzdigit | '0' | ||
148 | let float = (nzdigit digit* '.' digit* | '0'? '.' digit+) (['E' 'e'] ['+' '-']? digit+)? | ||
149 | let alpha = ['a'-'z' 'A'-'Z'] | ||
150 | let alpha_digit = alpha | digit | ||
151 | let path_chr = alpha_digit | ['.' '_' '-' '+'] | ||
152 | let path = path_chr* ('/' path_chr+)+ | ||
153 | let spath = alpha_digit path_chr* ('/' path_chr+)* | ||
154 | let uri_chr = ['%' '/' '?' ':' '@' '&' '=' '+' '$' ',' '-' '_' '.' '!' '~' '*' '\''] | ||
155 | let scheme = alpha (alpha | ['+' '-' '.'])* | ||
156 | let uri = scheme ':' (alpha_digit | uri_chr)+ | ||
157 | let char_tokens = ['.' '?' '!' '=' '<' '>' '[' ']' '+' '-' '*' '/' '^' '(' ')' ':' ';' ',' '@'] | ||
158 | |||
159 | rule get_tokens q s = parse | ||
160 | (* skip whitespeces *) | ||
161 | | [' ' '\t' '\r'] | ||
162 | { get_tokens q s lexbuf } | ||
163 | (* increase line count for new lines *) | ||
164 | | '\n' | ||
165 | { Lexing.new_line lexbuf; get_tokens q s lexbuf } | ||
166 | | char_tokens as c | ||
167 | { Stdlib.Queue.add (Array.get char_table ((int_of_char c) - 1)) q } | ||
168 | | ("//" | "++" | "<=" | ">=" | "==" | "!=" | "&&" | "||" | "->" | "...") as s | ||
169 | { Stdlib.Queue.add (Hashtbl.find_exn str_table s) q} | ||
170 | | digit+ as i | ||
171 | { Stdlib.Queue.add (INT i) q } | ||
172 | | float | ||
173 | { Stdlib.Queue.add (FLOAT (Lexing.lexeme lexbuf)) q } | ||
174 | | path | ||
175 | { Stdlib.Queue.add (PATH (Lexing.lexeme lexbuf)) q } | ||
176 | | '<' (spath as p) '>' | ||
177 | { Stdlib.Queue.add (SPATH p) q } | ||
178 | | '~' path as p | ||
179 | { Stdlib.Queue.add (HPATH p) q } | ||
180 | | uri | ||
181 | { Stdlib.Queue.add(URI (Lexing.lexeme lexbuf)) q } | ||
182 | (* keywords or identifiers *) | ||
183 | | ((alpha | '_')+ (alpha_digit | ['_' '\'' '-'])*) as id | ||
184 | { Stdlib.Queue.add (Hashtbl.find keyword_table id |> Option.value ~default:(ID id)) q} | ||
185 | (* comments *) | ||
186 | | '#' ([^ '\n']* as c) | ||
187 | { ignore c; get_tokens q s lexbuf} | ||
188 | | "/*" | ||
189 | { comment (Buffer.create 64) lexbuf; | ||
190 | get_tokens q s lexbuf | ||
191 | } | ||
192 | (* the following three tokens change the braces stack *) | ||
193 | | "${" | ||
194 | { Stdlib.Queue.add AQUOTE_OPEN q; s := AQUOTE :: !s } | ||
195 | | '{' | ||
196 | { Stdlib.Queue.add LBRACE q; s := SET :: !s } | ||
197 | | '}' | ||
198 | { | ||
199 | match !s with | ||
200 | | AQUOTE :: rest -> | ||
201 | Stdlib.Queue.add AQUOTE_CLOSE q; s := rest | ||
202 | | SET :: rest -> | ||
203 | Stdlib.Queue.add RBRACE q; s := rest | ||
204 | | _ -> | ||
205 | let pos = print_position lexbuf in | ||
206 | let err = Printf.sprintf "Unbalanced '}' at %s\n" pos in | ||
207 | raise (Error err) | ||
208 | } | ||
209 | (* a double-quoted string *) | ||
210 | | '"' | ||
211 | { string `Start (Buffer.create 64) q lexbuf } | ||
212 | (* an indented string *) | ||
213 | | "''" (' '+ as ws) | ||
214 | { istring `Start (Some (String.length ws)) (Buffer.create 64) q lexbuf } | ||
215 | | "''" | ||
216 | { istring `Start None (Buffer.create 64) q lexbuf } | ||
217 | (* End of input *) | ||
218 | | eof | ||
219 | { Stdlib.Queue.add EOF q } | ||
220 | (* any other character raises an exception *) | ||
221 | | _ | ||
222 | { | ||
223 | let pos = print_position lexbuf in | ||
224 | let tok = Lexing.lexeme lexbuf in | ||
225 | let err = Printf.sprintf "Unexpected character '%s' at %s\n" tok pos in | ||
226 | raise (Error err) | ||
227 | } | ||
228 | |||
229 | (* Nix does not allow nested comments, but it is still handy to lex it | ||
230 | separately because we can properly increase line count. *) | ||
231 | and comment buf = parse | ||
232 | | '\n' | ||
233 | {Lexing.new_line lexbuf; Buffer.add_char buf '\n'; comment buf lexbuf} | ||
234 | | "*/" | ||
235 | { () } | ||
236 | | _ as c | ||
237 | { Buffer.add_char buf c; comment buf lexbuf } | ||
238 | |||
239 | and string state buf q = parse | ||
240 | | '"' (* terminate when we hit '"' *) | ||
241 | { Stdlib.Queue.add (token_of_str state buf) q; Stdlib.Queue.add STR_END q } | ||
242 | | '\n' | ||
243 | { Lexing.new_line lexbuf; Buffer.add_char buf '\n'; string state buf q lexbuf } | ||
244 | | ("\\n" | "\\r" | "\\t" | "\\\\" | "\\${") as s | ||
245 | { Buffer.add_string buf (unescape s); string state buf q lexbuf } | ||
246 | | "\\" (_ as c) (* add the character verbatim *) | ||
247 | { Buffer.add_char buf c; string state buf q lexbuf } | ||
248 | | "${" (* collect all the tokens till we hit the matching '}' *) | ||
249 | { | ||
250 | Stdlib.Queue.add (token_of_str state buf) q; | ||
251 | collect_tokens get_tokens q lexbuf; | ||
252 | string `Mid (Buffer.create 64) q lexbuf | ||
253 | } | ||
254 | | _ as c (* otherwise just add the character to the buffer *) | ||
255 | { Buffer.add_char buf c; string state buf q lexbuf } | ||
256 | |||
257 | and istring state imin buf q = parse | ||
258 | | ('\n' ' '* "''") | ||
259 | { | ||
260 | Lexing.new_line lexbuf; | ||
261 | Buffer.add_string buf "\n"; | ||
262 | let indent = match imin with | None -> 0 | Some i -> i in | ||
263 | Stdlib.Queue.add (token_of_istr state buf) q; | ||
264 | Stdlib.Queue.add (ISTR_END indent) q | ||
265 | } | ||
266 | | "''" | ||
267 | { | ||
268 | let indent = match imin with | None -> 0 | Some i -> i in | ||
269 | Stdlib.Queue.add (token_of_istr state buf) q; | ||
270 | Stdlib.Queue.add (ISTR_END indent) q | ||
271 | } | ||
272 | | ('\n' ' '* '\n') as s | ||
273 | { | ||
274 | Lexing.new_line lexbuf; | ||
275 | Lexing.new_line lexbuf; | ||
276 | Buffer.add_string buf s; | ||
277 | istring state imin buf q lexbuf | ||
278 | } | ||
279 | | ('\n' (' '* as ws)) as s | ||
280 | { | ||
281 | Lexing.new_line lexbuf; | ||
282 | Buffer.add_string buf s; | ||
283 | let ws_count = String.length ws in | ||
284 | match imin with | ||
285 | | None -> | ||
286 | istring state (Some ws_count) buf q lexbuf | ||
287 | | Some i -> | ||
288 | istring state (Some (min i ws_count)) buf q lexbuf | ||
289 | } | ||
290 | | ("''$" | "'''" | "''\\t" | "''\\r" | "''\\n") as s | ||
291 | { Buffer.add_string buf (unescape s); istring state imin buf q lexbuf } | ||
292 | | "''\\" (_ as c) | ||
293 | { Buffer.add_char buf c; istring state imin buf q lexbuf } | ||
294 | | "${" | ||
295 | { | ||
296 | Stdlib.Queue.add (token_of_istr state buf) q; | ||
297 | collect_tokens get_tokens q lexbuf; | ||
298 | istring `Mid imin (Buffer.create 64) q lexbuf | ||
299 | } | ||
300 | | _ as c | ||
301 | { Buffer.add_char buf c; istring state imin buf q lexbuf } | ||
302 | { | ||
303 | |||
304 | let rec next_token | ||
305 | (q: token Stdlib.Queue.t) | ||
306 | (s: braces list ref) | ||
307 | (lexbuf: Lexing.lexbuf) | ||
308 | : token = | ||
309 | match (try Some (Stdlib.Queue.take q) with | Stdlib.Queue.Empty -> None) with | ||
310 | | Some token -> | ||
311 | token | ||
312 | | None -> | ||
313 | get_tokens q s lexbuf; | ||
314 | next_token q s lexbuf | ||
315 | } | ||
diff --git a/lib/nix/nix.ml b/lib/nix/nix.ml new file mode 100644 index 0000000..39dc94c --- /dev/null +++ b/lib/nix/nix.ml | |||
@@ -0,0 +1,20 @@ | |||
1 | open Core | ||
2 | module Ast = Types | ||
3 | module Printer = Printer | ||
4 | |||
5 | exception ParseError of string | ||
6 | |||
7 | let parse ~filename (data : string) = | ||
8 | let lexbuf = Lexer.set_filename filename (Lexing.from_string data) | ||
9 | and q, s = (Stdlib.Queue.create (), ref []) in | ||
10 | try Parser.main (Lexer.next_token q s) lexbuf with | ||
11 | | Lexer.Error msg -> | ||
12 | let msg' = String.rstrip msg in | ||
13 | raise (ParseError (sprintf "Lexing error: %s" msg')) | ||
14 | | Parser.Error -> | ||
15 | let msg = sprintf "Parse error at %s" (Lexer.print_position lexbuf) in | ||
16 | raise (ParseError msg) | ||
17 | |||
18 | let elaborate = Elaborator.process | ||
19 | |||
20 | exception ElaborateError = Elaborator.ElaborateError | ||
diff --git a/lib/nix/parser.mly b/lib/nix/parser.mly new file mode 100644 index 0000000..dc1638d --- /dev/null +++ b/lib/nix/parser.mly | |||
@@ -0,0 +1,310 @@ | |||
1 | /* Tokens with data */ | ||
2 | %token <string> INT | ||
3 | %token <string> FLOAT | ||
4 | /* A path */ | ||
5 | %token <string> PATH | ||
6 | /* Search path, enclosed in <> */ | ||
7 | %token <string> SPATH | ||
8 | /* Home path, starts with ~ */ | ||
9 | %token <string> HPATH | ||
10 | %token <string> URI | ||
11 | %token <string> STR_START | ||
12 | %token <string> STR_MID | ||
13 | %token STR_END | ||
14 | %token <string> ISTR_START | ||
15 | %token <string> ISTR_MID | ||
16 | %token <int> ISTR_END | ||
17 | %token <string> ID | ||
18 | /* Tokens that stand for themselves */ | ||
19 | %token SELECT "." | ||
20 | %token QMARK "?" | ||
21 | %token CONCAT "++" | ||
22 | %token NOT "!" | ||
23 | %token MERGE "//" | ||
24 | %token ASSIGN "=" | ||
25 | %token LT "<" | ||
26 | %token LTE "<=" | ||
27 | %token GT ">" | ||
28 | %token GTE ">=" | ||
29 | %token EQ "==" | ||
30 | %token NEQ "!=" | ||
31 | %token AND "&&" | ||
32 | %token OR "||" | ||
33 | %token IMPL "->" | ||
34 | %token AQUOTE_OPEN "${" | ||
35 | %token AQUOTE_CLOSE "}$" | ||
36 | %token LBRACE "{" | ||
37 | %token RBRACE "}" | ||
38 | %token LBRACK "[" | ||
39 | %token RBRACK "]" | ||
40 | %token PLUS "+" | ||
41 | %token MINUS "-" | ||
42 | %token TIMES "*" | ||
43 | %token SLASH "/" | ||
44 | %token LPAREN "(" | ||
45 | %token RPAREN ")" | ||
46 | %token COLON ":" | ||
47 | %token SEMICOLON ";" | ||
48 | %token COMMA "," | ||
49 | %token ELLIPSIS "..." | ||
50 | %token AS "@" | ||
51 | /* Keywords */ | ||
52 | %token WITH "with" | ||
53 | %token REC "rec" | ||
54 | %token LET "let" | ||
55 | %token IN "in" | ||
56 | %token INHERIT "inherit" | ||
57 | %token IF "if" | ||
58 | %token THEN "then" | ||
59 | %token ELSE "else" | ||
60 | %token ASSERT "assert" | ||
61 | %token ORDEF "or" | ||
62 | |||
63 | /* End of input */ | ||
64 | %token EOF | ||
65 | |||
66 | %{ | ||
67 | open Types | ||
68 | %} | ||
69 | |||
70 | %start <Types.expr> main | ||
71 | |||
72 | %% | ||
73 | |||
74 | main: | ||
75 | | e = expr0 EOF | ||
76 | { e } | ||
77 | |||
78 | expr0: | ||
79 | | "if"; e1 = expr0; "then"; e2 = expr0; "else"; e3 = expr0 | ||
80 | { Cond (e1, e2, e3) } | ||
81 | | "with"; e1 = expr0; ";"; e2 = expr0 | ||
82 | { With (e1, e2) } | ||
83 | | "assert"; e1 = expr0; ";"; e2 = expr0 | ||
84 | { Assert (e1, e2) } | ||
85 | | "let"; xs = delimited("{", list(binding), "}") | ||
86 | { SetLet xs } | ||
87 | | "let"; xs = list(binding); "in"; e = expr0 | ||
88 | { Let (xs, e) } | ||
89 | | l = lambda | ||
90 | { Val l } | ||
91 | | e = expr1 | ||
92 | { e } | ||
93 | |||
94 | /* Rules expr1-expr14 are almost direct translation of the operator | ||
95 | precedence table: | ||
96 | https://nixos.org/nix/manual/#sec-language-operators */ | ||
97 | |||
98 | %inline binary_expr(Lhs, Op, Rhs): | ||
99 | | lhs = Lhs; op = Op; rhs = Rhs | ||
100 | { BinaryOp (op, lhs, rhs) } | ||
101 | |||
102 | expr1: | ||
103 | | e = binary_expr(expr2, "->" {Impl}, expr1) | ||
104 | | e = expr2 | ||
105 | { e } | ||
106 | |||
107 | expr2: | ||
108 | | e = binary_expr(expr2, "||" {Or}, expr3) | ||
109 | | e = expr3 | ||
110 | { e } | ||
111 | |||
112 | expr3: | ||
113 | | e = binary_expr(expr3, "&&" {And}, expr4) | ||
114 | | e = expr4 | ||
115 | { e } | ||
116 | |||
117 | %inline expr4_ops: | ||
118 | | "==" { Eq } | ||
119 | | "!=" { Neq } | ||
120 | |||
121 | expr4: | ||
122 | | e = binary_expr(expr5, expr4_ops, expr5) | ||
123 | | e = expr5 | ||
124 | { e } | ||
125 | |||
126 | %inline expr5_ops: | ||
127 | | "<" { Lt } | ||
128 | | ">" { Gt } | ||
129 | | "<=" { Lte } | ||
130 | | ">=" { Gte } | ||
131 | |||
132 | expr5: | ||
133 | | e = binary_expr(expr6, expr5_ops, expr6) | ||
134 | | e = expr6 | ||
135 | { e } | ||
136 | |||
137 | expr6: | ||
138 | | e = binary_expr(expr7, "//" {Merge}, expr6) | ||
139 | | e = expr7 | ||
140 | { e } | ||
141 | |||
142 | expr7: | ||
143 | | e = preceded("!", expr7) | ||
144 | { UnaryOp (Not, e) } | ||
145 | | e = expr8 | ||
146 | { e } | ||
147 | |||
148 | %inline expr8_ops: | ||
149 | | "+" { Plus } | ||
150 | | "-" { Minus } | ||
151 | |||
152 | expr8: | ||
153 | | e = binary_expr(expr8, expr8_ops, expr9) | ||
154 | | e = expr9 | ||
155 | { e } | ||
156 | |||
157 | %inline expr9_ops: | ||
158 | | "*" { Mult } | ||
159 | | "/" { Div } | ||
160 | |||
161 | expr9: | ||
162 | | e = binary_expr(expr9, expr9_ops, expr10) | ||
163 | | e = expr10 | ||
164 | { e } | ||
165 | |||
166 | expr10: | ||
167 | | e = binary_expr(expr11, "++" {Concat}, expr10) | ||
168 | | e = expr11 | ||
169 | { e } | ||
170 | |||
171 | expr11: | ||
172 | | e = expr12 "?" p = attr_path | ||
173 | { Test (e, p) } | ||
174 | | e = expr12 | ||
175 | { e } | ||
176 | |||
177 | expr12: | ||
178 | | e = preceded("-", expr13) | ||
179 | { UnaryOp (Negate, e) } | ||
180 | | e = expr13 | ||
181 | { e } | ||
182 | |||
183 | expr13: | ||
184 | | f = expr13; arg = expr14 | ||
185 | { Apply (f, arg) } | ||
186 | | e = expr14 | ||
187 | { e } | ||
188 | |||
189 | %inline selectable: | ||
190 | | s = set | ||
191 | { Val s } | ||
192 | | id = ID | ||
193 | { Id id } | ||
194 | | e = delimited("(", expr0, ")") | ||
195 | { e } | ||
196 | |||
197 | expr14: | ||
198 | | e = selectable; "."; p = attr_path; o = option(preceded("or", expr14)) | ||
199 | { Select (e, p, o) } | ||
200 | | e = atomic_expr; "or" | ||
201 | { Apply (e, Id "or") } | ||
202 | | e = atomic_expr | ||
203 | { e } | ||
204 | |||
205 | atomic_expr: | ||
206 | | id = ID | ||
207 | { Id id } | ||
208 | | v = value | ||
209 | { Val v } | ||
210 | | e = delimited("(", expr0, ")") | ||
211 | { e } | ||
212 | |||
213 | attr_path: | ||
214 | | p = separated_nonempty_list(".", attr_path_component) | ||
215 | { p } | ||
216 | |||
217 | attr_path_component: | ||
218 | | "or" | ||
219 | { Id "or" } | ||
220 | | id = ID | ||
221 | { Id id } | ||
222 | | e = delimited("${", expr0, "}$") | ||
223 | { Aquote e } | ||
224 | | s = str | ||
225 | { Val s } | ||
226 | |||
227 | value: | ||
228 | | s = str | ||
229 | { s } | ||
230 | | s = istr | ||
231 | { s } | ||
232 | | i = INT | ||
233 | {Int i} | ||
234 | | f = FLOAT | ||
235 | { Float f } | ||
236 | | p = PATH | ||
237 | { Path p } | ||
238 | | sp = SPATH | ||
239 | { SPath sp } | ||
240 | | hp = HPATH | ||
241 | { HPath hp } | ||
242 | | uri = URI | ||
243 | { Uri uri } | ||
244 | | l = nixlist | ||
245 | { l } | ||
246 | | s = set | ||
247 | { s } | ||
248 | |||
249 | %inline str_mid(X): | ||
250 | | xs = list(pair(delimited("${", expr0, "}$"), X)) { xs } | ||
251 | |||
252 | /* Double-quoted string */ | ||
253 | str: | ||
254 | | start = STR_START; mids = str_mid(STR_MID); STR_END | ||
255 | { Str (start, mids) } | ||
256 | |||
257 | /* Indented string */ | ||
258 | istr: | ||
259 | | start = ISTR_START; mids = str_mid(ISTR_MID); i = ISTR_END | ||
260 | { IStr (i, start, mids) } | ||
261 | |||
262 | /* Lists and sets */ | ||
263 | nixlist: | ||
264 | | xs = delimited("[", list(expr14), "]") | ||
265 | { List xs } | ||
266 | |||
267 | empty_set: | ||
268 | | "{"; "}" {} | ||
269 | |||
270 | set: | ||
271 | | empty_set | ||
272 | { AttSet (Nonrec, []) } | ||
273 | | xs = delimited("{", nonempty_list(binding), "}") | ||
274 | { AttSet (Nonrec, xs) } | ||
275 | | xs = preceded("rec", delimited("{", list(binding), "}")) | ||
276 | { AttSet (Rec, xs) } | ||
277 | |||
278 | binding: | ||
279 | | kv = terminated(separated_pair(attr_path, "=", expr0), ";") | ||
280 | { let (k, v) = kv in AttrPath (k, v) } | ||
281 | | xs = delimited("inherit", pair(option(delimited("(", expr0, ")")), list(attr_path_component)), ";") | ||
282 | { let (prefix, ids) = xs in Inherit (prefix, ids) } | ||
283 | |||
284 | lambda: | ||
285 | | id = ID; "@"; p = param_set; ":"; e = expr0 | ||
286 | { Lambda (ParamSet (Some id, p), e) } | ||
287 | | p = param_set; "@"; id = ID; ":"; e = expr0 | ||
288 | { Lambda (ParamSet (Some id, p), e) } | ||
289 | | p = param_set; ":"; e = expr0 | ||
290 | { Lambda (ParamSet (None, p), e) } | ||
291 | | id = ID; ":"; e = expr0 | ||
292 | { Lambda (Alias id, e) } | ||
293 | |||
294 | %inline param_set: | ||
295 | | empty_set | ||
296 | { ([], Exact) } | ||
297 | | "{"; "..."; "}" | ||
298 | { ([], Loose) } | ||
299 | | ps = delimited("{", pair(pair(params, ","?), boption("...")), "}") | ||
300 | { let ((ps, _), ellipsis) = ps in (ps, if ellipsis then Loose else Exact) } | ||
301 | |||
302 | params: | ||
303 | | p = param | ||
304 | { [p] } | ||
305 | | ps = params; ","; p = param | ||
306 | { ps @ [p] } | ||
307 | |||
308 | %inline param: | ||
309 | p = pair(ID, option(preceded("?", expr0))) | ||
310 | { p } | ||
diff --git a/lib/nix/printer.ml b/lib/nix/printer.ml new file mode 100644 index 0000000..57e81f4 --- /dev/null +++ b/lib/nix/printer.ml | |||
@@ -0,0 +1,176 @@ | |||
1 | open Core | ||
2 | open Types | ||
3 | open PPrint | ||
4 | |||
5 | let rec escape_chlist = function | ||
6 | | [] -> [] | ||
7 | | '$' :: '{' :: l' -> '\\' :: '$' :: '{' :: escape_chlist l' | ||
8 | | '\n' :: l' -> '\\' :: 'n' :: escape_chlist l' | ||
9 | | '\r' :: l' -> '\\' :: 'r' :: escape_chlist l' | ||
10 | | '\t' :: l' -> '\\' :: 't' :: escape_chlist l' | ||
11 | | '\\' :: l' -> '\\' :: '\\' :: escape_chlist l' | ||
12 | | '"' :: l' -> '\\' :: '"' :: escape_chlist l' | ||
13 | | c :: l' -> c :: escape_chlist l' | ||
14 | |||
15 | let escape_string s = s |> String.to_list |> escape_chlist |> String.of_list | ||
16 | let out_width = ref 80 | ||
17 | let set_width i = out_width := i | ||
18 | let indent = ref 2 | ||
19 | let set_indent i = indent := i | ||
20 | |||
21 | let rec doc_of_expr = function | ||
22 | | BinaryOp (op, lhs, rhs) -> | ||
23 | let lhs_doc = maybe_parens_bop op `Left lhs | ||
24 | and rhs_doc = maybe_parens_bop op `Right rhs in | ||
25 | infix !indent 1 (doc_of_bop op) lhs_doc rhs_doc | ||
26 | | UnaryOp (op, e) -> precede (doc_of_uop op) (maybe_parens (prec_of_uop op) e) | ||
27 | | Cond (e1, e2, e3) -> | ||
28 | surround !indent 1 | ||
29 | (soft_surround !indent 1 (string "if") (doc_of_expr e1) (string "then")) | ||
30 | (doc_of_expr e2) | ||
31 | (string "else" ^^ nest !indent (break 1 ^^ doc_of_expr e3)) | ||
32 | | With (e1, e2) -> | ||
33 | flow (break 1) [ string "with"; doc_of_expr e1 ^^ semi; doc_of_expr e2 ] | ||
34 | | Assert (e1, e2) -> | ||
35 | flow (break 1) [ string "assert"; doc_of_expr e1 ^^ semi; doc_of_expr e2 ] | ||
36 | | Test (e, path) -> | ||
37 | maybe_parens 4 e ^^ space ^^ string "?" | ||
38 | ^^ group (break 1 ^^ separate_map dot doc_of_expr path) | ||
39 | | SetLet bs -> | ||
40 | surround !indent 1 | ||
41 | (string "let " ^^ lbrace) | ||
42 | (group (separate_map (break 1) doc_of_binding bs)) | ||
43 | rbrace | ||
44 | | Let (bs, e) -> | ||
45 | surround !indent 1 (string "let") | ||
46 | (separate_map (break 1) doc_of_binding bs) | ||
47 | (prefix !indent 1 (string "in") (doc_of_expr e)) | ||
48 | | Val v -> doc_of_val v | ||
49 | | Id id -> string id | ||
50 | | Select (e, path, oe) -> | ||
51 | maybe_parens 1 e ^^ dot ^^ doc_of_attpath path | ||
52 | ^^ optional | ||
53 | (fun e -> | ||
54 | space ^^ string "or" ^^ nest !indent (break 1 ^^ maybe_parens 1 e)) | ||
55 | oe | ||
56 | | Apply (e1, e2) -> prefix !indent 1 (maybe_parens 2 e1) (maybe_parens 2 e2) | ||
57 | | Aquote e -> surround !indent 0 (string "${") (doc_of_expr e) (string "}") | ||
58 | |||
59 | and maybe_parens lvl e = | ||
60 | if prec_of_expr e >= lvl then surround !indent 0 lparen (doc_of_expr e) rparen | ||
61 | else doc_of_expr e | ||
62 | |||
63 | and maybe_parens_bop op (loc : [ `Left | `Right ]) e = | ||
64 | match (loc, assoc_of_bop op) with | ||
65 | | (`Left, Some Left | `Right, Some Right) | ||
66 | when prec_of_expr e >= prec_of_bop op -> | ||
67 | doc_of_expr e | ||
68 | | _, _ -> maybe_parens (prec_of_bop op) e | ||
69 | |||
70 | and doc_of_attpath path = separate_map dot doc_of_expr path | ||
71 | |||
72 | and doc_of_paramset (params, kind) = | ||
73 | let ps = | ||
74 | List.map ~f:doc_of_param params | ||
75 | @ if Poly.(kind = Loose) then [ string "..." ] else [] | ||
76 | in | ||
77 | surround !indent 0 lbrace (separate (comma ^^ break 1) ps) rbrace | ||
78 | |||
79 | and doc_of_param (id, oe) = | ||
80 | string id ^^ optional (fun e -> qmark ^^ space ^^ doc_of_expr e) oe | ||
81 | |||
82 | and doc_of_binding = function | ||
83 | | AttrPath (path, e) -> | ||
84 | doc_of_attpath path ^^ space ^^ equals ^^ space ^^ doc_of_expr e ^^ semi | ||
85 | | Inherit (oe, ids) -> | ||
86 | let id_docs = | ||
87 | List.map | ||
88 | ~f:(function | ||
89 | | Id x | Val (Str (x, [])) -> string x | _ -> assert false) | ||
90 | ids | ||
91 | in | ||
92 | let xs = | ||
93 | flow (break 1) | ||
94 | (match oe with | ||
95 | | Some e -> parens (doc_of_expr e) :: id_docs | ||
96 | | None -> id_docs) | ||
97 | in | ||
98 | soft_surround !indent 0 (string "inherit" ^^ space) xs semi | ||
99 | |||
100 | and doc_of_bop = function | ||
101 | | Plus -> plus | ||
102 | | Minus -> minus | ||
103 | | Mult -> star | ||
104 | | Div -> slash | ||
105 | | Gt -> rangle | ||
106 | | Lt -> langle | ||
107 | | Lte -> string "<=" | ||
108 | | Gte -> string ">=" | ||
109 | | Eq -> string "==" | ||
110 | | Neq -> string "!=" | ||
111 | | Or -> string "||" | ||
112 | | And -> string "&&" | ||
113 | | Impl -> string "->" | ||
114 | | Merge -> string "//" | ||
115 | | Concat -> string "++" | ||
116 | |||
117 | and doc_of_uop = function Negate -> minus | Not -> bang | ||
118 | |||
119 | and doc_of_val = function | ||
120 | | Str (start, xs) -> | ||
121 | dquotes | ||
122 | (string (escape_string start) | ||
123 | ^^ concat | ||
124 | (List.map | ||
125 | ~f:(fun (e, s) -> | ||
126 | surround !indent 0 (string "${") (doc_of_expr e) | ||
127 | (string "}" ^^ string (escape_string s))) | ||
128 | xs)) | ||
129 | | IStr (i, start, xs) -> | ||
130 | let qq = string "''" in | ||
131 | let str s = | ||
132 | String.split ~on:'\n' s | ||
133 | |> List.map ~f:(fun s -> | ||
134 | let len = String.length s in | ||
135 | let s' = | ||
136 | if len >= i then String.sub s ~pos:i ~len:(len - i) else s | ||
137 | in | ||
138 | string s') | ||
139 | |> separate hardline | ||
140 | in | ||
141 | enclose qq qq | ||
142 | (str start | ||
143 | ^^ concat | ||
144 | (List.map | ||
145 | ~f:(fun (e, s) -> | ||
146 | enclose (string "${") rbrace (doc_of_expr e) ^^ str s) | ||
147 | xs)) | ||
148 | | Int x | Float x | Path x | SPath x | HPath x | Uri x -> string x | ||
149 | | Lambda (pattern, body) -> | ||
150 | let pat = | ||
151 | match pattern with | ||
152 | | Alias id -> string id | ||
153 | | ParamSet (None, ps) -> doc_of_paramset ps | ||
154 | | ParamSet (Some id, ps) -> | ||
155 | doc_of_paramset ps ^^ group (break 1 ^^ at ^^ break 1 ^^ string id) | ||
156 | in | ||
157 | flow (break 1) [ pat ^^ colon; doc_of_expr body ] | ||
158 | | List [] -> lbracket ^^ space ^^ rbracket | ||
159 | | List es -> | ||
160 | surround !indent 1 lbracket | ||
161 | (separate_map (break 1) (maybe_parens 2) es) | ||
162 | rbracket | ||
163 | | AttSet (Nonrec, []) -> lbrace ^^ space ^^ rbrace | ||
164 | | AttSet (Nonrec, bs) -> | ||
165 | surround !indent 1 lbrace | ||
166 | (group (separate_map (break 1) doc_of_binding bs)) | ||
167 | rbrace | ||
168 | | AttSet (Rec, bs) -> | ||
169 | string "rec" ^^ space ^^ doc_of_val (AttSet (Nonrec, bs)) | ||
170 | |||
171 | let print chan expr = ToChannel.pretty 0.7 !out_width chan (doc_of_expr expr) | ||
172 | |||
173 | let to_string expr = | ||
174 | let buf = Stdlib.Buffer.create 0 in | ||
175 | ToBuffer.pretty 0.7 !out_width buf (doc_of_expr expr); | ||
176 | Stdlib.Buffer.contents buf | ||
diff --git a/lib/nix/tokens.ml b/lib/nix/tokens.ml new file mode 100644 index 0000000..4891d48 --- /dev/null +++ b/lib/nix/tokens.ml | |||
@@ -0,0 +1,64 @@ | |||
1 | type token = | ||
2 | (* Tokens with data *) | ||
3 | | INT of string | ||
4 | | FLOAT of string | ||
5 | (* A path (starting with / or ./) *) | ||
6 | | PATH of string | ||
7 | (* Search path, enclosed in < and > *) | ||
8 | | SPATH of string | ||
9 | (* Home path, starting with ~/ *) | ||
10 | | HPATH of string | ||
11 | | URI of string | ||
12 | | STR_START of string | ||
13 | | STR_MID of string | ||
14 | | STR_END | ||
15 | | ISTR_START of string | ||
16 | | ISTR_MID of string | ||
17 | | ISTR_END of int | ||
18 | | ID of string | ||
19 | (* Tokens that stand for themselves *) | ||
20 | | SELECT | ||
21 | | QMARK | ||
22 | | CONCAT | ||
23 | | NOT | ||
24 | | MERGE | ||
25 | | ASSIGN | ||
26 | | LT | ||
27 | | LTE | ||
28 | | GT | ||
29 | | GTE | ||
30 | | EQ | ||
31 | | NEQ | ||
32 | | AND | ||
33 | | OR | ||
34 | | IMPL | ||
35 | | AQUOTE_OPEN | ||
36 | | AQUOTE_CLOSE | ||
37 | | LBRACE | ||
38 | | RBRACE | ||
39 | | LBRACK | ||
40 | | RBRACK | ||
41 | | PLUS | ||
42 | | MINUS | ||
43 | | TIMES | ||
44 | | SLASH | ||
45 | | LPAREN | ||
46 | | RPAREN | ||
47 | | COLON | ||
48 | | SEMICOLON | ||
49 | | COMMA | ||
50 | | ELLIPSIS | ||
51 | | AS | ||
52 | (* Keywords *) | ||
53 | | WITH | ||
54 | | REC | ||
55 | | LET | ||
56 | | IN | ||
57 | | INHERIT | ||
58 | | IF | ||
59 | | THEN | ||
60 | | ELSE | ||
61 | | ASSERT | ||
62 | | ORDEF | ||
63 | (* End of input *) | ||
64 | | EOF | ||
diff --git a/lib/nix/types.ml b/lib/nix/types.ml new file mode 100644 index 0000000..8245406 --- /dev/null +++ b/lib/nix/types.ml | |||
@@ -0,0 +1,112 @@ | |||
1 | open Core | ||
2 | |||
3 | (* Binary operators *) | ||
4 | type binary_op = | ||
5 | | Plus | ||
6 | | Minus | ||
7 | | Mult | ||
8 | | Div | ||
9 | | Gt | ||
10 | | Lt | ||
11 | | Lte | ||
12 | | Gte | ||
13 | | Eq | ||
14 | | Neq | ||
15 | | Or | ||
16 | | And | ||
17 | | Impl | ||
18 | | Merge | ||
19 | | Concat | ||
20 | [@@deriving sexp_of] | ||
21 | |||
22 | (* Unary operators *) | ||
23 | type unary_op = Negate | Not [@@deriving sexp_of] | ||
24 | |||
25 | (* The top-level expression type *) | ||
26 | type expr = | ||
27 | | BinaryOp of binary_op * expr * expr | ||
28 | | UnaryOp of unary_op * expr | ||
29 | | Cond of expr * expr * expr | ||
30 | | With of expr * expr | ||
31 | | Assert of expr * expr | ||
32 | | Test of expr * expr list | ||
33 | | SetLet of binding list | ||
34 | | Let of binding list * expr | ||
35 | | Val of value | ||
36 | | Id of id | ||
37 | | Select of expr * expr list * expr option | ||
38 | | Apply of expr * expr | ||
39 | | Aquote of expr | ||
40 | [@@deriving sexp_of] | ||
41 | |||
42 | (* Possible values *) | ||
43 | and value = | ||
44 | (* Str is a string start, followed by arbitrary number of antiquotations and | ||
45 | strings that separate them *) | ||
46 | | Str of string * (expr * string) list | ||
47 | (* IStr is an indented string, so it has the extra integer component which | ||
48 | indicates the indentation *) | ||
49 | | IStr of int * string * (expr * string) list | ||
50 | | Int of string | ||
51 | | Float of string | ||
52 | | Path of string | ||
53 | | SPath of string | ||
54 | | HPath of string | ||
55 | | Uri of string | ||
56 | | Lambda of pattern * expr | ||
57 | | List of expr list | ||
58 | | AttSet of recursivity * binding list | ||
59 | [@@deriving sexp_of] | ||
60 | |||
61 | (* Patterns in lambda definitions *) | ||
62 | and pattern = Alias of id | ParamSet of id option * param_set | ||
63 | [@@deriving sexp_of] | ||
64 | |||
65 | and param_set = param list * match_kind [@@deriving sexp_of] | ||
66 | and param = id * expr option [@@deriving sexp_of] | ||
67 | and recursivity = Rec | Nonrec | ||
68 | and match_kind = Exact | Loose | ||
69 | |||
70 | (* Bindings in attribute sets and let expressions *) | ||
71 | and binding = | ||
72 | (* The first expr should be attrpath, which is the same as in Select *) | ||
73 | | AttrPath of expr list * expr | ||
74 | | Inherit of expr option * expr list | ||
75 | [@@deriving sexp_of] | ||
76 | |||
77 | (* Identifiers *) | ||
78 | and id = string | ||
79 | |||
80 | (* Precedence levels of binary operators *) | ||
81 | let prec_of_bop = function | ||
82 | | Concat -> 5 | ||
83 | | Mult | Div -> 6 | ||
84 | | Plus | Minus -> 7 | ||
85 | | Merge -> 9 | ||
86 | | Gt | Lt | Lte | Gte -> 10 | ||
87 | | Eq | Neq -> 11 | ||
88 | | And -> 12 | ||
89 | | Or -> 13 | ||
90 | | Impl -> 14 | ||
91 | |||
92 | type assoc = Left | Right | ||
93 | |||
94 | let assoc_of_bop = function | ||
95 | | Mult | Div | Plus | Minus -> Some Left | ||
96 | | Concat | Merge | And | Or -> Some Right | ||
97 | | Gt | Lt | Lte | Gte | Eq | Neq | Impl -> None | ||
98 | |||
99 | (* Precedence levels of unary operators *) | ||
100 | let prec_of_uop = function Negate -> 3 | Not -> 8 | ||
101 | |||
102 | (* Precedence level of expressions | ||
103 | (assuming that the constituents have higher levels) *) | ||
104 | let prec_of_expr = function | ||
105 | | Val (Lambda _) -> 15 | ||
106 | | Val _ | Id _ | Aquote _ -> 0 | ||
107 | | BinaryOp (op, _, _) -> prec_of_bop op | ||
108 | | UnaryOp (op, _) -> prec_of_uop op | ||
109 | | Cond _ | With _ | Assert _ | Let _ | SetLet _ -> 15 | ||
110 | | Test _ -> 4 | ||
111 | | Select _ -> 1 | ||
112 | | Apply _ -> 2 | ||