Skip to content

Commit 4993f20

Browse files
committed
feat(Range): first cut of generalized single-position ranges (#168)
1 parent f473cd3 commit 4993f20

22 files changed

+420
-350
lines changed

src-lsp/LspShims.ml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,7 @@ struct
1010
let lsp_range_of_range (r : Asai.Range.t option) =
1111
match r with
1212
| Some r ->
13-
let (start , stop) =
14-
match Asai.Range.view r with
15-
| `Range (start, stop) -> start, stop
16-
| `End_of_file pos -> pos, pos
17-
in
13+
let (start, stop) = Asai.Range.split r in
1814
L.Range.create
1915
~start:(lsp_pos_of_pos start)
2016
~end_:(lsp_pos_of_pos stop)

src/Explication.ml

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,20 @@
11
include ExplicationData
22

3-
let dump_seg dump_tag = Utils.dump_pair (Utils.dump_option dump_tag) Utils.dump_string
3+
let dump_marker dump_tag fmt =
4+
function
5+
| RangeBegin tag -> Format.fprintf fmt {|@[<2>RangeBegin@ @[%a@]@]|} dump_tag tag
6+
| RangeEnd tag -> Format.fprintf fmt {|@[<2>RangeEnd@ @[%a@]@]|} dump_tag tag
7+
| Point tag -> Format.fprintf fmt {|@[<2>Point@ @[%a@]@]|} dump_tag tag
48
5-
let dump_line dump_tag fmt {tags; segments} =
6-
Format.fprintf fmt {|@[<1>{@[<2>tags=@,@[%a@]@];@ @[<2>segments=@ @[%a@]@]}@]|}
7-
(Utils.dump_list dump_tag) tags
8-
(Utils.dump_list (dump_seg dump_tag)) segments
9+
let dump_token dump_tag fmt =
10+
function
11+
| String str -> Format.fprintf fmt {|@[<2>String@ "%s"@]|} (String.escaped str)
12+
| Marker m -> Format.fprintf fmt {|@[<2>Marker@ @[<1>(%a)@]@]|} (dump_marker dump_tag) m
13+
14+
let dump_line dump_tag fmt {markers; tokens} =
15+
Format.fprintf fmt {|@[<1>{@[<2>markers=@,@[%a@]@];@ @[<2>tokens=@ @[%a@]@]}@]|}
16+
(Utils.dump_list dump_tag) markers
17+
(Utils.dump_list (dump_token dump_tag)) tokens
918
1019
let dump_block dump_tag fmt {begin_line_num; end_line_num; lines} =
1120
Format.fprintf fmt {|@[<1>{begin_line_num=%d;@ end_line_num=%d;@ @[<2>lines=@ @[%a@]@]}@]|}

src/Explication.mli

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,8 @@ include module type of ExplicationData
55

66
(** {1 Debugging} *)
77

8-
(** Ugly printer for debugging *)
8+
(** Ugly printer for {!type:marker} *)
9+
val dump_marker : (Format.formatter -> 'tag -> unit) -> Format.formatter -> 'tag marker -> unit
10+
11+
(** Ugly printer for {!type:t} *)
912
val dump : (Format.formatter -> 'tag -> unit) -> Format.formatter -> 'tag t -> unit

src/ExplicationData.ml

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,18 @@
1-
(** A segment is an optionally tagged string from the user content. (Note the use of [option].) *)
2-
type 'tag segment = 'tag option * string
1+
(** A marker is a delimiter of a range or a specific point. *)
2+
type 'tag marker =
3+
| RangeBegin of 'tag
4+
| RangeEnd of 'tag
5+
| Point of 'tag
6+
7+
(** A token is either a string or a marker. *)
8+
type 'tag token =
9+
| String of string
10+
| Marker of 'tag marker
311

412
(** A line is a list of {!type:segment}s along with tags. *)
513
type 'tag line =
6-
{ tags : 'tag list
7-
; segments : 'tag segment list
14+
{ markers : 'tag list (** All tags in this line *)
15+
; tokens : 'tag token list
816
}
917

1018
(** A block is a collection of consecutive lines. *)

src/Explicator.ml

Lines changed: 53 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,6 @@ let print_invalid_range fmt : UserContent.invalid_range -> unit =
3030
Format.fprintf fmt "its@ beginning@ position@ is@ invalid;@ %a" print_invalid_position r
3131
| `End r ->
3232
Format.fprintf fmt "its@ ending@ position@ is@ invalid;@ %a" print_invalid_position r
33-
| `Not_end_of_file (l, l') ->
34-
Format.fprintf fmt "its@ offset@ %d@ is@ not@ the@ end@ of@ file@ (%d)." l l'
35-
| `End_of_file r -> print_invalid_position fmt r
3633

3734
let () = Printexc.register_printer @@
3835
function
@@ -45,12 +42,11 @@ let () = Printexc.register_printer @@
4542
| _ -> None
4643

4744
let to_start_of_line (pos : Range.position) = {pos with offset = pos.start_of_line}
48-
let default_blend ~(priority : _ -> int) t1 t2 = if priority t2 <= priority t1 then t2 else t1
4945

5046
module Make (Tag : Tag) = struct
5147
type position = Range.position
5248

53-
(** Skip the newline sequence, assuming that [shift] is not zero. (Otherwise, it means we already reached eof.) *)
49+
(** Skip the newline sequence, assuming that [shift] is not zero. (Otherwise, it means we already reached EOF.) *)
5450
let eol_to_next_line shift (pos : position) : position =
5551
assert (shift <> 0);
5652
{ source = pos.source;
@@ -65,9 +61,8 @@ module Make (Tag : Tag) = struct
6561

6662
type explicator_state =
6763
{ lines : Tag.t line bwd
68-
; segments : Tag.t segment bwd
69-
; remaining_tagged_lines : (Tag.t * int) list
70-
; current_tag : Tag.t option
64+
; tokens : Tag.t token bwd
65+
; remaining_line_markers : (int * Tag.t) list
7166
; cursor : Range.position
7267
; eol : int
7368
; eol_shift : int option
@@ -76,105 +71,103 @@ module Make (Tag : Tag) = struct
7671

7772
module F = Flattener.Make(Tag)
7873

79-
let explicate_block ~line_breaks (b : Tag.t Flattener.block) : Tag.t block =
80-
match b.tagged_positions with
81-
| [] -> invalid_arg "explicate_block: empty block"
82-
| ((_, ploc) :: _) as ps ->
83-
let source = SourceReader.load ploc.source in
74+
let explicate_block ~line_breaks source (b : Tag.t Flattener.block) : Tag.t block =
75+
match b.markers with
76+
| [] -> invalid_arg "explicate_block: empty block; should be impossible"
77+
| ((first_loc, _) :: _) as markers ->
78+
let source = SourceReader.load source in
8479
let eof = SourceReader.length source in
8580
let find_eol i = UserContent.find_eol ~line_breaks (SourceReader.unsafe_get source) (i, eof) in
86-
let rec go state : (Tag.t option * Range.position) list -> _ =
81+
let rec go state : (Range.position * Tag.t marker) list -> _ =
8782
function
88-
| (ptag, ploc) :: ps when state.cursor.line_num = ploc.line_num ->
89-
if ploc.offset > eof then invalid_arg "Asai.Explicator.explicate: beyond eof; use the debug mode";
90-
if ploc.offset > state.eol then invalid_arg "Asai.Explicator.explicate: unexpected newline; use the debug mode";
91-
if ploc.offset = state.cursor.offset then
92-
go {state with cursor = ploc; current_tag = ptag} ps
93-
else
94-
(* Still on the same line *)
95-
let segments =
96-
state.segments <:
97-
(state.current_tag, read_between ~source state.cursor.offset ploc.offset)
98-
in
99-
go { state with segments; cursor = ploc; current_tag = ptag } ps
100-
| ps ->
83+
| (loc, marker) :: markers when state.cursor.line_num = loc.line_num (* on the same line *) ->
84+
if loc.offset > eof then invalid_arg "Asai.Explicator.explicate: position beyond EOF; use the debug mode";
85+
if loc.offset > state.eol then invalid_arg "Asai.Explicator.explicate: unexpected newline; use the debug mode";
86+
let tokens =
87+
if loc.offset = state.cursor.offset then
88+
state.tokens <: Marker marker
89+
else
90+
state.tokens <: String (read_between ~source state.cursor.offset loc.offset) <: Marker marker
91+
in
92+
go { state with tokens; cursor = loc } markers
93+
| markers ->
10194
(* Shifting to the next line *)
102-
let lines, remaining_tagged_lines =
103-
let segments =
95+
let lines, remaining_line_markers =
96+
let tokens =
10497
if state.cursor.offset < state.eol then
105-
state.segments
106-
<: (state.current_tag, read_between ~source state.cursor.offset state.eol)
107-
else if Option.is_none state.eol_shift && Option.is_some state.current_tag then
108-
state.segments
109-
<: (state.current_tag, "‹EOF›")
98+
state.tokens <: String (read_between ~source state.cursor.offset state.eol)
11099
else
111-
state.segments
100+
state.tokens
101+
in
102+
let line_markers, remaining_line_markers =
103+
Utils.span (fun (line_num, _) -> line_num = state.line_num) state.remaining_line_markers
112104
in
113-
let tagged_lines, remaining_tagged_lines = Utils.span (fun (_, i) -> i = state.line_num) state.remaining_tagged_lines in
114-
(state.lines <: {segments = Bwd.to_list segments; tags = List.map fst tagged_lines}), remaining_tagged_lines
105+
(state.lines <:
106+
{ tokens = Bwd.to_list tokens
107+
; markers = List.map snd line_markers
108+
}),
109+
remaining_line_markers
115110
in
116-
(* Continue the process if [ps] is not empty. *)
117-
match ps, state.eol_shift with
111+
(* Continue the process if [markers] is not empty. *)
112+
match markers, state.eol_shift with
118113
| [], _ ->
119114
assert (state.line_num = b.end_line_num);
120115
lines
121-
| _ :: _, None -> invalid_arg "Asai.Explicator.explicate: beyond eof; use the debug mode"
122-
| (_, ploc) :: _, Some eol_shift ->
123-
if ploc.offset > eof then invalid_arg "Asai.Explicator.explicate: beyond eof; use the debug mode";
124-
if ploc.offset <= state.eol then invalid_arg "Asai.Explicator.explicate: expected newline missing; use the debug mode";
125-
if ploc.offset < state.eol + eol_shift then invalid_arg "Asai.Explicator.explicate: offset within newline; use the debug mode";
116+
| _ :: _, None -> invalid_arg "Asai.Explicator.explicate: position beyond EOF; use the debug mode"
117+
| (loc, _) :: _, Some eol_shift ->
118+
if loc.offset > eof then invalid_arg "Asai.Explicator.explicate: position beyond EOF; use the debug mode";
119+
if loc.offset <= state.eol then invalid_arg "Asai.Explicator.explicate: expected newline missing; use the debug mode";
120+
if loc.offset < state.eol + eol_shift then invalid_arg "Asai.Explicator.explicate: offset within newline; use the debug mode";
126121
(* Okay, p is really on the next line *)
127122
let cursor = eol_to_next_line eol_shift {state.cursor with offset = state.eol} in
128123
let eol, eol_shift = find_eol (state.eol + eol_shift) in
129124
go
130125
{ lines
131-
; segments = Emp
132-
; remaining_tagged_lines
133-
; current_tag = state.current_tag
126+
; tokens = Emp
127+
; remaining_line_markers
134128
; cursor
135129
; eol
136130
; eol_shift
137131
; line_num = state.line_num + 1
138132
}
139-
ps
133+
markers
140134
in
141-
let begin_pos = to_start_of_line ploc in
142-
let eol, eol_shift = find_eol ploc.offset in
143135
let lines =
136+
let begin_pos = to_start_of_line first_loc in
137+
let eol, eol_shift = find_eol first_loc.offset in
144138
go
145139
{ lines = Emp
146-
; segments = Emp
147-
; remaining_tagged_lines = b.tagged_lines
148-
; current_tag = None
140+
; tokens = Emp
141+
; remaining_line_markers = b.line_markers
149142
; cursor = begin_pos
150143
; eol
151144
; eol_shift
152145
; line_num = b.begin_line_num
153146
}
154-
ps
147+
markers
155148
in
156149
{ begin_line_num = b.begin_line_num
157150
; end_line_num = b.end_line_num
158151
; lines = Bwd.to_list @@ lines
159152
}
160153

161-
let[@inline] explicate_blocks ~line_breaks = List.map (explicate_block ~line_breaks)
154+
let[@inline] explicate_blocks ~line_breaks source ranges =
155+
List.map (explicate_block ~line_breaks source) ranges
162156

163157
let[@inline] explicate_part ~line_breaks (source, bs) : Tag.t part =
164-
{ source; blocks = explicate_blocks ~line_breaks bs }
158+
{ source; blocks = explicate_blocks ~line_breaks source bs }
165159

166160
let check_ranges ~line_breaks ranges =
167161
List.iter
168-
(fun (_, range) ->
162+
(fun (range, _) ->
169163
let source = SourceReader.load @@ Range.source range in
170164
let read = SourceReader.unsafe_get source in
171165
let eof = SourceReader.length source in
172166
try UserContent.check_range ~line_breaks ~eof read range
173167
with UserContent.Invalid_range reason -> raise @@ Invalid_range (range, reason))
174168
ranges
175169

176-
let explicate ?(line_breaks=`Traditional) ?(block_splitting_threshold=5)
177-
?(blend=default_blend ~priority:Tag.priority) ?(debug=false) ranges =
170+
let explicate ?(line_breaks=`Traditional) ?(block_splitting_threshold=5) ?(debug=false) ranges =
178171
if debug then check_ranges ~line_breaks ranges;
179-
List.map (explicate_part ~line_breaks) @@ F.flatten ~block_splitting_threshold ~blend ranges
172+
List.map (explicate_part ~line_breaks) @@ F.flatten ~block_splitting_threshold ranges
180173
end

src/Explicator.mli

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
include module type of ExplicatorSigs
22

3-
(** The default tag blending algorithm that chooses the more important tag based on priority. *)
4-
val default_blend : priority:('tag -> int) -> 'tag -> 'tag -> 'tag
5-
63
(** Making an explicator. *)
74
module Make : functor (Tag : Tag) -> S with module Tag := Tag

src/ExplicatorSigs.ml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ module type Tag = sig
88
(** The abstract type of tags. *)
99
type t
1010

11-
(** Get the priority number of a tag. We followed the UNIX convention here---a {i smaller} priority number represents higher priority. The convention works well with {!val:List.sort}, which sorts numbers in ascending order. (The more important things go first.) *)
11+
(** Get the priority number of a tag. A {i smaller} priority number represents higher priority.
12+
13+
The convention works well with {!val:List.sort}, which sorts numbers in ascending order: the more important things go first. *)
1214
val priority : t -> int
1315

1416
(** Ugly printer for debugging *)
@@ -19,12 +21,11 @@ end
1921
module type S = sig
2022
module Tag : Tag
2123

22-
val explicate : ?line_breaks:[`Unicode | `Traditional] -> ?block_splitting_threshold:int -> ?blend:(Tag.t -> Tag.t -> Tag.t) -> ?debug:bool -> (Tag.t * Range.t) list -> Tag.t t
24+
val explicate : ?line_breaks:[`Unicode | `Traditional] -> ?block_splitting_threshold:int -> ?debug:bool -> (Range.t * Tag.t) list -> Tag.t t
2325
(** Explicate a list of ranges using content from a data reader. This function must be run under [SourceReader.run].
2426
2527
@param line_breaks The set of character sequences that are recognized as (hard) line breaks. The [`Unicode] set contains all Unicode character sequences in {{:https://www.unicode.org/versions/Unicode15.0.0/ch05.pdf#G41643}Unicode 15.0.0 Table 5-1.} The [`Traditional] set only contains [U+000A (LF)], [U+000D (CR)], and [U+000D U+000A (CRLF)] as line breaks. The default is the [`Traditional] set.
2628
@param block_splitting_threshold The maximum number of consecutive, non-highlighted lines allowed in a block. The function will try to minimize the number of blocks, as long as no block has too many consecutive, non-highlighted lines. A higher threshold will lead to fewer blocks. When the threshold is zero, it means no block can contain any non-highlighted line. The default value is zero.
27-
@param blend The algorithm to blend two tags on a visual range. The default algorithm chooses the more important tag based on priority.
2829
@param debug Whether to enable the debug mode that performs expensive extra checking. The default is [false].
2930
3031
@raise Invalid_range See {!exception:Invalid_range}.

0 commit comments

Comments
 (0)