@@ -47,7 +47,7 @@ let s:default_config = {
47
47
\ ' n_suffix' : 64 ,
48
48
\ ' n_predict' : 128 ,
49
49
\ ' t_max_prompt_ms' : 500 ,
50
- \ ' t_max_predict_ms' : 1000 ,
50
+ \ ' t_max_predict_ms' : 500 ,
51
51
\ ' show_info' : 2 ,
52
52
\ ' auto_fim' : v: true ,
53
53
\ ' max_line_suffix' : 8 ,
@@ -196,6 +196,15 @@ function! llama#init()
196
196
if g: llama_config .ring_n_chunks > 0
197
197
call s: ring_update ()
198
198
endif
199
+
200
+ " for debugging
201
+ call timer_start (100 , {- > s: update_status ()})
202
+ endfunction
203
+
204
" Debug helper: show the indentation last recorded by llama#fim()
" (s:indent_last) in the status line, then re-arm itself so the display is
" refreshed every 100 ms.
"
" Notes:
"   - overwrites the 'statusline' option on every tick
"   - s:indent_last is read defensively: '?' is shown while it is still unset,
"     instead of raising E121 from inside the timer callback
"   - only one pending refresh timer is kept, so calling this function (or
"     llama#init()) repeatedly does not stack up parallel refresh chains
function! s:update_status()
    let &statusline = ' indent = ' . get(s:, 'indent_last', '?')

    " drop the previously scheduled refresh (harmless if it has already fired)
    if exists('s:status_timer')
        call timer_stop(s:status_timer)
    endif

    let s:status_timer = timer_start(100, {-> s:update_status()})
endfunction
200
209
201
210
" compute how similar two chunks of text are
@@ -397,15 +406,13 @@ function! s:fim_ctx_local(pos_x, pos_y, prev)
397
406
let l: lines_prefix = getline (max ([1 , a: pos_y - g: llama_config .n_prefix]), a: pos_y - 1 )
398
407
let l: lines_suffix = getline (a: pos_y + 1 , min ([l: max_y , a: pos_y + g: llama_config .n_suffix]))
399
408
409
+ " the indentation of the current line
410
+ let l: indent = strlen (matchstr (l: line_cur , ' ^\s*' ))
411
+
400
412
" special handling of whitespace-only lines - start from the beginning of the line
401
413
if match (l: line_cur , ' ^\s*$' ) >= 0
402
- let l: indent = 0
403
-
404
414
let l: line_cur_prefix = " "
405
415
let l: line_cur_suffix = " "
406
- else
407
- " the indentation of the current line
408
- let l: indent = strlen (matchstr (l: line_cur , ' ^\s*' ))
409
416
endif
410
417
else
411
418
if len (a: prev ) == 1
@@ -515,9 +522,9 @@ function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort
515
522
endif
516
523
517
524
let l: t_max_predict_ms = g: llama_config .t_max_predict_ms
518
- if empty (a: prev )
519
- " the first request is quick - we will launch a speculative request after this one is displayed
520
- let l: t_max_predict_ms = 250
525
+ if ! empty (a: prev )
526
+ " give more time for the speculative FIM
527
+ let l: t_max_predict_ms = min ([ 3 * g: llama_config .t_max_predict_ms, 3000 ])
521
528
endif
522
529
523
530
" compute multiple hashes that can be used to generate a completion for which the
@@ -547,8 +554,10 @@ function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort
547
554
endfor
548
555
endif
549
556
550
- " TODO: this might be incorrect
551
- let s: indent_last = l: indent
557
+ " update only for non-speculative fims
558
+ if empty (a: prev )
559
+ let s: indent_last = l: indent
560
+ endif
552
561
553
562
" TODO: refactor in a function
554
563
let l: text = getline (max ([1 , line (' .' ) - g: llama_config .ring_chunk_size/2]), min([line('.') + g:llama_config.ring_chunk_size/ 2 , line (' $' )]))
@@ -877,6 +886,11 @@ function! s:fim_render(pos_x, pos_y, data)
877
886
let l: content = [" " ]
878
887
endif
879
888
889
+ " truncate the last line if it repeats the next line
890
+ if len (l: content ) > 1 && l: content [-1 ] == getline (l: pos_y + 1 )
891
+ let l: content = l: content [0 :-2 ]
892
+ endif
893
+
880
894
" find the first non-empty line (strip whitespace)
881
895
let l: cmp_y = l: pos_y + 1
882
896
while l: cmp_y < line (' $' ) && getline (l: cmp_y ) = ~? ' ^\s*$'
0 commit comments