code.delx.au - gnu-emacs/blob - src/region-cache.c

   1 /* Caching facts about regions of the buffer, for optimization.
   2    Copyright (C) 1985-1989, 1993, 1995, 2001-2011  Free Software Foundation, Inc.
   3
   4 This file is part of GNU Emacs.
   5
   6 GNU Emacs is free software: you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation, either version 3 of the License, or
   9 (at your option) any later version.
  10
  11 GNU Emacs is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  18
  19
  20 #include <config.h>
  21 #include <stdio.h>
  22 #include <setjmp.h>
  23
  24 #include "lisp.h"
  25 #include "buffer.h"
  26 #include "region-cache.h"
  27
  28 \f
  29 /* Data structures.  */
  30
  31 /* The region cache.
  32
  33    We want something that maps character positions in a buffer onto
  34    values.  The representation should deal well with long runs of
  35    characters with the same value.
  36
  37    The tricky part: the representation should be very cheap to
  38    maintain in the presence of many insertions and deletions.  If the
  39    overhead of maintaining the cache is too high, the speedups it
  40    offers will be worthless.
  41
  42
  43    We represent the region cache as a sorted array of struct
  44    boundary's, each of which contains a buffer position and a value;
  45    the value applies to all the characters after the buffer position,
  46    until the position of the next boundary, or the end of the buffer.
  47
  48    The cache always has a boundary whose position is BUF_BEG, so
  49    there's always a value associated with every character in the
  50    buffer.  Since the cache is sorted, this is always the first
  51    element of the cache.
  52
  53    To facilitate the insertion and deletion of boundaries in the
  54    cache, the cache has a gap, just like Emacs's text buffers do.
  55
  56    To help boundary positions float along with insertions and
  57    deletions, all boundary positions before the cache gap are stored
  58    relative to BUF_BEG (buf) (thus they're >= 0), and all boundary
  59    positions after the gap are stored relative to BUF_Z (buf) (thus
  60    they're <= 0).  Look at BOUNDARY_POS to see this in action.  See
  61    revalidate_region_cache to see how this helps.  */
  62
  63 struct boundary {
  64   EMACS_INT pos;
  65   int value;
  66 };
  67
  68 struct region_cache {
  69   /* A sorted array of locations where the known-ness of the buffer
  70      changes.  */
  71   struct boundary *boundaries;
  72
  73   /* boundaries[gap_start ... gap_start + gap_len - 1] is the gap.  */
  74   EMACS_INT gap_start, gap_len;
  75
  76   /* The number of elements allocated to boundaries, not including the
  77      gap.  */
  78   EMACS_INT cache_len;
  79
  80   /* The areas that haven't changed since the last time we cleaned out
  81      invalid entries from the cache.  These overlap when the buffer is
  82      entirely unchanged.  */
  83   EMACS_INT beg_unchanged, end_unchanged;
  84
  85   /* The first and last positions in the buffer.  Because boundaries
  86      store their positions relative to the start (BEG) and end (Z) of
  87      the buffer, knowing these positions allows us to accurately
  88      interpret positions without having to pass the buffer structure
  89      or its endpoints around all the time.
  90
  91      Yes, buffer_beg is always 1.  It's there for symmetry with
  92      buffer_end and the BEG and BUF_BEG macros.  */
  93   EMACS_INT buffer_beg, buffer_end;
  94 };
  95
  96 /* Return the position of boundary i in cache c.  */
  97 #define BOUNDARY_POS(c, i) \
  98   ((i) < (c)->gap_start \
  99    ? (c)->buffer_beg + (c)->boundaries[(i)].pos \
 100    : (c)->buffer_end + (c)->boundaries[(c)->gap_len + (i)].pos)
 101
 102 /* Return the value for text after boundary i in cache c.  */
 103 #define BOUNDARY_VALUE(c, i) \
 104   ((i) < (c)->gap_start \
 105    ? (c)->boundaries[(i)].value \
 106    : (c)->boundaries[(c)->gap_len + (i)].value)
 107
 108 /* Set the value for text after boundary i in cache c to v.  */
 109 #define SET_BOUNDARY_VALUE(c, i, v) \
 110   ((i) < (c)->gap_start \
 111    ? ((c)->boundaries[(i)].value = (v))\
 112    : ((c)->boundaries[(c)->gap_len + (i)].value = (v)))
 113
 114
 115 /* How many elements to add to the gap when we resize the buffer.  */
 116 #define NEW_CACHE_GAP (40)
 117
 118 /* See invalidate_region_cache; if an invalidation would throw away
 119    information about this many characters, call
 120    revalidate_region_cache before doing the new invalidation, to
 121    preserve that information, instead of throwing it away.  */
 122 #define PRESERVE_THRESHOLD (500)
 123
 124 static void revalidate_region_cache (struct buffer *buf, struct region_cache *c);
 125
 126 \f
 127 /* Interface: Allocating, initializing, and disposing of region caches.  */
 128
 129 struct region_cache *
 130 new_region_cache (void)
 131 {
 132   struct region_cache *c
 133     = (struct region_cache *) xmalloc (sizeof (struct region_cache));
 134
 135   c->gap_start = 0;
 136   c->gap_len = NEW_CACHE_GAP;
 137   c->cache_len = 0;
 138   c->boundaries =
 139     (struct boundary *) xmalloc ((c->gap_len + c->cache_len)
 140                                  * sizeof (*c->boundaries));
 141
 142   c->beg_unchanged = 0;
 143   c->end_unchanged = 0;
 144   c->buffer_beg = BEG;
 145   c->buffer_end = BEG;
 146
 147   /* Insert the boundary for the buffer start.  */
 148   c->cache_len++;
 149   c->gap_len--;
 150   c->gap_start++;
 151   c->boundaries[0].pos   = 0;  /* from buffer_beg */
 152   c->boundaries[0].value = 0;
 153
 154   return c;
 155 }
 156
 157 void
 158 free_region_cache (struct region_cache *c)
 159 {
 160   xfree (c->boundaries);
 161   xfree (c);
 162 }
 163
 164 \f
 165 /* Finding positions in the cache.  */
 166
 167 /* Return the index of the last boundary in cache C at or before POS.
 168    In other words, return the boundary that specifies the value for
 169    the region POS..(POS + 1).
 170
 171    This operation should be logarithmic in the number of cache
 172    entries.  It would be nice if it took advantage of locality of
 173    reference, too, by searching entries near the last entry found.  */
 174 static EMACS_INT
 175 find_cache_boundary (struct region_cache *c, EMACS_INT pos)
 176 {
 177   EMACS_INT low = 0, high = c->cache_len;
 178
 179   while (low + 1 < high)
 180     {
 181       /* mid is always a valid index, because low < high and ">> 1"
 182          rounds down.  */
 183       EMACS_INT mid = (low + high) >> 1;
 184       EMACS_INT boundary = BOUNDARY_POS (c, mid);
 185
 186       if (pos < boundary)
 187         high = mid;
 188       else
 189         low = mid;
 190     }
 191
 192   /* Some testing.  */
 193   if (BOUNDARY_POS (c, low) > pos
 194       || (low + 1 < c->cache_len
 195           && BOUNDARY_POS (c, low + 1) <= pos))
 196       abort ();
 197
 198   return low;
 199 }
 200
 201
 202 \f
 203 /* Moving the cache gap around, inserting, and deleting.  */
 204
 205
 206 /* Move the gap of cache C to index POS, and make sure it has space
 207    for at least MIN_SIZE boundaries.  */
 208 static void
 209 move_cache_gap (struct region_cache *c, EMACS_INT pos, EMACS_INT min_size)
 210 {
 211   /* Copy these out of the cache and into registers.  */
 212   EMACS_INT gap_start = c->gap_start;
 213   EMACS_INT gap_len = c->gap_len;
 214   EMACS_INT buffer_beg = c->buffer_beg;
 215   EMACS_INT buffer_end = c->buffer_end;
 216
 217   if (pos < 0
 218       || pos > c->cache_len)
 219     abort ();
 220
 221   /* We mustn't ever try to put the gap before the dummy start
 222      boundary.  That must always be start-relative.  */
 223   if (pos == 0)
 224     abort ();
 225
 226   /* Need we move the gap right?  */
 227   while (gap_start < pos)
 228     {
 229       /* Copy one boundary from after to before the gap, and
 230          convert its position to start-relative.  */
 231       c->boundaries[gap_start].pos
 232         = (buffer_end
 233            + c->boundaries[gap_start + gap_len].pos
 234            - buffer_beg);
 235       c->boundaries[gap_start].value
 236         = c->boundaries[gap_start + gap_len].value;
 237       gap_start++;
 238     }
 239
 240   /* To enlarge the gap, we need to re-allocate the boundary array, and
 241      then shift the area after the gap to the new end.  Since the cost
 242      is proportional to the amount of stuff after the gap, we do the
 243      enlargement here, after a right shift but before a left shift,
 244      when the portion after the gap is smallest.  */
 245   if (gap_len < min_size)
 246     {
 247       EMACS_INT i;
 248
 249       /* Always make at least NEW_CACHE_GAP elements, as long as we're
 250          expanding anyway.  */
 251       if (min_size < NEW_CACHE_GAP)
 252         min_size = NEW_CACHE_GAP;
 253
 254       c->boundaries =
 255         (struct boundary *) xrealloc (c->boundaries,
 256                                       ((min_size + c->cache_len)
 257                                        * sizeof (*c->boundaries)));
 258
 259       /* Some systems don't provide a version of the copy routine that
 260          can be trusted to shift memory upward into an overlapping
 261          region.  memmove isn't widely available.  */
 262       min_size -= gap_len;
 263       for (i = c->cache_len - 1; i >= gap_start; i--)
 264         {
 265           c->boundaries[i + min_size].pos   = c->boundaries[i + gap_len].pos;
 266           c->boundaries[i + min_size].value = c->boundaries[i + gap_len].value;
 267         }
 268
 269       gap_len = min_size;
 270     }
 271
 272   /* Need we move the gap left?  */
 273   while (pos < gap_start)
 274     {
 275       gap_start--;
 276
 277       /* Copy one region from before to after the gap, and
 278          convert its position to end-relative.  */
 279       c->boundaries[gap_start + gap_len].pos
 280         = c->boundaries[gap_start].pos + buffer_beg - buffer_end;
 281       c->boundaries[gap_start + gap_len].value
 282         = c->boundaries[gap_start].value;
 283     }
 284
 285   /* Assign these back into the cache.  */
 286   c->gap_start = gap_start;
 287   c->gap_len  = gap_len;
 288 }
 289
 290
 291 /* Insert a new boundary in cache C; it will have cache index INDEX,
 292    and have the specified POS and VALUE.  */
 293 static void
 294 insert_cache_boundary (struct region_cache *c, EMACS_INT index, EMACS_INT pos,
 295                        int value)
 296 {
 297   /* index must be a valid cache index.  */
 298   if (index < 0 || index > c->cache_len)
 299     abort ();
 300
 301   /* We must never want to insert something before the dummy first
 302      boundary.  */
 303   if (index == 0)
 304     abort ();
 305
 306   /* We must only be inserting things in order.  */
 307   if (! (BOUNDARY_POS (c, index-1) < pos
 308          && (index == c->cache_len
 309              || pos < BOUNDARY_POS (c, index))))
 310     abort ();
 311
 312   /* The value must be different from the ones around it.  However, we
 313      temporarily create boundaries that establish the same value as
 314      the subsequent boundary, so we're not going to flag that case.  */
 315   if (BOUNDARY_VALUE (c, index-1) == value)
 316     abort ();
 317
 318   move_cache_gap (c, index, 1);
 319
 320   c->boundaries[index].pos = pos - c->buffer_beg;
 321   c->boundaries[index].value = value;
 322   c->gap_start++;
 323   c->gap_len--;
 324   c->cache_len++;
 325 }
 326
 327
 328 /* Delete the i'th entry from cache C if START <= i < END.  */
 329
 330 static void
 331 delete_cache_boundaries (struct region_cache *c,
 332                          EMACS_INT start, EMACS_INT end)
 333 {
 334   EMACS_INT len = end - start;
 335
 336   /* Gotta be in range.  */
 337   if (start < 0
 338       || end > c->cache_len)
 339     abort ();
 340
 341   /* Gotta be in order.  */
 342   if (start > end)
 343     abort ();
 344
 345   /* Can't delete the dummy entry.  */
 346   if (start == 0
 347       && end >= 1)
 348     abort ();
 349
 350   /* Minimize gap motion.  If we're deleting nothing, do nothing.  */
 351   if (len == 0)
 352     ;
 353   /* If the gap is before the region to delete, delete from the start
 354      forward.  */
 355   else if (c->gap_start <= start)
 356     {
 357       move_cache_gap (c, start, 0);
 358       c->gap_len += len;
 359     }
 360   /* If the gap is after the region to delete, delete from the end
 361      backward.  */
 362   else if (end <= c->gap_start)
 363     {
 364       move_cache_gap (c, end, 0);
 365       c->gap_start -= len;
 366       c->gap_len   += len;
 367     }
 368   /* If the gap is in the region to delete, just expand it.  */
 369   else
 370     {
 371       c->gap_start = start;
 372       c->gap_len   += len;
 373     }
 374
 375   c->cache_len -= len;
 376 }
 377
 378
 379 \f
 380 /* Set the value for a region.  */
 381
 382 /* Set the value in cache C for the region START..END to VALUE.  */
 383 static void
 384 set_cache_region (struct region_cache *c,
 385                   EMACS_INT start, EMACS_INT end, int value)
 386 {
 387   if (start > end)
 388     abort ();
 389   if (start < c->buffer_beg
 390       || end   > c->buffer_end)
 391     abort ();
 392
 393   /* Eliminate this case; then we can assume that start and end-1 are
 394      both the locations of real characters in the buffer.  */
 395   if (start == end)
 396     return;
 397
 398   {
 399     /* We need to make sure that there are no boundaries in the area
 400        between start to end; the whole area will have the same value,
 401        so those boundaries will not be necessary.
 402
 403        Let start_ix be the cache index of the boundary governing the
 404        first character of start..end, and let end_ix be the cache
 405        index of the earliest boundary after the last character in
 406        start..end.  (This tortured terminology is intended to answer
 407        all the "< or <=?" sort of questions.)  */
 408     EMACS_INT start_ix = find_cache_boundary (c, start);
 409     EMACS_INT end_ix   = find_cache_boundary (c, end - 1) + 1;
 410
 411     /* We must remember the value established by the last boundary
 412        before end; if that boundary's domain stretches beyond end,
 413        we'll need to create a new boundary at end, and that boundary
 414        must have that remembered value.  */
 415     int value_at_end = BOUNDARY_VALUE (c, end_ix - 1);
 416
 417     /* Delete all boundaries strictly within start..end; this means
 418        those whose indices are between start_ix (exclusive) and end_ix
 419        (exclusive).  */
 420     delete_cache_boundaries (c, start_ix + 1, end_ix);
 421
 422     /* Make sure we have the right value established going in to
 423        start..end from the left, and no unnecessary boundaries.  */
 424     if (BOUNDARY_POS (c, start_ix) == start)
 425       {
 426         /* Is this boundary necessary?  If no, remove it; if yes, set
 427            its value.  */
 428         if (start_ix > 0
 429             && BOUNDARY_VALUE (c, start_ix - 1) == value)
 430           {
 431             delete_cache_boundaries (c, start_ix, start_ix + 1);
 432             start_ix--;
 433           }
 434         else
 435           SET_BOUNDARY_VALUE (c, start_ix, value);
 436       }
 437     else
 438       {
 439         /* Do we need to add a new boundary here?  */
 440         if (BOUNDARY_VALUE (c, start_ix) != value)
 441           {
 442             insert_cache_boundary (c, start_ix + 1, start, value);
 443             start_ix++;
 444           }
 445       }
 446
 447     /* This is equivalent to letting end_ix float (like a buffer
 448        marker does) with the insertions and deletions we may have
 449        done.  */
 450     end_ix = start_ix + 1;
 451
 452     /* Make sure we have the correct value established as we leave
 453        start..end to the right.  */
 454     if (end == c->buffer_end)
 455       /* There is no text after start..end; nothing to do.  */
 456       ;
 457     else if (end_ix >= c->cache_len
 458              || end < BOUNDARY_POS (c, end_ix))
 459       {
 460         /* There is no boundary at end, but we may need one.  */
 461         if (value_at_end != value)
 462           insert_cache_boundary (c, end_ix, end, value_at_end);
 463       }
 464     else
 465       {
 466         /* There is a boundary at end; should it be there?  */
 467         if (value == BOUNDARY_VALUE (c, end_ix))
 468           delete_cache_boundaries (c, end_ix, end_ix + 1);
 469       }
 470   }
 471 }
 472
 473
 474 \f
 475 /* Interface: Invalidating the cache.  Private: Re-validating the cache.  */
 476
 477 /* Indicate that a section of BUF has changed, to invalidate CACHE.
 478    HEAD is the number of chars unchanged at the beginning of the buffer.
 479    TAIL is the number of chars unchanged at the end of the buffer.
 480       NOTE: this is *not* the same as the ending position of modified
 481       region.
 482    (This way of specifying regions makes more sense than absolute
 483    buffer positions in the presence of insertions and deletions; the
 484    args to pass are the same before and after such an operation.)  */
 485 void
 486 invalidate_region_cache (struct buffer *buf, struct region_cache *c,
 487                          EMACS_INT head, EMACS_INT tail)
 488 {
 489   /* Let chead = c->beg_unchanged, and
 490          ctail = c->end_unchanged.
 491      If z-tail < beg+chead by a large amount, or
 492         z-ctail < beg+head by a large amount,
 493
 494      then cutting back chead and ctail to head and tail would lose a
 495      lot of information that we could preserve by revalidating the
 496      cache before processing this invalidation.  Losing that
 497      information may be more costly than revalidating the cache now.
 498      So go ahead and call revalidate_region_cache if it seems that it
 499      might be worthwhile.  */
 500   if (((BUF_BEG (buf) + c->beg_unchanged) - (BUF_Z (buf) - tail)
 501        > PRESERVE_THRESHOLD)
 502       || ((BUF_BEG (buf) + head) - (BUF_Z (buf) - c->end_unchanged)
 503           > PRESERVE_THRESHOLD))
 504     revalidate_region_cache (buf, c);
 505
 506
 507   if (head < c->beg_unchanged)
 508     c->beg_unchanged = head;
 509   if (tail < c->end_unchanged)
 510     c->end_unchanged = tail;
 511
 512   /* We now know nothing about the region between the unchanged head
 513      and the unchanged tail (call it the "modified region"), not even
 514      its length.
 515
 516      If the modified region has shrunk in size (deletions do this),
 517      then the cache may now contain boundaries originally located in
 518      text that doesn't exist any more.
 519
 520      If the modified region has increased in size (insertions do
 521      this), then there may now be boundaries in the modified region
 522      whose positions are wrong.
 523
 524      Even calling BOUNDARY_POS on boundaries still in the unchanged
 525      head or tail may well give incorrect answers now, since
 526      c->buffer_beg and c->buffer_end may well be wrong now.  (Well,
 527      okay, c->buffer_beg never changes, so boundaries in the unchanged
 528      head will still be okay.  But it's the principle of the thing.)
 529
 530      So things are generally a mess.
 531
 532      But we don't clean up this mess here; that would be expensive,
 533      and this function gets called every time any buffer modification
 534      occurs.  Rather, we can clean up everything in one swell foop,
 535      accounting for all the modifications at once, by calling
 536      revalidate_region_cache before we try to consult the cache the
 537      next time.  */
 538 }
 539
 540
 541 /* Clean out any cache entries applying to the modified region, and
 542    make the positions of the remaining entries accurate again.
 543
 544    After calling this function, the mess described in the comment in
 545    invalidate_region_cache is cleaned up.
 546
 547    This function operates by simply throwing away everything it knows
 548    about the modified region.  It doesn't care exactly which
 549    insertions and deletions took place; it just tosses it all.
 550
 551    For example, if you insert a single character at the beginning of
 552    the buffer, and a single character at the end of the buffer (for
 553    example), without calling this function in between the two
 554    insertions, then the entire cache will be freed of useful
 555    information.  On the other hand, if you do manage to call this
 556    function in between the two insertions, then the modified regions
 557    will be small in both cases, no information will be tossed, and the
 558    cache will know that it doesn't have knowledge of the first and
 559    last characters any more.
 560
 561    Calling this function may be expensive; it does binary searches in
 562    the cache, and causes cache gap motion.  */
 563
 564 static void
 565 revalidate_region_cache (struct buffer *buf, struct region_cache *c)
 566 {
 567   /* The boundaries now in the cache are expressed relative to the
 568      buffer_beg and buffer_end values stored in the cache.  Now,
 569      buffer_beg and buffer_end may not be the same as BUF_BEG (buf)
 570      and BUF_Z (buf), so we have two different "bases" to deal with
 571      --- the cache's, and the buffer's.  */
 572
 573   /* If the entire buffer is still valid, don't waste time.  Yes, this
 574      should be a >, not a >=; think about what beg_unchanged and
 575      end_unchanged get set to when the only change has been an
 576      insertion.  */
 577   if (c->buffer_beg + c->beg_unchanged
 578       > c->buffer_end - c->end_unchanged)
 579     return;
 580
 581   /* If all the text we knew about as of the last cache revalidation
 582      is still there, then all of the information in the cache is still
 583      valid.  Because c->buffer_beg and c->buffer_end are out-of-date,
 584      the modified region appears from the cache's point of view to be
 585      a null region located someplace in the buffer.
 586
 587      Now, invalidating that empty string will have no actual affect on
 588      the cache; instead, we need to update the cache's basis first
 589      (which will give the modified region the same size in the cache
 590      as it has in the buffer), and then invalidate the modified
 591      region. */
 592   if (c->buffer_beg + c->beg_unchanged
 593       == c->buffer_end - c->end_unchanged)
 594     {
 595       /* Move the gap so that all the boundaries in the unchanged head
 596          are expressed beg-relative, and all the boundaries in the
 597          unchanged tail are expressed end-relative.  That done, we can
 598          plug in the new buffer beg and end, and all the positions
 599          will be accurate.
 600
 601          The boundary which has jurisdiction over the modified region
 602          should be left before the gap.  */
 603       move_cache_gap (c,
 604                       (find_cache_boundary (c, (c->buffer_beg
 605                                                 + c->beg_unchanged))
 606                        + 1),
 607                       0);
 608
 609       c->buffer_beg = BUF_BEG (buf);
 610       c->buffer_end = BUF_Z   (buf);
 611
 612       /* Now that the cache's basis has been changed, the modified
 613          region actually takes up some space in the cache, so we can
 614          invalidate it.  */
 615       set_cache_region (c,
 616                         c->buffer_beg + c->beg_unchanged,
 617                         c->buffer_end - c->end_unchanged,
 618                         0);
 619     }
 620
 621   /* Otherwise, there is a non-empty region in the cache which
 622      corresponds to the modified region of the buffer.  */
 623   else
 624     {
 625       EMACS_INT modified_ix;
 626
 627       /* These positions are correct, relative to both the cache basis
 628          and the buffer basis.  */
 629       set_cache_region (c,
 630                         c->buffer_beg + c->beg_unchanged,
 631                         c->buffer_end - c->end_unchanged,
 632                         0);
 633
 634       /* Now the cache contains only boundaries that are in the
 635          unchanged head and tail; we've disposed of any boundaries
 636          whose positions we can't be sure of given the information
 637          we've saved.
 638
 639          If we put the cache gap between the unchanged head and the
 640          unchanged tail, we can adjust all the boundary positions at
 641          once, simply by setting buffer_beg and buffer_end.
 642
 643          The boundary which has jurisdiction over the modified region
 644          should be left before the gap.  */
 645       modified_ix =
 646         find_cache_boundary (c, (c->buffer_beg + c->beg_unchanged)) + 1;
 647       move_cache_gap (c, modified_ix, 0);
 648
 649       c->buffer_beg = BUF_BEG (buf);
 650       c->buffer_end = BUF_Z   (buf);
 651
 652       /* Now, we may have shrunk the buffer when we changed the basis,
 653          and brought the boundaries we created for the start and end
 654          of the modified region together, giving them the same
 655          position.  If that's the case, we should collapse them into
 656          one boundary.  Or we may even delete them both, if the values
 657          before and after them are the same.  */
 658       if (modified_ix < c->cache_len
 659           && (BOUNDARY_POS (c, modified_ix - 1)
 660               == BOUNDARY_POS (c, modified_ix)))
 661         {
 662           int value_after = BOUNDARY_VALUE (c, modified_ix);
 663
 664           /* Should we remove both of the boundaries?  Yes, if the
 665              latter boundary is now establishing the same value that
 666              the former boundary's predecessor does.  */
 667           if (modified_ix - 1 > 0
 668               && value_after == BOUNDARY_VALUE (c, modified_ix - 2))
 669             delete_cache_boundaries (c, modified_ix - 1, modified_ix + 1);
 670           else
 671             {
 672               /* We do need a boundary here; collapse the two
 673                  boundaries into one.  */
 674               SET_BOUNDARY_VALUE (c, modified_ix - 1, value_after);
 675               delete_cache_boundaries (c, modified_ix, modified_ix + 1);
 676             }
 677         }
 678     }
 679
 680   /* Now the entire cache is valid.  */
 681   c->beg_unchanged
 682     = c->end_unchanged
 683       = c->buffer_end - c->buffer_beg;
 684 }
 685
 686 \f
 687 /* Interface: Adding information to the cache.  */
 688
 689 /* Assert that the region of BUF between START and END (absolute
 690    buffer positions) is "known," for the purposes of CACHE (e.g. "has
 691    no newlines", in the case of the line cache).  */
 692 void
 693 know_region_cache (struct buffer *buf, struct region_cache *c,
 694                    EMACS_INT start, EMACS_INT end)
 695 {
 696   revalidate_region_cache (buf, c);
 697
 698   set_cache_region (c, start, end, 1);
 699 }
 700
 701 \f
 702 /* Interface: using the cache.  */
 703
 704 /* Return true if the text immediately after POS in BUF is known, for
 705    the purposes of CACHE.  If NEXT is non-zero, set *NEXT to the nearest
 706    position after POS where the knownness changes.  */
 707 int
 708 region_cache_forward (struct buffer *buf, struct region_cache *c,
 709                       EMACS_INT pos, EMACS_INT *next)
 710 {
 711   revalidate_region_cache (buf, c);
 712
 713   {
 714     EMACS_INT i = find_cache_boundary (c, pos);
 715     int i_value = BOUNDARY_VALUE (c, i);
 716     EMACS_INT j;
 717
 718     /* Beyond the end of the buffer is unknown, by definition.  */
 719     if (pos >= BUF_Z (buf))
 720       {
 721         if (next) *next = BUF_Z (buf);
 722         i_value = 0;
 723       }
 724     else if (next)
 725       {
 726         /* Scan forward from i to find the next differing position.  */
 727         for (j = i + 1; j < c->cache_len; j++)
 728           if (BOUNDARY_VALUE (c, j) != i_value)
 729             break;
 730
 731         if (j < c->cache_len)
 732           *next = BOUNDARY_POS (c, j);
 733         else
 734           *next = BUF_Z (buf);
 735       }
 736
 737     return i_value;
 738   }
 739 }
 740
 741 /* Return true if the text immediately before POS in BUF is known, for
 742    the purposes of CACHE.  If NEXT is non-zero, set *NEXT to the nearest
 743    position before POS where the knownness changes.  */
 744 int region_cache_backward (struct buffer *buf, struct region_cache *c,
 745                            EMACS_INT pos, EMACS_INT *next)
 746 {
 747   revalidate_region_cache (buf, c);
 748
 749   /* Before the beginning of the buffer is unknown, by
 750      definition. */
 751   if (pos <= BUF_BEG (buf))
 752     {
 753       if (next) *next = BUF_BEG (buf);
 754       return 0;
 755     }
 756
 757   {
 758     EMACS_INT i = find_cache_boundary (c, pos - 1);
 759     int i_value = BOUNDARY_VALUE (c, i);
 760     EMACS_INT j;
 761
 762     if (next)
 763       {
 764         /* Scan backward from i to find the next differing position.  */
 765         for (j = i - 1; j >= 0; j--)
 766           if (BOUNDARY_VALUE (c, j) != i_value)
 767             break;
 768
 769         if (j >= 0)
 770           *next = BOUNDARY_POS (c, j + 1);
 771         else
 772           *next = BUF_BEG (buf);
 773       }
 774
 775     return i_value;
 776   }
 777 }
 778
 779 \f
 780 /* Debugging: pretty-print a cache to the standard error output.  */
 781
 782 void
 783 pp_cache (struct region_cache *c)
 784 {
 785   int i;
 786   EMACS_INT beg_u = c->buffer_beg + c->beg_unchanged;
 787   EMACS_INT end_u = c->buffer_end - c->end_unchanged;
 788
 789   fprintf (stderr,
 790            "basis: %ld..%ld    modified: %ld..%ld\n",
 791            (long)c->buffer_beg, (long)c->buffer_end,
 792            (long)beg_u, (long)end_u);
 793
 794   for (i = 0; i < c->cache_len; i++)
 795     {
 796       EMACS_INT pos = BOUNDARY_POS (c, i);
 797
 798       putc (((pos < beg_u) ? 'v'
 799              : (pos == beg_u) ? '-'
 800              : ' '),
 801             stderr);
 802       putc (((pos > end_u) ? '^'
 803              : (pos == end_u) ? '-'
 804              : ' '),
 805             stderr);
 806       fprintf (stderr, "%ld : %d\n", (long)pos, BOUNDARY_VALUE (c, i));
 807     }
 808 }
 809