1
1
package edu .vanderbilt .accre .laurelin .spark_ttree ;
2
2
3
+ import static com .google .common .base .Preconditions .checkNotNull ;
3
4
import static edu .vanderbilt .accre .laurelin .root_proxy .TBranch .entryOffsetToRangeMap ;
4
5
5
6
import java .io .IOException ;
8
9
import java .io .ObjectStreamException ;
9
10
import java .io .Serializable ;
10
11
import java .nio .ByteBuffer ;
11
- import java .util .Arrays ;
12
12
import java .util .HashMap ;
13
13
import java .util .Map ;
14
14
import java .util .Map .Entry ;
15
15
16
16
import org .apache .logging .log4j .LogManager ;
17
17
import org .apache .logging .log4j .Logger ;
18
18
19
+ import com .google .common .cache .CacheBuilder ;
20
+ import com .google .common .cache .CacheLoader ;
21
+ import com .google .common .cache .LoadingCache ;
19
22
import com .google .common .collect .ImmutableMap ;
20
23
import com .google .common .collect .ImmutableRangeMap ;
21
24
import com .google .common .collect .ImmutableRangeMap .Builder ;
25
+ import com .google .common .collect .Interner ;
26
+ import com .google .common .collect .Interners ;
22
27
import com .google .common .collect .Range ;
23
28
24
29
import edu .vanderbilt .accre .laurelin .Cache ;
36
41
* and byte offsets to each basket
37
42
*/
38
43
public class SlimTBranch implements Serializable , SlimTBranchInterface , ObjectInputValidation {
44
+ private static final Logger logger = LogManager .getLogger ();
39
45
private static final long serialVersionUID = 1L ;
40
46
private String path ;
41
47
@@ -51,19 +57,7 @@ public class SlimTBranch implements Serializable, SlimTBranchInterface, ObjectIn
51
57
private int basketStart ;
52
58
private int basketEnd ;
53
59
54
- /**
55
- * Deduplicate range->basket maps, since many (all?) of them will be same
56
- * for different branches in a file
57
- */
58
- private static HashMap <ImmutableRangeMap <Long , Integer >,
59
- ImmutableRangeMap <Long , Integer >> dedupRangeMap =
60
- new HashMap <ImmutableRangeMap <Long , Integer >,
61
- ImmutableRangeMap <Long , Integer >>();
62
-
63
- public synchronized ImmutableRangeMap <Long , Integer > dedupAndReturnRangeMap (ImmutableRangeMap <Long , Integer > val ) {
64
- dedupRangeMap .putIfAbsent (val , val );
65
- return dedupRangeMap .get (val );
66
- }
60
+ private Interner <ImmutableRangeMap <Long , Integer >> rangeMapInterner = Interners .newWeakInterner ();
67
61
68
62
/**
69
63
* Copy the given slim branch and trim it by removing unnecessary basket
@@ -88,6 +82,7 @@ public SlimTBranch copyAndTrim(long eventStart, long eventEnd) {
88
82
}
89
83
90
84
public void checkInvariants () {
85
+ checkNotNull (rangeToBasketIDMap );
91
86
if (basketEnd == 0 ) {
92
87
assert basketEnd != 0 ;
93
88
}
@@ -130,7 +125,9 @@ public SlimTBranch(String path, Range<Long>[] basketRangeList, TBranch.ArrayDesc
130
125
int targetBasket = i + basketStart ;
131
126
basketBuilder = basketBuilder .put (basketRangeList [i ], targetBasket );
132
127
}
133
- rangeToBasketIDMap = dedupAndReturnRangeMap (basketBuilder .build ());
128
+ ImmutableRangeMap <Long , Integer > tmp = basketBuilder .build ();
129
+ checkNotNull (tmp );
130
+ rangeToBasketIDMap = rangeMapInterner .intern (tmp );
134
131
checkInvariants ();
135
132
}
136
133
@@ -145,6 +142,7 @@ public static SlimTBranch getFromTBranch(TBranch fatBranch) {
145
142
146
143
@ Override
147
144
public ImmutableRangeMap <Long , Integer > getRangeToBasketIDMap () {
145
+ checkNotNull (rangeToBasketIDMap );
148
146
return rangeToBasketIDMap ;
149
147
}
150
148
@@ -430,18 +428,22 @@ public Range<Long>[] getRangeToBasketID() {
430
428
return rangeToBasketID ;
431
429
}
432
430
433
- private static class ArrayKeyWrapper <T > {
434
- public T [] val ;
431
+ private static class TrimBasketKey {
432
+ private ImmutableRangeMap <Long , Integer > range ;
433
+ private int start ;
434
+ private int end ;
435
435
436
- public ArrayKeyWrapper (T [] val ) {
437
- this .val = val ;
436
+ public TrimBasketKey (ImmutableRangeMap <Long , Integer > range , int start , int end ) {
437
+ this .range = range ;
438
+ this .start = start ;
439
+ this .end = end ;
438
440
}
439
441
440
442
@ Override
441
443
public int hashCode () {
442
444
final int prime = 31 ;
443
- int result = 1 ;
444
- result = prime * result + Arrays . deepHashCode ( val );
445
+ int result = start + end ;
446
+ result = ( prime * result ) ^ range . hashCode ( );
445
447
return result ;
446
448
}
447
449
@@ -453,22 +455,45 @@ public boolean equals(Object obj) {
453
455
if (obj == null ) {
454
456
return false ;
455
457
}
456
- if (!(obj instanceof ArrayKeyWrapper )) {
458
+ if (!(obj instanceof TrimBasketKey )) {
457
459
return false ;
458
460
}
459
- ArrayKeyWrapper <T > other = (ArrayKeyWrapper <T >) obj ;
460
- return Arrays .deepEquals (val , other .val );
461
+ TrimBasketKey other = (TrimBasketKey ) obj ;
462
+ return ((start == other .start ) &&
463
+ (end == other .end ) &&
464
+ (range .equals (other .range )));
461
465
}
462
466
}
463
467
464
- private static transient HashMap <ArrayKeyWrapper <Range <Long >>, Range <Long >[]> rangeDedupMap = new
465
- HashMap <ArrayKeyWrapper <Range <Long >>, Range <Long >[]>();
466
-
467
- private synchronized Range <Long >[] dedupRange (Range <Long >[] val ) {
468
- ArrayKeyWrapper <Range <Long >> key = new ArrayKeyWrapper <Range <Long >>(val );
469
- rangeDedupMap .putIfAbsent (key , val );
470
- return rangeDedupMap .get (key );
471
- }
468
+ /**
469
+ * Deduplicate range->basket maps, since many (all?) of them will be the same
470
+ * for different branches in a file. Guessing 2000 as a good cache size
471
+ * since that's the upper-bound on the number of branches I'd expect to
472
+ * see in a file.
473
+ */
474
+ private static LoadingCache <TrimBasketKey ,
475
+ Range <Long >[]> dedupRangeMap =
476
+ CacheBuilder .newBuilder ()
477
+ .maximumSize (2000 )
478
+ .softValues ()
479
+ .build (
480
+ new CacheLoader <TrimBasketKey ,
481
+ Range <Long >[]>() {
482
+ @ Override
483
+ public Range <Long >[] load (TrimBasketKey key ) {
484
+ ImmutableMap <Range <Long >, Integer > map = key .range .asMapOfRanges ();
485
+ Range <Long >[] rangeToBasketID = new Range [key .end - key .start ];
486
+ for (Entry <Range <Long >, Integer > e : map .entrySet ()) {
487
+ int idx = e .getValue ();
488
+ if ((idx < key .start ) || (idx >= key .end )) {
489
+ continue ;
490
+ }
491
+ Range <Long > val = e .getKey ();
492
+ rangeToBasketID [idx - key .start ] = val ;
493
+ }
494
+ return rangeToBasketID ;
495
+ }
496
+ });
472
497
473
498
protected SerializeStorage (SlimTBranch in ) {
474
499
in .checkInvariants ();
@@ -499,7 +524,9 @@ protected SerializeStorage(SlimTBranch in) {
499
524
Range <Long > val = e .getKey ();
500
525
rangeToBasketID [idx - basketStart ] = val ;
501
526
}
502
- rangeToBasketID = dedupRange (rangeToBasketID );
527
+ TrimBasketKey cacheKey = new TrimBasketKey (in .getRangeToBasketIDMap (), basketStart , basketEnd );
528
+ rangeToBasketID = dedupRangeMap .getUnchecked (cacheKey );
529
+ checkNotNull (rangeToBasketID );
503
530
}
504
531
505
532
/**
@@ -508,6 +535,7 @@ protected SerializeStorage(SlimTBranch in) {
508
535
* @throws ObjectStreamException We don't throw, but required by Java in signature
509
536
*/
510
537
private Object readResolve () throws ObjectStreamException {
538
+ checkNotNull (rangeToBasketID );
511
539
SlimTBranch ret = new SlimTBranch (path , rangeToBasketID , arrayDesc , basketStart );
512
540
int idx = basketStart ;
513
541
for (long off : basketByteOffsets ) {
0 commit comments