Commit e4671e0

Use new path expansion implementation
Remove all uses of the old path expansion implementation (expandPathToList). Fixes #81, #77.
1 parent 20095c9 · commit e4671e0

5 files changed (+21 / -17 lines)
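For call sites, the migration is mechanical: the old per-path String API gives way to a batch API that takes the raw option strings and returns hadoop Path objects. A before/after sketch assembled from the two call patterns visible in this commit (variable names are illustrative, following the Reader diff below):

    // Before: expand each input path separately, collecting strings
    List<String> oldPaths = new LinkedList<String>();
    for (String path : options.paths()) {
        oldPaths.addAll(IOFactory.expandPathToList(path));
    }

    // After: resolve the whole input list in one call, then stringify
    List<Path> expanded = IOFactory.resolvePathList(Arrays.asList(options.paths()));
    List<String> newPaths = new ArrayList<String>(expanded.size());
    for (Path p : expanded) {
        newPaths.add(p.toString());
    }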

src/main/java/edu/vanderbilt/accre/laurelin/root_proxy/IOFactory.java

Lines changed: 1 addition & 9 deletions

@@ -203,12 +203,4 @@ private static List<Path> resolveGlob(String path) throws IOException {
     private static boolean isGlob(String path) {
         return path.matches(".*[{}\\[\\]*?].*");
     }
-
-    public static List<String> expandPathToList(String path) throws IOException {
-        if (Pattern.matches(hadoopPattern, path)) {
-            return HadoopFile.expandPathToList(path);
-        } else {
-            return NIOFile.expandPathToList(path);
-        }
-    }
-}
+}
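The replacement, resolvePathList, is not shown in this diff; only its call sites are. Based on those call sites (a List<String> in, a List<org.apache.hadoop.fs.Path> out) and the de-duplication the integration test below now expects, a plausible sketch might look like this. The use of resolveGlob and a LinkedHashSet here is an assumption, not the actual implementation:

    // Hypothetical sketch only -- the real resolvePathList ships with the
    // new path expansion implementation and is not part of this commit.
    // (imports assumed: java.util.ArrayList, java.util.LinkedHashSet,
    //  java.util.List, java.util.Set, org.apache.hadoop.fs.Path)
    public static List<Path> resolvePathList(List<String> paths) throws IOException {
        // A LinkedHashSet keeps input order while dropping duplicate files,
        // matching testTwoFiles' new expectation (9 rows, not 18).
        Set<Path> resolved = new LinkedHashSet<Path>();
        for (String path : paths) {
            // resolveGlob() appears in the hunk header above; assuming it
            // also handles plain files and directories.
            resolved.addAll(resolveGlob(path));
        }
        return new ArrayList<Path>(resolved);
    }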

src/main/java/edu/vanderbilt/accre/laurelin/root_proxy/ROOTFileCache.java

Lines changed: 8 additions & 0 deletions

@@ -8,6 +8,7 @@
 import java.util.LinkedList;
 import java.util.concurrent.locks.ReentrantLock;
 
+import org.apache.hadoop.fs.Path;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
@@ -161,6 +162,10 @@ public void forciblyInvalidateOpenFile(String path) {
         }
     }
 
+    public ROOTFile getROOTFile(Path path) throws IOException {
+        return getROOTFile(path.toString());
+    }
+
     /**
      * Gets a ROOTFile pointing to the given URI, ensuring that they're unique
      * within the cache object and with a timeout to prevent immediately
@@ -228,6 +233,8 @@ public long getPhantomReferenceCount() {
         }
     }
 
+
+
     private ROOTFile load(String path) throws IOException {
         assert lock.isHeldByCurrentThread() == true;
         FileInterface fileInterface = timedCache.getIfPresent(path);
@@ -361,4 +368,5 @@ public FileInterface getIfPresent(String path) {
         return fiMap.get(path);
     }
 }
+
 }
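The new overload just stringifies the hadoop Path and delegates to the existing String-keyed lookup, so callers that already hold Path objects from the new expansion code can skip the manual conversion. A hypothetical call site (fileCache is the ROOTFileCache instance the Reader diff below uses):

    org.apache.hadoop.fs.Path p = new org.apache.hadoop.fs.Path("testdata/all-types.root");
    ROOTFile file = fileCache.getROOTFile(p);  // equivalent to getROOTFile(p.toString())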

src/main/java/edu/vanderbilt/accre/laurelin/spark_ttree/Reader.java

Lines changed: 7 additions & 5 deletions

@@ -3,14 +3,15 @@
 import java.io.IOException;
 import java.io.Serializable;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.function.Function;
 
+import org.apache.hadoop.fs.Path;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.apache.spark.SparkContext;
@@ -47,7 +48,7 @@ public class Reader implements DataSourceReader,
         SupportsPushDownRequiredColumns {
     static final Logger logger = LogManager.getLogger();
 
-    private LinkedList<String> paths;
+    private List<String> paths;
     private String treeName;
     private TTree currTree;
     private TFile currFile;
@@ -62,9 +63,10 @@ public Reader(DataSourceOptions options, SparkContext sparkContext, CollectionAc
         logger.trace("construct ttreedatasourcev2reader");
         this.sparkContext = sparkContext;
         try {
-            this.paths = new LinkedList<String>();
-            for (String path: options.paths()) {
-                this.paths.addAll(IOFactory.expandPathToList(path));
+            List<Path> expanded = IOFactory.resolvePathList(Arrays.asList(options.paths()));
+            this.paths = new ArrayList<String>(expanded.size());
+            for (Path p: expanded) {
+                this.paths.add(p.toString());
             }
             // FIXME - More than one file, please
             currFile = TFile.getFromFile(fileCache.getROOTFile(this.paths.get(0)));

src/test/java/edu/vanderbilt/accre/root_proxy/IOTest.java

Lines changed: 2 additions & 2 deletions

@@ -168,15 +168,15 @@ private static ByteBuffer getTestBytes(long off, int len) {
      */
     @Test
     public void searchDirectory_nio() throws IOException {
-        List<String> files = IOFactory.expandPathToList("testdata/recursive");
+        List<org.apache.hadoop.fs.Path> files = resolveHelper("testdata/recursive");
         assertEquals(3, files.size());
     }
 
     @Test
     public void searchDirectory_hadoop() throws IOException {
         Path currentRelativePath = Paths.get("");
         String s = currentRelativePath.toAbsolutePath().toString();
-        List<String> files = IOFactory.expandPathToList("file:///" + s + "/" + "testdata/recursive");
+        List<org.apache.hadoop.fs.Path> files = resolveHelper("file:///" + s + "/" + "testdata/recursive");
         assertEquals(3, files.size());
     }
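resolveHelper is not defined in this diff; judging by its use above, it presumably adapts the new batch API to a single input path. A minimal sketch, assuming it delegates to IOFactory.resolvePathList:

    // Hypothetical test helper -- the actual definition lives elsewhere
    // in the test class.
    private static List<org.apache.hadoop.fs.Path> resolveHelper(String path) throws IOException {
        return IOFactory.resolvePathList(Arrays.asList(path));
    }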

src/test/java/edu/vanderbilt/accre/spark_ttree/TTreeDataSourceIntegrationTest.java

Lines changed: 3 additions & 1 deletion

@@ -72,13 +72,15 @@ public void testVectorLoad() {
 
     @Test
     public void testTwoFiles() {
+        // If the files are duplicated in the input list, they shouldn't be
+        // after reading
         Dataset<Row> df = spark
                 .read()
                 .format("root")
                 .option("tree", "Events")
                 .option("threadCount", "0")
                 .load("testdata/all-types.root", "testdata/all-types.root");
-        assertEquals(18, df.count());
+        assertEquals(9, df.count());
     }
 
     @AfterClass
