Skip to content

Commit 7c6bda8

Browse files
ENH: Add StringArray Support To ReadCSVFile Filter (#1339)
* Add support for importing string arrays in the ReadCSVFile filter.
* Remove quotes from strings before importing them.
* Fix bug where headers were being cached when they shouldn't have been.
* Add unit test that verifies that quotes are removed from strings.
* Fix Python examples and pipelines.

---------

Signed-off-by: Joey Kleingers <[email protected]>
1 parent 696c1ff commit 7c6bda8

File tree

17 files changed

+551
-141
lines changed

17 files changed

+551
-141
lines changed

src/Plugins/OrientationAnalysis/test/WriteGBCDGMTFileTest.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@ TEST_CASE("OrientationAnalysis::WriteGBCDGMTFileFilter", "[OrientationAnalysis][
101101
ReadCSVData data;
102102
data.inputFilePath = fmt::format("{}/6_6_Small_IN100_GBCD/small_in100_sigma_3_1.dat", unit_test::k_TestFilesDir);
103103
data.customHeaders = {k_ExemplarGMT1, k_ExemplarGMT2, k_ExemplarGMT3};
104-
data.dataTypes = {DataType::float32, DataType::float32, DataType::float32};
104+
data.dataTypes = {CSVType::float32, CSVType::float32, CSVType::float32};
105105
data.skippedArrayMask = {false, false, false};
106106
data.startImportRow = 2;
107107
data.delimiters = {' '};
@@ -121,7 +121,7 @@ TEST_CASE("OrientationAnalysis::WriteGBCDGMTFileFilter", "[OrientationAnalysis][
121121
ReadCSVData data;
122122
data.inputFilePath = outputFile.string();
123123
data.customHeaders = {k_GMT1, k_GMT2, k_GMT3};
124-
data.dataTypes = {DataType::float32, DataType::float32, DataType::float32};
124+
data.dataTypes = {CSVType::float32, CSVType::float32, CSVType::float32};
125125
data.skippedArrayMask = {false, false, false};
126126
data.startImportRow = 2;
127127
data.delimiters = {' '};
@@ -187,7 +187,7 @@ TEST_CASE("OrientationAnalysis::WriteGBCDGMTFileFilter", "[OrientationAnalysis][
187187
ReadCSVData data;
188188
data.inputFilePath = fmt::format("{}/6_6_Small_IN100_GBCD/small_in100_sigma_9_1.dat", unit_test::k_TestFilesDir);
189189
data.customHeaders = {k_ExemplarGMT1, k_ExemplarGMT2, k_ExemplarGMT3};
190-
data.dataTypes = {DataType::float32, DataType::float32, DataType::float32};
190+
data.dataTypes = {CSVType::float32, CSVType::float32, CSVType::float32};
191191
data.skippedArrayMask = {false, false, false};
192192
data.startImportRow = 2;
193193
data.delimiters = {' '};
@@ -207,7 +207,7 @@ TEST_CASE("OrientationAnalysis::WriteGBCDGMTFileFilter", "[OrientationAnalysis][
207207
ReadCSVData data;
208208
data.inputFilePath = outputFile.string();
209209
data.customHeaders = {k_GMT1, k_GMT2, k_GMT3};
210-
data.dataTypes = {DataType::float32, DataType::float32, DataType::float32};
210+
data.dataTypes = {CSVType::float32, CSVType::float32, CSVType::float32};
211211
data.skippedArrayMask = {false, false, false};
212212
data.startImportRow = 2;
213213
data.delimiters = {' '};
@@ -273,7 +273,7 @@ TEST_CASE("OrientationAnalysis::WriteGBCDGMTFileFilter", "[OrientationAnalysis][
273273
ReadCSVData data;
274274
data.inputFilePath = fmt::format("{}/6_6_Small_IN100_GBCD/small_in100_sigma_11_1.dat", unit_test::k_TestFilesDir);
275275
data.customHeaders = {k_ExemplarGMT1, k_ExemplarGMT2, k_ExemplarGMT3};
276-
data.dataTypes = {DataType::float32, DataType::float32, DataType::float32};
276+
data.dataTypes = {CSVType::float32, CSVType::float32, CSVType::float32};
277277
data.skippedArrayMask = {false, false, false};
278278
data.startImportRow = 2;
279279
data.delimiters = {' '};
@@ -293,7 +293,7 @@ TEST_CASE("OrientationAnalysis::WriteGBCDGMTFileFilter", "[OrientationAnalysis][
293293
ReadCSVData data;
294294
data.inputFilePath = outputFile.string();
295295
data.customHeaders = {k_GMT1, k_GMT2, k_GMT3};
296-
data.dataTypes = {DataType::float32, DataType::float32, DataType::float32};
296+
data.dataTypes = {CSVType::float32, CSVType::float32, CSVType::float32};
297297
data.skippedArrayMask = {false, false, false};
298298
data.startImportRow = 2;
299299
data.delimiters = {' '};

src/Plugins/OrientationAnalysis/test/WriteGBCDTriangleDataTest.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ TEST_CASE("OrientationAnalysis::WriteGBCDTriangleDataFilter: Valid filter execut
9292
ReadCSVData data;
9393
data.inputFilePath = fmt::format("{}/6_6_Small_IN100_GBCD/6_6_Small_IN100_GBCD_Triangles.ph", unit_test::k_TestFilesDir);
9494
data.customHeaders = {k_Phi1Right, k_PhiRight, k_Phi2Right, k_Phi1Left, k_PhiLeft, k_Phi2Left, k_TriangleNormal0, k_TriangleNormal1, k_TriangleNormal2, k_SurfaceArea};
95-
data.dataTypes = {DataType::float32, DataType::float32, DataType::float32, DataType::float32, DataType::float32,
96-
DataType::float32, DataType::float64, DataType::float64, DataType::float64, DataType::float64};
95+
data.dataTypes = {CSVType::float32, CSVType::float32, CSVType::float32, CSVType::float32, CSVType::float32,
96+
CSVType::float32, CSVType::float64, CSVType::float64, CSVType::float64, CSVType::float64};
9797
data.skippedArrayMask = {false, false, false, false, false, false, false, false, false, false};
9898
data.startImportRow = 6;
9999
data.delimiters = {' '};
@@ -114,8 +114,8 @@ TEST_CASE("OrientationAnalysis::WriteGBCDTriangleDataFilter: Valid filter execut
114114
ReadCSVData data;
115115
data.inputFilePath = outputFile.string();
116116
data.customHeaders = {k_Phi1Right, k_PhiRight, k_Phi2Right, k_Phi1Left, k_PhiLeft, k_Phi2Left, k_TriangleNormal0, k_TriangleNormal1, k_TriangleNormal2, k_SurfaceArea};
117-
data.dataTypes = {DataType::float32, DataType::float32, DataType::float32, DataType::float32, DataType::float32,
118-
DataType::float32, DataType::float64, DataType::float64, DataType::float64, DataType::float64};
117+
data.dataTypes = {CSVType::float32, CSVType::float32, CSVType::float32, CSVType::float32, CSVType::float32,
118+
CSVType::float32, CSVType::float64, CSVType::float64, CSVType::float64, CSVType::float64};
119119
data.skippedArrayMask = {false, false, false, false, false, false, false, false, false, false};
120120
data.startImportRow = 5;
121121
data.delimiters = {' '};

src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ReadCSVFile.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ ReadCSVFile::ReadCSVFile() = default;
2626

2727
ReadCSVFile::~ReadCSVFile() noexcept = default;
2828

29-
Result<> ReadCSVFile::readFile(DataStructure& dataStructure, const std::string& inputFilePath, usize importStartingRow, usize headersLineNumber, const std::vector<DataType>& columnDataTypes,
29+
Result<> ReadCSVFile::readFile(DataStructure& dataStructure, const std::string& inputFilePath, usize importStartingRow, usize headersLineNumber, const std::vector<CSVType>& columnDataTypes,
3030
const std::vector<bool>& columnsSkipped, const DataPath& groupPath, const std::vector<usize>& tupleDims, const std::vector<char>& delimiters, bool consecutiveDelimiters,
3131
const std::atomic_bool& shouldCancel, const IFilter::MessageHandler& msgHandler)
3232
{
@@ -39,7 +39,7 @@ Result<> ReadCSVFile::readFile(DataStructure& dataStructure, const std::string&
3939
}
4040

4141
Result<> ReadCSVFile::readFile(DataStructure& dataStructure, const std::string& inputFilePath, usize importStartingRow, const std::vector<std::string>& columnHeaders,
42-
const std::vector<DataType>& columnDataTypes, const std::vector<bool>& columnsSkipped, const DataPath& groupPath, const std::vector<usize>& tupleDims,
42+
const std::vector<CSVType>& columnDataTypes, const std::vector<bool>& columnsSkipped, const DataPath& groupPath, const std::vector<usize>& tupleDims,
4343
const std::vector<char>& delimiters, bool consecutiveDelimiters, const std::atomic_bool& shouldCancel, const IFilter::MessageHandler& msgHandler)
4444
{
4545
auto headers = columnHeaders;

src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ReadCSVFile.hpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "simplnx/DataStructure/DataStructure.hpp"
77
#include "simplnx/Filter/Arguments.hpp"
88
#include "simplnx/Filter/IFilter.hpp"
9+
#include "simplnx/Parameters/util/ReadCSVData.hpp"
910

1011
namespace fs = std::filesystem;
1112

@@ -46,11 +47,11 @@ class SIMPLNXCORE_EXPORT ReadCSVFile
4647
ReadCSVFile& operator=(const ReadCSVFile&) = delete;
4748
ReadCSVFile& operator=(ReadCSVFile&&) noexcept = delete;
4849

49-
Result<> readFile(DataStructure& dataStructure, const std::string& inputFilePath, usize importStartingRow, const std::vector<std::string>& columnHeaders,
50-
const std::vector<DataType>& columnDataTypes, const std::vector<bool>& columnsSkipped, const DataPath& groupPath, const std::vector<usize>& tupleDims,
51-
const std::vector<char>& delimiters, bool consecutiveDelimiters, const std::atomic_bool& shouldCancel, const IFilter::MessageHandler& msgHandler);
50+
Result<> readFile(DataStructure& dataStructure, const std::string& inputFilePath, usize importStartingRow, const std::vector<std::string>& columnHeaders, const std::vector<CSVType>& columnDataTypes,
51+
const std::vector<bool>& columnsSkipped, const DataPath& groupPath, const std::vector<usize>& tupleDims, const std::vector<char>& delimiters, bool consecutiveDelimiters,
52+
const std::atomic_bool& shouldCancel, const IFilter::MessageHandler& msgHandler);
5253

53-
Result<> readFile(DataStructure& dataStructure, const std::string& inputFilePath, usize importStartingRow, usize headersLineNumber, const std::vector<DataType>& columnDataTypes,
54+
Result<> readFile(DataStructure& dataStructure, const std::string& inputFilePath, usize importStartingRow, usize headersLineNumber, const std::vector<CSVType>& columnDataTypes,
5455
const std::vector<bool>& columnsSkipped, const DataPath& groupPath, const std::vector<usize>& tupleDims, const std::vector<char>& delimiters, bool consecutiveDelimiters,
5556
const std::atomic_bool& shouldCancel, const IFilter::MessageHandler& msgHandler);
5657
};

src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ReadCSVFileFilter.cpp

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "simplnx/DataStructure/IDataArray.hpp"
99
#include "simplnx/Filter/Actions/CreateArrayAction.hpp"
1010
#include "simplnx/Filter/Actions/CreateAttributeMatrixAction.hpp"
11+
#include "simplnx/Filter/Actions/CreateStringArrayAction.hpp"
1112
#include "simplnx/Parameters/AttributeMatrixSelectionParameter.hpp"
1213
#include "simplnx/Parameters/BoolParameter.hpp"
1314
#include "simplnx/Parameters/DataGroupCreationParameter.hpp"
@@ -129,10 +130,13 @@ Result<> cacheHeaders(const ReadCSVData& readCsvData)
129130
Result<> cacheFullFile(const ReadCSVData& readCsvData)
130131
{
131132
s_HeaderCache[s_InstanceId].FilePath = readCsvData.inputFilePath;
132-
auto result = cacheHeaders(readCsvData);
133-
if(result.invalid())
133+
if(readCsvData.headerMode == ReadCSVData::HeaderMode::LINE && readCsvData.headersLine != s_HeaderCache[s_InstanceId].HeadersLine)
134134
{
135-
return result;
135+
auto result = cacheHeaders(readCsvData);
136+
if(result.invalid())
137+
{
138+
return result;
139+
}
136140
}
137141

138142
s_HeaderCache[s_InstanceId].TotalLines = nx::core::FileUtilities::LinesInFile(readCsvData.inputFilePath);
@@ -431,12 +435,19 @@ IFilter::PreflightResult ReadCSVFileFilter::preflightImpl(const DataStructure& d
431435
continue;
432436
}
433437

434-
DataType dataType = readCSVData.dataTypes[i];
438+
CSVType csvType = readCSVData.dataTypes[i];
435439
std::string name = headers[i];
436440

437441
DataPath arrayPath = groupPath;
438442
arrayPath = arrayPath.createChildPath(name);
439-
resultOutputActions.value().appendAction(std::make_unique<CreateArrayAction>(dataType, tupleDims, std::vector<usize>{1}, arrayPath));
443+
if(csvType == CSVType::string)
444+
{
445+
resultOutputActions.value().appendAction(std::make_unique<CreateStringArrayAction>(tupleDims, arrayPath));
446+
}
447+
else
448+
{
449+
resultOutputActions.value().appendAction(std::make_unique<CreateArrayAction>(ConvertCSVTypeToDataType(csvType), tupleDims, std::vector<usize>{1}, arrayPath));
450+
}
440451
}
441452

442453
return {std::move(resultOutputActions), {}};

0 commit comments

Comments
 (0)