diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelHeaderSchemaStrategy.java b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelHeaderSchemaStrategy.java index 5db05490d245..fc8073adf787 100644 --- a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelHeaderSchemaStrategy.java +++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelHeaderSchemaStrategy.java @@ -74,10 +74,12 @@ public RecordSchema getSchema(Map variables, InputStream content final int firstRow = rawFirstRow == null ? NumberUtils.toInt(ExcelReader.STARTING_ROW.getDefaultValue()) : rawFirstRow; final int zeroBasedFirstRow = ExcelReader.getZeroBasedIndex(firstRow); final String password = context.getProperty(ExcelReader.PASSWORD).getValue(); + final InputFileType inputFileType = context.getProperty(ExcelReader.INPUT_FILE_TYPE).asAllowableValue(InputFileType.class); final ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() .withRequiredSheets(requiredSheets) .withFirstRow(zeroBasedFirstRow) .withPassword(password) + .withInputFileType(inputFileType) .build(); final RowIterator rowIterator = new RowIterator(contentStream, configuration, logger); diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java index 428721e745b3..d36c7f7923df 100644 --- a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java +++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java @@ -53,13 +53,13 @@ import java.util.List; import java.util.Map; -@Tags({"excel", "spreadsheet", "xlsx", "parse", "record", "row", "reader", "values", "cell"}) +@Tags({"excel", "spreadsheet", "xls", "xlsx", "parse", "record", "row", "reader", "values", "cell"}) @CapabilityDescription("Parses a Microsoft Excel document returning each row in each sheet as a separate record. " + "This reader allows for inferring a schema from all the required sheets " + "or providing an explicit schema for interpreting the values." + "See Controller Service's Usage for further documentation. " - + "This reader is currently only capable of processing .xlsx " - + "(XSSF 2007 OOXML file format) Excel documents and not older .xls (HSSF '97(-2007) file format) documents.") + + "This reader is capable of processing both password and non password protected .xlsx (XSSF 2007 OOXML file format) " + + "and older .xls (HSSF '97(-2007) file format) Excel documents.") public class ExcelReader extends SchemaRegistryService implements RecordReaderFactory { public static final PropertyDescriptor REQUIRED_SHEETS = new PropertyDescriptor @@ -85,6 +85,15 @@ public class ExcelReader extends SchemaRegistryService implements RecordReaderFa .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) .build(); + public static final PropertyDescriptor INPUT_FILE_TYPE = new PropertyDescriptor + .Builder().name("Input File Type") + .displayName("Input File Type") + .description("Specifies type of Excel input file.") + .required(true) + .allowableValues(InputFileType.class) + .defaultValue(InputFileType.XLSX) + .build(); + public static final PropertyDescriptor PROTECTION_TYPE = new PropertyDescriptor .Builder().name("Protection Type") .displayName("Protection Type") @@ -128,6 +137,7 @@ public RecordReader createRecordReader(final Map variables, fina final List requiredSheets = getRequiredSheets(variables); final int firstRow = getStartingRow(variables); final String password = configurationContext.getProperty(PASSWORD).getValue(); + final InputFileType inputFileType = configurationContext.getProperty(INPUT_FILE_TYPE).asAllowableValue(InputFileType.class); final ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() .withDateFormat(dateFormat) .withRequiredSheets(requiredSheets) @@ -136,6 +146,7 @@ public RecordReader createRecordReader(final Map variables, fina .withTimeFormat(timeFormat) .withTimestampFormat(timestampFormat) .withPassword(password) + .withInputFileType(inputFileType) .build(); return new ExcelRecordReader(configuration, in, logger); diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java index ea45a2adca33..d055a1f25118 100644 --- a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java +++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java @@ -71,7 +71,7 @@ public ExcelRecordReader(ExcelRecordReaderConfiguration configuration, InputStre try { this.rowIterator = new RowIterator(inputStream, configuration, logger); } catch (RuntimeException e) { - throw new MalformedRecordException("Read initial Record from Excel XLSX failed", e); + throw new MalformedRecordException("Read initial Record from Excel file failed", e); } } diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java index 6faa826ad53d..f3fd777ec1a0 100644 --- a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java +++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java @@ -30,6 +30,7 @@ public class ExcelRecordReaderConfiguration { private String timestampFormat; private String password; private boolean avoidTempFiles; + private InputFileType inputFileType; private ExcelRecordReaderConfiguration() { } @@ -66,6 +67,10 @@ public boolean isAvoidTempFiles() { return avoidTempFiles; } + public InputFileType getInputFileType() { + return inputFileType; + } + public static final class Builder { private RecordSchema schema; private List requiredSheets; @@ -75,6 +80,7 @@ public static final class Builder { private String timestampFormat; private String password; private boolean avoidTempFiles; + private InputFileType inputFileType; public Builder withSchema(RecordSchema schema) { this.schema = schema; @@ -116,6 +122,11 @@ public Builder withAvoidTempFiles(boolean avoidTempFiles) { return this; } + public Builder withInputFileType(InputFileType inputFileType) { + this.inputFileType = inputFileType; + return this; + } + public ExcelRecordReaderConfiguration build() { ExcelRecordReaderConfiguration excelRecordReaderConfiguration = new ExcelRecordReaderConfiguration(); excelRecordReaderConfiguration.schema = this.schema; @@ -126,6 +137,7 @@ public ExcelRecordReaderConfiguration build() { excelRecordReaderConfiguration.firstRow = this.firstRow; excelRecordReaderConfiguration.password = password; excelRecordReaderConfiguration.avoidTempFiles = avoidTempFiles; + excelRecordReaderConfiguration.inputFileType = inputFileType; return excelRecordReaderConfiguration; } diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java index 24f4a9fcff2a..0c98f6e0a02e 100644 --- a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java +++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java @@ -36,10 +36,12 @@ public ExcelRecordSource(final InputStream in, final PropertyContext context, fi final int firstRow = rawFirstRow == null ? NumberUtils.toInt(ExcelReader.STARTING_ROW.getDefaultValue()) : rawFirstRow; final int zeroBasedFirstRow = ExcelReader.getZeroBasedIndex(firstRow); final String password = context.getProperty(ExcelReader.PASSWORD).getValue(); + final InputFileType inputFileType = context.getProperty(ExcelReader.INPUT_FILE_TYPE).asAllowableValue(InputFileType.class); final ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() .withRequiredSheets(requiredSheets) .withFirstRow(zeroBasedFirstRow) .withPassword(password) + .withInputFileType(inputFileType) .build(); this.rowIterator = new RowIterator(in, configuration, logger); } diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/InputFileType.java b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/InputFileType.java new file mode 100644 index 000000000000..9f5d4df000e1 --- /dev/null +++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/InputFileType.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi.excel; + +import org.apache.nifi.components.DescribedValue; + +public enum InputFileType implements DescribedValue { + XLS("XLS 1997-2007 file format"), + XLSX("XLSX 2007-present OOXML file format"); + + InputFileType(String description) { + this.description = description; + } + + private final String description; + + @Override + public String getValue() { + return name(); + } + + @Override + public String getDisplayName() { + return name(); + } + + @Override + public String getDescription() { + return description; + } +} diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java index 6d067a7c078c..d4172be8ad6a 100644 --- a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java +++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java @@ -19,6 +19,8 @@ import com.github.pjfanning.xlsx.StreamingReader; import org.apache.nifi.logging.ComponentLog; import org.apache.nifi.processor.exception.ProcessException; +import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; @@ -42,13 +44,26 @@ class RowIterator implements Iterator, Closeable { private Row currentRow; RowIterator(final InputStream in, final ExcelRecordReaderConfiguration configuration, final ComponentLog logger) { - this.workbook = StreamingReader.builder() - .rowCacheSize(100) - .bufferSize(4096) - .password(configuration.getPassword()) - .setAvoidTempFiles(configuration.isAvoidTempFiles()) - .setReadSharedFormulas(true) // NOTE: If not set to true, then data with shared formulas fail. - .open(in); + if (configuration.getInputFileType() == InputFileType.XLSX) { + this.workbook = StreamingReader.builder() + .rowCacheSize(100) + .bufferSize(4096) + .password(configuration.getPassword()) + .setAvoidTempFiles(configuration.isAvoidTempFiles()) + .setReadSharedFormulas(true) // NOTE: If not set to true, then data with shared formulas fail. + .open(in); + } else { + // Providing the password to the HSSFWorkbook is done by setting a thread variable managed by + // Biff8EncryptionKey. After the workbook is created, the thread variable can be cleared. + Biff8EncryptionKey.setCurrentUserPassword(configuration.getPassword()); + try { + this.workbook = new HSSFWorkbook(in); + } catch (final IOException e) { + throw new ProcessException("Failed to open XLS file", e); + } finally { + Biff8EncryptionKey.setCurrentUserPassword(null); + } + } final List requiredSheets = configuration.getRequiredSheets(); if (requiredSheets == null || requiredSheets.isEmpty()) { diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java index 14f7015a15dd..4b33c2e238eb 100644 --- a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java +++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java @@ -16,7 +16,6 @@ */ package org.apache.nifi.excel; -import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.nifi.logging.ComponentLog; import org.apache.nifi.processor.exception.ProcessException; import org.apache.nifi.serialization.MalformedRecordException; @@ -25,7 +24,8 @@ import org.apache.nifi.serialization.record.RecordField; import org.apache.nifi.serialization.record.RecordFieldType; import org.apache.nifi.serialization.record.RecordSchema; -import com.github.pjfanning.xlsx.exceptions.ReadException; +import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.poifs.crypt.EncryptionInfo; import org.apache.poi.poifs.crypt.EncryptionMode; @@ -33,13 +33,15 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; -import org.apache.poi.xssf.usermodel.XSSFSheet; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; import org.junit.jupiter.params.provider.ValueSource; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; @@ -53,9 +55,9 @@ import java.nio.file.Path; import java.sql.Timestamp; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.function.Supplier; import static java.nio.file.Files.newDirectoryStream; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; @@ -71,6 +73,7 @@ public class TestExcelRecordReader { private static final String MULTI_SHEET_FILE = "twoSheets.xlsx"; private static final String PASSWORD = "nifi"; private static final ByteArrayOutputStream PASSWORD_PROTECTED = new ByteArrayOutputStream(); + private static final ByteArrayOutputStream PASSWORD_PROTECTED_OLDER_EXCEL = new ByteArrayOutputStream(); private static final Object[][] DATA = { {"ID", "Name"}, {1, "Manny"}, @@ -98,21 +101,8 @@ public static void cleanUpAfterAll() { @BeforeAll static void setUpBeforeAll() throws Exception { - //Generate an Excel file and populate it with data - final InputStream workbook = createWorkbook(DATA); - - //Protect the Excel file with a password - try (POIFSFileSystem poifsFileSystem = new POIFSFileSystem()) { - EncryptionInfo encryptionInfo = new EncryptionInfo(EncryptionMode.agile); - Encryptor encryptor = encryptionInfo.getEncryptor(); - encryptor.confirmPassword(PASSWORD); - - try (OPCPackage opc = OPCPackage.open(workbook); - OutputStream os = encryptor.getDataStream(poifsFileSystem)) { - opc.save(os); - } - poifsFileSystem.writeFilesystem(PASSWORD_PROTECTED); - } + createPasswordProtectedWorkbook(); + createPasswordProtectedOlderExcelWorkbook(); } @Test @@ -122,20 +112,32 @@ public void testNonExcelFile() { MalformedRecordException mre = assertThrows(MalformedRecordException.class, () -> new ExcelRecordReader(configuration, getInputStream("notExcel.txt"), logger)); final Throwable cause = mre.getCause(); - assertInstanceOf(ReadException.class, cause); + assertInstanceOf(ProcessException.class, cause); } @Test - public void testOlderExcelFormatFile() { - ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder().build(); - MalformedRecordException mre = assertThrows(MalformedRecordException.class, () -> new ExcelRecordReader(configuration, getInputStream("olderFormat.xls"), logger)); - assertTrue(ExceptionUtils.getStackTrace(mre).contains("data appears to be in the OLE2 Format")); + public void testOlderExcelFormatFile() throws MalformedRecordException { + final List fields = List.of( + new RecordField("A", RecordFieldType.STRING.getDataType()), + new RecordField("B", RecordFieldType.STRING.getDataType()), + new RecordField("C", RecordFieldType.STRING.getDataType())); + + ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() + .withSchema(new SimpleRecordSchema(fields)) + .withInputFileType(InputFileType.XLS) + .build(); + + final ExcelRecordReader recordReader = new ExcelRecordReader(configuration, getInputStream("olderFormat.xls"), logger); + final List records = getRecords(recordReader, false, false); + + assertEquals(4, records.size()); } @Test public void testMultipleRecordsSingleSheet() throws MalformedRecordException { ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() .withSchema(getDataFormattingSchema()) + .withInputFileType(InputFileType.XLSX) .build(); ExcelRecordReader recordReader = new ExcelRecordReader(configuration, getInputStream(DATA_FORMATTING_FILE), logger); @@ -145,7 +147,7 @@ public void testMultipleRecordsSingleSheet() throws MalformedRecordException { } private RecordSchema getDataFormattingSchema() { - final List fields = Arrays.asList( + final List fields = List.of( new RecordField("Numbers", RecordFieldType.DOUBLE.getDataType()), new RecordField("Timestamps", RecordFieldType.DATE.getDataType()), new RecordField("Money", RecordFieldType.DOUBLE.getDataType()), @@ -176,12 +178,13 @@ private List getRecords(ExcelRecordReader recordReader, boolean coerceTy @ParameterizedTest @ValueSource(booleans = {true, false}) public void testDropUnknownFields(boolean dropUnknownFields) throws MalformedRecordException { - final List fields = Arrays.asList( + final List fields = List.of( new RecordField("Numbers", RecordFieldType.DOUBLE.getDataType()), new RecordField("Timestamps", RecordFieldType.DATE.getDataType())); ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() .withSchema(new SimpleRecordSchema(fields)) + .withInputFileType(InputFileType.XLSX) .build(); ExcelRecordReader recordReader = new ExcelRecordReader(configuration, getInputStream(DATA_FORMATTING_FILE), logger); @@ -203,6 +206,7 @@ public void testSkipLines() throws MalformedRecordException { ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() .withFirstRow(5) .withSchema(getDataFormattingSchema()) + .withInputFileType(InputFileType.XLSX) .build(); ExcelRecordReader recordReader = new ExcelRecordReader(configuration, getInputStream(DATA_FORMATTING_FILE), logger); @@ -221,6 +225,7 @@ public void tesCoerceTypes(boolean coerceTypes) throws MalformedRecordException .withTimeFormat(RecordFieldType.TIME.getDefaultFormat()) .withTimestampFormat(RecordFieldType.TIMESTAMP.getDefaultFormat()) .withSchema(schema) + .withInputFileType(InputFileType.XLSX) .build(); ExcelRecordReader recordReader = new ExcelRecordReader(configuration, getInputStream("dates.xlsx"), logger); @@ -238,6 +243,7 @@ public void testSelectSpecificSheet() throws MalformedRecordException { .withSchema(schema) .withFirstRow(1) .withRequiredSheets(requiredSheets) + .withInputFileType(InputFileType.XLSX) .build(); ExcelRecordReader recordReader = new ExcelRecordReader(configuration, getInputStream(MULTI_SHEET_FILE), logger); @@ -247,7 +253,7 @@ public void testSelectSpecificSheet() throws MalformedRecordException { } private RecordSchema getSpecificSheetSchema() { - return new SimpleRecordSchema(Arrays.asList(new RecordField("first", RecordFieldType.STRING.getDataType()), + return new SimpleRecordSchema(List.of(new RecordField("first", RecordFieldType.STRING.getDataType()), new RecordField("second", RecordFieldType.STRING.getDataType()), new RecordField("third", RecordFieldType.STRING.getDataType()))); } @@ -260,6 +266,7 @@ public void testSelectSpecificSheetNotFound() { .withSchema(schema) .withFirstRow(1) .withRequiredSheets(requiredSheets) + .withInputFileType(InputFileType.XLSX) .build(); MalformedRecordException mre = assertThrows(MalformedRecordException.class, @@ -270,10 +277,11 @@ public void testSelectSpecificSheetNotFound() { @Test public void testSelectAllSheets() throws MalformedRecordException { - RecordSchema schema = new SimpleRecordSchema(Arrays.asList(new RecordField("first", RecordFieldType.STRING.getDataType()), + RecordSchema schema = new SimpleRecordSchema(List.of(new RecordField("first", RecordFieldType.STRING.getDataType()), new RecordField("second", RecordFieldType.STRING.getDataType()))); ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() .withSchema(schema) + .withInputFileType(InputFileType.XLSX) .build(); ExcelRecordReader recordReader = new ExcelRecordReader(configuration, getInputStream(MULTI_SHEET_FILE), logger); @@ -284,13 +292,14 @@ public void testSelectAllSheets() throws MalformedRecordException { @Test void testWhereCellValueDoesNotMatchSchemaType() { - RecordSchema schema = new SimpleRecordSchema(Arrays.asList(new RecordField("first", RecordFieldType.STRING.getDataType()), + RecordSchema schema = new SimpleRecordSchema(List.of(new RecordField("first", RecordFieldType.STRING.getDataType()), new RecordField("second", RecordFieldType.FLOAT.getDataType()))); List requiredSheets = Collections.singletonList("TestSheetA"); ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() .withSchema(schema) .withFirstRow(2) .withRequiredSheets(requiredSheets) + .withInputFileType(InputFileType.XLSX) .build(); final MalformedRecordException mre = assertThrows(MalformedRecordException.class, () -> { @@ -302,48 +311,55 @@ void testWhereCellValueDoesNotMatchSchemaType() { assertTrue(mre.getMessage().contains("on row") && mre.getMessage().contains("in sheet")); } - @Test - void testPasswordProtected() throws Exception { - RecordSchema schema = getPasswordProtectedSchema(); - ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() + @ParameterizedTest + @EnumSource(InputFileType.class) + void testPasswordProtected(InputFileType inputFileType) throws Exception { + final RecordSchema schema = getPasswordProtectedSchema(); + final ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() .withSchema(schema) .withPassword(PASSWORD) .withAvoidTempFiles(true) + .withInputFileType(inputFileType) .build(); - InputStream inputStream = new ByteArrayInputStream(PASSWORD_PROTECTED.toByteArray()); - ExcelRecordReader recordReader = new ExcelRecordReader(configuration, inputStream, logger); - List records = getRecords(recordReader, false, false); + final ByteArrayOutputStream fileStream = inputFileType == InputFileType.XLSX ? PASSWORD_PROTECTED : PASSWORD_PROTECTED_OLDER_EXCEL; + final InputStream inputStream = new ByteArrayInputStream(fileStream.toByteArray()); + final ExcelRecordReader recordReader = new ExcelRecordReader(configuration, inputStream, logger); + final List records = getRecords(recordReader, false, false); assertEquals(DATA.length, records.size()); } - @Test - void testPasswordProtectedWithoutPassword() { - RecordSchema schema = getPasswordProtectedSchema(); - ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() + @ParameterizedTest + @EnumSource(InputFileType.class) + void testPasswordProtectedWithoutPassword(InputFileType inputFileType) { + final RecordSchema schema = getPasswordProtectedSchema(); + final ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() .withSchema(schema) + .withInputFileType(inputFileType) .build(); - InputStream inputStream = new ByteArrayInputStream(PASSWORD_PROTECTED.toByteArray()); + final ByteArrayOutputStream fileStream = inputFileType == InputFileType.XLSX ? PASSWORD_PROTECTED : PASSWORD_PROTECTED_OLDER_EXCEL; + final InputStream inputStream = new ByteArrayInputStream(fileStream.toByteArray()); assertThrows(Exception.class, () -> new ExcelRecordReader(configuration, inputStream, logger)); } private RecordSchema getPasswordProtectedSchema() { - return new SimpleRecordSchema(Arrays.asList(new RecordField("id", RecordFieldType.INT.getDataType()), + return new SimpleRecordSchema(List.of(new RecordField("id", RecordFieldType.INT.getDataType()), new RecordField("name", RecordFieldType.STRING.getDataType()))); } @Test void testWithNumberColumnWhoseValueIsEmptyString() throws Exception { - final RecordSchema schema = new SimpleRecordSchema(Arrays.asList(new RecordField("first", RecordFieldType.STRING.getDataType()), + final RecordSchema schema = new SimpleRecordSchema(List.of(new RecordField("first", RecordFieldType.STRING.getDataType()), new RecordField("second", RecordFieldType.LONG.getDataType()))); final ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() .withSchema(schema) + .withInputFileType(InputFileType.XLSX) .build(); final Object[][] data = {{"Manny", ""}}; - final InputStream workbook = createWorkbook(data); + final InputStream workbook = createWorkbook(data, XSSFWorkbook::new); final ExcelRecordReader recordReader = new ExcelRecordReader(configuration, workbook, logger); assertDoesNotThrow(() -> getRecords(recordReader, true, true)); @@ -355,9 +371,10 @@ void testWhereLongSpecifiedInSchemaAsString() throws Exception { final RecordSchema schema = new SimpleRecordSchema(List.of(new RecordField(fieldName, RecordFieldType.STRING.getDataType()))); final ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() .withSchema(schema) + .withInputFileType(InputFileType.XLSX) .build(); final Object[][] data = {{9876543210L}}; - final InputStream workbook = createWorkbook(data); + final InputStream workbook = createWorkbook(data, XSSFWorkbook::new); final ExcelRecordReader recordReader = new ExcelRecordReader(configuration, workbook, logger); final List records = getRecords(recordReader, true, true); final Record firstRecord = records.getFirst(); @@ -366,10 +383,10 @@ void testWhereLongSpecifiedInSchemaAsString() throws Exception { assertEquals(scientificNotationNumber, firstRecord.getAsString(fieldName)); } - private static InputStream createWorkbook(Object[][] data) throws Exception { + private static InputStream createWorkbook(Object[][] data, Supplier workbookSupplier) throws Exception { final ByteArrayOutputStream workbookOutputStream = new ByteArrayOutputStream(); - try (XSSFWorkbook workbook = new XSSFWorkbook()) { - final XSSFSheet sheet = workbook.createSheet("SomeSheetName"); + try (Workbook workbook = workbookSupplier.get()) { + final Sheet sheet = workbook.createSheet("SomeSheetName"); populateSheet(sheet, data); workbook.write(workbookOutputStream); } @@ -377,7 +394,34 @@ private static InputStream createWorkbook(Object[][] data) throws Exception { return new ByteArrayInputStream(workbookOutputStream.toByteArray()); } - private static void populateSheet(XSSFSheet sheet, Object[][] data) { + private static void createPasswordProtectedWorkbook() throws Exception { + // Generate an Excel file and populate it with data + final InputStream workbook = createWorkbook(DATA, XSSFWorkbook::new); + + // Protect the Excel file with a password + try (POIFSFileSystem poifsFileSystem = new POIFSFileSystem()) { + EncryptionInfo encryptionInfo = new EncryptionInfo(EncryptionMode.agile); + Encryptor encryptor = encryptionInfo.getEncryptor(); + encryptor.confirmPassword(PASSWORD); + + try (OPCPackage opc = OPCPackage.open(workbook); + OutputStream os = encryptor.getDataStream(poifsFileSystem)) { + opc.save(os); + } + poifsFileSystem.writeFilesystem(PASSWORD_PROTECTED); + } + } + + private static void createPasswordProtectedOlderExcelWorkbook() throws Exception { + Biff8EncryptionKey.setCurrentUserPassword(PASSWORD); + try (final InputStream inputStream = createWorkbook(DATA, HSSFWorkbook::new)) { + inputStream.transferTo(PASSWORD_PROTECTED_OLDER_EXCEL); + } finally { + Biff8EncryptionKey.setCurrentUserPassword(null); + } + } + + private static void populateSheet(Sheet sheet, Object[][] data) { //Adding the data to the Excel worksheet int rowCount = 0; for (Object[] dataRow : data) {