CE-1955 Update fastexcel version; Update XlsxFileToRows to read formats, and then do a better job of handling numbers as date-time, date, int, or decimal (hopefully)

This commit is contained in:
2024-12-26 19:09:41 -06:00
parent 9cfc7fafc1
commit a4499219c8
3 changed files with 210 additions and 43 deletions

View File

@ -100,7 +100,7 @@
<dependency>
<groupId>org.dhatim</groupId>
<artifactId>fastexcel</artifactId>
<version>0.12.15</version>
<version>0.18.4</version>
</dependency>
<dependency>
<groupId>org.dhatim</groupId>

View File

@ -26,14 +26,16 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.math.BigDecimal;
import java.math.MathContext;
import java.time.LocalDateTime;
import java.util.Optional;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import com.kingsrook.qqq.backend.core.exceptions.QException;
import com.kingsrook.qqq.backend.core.logging.QLogger;
import com.kingsrook.qqq.backend.core.processes.implementations.bulk.insert.model.BulkLoadFileRow;
import org.dhatim.fastexcel.reader.Cell;
import org.dhatim.fastexcel.reader.ReadableWorkbook;
import org.dhatim.fastexcel.reader.ReadingOptions;
import org.dhatim.fastexcel.reader.Row;
import org.dhatim.fastexcel.reader.Sheet;
@ -42,6 +44,10 @@ import org.dhatim.fastexcel.reader.Sheet;
*******************************************************************************/
public class XlsxFileToRows extends AbstractIteratorBasedFileToRows<org.dhatim.fastexcel.reader.Row> implements FileToRowsInterface
{
private static final QLogger LOG = QLogger.getLogger(XlsxFileToRows.class);
private static final Pattern DAY_PATTERN = Pattern.compile(".*\\b(d|dd)\\b.*");
private ReadableWorkbook workbook;
private Stream<org.dhatim.fastexcel.reader.Row> rows;
@ -55,7 +61,7 @@ public class XlsxFileToRows extends AbstractIteratorBasedFileToRows<org.dhatim.f
{
try
{
workbook = new ReadableWorkbook(inputStream);
workbook = new ReadableWorkbook(inputStream, new ReadingOptions(true, true));
Sheet sheet = workbook.getFirstSheet();
rows = sheet.openStream();
@ -79,41 +85,7 @@ public class XlsxFileToRows extends AbstractIteratorBasedFileToRows<org.dhatim.f
for(int i = 0; i < readerRow.getCellCount(); i++)
{
Cell cell = readerRow.getCell(i);
if(cell.getType() != null)
{
values[i] = switch(cell.getType())
{
case NUMBER ->
{
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// ... with fastexcel reader, we don't get styles... so, we just know type = number, for dates and ints & decimals... //
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Optional<LocalDateTime> dateTime = readerRow.getCellAsDate(i);
if(dateTime.isPresent() && dateTime.get().getYear() > 1915 && dateTime.get().getYear() < 2100)
{
yield dateTime.get();
}
Optional<BigDecimal> optionalBigDecimal = readerRow.getCellAsNumber(i);
if(optionalBigDecimal.isPresent())
{
BigDecimal bigDecimal = optionalBigDecimal.get();
if(bigDecimal.subtract(bigDecimal.round(new MathContext(0))).compareTo(BigDecimal.ZERO) == 0)
{
yield bigDecimal.intValue();
}
yield bigDecimal;
}
yield (null);
}
case BOOLEAN -> readerRow.getCellAsBoolean(i).orElse(null);
case STRING, FORMULA -> cell.getText();
case EMPTY, ERROR -> null;
};
}
values[i] = processCell(readerRow, i);
}
return new BulkLoadFileRow(values, getRowNo());
@ -121,6 +93,150 @@ public class XlsxFileToRows extends AbstractIteratorBasedFileToRows<org.dhatim.f
/***************************************************************************
 ** Convert one cell of a reader row into the value placed in the output row.
 **
 ** NUMBER cells are disambiguated using the cell's data format string:
 ** a date-time looking format yields a LocalDateTime, a date looking format
 ** yields a LocalDate, otherwise the numeric value is returned as an Integer
 ** when it is an exact int, else as a BigDecimal.  STRING and BOOLEAN cells
 ** are returned as-is.  EMPTY, ERROR and FORMULA cells (and any other type)
 ** produce null.
 **
 ** @param readerRow the fastexcel row being converted
 ** @param columnIndex index of the cell within readerRow
 ** @return the converted value, or null if there is no cell / no usable value
 ***************************************************************************/
private Serializable processCell(Row readerRow, int columnIndex)
{
   Cell cell = readerRow.getCell(columnIndex);

   /////////////////////////////////////////////////////////////////////////////////////
   // no cell at all - or a cell with no type (switching on a null enum would throw) //
   /////////////////////////////////////////////////////////////////////////////////////
   if(cell == null || cell.getType() == null)
   {
      return (null);
   }

   switch(cell.getType())
   {
      case NUMBER ->
      {
         /////////////////////////////////////////////////////////////////////////////////////
         // dates, date-times, integers, and decimals are all identified as type = "number" //
         // so use the cell's format string to work out what the user meant the value to be //
         /////////////////////////////////////////////////////////////////////////////////////
         String dataFormatString = cell.getDataFormatString();

         if(isDateTimeFormat(dataFormatString))
         {
            ////////////////////////////////////////////////////////////////////////////////////////
            // first - if it has a date-time looking format string, then treat it as a date-time. //
            ////////////////////////////////////////////////////////////////////////////////////////
            return (cell.asDate());
         }
         else if(isDateFormat(dataFormatString))
         {
            ///////////////////////////////////////////////////////////////////////////////////////////////////////////
            // second, if it has a date looking format string (which is a sub-set of date-time), then treat as date. //
            ///////////////////////////////////////////////////////////////////////////////////////////////////////////
            return (cell.asDate().toLocalDate());
         }

         /////////////////////////////////////////////////////////////////////////////
         // otherwise assume a plain number - if the optional is empty, return null //
         /////////////////////////////////////////////////////////////////////////////
         Optional<BigDecimal> optionalNumber = readerRow.getCellAsNumber(columnIndex);
         if(optionalNumber.isEmpty())
         {
            return (null);
         }

         BigDecimal number = optionalNumber.get();
         try
         {
            ///////////////////////////////////////////////////////////////////////////////
            // intValueExact only succeeds for values with no fraction that fit an int  //
            ///////////////////////////////////////////////////////////////////////////////
            Integer exactInt = number.intValueExact();
            return (exactInt);
         }
         catch(ArithmeticException ignored)
         {
            ///////////////////////////////////////////////////////////////////////
            // has a fractional part, or is out of int range - keep the decimal //
            ///////////////////////////////////////////////////////////////////////
            return (number);
         }
      }
      case STRING ->
      {
         return cell.asString();
      }
      case BOOLEAN ->
      {
         return cell.asBoolean();
      }
      case EMPTY, ERROR, FORMULA ->
      {
         //////////////////////////////////////////////////////////////////////////////////////////
         // log with getText() rather than asString(): fastexcel's typed accessors validate the  //
         // cell type, so asString() on an ERROR or FORMULA cell raises instead of returning -   //
         // which would turn this debug line into a failure for the whole row.                   //
         //////////////////////////////////////////////////////////////////////////////////////////
         LOG.debug("cell type: " + cell.getType() + " had value string: " + cell.getText());
         return (null);
      }
      default ->
      {
         return (null);
      }
   }
}
/***************************************************************************
 ** Decide whether a cell format string looks like a date-time format:
 ** true only for a non-null string for which both hasDay and hasHour
 ** report true.
 **
 ** @param dataFormatString the cell's format string (may be null)
 ***************************************************************************/
static boolean isDateTimeFormat(String dataFormatString)
{
   return (dataFormatString != null && hasDay(dataFormatString) && hasHour(dataFormatString));
}
/***************************************************************************
 ** Report whether a format string contains a lower-case 'h' anywhere,
 ** which is taken as the sign of an hour component.
 **
 ** @param dataFormatString the cell's format string (must not be null)
 ***************************************************************************/
static boolean hasHour(String dataFormatString)
{
   return (dataFormatString.indexOf('h') >= 0);
}
/***************************************************************************
 ** Report whether the whole format string matches DAY_PATTERN.
 **
 ** @param dataFormatString the cell's format string (must not be null)
 ***************************************************************************/
static boolean hasDay(String dataFormatString)
{
   var dayMatcher = DAY_PATTERN.matcher(dataFormatString);
   return (dayMatcher.matches());
}
/***************************************************************************
 ** Decide whether a cell format string looks like a date format:
 ** true only for a non-null string for which hasDay reports true.
 ** Note that any string accepted by isDateTimeFormat is accepted here too.
 **
 ** @param dataFormatString the cell's format string (may be null)
 ***************************************************************************/
static boolean isDateFormat(String dataFormatString)
{
   return (dataFormatString != null && hasDay(dataFormatString));
}
/***************************************************************************
**
***************************************************************************/

View File

@ -27,7 +27,6 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.Month;
import java.util.Map;
import com.kingsrook.qqq.backend.core.BaseTest;
@ -45,6 +44,7 @@ import org.junit.jupiter.api.Test;
import static com.kingsrook.qqq.backend.core.actions.reporting.GenerateReportActionTest.REPORT_NAME;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
/*******************************************************************************
@ -61,13 +61,14 @@ class XlsxFileToRowsTest extends BaseTest
{
byte[] byteArray = writeExcelBytes();
FileToRowsInterface fileToRowsInterface = FileToRowsInterface.forFile("someFile.xlsx", new ByteArrayInputStream(byteArray));
FileToRowsInterface fileToRowsInterface = new XlsxFileToRows();
fileToRowsInterface.init(new ByteArrayInputStream(byteArray));
BulkLoadFileRow headerRow = fileToRowsInterface.next();
BulkLoadFileRow bodyRow = fileToRowsInterface.next();
assertEquals(new BulkLoadFileRow(new String[] {"Id", "First Name", "Last Name", "Birth Date"}, 1), headerRow);
assertEquals(new BulkLoadFileRow(new Serializable[] {1, "Darin", "Jonson", LocalDateTime.of(1980, Month.JANUARY, 31, 0, 0)}, 2), bodyRow);
assertEquals(new BulkLoadFileRow(new String[] { "Id", "First Name", "Last Name", "Birth Date" }, 1), headerRow);
assertEquals(new BulkLoadFileRow(new Serializable[] { 1, "Darin", "Jonson", LocalDate.of(1980, Month.JANUARY, 31) }, 2), bodyRow);
///////////////////////////////////////////////////////////////////////////////////////
// make sure there's at least a limit (less than 20) to how many more rows there are //
@ -107,4 +108,54 @@ class XlsxFileToRowsTest extends BaseTest
return byteArray;
}
/*******************************************************************************
 ** Check the format-string classification: each format below is expected to be
 ** seen as a date-time, as a date only, or as neither.
 *******************************************************************************/
@Test
void testDateTimeFormats()
{
   String[] dateTimeFormats =
      {
         "dddd, m/d/yy at h:mm",
         "h PM, ddd mmm dd",
         "dd/mm/yyyy hh:mm",
         "yyyy-mm-dd hh:mm:ss.000",
         "hh:mm dd/mm/yyyy"
      };

   String[] dateOnlyFormats =
      {
         "yyyy-mm-dd",
         "mmmm d \\[dddd\\]",
         "mmm dd, yyyy",
         "d-mmm",
         "dd.mm.yyyy"
      };

   String[] neitherFormats =
      {
         "yyyy",
         "mmm-yyyy",
         "hh",
         "hh:mm"
      };

   for(String format : dateTimeFormats)
   {
      assertFormatDateAndOrDateTime(true, false, format);
   }

   for(String format : dateOnlyFormats)
   {
      assertFormatDateAndOrDateTime(false, true, format);
   }

   for(String format : neitherFormats)
   {
      assertFormatDateAndOrDateTime(false, false, format);
   }
}
/***************************************************************************
 * Classify format with XlsxFileToRows (date-time first, then date, else
 * neither) and assert that the outcome agrees with the expectation flags.
 *
 * @param expectDateTime true if format should be classified as a date-time
 * @param expectDate true if format should be classified as a date (only)
 * @param format the format string under test
 ***************************************************************************/
private void assertFormatDateAndOrDateTime(boolean expectDateTime, boolean expectDate, String format)
{
   if(XlsxFileToRows.isDateTimeFormat(format))
   {
      assertTrue(expectDateTime, format + " was considered a dateTime, but wasn't expected to.");
      assertFalse(expectDate, format + " was considered a dateTime, but was expected to be a date.");
   }
   else if(XlsxFileToRows.isDateFormat(format))
   {
      assertFalse(expectDateTime, format + " was considered a date, but was expected to be a dateTime.");
      assertTrue(expectDate, format + " was considered a date, but wasn't expected to.");
   }
   else
   {
      assertFalse(expectDateTime, format + " was not considered a dateTime, but was expected to.");
      assertFalse(expectDate, format + " was not considered a date, but was expected to.");
   }
}
}