mirror of
https://github.com/Kingsrook/qqq.git
synced 2025-07-18 05:01:07 +00:00
CE-1955 Update fastexcel version; Update XlsxFileToRows to read formats, and then do a better job of handling numbers as date-time, date, int, or decimal (hopefully)
This commit is contained in:
@ -100,7 +100,7 @@
|
||||
<dependency>
|
||||
<groupId>org.dhatim</groupId>
|
||||
<artifactId>fastexcel</artifactId>
|
||||
<version>0.12.15</version>
|
||||
<version>0.18.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.dhatim</groupId>
|
||||
|
@ -26,14 +26,16 @@ import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.math.BigDecimal;
|
||||
import java.math.MathContext;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.Optional;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Stream;
|
||||
import com.kingsrook.qqq.backend.core.exceptions.QException;
|
||||
import com.kingsrook.qqq.backend.core.logging.QLogger;
|
||||
import com.kingsrook.qqq.backend.core.processes.implementations.bulk.insert.model.BulkLoadFileRow;
|
||||
import org.dhatim.fastexcel.reader.Cell;
|
||||
import org.dhatim.fastexcel.reader.ReadableWorkbook;
|
||||
import org.dhatim.fastexcel.reader.ReadingOptions;
|
||||
import org.dhatim.fastexcel.reader.Row;
|
||||
import org.dhatim.fastexcel.reader.Sheet;
|
||||
|
||||
|
||||
@ -42,6 +44,10 @@ import org.dhatim.fastexcel.reader.Sheet;
|
||||
*******************************************************************************/
|
||||
public class XlsxFileToRows extends AbstractIteratorBasedFileToRows<org.dhatim.fastexcel.reader.Row> implements FileToRowsInterface
|
||||
{
|
||||
private static final QLogger LOG = QLogger.getLogger(XlsxFileToRows.class);
|
||||
|
||||
private static final Pattern DAY_PATTERN = Pattern.compile(".*\\b(d|dd)\\b.*");
|
||||
|
||||
private ReadableWorkbook workbook;
|
||||
private Stream<org.dhatim.fastexcel.reader.Row> rows;
|
||||
|
||||
@ -55,7 +61,7 @@ public class XlsxFileToRows extends AbstractIteratorBasedFileToRows<org.dhatim.f
|
||||
{
|
||||
try
|
||||
{
|
||||
workbook = new ReadableWorkbook(inputStream);
|
||||
workbook = new ReadableWorkbook(inputStream, new ReadingOptions(true, true));
|
||||
Sheet sheet = workbook.getFirstSheet();
|
||||
|
||||
rows = sheet.openStream();
|
||||
@ -79,41 +85,7 @@ public class XlsxFileToRows extends AbstractIteratorBasedFileToRows<org.dhatim.f
|
||||
|
||||
for(int i = 0; i < readerRow.getCellCount(); i++)
|
||||
{
|
||||
Cell cell = readerRow.getCell(i);
|
||||
if(cell.getType() != null)
|
||||
{
|
||||
values[i] = switch(cell.getType())
|
||||
{
|
||||
case NUMBER ->
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ... with fastexcel reader, we don't get styles... so, we just know type = number, for dates and ints & decimals... //
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
Optional<LocalDateTime> dateTime = readerRow.getCellAsDate(i);
|
||||
if(dateTime.isPresent() && dateTime.get().getYear() > 1915 && dateTime.get().getYear() < 2100)
|
||||
{
|
||||
yield dateTime.get();
|
||||
}
|
||||
|
||||
Optional<BigDecimal> optionalBigDecimal = readerRow.getCellAsNumber(i);
|
||||
if(optionalBigDecimal.isPresent())
|
||||
{
|
||||
BigDecimal bigDecimal = optionalBigDecimal.get();
|
||||
if(bigDecimal.subtract(bigDecimal.round(new MathContext(0))).compareTo(BigDecimal.ZERO) == 0)
|
||||
{
|
||||
yield bigDecimal.intValue();
|
||||
}
|
||||
|
||||
yield bigDecimal;
|
||||
}
|
||||
|
||||
yield (null);
|
||||
}
|
||||
case BOOLEAN -> readerRow.getCellAsBoolean(i).orElse(null);
|
||||
case STRING, FORMULA -> cell.getText();
|
||||
case EMPTY, ERROR -> null;
|
||||
};
|
||||
}
|
||||
values[i] = processCell(readerRow, i);
|
||||
}
|
||||
|
||||
return new BulkLoadFileRow(values, getRowNo());
|
||||
@ -121,6 +93,150 @@ public class XlsxFileToRows extends AbstractIteratorBasedFileToRows<org.dhatim.f
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
**
|
||||
***************************************************************************/
|
||||
private Serializable processCell(Row readerRow, int columnIndex)
|
||||
{
|
||||
Cell cell = readerRow.getCell(columnIndex);
|
||||
if(cell == null)
|
||||
{
|
||||
return (null);
|
||||
}
|
||||
|
||||
String dataFormatString = cell.getDataFormatString();
|
||||
switch(cell.getType())
|
||||
{
|
||||
case NUMBER ->
|
||||
{
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
// dates, date-times, integers, and decimals are all identified as type = "number" //
|
||||
// so go through this process to try to identify what user means it as //
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
if(isDateTimeFormat(dataFormatString))
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// first - if it has a date-time looking format string, then treat it as a date-time. //
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
return (cell.asDate());
|
||||
}
|
||||
else if(isDateFormat(dataFormatString))
|
||||
{
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// second, if it has a date looking format string (which is a sub-set of date-time), then treat as date. //
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
return (cell.asDate().toLocalDate());
|
||||
}
|
||||
else
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// now assume it's a number - but in case this optional is empty (why?) return a null //
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
Optional<BigDecimal> bigDecimal = readerRow.getCellAsNumber(columnIndex);
|
||||
if(bigDecimal.isEmpty())
|
||||
{
|
||||
return (null);
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
////////////////////////////////////////////////////////////
|
||||
// now if the bigDecimal is an exact integer, return that //
|
||||
////////////////////////////////////////////////////////////
|
||||
Integer i = bigDecimal.get().intValueExact();
|
||||
return (i);
|
||||
}
|
||||
catch(ArithmeticException e)
|
||||
{
|
||||
/////////////////////////////////
|
||||
// else, end up with a decimal //
|
||||
/////////////////////////////////
|
||||
return (bigDecimal.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
case STRING ->
|
||||
{
|
||||
return cell.asString();
|
||||
}
|
||||
case BOOLEAN ->
|
||||
{
|
||||
return cell.asBoolean();
|
||||
}
|
||||
case EMPTY, ERROR, FORMULA ->
|
||||
{
|
||||
LOG.debug("cell type: " + cell.getType() + " had value string: " + cell.asString());
|
||||
return (null);
|
||||
}
|
||||
default ->
|
||||
{
|
||||
return (null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
**
|
||||
***************************************************************************/
|
||||
static boolean isDateTimeFormat(String dataFormatString)
|
||||
{
|
||||
if(dataFormatString == null)
|
||||
{
|
||||
return (false);
|
||||
}
|
||||
|
||||
if(hasDay(dataFormatString) && hasHour(dataFormatString))
|
||||
{
|
||||
return (true);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
**
|
||||
***************************************************************************/
|
||||
static boolean hasHour(String dataFormatString)
|
||||
{
|
||||
return dataFormatString.contains("h");
|
||||
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
**
|
||||
***************************************************************************/
|
||||
static boolean hasDay(String dataFormatString)
|
||||
{
|
||||
return DAY_PATTERN.matcher(dataFormatString).matches();
|
||||
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
**
|
||||
***************************************************************************/
|
||||
static boolean isDateFormat(String dataFormatString)
|
||||
{
|
||||
if(dataFormatString == null)
|
||||
{
|
||||
return (false);
|
||||
}
|
||||
|
||||
if(hasDay(dataFormatString))
|
||||
{
|
||||
return (true);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
**
|
||||
***************************************************************************/
|
||||
|
@ -27,7 +27,6 @@ import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.Month;
|
||||
import java.util.Map;
|
||||
import com.kingsrook.qqq.backend.core.BaseTest;
|
||||
@ -45,6 +44,7 @@ import org.junit.jupiter.api.Test;
|
||||
import static com.kingsrook.qqq.backend.core.actions.reporting.GenerateReportActionTest.REPORT_NAME;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
|
||||
/*******************************************************************************
|
||||
@ -61,13 +61,14 @@ class XlsxFileToRowsTest extends BaseTest
|
||||
{
|
||||
byte[] byteArray = writeExcelBytes();
|
||||
|
||||
FileToRowsInterface fileToRowsInterface = FileToRowsInterface.forFile("someFile.xlsx", new ByteArrayInputStream(byteArray));
|
||||
FileToRowsInterface fileToRowsInterface = new XlsxFileToRows();
|
||||
fileToRowsInterface.init(new ByteArrayInputStream(byteArray));
|
||||
|
||||
BulkLoadFileRow headerRow = fileToRowsInterface.next();
|
||||
BulkLoadFileRow bodyRow = fileToRowsInterface.next();
|
||||
|
||||
assertEquals(new BulkLoadFileRow(new String[] { "Id", "First Name", "Last Name", "Birth Date" }, 1), headerRow);
|
||||
assertEquals(new BulkLoadFileRow(new Serializable[] {1, "Darin", "Jonson", LocalDateTime.of(1980, Month.JANUARY, 31, 0, 0)}, 2), bodyRow);
|
||||
assertEquals(new BulkLoadFileRow(new Serializable[] { 1, "Darin", "Jonson", LocalDate.of(1980, Month.JANUARY, 31) }, 2), bodyRow);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// make sure there's at least a limit (less than 20) to how many more rows there are //
|
||||
@ -107,4 +108,54 @@ class XlsxFileToRowsTest extends BaseTest
|
||||
return byteArray;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*******************************************************************************
|
||||
**
|
||||
*******************************************************************************/
|
||||
@Test
|
||||
void testDateTimeFormats()
|
||||
{
|
||||
assertFormatDateAndOrDateTime(true, false, "dddd, m/d/yy at h:mm");
|
||||
assertFormatDateAndOrDateTime(true, false, "h PM, ddd mmm dd");
|
||||
assertFormatDateAndOrDateTime(true, false, "dd/mm/yyyy hh:mm");
|
||||
assertFormatDateAndOrDateTime(true, false, "yyyy-mm-dd hh:mm:ss.000");
|
||||
assertFormatDateAndOrDateTime(true, false, "hh:mm dd/mm/yyyy");
|
||||
|
||||
assertFormatDateAndOrDateTime(false, true, "yyyy-mm-dd");
|
||||
assertFormatDateAndOrDateTime(false, true, "mmmm d \\[dddd\\]");
|
||||
assertFormatDateAndOrDateTime(false, true, "mmm dd, yyyy");
|
||||
assertFormatDateAndOrDateTime(false, true, "d-mmm");
|
||||
assertFormatDateAndOrDateTime(false, true, "dd.mm.yyyy");
|
||||
|
||||
assertFormatDateAndOrDateTime(false, false, "yyyy");
|
||||
assertFormatDateAndOrDateTime(false, false, "mmm-yyyy");
|
||||
assertFormatDateAndOrDateTime(false, false, "hh");
|
||||
assertFormatDateAndOrDateTime(false, false, "hh:mm");
|
||||
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
***************************************************************************/
|
||||
private void assertFormatDateAndOrDateTime(boolean expectDateTime, boolean expectDate, String format)
|
||||
{
|
||||
if(XlsxFileToRows.isDateTimeFormat(format))
|
||||
{
|
||||
assertTrue(expectDateTime, format + " was considered a dateTime, but wasn't expected to.");
|
||||
assertFalse(expectDate, format + " was considered a dateTime, but was expected to be a date.");
|
||||
}
|
||||
else if(XlsxFileToRows.isDateFormat(format))
|
||||
{
|
||||
assertFalse(expectDateTime, format + " was considered a date, but was expected to be a dateTime.");
|
||||
assertTrue(expectDate, format + " was considered a date, but was expected to.");
|
||||
}
|
||||
else
|
||||
{
|
||||
assertFalse(expectDateTime, format + " was not considered a dateTime, but was expected to.");
|
||||
assertFalse(expectDate, format + " was considered a date, but was expected to.");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Reference in New Issue
Block a user