Apache POI - FileInputStream works, File object fails (NullPointerException)
I try to copy all worksheets from one workbook to another workbook. The thing is, it works normally if I read the workbooks via FileInputStreams, but it does not work with File Objects.
Consider the following method:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.filefilter.WildcardFileFilter;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.SpreadsheetVersion;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.DataConsolidateFunction;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Font;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.ss.util.AreaReference;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.usermodel.XSSFPivotTable;
import org.apache.poi.xssf.usermodel.XSSFSheet;
public void copyAllSheetsAcrossWorkbook(String oldWorkbook, String newWorkbook)
throws EncryptedDocumentException, InvalidFormatException, IOException {
FileInputStream fisOld = null;
FileInputStream fisNew = null;
Workbook oldWB = null;
Workbook newWB = null;
FileOutputStream fileOut = null;
System.out.println("oldWorkbook: " + oldWorkbook);
System.out.println("newWorkbook: " + newWorkbook);
fisOld = new FileInputStream(oldWorkbook);
fisNew = new FileInputStream(newWorkbook);
// THIS WORKS
// oldWB = WorkbookFactory.create(fisOld);
// newWB = WorkbookFactory.create(fisNew);
// THIS DOES NOT WORK
oldWB = WorkbookFactory.create(new File(oldWorkbook));
newWB = WorkbookFactory.create(new File(newWorkbook));
if (newWB == null) {
System.out.println("newWB is null");
}
// CellStyle newStyle = newWB.createCellStyle();
Row row;
Cell cell;
copiedSheets = new ArrayList<String>();
for (int i = 0; i < oldWB.getNumberOfSheets(); i++) {
XSSFSheet sheetFromOldWB = (XSSFSheet) oldWB.getSheetAt(i);
String sheetNameFromOldWB = sheetFromOldWB.getSheetName();
XSSFSheet sheetForNewWB = (XSSFSheet) newWB.getSheet(sheetNameFromOldWB);
if (sheetForNewWB != null) {
int sheetIndex = newWB.getSheetIndex(sheetNameFromOldWB);
newWB.removeSheetAt(sheetIndex);
}
LOGGER.info("Copying to new Workbook: " + sheetNameFromOldWB);
sheetForNewWB = (XSSFSheet) newWB.createSheet(sheetFromOldWB.getSheetName());
for (int rowIndex = 0; rowIndex < sheetFromOldWB.getPhysicalNumberOfRows(); rowIndex++) {
row = sheetForNewWB.createRow(rowIndex);
for (int colIndex = 0; colIndex < sheetFromOldWB.getRow(rowIndex).getPhysicalNumberOfCells(); colIndex++) {
cell = row.createCell(colIndex);
// get cell from old WB's sheet and when cell is null, return as blank cells.
Cell c = sheetFromOldWB.getRow(rowIndex).getCell(colIndex, Row.MissingCellPolicy.CREATE_NULL_AS_BLANK);
// Below is where all the copying is happening.
// CellStyle origStyle = c.getCellStyle();
// newStyle.cloneStyleFrom(origStyle);
// cell.setCellStyle(newStyle);
switch (c.getCellTypeEnum()) {
case STRING:
cell.setCellValue(c.getRichStringCellValue().getString());
break;
case NUMERIC:
if (DateUtil.isCellDateFormatted(cell)) {
cell.setCellValue(c.getDateCellValue());
} else {
cell.setCellValue(c.getNumericCellValue());
}
break;
case BOOLEAN:
cell.setCellValue(c.getBooleanCellValue());
break;
case FORMULA:
cell.setCellFormula(c.getCellFormula());
break;
default:
break;
}
}
}
copiedSheets.add(oldWB.getSheetName(i));
}
fileOut = new FileOutputStream(newWorkbook);
newWB.write(fileOut); // <------ HERE I GET NULLPOINTEREXCEPTION
fisOld.close();
fisNew.close();
oldWB.close();
fileOut.close();
newWB.close();
I get the following exception at newWB.write(fileOut);
:
Exception in thread "main" org.apache.poi.POIXMLException: java.lang.NullPointerException
at org.apache.poi.POIXMLDocument.getProperties(POIXMLDocument.java:168)
at org.apache.poi.POIXMLDocument.write(POIXMLDocument.java:246)
at com.capgemini.toolkit.App.copyAllSheetsAcrossWorkbook(App.java:263)
at com.capgemini.toolkit.App.main(App.java:58)
Caused by: java.lang.NullPointerException
at org.apache.poi.openxml4j.util.ZipSecureFile$ThresholdInputStream.read(ZipSecureFile.java:210)
at com.sun.org.apache.xerces.internal.impl.XMLEntityManager$RewindableInputStream.read(Unknown Source)
at com.sun.org.apache.xerces.internal.impl.XMLEntityManager.setupCurrentEntity(Unknown Source)
at com.sun.org.apache.xerces.internal.impl.XMLVersionDetector.determineDocVersion(Unknown Source)
at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source)
at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source)
at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown Source)
at com.sun.org.apache.xerces.internal.parsers.DOMParser.parse(Unknown Source)
at com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderImpl.parse(Unknown Source)
at javax.xml.parsers.DocumentBuilder.parse(Unknown Source)
at org.apache.poi.util.DocumentHelper.readDocument(DocumentHelper.java:140)
at org.apache.poi.POIXMLTypeLoader.parse(POIXMLTypeLoader.java:143)
at org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument$Factory.parse(Unknown Source)
at org.apache.poi.POIXMLProperties.<init>(POIXMLProperties.java:78)
at org.apache.poi.POIXMLDocument.getProperties(POIXMLDocument.java:166)
... 3 more
In the POI documentation, it is always mentioned to better use a File
object due to lower memory consumption. That's why I'm wondering why it does not work with a File
object .
For testing, this is the only method which is running in the main method and I used 2 fresh Excel files (.xlsx) with some dummy data.
Does anyone see why it does not work with a File
object ? Am I doing something wrong?
FYI: I'm using POI 3.16.
Solution 1:
Using a File
instead of a FileInputStream
for opening a Workbook
leads to a lower memory footprint because then, in case of XSSF
(*.xlsx
), the ZipPackage will be opened from the *.xlsx
file directly instead reading the whole ZIP
content into the memory.
But this also means, that the ZipPackage
gets the file opened until the Workbook
will be closed. So until the Workbook
will be closed, nothing can write to that file the same time. So, since there is not a possibility to write the Workbook
content back to the same file from where the Workbook
was opened from, using a File
instead a FileInputStream
for opening a Workbook
is fine if you wants only reading from that Workbook
then. But it does not work if you wants reading from and writing to the same file. Then FileInputStream
and FileOutputStream
is needed.
So in your case you tries reading the Workbook newWB
from a File
and then writing the Workbook
into the same file using
fileOut = new FileOutputStream(newWorkbook);
newWB.write(fileOut);
while the file is opened already. This fails.
But:
fisNew = new FileInputStream(newWorkbook);
oldWB = WorkbookFactory.create(new File(oldWorkbook));
newWB = WorkbookFactory.create(fisNew);
...
fileOut = new FileOutputStream(newWorkbook);
newWB.write(fileOut);
fileOut.close();
oldWB.close();
newWB.close();
should work.
Btw.: If you are using a File
, then you should not using a FileInputStream
for the same file. So don't use fisOld
.
Another disadvantage of using a File
instead of a FileInputStream
for opening a Workbook
is that while closing the Workbook
and so implicitly closing the underlaying file system (POIFSFileSystem
in case of HSSF
and ZipPackage
in case of XSSF
) the file gets an updated last modified date. There are no changings made into the file but the file had been opened and new written into the file system. That's why the last modified date is updated.
Edit Sep 21 2017:
The disadvantage of using a File
seems to be greater than thought first. OPCPackage.close also saves all changings into the underlaying OPCPackage
. So if you are opening a XSSFWorkbook
from a file and then wants writing the changings into another file using write(java.io.OutputStream stream)
, then the source file will also be changed while closing the OPCPackage
. The problem only occurs if write(java.io.OutputStream stream)
is used from XSSFWorkbook
since then POIXMLDocument.write is called which calls POIXMLDocumentPart.onSave which "Saves changes in the underlying OOXML package.". So the OPCPackage
is updated with all changings before closing.
Short Example:
import org.apache.poi.ss.usermodel.*;
import java.io.File;
import java.io.FileOutputStream;
class ReadAndWriteExcelWorkbook {
public static void main(String[] args) throws Exception {
Workbook workbook = WorkbookFactory.create(new File("file.xlsx"));
Sheet sheet = workbook.getSheetAt(0);
Row row = sheet.getRow(0);
if (row == null) row = sheet.createRow(0);
Cell cell = row.getCell(0);
if (cell == null) cell = row.createCell(0);
cell.setCellValue("changed");
FileOutputStream out = new FileOutputStream("fileNew.xlsx");
workbook.write(out);
out.close();
workbook.close();
}
}
After this code both files fileNew.xlsx
as well as file.xlsx
are changed.
Solution 2:
Just stumbled across a potential solution to this issue. I'm no expert, so feel free to suggest alternatives or modifications to my method.
I also encountered this issue, where the POI documentation advises using a File object rather than a FileInputStream, but fails to mention that the created Workbook cannot then be written to the original file to modify it.
However, by creating a temporary copy of the original file using the nio.channels.FileChannel.transferFrom function of the later JDKs (As shown here Standard concise way to copy a file in Java?) I was able to read my data from the duplicated file and then write to the original using the regular workbook.write function.
One caveat of this, is that the 'temporary' copy still cannot be deleted whilst it is being accessed. However, it apparently can still have data transferred to it. Once the jvm instance ends, the file can be deleted, so I am treating it like the temporary or backup documents that are sometimes created, for instance when modifying a Word document.