Exception reading XLSB File Apache POI java.io.CharConversionException

17,748

Solution 1

Apache POI doesn't support the .xlsb file format for anything other than text extraction. Apache POI will happily provide full read or write support .xls files (via HSSF) and .xlsx files (via XSSF), or both (via the common SS UserModel interface).

However, the .xlsb format is not supported for generatl operations - it's a very odd hybrid between the two, and the large amount of work involved has meant no-one has been willing to volunteer/sponsor the work required.

What Apache POI does offer for .xlsb, as of Apache POI 3.15 beta3 / 3.16, is a text extractor for .xlsb files - XSSFBEventBasedExcelExtractor. You can use that to get the text out of your file, or with a few tweaks convert it to something like CSV

For full read/write support, you'll need to convert your file to either .xls (if it doesn't have very large numbers of rows/columns), or .xlsx (if it does). If you're really really keen to help though, you could review the source code for XSSFBEventBasedExcelExtractor, then have a go at contributing patches to add full support to POI for it!

(Additionally, I think from the exception that your particular .xlsb file is partly corrupt, but even if it wasn't it still wouldn't be supported by Apache POI for anything other than text extraction, sorry)

Solution 2

I have tried reading XLSB file using Apache POI and it is successful. Below is the code snippet I have used.

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
import org.apache.poi.xssf.binary.XSSFBSheetHandler;
import org.apache.poi.xssf.binary.XSSFBStylesTable;
import org.apache.poi.xssf.eventusermodel.XSSFBReader;
import org.xml.sax.SAXException;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

public class ApachePoiXLSB {

    public static void main (String [] args){

        String xlsbFileName = "test.xlsb";

        OPCPackage pkg;

        try {
            pkg = OPCPackage.open(xlsbFileName);
            XSSFBReader r = new XSSFBReader(pkg);
            XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg);
            XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable();
            XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator) r.getSheetsData();

            List<String> sheetTexts = new ArrayList<>();
            while (it.hasNext()) {
                InputStream is = it.next();
                String name = it.getSheetName();
                TestSheetHandler testSheetHandler = new TestSheetHandler();
                testSheetHandler.startSheet(name);
                XSSFBSheetHandler sheetHandler = new XSSFBSheetHandler(is,
                        xssfbStylesTable,
                        it.getXSSFBSheetComments(),
                        sst, testSheetHandler,
                        new DataFormatter(),
                        false);
                sheetHandler.parse();
                testSheetHandler.endSheet();
                sheetTexts.add(testSheetHandler.toString());
            }
            System.out.println("output text:"+sheetTexts);

        } catch (InvalidFormatException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (OpenXML4JException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (SAXException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
}


import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.usermodel.XSSFComment;

class TestSheetHandler implements XSSFSheetXMLHandler.SheetContentsHandler {

        private final StringBuilder sb = new StringBuilder();

        public void startSheet(String sheetName) {
            sb.append("<sheet name=\"").append(sheetName).append(">");
        }

        public void endSheet() {
            sb.append("</sheet>");
        }

        @Override
        public void startRow(int rowNum) {
            sb.append("\n<tr num=\"").append(rowNum).append(">");
        }

        @Override
        public void endRow(int rowNum) {
            sb.append("\n</tr num=\"").append(rowNum).append(">");
        }

        @Override
        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
            formattedValue = (formattedValue == null) ? "" : formattedValue;
            if (comment == null) {
                sb.append("\n\t<td ref=\"").append(cellReference).append("\">").append(formattedValue).append("</td>");
            } else {
                sb.append("\n\t<td ref=\"").append(cellReference).append("\">")
                        .append(formattedValue)
                        .append("<span type=\"comment\" author=\"")
                        .append(comment.getAuthor()).append("\">")
                        .append(comment.getString().toString().trim()).append("</span>")
                        .append("</td>");
            }
        }

        @Override
        public void headerFooter(String text, boolean isHeader, String tagName) {
            if (isHeader) {
                sb.append("<header tagName=\"").append(tagName).append("\">").append(text).append("</header>");
            } else {
                sb.append("<footer tagName=\"").append(tagName).append("\">").append(text).append("</footer>");

            }
        }

        @Override
        public String toString() {
            return sb.toString();
        }
    }
Share:
17,748
Angel Zero
Author by

Angel Zero

Updated on July 02, 2022

Comments

  • Angel Zero
    Angel Zero almost 2 years

    Im developing a Java aplication that reads an excel xlsb file using Apache POI, but I got an exception while reading it, my code is as follows:

    import java.io.IOException;
    import java.io.InputStream;
    
    import org.apache.poi.xssf.eventusermodel.XSSFReader;
    import org.apache.poi.xssf.model.SharedStringsTable;
    import org.apache.poi.xssf.usermodel.XSSFRichTextString;
    import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
    import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
    import org.apache.poi.openxml4j.opc.Package;
    import org.xml.sax.Attributes;
    import org.xml.sax.ContentHandler;
    import org.xml.sax.InputSource;
    import org.xml.sax.SAXException;
    import org.xml.sax.XMLReader;
    import org.xml.sax.helpers.DefaultHandler;
    import org.xml.sax.helpers.XMLReaderFactory;
    
    import java.util.Iterator;
    
    public class Prueba {
    
        public static void main (String [] args){
    
            String direccion = "C:/Documents and Settings/RSalasL/My Documents/New Folder/masstigeoct12.xlsb";
    
            Package pkg;
            try {
                pkg = Package.open(direccion);
                XSSFReader r = new XSSFReader(pkg);
                SharedStringsTable sst = r.getSharedStringsTable();
    
                XMLReader parser = fetchSheetParser(sst);
    
                Iterator<InputStream> sheets = r.getSheetsData();
                while(sheets.hasNext()) {
                    System.out.println("Processing new sheet:\n");
                    InputStream sheet = sheets.next();
                    InputSource sheetSource = new InputSource(sheet);
                    parser.parse(sheetSource);
                    sheet.close();
                    System.out.println("");
                }
    
            } catch (InvalidFormatException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } catch (OpenXML4JException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } catch (SAXException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
    
        }
    
        public void processAllSheets(String filename) throws Exception {
            Package pkg = Package.open(filename);
            XSSFReader r = new XSSFReader( pkg );
            SharedStringsTable sst = r.getSharedStringsTable();
    
            XMLReader parser = fetchSheetParser(sst);
    
            Iterator<InputStream> sheets = r.getSheetsData();
            while(sheets.hasNext()) {
                System.out.println("Processing new sheet:\n");
                InputStream sheet = sheets.next();
                InputSource sheetSource = new InputSource(sheet);
                parser.parse(sheetSource);
                sheet.close();
                System.out.println("");
            }
        }
    
    
        public static XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
            XMLReader parser =
                XMLReaderFactory.createXMLReader(
                        "org.apache.xerces.parsers.SAXParser"
                );
            ContentHandler handler = new SheetHandler(sst);
            parser.setContentHandler(handler);
            return parser;
        }
    
        private static class SheetHandler extends DefaultHandler {
            private SharedStringsTable sst;
            private String lastContents;
            private boolean nextIsString;
    
            private SheetHandler(SharedStringsTable sst) {
                this.sst = sst;
            }
    
            public void startElement(String uri, String localName, String name,
                    Attributes attributes) throws SAXException {
                // c => cell
                if(name.equals("c")) {
                    // Print the cell reference
                    System.out.print(attributes.getValue("r") + " - ");
                    // Figure out if the value is an index in the SST
                    String cellType = attributes.getValue("t");
                    if(cellType != null && cellType.equals("s")) {
                        nextIsString = true;
                    } else {
                        nextIsString = false;
                    }
                }
                // Clear contents cache
                lastContents = "";
            }
    
            public void endElement(String uri, String localName, String name)
                    throws SAXException {
                // Process the last contents as required.
                // Do now, as characters() may be called more than once
                if(nextIsString) {
                    int idx = Integer.parseInt(lastContents);
                    lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
                nextIsString = false;
                }
    
                // v => contents of a cell
                // Output after we've seen the string contents
                if(name.equals("v")) {
                    System.out.println(lastContents);
                }
            }
    
            public void characters(char[] ch, int start, int length)
                    throws SAXException {
                lastContents += new String(ch, start, length);
            }
        }
    
    }
    

    And the exception is this:

    java.io.CharConversionException: Characters larger than 4 bytes are not supported: byte 0x83 implies a length of more than 4 bytes
        at org.apache.xmlbeans.impl.piccolo.xml.UTF8XMLDecoder.decode(UTF8XMLDecoder.java:162)
        at org.apache.xmlbeans.impl.piccolo.xml.XMLStreamReader$FastStreamDecoder.read(XMLStreamReader.java:762)
        at org.apache.xmlbeans.impl.piccolo.xml.XMLStreamReader.read(XMLStreamReader.java:162)
        at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.yy_refill(PiccoloLexer.java:3474)
        at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.yylex(PiccoloLexer.java:3958)
        at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yylex(Piccolo.java:1290)
        at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yyparse(Piccolo.java:1400)
        at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.parse(Piccolo.java:714)
        at org.apache.xmlbeans.impl.store.Locale$SaxLoader.load(Locale.java:3439)
        at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1270)
        at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1257)
        at org.apache.xmlbeans.impl.schema.SchemaTypeLoaderBase.parse(SchemaTypeLoaderBase.java:345)
        at org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument$Factory.parse(Unknown Source)
        at org.apache.poi.xssf.eventusermodel.XSSFReader$SheetIterator.<init>(XSSFReader.java:207)
        at org.apache.poi.xssf.eventusermodel.XSSFReader$SheetIterator.<init>(XSSFReader.java:166)
        at org.apache.poi.xssf.eventusermodel.XSSFReader.getSheetsData(XSSFReader.java:160)
        at EDManager.Prueba.main(Prueba.java:36)
    

    The file has 2 sheets, one with 329 rows and 3 columns and the other with 566 rows and 3 columns, I just want to read the file to find if a value is in the second sheet.

  • Tim Allison
    Tim Allison over 7 years
    Merely 4 years later, we just added support for streaming/read-only parsing of xlsb. That functionality will be available in POI 3.16-beta3
  • Jaja
    Jaja over 4 years
    But smartxls is not free, so this does not help