Apache POI使用总结

Apache POI是Apache推出的Java操作Office文档的开源库,其中对Excel的支持最完整,所以一般公司涉及报表开发都会使用。之前工作中曾用过不到一年,当时用的是3.10版本,现在来看比较旧了,不知道和现在的版本差别大不大。

  • POI对表格的操作都是从第0行、第0列开始。
  • 读取表格的时候会读取到有数据的最后一行之后的空行,暂无解决办法,所以最好规定读取的行数。
  • 读取xls格式使用HSSF,读取xlsx格式使用XSSF。
  • 读取的xlsx数据超过10万行时,最好使用下方的“POI读取EXCEL2007以上格式文件大量数据时的处理办法”,以避免一次性把整个工作簿加载到内存而导致内存溢出。
  • 写表格时,如果设置了合并单元格以及单元格边框,需要在合并单元格后再次设置边框。
  • 在写入公式时,不要把“=XXXX”当作普通字符串直接写入单元格,而是使用setCellFormula方法写入(传入的公式字符串不带开头的“=”,例如setCellFormula("SUM(A1:A10)"))。
  • POI读取EXCEL2007以上格式文件大量数据时的处理办法:
    (此段代码摘自网络,并非本人原创;本人实际使用过,没有发现问题,使用的JDK版本是1.7。)
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.PrintStream;
    import java.text.SimpleDateFormat;
    import java.util.ArrayList;
    import java.util.Date;
    import java.util.List;

    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.parsers.SAXParser;
    import javax.xml.parsers.SAXParserFactory;

    import org.apache.poi.hssf.usermodel.HSSFDateUtil;
    import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
    import org.apache.poi.openxml4j.opc.OPCPackage;
    import org.apache.poi.openxml4j.opc.PackageAccess;
    import org.apache.poi.ss.usermodel.BuiltinFormats;
    import org.apache.poi.ss.usermodel.DataFormatter;
    import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
    import org.apache.poi.xssf.eventusermodel.XSSFReader;
    import org.apache.poi.xssf.model.StylesTable;
    import org.apache.poi.xssf.usermodel.XSSFCellStyle;
    import org.apache.poi.xssf.usermodel.XSSFRichTextString;
    import org.xml.sax.Attributes;
    import org.xml.sax.InputSource;
    import org.xml.sax.SAXException;
    import org.xml.sax.XMLReader;
    import org.xml.sax.helpers.DefaultHandler;

    /**
     * Reads a single sheet of an XLSX workbook through POI's event (SAX) API and returns its rows as string arrays.
     *
     * <p>According to the original author, this "CSV mode" is the approach recommended by POI for large files: the
     * user model (for example {@code new XSSFWorkbook(new FileInputStream("c:\\test.xlsx"))}) tends to run out of
     * memory when a workbook has many rows, many sheets, or many useless empty rows.
     *
     * <p>Typical use: {@link #readerExcel(String, String, int)}.
     */
    public class XLSXCovertCSVReader {

        /**
         * The type of the data value is indicated by an attribute on the cell. The value is usually in a "v" element
         * within the cell.
         */
        enum xssfDataType {
            BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
        }

        /**
         * SAX handler that collects the cell values of one worksheet. For the XSSF SAX API see
         * http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
         *
         * Also see Standard ECMA-376, 1st edition, part 4, pages 1928ff, at
         * http://www.ecma-international.org/publications/standards/Ecma-376.htm
         *
         * A web-friendly version is http://openiso.org/Ecma/376/Part4
         *
         * <p>Each row becomes a {@code String[]} of {@code minColumnCount} entries; cells that are absent from the
         * sheet stay {@code null}. Rows without any value are skipped, and cells outside the configured column range
         * are ignored.
         */
        class MyXSSFSheetHandler extends DefaultHandler {
            /** Table with styles. */
            private StylesTable stylesTable;
            /** Table with unique strings. */
            private ReadOnlySharedStringsTable sharedStringsTable;
            /** Destination for diagnostic messages. */
            private final PrintStream output;
            /** Number of columns to read starting with leftmost. */
            private final int minColumnCount;
            /** Set while a value element is open, so that characters() knows whether to collect text. */
            private boolean vIsOpen;
            /** Set when the cell start element is seen; used when the value close element is seen. */
            private xssfDataType nextDataType;
            /** Used to format numeric cell values. */
            private short formatIndex;
            private String formatString;
            private final DataFormatter formatter;
            /** Date rendering used for date-formatted numeric cells; a handler instance is used by one parse only. */
            private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            /** Zero-based column of the cell being read; -1 before the first cell of a row. */
            private int thisColumn = -1;
            /** Gathers characters as they are seen. */
            private StringBuilder value;
            /** Values of the row being read; cleared after every row. */
            private String[] record;
            private List<String[]> rows = new ArrayList<String[]>();

            /**
             * Accepts objects needed while parsing.
             *
             * @param styles
             *            Table of styles
             * @param strings
             *            Table of shared strings
             * @param cols
             *            Number of columns per returned row; rows are only collected when this is positive
             * @param target
             *            Sink for diagnostic output
             */
            public MyXSSFSheetHandler(StylesTable styles, ReadOnlySharedStringsTable strings, int cols, PrintStream target) {
                this.stylesTable = styles;
                this.sharedStringsTable = strings;
                this.minColumnCount = cols;
                this.output = target;
                this.value = new StringBuilder();
                this.nextDataType = xssfDataType.NUMBER;
                this.formatter = new DataFormatter();
                // A non-positive column count yields an empty record instead of a NegativeArraySizeException.
                this.record = new String[Math.max(0, cols)];
            }

            /*
             * (non-Javadoc)
             *
             * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
             */
            @Override
            public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
                // NOTE(review): ECMA-376 appears to store inline strings inside an <is> element, which this handler
                // does not watch - confirm whether workbooks written that way have to be supported.
                if ("inlineStr".equals(name) || "v".equals(name)) {
                    vIsOpen = true;
                    // Clear contents cache
                    value.setLength(0);
                } else if ("c".equals(name)) {
                    // c => cell
                    thisColumn = resolveColumn(attributes.getValue("r"));
                    // Set up defaults.
                    this.nextDataType = xssfDataType.NUMBER;
                    this.formatIndex = -1;
                    this.formatString = null;
                    String cellType = attributes.getValue("t");
                    String cellStyleStr = attributes.getValue("s");
                    if ("b".equals(cellType)) {
                        nextDataType = xssfDataType.BOOL;
                    } else if ("e".equals(cellType)) {
                        nextDataType = xssfDataType.ERROR;
                    } else if ("inlineStr".equals(cellType)) {
                        nextDataType = xssfDataType.INLINESTR;
                    } else if ("s".equals(cellType)) {
                        nextDataType = xssfDataType.SSTINDEX;
                    } else if ("str".equals(cellType)) {
                        nextDataType = xssfDataType.FORMULA;
                    } else if (cellStyleStr != null) {
                        // It's a number, but almost certainly one with a special style or format
                        int styleIndex = Integer.parseInt(cellStyleStr);
                        XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
                        this.formatIndex = style.getDataFormat();
                        this.formatString = style.getDataFormatString();
                        if (this.formatString == null) {
                            this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
                        }
                    }
                }
            }

            /*
             * (non-Javadoc)
             *
             * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
             */
            @Override
            public void endElement(String uri, String localName, String name) throws SAXException {
                if ("v".equals(name)) {
                    // v => contents of a cell. Convert now, as characters() may have been called more than once.
                    vIsOpen = false;
                    String thisStr = convertCurrentValue();
                    // Cells outside the configured column range are ignored instead of overflowing the record.
                    if (thisColumn >= 0 && thisColumn < record.length) {
                        record[thisColumn] = thisStr;
                    }
                } else if ("row".equals(name)) {
                    if (minColumnCount > 0 && hasValue(record)) {
                        rows.add(record.clone());
                    }
                    // Start the next row from a clean slate so that missing cells do not inherit old values.
                    for (int i = 0; i < record.length; i++) {
                        record[i] = null;
                    }
                    thisColumn = -1;
                }
            }

            /**
             * Returns the rows collected so far.
             *
             * @return one {@code String[]} per non-empty row
             */
            public List<String[]> getRows() {
                return rows;
            }

            public void setRows(List<String[]> rows) {
                this.rows = rows;
            }

            /**
             * Captures characters only if a suitable element is open. Originally was just "v"; extended for inlineStr
             * also.
             */
            @Override
            public void characters(char[] ch, int start, int length) throws SAXException {
                if (vIsOpen) {
                    value.append(ch, start, length);
                }
            }

            /**
             * Determines the zero-based column of a cell from its reference attribute.
             *
             * @param cellRef
             *            value of the cell's "r" attribute such as "C12"; may be {@code null}
             * @return the column named by the reference, or the column after the previous cell of the row when the
             *         reference is missing or does not have the letters-then-digits shape
             */
            private int resolveColumn(String cellRef) {
                if (cellRef != null) {
                    for (int i = 0; i < cellRef.length(); i++) {
                        if (Character.isDigit(cellRef.charAt(i))) {
                            if (i > 0) {
                                return nameToColumn(cellRef.substring(0, i));
                            }
                            break;
                        }
                    }
                }
                return thisColumn + 1;
            }

            /**
             * Converts the text gathered for the current value element according to the type announced on the cell.
             *
             * @return the cell text, or {@code null} when a shared-string index could not be parsed
             */
            private String convertCurrentValue() {
                String raw = value.toString();
                switch (nextDataType) {
                case BOOL:
                case ERROR:
                case FORMULA:
                    // Booleans, error codes and cached formula results are passed through unchanged.
                    return raw;
                case INLINESTR:
                    return new XSSFRichTextString(raw).toString();
                case SSTINDEX:
                    try {
                        int idx = Integer.parseInt(raw);
                        return new XSSFRichTextString(sharedStringsTable.getEntryAt(idx)).toString();
                    } catch (NumberFormatException ex) {
                        output.println("Failed to parse SST index '" + raw + "': " + ex.toString());
                        return null;
                    }
                case NUMBER:
                    return convertNumber(raw);
                default:
                    return "(TODO: Unexpected type: " + nextDataType + ")";
                }
            }

            /**
             * Renders a numeric cell: dates as "yyyy-MM-dd HH:mm:ss", other styled numbers through the data formatter,
             * unstyled numbers as they appear in the file.
             */
            private String convertNumber(String n) {
                if (n.length() == 0) {
                    // Nothing to parse; avoids a NumberFormatException on an empty value element.
                    return n;
                }
                // The date check needs the cell's format string, not the cell value.
                if (HSSFDateUtil.isADateFormat(this.formatIndex, this.formatString)) {
                    Date date = HSSFDateUtil.getJavaDate(Double.parseDouble(n));
                    if (date != null) {
                        return formateDateToString(date);
                    }
                }
                if (this.formatString != null) {
                    return formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString);
                }
                return n;
            }

            /** Tells whether at least one entry of the given row is set. */
            private boolean hasValue(String[] cells) {
                for (String cell : cells) {
                    if (cell != null) {
                        return true;
                    }
                }
                return false;
            }

            /**
             * Converts an Excel column name like "C" to a zero-based index.
             *
             * @param name
             *            upper-case column letters
             * @return Index corresponding to the specified name
             */
            private int nameToColumn(String name) {
                int column = -1;
                for (int i = 0; i < name.length(); ++i) {
                    int c = name.charAt(i);
                    column = (column + 1) * 26 + c - 'A';
                }
                return column;
            }

            /** Formats a date with the handler's "yyyy-MM-dd HH:mm:ss" pattern. */
            private String formateDateToString(Date date) {
                return dateFormat.format(date);
            }
        }

        private OPCPackage xlsxPackage;
        private int minColumns;
        private PrintStream output;
        private String sheetName;

        /**
         * Creates a new XLSX reader for one named sheet.
         *
         * @param pkg
         *            The XLSX package to process
         * @param output
         *            The PrintStream that receives diagnostic messages
         * @param sheetName
         *            Name of the sheet to read
         * @param minColumns
         *            Number of columns per returned row; must be positive for rows to be collected
         */
        public XLSXCovertCSVReader(OPCPackage pkg, PrintStream output, String sheetName, int minColumns) {
            this.xlsxPackage = pkg;
            this.output = output;
            this.minColumns = minColumns;
            this.sheetName = sheetName;
        }

        /**
         * Parses one sheet using the specified styles and shared-strings tables.
         *
         * @param styles
         *            styles table of the workbook
         * @param strings
         *            shared-strings table of the workbook
         * @param sheetInputStream
         *            XML stream of the sheet; not closed by this method
         * @return the non-empty rows of the sheet
         */
        public List<String[]> processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetInputStream)
                throws IOException, ParserConfigurationException, SAXException {
            InputSource sheetSource = new InputSource(sheetInputStream);
            SAXParserFactory saxFactory = SAXParserFactory.newInstance();
            SAXParser saxParser = saxFactory.newSAXParser();
            XMLReader sheetParser = saxParser.getXMLReader();
            MyXSSFSheetHandler handler = new MyXSSFSheetHandler(styles, strings, this.minColumns, this.output);
            sheetParser.setContentHandler(handler);
            sheetParser.parse(sheetSource);
            return handler.getRows();
        }

        /**
         * Looks up the configured sheet in the package and reads it.
         *
         * @return the rows of the sheet, or {@code null} when the package has no sheet with the configured name
         * @throws IOException
         * @throws OpenXML4JException
         * @throws ParserConfigurationException
         * @throws SAXException
         */
        public List<String[]> process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
            ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
            XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
            StylesTable styles = xssfReader.getStylesTable();
            XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
            while (iter.hasNext()) {
                InputStream stream = iter.next();
                try {
                    if (this.sheetName.equals(iter.getSheetName())) {
                        return processSheet(styles, strings, stream);
                    }
                } finally {
                    // Every stream handed out by the iterator is closed, whether or not it was the wanted sheet.
                    stream.close();
                }
            }
            return null;
        }

        /**
         * Reads one sheet of an Excel (.xlsx) file.
         *
         * @param path
         *            file path
         * @param sheetName
         *            sheet name
         * @param minColumns
         *            total number of columns
         * @return the rows of the sheet, or {@code null} when the sheet does not exist
         * @throws SAXException
         * @throws ParserConfigurationException
         * @throws OpenXML4JException
         * @throws IOException
         */
        public static List<String[]> readerExcel(String path, String sheetName, int minColumns) throws IOException,
                OpenXML4JException, ParserConfigurationException, SAXException {
            OPCPackage p = OPCPackage.open(path, PackageAccess.READ);
            try {
                XLSXCovertCSVReader xlsx2csv = new XLSXCovertCSVReader(p, System.out, sheetName, minColumns);
                return xlsx2csv.process();
            } finally {
                // Release the file even when parsing fails.
                p.close();
            }
        }

        public static void main(String[] args) throws Exception {
            List<String[]> list = XLSXCovertCSVReader.readerExcel("C:/test.xlsx", "Sheet1", 14);
            if (list == null) {
                System.err.println("Sheet not found: Sheet1");
                return;
            }
            for (String[] record : list) {
                for (String cell : record) {
                    System.out.print(cell + " ");
                }
                System.out.println();
            }
        }
    }