How to distinguish inline numbers from OLE Automation date numbers in OpenXML SpreadSheet?

Solution 1:

I just came across similar issue and it is not easy to check whether a cell contains date/time value, see Using cell format to determine a cell contains date/time value, but the issue doesn't end with built-in number formats, I needed to handle custom formats too. There are no utilities in OpenXML SDK 2.5 to help, so I had to write my own (doesn't support Thai date/time formats).

public class ExcelHelper
{
    static uint[] builtInDateTimeNumberFormatIDs = new uint[] { 14, 15, 16, 17, 18, 19, 20, 21, 22, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57, 58 };
    static Dictionary<uint, NumberingFormat> builtInDateTimeNumberFormats = builtInDateTimeNumberFormatIDs.ToDictionary(id => id, id => new NumberingFormat { NumberFormatId = id });
    static Regex dateTimeFormatRegex = new Regex(@"((?=([^[]*\[[^[\]]*\])*([^[]*[ymdhs]+[^\]]*))|.*\[(h|mm|ss)\].*)", RegexOptions.Compiled);

    public static Dictionary<uint, NumberingFormat> GetDateTimeCellFormats(WorkbookPart workbookPart)
    {
        var dateNumberFormats = workbookPart.WorkbookStylesPart.Stylesheet.NumberingFormats
            .Descendants<NumberingFormat>()
            .Where(nf => dateTimeFormatRegex.Match(nf.FormatCode.Value).Success)
            .ToDictionary(nf => nf.NumberFormatId.Value);

        var cellFormats = workbookPart.WorkbookStylesPart.Stylesheet.CellFormats
            .Descendants<CellFormat>();

        var dateCellFormats = new Dictionary<uint, NumberingFormat>();
        uint styleIndex = 0;
        foreach (var cellFormat in cellFormats)
        {
            if (cellFormat.ApplyNumberFormat != null && cellFormat.ApplyNumberFormat.Value)
            {
                if (dateNumberFormats.ContainsKey(cellFormat.NumberFormatId.Value))
                {
                    dateCellFormats.Add(styleIndex, dateNumberFormats[cellFormat.NumberFormatId.Value]);
                }
                else if (builtInDateTimeNumberFormats.ContainsKey(cellFormat.NumberFormatId.Value))
                {
                    dateCellFormats.Add(styleIndex, builtInDateTimeNumberFormats[cellFormat.NumberFormatId.Value]);
                }
            }

            styleIndex++;
        }

        return dateCellFormats;
    }

    // Usage Example
    public static bool IsDateTimeCell(WorkbookPart workbookPart, Cell cell)
    {
        if (cell.StyleIndex == null)
            return false;

        var dateTimeCellFormats = ExcelHelper.GetDateTimeCellFormats(workbookPart);

        return dateTimeCellFormats.ContainsKey(cell.StyleIndex);
    }
}

Solution 2:

This answer is a version of the above accepted answer to make it work for a failing scenario. It fails in a scenario where your cell is of Currency type with the built-in format applied - &quot;$&quot;#,##0_);[Red]\(&quot;$&quot;#,##0\)

The regex ((?=([^[]*\[[^[\]]*\])*([^[]*[ymdhs]+[^\]]*))|.*\[(h|mm|ss)\].*) (in the accepted answer) parses the above mentioned built-in format as well, which in turn makes it a datetime cell & the value returned is date time value, instead of a currency value.

I have modified the GetDateTimeCellFormats method little bit, so that the regex doesn't interfere with any of built-in formats not belonging to date/time/datetime.

        var dateCellFormats = new Dictionary<uint, NumberingFormat>();
        uint styleIndex = 0;
        foreach (var cellFormat in cellFormatList)
        {
            if (cellFormat.ApplyNumberFormat == null || !cellFormat.ApplyNumberFormat.Value)
            {
                styleIndex++;
                continue;
            }

            var numFmtId = cellFormat.NumberFormatId.Value;
            if (numFmtId < 164)
            {
                if (builtInDateTimeNumberFormats.ContainsKey(cellFormat.NumberFormatId.Value))
                    dateCellFormats.Add(styleIndex, builtInDateTimeNumberFormats[cellFormat.NumberFormatId.Value]);
            }
            else
            {
                if (dateNumberFormatsDict.ContainsKey(cellFormat.NumberFormatId.Value))
                    dateCellFormats.Add(styleIndex, dateNumberFormatsDict[cellFormat.NumberFormatId.Value]);
            }
            styleIndex++;
        }

Rest all code remains same.