Quantcast
Channel: Tuning Excel calculation engine which uses MS Excel interop - Code Review Stack Exchange
Viewing all articles
Browse latest Browse all 3

Tuning Excel calculation engine which uses MS Excel interop

$
0
0

I am currently building an Excel calculation engine. Its purpose is basically to wrap the calculation logic of an Excel workbook in order to use the logic from a C# library.

// Typical usage: load a workbook, write inputs, recalculate, read outputs.
using (var s = new Spreadsheet())
{
    s.AutoCalculate = false;
    s.LoadFromFile("D:/workbook.xlsx");

    // Example: Set B36 to 1000.0
    s.SetValue(1000.0, "SheetName", 36, 2);
    // Arbitrary cell values could be set here

    // Calculate result values
    s.Calculate();

    // Example: Read B52
    var result = s.GetValue("SheetName", 52, 2);
    // Arbitrary cell values could be read here
}

The library is meant to work for arbitrary workbooks and a flexible number of cell writes and reads, so I cannot optimize anything for a specific workbook or a known workbook layout. The worksheets might contain VBA macros and functions. I have tried hard to avoid Interop, but have not yet managed to find a workable alternative.

Here is my wrapper:

/// <summary>
/// Wraps a hidden Excel instance (via COM interop) so that the calculation logic of a
/// workbook can be driven from code: write input cells, recalculate, read result cells.
/// </summary>
/// <remarks>
/// The workbook is opened from a temporary copy, so the original file is never modified.
/// Worksheet and cell COM references are cached per loaded workbook and are dropped
/// whenever a new workbook is loaded or the instance is disposed.
/// </remarks>
public class Spreadsheet : IDisposable
{
    [DllImport("user32.dll")]
    private static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint lpdwProcessId);

    // Caches of COM references belonging to the currently loaded workbook.
    private readonly Dictionary<string, Worksheet> _worksheets = new Dictionary<string, Worksheet>();
    private readonly Dictionary<CellInfo, Range> _cells = new Dictionary<CellInfo, Range>();

    private Application _excelApplication;
    private Workbooks _workbooks;
    private Workbook _workbook;
    private string _tempFilePath;
    private bool _autoCalculate;
    private bool _disposed;

    /// <summary>
    /// Gets or sets whether Excel recalculates automatically after every change
    /// (<c>true</c>) or only when <see cref="Calculate"/> is called (<c>false</c>).
    /// </summary>
    public bool AutoCalculate
    {
        get { return _autoCalculate; }
        set
        {
            _autoCalculate = value;
            if (_excelApplication == null)
            {
                return;
            }

            try
            {
                ApplyCalculationMode();
            }
            catch (COMException)
            {
                // NOTE(review): Excel is known to reject changes to Application.Calculation
                // while no workbook is open. In that case the stored value is applied by
                // LoadFromFile; with a workbook loaded the failure is real and is rethrown.
                if (_workbook != null)
                {
                    throw;
                }
            }
        }
    }

    /// <summary>Starts a hidden Excel instance with screen updating and alerts disabled.</summary>
    public Spreadsheet()
    {
        OpenExcelApplication();
    }

    private void OpenExcelApplication()
    {
        _excelApplication = new Application {Visible = false};
        _excelApplication.ScreenUpdating = false;
        _excelApplication.DisplayAlerts = false;
    }

    /// <summary>Writes a numeric value into the given cell (1-based row and column).</summary>
    public void SetValue(double value, string sheetName, int cellRow, int cellColumn)
    {
        GetCell(sheetName, cellRow, cellColumn).Value2 = value;
    }

    /// <summary>Reads the numeric value of the given cell (1-based row and column).</summary>
    /// <exception cref="InvalidOperationException">The cell is empty, or no workbook is loaded.</exception>
    /// <exception cref="FormatException">The cell content cannot be parsed as a number.</exception>
    public double GetValue(string sheetName, int row, int column)
    {
        // Assign to object first: this avoids dynamic dispatch on every subsequent call.
        object raw = GetCell(sheetName, row, column).Value2;

        // Numeric cells already come back as a boxed double; unbox directly instead of
        // going through a string round-trip, which is slower and can lose precision.
        if (raw is double)
        {
            return (double) raw;
        }

        if (raw == null)
        {
            throw new InvalidOperationException(
                string.Format("Cell (row {0}, column {1}) on sheet '{2}' is empty.", row, column, sheetName));
        }

        // Anything else (e.g. text that looks like a number) keeps the original parsing behaviour.
        return double.Parse(raw.ToString());
    }

    /// <summary>Recalculates all open workbooks of the wrapped Excel instance.</summary>
    public void Calculate()
    {
        _excelApplication.Calculate();
    }

    /// <summary>
    /// Loads a workbook from a temporary copy of <paramref name="location"/>, replacing any
    /// previously loaded workbook, and applies the current <see cref="AutoCalculate"/> mode.
    /// </summary>
    public void LoadFromFile(string location)
    {
        // Closing also clears the sheet/cell caches; otherwise they would keep pointing
        // into the workbook that has just been closed.
        CloseCurrentWorkbook();

        var tempFileName = Path.GetTempFileName();
        try
        {
            File.Copy(location, tempFileName, true);

            // Keep the Workbooks collection in a field so its COM wrapper can be released.
            if (_workbooks == null)
            {
                _workbooks = _excelApplication.Workbooks;
            }

            _workbook = _workbooks.Open(tempFileName);
        }
        catch
        {
            // Do not leave the temporary copy behind when loading fails.
            TryDeleteFile(tempFileName);
            throw;
        }

        _tempFilePath = tempFileName;
        _workbook.EnableAutoRecover = false;
        _workbook.ForceFullCalculation = false;
        ApplyCalculationMode();
    }

    // Pushes the stored AutoCalculate flag to the Excel application.
    private void ApplyCalculationMode()
    {
        _excelApplication.Calculation =
            _autoCalculate ? XlCalculation.xlCalculationAutomatic : XlCalculation.xlCalculationManual;
    }

    // Returns the (cached) worksheet with the given name from the loaded workbook.
    private Worksheet GetSheet(string name)
    {
        Worksheet sheet;
        if (_worksheets.TryGetValue(name, out sheet))
        {
            return sheet;
        }

        if (_workbook == null)
        {
            throw new InvalidOperationException("No workbook has been loaded. Call LoadFromFile first.");
        }

        // Hold the intermediate Sheets collection explicitly so it can be released.
        Sheets sheets = _workbook.Sheets;
        try
        {
            sheet = (Worksheet) sheets[name];
        }
        finally
        {
            ReleaseComObject(sheets);
        }

        _worksheets.Add(name, sheet);
        return sheet;
    }

    // Returns the (cached) single-cell range for the given sheet, row and column.
    private Range GetCell(string sheetName, int row, int column)
    {
        var cellInfo = new CellInfo();
        cellInfo.SheetName = sheetName;
        cellInfo.Row = row;
        cellInfo.Column = column;

        Range cell;
        if (_cells.TryGetValue(cellInfo, out cell))
        {
            return cell;
        }

        var sheet = GetSheet(sheetName);

        // Hold the intermediate Cells range explicitly so it can be released.
        Range allCells = sheet.Cells;
        try
        {
            cell = (Range) allCells[row, column];
        }
        finally
        {
            ReleaseComObject(allCells);
        }

        _cells.Add(cellInfo, cell);
        return cell;
    }

    // Releases cached COM references, closes the loaded workbook without saving and
    // deletes its temporary copy. Safe to call when nothing is loaded.
    private void CloseCurrentWorkbook()
    {
        foreach (var cell in _cells.Values)
        {
            ReleaseComObject(cell);
        }
        _cells.Clear();

        foreach (var sheet in _worksheets.Values)
        {
            ReleaseComObject(sheet);
        }
        _worksheets.Clear();

        if (_workbook != null)
        {
            try
            {
                _workbook.Close(false, Missing.Value, Missing.Value);
            }
            catch (Exception)
            {
                // Best effort: the workbook is being discarded anyway, and a failure to
                // close it must not prevent loading the next one or disposing.
            }

            ReleaseComObject(_workbook);
            _workbook = null;
        }

        if (_tempFilePath != null)
        {
            TryDeleteFile(_tempFilePath);
            _tempFilePath = null;
        }
    }

    // Releases the runtime callable wrapper of a COM object; ignores null and non-COM objects.
    private static void ReleaseComObject(object comObject)
    {
        if (comObject != null && Marshal.IsComObject(comObject))
        {
            Marshal.ReleaseComObject(comObject);
        }
    }

    // Deletes a file if possible; a file that is still locked is left behind.
    private static void TryDeleteFile(string path)
    {
        try
        {
            File.Delete(path);
        }
        catch (IOException)
        {
        }
        catch (UnauthorizedAccessException)
        {
        }
    }

    /// <summary>
    /// Kills the process that owns the window with the given handle.
    /// </summary>
    /// <returns><c>true</c> if the process was found and killed; otherwise <c>false</c>.</returns>
    public static bool TryKillProcessByMainWindowHwnd(int hWnd)
    {
        uint processId;
        GetWindowThreadProcessId((IntPtr) hWnd, out processId);
        if (processId == 0)
        {
            return false;
        }

        try
        {
            using (var process = Process.GetProcessById((int) processId))
            {
                process.Kill();
            }
        }
        catch (ArgumentException)
        {
            return false;
        }
        catch (Win32Exception)
        {
            return false;
        }
        catch (NotSupportedException)
        {
            return false;
        }
        catch (InvalidOperationException)
        {
            return false;
        }

        return true;
    }

    /// <summary>
    /// Kills the process that owns the window with the given handle.
    /// </summary>
    /// <exception cref="ArgumentException">No process owns the given window handle.</exception>
    public static void KillProcessByMainWindowHwnd(int hWnd)
    {
        uint processId;
        GetWindowThreadProcessId((IntPtr) hWnd, out processId);
        if (processId == 0)
        {
            throw new ArgumentException("Process has not been found by the given main window handle.", "hWnd");
        }

        using (var process = Process.GetProcessById((int) processId))
        {
            process.Kill();
        }
    }

    /// <summary>
    /// Closes the workbook, asks Excel to quit, releases all COM references and, as a
    /// safety net, kills the Excel process if its main window still exists. Never throws.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }
        _disposed = true;

        if (_excelApplication == null)
        {
            return;
        }

        // Capture the window handle before quitting; it is needed for the kill fallback.
        var hWnd = 0;
        try
        {
            hWnd = _excelApplication.Hwnd;
        }
        catch (Exception)
        {
            // Excel may already be gone; continue with the remaining cleanup.
        }

        CloseCurrentWorkbook();

        try
        {
            _excelApplication.Quit();
        }
        catch (Exception)
        {
            // Dispose must not throw; the kill fallback below handles a stuck instance.
        }

        ReleaseComObject(_workbooks);
        _workbooks = null;
        ReleaseComObject(_excelApplication);
        _excelApplication = null;

        if (hWnd != 0)
        {
            TryKillProcessByMainWindowHwnd(hWnd);
        }
    }
}

/// <summary>
/// Identifies a single cell by sheet name and 1-based row/column; used as a cache key.
/// </summary>
public struct CellInfo : IEquatable<CellInfo>
{
    public string SheetName;
    public int Row;
    public int Column;

    public override bool Equals(object obj)
    {
        return obj is CellInfo && Equals((CellInfo) obj);
    }

    // Implementing IEquatable<CellInfo> lets Dictionary compare keys without boxing.
    public bool Equals(CellInfo other)
    {
        return string.Equals(SheetName, other.SheetName) && Row == other.Row && Column == other.Column;
    }

    public override int GetHashCode()
    {
        unchecked
        {
            var hashCode = (SheetName != null ? SheetName.GetHashCode() : 0);
            hashCode = (hashCode*397) ^ Row;
            hashCode = (hashCode*397) ^ Column;
            return hashCode;
        }
    }
}

Please don't kill me for my way of resource disposal, I will get that straight before using it in production.

As you can see, I have implemented caching of worksheets and single cells to speed things up a little.

I have disabled the obvious performance killers, such as screen updates. What else could I do to improve the performance of this Excel calculation engine? I have thought about recalculating only the cells that are read, together with the cells they depend on (recursively), instead of the complete workbook. However, I haven't found a way to do so: Range.Calculate does not seem to recalculate the cells that are referenced by the cells within the range, so the values are not consistent with the input values.


Viewing all articles
Browse latest Browse all 3

Latest Images

Trending Articles





Latest Images