Skip to content

Commit

Permalink
allocation fighting and performance boosting
Browse files Browse the repository at this point in the history
  • Loading branch information
petriashev committed Feb 26, 2023
1 parent 7790d48 commit a0b77c8
Show file tree
Hide file tree
Showing 39 changed files with 1,166 additions and 222 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ public interface IExcelElement<TOpenXmlElement> : IMetadataProvider
/// <summary>
/// Gets OpenXml element.
/// </summary>
[MaybeNull]
TOpenXmlElement Data { get; }
TOpenXmlElement? Data { get; }

/// <summary>
/// Returns true if <see cref="Data"/> is not null.
Expand All @@ -45,21 +44,14 @@ public class ExcelElement<TOpenXmlElement> : IExcelElement<TOpenXmlElement>
/// <summary>
/// Gets OpenXml element.
/// </summary>
[MaybeNull]
public TOpenXmlElement Data { get; }

///// <summary>
///// Gets OpenXml element as <see cref="Option{A}"/>.
///// </summary>
///// <returns>Optional OpenXml element.</returns>
//public Option<TOpenXmlElement> AsOption() => Data!;
public TOpenXmlElement? Data { get; }

/// <summary>
/// Initializes a new instance of the <see cref="ExcelElement{TOpenXmlElement}"/> class.
/// </summary>
/// <param name="doc">OpenXml document that contains this element.</param>
/// <param name="data">OpenXml element.</param>
public ExcelElement(SpreadsheetDocument doc, [MaybeNull] TOpenXmlElement data)
public ExcelElement(SpreadsheetDocument doc, TOpenXmlElement? data)
{
Doc = doc.AssertArgumentNotNull(nameof(doc));
Data = data;
Expand Down Expand Up @@ -95,15 +87,14 @@ public ExcelElement(SpreadsheetDocument doc, [MaybeNull] TOpenXmlElement data)
/// <summary>
/// Gets OpenXml element.
/// </summary>
[MaybeNull]
public TOpenXmlElement Data { get; }
public TOpenXmlElement? Data { get; }

/// <summary>
/// Initializes a new instance of the <see cref="ExcelElementLight{TOpenXmlElement}"/> struct.
/// </summary>
/// <param name="doc">OpenXml document that contains this element.</param>
/// <param name="data">OpenXml element.</param>
public ExcelElementLight(SpreadsheetDocument doc, [MaybeNull] TOpenXmlElement data)
public ExcelElementLight(SpreadsheetDocument doc, TOpenXmlElement? data)
{
Doc = doc.AssertArgumentNotNull(nameof(doc));
Data = data;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright (c) MicroElements. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System.Linq;
using DocumentFormat.OpenXml;
using MicroElements.CodeContracts;
using MicroElements.Collections.Cache;

namespace MicroElements.Metadata.OpenXml.Excel.Parsing
{
public static partial class ExcelParsingExtensions
{
    /// <summary>
    /// Gets cell reference by column and row index.
    /// Example: (0,0)->A1.
    /// </summary>
    /// <param name="column">Column index.</param>
    /// <param name="row">Row index.</param>
    /// <param name="zeroBased">
    /// If <c>true</c>, <paramref name="column"/> and <paramref name="row"/> start from 0;
    /// otherwise they start from 1.
    /// </param>
    /// <returns>Cell reference: column letters followed by the one-based row number.</returns>
    /// <exception cref="System.ArgumentOutOfRangeException">
    /// <paramref name="column"/> points before the first column.
    /// </exception>
    public static StringValue GetCellReference(int column, int row, bool zeroBased = true)
    {
        int columnIndex = zeroBased ? column : column - 1;
        if (columnIndex < 0)
        {
            // Without this check the letter arithmetic yields non-letter characters (for example '@' for -1).
            throw new System.ArgumentOutOfRangeException(nameof(column), column, "Column is before the first column.");
        }

        string columnName = GetColumnName(columnIndex);
        int rowName = zeroBased ? row + 1 : row;

        // Cell references are machine-readable, so the row number is formatted culture-independently.
        return new StringValue(string.Concat(columnName, rowName.ToString(System.Globalization.CultureInfo.InvariantCulture)));
    }

    /// <summary>
    /// Gets column name by zero-based column index (cached).
    /// Example: 0->A, 25->Z, 26->AA.
    /// </summary>
    /// <param name="columnIndex">Zero-based column index.</param>
    /// <returns>Column name composed of the letters A-Z.</returns>
    /// <exception cref="System.ArgumentOutOfRangeException">
    /// <paramref name="columnIndex"/> is negative.
    /// </exception>
    public static string GetColumnName(int columnIndex = 0)
    {
        if (columnIndex < 0)
        {
            // Reject the value before the lookup so that a meaningless name is never stored in the cache.
            throw new System.ArgumentOutOfRangeException(nameof(columnIndex), columnIndex, "Column index must be non-negative.");
        }

        return Cache
            .Instance<int, string>("ColumnName")
            .GetOrAdd(columnIndex, i => GetColumnName(string.Empty, i));
    }

    /// <summary>
    /// Appends the letters for <paramref name="columnIndex"/> to <paramref name="prefix"/>.
    /// </summary>
    /// <param name="prefix">Letters already produced for the higher positions.</param>
    /// <param name="columnIndex">Zero-based column index.</param>
    /// <returns>Prefix followed by the letters of the column.</returns>
    private static string GetColumnName(string prefix, int columnIndex = 0)
    {
        if (columnIndex < 26)
        {
            return prefix + (char)('A' + columnIndex);
        }

        // Column names have no zero digit ("Z" is followed by "AA"),
        // hence the "- 1" when moving to the higher letters.
        string higherLetters = GetColumnName(prefix, (columnIndex / 26) - 1);
        return GetColumnName(higherLetters, columnIndex % 26);
    }

    /// <summary>
    /// Gets column reference from cell reference.
    /// For example: A1->A, CD22->CD.
    /// </summary>
    /// <param name="cellReference">Cell reference.</param>
    /// <returns>Column reference.</returns>
    public static string GetColumnReference(this StringValue cellReference)
    {
        cellReference.AssertArgumentNotNull(nameof(cellReference));

        return cellReference.Value.GetColumnReference();
    }

    /// <summary>
    /// Gets column reference from cell reference.
    /// For example: A1->A, CD22->CD.
    /// </summary>
    /// <param name="cellReference">Cell reference.</param>
    /// <returns>
    /// Leading letters of <paramref name="cellReference"/>.
    /// For a two-character reference the first character is returned without inspecting it.
    /// </returns>
    public static string GetColumnReference(this string cellReference)
    {
        cellReference.AssertArgumentNotNull(nameof(cellReference));

        // Fast path for the shortest references such as "A1".
        if (cellReference.Length == 2)
        {
            return cellReference.Substring(0, 1);
        }

        // Count the leading letters in place instead of building an intermediate char array.
        int letterCount = 0;
        while (letterCount < cellReference.Length && char.IsLetter(cellReference[letterCount]))
        {
            letterCount++;
        }

        return cellReference.Substring(0, letterCount);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using MicroElements.CodeContracts;
using MicroElements.Collections.Cache;
using MicroElements.Collections.Extensions.Iterate;
using MicroElements.Collections.TwoLayerCache;
using MicroElements.Diagnostics;
using MicroElements.Metadata.Parsing;
using MicroElements.Validation;
Expand Down Expand Up @@ -64,17 +66,6 @@ public static IEnumerable<ExcelElement<Column>> GetColumns(this ExcelElement<She
return Array.Empty<ExcelElement<Column>>();
}

/// <summary>
/// Gets rows from sheet.
/// </summary>
/// <param name="sheet">Source sheet.</param>
/// <returns>Sheet rows.</returns>
public static IEnumerable<ExcelElement<Row>> GetRows(this ExcelElement<Sheet> sheet)
{
return GetOpenXmlRows(sheet)
.Zip(Enumerable.Repeat(sheet, int.MaxValue), (row, sh) => new ExcelElement<Row>(sh.Doc, row));
}

/// <summary>
/// Gets rows from sheet.
/// </summary>
Expand All @@ -96,65 +87,13 @@ public static IEnumerable<Row> GetOpenXmlRows(this ExcelElement<Sheet> sheet)
}

/// <summary>
/// Gets cell reference by column row index.
/// Example: (0,0)->A1.
/// </summary>
/// <param name="column">Column index.</param>
/// <param name="row">Row index.</param>
/// <param name="zeroBased">Is column and row zero based.</param>
/// <returns>Cell reference.</returns>
public static StringValue GetCellReference(int column, int row, bool zeroBased = true)
{
int columnIndex = zeroBased ? column : column - 1;
string columnName = GetColumnName(columnIndex);
int rowName = zeroBased ? row + 1 : row;
return new StringValue(string.Concat(columnName, rowName.ToString()));
}

private static readonly ConcurrentDictionary<int, string> _columnIndexes = new ConcurrentDictionary<int, string>();

/// <summary>
/// Gets column index (cached).
/// </summary>
public static string GetColumnName(int columnIndex = 0)
{
return _columnIndexes.GetOrAdd(columnIndex, i => GetColumnName(string.Empty, i));
}

private static string GetColumnName(string prefix, int columnIndex = 0)
{
return columnIndex < 26
? $"{prefix}{(char)(65 + columnIndex)}"
: GetColumnName(GetColumnName(prefix, ((columnIndex - (columnIndex % 26)) / 26) - 1), columnIndex % 26);
}

/// <summary>
/// Gets column reference from cell reference.
/// For example: A1->A, CD22->CD.
/// </summary>
/// <param name="cellReference">Cell reference.</param>
/// <returns>Column reference.</returns>
public static string GetColumnReference(this StringValue cellReference)
{
cellReference.AssertArgumentNotNull(nameof(cellReference));

return cellReference.Value.GetColumnReference();
}

/// <summary>
/// Gets column reference from cell reference.
/// For example: A1->A, CD22->CD.
/// Gets rows from sheet.
/// </summary>
/// <param name="cellReference">Cell reference.</param>
/// <returns>Column reference.</returns>
public static string GetColumnReference(this string cellReference)
/// <param name="sheet">Source sheet.</param>
/// <returns>Sheet rows.</returns>
public static IEnumerable<ExcelElement<Row>> GetRows(this ExcelElement<Sheet> sheet)
{
cellReference.AssertArgumentNotNull(nameof(cellReference));

if (cellReference.Length == 2)
return cellReference.Substring(0, 1);

return new string(cellReference.TakeWhile(char.IsLetter).ToArray());
return GetOpenXmlRows(sheet).Select(row => new ExcelElement<Row>(sheet.Doc, row));
}

/// <summary>
Expand Down Expand Up @@ -253,7 +192,7 @@ public static ExcelElement<HeaderCell>[] GetHeaders(this ExcelElement<Row> row)
if (cell != null)
{
IPropertyParser? propertyParser = header.GetMetadata<IPropertyParser>();
rowValues[i] = cell.GetCellValue(nullValue, propertyParser);
rowValues[i] = cell.GetCellValue(nullValue, propertyParser?.TargetType);

//StringValue cellReference = cell.Data.CellReference;
}
Expand Down Expand Up @@ -309,28 +248,37 @@ private static string GetFormattedValue(this ExcelElement<Cell> cell)
/// Uses SharedStringTable if needed.
/// For DateTime, LocalDate and LocalTime tries to convert double excel value to ISO format.
/// </summary>
public static string? GetCellValue(this ExcelElement<Cell> cell, string? nullValue = null, IPropertyParser? propertyParser = null)
public static string? GetCellValue<T>(
this T cellElement,
string? nullValue = null,
Type? targetType = null)
where T : IExcelElement<Cell>
{
Cell? cellData = cell.Data;
string? cellValue = cellData?.CellValue?.InnerText ?? nullValue;
Cell? cell = cellElement.Data;
SharedStringTable sharedStringTable = cellElement.Doc.WorkbookPart.SharedStringTablePart.SharedStringTable;

string? cellValue = cell?.CellValue?.InnerText ?? nullValue;
string? cellTextValue = null;

if (cellData == null)
if (cell == null)
return cellValue;

CellValues? dataTypeValue = cellData.DataType?.Value;
CellValues? dataTypeValue = cell.DataType?.Value;

if (cellValue != null && dataTypeValue == CellValues.SharedString)
{
cellTextValue = cell.Doc.WorkbookPart.SharedStringTablePart.SharedStringTable.ChildElements.GetItem(int.Parse(cellValue)).InnerText;
// cellTextValue = sharedStringTable.ChildElements.GetItem(int.Parse(cellValue)).InnerText;
cellTextValue = sharedStringTable
.GetWeakCache<string, string>()
.GetOrAdd(cellValue, static (cellValue, sharedStringTable) => sharedStringTable.ChildElements.GetItem(int.Parse(cellValue)).InnerText, sharedStringTable);
}

if (cellTextValue == null && cellValue != null)
{
propertyParser ??= cell.GetMetadata<IPropertyParser>();

if (propertyParser != null)
cellTextValue = TryParseAsDateType(cellValue, propertyParser.TargetType);
if (targetType != null)
{
cellTextValue = TryParseAsDateType(cellValue, targetType);
}
}

return cellTextValue ?? cellValue;
Expand Down Expand Up @@ -578,11 +526,13 @@ public static partial class ExcelParsingExtensions
// Use first row as headers
headers = row.GetHeaders();

var propertyParsers = parserProvider.GetParsers().ToArray();

// Associate parser for each header
foreach (var header in headers)
{
// TODO: RIGID SEARCH. Use predicate?
var propertyParser = parserProvider.GetParsers().FirstOrDefault(parser => parser.SourceName == header.Data.Name);
var propertyParser = propertyParsers.FirstOrDefault(parser => parser.SourceName == header.Data.Name);
if (propertyParser != null)
{
header.SetMetadata(propertyParser);
Expand Down
Loading

0 comments on commit a0b77c8

Please sign in to comment.