feat: implement generic data set manipulator, ref: NOISSUE

This commit is contained in:
Simon Diesenreiter 2024-12-05 23:36:23 +01:00
parent 2ed103abbf
commit 85d94eca2f
14 changed files with 568 additions and 71 deletions

View File

@ -1,8 +1,8 @@
namespace TextParser.Tests; namespace TextParser.Tests;
using Parsing; using Parsing;
using Parsing.Data;
using Parsing.Schema; using Parsing.Schema;
using Parsing.Schema.BuildingBlocks;
using Parsing.Tokenization; using Parsing.Tokenization;
public class TextParserTests public class TextParserTests
@ -26,6 +26,10 @@ public class TextParserTests
private const string testInput7 = @"adfdf1()324ddf3()()()svsdvs private const string testInput7 = @"adfdf1()324ddf3()()()svsdvs
davnsldkvjs2()()m23423()()() davnsldkvjs2()()m23423()()()
mcsodkcn owdjnfj 1() asdfnad 23234 2()() sdvsdv"; mcsodkcn owdjnfj 1() asdfnad 23234 2()() sdvsdv";
// Single line of space-separated integers; parsed as one stream by DataManipulator_SimpleOneDimensionalTest.
private const string testInput8 = @"2 4 6 4 1 3 5 4 7 2 4 6 8 3";
// Three lines of five space-separated integers each; parsed as list rows by DataManipulator_SimpleTwoDimensionalTest.
private const string testInput9 = @"2 4 6 4 1
3 5 4 7 6
4 6 8 3 9";
[Fact] [Fact]
public void LineParser_TestSimpleRepetition() public void LineParser_TestSimpleRepetition()
@ -315,4 +319,74 @@ public class TextParserTests
Assert.Equal(1, convertedData[4]); Assert.Equal(1, convertedData[4]);
Assert.Equal(2, convertedData[5]); Assert.Equal(2, convertedData[5]);
} }
[Fact]
public void DataManipulator_SimpleOneDimensionalTest()
{
    // Arrange: parse testInput8 into one flat stream of integers.
    var schema = new InputSchemaBuilder()
        .Repeat()
        .Expect(InputType.Integer)
        .EndRepetition()
        .Build();
    var numbers = new TextParser<InputSchemaContext>(schema)
        .SetInputText(testInput8)
        .Parse()
        .AsSingleStream<int>();
    var manipulator = DefaultOneDimensionalManipulator.Create(numbers);

    // Act: look for the sequence "4, 6" in every direction the manipulator supports.
    var matches = manipulator.FindInSet(new List<int> { 4, 6 });

    // Assert: start index and direction of each match, in the order they are reported.
    var expected = new (int Index, Direction Heading)[]
    {
        (1, Direction.Forward),
        (3, Direction.Backward),
        (10, Direction.Forward),
    };
    Assert.Equal(expected.Length, matches.Count);
    for (int i = 0; i < expected.Length; i++)
    {
        Assert.Equal(expected[i].Index, matches[i].DataIndex.GetIndices()[0]);
        Assert.Equal(expected[i].Heading, matches[i].Direction);
    }
}
[Fact]
public void DataManipulator_SimpleTwoDimensionalTest()
{
    // Arrange: parse testInput9 into rows of integers.
    var schema = new InputSchemaBuilder()
        .Repeat()
        .Expect(InputType.Integer)
        .EndRepetition()
        .Build();
    var rows = new TextParser<InputSchemaContext>(schema)
        .SetInputText(testInput9)
        .Parse()
        .AsListRows<int>();
    var manipulator = DefaultTwoDimensionalManipulator.Create(rows);

    // Act: look for the sequence "4, 6" in all eight compass directions.
    var matches = manipulator.FindInSet(new List<int> { 4, 6 });

    // Assert: (x, y) start position and direction of each match, in the order they are reported.
    var expected = new (int X, int Y, Direction Heading)[]
    {
        (0, 0, Direction.E),
        (2, 1, Direction.N),
        (2, 1, Direction.SW),
        (1, 2, Direction.E),
        (3, 2, Direction.SE),
        (3, 2, Direction.W),
    };
    Assert.Equal(expected.Length, matches.Count);
    for (int i = 0; i < expected.Length; i++)
    {
        var position = matches[i].DataIndex.GetIndices();
        Assert.Equal(expected[i].X, position[0]);
        Assert.Equal(expected[i].Y, position[1]);
        Assert.Equal(expected[i].Heading, matches[i].Direction);
    }
}
} }

View File

@ -0,0 +1,49 @@
namespace Parsing.Data;
using Parsing;
using Parsing.Tokenization;
/// <summary>
/// Extension methods that turn parsed tokens into lists of converted values.
/// </summary>
public static class DataConversionHelpers
{
    /// <summary>
    /// Converts every token of <paramref name="tokenList"/> into exactly one value.
    /// </summary>
    /// <param name="tokenList">Tokens to convert; each must be an <c>IValueToken&lt;TOldType&gt;</c>.</param>
    /// <param name="converter">Maps the value carried by a token to the resulting value.</param>
    /// <returns>The converted values, in token order.</returns>
    /// <exception cref="Exception">A token is not an <c>IValueToken&lt;TOldType&gt;</c>.</exception>
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        var converted = new List<TNewType>();
        foreach (var rawToken in tokenList)
        {
            var valueToken = RequireValueToken<TOldType>(rawToken);
            converted.Add(converter(valueToken.GetValue()));
        }

        return converted;
    }

    /// <summary>
    /// Converts every token of <paramref name="tokenList"/> into zero or more values and flattens the results.
    /// </summary>
    /// <param name="tokenList">Tokens to convert; each must be an <c>IValueToken&lt;TOldType&gt;</c>.</param>
    /// <param name="converter">Maps the value carried by a token to a list of resulting values.</param>
    /// <returns>All produced values, concatenated in token order.</returns>
    /// <exception cref="Exception">A token is not an <c>IValueToken&lt;TOldType&gt;</c>.</exception>
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, List<TNewType>> converter) where TTokenType : IValueToken<TOldType>
    {
        var flattened = new List<TNewType>();
        foreach (var rawToken in tokenList)
        {
            var valueToken = RequireValueToken<TOldType>(rawToken);
            flattened.AddRange(converter(valueToken.GetValue()));
        }

        return flattened;
    }

    /// <summary>
    /// Applies the one-value-per-token conversion to each inner token list.
    /// </summary>
    /// <param name="tokenListList">Token lists to convert.</param>
    /// <param name="converter">Maps the value carried by a token to the resulting value.</param>
    /// <returns>One converted list per input list, in the same order.</returns>
    public static List<List<TNewType>> ConvertData<TTokenType, TNewType, TOldType>(this List<List<IToken>> tokenListList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        var convertedRows = new List<List<TNewType>>();
        foreach (var row in tokenListList)
        {
            var convertedRow = row.ConvertData<TTokenType, TNewType, TOldType>(converter);
            convertedRows.Add(convertedRow);
        }

        return convertedRows;
    }

    // Narrows a raw token to a value token of the requested type, or fails with the shared error message.
    private static IValueToken<TOldType> RequireValueToken<TOldType>(IToken token)
    {
        if (token is IValueToken<TOldType> valueToken)
        {
            return valueToken;
        }

        throw new Exception("Invalid Token type encountered during value conversion");
    }
}

View File

@ -0,0 +1,23 @@
namespace Parsing.Data;
/// <summary>
/// Extension methods that collapse a list of values into a single value.
/// </summary>
public static class DataManipulationHelpers
{
    /// <summary>
    /// Folds <paramref name="data"/> from left to right, starting with its first element.
    /// </summary>
    /// <param name="data">Values to reduce; must contain at least one element.</param>
    /// <param name="reducer">Combines the running result (first argument) with the next element (second argument).</param>
    /// <returns>The first element if it is the only one, otherwise the accumulated result.</returns>
    public static TType ReduceData<TType>(this List<TType> data, Func<TType, TType, TType> reducer)
    {
        // Reading the first element up front also covers the single-element case:
        // the loop below simply never runs.
        TType accumulated = data[0];
        int position = 1;
        while (position < data.Count)
        {
            accumulated = reducer(accumulated, data[position]);
            position++;
        }

        return accumulated;
    }

    /// <summary>
    /// Hands the complete list to <paramref name="reducer"/> and returns whatever it produces.
    /// </summary>
    /// <param name="data">Values to reduce.</param>
    /// <param name="reducer">Computes the result from the whole list.</param>
    /// <returns>The value returned by <paramref name="reducer"/>.</returns>
    public static TType ReduceData<TType>(this List<TType> data, Func<List<TType>, TType> reducer) => reducer(data);
}

View File

@ -0,0 +1,141 @@
using Parsing.Data;
namespace Parsing.Data;
/// <summary>
/// A single hit produced by a data set search.
/// </summary>
/// <typeparam name="TIndexType">Type of one index component.</typeparam>
public class SearchResult<TIndexType>
{
/// <summary>Position of the hit inside the data set; null until a search assigns it.</summary>
public IDataIndex<TIndexType>? DataIndex { get; set; }
}
/// <summary>
/// A search hit for a sequence: the inherited index is the start position, complemented by direction and length.
/// </summary>
/// <typeparam name="TIndexType">Type of one index component.</typeparam>
public class DirectionalSearchResult<TIndexType> : SearchResult<TIndexType>
{
/// <summary>Single direction in which the sequence was matched, starting at the result's index.</summary>
public Direction Direction { get; set; }
/// <summary>Number of matched elements.</summary>
public int Length { get; set; }
}
/// <summary>
/// Shared search and traversal logic for a data set that is read through an
/// <see cref="IDataSetIndexer{TDataType, TIndexType}"/>. Concrete manipulators supply the set of
/// usable directions, index validation, movement and single-element lookup.
/// </summary>
/// <typeparam name="TCollectedType">Element type of the outermost list that holds the data set.</typeparam>
/// <typeparam name="TDataType">Type of the individual values that are searched for.</typeparam>
/// <typeparam name="TIndexType">Type of one index component.</typeparam>
public abstract class DataSetManipulatorBase<TCollectedType, TDataType, TIndexType> where TDataType : IEquatable<TDataType>
{
    protected IDataSetIndexer<TDataType, TIndexType> indexer;
    protected List<TCollectedType> dataSet;

    /// <summary>Stores the data set and the indexer used to read single values from it.</summary>
    public DataSetManipulatorBase(List<TCollectedType> dataSet, IDataSetIndexer<TDataType, TIndexType> indexer)
    {
        this.indexer = indexer;
        this.dataSet = dataSet;
    }

    // we do not know how to iterate a specific data set exactly, the implementation has to take care of validating directional input
    /// <summary>Returns the combination of all directions this data set can be traversed in.</summary>
    protected abstract Direction ValidDirections();

    /// <summary>
    /// Ensures that <paramref name="d"/> is a non-empty subset of <see cref="ValidDirections"/>.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="d"/> is empty or contains an unsupported direction.</exception>
    protected void ValidateDirection(Direction d)
    {
        var allValidDirections = this.ValidDirections();
        // First term: no flag outside the valid set. Second term: at least one valid flag is present.
        var isValid = ((d | allValidDirections) == allValidDirections) && ((d & allValidDirections) > 0);
        if (!isValid)
        {
            throw new ArgumentException("Invalid search direction provided for given data set!");
        }
    }

    /// <summary>
    /// Splits a (validated) combination of direction flags into its single directions,
    /// in the order reported by <see cref="DirectionProvider.GetAllDirections"/>.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="d"/> is not valid for this data set.</exception>
    protected List<Direction> SimplifyDirections(Direction d)
    {
        this.ValidateDirection(d);
        var singleDirections = new List<Direction>();
        foreach (Direction direction in DirectionProvider.GetAllDirections())
        {
            if ((direction & d) > 0)
            {
                singleDirections.Add(direction);
            }
        }
        return singleDirections;
    }

    /// <summary>Returns every single direction that is valid for this data set.</summary>
    /// <param name="d">Currently not evaluated.</param>
    public List<Direction> GetValidDirectionList(Direction d)
    {
        // NOTE(review): the parameter is ignored and all valid directions are returned.
        // Kept as-is for compatibility; confirm whether filtering by d was intended.
        return SimplifyDirections(this.ValidDirections());
    }

    // we do not know how to iterate a specific data set exactly, the implementation has to take care of ending traversal in any direction
    /// <summary>Tells whether <paramref name="queryPosition"/> addresses an element of the data set.</summary>
    public abstract bool IsValidIndex(IDataIndex<TIndexType> queryPosition);

    // we do not know how to iterate a specific data set exactly, the implementation has to take care of traversing the set
    /// <summary>Returns the position one step away from <paramref name="currentPosition"/> in the given single direction.</summary>
    public abstract IDataIndex<TIndexType> Move(IDataIndex<TIndexType> currentPosition, Direction direction);

    /// <summary>
    /// Collects the positions reachable with one step from <paramref name="currentPosition"/>
    /// in each of the given directions, leaving out positions outside the data set.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="directions"/> is not valid for this data set.</exception>
    public List<IDataIndex<TIndexType>> GetNeighborIndices(IDataIndex<TIndexType> currentPosition, Direction directions)
    {
        var neighbors = new List<IDataIndex<TIndexType>>();
        foreach (var direction in this.SimplifyDirections(directions))
        {
            var newPosition = this.Move(currentPosition, direction);
            if (this.IsValidIndex(newPosition))
            {
                neighbors.Add(newPosition);
            }
        }
        return neighbors;
    }

    // we do not know how to iterate a specific data set exactly, but we only need to find specific items to be able to continue with any other algorithm
    /// <summary>Finds every position at which a single value equal to <paramref name="data"/> is stored.</summary>
    public abstract List<SearchResult<TIndexType>> FindInSet(TDataType data);

    /// <summary>
    /// Searches for the sequence <paramref name="data"/> starting at <paramref name="currentPosition"/>
    /// in all directions valid for this data set.
    /// </summary>
    public List<DirectionalSearchResult<TIndexType>> FindAtPosition(IDataIndex<TIndexType> currentPosition, List<TDataType> data)
    {
        return this.FindAtPosition(currentPosition, data, this.ValidDirections());
    }

    /// <summary>
    /// Searches for the sequence <paramref name="data"/> starting at <paramref name="currentPosition"/>
    /// in each of the given directions.
    /// </summary>
    /// <param name="currentPosition">Position that has to hold the first element of the sequence.</param>
    /// <param name="data">Sequence to look for; an empty sequence yields no results.</param>
    /// <param name="directions">Combination of directions to follow.</param>
    /// <returns>One result per direction in which the complete sequence was found.</returns>
    /// <exception cref="ArgumentException"><paramref name="directions"/> is not valid for this data set.</exception>
    public List<DirectionalSearchResult<TIndexType>> FindAtPosition(IDataIndex<TIndexType> currentPosition, List<TDataType> data, Direction directions)
    {
        var results = new List<DirectionalSearchResult<TIndexType>>();
        var givenDirections = this.SimplifyDirections(directions);

        // An empty sequence cannot be anchored anywhere; report no match instead of failing on data[0].
        if (data.Count == 0)
        {
            return results;
        }

        var comparer = EqualityComparer<TDataType>.Default;
        if (!comparer.Equals(this.indexer.Get(this.dataSet, currentPosition), data[0]))
        {
            return results;
        }

        // found valid search start point, now validate each given direction
        foreach (var direction in givenDirections)
        {
            int searchIndex = 1;
            var searchPosition = this.Move(currentPosition, direction);
            while (searchIndex < data.Count && this.IsValidIndex(searchPosition)
                && comparer.Equals(this.indexer.Get(this.dataSet, searchPosition), data[searchIndex]))
            {
                searchPosition = this.Move(searchPosition, direction);
                searchIndex++;
            }

            // The loop only runs to the end of the sequence when every element matched.
            if (searchIndex == data.Count)
            {
                results.Add(new DirectionalSearchResult<TIndexType>
                {
                    DataIndex = currentPosition,
                    Direction = direction,
                    Length = searchIndex,
                });
            }
        }
        return results;
    }

    /// <summary>
    /// Finds every occurrence of the sequence <paramref name="data"/> in the data set, in all valid directions.
    /// </summary>
    /// <param name="data">Sequence to look for; an empty sequence yields no results.</param>
    /// <returns>Start position, direction and length of every occurrence.</returns>
    public List<DirectionalSearchResult<TIndexType>> FindInSet(List<TDataType> data)
    {
        var result = new List<DirectionalSearchResult<TIndexType>>();
        if (data.Count == 0)
        {
            return result;
        }

        // find valid starting points in set and perform search from there
        foreach (var startingPoint in this.FindInSet(data[0]))
        {
            // DataIndex is declared nullable; a hit without a position cannot be searched from.
            if (startingPoint.DataIndex is null)
            {
                continue;
            }

            // FindAtPosition already returns a list, so append it as a whole.
            result.AddRange(this.FindAtPosition(startingPoint.DataIndex, data));
        }
        return result;
    }
}

View File

@ -0,0 +1,62 @@
/// <summary>
/// Reads single values out of nested lists (one to three levels deep) using integer indices.
/// </summary>
/// <typeparam name="TDataType">Type of the values stored at the innermost level.</typeparam>
public class DefaultDataSetIndexer<TDataType> : IDataSetIndexer<TDataType, int>
{
    /// <summary>Reads the value addressed by the components of <paramref name="index"/>.</summary>
    /// <exception cref="ArgumentException">The index does not have one, two or three components, or the collection has the wrong shape.</exception>
    public TDataType Get<TGenericCollectionContentType>(List<TGenericCollectionContentType> collection, IDataIndex<int> index)
        => this.GetInternal(collection, index.GetIndices().ToArray());

    // Dispatches on the number of index components; the cast yields null when the collection
    // does not have the matching nesting depth, which the GetAtIndex overloads reject.
    private TDataType GetInternal<TGenericCollectionContentType>(List<TGenericCollectionContentType> collection, int[] indices)
    {
        return indices.Length switch
        {
            3 => this.GetAtIndex(collection as List<List<List<TDataType>>>, indices[0], indices[1], indices[2]),
            2 => this.GetAtIndex(collection as List<List<TDataType>>, indices[0], indices[1]),
            1 => this.GetAtIndex(collection as List<TDataType>, indices[0]),
            _ => throw new ArgumentException("Invalid Data Set access!"),
        };
    }

    /// <summary>Reads the value addressed by the given index components.</summary>
    /// <exception cref="ArgumentException">Not one, two or three indices were given, or the collection has the wrong shape.</exception>
    public TDataType Get<TGenericCollectionContentType>(List<TGenericCollectionContentType> collection, params int[] indices)
        => this.GetInternal(collection, indices);

    /// <summary>Reads element <paramref name="index"/> of a flat list.</summary>
    /// <exception cref="ArgumentException"><paramref name="collection"/> is null.</exception>
    public TDataType GetAtIndex(List<TDataType> collection, int index)
    {
        var items = collection ?? throw new ArgumentException("Invalid data set provided for access");
        return items[index];
    }

    /// <summary>
    /// Reads column <paramref name="x"/> of a row, where <paramref name="y"/> counts rows from the end of the list
    /// (y = 0 is the last row).
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="collection"/> is null.</exception>
    public TDataType GetAtIndex(List<List<TDataType>> collection, int x, int y)
    {
        var rows = collection ?? throw new ArgumentException("Invalid data set provided for access");
        int rowFromStart = rows.Count - y - 1;
        return rows[rowFromStart][x];
    }

    /// <summary>Reads the element at <c>[z][y][x]</c> of a three-level list; no axis is flipped here.</summary>
    /// <exception cref="ArgumentException"><paramref name="collection"/> is null.</exception>
    public TDataType GetAtIndex(List<List<List<TDataType>>> collection, int x, int y, int z)
    {
        var layers = collection ?? throw new ArgumentException("Invalid data set provided for access");
        return layers[z][y][x];
    }
}

View File

@ -0,0 +1,60 @@
using System.Runtime.InteropServices;
using Parsing.Data;
namespace Parsing.Data;
/// <summary>
/// Factory that lets the element type of a one-dimensional manipulator be inferred from the data set.
/// </summary>
public static class DefaultOneDimensionalManipulator
{
    /// <summary>Creates a manipulator working on <paramref name="dataSet"/>.</summary>
    public static DefaultOneDimensionalManipulator<TDataType> Create<TDataType>(List<TDataType> dataSet) where TDataType : IEquatable<TDataType>
        => new DefaultOneDimensionalManipulator<TDataType>(dataSet);
}
/// <summary>
/// Manipulator for a flat list that can be traversed forward (towards higher indices)
/// and backward (towards lower indices).
/// </summary>
/// <typeparam name="TDataType">Type of the values in the list.</typeparam>
public class DefaultOneDimensionalManipulator<TDataType> : DataSetManipulatorBase<TDataType, TDataType, int> where TDataType : IEquatable<TDataType>
{
    /// <summary>Creates a manipulator for <paramref name="dataSet"/> using the default integer indexer.</summary>
    public DefaultOneDimensionalManipulator(List<TDataType> dataSet) : base(dataSet, new DefaultDataSetIndexer<TDataType>())
    {
    }

    /// <summary>Only left and right (aliases of backward and forward) are usable on a flat list.</summary>
    protected override Direction ValidDirections()
    {
        return (Direction.Left | Direction.Right);
    }

    /// <summary>Tells whether the first index component lies inside the list.</summary>
    public override bool IsValidIndex(IDataIndex<int> queryPosition)
    {
        var index = queryPosition.GetIndices()[0];
        return (index >= 0) && (index < this.dataSet.Count);
    }

    /// <summary>Returns the position one element after (forward) or before (backward) the current one.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="direction"/> is neither forward nor backward.</exception>
    public override IDataIndex<int> Move(IDataIndex<int> currentPosition, Direction direction)
    {
        switch (direction)
        {
            case Direction.Forward:
                return new DefaultPositionalDataIndex(currentPosition.GetIndices()[0] + 1);
            case Direction.Backward:
                return new DefaultPositionalDataIndex(currentPosition.GetIndices()[0] - 1);
            default:
                // The single-string constructor would treat the text as the parameter name,
                // so pass parameter name, offending value and message explicitly.
                throw new ArgumentOutOfRangeException(nameof(direction), direction, "Direction was not accounted for move for current data set!");
        }
    }

    /// <summary>Finds every index whose element equals <paramref name="data"/>, in ascending index order.</summary>
    public override List<SearchResult<int>> FindInSet(TDataType data)
    {
        var results = new List<SearchResult<int>>();
        var comparer = EqualityComparer<TDataType>.Default;
        for (int i = 0; i < this.dataSet.Count; i++)
        {
            if (comparer.Equals(this.dataSet[i], data))
            {
                results.Add(new SearchResult<int> { DataIndex = new DefaultPositionalDataIndex(i) });
            }
        }
        return results;
    }
}

View File

@ -0,0 +1,14 @@
/// <summary>
/// Integer index made of an ordered list of components (for example x, or x and y).
/// </summary>
public class DefaultPositionalDataIndex : IDataIndex<int>
{
    private List<int> indices;

    /// <summary>Creates an index from the given components, keeping their order.</summary>
    public DefaultPositionalDataIndex(params int[] indices)
    {
        // Copy the components so later changes to the caller's array do not affect this index.
        this.indices = new List<int>(indices);
    }

    /// <summary>Returns the components of this index in the order they were given.</summary>
    public IList<int> GetIndices() => this.indices;
}

View File

@ -0,0 +1,83 @@
using System.Runtime.InteropServices;
using Parsing.Data;
namespace Parsing.Data;
/// <summary>
/// Factory that lets the element type of a two-dimensional manipulator be inferred from the data set.
/// </summary>
public static class DefaultTwoDimensionalManipulator
{
    /// <summary>Creates a manipulator working on the rows in <paramref name="dataSet"/>.</summary>
    public static DefaultTwoDimensionalManipulator<TDataType> Create<TDataType>(List<List<TDataType>> dataSet) where TDataType : IEquatable<TDataType>
        => new DefaultTwoDimensionalManipulator<TDataType>(dataSet);
}
/// <summary>
/// Manipulator for a list of rows that can be traversed in all eight compass directions.
/// Positions are (x, y) pairs where x is the column and y counts rows from the end of the list,
/// so y = 0 addresses the last row and north increases y.
/// </summary>
/// <typeparam name="TDataType">Type of the values in the rows.</typeparam>
public class DefaultTwoDimensionalManipulator<TDataType> : DataSetManipulatorBase<List<TDataType>, TDataType, int> where TDataType : IEquatable<TDataType>
{
    /// <summary>Creates a manipulator for <paramref name="dataSet"/> using the default integer indexer.</summary>
    public DefaultTwoDimensionalManipulator(List<List<TDataType>> dataSet) : base(dataSet, new DefaultDataSetIndexer<TDataType>())
    {
    }

    /// <summary>All eight compass directions are usable.</summary>
    protected override Direction ValidDirections()
    {
        return (Direction.N
            | Direction.NE
            | Direction.E
            | Direction.SE
            | Direction.S
            | Direction.SW
            | Direction.W
            | Direction.NW);
    }

    /// <summary>Tells whether the (x, y) position addresses an existing element.</summary>
    public override bool IsValidIndex(IDataIndex<int> queryPosition)
    {
        var indices = queryPosition.GetIndices();
        var xIndex = indices[0];
        var yIndex = indices[1];
        if (yIndex < 0 || yIndex >= this.dataSet.Count)
        {
            return false;
        }

        // y counts rows from the end of the list (as in FindInSet and the indexer), so the column
        // bound must be taken from that same row; this matters when rows differ in length.
        var row = this.dataSet[this.dataSet.Count - yIndex - 1];
        return (xIndex >= 0) && (xIndex < row.Count);
    }

    /// <summary>Returns the position one step away from the current one in the given compass direction.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="direction"/> is not a single compass direction.</exception>
    public override IDataIndex<int> Move(IDataIndex<int> currentPosition, Direction direction)
    {
        var indices = currentPosition.GetIndices();
        int x = indices[0];
        int y = indices[1];
        switch (direction)
        {
            case Direction.N:
                return new DefaultPositionalDataIndex(x, y + 1);
            case Direction.NE:
                return new DefaultPositionalDataIndex(x + 1, y + 1);
            case Direction.E:
                return new DefaultPositionalDataIndex(x + 1, y);
            case Direction.SE:
                return new DefaultPositionalDataIndex(x + 1, y - 1);
            case Direction.S:
                return new DefaultPositionalDataIndex(x, y - 1);
            case Direction.SW:
                return new DefaultPositionalDataIndex(x - 1, y - 1);
            case Direction.W:
                return new DefaultPositionalDataIndex(x - 1, y);
            case Direction.NW:
                return new DefaultPositionalDataIndex(x - 1, y + 1);
            default:
                // The single-string constructor would treat the text as the parameter name,
                // so pass parameter name, offending value and message explicitly.
                throw new ArgumentOutOfRangeException(nameof(direction), direction, "Direction was not accounted for move for current data set!");
        }
    }

    /// <summary>
    /// Finds every (x, y) position whose element equals <paramref name="data"/>,
    /// ordered by ascending y and, within a row, ascending x.
    /// </summary>
    public override List<SearchResult<int>> FindInSet(TDataType data)
    {
        var results = new List<SearchResult<int>>();
        var comparer = EqualityComparer<TDataType>.Default;
        for (int y = 0; y < this.dataSet.Count; y++)
        {
            // y = 0 is the last row of the list.
            var row = this.dataSet[this.dataSet.Count - y - 1];
            for (int x = 0; x < row.Count; x++)
            {
                if (comparer.Equals(row[x], data))
                {
                    results.Add(new SearchResult<int> { DataIndex = new DefaultPositionalDataIndex(x, y) });
                }
            }
        }
        return results;
    }
}

View File

@ -0,0 +1,43 @@
/// <summary>
/// Bit flags for the eight compass directions, plus named combinations and aliases.
/// </summary>
[Flags]
public enum Direction
{
    // Single directions: one bit each, clockwise starting at north.
    N = 1 << 0,
    NE = 1 << 1,
    E = 1 << 2,
    SE = 1 << 3,
    S = 1 << 4,
    SW = 1 << 5,
    W = 1 << 6,
    NW = 1 << 7,

    // Named combinations of single directions.
    Horizontal = E | W,
    Vertical = N | S,
    Cardinal = Horizontal | Vertical,
    RisingDiagonal = NE | SW,
    FallingDiagonal = NW | SE,
    Diagonal = RisingDiagonal | FallingDiagonal,
    All = Cardinal | Diagonal,

    // Aliases for the cardinal directions.
    Left = W,
    Right = E,
    Up = N,
    Down = S,

    // Aliases for traversing a sequence.
    Forward = Right,
    Backward = Left
}
/// <summary>
/// Supplies the list of single (non-combined) directions.
/// </summary>
public static class DirectionProvider
{
    /// <summary>
    /// Returns a new array with the eight single directions, clockwise starting at north.
    /// </summary>
    public static Direction[] GetAllDirections() => new Direction[]
    {
        Direction.N,
        Direction.NE,
        Direction.E,
        Direction.SE,
        Direction.S,
        Direction.SW,
        Direction.W,
        Direction.NW
    };
}

View File

@ -0,0 +1,4 @@
/// <summary>
/// A position inside a data set, expressed as an ordered list of index components.
/// </summary>
/// <typeparam name="TIndexType">Type of one index component.</typeparam>
public interface IDataIndex<TIndexType>
{
/// <summary>Returns the index components that make up this position.</summary>
public IList<TIndexType> GetIndices();
}

View File

@ -0,0 +1,10 @@
/// <summary>
/// Reads single values of type <typeparamref name="TDataType"/> out of list-based data sets.
/// </summary>
/// <typeparam name="TDataType">Type of the values that are read.</typeparam>
/// <typeparam name="TIndexType">Type of one index component.</typeparam>
public interface IDataSetIndexer<TDataType, TIndexType>
{
/// <summary>Reads the value at the position described by <paramref name="index"/>.</summary>
public TDataType Get<TGenericCollectionContentType>(List<TGenericCollectionContentType> collection, IDataIndex<TIndexType> index);
/// <summary>Reads the value at the position described by the given index components.</summary>
public TDataType Get<TGenericCollectionContentType>(List<TGenericCollectionContentType> collection, params TIndexType[] indices);
/// <summary>Reads a value from a flat list using one index component.</summary>
public TDataType GetAtIndex(List<TDataType> collection, TIndexType index);
/// <summary>Reads a value from a list of lists using two index components.</summary>
public TDataType GetAtIndex(List<List<TDataType>> collection, TIndexType x, TIndexType y);
/// <summary>Reads a value from a three-level nested list using three index components.</summary>
public TDataType GetAtIndex(List<List<List<TDataType>>> collection, TIndexType x, TIndexType y, TIndexType z);
}

View File

View File

@ -1,77 +1,10 @@
namespace Parsing; namespace Parsing.Data;
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using Parsing.Schema; using Parsing.Schema;
using Parsing.Tokenization; using Parsing.Tokenization;
/// <summary>
/// Extension methods that turn parsed tokens into lists of converted values.
/// </summary>
public static class DataConversionHelpers
{
    /// <summary>
    /// Converts every token of <paramref name="tokenList"/> into exactly one value.
    /// </summary>
    /// <param name="tokenList">Tokens to convert; each must be an <c>IValueToken&lt;TOldType&gt;</c>.</param>
    /// <param name="converter">Maps the value carried by a token to the resulting value.</param>
    /// <returns>The converted values, in token order.</returns>
    /// <exception cref="Exception">A token is not an <c>IValueToken&lt;TOldType&gt;</c>.</exception>
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        var converted = new List<TNewType>();
        foreach (var rawToken in tokenList)
        {
            var valueToken = RequireValueToken<TOldType>(rawToken);
            converted.Add(converter(valueToken.GetValue()));
        }

        return converted;
    }

    /// <summary>
    /// Converts every token of <paramref name="tokenList"/> into zero or more values and flattens the results.
    /// </summary>
    /// <param name="tokenList">Tokens to convert; each must be an <c>IValueToken&lt;TOldType&gt;</c>.</param>
    /// <param name="converter">Maps the value carried by a token to a list of resulting values.</param>
    /// <returns>All produced values, concatenated in token order.</returns>
    /// <exception cref="Exception">A token is not an <c>IValueToken&lt;TOldType&gt;</c>.</exception>
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, List<TNewType>> converter) where TTokenType : IValueToken<TOldType>
    {
        var flattened = new List<TNewType>();
        foreach (var rawToken in tokenList)
        {
            var valueToken = RequireValueToken<TOldType>(rawToken);
            flattened.AddRange(converter(valueToken.GetValue()));
        }

        return flattened;
    }

    /// <summary>
    /// Applies the one-value-per-token conversion to each inner token list.
    /// </summary>
    /// <param name="tokenListList">Token lists to convert.</param>
    /// <param name="converter">Maps the value carried by a token to the resulting value.</param>
    /// <returns>One converted list per input list, in the same order.</returns>
    public static List<List<TNewType>> ConvertData<TTokenType, TNewType, TOldType>(this List<List<IToken>> tokenListList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        var convertedRows = new List<List<TNewType>>();
        foreach (var row in tokenListList)
        {
            var convertedRow = row.ConvertData<TTokenType, TNewType, TOldType>(converter);
            convertedRows.Add(convertedRow);
        }

        return convertedRows;
    }

    // Narrows a raw token to a value token of the requested type, or fails with the shared error message.
    private static IValueToken<TOldType> RequireValueToken<TOldType>(IToken token)
    {
        if (token is IValueToken<TOldType> valueToken)
        {
            return valueToken;
        }

        throw new Exception("Invalid Token type encountered during value conversion");
    }
}
/// <summary>
/// Extension methods that collapse a list of values into a single value.
/// </summary>
public static class DataManipulationHelpers
{
    /// <summary>
    /// Folds <paramref name="data"/> from left to right, starting with its first element.
    /// </summary>
    /// <param name="data">Values to reduce; must contain at least one element.</param>
    /// <param name="reducer">Combines the running result (first argument) with the next element (second argument).</param>
    /// <returns>The first element if it is the only one, otherwise the accumulated result.</returns>
    public static TType ReduceData<TType>(this List<TType> data, Func<TType, TType, TType> reducer)
    {
        // Reading the first element up front also covers the single-element case:
        // the loop below simply never runs.
        TType accumulated = data[0];
        int position = 1;
        while (position < data.Count)
        {
            accumulated = reducer(accumulated, data[position]);
            position++;
        }

        return accumulated;
    }

    /// <summary>
    /// Hands the complete list to <paramref name="reducer"/> and returns whatever it produces.
    /// </summary>
    /// <param name="data">Values to reduce.</param>
    /// <param name="reducer">Computes the result from the whole list.</param>
    /// <returns>The value returned by <paramref name="reducer"/>.</returns>
    public static TType ReduceData<TType>(this List<TType> data, Func<List<TType>, TType> reducer) => reducer(data);
}
public class TokenConverter public class TokenConverter
{ {
protected List<List<IToken>> rawTokens = new List<List<IToken>>(); protected List<List<IToken>> rawTokens = new List<List<IToken>>();

View File

@ -2,6 +2,7 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using Parsing.Data;
using Parsing.Schema; using Parsing.Schema;
using Parsing.Tokenization; using Parsing.Tokenization;