From 550c8280a68a913191a3bdc284c22c018b837332 Mon Sep 17 00:00:00 2001 From: Simon Diesenreiter Date: Thu, 5 Dec 2024 23:58:11 +0100 Subject: [PATCH] fix: allow for parsing single chars as input, ref: NOISSUE --- TextParser.Tests/TextParserTests.cs | 30 ++- TextParser/Schema/BlockType.cs | 11 +- .../BuildingBlocks/BuildingBlockBase.cs | 2 +- TextParser/Schema/BuildingBlocks/CharBlock.cs | 49 ++++ .../BuildingBlocks/FixedRepetitionBlock.cs | 4 +- .../BuildingBlocks/GreedyRepetitionBlock.cs | 4 +- .../Schema/BuildingBlocks/IBuildingBlock.cs | 2 +- .../Schema/BuildingBlocks/IntegerBlock.cs | 4 +- .../Schema/BuildingBlocks/StringBlock.cs | 4 +- TextParser/Schema/InputSchema.cs | 6 +- TextParser/Schema/InputSchemaBuilder.cs | 3 + TextParser/Schema/InputType.cs | 1 + TextParser/TokenConverter.cs | 226 ++++++++++++++++++ 13 files changed, 325 insertions(+), 21 deletions(-) create mode 100644 TextParser/Schema/BuildingBlocks/CharBlock.cs create mode 100644 TextParser/TokenConverter.cs diff --git a/TextParser.Tests/TextParserTests.cs b/TextParser.Tests/TextParserTests.cs index 3384bd6..3490678 100644 --- a/TextParser.Tests/TextParserTests.cs +++ b/TextParser.Tests/TextParserTests.cs @@ -30,6 +30,9 @@ public class TextParserTests private const string testInput9 = @"2 4 6 4 1 3 5 4 7 6 4 6 8 3 9"; + private const string testInput10 = @"abc + bca + cab"; [Fact] public void LineParser_TestSimpleRepetition() @@ -322,7 +325,7 @@ public class TextParserTests [Fact] public void DataManipulator_SimpleOneDimensionalTest() - { + { var schemaBuilder = new InputSchemaBuilder(); var schema = schemaBuilder .Repeat() @@ -351,7 +354,7 @@ public class TextParserTests [Fact] public void DataManipulator_SimpleTwoDimensionalTest() - { + { var schemaBuilder = new InputSchemaBuilder(); var schema = schemaBuilder .Repeat() @@ -389,4 +392,27 @@ public class TextParserTests Assert.Equal(Direction.SE, searchResults[4].Direction); Assert.Equal(Direction.W, searchResults[5].Direction); } + + [Fact] + public void TextPArser_TestReadingChars() + { + var schemaBuilder = new InputSchemaBuilder(); + var schema = schemaBuilder + .Repeat() + .Expect(InputType.Char) + .EndRepetition() + .Build(); + + var parser = new TextParser(schema); + var row = parser + .SetInputText(testInput10) + .Parse() + .AsListRows(); + + Assert.Equal(3, row.Count); + Assert.Equal("a", row[0][0]); + Assert.Equal(3, row[0].Count); + Assert.Equal(3, row[1].Count); + Assert.Equal(3, row[2].Count); + } } diff --git a/TextParser/Schema/BlockType.cs b/TextParser/Schema/BlockType.cs index 15f0bfd..cf2f494 100644 --- a/TextParser/Schema/BlockType.cs +++ b/TextParser/Schema/BlockType.cs @@ -4,10 +4,11 @@ public enum BlockType { Integer = 1, - String = 2, + Char = 2, + String = 4, // technically not a block type but keeping it here for consistency/having all types in one place - Fragment = 4, - FixedRepetition = 8, - GreedyRepetition = 16, - NonZeroRepetition = 32, + Fragment = 8, + FixedRepetition = 16, + GreedyRepetition = 32, + NonZeroRepetition = 64, } diff --git a/TextParser/Schema/BuildingBlocks/BuildingBlockBase.cs b/TextParser/Schema/BuildingBlocks/BuildingBlockBase.cs index 0931f32..1295c12 100644 --- a/TextParser/Schema/BuildingBlocks/BuildingBlockBase.cs +++ b/TextParser/Schema/BuildingBlocks/BuildingBlockBase.cs @@ -8,7 +8,7 @@ abstract class BuildingBlockBase : IBuildingBlock { } - public abstract IToken ParseWord(InputProvider inputs); + public abstract List ParseWord(InputProvider inputs); public abstract bool CanParseWord(InputProvider inputs); diff --git a/TextParser/Schema/BuildingBlocks/CharBlock.cs b/TextParser/Schema/BuildingBlocks/CharBlock.cs new file mode 100644 index 0000000..e8fe029 --- /dev/null +++ b/TextParser/Schema/BuildingBlocks/CharBlock.cs @@ -0,0 +1,49 @@ +namespace Parsing.Schema.BuildingBlocks; + +using Parsing.Tokenization; + +class CharBlock : BuildingBlockBase +{ + + public CharBlock() + { + } + + public override List ParseWord(InputProvider inputs) + { + var tokenList = new List(); + foreach (char c in inputs.YieldWord()) + { + tokenList.Add(new StringToken(c.ToString())); + } + return tokenList; + } + + public override bool CanParseWord(InputProvider inputs) + { + string word = string.Empty; + using (inputs.GetLookaheadContext()) + { + word = inputs.YieldWord(); + } + + return this.CanParseWord(word); + } + + public override bool CanParseWord(string word) + { + // Here we need to ensure we are not matching any non-string tokens, since string can match pretty much anything + IntegerBlock intBlock = new IntegerBlock(); + if (intBlock.CanParseWord(word)) + { + return false; + } + + return true; + } + + public override BlockType GetBlockType() + { + return BlockType.String; + } +} \ No newline at end of file diff --git a/TextParser/Schema/BuildingBlocks/FixedRepetitionBlock.cs b/TextParser/Schema/BuildingBlocks/FixedRepetitionBlock.cs index 27e40a0..1bc89d9 100644 --- a/TextParser/Schema/BuildingBlocks/FixedRepetitionBlock.cs +++ b/TextParser/Schema/BuildingBlocks/FixedRepetitionBlock.cs @@ -20,7 +20,7 @@ class FixedRepetitionBlock : BuildingBlockBase this.context = this.inputSchema.CreateContext(); } - public override IToken ParseWord(InputProvider inputs) + public override List ParseWord(InputProvider inputs) { var result = inputSchema.ProcessNextWord(context, inputs); if (context.HasFinished) @@ -31,7 +31,7 @@ class FixedRepetitionBlock : BuildingBlockBase this.context = this.inputSchema.CreateContext(); } } - return result.Single(); + return result; } public override bool CanParseWord(InputProvider inputs) diff --git a/TextParser/Schema/BuildingBlocks/GreedyRepetitionBlock.cs b/TextParser/Schema/BuildingBlocks/GreedyRepetitionBlock.cs index 773c1c6..5b9062a 100644 --- a/TextParser/Schema/BuildingBlocks/GreedyRepetitionBlock.cs +++ b/TextParser/Schema/BuildingBlocks/GreedyRepetitionBlock.cs @@ -15,14 +15,14 @@ class GreedyRepetitionBlock : BuildingBlockBase this.context = this.inputSchema.CreateContext(); } - public override IToken ParseWord(InputProvider inputs) + public override List ParseWord(InputProvider inputs) { var result = inputSchema.ProcessNextWord(context, inputs); if (!this.CanParseWord(inputs)) { this.context = this.inputSchema.CreateContext(); } - return result.Single(); + return result; } public override bool CanParseWord(InputProvider inputs) diff --git a/TextParser/Schema/BuildingBlocks/IBuildingBlock.cs b/TextParser/Schema/BuildingBlocks/IBuildingBlock.cs index 5ccb8c8..ad7a238 100644 --- a/TextParser/Schema/BuildingBlocks/IBuildingBlock.cs +++ b/TextParser/Schema/BuildingBlocks/IBuildingBlock.cs @@ -4,7 +4,7 @@ using Parsing.Tokenization; public interface IBuildingBlock { - public IToken ParseWord(InputProvider inputs); + public List ParseWord(InputProvider inputs); public bool CanParseWord(InputProvider inputs); diff --git a/TextParser/Schema/BuildingBlocks/IntegerBlock.cs b/TextParser/Schema/BuildingBlocks/IntegerBlock.cs index 5db0f7e..a5ef5ed 100644 --- a/TextParser/Schema/BuildingBlocks/IntegerBlock.cs +++ b/TextParser/Schema/BuildingBlocks/IntegerBlock.cs @@ -9,9 +9,9 @@ class IntegerBlock : BuildingBlockBase { } - public override IToken ParseWord(InputProvider inputs) + public override List ParseWord(InputProvider inputs) { - return new IntegerToken(inputs.YieldWord()); + return new List() { new IntegerToken(inputs.YieldWord()) }; } public override bool CanParseWord(InputProvider inputs) diff --git a/TextParser/Schema/BuildingBlocks/StringBlock.cs b/TextParser/Schema/BuildingBlocks/StringBlock.cs index e873ba8..db67695 100644 --- a/TextParser/Schema/BuildingBlocks/StringBlock.cs +++ b/TextParser/Schema/BuildingBlocks/StringBlock.cs @@ -9,9 +9,9 @@ class StringBlock : BuildingBlockBase { } - public override IToken ParseWord(InputProvider inputs) + public override List ParseWord(InputProvider inputs) { - return new StringToken(inputs.YieldWord()); + return new List() { new StringToken(inputs.YieldWord()) }; } public override bool CanParseWord(InputProvider inputs) diff --git a/TextParser/Schema/InputSchema.cs b/TextParser/Schema/InputSchema.cs index 3f4e199..0d3dbc0 100644 --- a/TextParser/Schema/InputSchema.cs +++ b/TextParser/Schema/InputSchema.cs @@ -28,15 +28,13 @@ public class InputSchema : ISchema public List ProcessNextWord(InputSchemaContext currentContext, InputProvider inputs) { var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex]; - var token = nextBlock.ParseWord(inputs); + var tokens = nextBlock.ParseWord(inputs); if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset(inputs)) { currentContext.lastProcessedBlockIndex++; currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count; } - var newTokenList = new List(); - newTokenList.Add(token); - return newTokenList; + return tokens; } public bool CanProcessNextWord(InputSchemaContext currentContext, InputProvider inputs) diff --git a/TextParser/Schema/InputSchemaBuilder.cs b/TextParser/Schema/InputSchemaBuilder.cs index c2a01fa..645794f 100644 --- a/TextParser/Schema/InputSchemaBuilder.cs +++ b/TextParser/Schema/InputSchemaBuilder.cs @@ -21,6 +21,9 @@ public class InputSchemaBuilder : RepetitionSchemaBuilder ConvertData(this List tokenList, Func converter) where TTokenType : IValueToken + { + var newList = new List(); + foreach (var token in tokenList) + { + var typedToken = token as IValueToken; + if (typedToken == null) + { + throw new Exception("Invalid Token type encountered during value conversion"); + } + + newList.Add(converter(typedToken.GetValue())); + } + return newList; + } + + public static List ConvertData(this List tokenList, Func> converter) where TTokenType : IValueToken + { + var newList = new List(); + foreach (var token in tokenList) + { + var typedToken = token as IValueToken; + if (typedToken == null) + { + throw new Exception("Invalid Token type encountered during value conversion"); + } + + newList.AddRange(converter(typedToken.GetValue())); + } + return newList; + } + + public static List> ConvertData(this List> tokenListList, Func converter) where TTokenType : IValueToken + { + var newListList = new List>(); + foreach (var tokenList in tokenListList) + { + newListList.Add(tokenList.ConvertData(converter)); + } + return newListList; + } +} + +public static class DataManipulationHelpers +{ + public static TType ReduceData(this List data, Func reducer) + { + if (data.Count < 2) + { + return data[0]; + } + TType result = data[0]; + for (int i = 1; i < data.Count; i++) + { + result = reducer(result, data[i]); + } + return result; + } + + public static TType ReduceData(this List data, Func, TType> reducer) + { + return reducer(data); + } +} + +public class TokenConverter +{ + protected List> rawTokens = new List>(); + + public TokenConverter() + { + } + + private List AsGenericCollection() where T : ICollection, new() + { + List returnData = new List(); + foreach (var tokenRow in this.rawTokens) + { + T newRow = new T(); + foreach (IToken token in tokenRow) + { + if (token == null) + { + throw new Exception("No token was provided, but token was expected!"); + } + IValueToken? valueToken = token as IValueToken; + if (valueToken == null) + { + throw new Exception("Provided token is not a ValueToken"); + } + newRow.Add(valueToken.GetValue()); + } + + returnData.Add(newRow); + } + return returnData; + } + + private void CheckConversionPrerequisites() + { + // in order to convert rows to columns or grid we require every row to have the same length + int rowLength = this.rawTokens[0].Count; + + foreach (var tokenRow in this.rawTokens) + { + if (tokenRow.Count != rowLength) + { + throw new Exception("Attempted to convert token dataset that is not able to be converted!"); + } + } + } + + public List AsSingleStream() + { + List returnData = new List(); + foreach (var tokenRow in this.rawTokens) + { + foreach (IToken token in tokenRow) + { + if (token == null) + { + throw new Exception("No token was provided, but token was expected!"); + } + IValueToken? valueToken = token as IValueToken; + if (valueToken == null) + { + throw new Exception("Provided token is not a ValueToken"); + } + returnData.Add(valueToken.GetValue()); + } + } + return returnData; + } + + public List AsRows() + { + var listRows = this.AsListRows(); + var newList = new List(); + + foreach (var rowList in listRows) + { + newList.Add(rowList.ToArray()); + } + + return newList; + } + + public List> AsListRows() + { + return this.AsGenericCollection, T>(); + } + + public List AsColumns() + { + var listColumns = this.AsListColumns(); + var newList = new List(); + + foreach (var columnList in listColumns) + { + newList.Add(columnList.ToArray()); + } + + return newList; + } + + public List> AsListColumns() + { + this.CheckConversionPrerequisites(); + var rows = AsListRows(); + + var columns = new List>(); + for (int i = 0; i < rows[0].Count; i++) + { + columns.Add(new List()); + } + + foreach (var row in rows) + { + for (int i = 0; i < row.Count; i++) + { + columns[i].Add(row[i]); + } + } + + return columns; + } + + public T[][] AsGrid() + { + this.CheckConversionPrerequisites(); + var rowsList = AsRows(); + return rowsList.ToArray(); + } + + public List> AsRawData() + { + return this.rawTokens; + } + + public List AsFragments() + { + var items = this.AsSingleStream(); + var newList = new List(); + + foreach (var item in items) + { + var typedItem = item as Fragment; + if (typedItem == null) + { + throw new Exception("Invalid token type encountered"); + } + newList.Add(typedItem); + } + + return newList; + } +}