diff --git a/TextParser.Tests/TextParserTests.cs b/TextParser.Tests/TextParserTests.cs index 6a4af45..dffaa96 100644 --- a/TextParser.Tests/TextParserTests.cs +++ b/TextParser.Tests/TextParserTests.cs @@ -15,6 +15,11 @@ public class TextParserTests private const string testInput4 = @"2 ab ba fd er sd 8 cd dc 7 uh 6 yp rt"; + private const string testInput5 = @"asdfnums(2,5,3)ght + + cv strs(test) jh 4,3,2 + + 34,54,2nums(2,8) strs(aa,ab,ba,bb)aa,bb"; [Fact] public void LineParser_TestSimpleRepetition() @@ -208,46 +213,48 @@ public class TextParserTests .StartOptions() .Option() .Expect("nums(") - .Expect(InputType.Integer) + .Expect(InputType.Integer, "num") .Repeat() .Expect(",") - .Expect(InputType.Integer) + .Expect(InputType.Integer, "num") .EndRepetition() .Expect(")") .Option() .Expect("strs(") - .Expect(InputType.String) + .Expect(InputType.String, "str") .Repeat() .Expect(",") - .Expect(InputType.String) + .Expect(InputType.String, "str") .EndRepetition() .Expect(")") .EndOptions() .Build(); var parser = new TextParser(schema); - var rows = parser - .SetInputText(testInput4) + var fragmentData = parser + .SetInputText(testInput5) .Parse() .AsFragments(); - Assert.Equal(3, rows.Count); - Assert.Equal(6, rows[0].Count); - Assert.Equal(3, rows[1].Count); - Assert.Equal(5, rows[2].Count); - // Assert.Equal(InputType.Integer, rows[0][0].GetInputType()); - // Assert.Equal(InputType.String, rows[0][1].GetInputType()); - // Assert.Equal(InputType.String, rows[0][2].GetInputType()); - // Assert.Equal(InputType.String, rows[0][3].GetInputType()); - // Assert.Equal(InputType.String, rows[0][4].GetInputType()); - // Assert.Equal(InputType.String, rows[0][5].GetInputType()); - // Assert.Equal(InputType.Integer, rows[1][0].GetInputType()); - // Assert.Equal(InputType.String, rows[1][1].GetInputType()); - // Assert.Equal(InputType.String, rows[1][2].GetInputType()); - // Assert.Equal(InputType.Integer, rows[2][0].GetInputType()); - // Assert.Equal(InputType.String, rows[2][1].GetInputType()); - // Assert.Equal(InputType.Integer, rows[2][2].GetInputType()); - // Assert.Equal(InputType.String, rows[2][3].GetInputType()); - // Assert.Equal(InputType.String, rows[2][4].GetInputType()); + var convertedData = fragmentData + .ConvertAll((Fragment f) => + { + int numSum = 0; + foreach (var numString in f["num"]) + { + numSum += int.Parse(numString); + } + return f["num"].Count + f["str"].Count + numSum; + }); + + Assert.Equal(4, fragmentData.Count); + Assert.Equal(3, fragmentData[0]["num"].Count); + Assert.Single(fragmentData[1]["str"]); + Assert.Equal(2, fragmentData[2]["num"].Count); + Assert.Equal(4, fragmentData[3]["str"].Count); + Assert.Equal(13, convertedData[0]); + Assert.Equal(1, convertedData[1]); + Assert.Equal(12, convertedData[2]); + Assert.Equal(4, convertedData[3]); } } diff --git a/TextParser/Schema/FragmentSchema.cs b/TextParser/Schema/FragmentSchema.cs index 87e288c..cc47246 100644 --- a/TextParser/Schema/FragmentSchema.cs +++ b/TextParser/Schema/FragmentSchema.cs @@ -15,10 +15,12 @@ public class FragmentSchemaContext : ISchemaContext public class FragmentSchema : ISchema { private string fragmentRegex; + private List namedGroups = new List(); - public FragmentSchema(string fragmentRegex) + public FragmentSchema(string fragmentRegex, List namedGroups) { this.fragmentRegex = fragmentRegex; + this.namedGroups = namedGroups; } public List ProcessNextWord(FragmentSchemaContext currentContext, InputProvider inputs) @@ -29,17 +31,23 @@ public class FragmentSchema : ISchema // one token per match foreach (Match match in r.Matches(inputs.YieldWord())) { - var newToken = new FragmentToken(match.Result("$1")); + var newToken = new FragmentToken(match.Value); // token contains data from all included matches - foreach (var groupKey in match.Groups.Keys) + foreach (var groupName in this.namedGroups) { - List matchedSubstrings = new List(); - foreach (var capture in match.Groups[groupKey].Captures) + var captureList = new List(); + foreach (Capture capture in match.Groups[groupName].Captures) { - //matchedSubstrings.Add(capture.Value); + captureList.Add(capture.Value); } - newToken.AddMatch(groupKey, matchedSubstrings); + newToken.AddMatch(groupName, captureList); } + tokenList.Add(newToken); + } + + if (!inputs.CanYieldWord()) + { + currentContext.HasFinished = true; } return tokenList; @@ -69,11 +77,20 @@ public class FragmentSchema : ISchema InputProvider inputs = new InputProvider(words); var overallContext = this.CreateContext(); - while (this.CanProcessNextWord(overallContext, inputs)) + while (!overallContext.HasFinished && inputs.CanYieldWord()) { - tokens.AddRange(this.ProcessNextWord(overallContext, inputs)); + if (this.CanProcessNextWord(overallContext, inputs)) + { + tokens.AddRange(this.ProcessNextWord(overallContext, inputs)); + } + else + { + inputs.SkipCurrentWord(); + } } + overallContext.HasFinished = true; + return tokens; } diff --git a/TextParser/Schema/FragmentSchemaBuilder.cs b/TextParser/Schema/FragmentSchemaBuilder.cs index 9988220..464d601 100644 --- a/TextParser/Schema/FragmentSchemaBuilder.cs +++ b/TextParser/Schema/FragmentSchemaBuilder.cs @@ -7,6 +7,8 @@ public class FragmentSchemaBuilder : RepetitionSchemaBuilder namedGroups = new List(); + public FragmentSchemaBuilder() { } @@ -39,6 +41,7 @@ public class FragmentSchemaBuilder : RepetitionSchemaBuilder"; + namedGroups.Add(name); } switch (type) { @@ -122,7 +125,7 @@ public class FragmentSchemaBuilder : RepetitionSchemaBuilder where S : RepetitionSchem return newBuilder; } - public required S UpperLayerBuilder { get; set; } + public S? UpperLayerBuilder { get; set; } public int NumRepetition { get; set; } diff --git a/TextParser/TokenConverter.cs b/TextParser/TokenConverter.cs index d1b9cc4..79202be 100644 --- a/TextParser/TokenConverter.cs +++ b/TextParser/TokenConverter.cs @@ -5,49 +5,71 @@ using System.Collections.Generic; using Parsing.Schema; using Parsing.Tokenization; -public static class ConversionHelpers +public static class DataConversionHelpers { - // public static List ConvertData(this List tokenList, Func converter) where T : IValueToken - // { - // var newList = new List(); - // foreach (var token in tokenList) - // { - // var typedToken = token as IValueToken; - // if (typedToken == null) - // { - // throw new Exception("Invalid Token type encountered during value conversion"); - // } + public static List ConvertData(this List tokenList, Func converter) where TTokenType : IValueToken + { + var newList = new List(); + foreach (var token in tokenList) + { + var typedToken = token as IValueToken; + if (typedToken == null) + { + throw new Exception("Invalid Token type encountered during value conversion"); + } - // newList.Add(converter(typedToken.GetValue())); - // } - // return newList; - // } + newList.Add(converter(typedToken.GetValue())); + } + return newList; + } - // public static List ConvertData(this List tokenList, Func, V> converter) where T : IValueToken - // { - // var newList = new List(); - // foreach (var token in tokenList) - // { - // var typedToken = token as IValueToken; - // if (typedToken == null) - // { - // throw new Exception("Invalid Token type encountered during value conversion"); - // } + public static List ConvertData(this List tokenList, Func> converter) where TTokenType : IValueToken + { + var newList = new List(); + foreach (var token in tokenList) + { + var typedToken = token as IValueToken; + if (typedToken == null) + { + throw new Exception("Invalid Token type encountered during value conversion"); + } - // newList.AddRange(converter(typedToken.GetValue())); - // } - // return newList; - // } + newList.AddRange(converter(typedToken.GetValue())); + } + return newList; + } - // public static List ConvertData(this List> tokenListList, Func converter) where T : IValueToken - // { - // var newListList = new List>(); - // foreach (var tokenList in tokenListList) - // { - // newListList.Add(tokenList.ConvertData(converter)); - // } - // return newListList; - // } + public static List> ConvertData(this List> tokenListList, Func converter) where TTokenType : IValueToken + { + var newListList = new List>(); + foreach (var tokenList in tokenListList) + { + newListList.Add(tokenList.ConvertData(converter)); + } + return newListList; + } +} + +public static class DataManipulationHelpers +{ + public static TType ReduceData(this List data, Func reducer) + { + if (data.Count < 2) + { + return data[0] ?? default(TType); + } + TType result = data[0]; + for (int i = 1; i < data.Count; i++) + { + result = reducer(result, data[i]); + } + return result; + } + + public static TType ReduceData(this List data, Func, TType> reducer) + { + return reducer(data); + } } public class TokenConverter @@ -101,20 +123,7 @@ public class TokenConverter { List returnData = new List(); foreach (var tokenRow in this.rawTokens) - { // Assert.Equal(InputType.Integer, rows[0][0].GetInputType()); - // Assert.Equal(InputType.String, rows[0][1].GetInputType()); - // Assert.Equal(InputType.String, rows[0][2].GetInputType()); - // Assert.Equal(InputType.String, rows[0][3].GetInputType()); - // Assert.Equal(InputType.String, rows[0][4].GetInputType()); - // Assert.Equal(InputType.String, rows[0][5].GetInputType()); - // Assert.Equal(InputType.Integer, rows[1][0].GetInputType()); - // Assert.Equal(InputType.String, rows[1][1].GetInputType()); - // Assert.Equal(InputType.String, rows[1][2].GetInputType()); - // Assert.Equal(InputType.Integer, rows[2][0].GetInputType()); - // Assert.Equal(InputType.String, rows[2][1].GetInputType()); - // Assert.Equal(InputType.Integer, rows[2][2].GetInputType()); - // Assert.Equal(InputType.String, rows[2][3].GetInputType()); - // Assert.Equal(InputType.String, rows[2][4].GetInputType()); + { foreach (IToken token in tokenRow) { if (token == null) diff --git a/TextParser/Tokenization/FragmentToken.cs b/TextParser/Tokenization/FragmentToken.cs index bdf0f74..5484f5f 100644 --- a/TextParser/Tokenization/FragmentToken.cs +++ b/TextParser/Tokenization/FragmentToken.cs @@ -1,5 +1,6 @@ namespace Parsing.Tokenization; +using System.Runtime.CompilerServices; using Parsing.Schema; public class Fragment : Dictionary> diff --git a/TextParser/Tokenization/InputProvider.cs b/TextParser/Tokenization/InputProvider.cs index aacda6e..1349e8a 100644 --- a/TextParser/Tokenization/InputProvider.cs +++ b/TextParser/Tokenization/InputProvider.cs @@ -48,6 +48,11 @@ public class InputProvider return this.CurrentPosition < this.words.Length; } + public void SkipCurrentWord() + { + this.CurrentPosition += 1; + } + public string YieldWord() { if (!this.CanYieldWord())