fix: fix bugs with fragment parsing support, refs: NOISSUE

This commit is contained in:
Simon Diesenreiter 2024-12-03 21:06:00 +01:00
parent 29f7aa37da
commit 62092d0380
7 changed files with 129 additions and 87 deletions

View File

@ -15,6 +15,11 @@ public class TextParserTests
private const string testInput4 = @"2 ab ba fd er sd private const string testInput4 = @"2 ab ba fd er sd
8 cd dc 8 cd dc
7 uh 6 yp rt"; 7 uh 6 yp rt";
private const string testInput5 = @"asdfnums(2,5,3)ght
cv strs(test) jh 4,3,2
34,54,2nums(2,8) strs(aa,ab,ba,bb)aa,bb";
[Fact] [Fact]
public void LineParser_TestSimpleRepetition() public void LineParser_TestSimpleRepetition()
@ -208,46 +213,48 @@ public class TextParserTests
.StartOptions() .StartOptions()
.Option() .Option()
.Expect("nums(") .Expect("nums(")
.Expect(InputType.Integer) .Expect(InputType.Integer, "num")
.Repeat() .Repeat()
.Expect(",") .Expect(",")
.Expect(InputType.Integer) .Expect(InputType.Integer, "num")
.EndRepetition() .EndRepetition()
.Expect(")") .Expect(")")
.Option() .Option()
.Expect("strs(") .Expect("strs(")
.Expect(InputType.String) .Expect(InputType.String, "str")
.Repeat() .Repeat()
.Expect(",") .Expect(",")
.Expect(InputType.String) .Expect(InputType.String, "str")
.EndRepetition() .EndRepetition()
.Expect(")") .Expect(")")
.EndOptions() .EndOptions()
.Build(); .Build();
var parser = new TextParser<FragmentSchemaContext>(schema); var parser = new TextParser<FragmentSchemaContext>(schema);
var rows = parser var fragmentData = parser
.SetInputText(testInput4) .SetInputText(testInput5)
.Parse() .Parse()
.AsFragments(); .AsFragments();
Assert.Equal(3, rows.Count); var convertedData = fragmentData
Assert.Equal(6, rows[0].Count); .ConvertAll((Fragment f) =>
Assert.Equal(3, rows[1].Count); {
Assert.Equal(5, rows[2].Count); int numSum = 0;
// Assert.Equal(InputType.Integer, rows[0][0].GetInputType()); foreach (var numString in f["num"])
// Assert.Equal(InputType.String, rows[0][1].GetInputType()); {
// Assert.Equal(InputType.String, rows[0][2].GetInputType()); numSum += int.Parse(numString);
// Assert.Equal(InputType.String, rows[0][3].GetInputType()); }
// Assert.Equal(InputType.String, rows[0][4].GetInputType()); return f["num"].Count + f["str"].Count + numSum;
// Assert.Equal(InputType.String, rows[0][5].GetInputType()); });
// Assert.Equal(InputType.Integer, rows[1][0].GetInputType());
// Assert.Equal(InputType.String, rows[1][1].GetInputType()); Assert.Equal(4, fragmentData.Count);
// Assert.Equal(InputType.String, rows[1][2].GetInputType()); Assert.Equal(3, fragmentData[0]["num"].Count);
// Assert.Equal(InputType.Integer, rows[2][0].GetInputType()); Assert.Single(fragmentData[1]["str"]);
// Assert.Equal(InputType.String, rows[2][1].GetInputType()); Assert.Equal(2, fragmentData[2]["num"].Count);
// Assert.Equal(InputType.Integer, rows[2][2].GetInputType()); Assert.Equal(4, fragmentData[3]["str"].Count);
// Assert.Equal(InputType.String, rows[2][3].GetInputType()); Assert.Equal(13, convertedData[0]);
// Assert.Equal(InputType.String, rows[2][4].GetInputType()); Assert.Equal(1, convertedData[1]);
Assert.Equal(12, convertedData[2]);
Assert.Equal(4, convertedData[3]);
} }
} }

View File

@ -15,10 +15,12 @@ public class FragmentSchemaContext : ISchemaContext
public class FragmentSchema : ISchema<FragmentSchemaContext> public class FragmentSchema : ISchema<FragmentSchemaContext>
{ {
private string fragmentRegex; private string fragmentRegex;
private List<string> namedGroups = new List<string>();
/// <summary>
/// Creates a schema from a regular expression and the names of the capture
/// groups whose captures should be copied into each produced token.
/// </summary>
/// <param name="fragmentRegex">Pattern text stored on the schema.</param>
/// <param name="namedGroups">
/// Names of the regex groups to read from every match. The list is copied, so
/// later changes to the caller's list (for example a builder that keeps being
/// used after building) do not alter this schema.
/// </param>
/// <exception cref="System.ArgumentNullException">
/// Thrown by the list copy when <paramref name="namedGroups"/> is null.
/// </exception>
public FragmentSchema(string fragmentRegex, List<string> namedGroups)
{
    this.fragmentRegex = fragmentRegex;
    // Snapshot the names: the regex string is immutable, so the group list
    // must not keep changing behind the schema's back either.
    this.namedGroups = new List<string>(namedGroups);
}
public List<IToken> ProcessNextWord(FragmentSchemaContext currentContext, InputProvider inputs) public List<IToken> ProcessNextWord(FragmentSchemaContext currentContext, InputProvider inputs)
@ -29,17 +31,23 @@ public class FragmentSchema : ISchema<FragmentSchemaContext>
// one token per match // one token per match
foreach (Match match in r.Matches(inputs.YieldWord())) foreach (Match match in r.Matches(inputs.YieldWord()))
{ {
var newToken = new FragmentToken(match.Result("$1")); var newToken = new FragmentToken(match.Value);
// token contains data from all included matches // token contains data from all included matches
foreach (var groupKey in match.Groups.Keys) foreach (var groupName in this.namedGroups)
{ {
List<string> matchedSubstrings = new List<string>(); var captureList = new List<string>();
foreach (var capture in match.Groups[groupKey].Captures) foreach (Capture capture in match.Groups[groupName].Captures)
{ {
//matchedSubstrings.Add(capture.Value); captureList.Add(capture.Value);
} }
newToken.AddMatch(groupKey, matchedSubstrings); newToken.AddMatch(groupName, captureList);
} }
tokenList.Add(newToken);
}
if (!inputs.CanYieldWord())
{
currentContext.HasFinished = true;
} }
return tokenList; return tokenList;
@ -69,11 +77,20 @@ public class FragmentSchema : ISchema<FragmentSchemaContext>
InputProvider inputs = new InputProvider(words); InputProvider inputs = new InputProvider(words);
var overallContext = this.CreateContext(); var overallContext = this.CreateContext();
while (this.CanProcessNextWord(overallContext, inputs)) while (!overallContext.HasFinished && inputs.CanYieldWord())
{ {
tokens.AddRange(this.ProcessNextWord(overallContext, inputs)); if (this.CanProcessNextWord(overallContext, inputs))
{
tokens.AddRange(this.ProcessNextWord(overallContext, inputs));
}
else
{
inputs.SkipCurrentWord();
}
} }
overallContext.HasFinished = true;
return tokens; return tokens;
} }

View File

@ -7,6 +7,8 @@ public class FragmentSchemaBuilder : RepetitionSchemaBuilder<FragmentSchemaBuild
{ {
protected string fragmentRegex = @""; protected string fragmentRegex = @"";
private List<string> namedGroups = new List<string>();
/// <summary>
/// Creates a builder. The body is intentionally empty: the fields declared on
/// this class already carry their initial values.
/// </summary>
public FragmentSchemaBuilder() { }
@ -39,6 +41,7 @@ public class FragmentSchemaBuilder : RepetitionSchemaBuilder<FragmentSchemaBuild
if (!string.IsNullOrEmpty(name)) if (!string.IsNullOrEmpty(name))
{ {
groupNamePrefix = "?<" + name + ">"; groupNamePrefix = "?<" + name + ">";
namedGroups.Add(name);
} }
switch (type) switch (type)
{ {
@ -122,7 +125,7 @@ public class FragmentSchemaBuilder : RepetitionSchemaBuilder<FragmentSchemaBuild
/// <summary>
/// Produces a <see cref="FragmentSchema"/> from this builder's current regular
/// expression text and its list of named groups.
/// </summary>
/// <returns>A new schema instance.</returns>
public FragmentSchema Build() => new FragmentSchema(this.fragmentRegex, this.namedGroups);
} }

View File

@ -9,7 +9,7 @@ public abstract class RepetitionSchemaBuilder<S, T, U> where S : RepetitionSchem
return newBuilder; return newBuilder;
} }
public required S UpperLayerBuilder { get; set; } public S? UpperLayerBuilder { get; set; }
public int NumRepetition { get; set; } public int NumRepetition { get; set; }

View File

@ -5,49 +5,71 @@ using System.Collections.Generic;
using Parsing.Schema; using Parsing.Schema;
using Parsing.Tokenization; using Parsing.Tokenization;
/// <summary>
/// Extension methods that turn lists of tokens into lists of plain values by
/// passing each token's value through a caller-supplied converter.
/// </summary>
public static class DataConversionHelpers
{
    /// <summary>
    /// Converts every token in <paramref name="tokenList"/> to one output value.
    /// </summary>
    /// <param name="tokenList">Tokens to convert, processed in order.</param>
    /// <param name="converter">Maps a token's value to the output value.</param>
    /// <returns>One converted value per input token, in input order.</returns>
    /// <exception cref="Exception">
    /// A token is null or does not implement <c>IValueToken&lt;TOldType&gt;</c>.
    /// </exception>
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        var converted = new List<TNewType>();
        foreach (IToken current in tokenList)
        {
            if (current is not IValueToken<TOldType> valueToken)
            {
                throw new Exception("Invalid Token type encountered during value conversion");
            }

            TOldType rawValue = valueToken.GetValue();
            converted.Add(converter(rawValue));
        }

        return converted;
    }

    /// <summary>
    /// Converts every token in <paramref name="tokenList"/> to zero or more
    /// output values and concatenates the results into a single flat list.
    /// </summary>
    /// <param name="tokenList">Tokens to convert, processed in order.</param>
    /// <param name="converter">Maps a token's value to a list of output values.</param>
    /// <returns>All produced values, flattened, in input order.</returns>
    /// <exception cref="Exception">
    /// A token is null or does not implement <c>IValueToken&lt;TOldType&gt;</c>.
    /// </exception>
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, List<TNewType>> converter) where TTokenType : IValueToken<TOldType>
    {
        var flattened = new List<TNewType>();
        foreach (IToken current in tokenList)
        {
            if (current is not IValueToken<TOldType> valueToken)
            {
                throw new Exception("Invalid Token type encountered during value conversion");
            }

            List<TNewType> produced = converter(valueToken.GetValue());
            flattened.AddRange(produced);
        }

        return flattened;
    }

    /// <summary>
    /// Applies the single-value conversion to every inner list, keeping the
    /// outer/inner list structure.
    /// </summary>
    /// <param name="tokenListList">Lists of tokens to convert.</param>
    /// <param name="converter">Maps a token's value to the output value.</param>
    /// <returns>One converted list per input list, in input order.</returns>
    public static List<List<TNewType>> ConvertData<TTokenType, TNewType, TOldType>(this List<List<IToken>> tokenListList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        return tokenListList.ConvertAll(
            row => row.ConvertData<TTokenType, TNewType, TOldType>(converter));
    }
}
/// <summary>
/// Extension methods that collapse a list of values into a single value.
/// </summary>
public static class DataManipulationHelpers
{
    /// <summary>
    /// Folds the list from left to right: starts with the first element and
    /// repeatedly combines the running result with the next element.
    /// </summary>
    /// <param name="data">Values to reduce.</param>
    /// <param name="reducer">Combines the running result (first argument) with the next element.</param>
    /// <returns>
    /// The folded value; the only element for a one-element list; the default
    /// value of <typeparamref name="TType"/> for an empty list.
    /// </returns>
    public static TType ReduceData<TType>(this List<TType> data, Func<TType, TType, TType> reducer)
    {
        // An empty list has no first element to fall back on; indexing it
        // would throw, so hand back the type's default instead.
        if (data.Count == 0)
        {
            return default(TType)!;
        }

        TType result = data[0];
        for (int i = 1; i < data.Count; i++)
        {
            result = reducer(result, data[i]);
        }
        return result;
    }

    /// <summary>
    /// Reduces the list by handing the whole list to <paramref name="reducer"/>.
    /// </summary>
    /// <param name="data">Values to reduce.</param>
    /// <param name="reducer">Computes a single value from the complete list.</param>
    /// <returns>Whatever <paramref name="reducer"/> returns.</returns>
    public static TType ReduceData<TType>(this List<TType> data, Func<List<TType>, TType> reducer)
    {
        return reducer(data);
    }
}
public class TokenConverter public class TokenConverter
@ -101,20 +123,7 @@ public class TokenConverter
{ {
List<T> returnData = new List<T>(); List<T> returnData = new List<T>();
foreach (var tokenRow in this.rawTokens) foreach (var tokenRow in this.rawTokens)
{ // Assert.Equal(InputType.Integer, rows[0][0].GetInputType()); {
// Assert.Equal(InputType.String, rows[0][1].GetInputType());
// Assert.Equal(InputType.String, rows[0][2].GetInputType());
// Assert.Equal(InputType.String, rows[0][3].GetInputType());
// Assert.Equal(InputType.String, rows[0][4].GetInputType());
// Assert.Equal(InputType.String, rows[0][5].GetInputType());
// Assert.Equal(InputType.Integer, rows[1][0].GetInputType());
// Assert.Equal(InputType.String, rows[1][1].GetInputType());
// Assert.Equal(InputType.String, rows[1][2].GetInputType());
// Assert.Equal(InputType.Integer, rows[2][0].GetInputType());
// Assert.Equal(InputType.String, rows[2][1].GetInputType());
// Assert.Equal(InputType.Integer, rows[2][2].GetInputType());
// Assert.Equal(InputType.String, rows[2][3].GetInputType());
// Assert.Equal(InputType.String, rows[2][4].GetInputType());
foreach (IToken token in tokenRow) foreach (IToken token in tokenRow)
{ {
if (token == null) if (token == null)

View File

@ -1,5 +1,6 @@
namespace Parsing.Tokenization; namespace Parsing.Tokenization;
using System.Runtime.CompilerServices;
using Parsing.Schema; using Parsing.Schema;
public class Fragment : Dictionary<string, List<string>> public class Fragment : Dictionary<string, List<string>>

View File

@ -48,6 +48,11 @@ public class InputProvider
return this.CurrentPosition < this.words.Length; return this.CurrentPosition < this.words.Length;
} }
/// <summary>
/// Moves the current position forward by one without returning anything.
/// </summary>
public void SkipCurrentWord()
{
    this.CurrentPosition++;
}
public string YieldWord() public string YieldWord()
{ {
if (!this.CanYieldWord()) if (!this.CanYieldWord())