fix: fix bugs with fragment parsing support, refs: NOISSUE

This commit is contained in:
Simon Diesenreiter 2024-12-03 21:06:00 +01:00
parent 29f7aa37da
commit 62092d0380
7 changed files with 129 additions and 87 deletions

View File

@ -15,6 +15,11 @@ public class TextParserTests
private const string testInput4 = @"2 ab ba fd er sd
8 cd dc
7 uh 6 yp rt";
private const string testInput5 = @"asdfnums(2,5,3)ght
cv strs(test) jh 4,3,2
34,54,2nums(2,8) strs(aa,ab,ba,bb)aa,bb";
[Fact]
public void LineParser_TestSimpleRepetition()
@ -208,46 +213,48 @@ public class TextParserTests
.StartOptions()
.Option()
.Expect("nums(")
.Expect(InputType.Integer)
.Expect(InputType.Integer, "num")
.Repeat()
.Expect(",")
.Expect(InputType.Integer)
.Expect(InputType.Integer, "num")
.EndRepetition()
.Expect(")")
.Option()
.Expect("strs(")
.Expect(InputType.String)
.Expect(InputType.String, "str")
.Repeat()
.Expect(",")
.Expect(InputType.String)
.Expect(InputType.String, "str")
.EndRepetition()
.Expect(")")
.EndOptions()
.Build();
var parser = new TextParser<FragmentSchemaContext>(schema);
var rows = parser
.SetInputText(testInput4)
var fragmentData = parser
.SetInputText(testInput5)
.Parse()
.AsFragments();
Assert.Equal(3, rows.Count);
Assert.Equal(6, rows[0].Count);
Assert.Equal(3, rows[1].Count);
Assert.Equal(5, rows[2].Count);
// Assert.Equal(InputType.Integer, rows[0][0].GetInputType());
// Assert.Equal(InputType.String, rows[0][1].GetInputType());
// Assert.Equal(InputType.String, rows[0][2].GetInputType());
// Assert.Equal(InputType.String, rows[0][3].GetInputType());
// Assert.Equal(InputType.String, rows[0][4].GetInputType());
// Assert.Equal(InputType.String, rows[0][5].GetInputType());
// Assert.Equal(InputType.Integer, rows[1][0].GetInputType());
// Assert.Equal(InputType.String, rows[1][1].GetInputType());
// Assert.Equal(InputType.String, rows[1][2].GetInputType());
// Assert.Equal(InputType.Integer, rows[2][0].GetInputType());
// Assert.Equal(InputType.String, rows[2][1].GetInputType());
// Assert.Equal(InputType.Integer, rows[2][2].GetInputType());
// Assert.Equal(InputType.String, rows[2][3].GetInputType());
// Assert.Equal(InputType.String, rows[2][4].GetInputType());
var convertedData = fragmentData
.ConvertAll((Fragment f) =>
{
int numSum = 0;
foreach (var numString in f["num"])
{
numSum += int.Parse(numString);
}
return f["num"].Count + f["str"].Count + numSum;
});
Assert.Equal(4, fragmentData.Count);
Assert.Equal(3, fragmentData[0]["num"].Count);
Assert.Single(fragmentData[1]["str"]);
Assert.Equal(2, fragmentData[2]["num"].Count);
Assert.Equal(4, fragmentData[3]["str"].Count);
Assert.Equal(13, convertedData[0]);
Assert.Equal(1, convertedData[1]);
Assert.Equal(12, convertedData[2]);
Assert.Equal(4, convertedData[3]);
}
}

View File

@ -15,10 +15,12 @@ public class FragmentSchemaContext : ISchemaContext
public class FragmentSchema : ISchema<FragmentSchemaContext>
{
private string fragmentRegex;
private List<string> namedGroups = new List<string>();
public FragmentSchema(string fragmentRegex)
/// <summary>
/// Creates a fragment schema from a regular-expression pattern and the names of
/// the capture groups whose captures should be copied into each produced token.
/// </summary>
/// <param name="fragmentRegex">Pattern stored for later matching against input words.</param>
/// <param name="namedGroups">
/// Names of the regex groups to read from every match. The list is copied, so later
/// changes to the caller's list do not affect this schema. A null list is treated as empty.
/// </param>
public FragmentSchema(string fragmentRegex, List<string> namedGroups)
{
    this.fragmentRegex = fragmentRegex;

    // Take a snapshot instead of aliasing the caller's list: the builder hands over
    // its own mutable list and keeps appending to it whenever another named
    // expectation is added, which would otherwise silently alter this schema.
    this.namedGroups = namedGroups is null
        ? new List<string>()
        : new List<string>(namedGroups);
}
public List<IToken> ProcessNextWord(FragmentSchemaContext currentContext, InputProvider inputs)
@ -29,17 +31,23 @@ public class FragmentSchema : ISchema<FragmentSchemaContext>
// one token per match
foreach (Match match in r.Matches(inputs.YieldWord()))
{
var newToken = new FragmentToken(match.Result("$1"));
var newToken = new FragmentToken(match.Value);
// token contains data from all included matches
foreach (var groupKey in match.Groups.Keys)
foreach (var groupName in this.namedGroups)
{
List<string> matchedSubstrings = new List<string>();
foreach (var capture in match.Groups[groupKey].Captures)
var captureList = new List<string>();
foreach (Capture capture in match.Groups[groupName].Captures)
{
//matchedSubstrings.Add(capture.Value);
captureList.Add(capture.Value);
}
newToken.AddMatch(groupKey, matchedSubstrings);
newToken.AddMatch(groupName, captureList);
}
tokenList.Add(newToken);
}
if (!inputs.CanYieldWord())
{
currentContext.HasFinished = true;
}
return tokenList;
@ -69,10 +77,19 @@ public class FragmentSchema : ISchema<FragmentSchemaContext>
InputProvider inputs = new InputProvider(words);
var overallContext = this.CreateContext();
while (this.CanProcessNextWord(overallContext, inputs))
while (!overallContext.HasFinished && inputs.CanYieldWord())
{
if (this.CanProcessNextWord(overallContext, inputs))
{
tokens.AddRange(this.ProcessNextWord(overallContext, inputs));
}
else
{
inputs.SkipCurrentWord();
}
}
overallContext.HasFinished = true;
return tokens;
}

View File

@ -7,6 +7,8 @@ public class FragmentSchemaBuilder : RepetitionSchemaBuilder<FragmentSchemaBuild
{
protected string fragmentRegex = @"";
private List<string> namedGroups = new List<string>();
/// <summary>
/// Creates an empty builder. The constructor body performs no work; the regex text
/// and the list of named groups start from their field initializers.
/// </summary>
public FragmentSchemaBuilder()
{
}
@ -39,6 +41,7 @@ public class FragmentSchemaBuilder : RepetitionSchemaBuilder<FragmentSchemaBuild
if (!string.IsNullOrEmpty(name))
{
groupNamePrefix = "?<" + name + ">";
namedGroups.Add(name);
}
switch (type)
{
@ -122,7 +125,7 @@ public class FragmentSchemaBuilder : RepetitionSchemaBuilder<FragmentSchemaBuild
public FragmentSchema Build()
{
var schema = new FragmentSchema(this.fragmentRegex);
var schema = new FragmentSchema(this.fragmentRegex, this.namedGroups);
return schema;
}
}

View File

@ -9,7 +9,7 @@ public abstract class RepetitionSchemaBuilder<S, T, U> where S : RepetitionSchem
return newBuilder;
}
public required S UpperLayerBuilder { get; set; }
public S? UpperLayerBuilder { get; set; }
public int NumRepetition { get; set; }

View File

@ -5,49 +5,71 @@ using System.Collections.Generic;
using Parsing.Schema;
using Parsing.Tokenization;
public static class ConversionHelpers
/// <summary>
/// Extension methods that turn lists of tokens into lists of converted values.
/// </summary>
public static class DataConversionHelpers
{
    /// <summary>
    /// Converts every token in <paramref name="tokenList"/> into exactly one output value.
    /// </summary>
    /// <typeparam name="TTokenType">
    /// Token type named by the caller. It only appears in the generic constraint; the
    /// runtime check is made against <see cref="IValueToken{TOldType}"/>.
    /// </typeparam>
    /// <typeparam name="TNewType">Type of the converted values.</typeparam>
    /// <typeparam name="TOldType">Type of the value carried by each token.</typeparam>
    /// <param name="tokenList">Tokens to convert, in order.</param>
    /// <param name="converter">Maps a token's value to the output value.</param>
    /// <returns>A new list with one converted value per input token, in input order.</returns>
    /// <exception cref="InvalidCastException">
    /// A token is null or does not implement <see cref="IValueToken{TOldType}"/>.
    /// </exception>
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        var newList = new List<TNewType>(tokenList.Count);
        foreach (var token in tokenList)
        {
            newList.Add(converter(GetTokenValue<TOldType>(token)));
        }

        return newList;
    }

    /// <summary>
    /// Converts every token in <paramref name="tokenList"/> into zero or more output
    /// values and concatenates the results into a single flat list.
    /// </summary>
    /// <typeparam name="TTokenType">
    /// Token type named by the caller. It only appears in the generic constraint; the
    /// runtime check is made against <see cref="IValueToken{TOldType}"/>.
    /// </typeparam>
    /// <typeparam name="TNewType">Element type of the converted values.</typeparam>
    /// <typeparam name="TOldType">Type of the value carried by each token.</typeparam>
    /// <param name="tokenList">Tokens to convert, in order.</param>
    /// <param name="converter">Maps a token's value to a list of output values.</param>
    /// <returns>A new list holding all values produced by the converter, in input order.</returns>
    /// <exception cref="InvalidCastException">
    /// A token is null or does not implement <see cref="IValueToken{TOldType}"/>.
    /// </exception>
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, List<TNewType>> converter) where TTokenType : IValueToken<TOldType>
    {
        var newList = new List<TNewType>();
        foreach (var token in tokenList)
        {
            newList.AddRange(converter(GetTokenValue<TOldType>(token)));
        }

        return newList;
    }

    /// <summary>
    /// Applies the one-value-per-token conversion to every inner list of
    /// <paramref name="tokenListList"/>.
    /// </summary>
    /// <typeparam name="TTokenType">Token type named by the caller; forwarded unchanged.</typeparam>
    /// <typeparam name="TNewType">Type of the converted values.</typeparam>
    /// <typeparam name="TOldType">Type of the value carried by each token.</typeparam>
    /// <param name="tokenListList">Lists of tokens to convert.</param>
    /// <param name="converter">Maps a token's value to the output value.</param>
    /// <returns>A new list of lists with the same shape as the input.</returns>
    /// <exception cref="InvalidCastException">
    /// A token is null or does not implement <see cref="IValueToken{TOldType}"/>.
    /// </exception>
    public static List<List<TNewType>> ConvertData<TTokenType, TNewType, TOldType>(this List<List<IToken>> tokenListList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        var newListList = new List<List<TNewType>>(tokenListList.Count);
        foreach (var tokenList in tokenListList)
        {
            newListList.Add(tokenList.ConvertData<TTokenType, TNewType, TOldType>(converter));
        }

        return newListList;
    }

    // Shared validation for the token-level overloads: returns the value carried by
    // the token, or throws when the token is not a value token of the expected type.
    private static TOldType GetTokenValue<TOldType>(IToken token)
    {
        if (token is IValueToken<TOldType> typedToken)
        {
            return typedToken.GetValue();
        }

        // InvalidCastException still derives from Exception, so existing
        // catch (Exception) handlers keep working; the message is unchanged.
        throw new InvalidCastException("Invalid Token type encountered during value conversion");
    }
}
/// <summary>
/// Extension methods that collapse a list of values into a single value.
/// </summary>
public static class DataManipulationHelpers
{
    /// <summary>
    /// Folds <paramref name="data"/> from left to right with <paramref name="reducer"/>,
    /// using the first element as the starting value.
    /// </summary>
    /// <typeparam name="TType">Element type of the list and of the result.</typeparam>
    /// <param name="data">Values to reduce.</param>
    /// <param name="reducer">Combines the running result (first argument) with the next element.</param>
    /// <returns>
    /// The default value of <typeparamref name="TType"/> for an empty list, the single
    /// element for a one-element list (the reducer is not called), otherwise the folded result.
    /// </returns>
    public static TType ReduceData<TType>(this List<TType> data, Func<TType, TType, TType> reducer)
    {
        // An empty list has no first element to start from; fall back to the default
        // value instead of failing on data[0].
        if (data.Count == 0)
        {
            return default(TType)!;
        }

        TType result = data[0];
        for (int i = 1; i < data.Count; i++)
        {
            result = reducer(result, data[i]);
        }

        return result;
    }

    /// <summary>
    /// Reduces <paramref name="data"/> by handing the whole list to <paramref name="reducer"/>.
    /// </summary>
    /// <typeparam name="TType">Element type of the list and of the result.</typeparam>
    /// <param name="data">Values to reduce; passed to the reducer as-is.</param>
    /// <param name="reducer">Computes the result from the complete list.</param>
    /// <returns>Whatever <paramref name="reducer"/> returns.</returns>
    public static TType ReduceData<TType>(this List<TType> data, Func<List<TType>, TType> reducer)
    {
        return reducer(data);
    }
}
public class TokenConverter
@ -101,20 +123,7 @@ public class TokenConverter
{
List<T> returnData = new List<T>();
foreach (var tokenRow in this.rawTokens)
{ // Assert.Equal(InputType.Integer, rows[0][0].GetInputType());
// Assert.Equal(InputType.String, rows[0][1].GetInputType());
// Assert.Equal(InputType.String, rows[0][2].GetInputType());
// Assert.Equal(InputType.String, rows[0][3].GetInputType());
// Assert.Equal(InputType.String, rows[0][4].GetInputType());
// Assert.Equal(InputType.String, rows[0][5].GetInputType());
// Assert.Equal(InputType.Integer, rows[1][0].GetInputType());
// Assert.Equal(InputType.String, rows[1][1].GetInputType());
// Assert.Equal(InputType.String, rows[1][2].GetInputType());
// Assert.Equal(InputType.Integer, rows[2][0].GetInputType());
// Assert.Equal(InputType.String, rows[2][1].GetInputType());
// Assert.Equal(InputType.Integer, rows[2][2].GetInputType());
// Assert.Equal(InputType.String, rows[2][3].GetInputType());
// Assert.Equal(InputType.String, rows[2][4].GetInputType());
{
foreach (IToken token in tokenRow)
{
if (token == null)

View File

@ -1,5 +1,6 @@
namespace Parsing.Tokenization;
using System.Runtime.CompilerServices;
using Parsing.Schema;
public class Fragment : Dictionary<string, List<string>>

View File

@ -48,6 +48,11 @@ public class InputProvider
return this.CurrentPosition < this.words.Length;
}
/// <summary>
/// Moves the read position forward by one word without returning that word.
/// No bounds check is performed here.
/// </summary>
public void SkipCurrentWord()
{
    this.CurrentPosition++;
}
public string YieldWord()
{
if (!this.CanYieldWord())