generated from Templates/Dotnet_Library
feat: initial support for fragment parsing, ref: NOISSUE
This commit is contained in:
parent
5f07f16f1f
commit
29f7aa37da
@ -26,7 +26,7 @@ public class TextParserTests
|
|||||||
.EndRepetition()
|
.EndRepetition()
|
||||||
.Build();
|
.Build();
|
||||||
|
|
||||||
var parser = new LineParser(schema);
|
var parser = new LineParser<InputSchemaContext>(schema);
|
||||||
var tokens = parser.ParseLine(testInput1);
|
var tokens = parser.ParseLine(testInput1);
|
||||||
|
|
||||||
Assert.Equal(4, tokens.Count);
|
Assert.Equal(4, tokens.Count);
|
||||||
@ -51,7 +51,7 @@ public class TextParserTests
|
|||||||
.Expect(InputType.Integer)
|
.Expect(InputType.Integer)
|
||||||
.Build();
|
.Build();
|
||||||
|
|
||||||
var parser = new LineParser(schema);
|
var parser = new LineParser<InputSchemaContext>(schema);
|
||||||
var tokens = parser.ParseLine(testInput1);
|
var tokens = parser.ParseLine(testInput1);
|
||||||
|
|
||||||
Assert.Equal(4, tokens.Count);
|
Assert.Equal(4, tokens.Count);
|
||||||
@ -79,7 +79,7 @@ public class TextParserTests
|
|||||||
.EndRepetition()
|
.EndRepetition()
|
||||||
.Build();
|
.Build();
|
||||||
|
|
||||||
var parser = new LineParser(schema);
|
var parser = new LineParser<InputSchemaContext>(schema);
|
||||||
var tokens = parser.ParseLine(testInput2);
|
var tokens = parser.ParseLine(testInput2);
|
||||||
|
|
||||||
Assert.Equal(6, tokens.Count);
|
Assert.Equal(6, tokens.Count);
|
||||||
@ -107,7 +107,7 @@ public class TextParserTests
|
|||||||
.EndRepetition()
|
.EndRepetition()
|
||||||
.Build();
|
.Build();
|
||||||
|
|
||||||
var parser = new TextParser(schema);
|
var parser = new TextParser<InputSchemaContext>(schema);
|
||||||
var rows = parser
|
var rows = parser
|
||||||
.SetInputText(testInput3)
|
.SetInputText(testInput3)
|
||||||
.Parse()
|
.Parse()
|
||||||
@ -139,7 +139,7 @@ public class TextParserTests
|
|||||||
.EndRepetition()
|
.EndRepetition()
|
||||||
.Build();
|
.Build();
|
||||||
|
|
||||||
var parser = new TextParser(schema);
|
var parser = new TextParser<InputSchemaContext>(schema);
|
||||||
var columns = parser
|
var columns = parser
|
||||||
.SetInputText(testInput3)
|
.SetInputText(testInput3)
|
||||||
.Parse()
|
.Parse()
|
||||||
@ -174,7 +174,7 @@ public class TextParserTests
|
|||||||
.EndRepetition()
|
.EndRepetition()
|
||||||
.Build();
|
.Build();
|
||||||
|
|
||||||
var parser = new TextParser(schema);
|
var parser = new TextParser<InputSchemaContext>(schema);
|
||||||
var rows = parser
|
var rows = parser
|
||||||
.SetInputText(testInput4)
|
.SetInputText(testInput4)
|
||||||
.Parse()
|
.Parse()
|
||||||
@ -199,4 +199,55 @@ public class TextParserTests
|
|||||||
Assert.Equal(InputType.String, rows[2][3].GetInputType());
|
Assert.Equal(InputType.String, rows[2][3].GetInputType());
|
||||||
Assert.Equal(InputType.String, rows[2][4].GetInputType());
|
Assert.Equal(InputType.String, rows[2][4].GetInputType());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
public void FragmentParser_SimpleTest()
{
    // The schema offers two alternative fragment shapes:
    //   "nums(" <integer> ( "," <integer> )* ")"
    //   "strs(" <string>  ( "," <string>  )* ")"
    var fragmentSchema = new FragmentSchemaBuilder()
        .StartOptions()
        .Option()
            .Expect("nums(")
            .Expect(InputType.Integer)
            .Repeat()
                .Expect(",")
                .Expect(InputType.Integer)
            .EndRepetition()
            .Expect(")")
        .Option()
            .Expect("strs(")
            .Expect(InputType.String)
            .Repeat()
                .Expect(",")
                .Expect(InputType.String)
            .EndRepetition()
            .Expect(")")
        .EndOptions()
        .Build();

    var fragmentParser = new TextParser<FragmentSchemaContext>(fragmentSchema);
    var fragments = fragmentParser
        .SetInputText(testInput4)
        .Parse()
        .AsFragments();

    Assert.Equal(3, fragments.Count);
    Assert.Equal(6, fragments[0].Count);
    Assert.Equal(3, fragments[1].Count);
    Assert.Equal(5, fragments[2].Count);

    // TODO: only the counts are verified so far; assertions on the captured
    // contents of each fragment still need to be written.
}
|
||||||
}
|
}
|
||||||
|
@ -3,14 +3,14 @@ namespace Parsing;
|
|||||||
using Parsing.Schema;
|
using Parsing.Schema;
|
||||||
using Parsing.Tokenization;
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
public class LineParser
|
public class LineParser<T> where T : ISchemaContext
|
||||||
{
|
{
|
||||||
private string[] delimiters;
|
private string[] delimiters;
|
||||||
private bool removeEmptyEntries = false;
|
private bool removeEmptyEntries = false;
|
||||||
private InputSchema schema;
|
private ISchema<T> schema;
|
||||||
private InputSchemaContext context;
|
private T context;
|
||||||
|
|
||||||
public LineParser(InputSchema schema, string[]? delimiters = null, bool removeEmptyEntries = true)
|
public LineParser(ISchema<T> schema, string[]? delimiters = null, bool removeEmptyEntries = true)
|
||||||
{
|
{
|
||||||
this.delimiters = delimiters ?? new string[] { " " };
|
this.delimiters = delimiters ?? new string[] { " " };
|
||||||
this.removeEmptyEntries = removeEmptyEntries;
|
this.removeEmptyEntries = removeEmptyEntries;
|
||||||
|
@ -1,6 +1,13 @@
|
|||||||
namespace Parsing.Schema;
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
[Flags]
|
||||||
public enum BlockType
|
public enum BlockType
|
||||||
{
|
{
|
||||||
Integer, String, FixedRepetition, GreedyRepetition
|
Integer = 1,
|
||||||
|
String = 2,
|
||||||
|
// technically not a block type but keeping it here for consistency/having all types in one place
|
||||||
|
Fragment = 4,
|
||||||
|
FixedRepetition = 8,
|
||||||
|
GreedyRepetition = 16,
|
||||||
|
NonZeroRepetition = 32,
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
namespace Parsing.Schema.BuildingBlocks;
|
namespace Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
using System.IO.Pipelines;
|
using System.IO.Pipelines;
|
||||||
|
using System.Linq;
|
||||||
using Parsing.Tokenization;
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
class FixedRepetitionBlock : BuildingBlockBase
|
class FixedRepetitionBlock : BuildingBlockBase
|
||||||
@ -30,7 +31,7 @@ class FixedRepetitionBlock : BuildingBlockBase
|
|||||||
this.context = this.inputSchema.CreateContext();
|
this.context = this.inputSchema.CreateContext();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result.SingleOrDefault();
|
||||||
}
|
}
|
||||||
|
|
||||||
public override bool CanParseWord(InputProvider inputs)
|
public override bool CanParseWord(InputProvider inputs)
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
namespace Parsing.Schema.BuildingBlocks;
|
namespace Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
using System.IO.Pipelines;
|
using System.IO.Pipelines;
|
||||||
|
using System.Linq;
|
||||||
using Parsing.Tokenization;
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
class GreedyRepetitionBlock : BuildingBlockBase
|
class GreedyRepetitionBlock : BuildingBlockBase
|
||||||
@ -21,7 +22,7 @@ class GreedyRepetitionBlock : BuildingBlockBase
|
|||||||
{
|
{
|
||||||
this.context = this.inputSchema.CreateContext();
|
this.context = this.inputSchema.CreateContext();
|
||||||
}
|
}
|
||||||
return result;
|
return result.SingleOrDefault();
|
||||||
}
|
}
|
||||||
|
|
||||||
public override bool CanParseWord(InputProvider inputs)
|
public override bool CanParseWord(InputProvider inputs)
|
||||||
|
84
TextParser/Schema/FragmentSchema.cs
Normal file
84
TextParser/Schema/FragmentSchema.cs
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
using Parsing.Schema;
|
||||||
|
using Parsing.Schema.BuildingBlocks;
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
using System.Collections;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
|
/// <summary>
/// Mutable parsing state handed to <see cref="FragmentSchema"/>; a fresh instance
/// is produced by <see cref="FragmentSchema.CreateContext"/>.
/// </summary>
public class FragmentSchemaContext : ISchemaContext
{
    /// <summary>
    /// Position counter required by <see cref="ISchemaContext"/>. Starts at 0.
    /// </summary>
    public int lastProcessedBlockIndex { get; set; }

    /// <summary>
    /// Once set to <c>true</c>, <see cref="FragmentSchema"/> reports that no further
    /// word can be processed. Starts as <c>false</c>.
    /// </summary>
    public bool HasFinished { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Schema that tokenizes input words by matching each one against a single
/// regular expression. Every regex match found in a word becomes one
/// <see cref="FragmentToken"/> carrying the captures of all groups of that match.
/// </summary>
public class FragmentSchema : ISchema<FragmentSchemaContext>
{
    private string fragmentRegex;

    // Built on first use and then reused, so the pattern is not re-parsed for every word.
    private Regex? compiledRegex;

    /// <summary>Creates a schema for the given regular expression pattern.</summary>
    /// <param name="fragmentRegex">Pattern that describes one fragment.</param>
    public FragmentSchema(string fragmentRegex)
    {
        this.fragmentRegex = fragmentRegex;
    }

    private Regex CompiledRegex => this.compiledRegex ??= new Regex(this.fragmentRegex);

    /// <summary>
    /// Consumes the next word from <paramref name="inputs"/> and returns one token
    /// per regex match found in it.
    /// </summary>
    /// <param name="currentContext">Parsing state; not modified by this method.</param>
    /// <param name="inputs">Provider the next word is taken from.</param>
    /// <returns>The tokens for all matches in the word; empty when nothing matched.</returns>
    public List<IToken> ProcessNextWord(FragmentSchemaContext currentContext, InputProvider inputs)
    {
        var tokenList = new List<IToken>();

        // one token per match
        foreach (Match match in this.CompiledRegex.Matches(inputs.YieldWord()))
        {
            // "$1" expands to the text captured by group 1 of this match
            var newToken = new FragmentToken(match.Result("$1"));

            // token contains data from all included matches
            foreach (var groupKey in match.Groups.Keys)
            {
                var matchedSubstrings = new List<string>();

                // The loop variable must be typed explicitly: with `var`, the
                // non-generic enumerator of CaptureCollection yields `object`.
                foreach (Capture capture in match.Groups[groupKey].Captures)
                {
                    matchedSubstrings.Add(capture.Value);
                }

                newToken.AddMatch(groupKey, matchedSubstrings);
            }

            // Without this the method always returned an empty list.
            tokenList.Add(newToken);
        }

        return tokenList;
    }

    /// <summary>
    /// Checks, inside a lookahead context, whether the next word of
    /// <paramref name="inputs"/> can be processed.
    /// </summary>
    /// <remarks>
    /// NOTE(review): presumably disposing the lookahead context rewinds the
    /// provider so the word is not consumed — confirm against InputProvider.
    /// </remarks>
    public bool CanProcessNextWord(FragmentSchemaContext currentContext, InputProvider inputs)
    {
        using (inputs.GetLookaheadContext())
        {
            return this.CanProcessNextWord(currentContext, inputs.YieldWord());
        }
    }

    /// <summary>
    /// Returns <c>true</c> when the context has not finished and the fragment
    /// regex matches somewhere in <paramref name="word"/>.
    /// </summary>
    public bool CanProcessNextWord(FragmentSchemaContext currentContext, string word)
    {
        if (currentContext.HasFinished)
        {
            return false;
        }

        return this.CompiledRegex.IsMatch(word);
    }

    /// <summary>
    /// Tokenizes <paramref name="words"/> with a fresh context, processing words
    /// for as long as <see cref="CanProcessNextWord(FragmentSchemaContext, InputProvider)"/>
    /// allows.
    /// </summary>
    /// <returns>All tokens produced, in input order.</returns>
    public List<IToken> ProcessWordList(string[] words)
    {
        var tokens = new List<IToken>();
        var inputs = new InputProvider(words);
        var overallContext = this.CreateContext();

        while (this.CanProcessNextWord(overallContext, inputs))
        {
            tokens.AddRange(this.ProcessNextWord(overallContext, inputs));
        }

        return tokens;
    }

    /// <summary>Creates a new, unfinished parsing context.</summary>
    public FragmentSchemaContext CreateContext()
    {
        return new FragmentSchemaContext();
    }
}
|
128
TextParser/Schema/FragmentSchemaBuilder.cs
Normal file
128
TextParser/Schema/FragmentSchemaBuilder.cs
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
using Parsing.Schema.BuildingBlocks;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
|
/// <summary>
/// Fluent builder that assembles a regular expression piece by piece and wraps
/// it in a <see cref="FragmentSchema"/>. Repetitions are built on a nested
/// builder that is merged back into its parent by <see cref="EndRepetition"/>.
/// </summary>
public class FragmentSchemaBuilder : RepetitionSchemaBuilder<FragmentSchemaBuilder, FragmentSchema, FragmentSchemaContext>, ISchemaBuilder<FragmentSchema, FragmentSchemaContext>
{
    /// <summary>The regex text accumulated so far on this builder layer.</summary>
    protected string fragmentRegex = @"";

    public FragmentSchemaBuilder()
    {
    }

    /// <summary>Opens a group of alternatives and its first alternative.</summary>
    public FragmentSchemaBuilder StartOptions()
    {
        this.fragmentRegex += "((";
        return this;
    }

    /// <summary>Closes the current alternative and the surrounding group.</summary>
    public FragmentSchemaBuilder EndOptions()
    {
        this.fragmentRegex += "))";
        return this;
    }

    /// <summary>
    /// Starts the next alternative inside an options group by appending the
    /// <c>)|(</c> separator.
    /// </summary>
    public FragmentSchemaBuilder Option()
    {
        // if we just started an options group there is no need to add an option separator;
        // a trailing "\(" is an escaped literal parenthesis, not a group opener.
        // Ordinal comparison: this is pattern syntax, not linguistic text.
        bool endsWithParen = this.fragmentRegex.EndsWith("(", StringComparison.Ordinal);
        bool endsWithEscapedParen = this.fragmentRegex.EndsWith("\\(", StringComparison.Ordinal);
        if (!endsWithParen || endsWithEscapedParen)
        {
            this.fragmentRegex += ")|(";
        }

        return this;
    }

    /// <summary>Appends a capturing group for a value of the given type.</summary>
    /// <param name="type"><c>String</c> appends <c>\w+</c>, <c>Integer</c> appends <c>\d+</c>.</param>
    /// <param name="name">Optional group name; when non-empty the group is written as <c>(?&lt;name&gt;...)</c>.</param>
    /// <exception cref="Exception">Thrown for any other <paramref name="type"/>.</exception>
    public FragmentSchemaBuilder Expect(InputType type, string name = "")
    {
        string groupNamePrefix = string.IsNullOrEmpty(name) ? "" : "?<" + name + ">";

        string valuePattern = type switch
        {
            InputType.String => "\\w+",
            InputType.Integer => "\\d+",
            _ => throw new Exception("Unrecognized InputType"),
        };

        this.fragmentRegex += "(" + groupNamePrefix + valuePattern + ")";
        return this;
    }

    /// <summary>Appends <paramref name="literal"/> as escaped, verbatim text.</summary>
    public FragmentSchemaBuilder Expect(string literal)
    {
        this.fragmentRegex += Regex.Escape(literal);
        return this;
    }

    /// <summary>Starts a nested builder whose content repeats exactly <paramref name="repetitionCount"/> times.</summary>
    public FragmentSchemaBuilder Repeat(int repetitionCount)
    {
        var newSchemaBuilder = this.Repeat(RepetitionType.FixedRepetition);
        newSchemaBuilder.NumRepetition = repetitionCount;
        return newSchemaBuilder;
    }

    /// <summary>Starts a nested builder whose content repeats zero or more times.</summary>
    public FragmentSchemaBuilder Repeat()
    {
        return this.Repeat(RepetitionType.GreedyRepetition);
    }

    /// <summary>Starts a nested builder with the given repetition type.</summary>
    public FragmentSchemaBuilder Repeat(RepetitionType repetitionType)
    {
        // add another layer of parsing
        var newSchemaBuilder = this.GetNewRepetitionSchemaBuilder(this);
        newSchemaBuilder.RepetitionType = repetitionType;
        return newSchemaBuilder;
    }

    /// <summary>
    /// Finishes the current repetition layer: wraps its regex in a group, adds
    /// the quantifier for its repetition type and appends the result, in one
    /// more group, to the parent builder.
    /// </summary>
    /// <returns>The parent builder.</returns>
    /// <exception cref="Exception">
    /// Thrown when there is no parent builder (called without a matching
    /// <c>Repeat</c>) or when the repetition type is not recognized.
    /// </exception>
    public FragmentSchemaBuilder EndRepetition()
    {
        // return back to upper layer of parsing
        var oldSchemaBuilder = this.UpperLayerBuilder;
        if (oldSchemaBuilder is null)
        {
            // Casting `this` to its own type can never yield null, so the parent
            // link is what has to be checked to detect an unbalanced EndRepetition.
            throw new Exception("Invalid repetition definitions!");
        }

        var currentRegex = "(" + this.fragmentRegex + ")";
        switch (this.RepetitionType)
        {
            case RepetitionType.FixedRepetition:
                currentRegex += "{" + this.NumRepetition.ToString() + "}";
                break;
            case RepetitionType.GreedyRepetition:
                currentRegex += "*";
                break;
            case RepetitionType.NonZeroRepetition:
            case RepetitionType.NonZeroRepetition | RepetitionType.GreedyRepetition:
                currentRegex += "+";
                break;
            default:
                throw new Exception("Unrecognized RepetitionType");
        }

        oldSchemaBuilder.fragmentRegex += "(" + currentRegex + ")";

        return oldSchemaBuilder;
    }

    /// <summary>Creates a <see cref="FragmentSchema"/> from the regex accumulated on this builder.</summary>
    public FragmentSchema Build()
    {
        return new FragmentSchema(this.fragmentRegex);
    }
}
|
25
TextParser/Schema/ISchema.cs
Normal file
25
TextParser/Schema/ISchema.cs
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
using Parsing.Schema;
|
||||||
|
using Parsing.Schema.BuildingBlocks;
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
using System.Collections;
|
||||||
|
|
||||||
|
/// <summary>
/// State that a schema reads and updates while it processes input word by word.
/// </summary>
public interface ISchemaContext
{
    /// <summary>
    /// Position counter into the schema's building blocks. InputSchema uses it
    /// to pick the block that handles the next word and increments it afterwards.
    /// </summary>
    int lastProcessedBlockIndex { get; set; }

    /// <summary>
    /// Marks the context as complete; a finished context is not expected to
    /// accept further words.
    /// </summary>
    bool HasFinished { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Contract of a parsing schema that turns words into tokens while tracking
/// its progress in a context of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Context type used by the schema.</typeparam>
public interface ISchema<T> where T : ISchemaContext
{
    /// <summary>Processes the next word of <paramref name="inputs"/> and returns the resulting tokens.</summary>
    List<IToken> ProcessNextWord(T currentContext, InputProvider inputs);

    /// <summary>Reports whether the next word of <paramref name="inputs"/> can be processed in the given context.</summary>
    bool CanProcessNextWord(T currentContext, InputProvider inputs);

    /// <summary>Reports whether <paramref name="word"/> can be processed in the given context.</summary>
    bool CanProcessNextWord(T currentContext, string word);

    /// <summary>Tokenizes a complete list of words.</summary>
    List<IToken> ProcessWordList(string[] words);

    /// <summary>Creates a context for a new parsing run.</summary>
    T CreateContext();
}
|
8
TextParser/Schema/ISchemaBuilder.cs
Normal file
8
TextParser/Schema/ISchemaBuilder.cs
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
using Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
|
/// <summary>
/// Contract of a builder that produces a schema.
/// </summary>
/// <typeparam name="T">Schema type that is built.</typeparam>
/// <typeparam name="U">Context type used by that schema.</typeparam>
public interface ISchemaBuilder<T, U>
    where T : ISchema<U>
    where U : ISchemaContext
{
    /// <summary>Produces the schema described by this builder.</summary>
    T Build();
}
|
@ -5,13 +5,13 @@ using Parsing.Schema.BuildingBlocks;
|
|||||||
using Parsing.Tokenization;
|
using Parsing.Tokenization;
|
||||||
using System.Collections;
|
using System.Collections;
|
||||||
|
|
||||||
public class InputSchemaContext
|
public class InputSchemaContext : ISchemaContext
|
||||||
{
|
{
|
||||||
public int lastProcessedBlockIndex { get; set; } = 0;
|
public int lastProcessedBlockIndex { get; set; } = 0;
|
||||||
public bool HasFinished { get; set; } = false;
|
public bool HasFinished { get; set; } = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public class InputSchema
|
public class InputSchema : ISchema<InputSchemaContext>
|
||||||
{
|
{
|
||||||
private List<IBuildingBlock> buildingBlocks;
|
private List<IBuildingBlock> buildingBlocks;
|
||||||
|
|
||||||
@ -25,7 +25,7 @@ public class InputSchema
|
|||||||
this.buildingBlocks.Add(buildingBlock);
|
this.buildingBlocks.Add(buildingBlock);
|
||||||
}
|
}
|
||||||
|
|
||||||
public IToken ProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
|
public List<IToken> ProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
|
||||||
{
|
{
|
||||||
var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
|
var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
|
||||||
var token = nextBlock.ParseWord(inputs);
|
var token = nextBlock.ParseWord(inputs);
|
||||||
@ -34,7 +34,9 @@ public class InputSchema
|
|||||||
currentContext.lastProcessedBlockIndex++;
|
currentContext.lastProcessedBlockIndex++;
|
||||||
currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count;
|
currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count;
|
||||||
}
|
}
|
||||||
return token;
|
var newTokenList = new List<IToken>();
|
||||||
|
newTokenList.Add(token);
|
||||||
|
return newTokenList;
|
||||||
}
|
}
|
||||||
|
|
||||||
public bool CanProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
|
public bool CanProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
|
||||||
@ -68,7 +70,7 @@ public class InputSchema
|
|||||||
|
|
||||||
while (this.CanProcessNextWord(overallContext, inputs))
|
while (this.CanProcessNextWord(overallContext, inputs))
|
||||||
{
|
{
|
||||||
tokens.Add(this.ProcessNextWord(overallContext, inputs));
|
tokens.AddRange(this.ProcessNextWord(overallContext, inputs));
|
||||||
}
|
}
|
||||||
|
|
||||||
return tokens;
|
return tokens;
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
using Parsing.Schema.BuildingBlocks;
|
using Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
public class InputSchemaBuilder
|
public class InputSchemaBuilder : RepetitionSchemaBuilder<InputSchemaBuilder, InputSchema, InputSchemaContext>, ISchemaBuilder<InputSchema, InputSchemaContext>
|
||||||
{
|
{
|
||||||
private InputSchema schema = new InputSchema();
|
private InputSchema schema = new InputSchema();
|
||||||
|
|
||||||
@ -31,7 +31,7 @@ public class InputSchemaBuilder
|
|||||||
public InputSchemaBuilder Repeat(int repetitionCount)
|
public InputSchemaBuilder Repeat(int repetitionCount)
|
||||||
{
|
{
|
||||||
// add another layer of parsing
|
// add another layer of parsing
|
||||||
var newInputSchemaBuilder = new RepetitionSchemaBuilder(this);
|
var newInputSchemaBuilder = this.GetNewRepetitionSchemaBuilder(this);
|
||||||
newInputSchemaBuilder.NumRepetition = repetitionCount;
|
newInputSchemaBuilder.NumRepetition = repetitionCount;
|
||||||
newInputSchemaBuilder.RepetitionType = RepetitionType.FixedRepetition;
|
newInputSchemaBuilder.RepetitionType = RepetitionType.FixedRepetition;
|
||||||
|
|
||||||
@ -41,7 +41,7 @@ public class InputSchemaBuilder
|
|||||||
public InputSchemaBuilder Repeat()
|
public InputSchemaBuilder Repeat()
|
||||||
{
|
{
|
||||||
// add another layer of parsing
|
// add another layer of parsing
|
||||||
var newInputSchemaBuilder = new RepetitionSchemaBuilder(this);
|
var newInputSchemaBuilder = this.GetNewRepetitionSchemaBuilder(this);
|
||||||
newInputSchemaBuilder.RepetitionType = RepetitionType.GreedyRepetition;
|
newInputSchemaBuilder.RepetitionType = RepetitionType.GreedyRepetition;
|
||||||
|
|
||||||
return newInputSchemaBuilder;
|
return newInputSchemaBuilder;
|
||||||
@ -50,7 +50,7 @@ public class InputSchemaBuilder
|
|||||||
public InputSchemaBuilder EndRepetition()
|
public InputSchemaBuilder EndRepetition()
|
||||||
{
|
{
|
||||||
// return back to upper layer of parsing
|
// return back to upper layer of parsing
|
||||||
var currentBuilder = this as RepetitionSchemaBuilder;
|
var currentBuilder = this as InputSchemaBuilder;
|
||||||
if (currentBuilder == null)
|
if (currentBuilder == null)
|
||||||
{
|
{
|
||||||
throw new Exception("Invalid repetition definitions!");
|
throw new Exception("Invalid repetition definitions!");
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
namespace Parsing.Schema;
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
[Flags]
|
||||||
public enum InputType
|
public enum InputType
|
||||||
{
|
{
|
||||||
Integer = BlockType.Integer,
|
Integer = BlockType.Integer,
|
||||||
String = BlockType.String
|
String = BlockType.String,
|
||||||
|
Fragment = BlockType.Fragment,
|
||||||
}
|
}
|
||||||
|
@ -1,13 +1,15 @@
|
|||||||
namespace Parsing.Schema;
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
public class RepetitionSchemaBuilder : InputSchemaBuilder
|
public abstract class RepetitionSchemaBuilder<S, T, U> where S : RepetitionSchemaBuilder<S, T, U>, ISchemaBuilder<T, U>, new() where T : ISchema<U> where U : ISchemaContext
|
||||||
{
|
{
|
||||||
public RepetitionSchemaBuilder(InputSchemaBuilder upperLayerBuilder)
|
public S GetNewRepetitionSchemaBuilder(S upperLayerBuilder)
|
||||||
{
|
{
|
||||||
this.UpperLayerBuilder = upperLayerBuilder;
|
var newBuilder = new S();
|
||||||
|
newBuilder.UpperLayerBuilder = upperLayerBuilder;
|
||||||
|
return newBuilder;
|
||||||
}
|
}
|
||||||
|
|
||||||
public InputSchemaBuilder UpperLayerBuilder { get; set; }
|
public required S UpperLayerBuilder { get; set; }
|
||||||
|
|
||||||
public int NumRepetition { get; set; }
|
public int NumRepetition { get; set; }
|
||||||
|
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
namespace Parsing.Schema;
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
[Flags]
|
||||||
public enum RepetitionType
|
public enum RepetitionType
|
||||||
{
|
{
|
||||||
FixedRepetition = BlockType.FixedRepetition,
|
FixedRepetition = BlockType.FixedRepetition,
|
||||||
GreedyRepetition = BlockType.GreedyRepetition
|
GreedyRepetition = BlockType.GreedyRepetition,
|
||||||
|
NonZeroRepetition = BlockType.NonZeroRepetition,
|
||||||
}
|
}
|
||||||
|
@ -5,20 +5,20 @@ using System.Collections.Generic;
|
|||||||
using Parsing.Schema;
|
using Parsing.Schema;
|
||||||
using Parsing.Tokenization;
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
public class TextParser : TokenConverter
|
public class TextParser<T> : TokenConverter where T : ISchemaContext
|
||||||
{
|
{
|
||||||
private LineParser lineParser;
|
private LineParser<T> lineParser;
|
||||||
private string[] lines;
|
private string[] lines;
|
||||||
private bool removeEmptyEntries;
|
private bool removeEmptyEntries;
|
||||||
|
|
||||||
public TextParser(InputSchema schema, string[]? delimiters = null, bool removeEmptyEntries = true) : base()
|
public TextParser(ISchema<T> schema, string[]? delimiters = null, bool removeEmptyEntries = true) : base()
|
||||||
{
|
{
|
||||||
this.lineParser = new LineParser(schema, delimiters, removeEmptyEntries);
|
this.lineParser = new LineParser<T>(schema, delimiters, removeEmptyEntries);
|
||||||
this.lines = new string[] { };
|
this.lines = new string[] { };
|
||||||
this.removeEmptyEntries = removeEmptyEntries;
|
this.removeEmptyEntries = removeEmptyEntries;
|
||||||
}
|
}
|
||||||
|
|
||||||
public TextParser SetInputText(string text)
|
public TextParser<T> SetInputText(string text)
|
||||||
{
|
{
|
||||||
var options = StringSplitOptions.TrimEntries;
|
var options = StringSplitOptions.TrimEntries;
|
||||||
if (removeEmptyEntries)
|
if (removeEmptyEntries)
|
||||||
@ -29,7 +29,7 @@ public class TextParser : TokenConverter
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public TextParser Parse()
|
public TextParser<T> Parse()
|
||||||
{
|
{
|
||||||
foreach (var line in this.lines)
|
foreach (var line in this.lines)
|
||||||
{
|
{
|
||||||
|
@ -5,6 +5,51 @@ using System.Collections.Generic;
|
|||||||
using Parsing.Schema;
|
using Parsing.Schema;
|
||||||
using Parsing.Tokenization;
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// Holder for token-list conversion helpers. All members are currently
/// commented out, so the class exposes nothing.
/// </summary>
public static class ConversionHelpers
{
    // NOTE(review): the three ConvertData drafts below are disabled. As written,
    // each passes typedToken.GetValue() (a V) to a converter whose declared input
    // type is U or List<U>, so they presumably did not compile. Finish or delete them.

    // public static List<U> ConvertData<T, U, V>(this List<IToken> tokenList, Func<U, V> converter) where T : IValueToken<V>
    // {
    //     var newList = new List<U>();
    //     foreach (var token in tokenList)
    //     {
    //         var typedToken = token as IValueToken<V>;
    //         if (typedToken == null)
    //         {
    //             throw new Exception("Invalid Token type encountered during value conversion");
    //         }

    //         newList.Add(converter(typedToken.GetValue()));
    //     }
    //     return newList;
    // }

    // public static List<U> ConvertData<T, U, V>(this List<IToken> tokenList, Func<List<U>, V> converter) where T : IValueToken<V>
    // {
    //     var newList = new List<U>();
    //     foreach (var token in tokenList)
    //     {
    //         var typedToken = token as IValueToken<V>;
    //         if (typedToken == null)
    //         {
    //             throw new Exception("Invalid Token type encountered during value conversion");
    //         }

    //         newList.AddRange(converter(typedToken.GetValue()));
    //     }
    //     return newList;
    // }

    // public static List<U> ConvertData<T, U, V>(this List<List<IToken>> tokenListList, Func<U, V> converter) where T : IValueToken<V>
    // {
    //     var newListList = new List<List<U>>();
    //     foreach (var tokenList in tokenListList)
    //     {
    //         newListList.Add(tokenList.ConvertData(converter));
    //     }
    //     return newListList;
    // }
}
|
||||||
|
|
||||||
public class TokenConverter
|
public class TokenConverter
|
||||||
{
|
{
|
||||||
protected List<List<IToken>> rawTokens = new List<List<IToken>>();
|
protected List<List<IToken>> rawTokens = new List<List<IToken>>();
|
||||||
@ -52,6 +97,41 @@ public class TokenConverter
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Flattens all token rows into one list of values, row by row and in token order.
/// </summary>
/// <typeparam name="T">Value type every token is expected to carry.</typeparam>
/// <returns>The values of all tokens as a single list.</returns>
/// <exception cref="Exception">
/// Thrown when a row contains a null entry or a token that is not an
/// <c>IValueToken&lt;T&gt;</c>.
/// </exception>
public List<T> AsSingleStream<T>()
{
    var flattened = new List<T>();

    foreach (var row in this.rawTokens)
    {
        foreach (IToken current in row)
        {
            if (current is null)
            {
                throw new Exception("No token was provided, but token was expected!");
            }

            if (current is not IValueToken<T> typed)
            {
                throw new Exception("Provided token is not a ValueToken");
            }

            flattened.Add(typed.GetValue());
        }
    }

    return flattened;
}
|
||||||
|
|
||||||
public List<T[]> AsRows<T>()
|
public List<T[]> AsRows<T>()
|
||||||
{
|
{
|
||||||
var listRows = this.AsListRows<T>();
|
var listRows = this.AsListRows<T>();
|
||||||
@ -116,4 +196,22 @@ public class TokenConverter
|
|||||||
{
|
{
|
||||||
return this.rawTokens;
|
return this.rawTokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Returns the values of all tokens as one flat list of <see cref="Fragment"/> objects.
/// </summary>
/// <exception cref="Exception">Thrown when a token yields a null fragment.</exception>
public List<Fragment> AsFragments()
{
    var fragments = new List<Fragment>();

    foreach (var candidate in this.AsSingleStream<Fragment>())
    {
        // Rejects null values coming out of the token stream.
        if (candidate is not Fragment fragment)
        {
            throw new Exception("Invalid token type encountered");
        }

        fragments.Add(fragment);
    }

    return fragments;
}
|
||||||
}
|
}
|
||||||
|
38
TextParser/Tokenization/FragmentToken.cs
Normal file
38
TextParser/Tokenization/FragmentToken.cs
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
namespace Parsing.Tokenization;
|
||||||
|
|
||||||
|
using Parsing.Schema;
|
||||||
|
|
||||||
|
/// <summary>
/// Result of one fragment match: maps a name to the list of strings stored under it.
/// FragmentToken fills it through <c>AddMatch(name, values)</c>.
/// </summary>
public class Fragment : Dictionary<string, List<string>> { }
|
||||||
|
|
||||||
|
/// <summary>
/// Token whose value is a <see cref="Fragment"/>: a set of named match lists
/// collected for one piece of text.
/// </summary>
public class FragmentToken : IValueToken<Fragment>
{
    private string word;
    private Fragment matches = new Fragment();

    /// <summary>Creates a token for the given text with no matches recorded yet.</summary>
    public FragmentToken(string word)
    {
        this.word = word;
    }

    /// <summary>Returns the text this token was created with.</summary>
    public string GetText() => word;

    /// <summary>
    /// Stores <paramref name="values"/> under <paramref name="name"/>. Uses
    /// <c>Dictionary.Add</c>, so adding the same name twice throws.
    /// </summary>
    public void AddMatch(string name, List<string> values) => this.matches.Add(name, values);

    /// <summary>Returns the fragment holding all matches added so far.</summary>
    public Fragment GetValue() => this.matches;

    /// <summary>Always <see cref="InputType.Fragment"/>.</summary>
    public InputType GetInputType() => InputType.Fragment;
}
|
@ -32,6 +32,12 @@ public class InputProvider
|
|||||||
this.CurrentPosition = 0;
|
this.CurrentPosition = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Creates a provider over <paramref name="text"/>, split at every line feed
/// character, with the read position at the start.
/// </summary>
/// <param name="text">Text to split; each piece between line feeds becomes one word.</param>
public InputProvider(string text)
{
    string[] pieces = text.Split('\n');
    this.words = pieces;
    this.CurrentPosition = 0;
}
|
||||||
|
|
||||||
public InputProvider.LookaheadContext GetLookaheadContext()
|
public InputProvider.LookaheadContext GetLookaheadContext()
|
||||||
{
|
{
|
||||||
return new InputProvider.LookaheadContext(this);
|
return new InputProvider.LookaheadContext(this);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user