generated from Templates/Dotnet_Library
feat: implement greedy repetition, ref: A24-13
This commit is contained in:
parent
e15190ecd6
commit
0d85132a32
@ -12,6 +12,9 @@ public class TextParserTests
|
||||
private const string testInput3 = @"2 4 6 1
|
||||
3 5 7 2
|
||||
4 6 8 3";
|
||||
private const string testInput4 = @"2 ab ba fd er sd
|
||||
8 cd dc
|
||||
7 uh 6 yp rt";
|
||||
|
||||
[Fact]
|
||||
public void LineParser_TestSimpleRepetition()
|
||||
@ -157,4 +160,43 @@ public class TextParserTests
|
||||
Assert.Equal(2, columns[3][1]);
|
||||
Assert.Equal(3, columns[3][2]);
|
||||
}
|
||||
|
||||
[Fact]
public void TextParser_TestGreedyRepetitionAsRows()
{
    // Schema: an outer greedy repetition wrapping one integer followed by
    // an inner greedy repetition of strings.
    var schema = new InputSchemaBuilder()
        .Repeat()
            .Expect(InputType.Integer)
            .Repeat()
                .Expect(InputType.String)
            .EndRepetition()
        .EndRepetition()
        .Build();

    var rows = new TextParser(schema)
        .SetInputText(testInput4)
        .Parse()
        .AsRawData();

    // Expected token type of every cell; one inner array per line of testInput4.
    InputType[][] expectedTypes =
    {
        new[]
        {
            InputType.Integer, InputType.String, InputType.String,
            InputType.String, InputType.String, InputType.String,
        },
        new[] { InputType.Integer, InputType.String, InputType.String },
        new[]
        {
            InputType.Integer, InputType.String, InputType.Integer,
            InputType.String, InputType.String,
        },
    };

    // Shape first: number of rows, then the length of each row.
    Assert.Equal(expectedTypes.Length, rows.Count);
    for (int rowIndex = 0; rowIndex < expectedTypes.Length; rowIndex++)
    {
        Assert.Equal(expectedTypes[rowIndex].Length, rows[rowIndex].Count);
    }

    // Then the token type of every cell, row by row.
    for (int rowIndex = 0; rowIndex < expectedTypes.Length; rowIndex++)
    {
        for (int columnIndex = 0; columnIndex < expectedTypes[rowIndex].Length; columnIndex++)
        {
            Assert.Equal(expectedTypes[rowIndex][columnIndex], rows[rowIndex][columnIndex].GetInputType());
        }
    }
}
|
||||
}
|
||||
|
@ -12,6 +12,8 @@ abstract class BuildingBlockBase : IBuildingBlock
|
||||
|
||||
public abstract bool CanParseWord(InputProvider inputs);
|
||||
|
||||
public abstract bool CanParseWord(string word);
|
||||
|
||||
public abstract BlockType GetBlockType();
|
||||
|
||||
public virtual bool IsRepetitionType()
|
||||
@ -19,7 +21,7 @@ abstract class BuildingBlockBase : IBuildingBlock
|
||||
return false;
|
||||
}
|
||||
|
||||
public virtual bool CheckIsDoneParsingAndReset()
|
||||
public virtual bool CheckIsDoneParsingAndReset(InputProvider inputs)
|
||||
{
|
||||
// most blocks are always done parsing after consuming a token
|
||||
// repetition blocks can consume multiple tokens
|
||||
|
@ -48,6 +48,21 @@ class FixedRepetitionBlock : BuildingBlockBase
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>
/// Reports whether this block would accept <paramref name="word"/> as its next token.
/// </summary>
/// <param name="word">The candidate word to test.</param>
/// <returns>
/// <c>false</c> when <c>repetitionCount</c> is zero; otherwise whatever the nested
/// schema answers for the word in the current context.
/// </returns>
public override bool CanParseWord(string word)
{
    // With no repetitions left there is nothing more this block may consume.
    if (this.repetitionCount == 0)
    {
        return false;
    }

    return inputSchema.CanProcessNextWord(context, word);
}
|
||||
|
||||
public override BlockType GetBlockType()
|
||||
{
|
||||
return BlockType.FixedRepetition;
|
||||
@ -58,7 +73,7 @@ class FixedRepetitionBlock : BuildingBlockBase
|
||||
return true;
|
||||
}
|
||||
|
||||
public override bool CheckIsDoneParsingAndReset()
|
||||
public override bool CheckIsDoneParsingAndReset(InputProvider inputs)
|
||||
{
|
||||
// we are done parsing once all repetitions are exhausted
|
||||
var result = this.repetitionCount == 0;
|
||||
|
57
TextParser/Schema/BuildingBlocks/GreedyRepetitionBlock.cs
Normal file
57
TextParser/Schema/BuildingBlocks/GreedyRepetitionBlock.cs
Normal file
@ -0,0 +1,57 @@
|
||||
namespace Parsing.Schema.BuildingBlocks;
|
||||
|
||||
using System.IO.Pipelines;
|
||||
using Parsing.Tokenization;
|
||||
|
||||
/// <summary>
/// Repetition block that keeps applying a nested schema for as long as the
/// upcoming input still matches it.
/// </summary>
class GreedyRepetitionBlock : BuildingBlockBase
{
    private InputSchema inputSchema;
    private InputSchemaContext context;

    /// <summary>
    /// Creates the block around <paramref name="inputSchema"/> and opens a first
    /// parsing context on it.
    /// </summary>
    /// <param name="inputSchema">The nested schema that is applied repeatedly.</param>
    public GreedyRepetitionBlock(InputSchema inputSchema)
    {
        this.inputSchema = inputSchema;
        this.RestartNestedSchema();
    }

    /// <summary>
    /// Lets the nested schema process the next word and returns the resulting token.
    /// </summary>
    /// <param name="inputs">The provider the word is taken from.</param>
    /// <returns>The token produced by the nested schema.</returns>
    public override IToken ParseWord(InputProvider inputs)
    {
        var token = this.inputSchema.ProcessNextWord(this.context, inputs);

        // When the nested schema cannot take the following word in its current
        // state, start it over with a fresh context.
        bool nestedCanContinue = this.CanParseWord(inputs);
        if (!nestedCanContinue)
        {
            this.RestartNestedSchema();
        }

        return token;
    }

    /// <summary>
    /// Returns true when the nested schema accepts the upcoming input in the current
    /// context and the provider still has a word to yield.
    /// </summary>
    /// <param name="inputs">The provider to inspect.</param>
    public override bool CanParseWord(InputProvider inputs)
        => this.inputSchema.CanProcessNextWord(this.context, inputs) && inputs.CanYieldWord();

    /// <summary>
    /// Returns true when the nested schema accepts <paramref name="word"/> in the
    /// current context.
    /// </summary>
    /// <param name="word">The candidate word to test.</param>
    public override bool CanParseWord(string word)
        => this.inputSchema.CanProcessNextWord(this.context, word);

    /// <summary>Identifies this block as <see cref="BlockType.GreedyRepetition"/>.</summary>
    public override BlockType GetBlockType() => BlockType.GreedyRepetition;

    /// <summary>Always true: this block is a repetition block.</summary>
    public override bool IsRepetitionType() => true;

    /// <summary>
    /// Reports whether the greedy repetition is finished, and if so resets the
    /// nested context.
    /// </summary>
    /// <param name="inputs">The provider whose upcoming word is checked.</param>
    /// <returns>
    /// <c>true</c> when the upcoming input can no longer be parsed by this block;
    /// otherwise <c>false</c>.
    /// </returns>
    public override bool CheckIsDoneParsingAndReset(InputProvider inputs)
    {
        // we are done parsing greedily once the next token doesn't match anymore
        if (this.CanParseWord(inputs))
        {
            return false;
        }

        this.RestartNestedSchema();
        return true;
    }

    // Replaces the current nested context with a newly created one.
    private void RestartNestedSchema()
    {
        this.context = this.inputSchema.CreateContext();
    }
}
|
@ -8,9 +8,11 @@ public interface IBuildingBlock
|
||||
|
||||
public bool CanParseWord(InputProvider inputs);
|
||||
|
||||
public bool CanParseWord(string word);
|
||||
|
||||
public BlockType GetBlockType();
|
||||
|
||||
public bool IsRepetitionType();
|
||||
|
||||
public bool CheckIsDoneParsingAndReset();
|
||||
public bool CheckIsDoneParsingAndReset(InputProvider inputs);
|
||||
}
|
@ -18,12 +18,16 @@ class IntegerBlock : BuildingBlockBase
|
||||
{
|
||||
using (inputs.GetLookaheadContext())
|
||||
{
|
||||
int number = 0;
|
||||
var success = int.TryParse(inputs.YieldWord(), out number);
|
||||
return success;
|
||||
return this.CanParseWord(inputs.YieldWord());
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Reports whether <paramref name="word"/> can be read as a 32-bit integer.
/// </summary>
/// <param name="word">The candidate word to test.</param>
/// <returns><c>true</c> when <see cref="int.TryParse(string, out int)"/> succeeds.</returns>
public override bool CanParseWord(string word)
{
    // Only the success flag matters here; the parsed value is discarded.
    return int.TryParse(word, out _);
}
|
||||
|
||||
public override BlockType GetBlockType()
|
||||
{
|
||||
return BlockType.Integer;
|
||||
|
@ -16,6 +16,24 @@ class StringBlock : BuildingBlockBase
|
||||
|
||||
/// <summary>
/// Reports whether the next word of <paramref name="inputs"/> would be accepted
/// as a string token.
/// </summary>
/// <param name="inputs">The provider whose next word is inspected.</param>
/// <returns>The result of the string overload applied to that word.</returns>
public override bool CanParseWord(InputProvider inputs)
{
    string upcomingWord;

    // The word is read inside a lookahead context; presumably the provider's
    // position is restored when the context is disposed (confirm in InputProvider).
    using (inputs.GetLookaheadContext())
    {
        upcomingWord = inputs.YieldWord();
    }

    return this.CanParseWord(upcomingWord);
}
|
||||
|
||||
/// <summary>
/// Reports whether <paramref name="word"/> counts as a string token.
/// </summary>
/// <param name="word">The candidate word to test.</param>
/// <returns>
/// <c>false</c> when an <c>IntegerBlock</c> can parse the word; otherwise <c>true</c>.
/// </returns>
public override bool CanParseWord(string word)
{
    // Here we need to ensure we are not matching any non-string tokens, since string can match pretty much anything
    var integerProbe = new IntegerBlock();
    return !integerProbe.CanParseWord(word);
}
|
||||
|
||||
|
@ -29,7 +29,7 @@ public class InputSchema
|
||||
{
|
||||
var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
|
||||
var token = nextBlock.ParseWord(inputs);
|
||||
if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset())
|
||||
if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset(inputs))
|
||||
{
|
||||
currentContext.lastProcessedBlockIndex++;
|
||||
currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count;
|
||||
@ -50,6 +50,16 @@ public class InputSchema
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Reports whether the building block the context currently points at accepts
/// <paramref name="word"/>.
/// </summary>
/// <param name="currentContext">The parsing context; its block index selects the block to ask.</param>
/// <param name="word">The candidate word to test.</param>
/// <returns>
/// <c>false</c> when the context has already finished; otherwise the answer of the
/// selected block.
/// </returns>
public bool CanProcessNextWord(InputSchemaContext currentContext, string word)
{
    // The short-circuit keeps a finished context from indexing into the block list.
    return !currentContext.HasFinished
        && this.buildingBlocks[currentContext.lastProcessedBlockIndex].CanParseWord(word);
}
|
||||
|
||||
public List<IToken> ProcessWordList(string[] words)
|
||||
{
|
||||
List<IToken> tokens = new List<IToken>();
|
||||
|
@ -38,6 +38,15 @@ public class InputSchemaBuilder
|
||||
return newInputSchemaBuilder;
|
||||
}
|
||||
|
||||
/// <summary>
/// Opens a nested schema layer that is marked as a greedy repetition.
/// </summary>
/// <returns>
/// A <c>RepetitionSchemaBuilder</c> created from this builder, with its repetition
/// type set to <c>RepetitionType.GreedyRepetition</c>.
/// </returns>
public InputSchemaBuilder Repeat()
{
    // add another layer of parsing
    return new RepetitionSchemaBuilder(this)
    {
        RepetitionType = RepetitionType.GreedyRepetition,
    };
}
|
||||
|
||||
public InputSchemaBuilder EndRepetition()
|
||||
{
|
||||
// return back to upper layer of parsing
|
||||
@ -54,6 +63,9 @@ public class InputSchemaBuilder
|
||||
case RepetitionType.FixedRepetition:
|
||||
oldInputSchemaBuilder.schema.AddBuildingBlock(new FixedRepetitionBlock(currentSchema, currentBuilder.NumRepetition));
|
||||
break;
|
||||
case RepetitionType.GreedyRepetition:
|
||||
oldInputSchemaBuilder.schema.AddBuildingBlock(new GreedyRepetitionBlock(currentSchema));
|
||||
break;
|
||||
default:
|
||||
throw new Exception("Unrecognized RepetitionType");
|
||||
}
|
||||
|
@ -38,6 +38,20 @@ public class TokenConverter
|
||||
return returnData;
|
||||
}
|
||||
|
||||
/// <summary>
/// Verifies that every raw token row has the same length as the first row, which
/// the column and grid conversions rely on.
/// </summary>
/// <exception cref="Exception">
/// Thrown when at least one row's length differs from the first row's length.
/// </exception>
private void CheckConversionPrerequisites()
{
    // An empty data set has no rows that could disagree in length. Returning here
    // also avoids indexing into an empty list below, which would throw an
    // ArgumentOutOfRangeException.
    if (this.rawTokens.Count == 0)
    {
        return;
    }

    // in order to convert rows to columns or grid we require every row to have the same length
    int rowLength = this.rawTokens[0].Count;

    foreach (var tokenRow in this.rawTokens)
    {
        if (tokenRow.Count != rowLength)
        {
            throw new Exception("Attempted to convert token dataset that is not able to be converted!");
        }
    }
}
|
||||
|
||||
public List<T[]> AsRows<T>()
|
||||
{
|
||||
var listRows = this.AsListRows<T>();
|
||||
@ -71,6 +85,7 @@ public class TokenConverter
|
||||
|
||||
public List<List<T>> AsListColumns<T>()
|
||||
{
|
||||
this.CheckConversionPrerequisites();
|
||||
var rows = AsListRows<T>();
|
||||
|
||||
var columns = new List<List<T>>();
|
||||
@ -92,7 +107,13 @@ public class TokenConverter
|
||||
|
||||
/// <summary>
/// Returns the parsed data as a jagged array, one inner array per row.
/// </summary>
/// <typeparam name="T">The element type the rows are converted to.</typeparam>
/// <returns>The result of <c>AsRows</c> copied into an array.</returns>
public T[][] AsGrid<T>()
{
    // Run the row-length check before any conversion happens.
    this.CheckConversionPrerequisites();

    return this.AsRows<T>().ToArray();
}
|
||||
|
||||
/// <summary>
/// Returns the raw token rows held by this converter, without any conversion
/// and without copying.
/// </summary>
public List<List<IToken>> AsRawData() => this.rawTokens;
|
||||
}
|
||||
|
@ -37,9 +37,14 @@ public class InputProvider
|
||||
return new InputProvider.LookaheadContext(this);
|
||||
}
|
||||
|
||||
/// <summary>
/// Reports whether the current position is still below the number of words.
/// </summary>
/// <returns><c>true</c> while <c>CurrentPosition</c> is less than <c>words.Length</c>.</returns>
public bool CanYieldWord() => this.CurrentPosition < this.words.Length;
|
||||
|
||||
public string YieldWord()
|
||||
{
|
||||
if (this.CurrentPosition > this.words.Length)
|
||||
if (!this.CanYieldWord())
|
||||
{
|
||||
return string.Empty;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user