generated from Templates/Dotnet_Library
feat: implement greedy repetition, ref: A24-13
This commit is contained in:
parent
e15190ecd6
commit
0d85132a32
@ -12,6 +12,9 @@ public class TextParserTests
|
|||||||
private const string testInput3 = @"2 4 6 1
|
private const string testInput3 = @"2 4 6 1
|
||||||
3 5 7 2
|
3 5 7 2
|
||||||
4 6 8 3";
|
4 6 8 3";
|
||||||
|
private const string testInput4 = @"2 ab ba fd er sd
|
||||||
|
8 cd dc
|
||||||
|
7 uh 6 yp rt";
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void LineParser_TestSimpleRepetition()
|
public void LineParser_TestSimpleRepetition()
|
||||||
@ -157,4 +160,43 @@ public class TextParserTests
|
|||||||
Assert.Equal(2, columns[3][1]);
|
Assert.Equal(2, columns[3][1]);
|
||||||
Assert.Equal(3, columns[3][2]);
|
Assert.Equal(3, columns[3][2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
public void TextParser_TestGreedyRepetitionAsRows()
{
    // Schema under test: a greedy repetition of
    // (one integer followed by a greedy repetition of strings).
    var schema = new InputSchemaBuilder()
        .Repeat()
            .Expect(InputType.Integer)
            .Repeat()
                .Expect(InputType.String)
            .EndRepetition()
        .EndRepetition()
        .Build();

    var rows = new TextParser(schema)
        .SetInputText(testInput4)
        .Parse()
        .AsRawData();

    // Expected token types, one inner array per line of testInput4.
    InputType[][] expectedTypes =
    {
        new[] { InputType.Integer, InputType.String, InputType.String, InputType.String, InputType.String, InputType.String },
        new[] { InputType.Integer, InputType.String, InputType.String },
        new[] { InputType.Integer, InputType.String, InputType.Integer, InputType.String, InputType.String },
    };

    // Row shape first: rows may have different lengths with greedy repetition.
    Assert.Equal(expectedTypes.Length, rows.Count);
    for (int rowIndex = 0; rowIndex < expectedTypes.Length; rowIndex++)
    {
        Assert.Equal(expectedTypes[rowIndex].Length, rows[rowIndex].Count);
    }

    // Then the type of every token in every row.
    for (int rowIndex = 0; rowIndex < expectedTypes.Length; rowIndex++)
    {
        for (int tokenIndex = 0; tokenIndex < expectedTypes[rowIndex].Length; tokenIndex++)
        {
            Assert.Equal(expectedTypes[rowIndex][tokenIndex], rows[rowIndex][tokenIndex].GetInputType());
        }
    }
}
|
||||||
}
|
}
|
||||||
|
@ -12,6 +12,8 @@ abstract class BuildingBlockBase : IBuildingBlock
|
|||||||
|
|
||||||
public abstract bool CanParseWord(InputProvider inputs);
|
public abstract bool CanParseWord(InputProvider inputs);
|
||||||
|
|
||||||
|
/// <summary>
/// Checks whether this block accepts the given word, working on the word
/// itself rather than reading it from an <c>InputProvider</c>.
/// </summary>
/// <param name="word">The word to test.</param>
/// <returns><c>true</c> if the block can parse <paramref name="word"/>; otherwise <c>false</c>.</returns>
public abstract bool CanParseWord(string word);
|
||||||
|
|
||||||
public abstract BlockType GetBlockType();
|
public abstract BlockType GetBlockType();
|
||||||
|
|
||||||
public virtual bool IsRepetitionType()
|
public virtual bool IsRepetitionType()
|
||||||
@ -19,7 +21,7 @@ abstract class BuildingBlockBase : IBuildingBlock
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public virtual bool CheckIsDoneParsingAndReset()
|
public virtual bool CheckIsDoneParsingAndReset(InputProvider inputs)
|
||||||
{
|
{
|
||||||
// most blocks are always done parsing after consuming a token
|
// most blocks are always done parsing after consuming a token
|
||||||
// repetition blocks can consume multiple tokens
|
// repetition blocks can consume multiple tokens
|
||||||
|
@ -48,6 +48,21 @@ class FixedRepetitionBlock : BuildingBlockBase
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Checks whether the nested schema would accept <paramref name="word"/> as its next word.
/// </summary>
/// <param name="word">The word to test.</param>
/// <returns>
/// <c>false</c> when the repetition count has reached zero; otherwise the
/// answer of the nested schema for the current context.
/// </returns>
public override bool CanParseWord(string word)
{
    // with no repetitions left there is nothing this block can consume
    return this.repetitionCount != 0
        && inputSchema.CanProcessNextWord(context, word);
}
|
||||||
|
|
||||||
public override BlockType GetBlockType()
|
public override BlockType GetBlockType()
|
||||||
{
|
{
|
||||||
return BlockType.FixedRepetition;
|
return BlockType.FixedRepetition;
|
||||||
@ -58,7 +73,7 @@ class FixedRepetitionBlock : BuildingBlockBase
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public override bool CheckIsDoneParsingAndReset()
|
public override bool CheckIsDoneParsingAndReset(InputProvider inputs)
|
||||||
{
|
{
|
||||||
// we are done parsing once all repetitions are exhausted
|
// we are done parsing once all repetitions are exhausted
|
||||||
var result = this.repetitionCount == 0;
|
var result = this.repetitionCount == 0;
|
||||||
|
57
TextParser/Schema/BuildingBlocks/GreedyRepetitionBlock.cs
Normal file
57
TextParser/Schema/BuildingBlocks/GreedyRepetitionBlock.cs
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
namespace Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
|
using System.IO.Pipelines;
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// Repetition block without a fixed count: it keeps handing words to its
/// nested schema for as long as the upcoming word still matches.
/// </summary>
class GreedyRepetitionBlock : BuildingBlockBase
{
    private readonly InputSchema inputSchema;
    private InputSchemaContext context;

    /// <summary>
    /// Creates the block around the schema that is repeated and starts it
    /// with a fresh parsing context.
    /// </summary>
    /// <param name="inputSchema">The nested schema to repeat.</param>
    public GreedyRepetitionBlock(InputSchema inputSchema)
    {
        this.inputSchema = inputSchema;
        this.context = inputSchema.CreateContext();
    }

    /// <summary>
    /// Lets the nested schema consume the next word and returns its token.
    /// </summary>
    /// <param name="inputs">The provider the word is read from.</param>
    /// <returns>The token produced by the nested schema.</returns>
    public override IToken ParseWord(InputProvider inputs)
    {
        var token = this.inputSchema.ProcessNextWord(this.context, inputs);

        // when the nested schema cannot continue with the current context,
        // start over with a fresh one
        bool canContinue = this.CanParseWord(inputs);
        if (!canContinue)
        {
            this.ResetContext();
        }

        return token;
    }

    /// <summary>
    /// Checks whether the nested schema accepts the upcoming word and the
    /// provider still has a word to yield.
    /// </summary>
    /// <param name="inputs">The provider to look at.</param>
    public override bool CanParseWord(InputProvider inputs)
    {
        // the schema check runs first, the availability check second
        bool schemaAccepts = this.inputSchema.CanProcessNextWord(this.context, inputs);
        return schemaAccepts && inputs.CanYieldWord();
    }

    /// <summary>
    /// Checks whether the nested schema accepts the given word in the current context.
    /// </summary>
    /// <param name="word">The word to test.</param>
    public override bool CanParseWord(string word) =>
        this.inputSchema.CanProcessNextWord(this.context, word);

    /// <summary>Identifies this block as a greedy repetition.</summary>
    public override BlockType GetBlockType() => BlockType.GreedyRepetition;

    /// <summary>Always <c>true</c>: this block is a repetition.</summary>
    public override bool IsRepetitionType() => true;

    /// <summary>
    /// Reports whether the greedy repetition is finished, resetting the
    /// nested context when it is.
    /// </summary>
    /// <param name="inputs">The provider used to look at the upcoming word.</param>
    /// <returns><c>true</c> once the next word no longer matches; otherwise <c>false</c>.</returns>
    public override bool CheckIsDoneParsingAndReset(InputProvider inputs)
    {
        // we are done parsing greedily once the next token doesn't match anymore
        if (this.CanParseWord(inputs))
        {
            return false;
        }

        this.ResetContext();
        return true;
    }

    // Replaces the current nested context with a fresh one from the schema.
    private void ResetContext()
    {
        this.context = this.inputSchema.CreateContext();
    }
}
|
@ -8,9 +8,11 @@ public interface IBuildingBlock
|
|||||||
|
|
||||||
public bool CanParseWord(InputProvider inputs);
|
public bool CanParseWord(InputProvider inputs);
|
||||||
|
|
||||||
|
/// <summary>
/// Checks whether this block accepts the given word, working on the word
/// itself rather than reading it from an <c>InputProvider</c>.
/// </summary>
/// <param name="word">The word to test.</param>
/// <returns><c>true</c> if the block can parse <paramref name="word"/>; otherwise <c>false</c>.</returns>
public bool CanParseWord(string word);
|
||||||
|
|
||||||
public BlockType GetBlockType();
|
public BlockType GetBlockType();
|
||||||
|
|
||||||
public bool IsRepetitionType();
|
public bool IsRepetitionType();
|
||||||
|
|
||||||
public bool CheckIsDoneParsingAndReset();
|
public bool CheckIsDoneParsingAndReset(InputProvider inputs);
|
||||||
}
|
}
|
@ -18,12 +18,16 @@ class IntegerBlock : BuildingBlockBase
|
|||||||
{
|
{
|
||||||
using (inputs.GetLookaheadContext())
|
using (inputs.GetLookaheadContext())
|
||||||
{
|
{
|
||||||
int number = 0;
|
return this.CanParseWord(inputs.YieldWord());
|
||||||
var success = int.TryParse(inputs.YieldWord(), out number);
|
|
||||||
return success;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Checks whether <paramref name="word"/> can be parsed as an integer.
/// </summary>
/// <param name="word">The word to test.</param>
/// <returns><c>true</c> if <c>int.TryParse</c> succeeds on the word; otherwise <c>false</c>.</returns>
public override bool CanParseWord(string word)
{
    // only the success flag matters here, so the parsed value is discarded
    return int.TryParse(word, out _);
}
|
||||||
|
|
||||||
public override BlockType GetBlockType()
|
public override BlockType GetBlockType()
|
||||||
{
|
{
|
||||||
return BlockType.Integer;
|
return BlockType.Integer;
|
||||||
|
@ -16,6 +16,24 @@ class StringBlock : BuildingBlockBase
|
|||||||
|
|
||||||
/// <summary>
/// Looks at the upcoming word of <paramref name="inputs"/> and checks
/// whether this block accepts it.
/// </summary>
/// <param name="inputs">The provider the word is read from, inside a lookahead context.</param>
/// <returns>The result of the word-based check for the word that was read.</returns>
public override bool CanParseWord(InputProvider inputs)
{
    string upcomingWord = string.Empty;

    // read the word inside a lookahead context, then decide outside of it
    using (inputs.GetLookaheadContext())
    {
        upcomingWord = inputs.YieldWord();
    }

    bool accepted = this.CanParseWord(upcomingWord);
    return accepted;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Checks whether <paramref name="word"/> should be treated as a string.
/// </summary>
/// <param name="word">The word to test.</param>
/// <returns><c>false</c> if an integer block accepts the word; otherwise <c>true</c>.</returns>
public override bool CanParseWord(string word)
{
    // A string can match pretty much anything, so words claimed by a
    // non-string block must be rejected here; the integer block is the
    // only one checked.
    var integerProbe = new IntegerBlock();
    return !integerProbe.CanParseWord(word);
}
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ public class InputSchema
|
|||||||
{
|
{
|
||||||
var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
|
var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
|
||||||
var token = nextBlock.ParseWord(inputs);
|
var token = nextBlock.ParseWord(inputs);
|
||||||
if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset())
|
if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset(inputs))
|
||||||
{
|
{
|
||||||
currentContext.lastProcessedBlockIndex++;
|
currentContext.lastProcessedBlockIndex++;
|
||||||
currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count;
|
currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count;
|
||||||
@ -50,6 +50,16 @@ public class InputSchema
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Checks whether the block the context currently points at accepts <paramref name="word"/>.
/// </summary>
/// <param name="currentContext">The parsing context holding the current block index.</param>
/// <param name="word">The word to test.</param>
/// <returns>
/// <c>false</c> if the context has already finished; otherwise the answer
/// of the block at the context's current index.
/// </returns>
public bool CanProcessNextWord(InputSchemaContext currentContext, string word)
{
    // a finished context is never asked for a block, so the index lookup
    // only happens while the context is still active
    return !currentContext.HasFinished
        && this.buildingBlocks[currentContext.lastProcessedBlockIndex].CanParseWord(word);
}
|
||||||
|
|
||||||
public List<IToken> ProcessWordList(string[] words)
|
public List<IToken> ProcessWordList(string[] words)
|
||||||
{
|
{
|
||||||
List<IToken> tokens = new List<IToken>();
|
List<IToken> tokens = new List<IToken>();
|
||||||
|
@ -38,6 +38,15 @@ public class InputSchemaBuilder
|
|||||||
return newInputSchemaBuilder;
|
return newInputSchemaBuilder;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Opens a greedy repetition: returns a nested builder, marked as greedy,
/// that has this builder as its parent.
/// </summary>
/// <returns>The nested builder for the repeated part of the schema.</returns>
public InputSchemaBuilder Repeat()
{
    // add another layer of parsing on top of this builder
    return new RepetitionSchemaBuilder(this)
    {
        RepetitionType = RepetitionType.GreedyRepetition,
    };
}
|
||||||
|
|
||||||
public InputSchemaBuilder EndRepetition()
|
public InputSchemaBuilder EndRepetition()
|
||||||
{
|
{
|
||||||
// return back to upper layer of parsing
|
// return back to upper layer of parsing
|
||||||
@ -54,6 +63,9 @@ public class InputSchemaBuilder
|
|||||||
case RepetitionType.FixedRepetition:
|
case RepetitionType.FixedRepetition:
|
||||||
oldInputSchemaBuilder.schema.AddBuildingBlock(new FixedRepetitionBlock(currentSchema, currentBuilder.NumRepetition));
|
oldInputSchemaBuilder.schema.AddBuildingBlock(new FixedRepetitionBlock(currentSchema, currentBuilder.NumRepetition));
|
||||||
break;
|
break;
|
||||||
|
case RepetitionType.GreedyRepetition:
|
||||||
|
oldInputSchemaBuilder.schema.AddBuildingBlock(new GreedyRepetitionBlock(currentSchema));
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
throw new Exception("Unrecognized RepetitionType");
|
throw new Exception("Unrecognized RepetitionType");
|
||||||
}
|
}
|
||||||
|
@ -38,6 +38,20 @@ public class TokenConverter
|
|||||||
return returnData;
|
return returnData;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verifies that the raw token rows can be converted to columns or a grid,
/// which requires every row to have the same length.
/// </summary>
/// <exception cref="Exception">Thrown when at least one row differs in length from the first row.</exception>
private void CheckConversionPrerequisites()
{
    // An empty dataset has no rows that could disagree in length; without
    // this guard the index access below would throw an
    // ArgumentOutOfRangeException instead of the intended check running.
    if (this.rawTokens.Count == 0)
    {
        return;
    }

    // in order to convert rows to columns or grid we require every row to have the same length
    int expectedLength = this.rawTokens[0].Count;

    foreach (var tokenRow in this.rawTokens)
    {
        if (tokenRow.Count != expectedLength)
        {
            throw new Exception("Attempted to convert token dataset that is not able to be converted!");
        }
    }
}
|
||||||
|
|
||||||
public List<T[]> AsRows<T>()
|
public List<T[]> AsRows<T>()
|
||||||
{
|
{
|
||||||
var listRows = this.AsListRows<T>();
|
var listRows = this.AsListRows<T>();
|
||||||
@ -71,6 +85,7 @@ public class TokenConverter
|
|||||||
|
|
||||||
public List<List<T>> AsListColumns<T>()
|
public List<List<T>> AsListColumns<T>()
|
||||||
{
|
{
|
||||||
|
this.CheckConversionPrerequisites();
|
||||||
var rows = AsListRows<T>();
|
var rows = AsListRows<T>();
|
||||||
|
|
||||||
var columns = new List<List<T>>();
|
var columns = new List<List<T>>();
|
||||||
@ -92,7 +107,13 @@ public class TokenConverter
|
|||||||
|
|
||||||
public T[][] AsGrid<T>()
|
public T[][] AsGrid<T>()
|
||||||
{
|
{
|
||||||
|
this.CheckConversionPrerequisites();
|
||||||
var rowsList = AsRows<T>();
|
var rowsList = AsRows<T>();
|
||||||
return rowsList.ToArray();
|
return rowsList.ToArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Returns the token rows without any conversion. The returned list is the
/// converter's own instance, not a copy.
/// </summary>
/// <returns>The raw token rows.</returns>
public List<List<IToken>> AsRawData() => this.rawTokens;
|
||||||
}
|
}
|
||||||
|
@ -37,9 +37,14 @@ public class InputProvider
|
|||||||
return new InputProvider.LookaheadContext(this);
|
return new InputProvider.LookaheadContext(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reports whether another word is available at the current position.
/// </summary>
/// <returns><c>true</c> while the current position is below the number of words; otherwise <c>false</c>.</returns>
public bool CanYieldWord() => this.CurrentPosition < this.words.Length;
|
||||||
|
|
||||||
public string YieldWord()
|
public string YieldWord()
|
||||||
{
|
{
|
||||||
if (this.CurrentPosition > this.words.Length)
|
if (!this.CanYieldWord())
|
||||||
{
|
{
|
||||||
return string.Empty;
|
return string.Empty;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user