feat: implement greedy repetition, ref: A24-13
Some checks failed
CI / linter (9.0.X, ubuntu-latest) (push) Failing after 2m6s
SonarQube Scan / SonarQube Trigger (push) Successful in 2m7s
CI / tests_linux (9.0.X, ubuntu-latest) (push) Has been skipped

This commit is contained in:
Simon Diesenreiter 2024-12-02 15:30:07 +01:00
parent e15190ecd6
commit 0d85132a32
11 changed files with 196 additions and 8 deletions

View File

@ -12,6 +12,9 @@ public class TextParserTests
private const string testInput3 = @"2 4 6 1 private const string testInput3 = @"2 4 6 1
3 5 7 2 3 5 7 2
4 6 8 3"; 4 6 8 3";
// Ragged input: three lines with 6, 3 and 5 tokens, mixing integers and strings.
// Used to exercise greedy repetition, where rows do not share a common length.
private const string testInput4 = @"2 ab ba fd er sd
8 cd dc
7 uh 6 yp rt";
[Fact] [Fact]
public void LineParser_TestSimpleRepetition() public void LineParser_TestSimpleRepetition()
@ -157,4 +160,43 @@ public class TextParserTests
Assert.Equal(2, columns[3][1]); Assert.Equal(2, columns[3][1]);
Assert.Equal(3, columns[3][2]); Assert.Equal(3, columns[3][2]);
} }
[Fact]
public void TextParser_TestGreedyRepetitionAsRows()
{
    // Schema: greedily repeat "one integer followed by a greedy run of strings".
    var schema = new InputSchemaBuilder()
        .Repeat()
            .Expect(InputType.Integer)
            .Repeat()
                .Expect(InputType.String)
            .EndRepetition()
        .EndRepetition()
        .Build();

    var rows = new TextParser(schema)
        .SetInputText(testInput4)
        .Parse()
        .AsRawData();

    // Expected token types, one inner array per input line.
    InputType[][] expectedTypes =
    {
        new[]
        {
            InputType.Integer, InputType.String, InputType.String,
            InputType.String, InputType.String, InputType.String,
        },
        new[] { InputType.Integer, InputType.String, InputType.String },
        new[]
        {
            InputType.Integer, InputType.String, InputType.Integer,
            InputType.String, InputType.String,
        },
    };

    // Row count first, then every row length, then the individual token types.
    Assert.Equal(expectedTypes.Length, rows.Count);

    for (int rowIndex = 0; rowIndex < expectedTypes.Length; rowIndex++)
    {
        Assert.Equal(expectedTypes[rowIndex].Length, rows[rowIndex].Count);
    }

    for (int rowIndex = 0; rowIndex < expectedTypes.Length; rowIndex++)
    {
        for (int tokenIndex = 0; tokenIndex < expectedTypes[rowIndex].Length; tokenIndex++)
        {
            Assert.Equal(expectedTypes[rowIndex][tokenIndex], rows[rowIndex][tokenIndex].GetInputType());
        }
    }
}
} }

View File

@ -12,6 +12,8 @@ abstract class BuildingBlockBase : IBuildingBlock
public abstract bool CanParseWord(InputProvider inputs); public abstract bool CanParseWord(InputProvider inputs);
/// <summary>
/// Decides whether this block accepts the given word. This overload works on the
/// word itself and takes no input provider.
/// </summary>
/// <param name="word">The candidate word to test.</param>
/// <returns><c>true</c> if the block accepts the word; otherwise <c>false</c>.</returns>
public abstract bool CanParseWord(string word);
public abstract BlockType GetBlockType(); public abstract BlockType GetBlockType();
public virtual bool IsRepetitionType() public virtual bool IsRepetitionType()
@ -19,7 +21,7 @@ abstract class BuildingBlockBase : IBuildingBlock
return false; return false;
} }
public virtual bool CheckIsDoneParsingAndReset() public virtual bool CheckIsDoneParsingAndReset(InputProvider inputs)
{ {
// most blocks are always done parsing after consuming a token // most blocks are always done parsing after consuming a token
// repetition blocks can consume multiple tokens // repetition blocks can consume multiple tokens

View File

@ -48,6 +48,21 @@ class FixedRepetitionBlock : BuildingBlockBase
return result; return result;
} }
/// <summary>
/// Decides whether the given word can be parsed by this fixed repetition.
/// </summary>
/// <param name="word">The candidate word to test.</param>
/// <returns>
/// <c>false</c> when the repetition count is zero; otherwise whatever the nested
/// schema reports for the word in the current context.
/// </returns>
public override bool CanParseWord(string word)
{
    // with a repetition count of zero nothing is accepted; otherwise the nested schema decides
    return this.repetitionCount != 0 && inputSchema.CanProcessNextWord(context, word);
}
public override BlockType GetBlockType() public override BlockType GetBlockType()
{ {
return BlockType.FixedRepetition; return BlockType.FixedRepetition;
@ -58,7 +73,7 @@ class FixedRepetitionBlock : BuildingBlockBase
return true; return true;
} }
public override bool CheckIsDoneParsingAndReset() public override bool CheckIsDoneParsingAndReset(InputProvider inputs)
{ {
// we are done parsing once all repetitions are exhausted // we are done parsing once all repetitions are exhausted
var result = this.repetitionCount == 0; var result = this.repetitionCount == 0;

View File

@ -0,0 +1,57 @@
namespace Parsing.Schema.BuildingBlocks;
using System.IO.Pipelines;
using Parsing.Tokenization;
/// <summary>
/// Repetition block that keeps applying its nested schema for as long as the
/// upcoming input still matches it.
/// </summary>
class GreedyRepetitionBlock : BuildingBlockBase
{
    private InputSchema inputSchema;
    private InputSchemaContext context;

    /// <summary>
    /// Creates a greedy repetition around the given nested schema and starts
    /// with a fresh parsing context for it.
    /// </summary>
    /// <param name="inputSchema">The schema that is applied repeatedly.</param>
    public GreedyRepetitionBlock(InputSchema inputSchema)
    {
        this.inputSchema = inputSchema;
        this.ResetContext();
    }

    /// <summary>
    /// Lets the nested schema consume the next word and returns the resulting token.
    /// </summary>
    /// <param name="inputs">The input provider to read from.</param>
    /// <returns>The token produced by the nested schema.</returns>
    public override IToken ParseWord(InputProvider inputs)
    {
        var token = this.inputSchema.ProcessNextWord(this.context, inputs);

        // once the nested schema cannot continue in its current state,
        // restart it with a fresh context
        bool canContinue = this.CanParseWord(inputs);
        if (!canContinue)
        {
            this.ResetContext();
        }

        return token;
    }

    /// <summary>
    /// Reports whether the nested schema accepts the upcoming word and the
    /// provider still has a word to yield.
    /// </summary>
    public override bool CanParseWord(InputProvider inputs)
        => this.inputSchema.CanProcessNextWord(this.context, inputs) && inputs.CanYieldWord();

    /// <summary>
    /// Reports whether the nested schema accepts the given word in the current context.
    /// </summary>
    public override bool CanParseWord(string word)
        => this.inputSchema.CanProcessNextWord(this.context, word);

    /// <summary>Identifies this block as a greedy repetition.</summary>
    public override BlockType GetBlockType() => BlockType.GreedyRepetition;

    /// <summary>Always <c>true</c>: this block is a repetition block.</summary>
    public override bool IsRepetitionType() => true;

    /// <summary>
    /// Checks whether the greedy repetition has ended and, if so, resets the
    /// nested context.
    /// </summary>
    /// <param name="inputs">The input provider used to look at the next word.</param>
    /// <returns><c>true</c> when the next word can no longer be parsed.</returns>
    public override bool CheckIsDoneParsingAndReset(InputProvider inputs)
    {
        // we are done parsing greedily once the next token doesn't match anymore
        bool isDone = !this.CanParseWord(inputs);
        if (isDone)
        {
            this.ResetContext();
        }

        return isDone;
    }

    // Replaces the current nested context with a fresh one from the schema.
    private void ResetContext()
    {
        this.context = this.inputSchema.CreateContext();
    }
}

View File

@ -8,9 +8,11 @@ public interface IBuildingBlock
public bool CanParseWord(InputProvider inputs); public bool CanParseWord(InputProvider inputs);
/// <summary>
/// Decides whether this block accepts the given word. This overload works on the
/// word itself and takes no input provider.
/// </summary>
/// <param name="word">The candidate word to test.</param>
/// <returns><c>true</c> if the block accepts the word; otherwise <c>false</c>.</returns>
public bool CanParseWord(string word);
public BlockType GetBlockType(); public BlockType GetBlockType();
public bool IsRepetitionType(); public bool IsRepetitionType();
public bool CheckIsDoneParsingAndReset(); public bool CheckIsDoneParsingAndReset(InputProvider inputs);
} }

View File

@ -18,12 +18,16 @@ class IntegerBlock : BuildingBlockBase
{ {
using (inputs.GetLookaheadContext()) using (inputs.GetLookaheadContext())
{ {
int number = 0; return this.CanParseWord(inputs.YieldWord());
var success = int.TryParse(inputs.YieldWord(), out number);
return success;
} }
} }
/// <summary>
/// Decides whether the given word can be read as an integer.
/// </summary>
/// <param name="word">The candidate word to test.</param>
/// <returns><c>true</c> if <see cref="int.TryParse(string, out int)"/> succeeds for the word.</returns>
public override bool CanParseWord(string word) => int.TryParse(word, out _);
public override BlockType GetBlockType() public override BlockType GetBlockType()
{ {
return BlockType.Integer; return BlockType.Integer;

View File

@ -16,6 +16,24 @@ class StringBlock : BuildingBlockBase
/// <summary>
/// Looks at the next word of the input inside a lookahead context and decides
/// whether this block accepts it.
/// </summary>
/// <param name="inputs">The input provider to look ahead into.</param>
/// <returns>The result of the string-based check for the word that was read.</returns>
public override bool CanParseWord(InputProvider inputs)
{
    string upcomingWord;

    // read the word inside a lookahead context, which is disposed before the check runs
    using (inputs.GetLookaheadContext())
    {
        upcomingWord = inputs.YieldWord();
    }

    return this.CanParseWord(upcomingWord);
}
/// <summary>
/// Decides whether the given word should be treated as a string token.
/// </summary>
/// <param name="word">The candidate word to test.</param>
/// <returns><c>false</c> if an <c>IntegerBlock</c> accepts the word; otherwise <c>true</c>.</returns>
public override bool CanParseWord(string word)
{
    // Here we need to ensure we are not matching any non-string tokens, since string can match pretty much anything
    var integerProbe = new IntegerBlock();
    return !integerProbe.CanParseWord(word);
}

View File

@ -29,7 +29,7 @@ public class InputSchema
{ {
var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex]; var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
var token = nextBlock.ParseWord(inputs); var token = nextBlock.ParseWord(inputs);
if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset()) if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset(inputs))
{ {
currentContext.lastProcessedBlockIndex++; currentContext.lastProcessedBlockIndex++;
currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count; currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count;
@ -50,6 +50,16 @@ public class InputSchema
} }
} }
/// <summary>
/// Decides whether the block at the context's current position accepts the given word.
/// </summary>
/// <param name="currentContext">The parsing context holding the current block index.</param>
/// <param name="word">The candidate word to test.</param>
/// <returns>
/// <c>false</c> if the context has already finished; otherwise the answer of the
/// block at <c>lastProcessedBlockIndex</c>.
/// </returns>
public bool CanProcessNextWord(InputSchemaContext currentContext, string word)
{
    // a finished context always answers false; the block list is not consulted in that case
    return !currentContext.HasFinished
        && this.buildingBlocks[currentContext.lastProcessedBlockIndex].CanParseWord(word);
}
public List<IToken> ProcessWordList(string[] words) public List<IToken> ProcessWordList(string[] words)
{ {
List<IToken> tokens = new List<IToken>(); List<IToken> tokens = new List<IToken>();

View File

@ -38,6 +38,15 @@ public class InputSchemaBuilder
return newInputSchemaBuilder; return newInputSchemaBuilder;
} }
/// <summary>
/// Opens a greedy repetition: returns a nested builder, created from this
/// builder, whose repetition type is set to greedy.
/// </summary>
/// <returns>The nested builder for the repeated part of the schema.</returns>
public InputSchemaBuilder Repeat()
{
    // add another layer of parsing
    return new RepetitionSchemaBuilder(this)
    {
        RepetitionType = RepetitionType.GreedyRepetition,
    };
}
public InputSchemaBuilder EndRepetition() public InputSchemaBuilder EndRepetition()
{ {
// return back to upper layer of parsing // return back to upper layer of parsing
@ -54,6 +63,9 @@ public class InputSchemaBuilder
case RepetitionType.FixedRepetition: case RepetitionType.FixedRepetition:
oldInputSchemaBuilder.schema.AddBuildingBlock(new FixedRepetitionBlock(currentSchema, currentBuilder.NumRepetition)); oldInputSchemaBuilder.schema.AddBuildingBlock(new FixedRepetitionBlock(currentSchema, currentBuilder.NumRepetition));
break; break;
case RepetitionType.GreedyRepetition:
oldInputSchemaBuilder.schema.AddBuildingBlock(new GreedyRepetitionBlock(currentSchema));
break;
default: default:
throw new Exception("Unrecognized RepetitionType"); throw new Exception("Unrecognized RepetitionType");
} }

View File

@ -38,6 +38,20 @@ public class TokenConverter
return returnData; return returnData;
} }
/// <summary>
/// Verifies that the raw token rows all have the same length, which is required
/// before they can be converted to columns or a grid.
/// </summary>
/// <exception cref="Exception">
/// Thrown when at least one row differs in length from the first row.
/// </exception>
private void CheckConversionPrerequisites()
{
    // an empty dataset has no rows to compare; without this guard, indexing
    // rawTokens[0] below would throw an ArgumentOutOfRangeException
    if (this.rawTokens.Count == 0)
    {
        return;
    }

    // in order to convert rows to columns or grid we require every row to have the same length
    int rowLength = this.rawTokens[0].Count;
    foreach (var tokenRow in this.rawTokens)
    {
        if (tokenRow.Count != rowLength)
        {
            throw new Exception("Attempted to convert token dataset that is not able to be converted!");
        }
    }
}
public List<T[]> AsRows<T>() public List<T[]> AsRows<T>()
{ {
var listRows = this.AsListRows<T>(); var listRows = this.AsListRows<T>();
@ -71,6 +85,7 @@ public class TokenConverter
public List<List<T>> AsListColumns<T>() public List<List<T>> AsListColumns<T>()
{ {
this.CheckConversionPrerequisites();
var rows = AsListRows<T>(); var rows = AsListRows<T>();
var columns = new List<List<T>>(); var columns = new List<List<T>>();
@ -92,7 +107,13 @@ public class TokenConverter
/// <summary>
/// Returns the parsed rows as a jagged array, after checking that all rows
/// have the same length.
/// </summary>
/// <typeparam name="T">The element type of the rows.</typeparam>
/// <returns>One array per row.</returns>
public T[][] AsGrid<T>()
{
    this.CheckConversionPrerequisites();
    return this.AsRows<T>().ToArray();
}
/// <summary>
/// Returns the stored token rows as they are, without conversion and without
/// checking row lengths.
/// </summary>
/// <returns>The stored list of token rows (the same instance, not a copy).</returns>
public List<List<IToken>> AsRawData() => this.rawTokens;
} }

View File

@ -37,9 +37,14 @@ public class InputProvider
return new InputProvider.LookaheadContext(this); return new InputProvider.LookaheadContext(this);
} }
/// <summary>
/// Reports whether the current position is still inside the word array.
/// </summary>
/// <returns><c>true</c> while <c>CurrentPosition</c> is below the number of words.</returns>
public bool CanYieldWord() => this.words.Length > this.CurrentPosition;
public string YieldWord() public string YieldWord()
{ {
if (this.CurrentPosition > this.words.Length) if (!this.CanYieldWord())
{ {
return string.Empty; return string.Empty;
} }