feat: added initial implementation of TextParser, ref: A24-3
Some checks failed
CI / linter (9.0.X, ubuntu-latest) (push) Failing after 1m28s
CI / tests_linux (9.0.X, ubuntu-latest) (push) Has been skipped
SonarQube Scan / SonarQube Trigger (push) Successful in 1m40s
Upload Python Package / Create Release (push) Successful in 9s
Upload Python Package / deploy (push) Failing after 1m13s

This commit is contained in:
Simon Diesenreiter 2024-12-01 21:31:50 +01:00
parent 683b03ffe0
commit a4e4ee2b85
23 changed files with 611 additions and 31 deletions

View File

@ -2,6 +2,9 @@
name: CI name: CI
env:
SKIP_MAKE_SETUP_CHECK: 'true'
# Controls when the workflow will run # Controls when the workflow will run
on: on:
# Triggers the workflow on push or pull request events but only for the main branch # Triggers the workflow on push or pull request events but only for the main branch

View File

@ -2,6 +2,9 @@ name: Upload Python Package
permissions: permissions:
contents: write contents: write
env:
SKIP_MAKE_SETUP_CHECK: 'true'
on: on:
push: push:
# Sequence of patterns matched against refs/tags # Sequence of patterns matched against refs/tags

View File

@ -2,7 +2,7 @@
.PHONY: issetup .PHONY: issetup
issetup: issetup:
@[ -f .git/hooks/commit-msg ] || (echo "You must run 'make setup' first to initialize the repo!" && exit 1) @[ -f .git/hooks/commit-msg ] || [ -v SKIP_MAKE_SETUP_CHECK ] || (echo "You must run 'make setup' first to initialize the repo!" && exit 1)
.PHONY: setup .PHONY: setup
setup: setup:

View File

@ -0,0 +1,93 @@
namespace TextParser.Tests;
using Parsing;
using Parsing.Schema;
using Parsing.Schema.BuildingBlocks;
using Parsing.Tokenization;
/// <summary>
/// Exercises the text parser with flat, repeated and nested-repetition schemas.
/// </summary>
public class TextParserTests
{
    private const string testInput1 = "2 4 6 8";
    private const string testInput2 = "2 ab ba 8 cd dc";

    /// <summary>Four integers described as a single repetition of length four.</summary>
    [Fact]
    public void TestSimpleRepetition()
    {
        var schema = new InputSchemaBuilder()
            .Repeat(4)
                .Expect(InputType.Integer)
            .EndRepetition()
            .Build();

        var tokens = new TextParser(schema).ParseLine(testInput1);

        int[] expectedValues = { 2, 4, 6, 8 };
        Assert.Equal(4, tokens.Count);
        for (var i = 0; i < expectedValues.Length; i++)
        {
            Assert.Equal(InputType.Integer, tokens[i].GetInputType());
        }
        for (var i = 0; i < expectedValues.Length; i++)
        {
            Assert.Equal(expectedValues[i], (tokens[i] as IntegerToken)?.GetValue());
        }
    }

    /// <summary>Four integers described by four consecutive Expect calls.</summary>
    [Fact]
    public void TestSimpleInput()
    {
        var schema = new InputSchemaBuilder()
            .Expect(InputType.Integer)
            .Expect(InputType.Integer)
            .Expect(InputType.Integer)
            .Expect(InputType.Integer)
            .Build();

        var tokens = new TextParser(schema).ParseLine(testInput1);

        int[] expectedValues = { 2, 4, 6, 8 };
        Assert.Equal(4, tokens.Count);
        for (var i = 0; i < expectedValues.Length; i++)
        {
            Assert.Equal(InputType.Integer, tokens[i].GetInputType());
        }
        for (var i = 0; i < expectedValues.Length; i++)
        {
            Assert.Equal(expectedValues[i], (tokens[i] as IntegerToken)?.GetValue());
        }
    }

    /// <summary>Two outer repetitions of "integer followed by two strings".</summary>
    [Fact]
    public void TestNestedRepetition()
    {
        var schema = new InputSchemaBuilder()
            .Repeat(2)
                .Expect(InputType.Integer)
                .Repeat(2)
                    .Expect(InputType.String)
                .EndRepetition()
            .EndRepetition()
            .Build();

        var tokens = new TextParser(schema).ParseLine(testInput2);

        InputType[] expectedTypes =
        {
            InputType.Integer, InputType.String, InputType.String,
            InputType.Integer, InputType.String, InputType.String,
        };
        Assert.Equal(6, tokens.Count);
        for (var i = 0; i < expectedTypes.Length; i++)
        {
            Assert.Equal(expectedTypes[i], tokens[i].GetInputType());
        }

        Assert.Equal(2, (tokens[0] as IntegerToken)?.GetValue());
        Assert.Equal("ab", (tokens[1] as StringToken)?.GetValue());
        Assert.Equal("ba", (tokens[2] as StringToken)?.GetValue());
        Assert.Equal(8, (tokens[3] as IntegerToken)?.GetValue());
        Assert.Equal("cd", (tokens[4] as StringToken)?.GetValue());
        Assert.Equal("dc", (tokens[5] as StringToken)?.GetValue());
    }
}

View File

@ -1,14 +0,0 @@
namespace TextParser.Tests;
using TextParser;
/// <summary>
/// Checks both greeting variants returned by HelloWorldProvider.
/// </summary>
public class UnitTest1
{
    [Fact]
    public void Test1()
    {
        var provider = new HelloWorldProvider();

        // Named greeting first, then the default used when no name is passed.
        Assert.Equal("Hello, Simon!", provider.GetHelloWorld("Simon"));
        Assert.Equal("Hello world!", provider.GetHelloWorld());
    }
}

View File

@ -1,16 +0,0 @@
namespace TextParser;
/// <summary>
/// Produces a greeting, optionally addressed to a specific name.
/// </summary>
public class HelloWorldProvider
{
    /// <summary>
    /// Builds the greeting text.
    /// </summary>
    /// <param name="name">Name to greet; null or empty selects the generic greeting.</param>
    /// <returns>"Hello world!" without a name, otherwise "Hello, {name}!".</returns>
    public string GetHelloWorld(string? name = null)
    {
        return string.IsNullOrEmpty(name)
            ? "Hello world!"
            : $"Hello, {name}!";
    }
}

View File

@ -0,0 +1,6 @@
namespace Parsing.Schema;
/// <summary>
/// Kinds of building blocks a schema can contain. InputType and RepetitionType
/// reuse these numeric values, so the member order must stay stable.
/// </summary>
public enum BlockType
{
    /// <summary>A single integer word.</summary>
    Integer,

    /// <summary>A single string word.</summary>
    String,

    /// <summary>A nested schema repeated a fixed number of times.</summary>
    FixedRepetition,

    /// <summary>Greedy repetition; declared here, no block in this change implements it.</summary>
    GreedyRepetition,
}

View File

@ -0,0 +1,28 @@
namespace Parsing.Schema.BuildingBlocks;
using Parsing.Tokenization;
/// <summary>
/// Common base for schema building blocks. Supplies the defaults for
/// non-repeating blocks; repetition blocks override the two virtual members.
/// </summary>
abstract class BuildingBlockBase : IBuildingBlock
{
    public BuildingBlockBase() { }

    /// <summary>Turns input from <paramref name="inputs"/> into a token.</summary>
    public abstract IToken ParseWord(InputProvider inputs);

    /// <summary>Reports whether this block accepts the upcoming input.</summary>
    public abstract bool CanParseWord(InputProvider inputs);

    /// <summary>Identifies the concrete kind of block.</summary>
    public abstract BlockType GetBlockType();

    /// <summary>False by default; repetition blocks override this to return true.</summary>
    public virtual bool IsRepetitionType() => false;

    /// <summary>
    /// True by default: a plain block is finished as soon as it has consumed
    /// one token. Repetition blocks, which may consume several tokens, override this.
    /// </summary>
    public virtual bool CheckIsDoneParsingAndReset() => true;
}

View File

@ -0,0 +1,72 @@
namespace Parsing.Schema.BuildingBlocks;
using System.IO.Pipelines;
using Parsing.Tokenization;
/// <summary>
/// Building block that runs a nested schema a fixed number of times, handing
/// out one token per <see cref="ParseWord"/> call.
/// </summary>
class FixedRepetitionBlock : BuildingBlockBase
{
    private InputSchema nestedSchema;
    private InputSchemaContext nestedContext;

    // Repetitions still outstanding in the current pass.
    private int remainingRepetitions;

    // Configured repetition count, restored when the block resets.
    private int configuredRepetitions;

    /// <param name="inputSchema">Schema describing a single repetition.</param>
    /// <param name="repetitionCount">How many times the schema is applied.</param>
    public FixedRepetitionBlock(InputSchema inputSchema, int repetitionCount)
    {
        this.nestedSchema = inputSchema;
        this.configuredRepetitions = repetitionCount;
        this.remainingRepetitions = repetitionCount;
        this.nestedContext = this.nestedSchema.CreateContext();
    }

    /// <summary>
    /// Delegates to the nested schema. When the nested schema reports it has
    /// finished, one repetition is counted off and, if more remain, a fresh
    /// context is started.
    /// </summary>
    public override IToken ParseWord(InputProvider inputs)
    {
        var token = this.nestedSchema.ProcessNextWord(this.nestedContext, inputs);
        if (!this.nestedContext.HasFinished)
        {
            return token;
        }

        this.remainingRepetitions--;
        if (this.remainingRepetitions > 0)
        {
            this.nestedContext = this.nestedSchema.CreateContext();
        }
        return token;
    }

    /// <summary>
    /// False once no repetitions remain; otherwise whatever the nested schema
    /// reports for the upcoming input.
    /// </summary>
    public override bool CanParseWord(InputProvider inputs)
    {
        if (this.remainingRepetitions == 0)
        {
            return false;
        }
        return this.nestedSchema.CanProcessNextWord(this.nestedContext, inputs);
    }

    public override BlockType GetBlockType() => BlockType.FixedRepetition;

    public override bool IsRepetitionType() => true;

    /// <summary>
    /// Returns true once every repetition has been consumed and, in that case,
    /// restores the counter and context so the block can be used again.
    /// </summary>
    public override bool CheckIsDoneParsingAndReset()
    {
        if (this.remainingRepetitions != 0)
        {
            return false;
        }

        this.remainingRepetitions = this.configuredRepetitions;
        this.nestedContext = this.nestedSchema.CreateContext();
        return true;
    }
}

View File

@ -0,0 +1,16 @@
namespace Parsing.Schema.BuildingBlocks;
using Parsing.Tokenization;
/// <summary>
/// One element of an input schema: decides whether it accepts the upcoming
/// input and turns that input into a token.
/// </summary>
public interface IBuildingBlock
{
    /// <summary>Produces a token from <paramref name="inputs"/>.</summary>
    IToken ParseWord(InputProvider inputs);

    /// <summary>Reports whether this block accepts the upcoming input.</summary>
    bool CanParseWord(InputProvider inputs);

    /// <summary>Identifies the kind of block.</summary>
    BlockType GetBlockType();

    /// <summary>True for blocks that may consume more than one token.</summary>
    bool IsRepetitionType();

    /// <summary>
    /// Reports whether the block has finished parsing; as the name indicates,
    /// implementations may reset their internal state when it has.
    /// </summary>
    bool CheckIsDoneParsingAndReset();
}

View File

@ -0,0 +1,31 @@
namespace Parsing.Schema.BuildingBlocks;
using Parsing.Tokenization;
/// <summary>
/// Building block that accepts a single word parseable as a 32-bit integer.
/// </summary>
class IntegerBlock : BuildingBlockBase
{
    public IntegerBlock() { }

    /// <summary>Consumes the next word and wraps it in an <see cref="IntegerToken"/>.</summary>
    public override IToken ParseWord(InputProvider inputs) => new IntegerToken(inputs.YieldWord());

    /// <summary>
    /// Peeks at the next word inside a lookahead context, which rewinds the
    /// provider's position on disposal, and reports whether it parses as an int.
    /// </summary>
    public override bool CanParseWord(InputProvider inputs)
    {
        using var lookahead = inputs.GetLookaheadContext();
        return int.TryParse(inputs.YieldWord(), out _);
    }

    public override BlockType GetBlockType() => BlockType.Integer;
}

View File

@ -0,0 +1,26 @@
namespace Parsing.Schema.BuildingBlocks;
using Parsing.Tokenization;
/// <summary>
/// Building block that accepts any single word as a string.
/// </summary>
class StringBlock : BuildingBlockBase
{
    public StringBlock() { }

    /// <summary>Consumes the next word and wraps it in a <see cref="StringToken"/>.</summary>
    public override IToken ParseWord(InputProvider inputs) => new StringToken(inputs.YieldWord());

    /// <summary>Always true: the input is not inspected at all.</summary>
    public override bool CanParseWord(InputProvider inputs) => true;

    public override BlockType GetBlockType() => BlockType.String;
}

View File

@ -0,0 +1,71 @@
namespace Parsing.Schema;
using Parsing.Schema;
using Parsing.Schema.BuildingBlocks;
using Parsing.Tokenization;
using System.Collections;
/// <summary>
/// Mutable cursor tracking how far a schema has progressed through its blocks.
/// </summary>
public class InputSchemaContext
{
    /// <summary>
    /// Starts at 0. Despite the name, InputSchema uses it as the index of the
    /// block that handles the next word.
    /// </summary>
    public int lastProcessedBlockIndex { get; set; }

    /// <summary>Starts false; set by InputSchema once the index passes the last block.</summary>
    public bool HasFinished { get; set; }
}
/// <summary>
/// Ordered list of building blocks describing the expected shape of an input
/// line. Progress through the blocks lives in an <see cref="InputSchemaContext"/>,
/// so one schema can be walked by several independent contexts.
/// </summary>
public class InputSchema
{
    private List<IBuildingBlock> buildingBlocks;

    public InputSchema()
    {
        buildingBlocks = new List<IBuildingBlock>();
    }

    /// <summary>Appends <paramref name="buildingBlock"/> to the end of the schema.</summary>
    public void AddBuildingBlock(IBuildingBlock buildingBlock)
    {
        this.buildingBlocks.Add(buildingBlock);
    }

    /// <summary>
    /// Lets the current block parse one token and advances the context when
    /// that block is done.
    /// </summary>
    /// <param name="currentContext">Cursor to read and update.</param>
    /// <param name="inputs">Source of words.</param>
    /// <returns>The token produced by the current block.</returns>
    /// <remarks>
    /// Callers are expected to check <see cref="CanProcessNextWord"/> first;
    /// a context whose index is past the last block makes the list indexer throw.
    /// </remarks>
    public IToken ProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
    {
        var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
        var token = nextBlock.ParseWord(inputs);

        // Plain blocks are done after one token; repetition blocks decide for
        // themselves (and reset their state) via CheckIsDoneParsingAndReset.
        if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset())
        {
            currentContext.lastProcessedBlockIndex++;
            currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count;
        }
        return token;
    }

    /// <summary>
    /// Reports whether the block at the context's current position accepts the
    /// upcoming input. Runs inside a lookahead context, which restores the
    /// provider's position on exit.
    /// </summary>
    /// <returns>
    /// False when the context has finished or points past the last block
    /// (including the case of a schema without any blocks); otherwise the
    /// current block's answer.
    /// </returns>
    public bool CanProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
    {
        using (inputs.GetLookaheadContext())
        {
            // A fresh context has HasFinished == false even for an empty
            // schema, so the index must be bounds-checked explicitly.
            if (currentContext.HasFinished
                || currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count)
            {
                return false;
            }

            var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
            return nextBlock.CanParseWord(inputs);
        }
    }

    /// <summary>
    /// Tokenizes <paramref name="words"/> with a fresh context, stopping as soon
    /// as <see cref="CanProcessNextWord"/> returns false.
    /// </summary>
    /// <returns>Tokens produced up to that point, in input order.</returns>
    public IList<IToken> ProcessWordList(string[] words)
    {
        List<IToken> tokens = new List<IToken>();
        InputProvider inputs = new InputProvider(words);
        var overallContext = this.CreateContext();

        while (this.CanProcessNextWord(overallContext, inputs))
        {
            tokens.Add(this.ProcessNextWord(overallContext, inputs));
        }
        return tokens;
    }

    /// <summary>Creates a context positioned at the first block.</summary>
    public InputSchemaContext CreateContext()
    {
        return new InputSchemaContext();
    }
}

View File

@ -0,0 +1,68 @@
namespace Parsing.Schema;
using Parsing.Schema.BuildingBlocks;
/// <summary>
/// Fluent builder for <see cref="InputSchema"/>. <see cref="Repeat"/> opens a
/// nested builder layer and <see cref="EndRepetition"/> closes it again.
/// </summary>
public class InputSchemaBuilder
{
    private InputSchema schema = new InputSchema();

    public InputSchemaBuilder()
    {
    }

    /// <summary>Appends a block expecting one word of the given type.</summary>
    /// <param name="type">Kind of word to expect.</param>
    /// <returns>This builder, for chaining.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="type"/> is not a known input type.</exception>
    public InputSchemaBuilder Expect(InputType type)
    {
        IBuildingBlock block = type switch
        {
            InputType.String => new StringBlock(),
            InputType.Integer => new IntegerBlock(),
            _ => throw new ArgumentOutOfRangeException(nameof(type), type, "Unrecognized InputType"),
        };

        schema.AddBuildingBlock(block);
        return this;
    }

    /// <summary>
    /// Opens a fixed repetition. Blocks added to the returned builder belong to
    /// the repetition until <see cref="EndRepetition"/> is called on it.
    /// </summary>
    /// <param name="repetitionCount">Number of repetitions; must not be negative.</param>
    /// <returns>A nested builder collecting the repeated blocks.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="repetitionCount"/> is negative.</exception>
    public InputSchemaBuilder Repeat(int repetitionCount)
    {
        // A negative count can never reach zero by counting down, so the
        // resulting repetition block would never report completion.
        if (repetitionCount < 0)
        {
            throw new ArgumentOutOfRangeException(
                nameof(repetitionCount), repetitionCount, "Repetition count must not be negative.");
        }

        // add another layer of parsing
        var newInputSchemaBuilder = new RepetitionSchemaBuilder(this);
        newInputSchemaBuilder.NumRepetition = repetitionCount;
        newInputSchemaBuilder.RepetitionType = RepetitionType.FixedRepetition;
        return newInputSchemaBuilder;
    }

    /// <summary>
    /// Closes the innermost open repetition and adds it as a single block to
    /// the builder that opened it.
    /// </summary>
    /// <returns>The builder of the enclosing layer.</returns>
    /// <exception cref="InvalidOperationException">No repetition is open on this builder.</exception>
    /// <exception cref="NotSupportedException">The repetition type has no block implementation.</exception>
    public InputSchemaBuilder EndRepetition()
    {
        // return back to upper layer of parsing
        if (this is not RepetitionSchemaBuilder currentBuilder)
        {
            throw new InvalidOperationException("Invalid repetition definitions!");
        }

        var oldInputSchemaBuilder = currentBuilder.UpperLayerBuilder;
        var currentSchema = currentBuilder.Build();

        switch (currentBuilder.RepetitionType)
        {
            case RepetitionType.FixedRepetition:
                oldInputSchemaBuilder.schema.AddBuildingBlock(
                    new FixedRepetitionBlock(currentSchema, currentBuilder.NumRepetition));
                break;
            default:
                throw new NotSupportedException("Unrecognized RepetitionType");
        }

        return oldInputSchemaBuilder;
    }

    /// <summary>
    /// Returns the schema of this builder layer. When called on a builder
    /// returned by <see cref="Repeat"/>, that is only the nested repetition's schema.
    /// </summary>
    public InputSchema Build()
    {
        return schema;
    }
}

View File

@ -0,0 +1,7 @@
namespace Parsing.Schema;
/// <summary>
/// Word kinds a schema can expect. Values mirror the matching BlockType members.
/// </summary>
public enum InputType
{
    /// <summary>An integer word.</summary>
    Integer = BlockType.Integer,

    /// <summary>A string word.</summary>
    String = BlockType.String,
}

View File

@ -0,0 +1,15 @@
namespace Parsing.Schema;
/// <summary>
/// Builder layer for the blocks inside a repetition. It remembers the builder
/// that opened the repetition so that builder can be resumed afterwards.
/// </summary>
public class RepetitionSchemaBuilder : InputSchemaBuilder
{
    /// <summary>Builder of the enclosing layer.</summary>
    public InputSchemaBuilder UpperLayerBuilder { get; set; }

    /// <summary>Number of times the nested schema is repeated.</summary>
    public int NumRepetition { get; set; }

    /// <summary>Kind of repetition this layer describes.</summary>
    public RepetitionType RepetitionType { get; set; }

    /// <param name="upperLayerBuilder">Builder that opened this repetition.</param>
    public RepetitionSchemaBuilder(InputSchemaBuilder upperLayerBuilder)
        => UpperLayerBuilder = upperLayerBuilder;
}

View File

@ -0,0 +1,7 @@
namespace Parsing.Schema;
/// <summary>
/// Repetition kinds. Values mirror the matching BlockType members.
/// </summary>
public enum RepetitionType
{
    /// <summary>Repeat a nested schema a fixed number of times.</summary>
    FixedRepetition = BlockType.FixedRepetition,

    /// <summary>Greedy repetition; mirrors BlockType.GreedyRepetition.</summary>
    GreedyRepetition = BlockType.GreedyRepetition,
}

36
TextParser/TextParser.cs Normal file
View File

@ -0,0 +1,36 @@
namespace Parsing;
using Parsing.Schema;
using Parsing.Tokenization;
/// <summary>
/// Splits a line of text into words and tokenizes them against an input schema.
/// </summary>
public class TextParser
{
    private string[] delimiters;
    private bool removeEmptyEntries = false;
    private InputSchema schema;

    // Created in the constructor; not read anywhere in this class.
    private InputSchemaContext context;

    /// <param name="schema">Schema the words are matched against.</param>
    /// <param name="delimiters">Word separators; a single space when null.</param>
    /// <param name="removeEmptyEntries">Whether empty words are dropped after splitting.</param>
    public TextParser(InputSchema schema, string[]? delimiters = null, bool removeEmptyEntries = true)
    {
        this.delimiters = delimiters ?? new string[] { " " };
        this.removeEmptyEntries = removeEmptyEntries;
        this.schema = schema;
        this.context = schema.CreateContext();
    }

    /// <summary>Splits <paramref name="line"/> on the configured delimiters, trimming every entry.</summary>
    private string[] ParseLineIntoWords(string line)
    {
        var splitOptions = this.removeEmptyEntries
            ? StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries
            : StringSplitOptions.TrimEntries;
        return line.Split(this.delimiters, splitOptions);
    }

    /// <summary>Tokenizes one line of text according to the schema.</summary>
    /// <returns>Tokens in input order, as produced by the schema.</returns>
    public IList<IToken> ParseLine(string line)
        => this.schema.ProcessWordList(this.ParseLineIntoWords(line));
}

View File

@ -0,0 +1,10 @@
namespace Parsing.Tokenization;
using Parsing.Schema;
/// <summary>
/// A parsed word together with the kind of input it represents.
/// </summary>
public interface IToken
{
    /// <summary>The word's text.</summary>
    string GetText();

    /// <summary>The kind of input this token represents.</summary>
    InputType GetInputType();
}

View File

@ -0,0 +1,6 @@
namespace Parsing.Tokenization;
/// <summary>
/// A token that additionally exposes its content as a typed value.
/// </summary>
/// <typeparam name="T">Type of the value.</typeparam>
public interface IValueToken<T> : IToken
{
    /// <summary>The token's content as <typeparamref name="T"/>.</summary>
    T GetValue();
}

View File

@ -0,0 +1,56 @@
namespace Parsing.Tokenization;
/// <summary>
/// Sequential reader over an array of words, with scoped lookahead that
/// rewinds the read position when the scope ends.
/// </summary>
public class InputProvider
{
    /// <summary>
    /// Scope that snapshots the provider's read position on creation and
    /// restores it on <see cref="Dispose"/>. Scopes may be nested.
    /// </summary>
    public class LookaheadContext : IDisposable
    {
        private InputProvider contextedProvider;
        private int initialPosition;

        // Flag value found on entry, so a nested scope does not clear the
        // flag while an outer scope is still active.
        private bool wasBeingLookedAhead;

        public LookaheadContext(InputProvider contextedProvider)
        {
            this.contextedProvider = contextedProvider;
            this.initialPosition = contextedProvider.CurrentPosition;
            this.wasBeingLookedAhead = contextedProvider.IsBeingLookedAhead;
            contextedProvider.IsBeingLookedAhead = true;
        }

        /// <summary>Rewinds the provider to the state captured at construction.</summary>
        public void Dispose()
        {
            this.contextedProvider.CurrentPosition = this.initialPosition;
            this.contextedProvider.IsBeingLookedAhead = this.wasBeingLookedAhead;
        }
    }

    private string[] words;

    private bool IsBeingLookedAhead { get; set; } = false;

    private int CurrentPosition { get; set; }

    /// <param name="words">Words to hand out, in order.</param>
    public InputProvider(string[] words)
    {
        this.words = words;
        this.CurrentPosition = 0;
    }

    /// <summary>
    /// Opens a lookahead scope; dispose it (typically via <c>using</c>) to
    /// rewind any words read in the meantime.
    /// </summary>
    public InputProvider.LookaheadContext GetLookaheadContext()
    {
        return new InputProvider.LookaheadContext(this);
    }

    /// <summary>
    /// Returns the word at the current position and advances by one.
    /// </summary>
    /// <returns>
    /// The next word, or <see cref="string.Empty"/> once all words have been
    /// handed out (the position is not advanced in that case).
    /// </returns>
    public string YieldWord()
    {
        // ">=" rather than ">": a position equal to the length is already
        // past the last valid index.
        if (this.CurrentPosition >= this.words.Length)
        {
            return string.Empty;
        }

        var wordToProcess = this.words[this.CurrentPosition];
        this.CurrentPosition++;
        return wordToProcess;
    }
}

View File

@ -0,0 +1,28 @@
namespace Parsing.Tokenization;
using Parsing.Schema;
/// <summary>
/// Token for a word that represents an integer.
/// </summary>
public class IntegerToken : IValueToken<int>
{
    private string rawText;

    /// <param name="word">Text of the word; stored as given.</param>
    public IntegerToken(string word) => this.rawText = word;

    /// <summary>The original text of the word.</summary>
    public string GetText() => this.rawText;

    /// <summary>The word converted with <c>int.Parse</c>; the conversion runs on every call.</summary>
    public int GetValue() => int.Parse(this.rawText);

    /// <summary>Always <see cref="InputType.Integer"/>.</summary>
    public InputType GetInputType() => InputType.Integer;
}

View File

@ -0,0 +1,28 @@
namespace Parsing.Tokenization;
using Parsing.Schema;
/// <summary>
/// Token for a word taken verbatim as a string.
/// </summary>
public class StringToken : IValueToken<string>
{
    private string rawText;

    /// <param name="word">Text of the word; stored as given.</param>
    public StringToken(string word) => this.rawText = word;

    /// <summary>The original text of the word.</summary>
    public string GetText() => this.rawText;

    /// <summary>Same as <see cref="GetText"/>: the value of a string token is its text.</summary>
    public string GetValue() => this.rawText;

    /// <summary>Always <see cref="InputType.String"/>.</summary>
    public InputType GetInputType() => InputType.String;
}