generated from Templates/Dotnet_Library
feat: added initial implementation of TextParser, ref: A24-3
Some checks failed
CI / linter (9.0.X, ubuntu-latest) (push) Failing after 1m28s
CI / tests_linux (9.0.X, ubuntu-latest) (push) Has been skipped
SonarQube Scan / SonarQube Trigger (push) Successful in 1m40s
Upload Python Package / Create Release (push) Successful in 9s
Upload Python Package / deploy (push) Failing after 1m13s
Some checks failed
CI / linter (9.0.X, ubuntu-latest) (push) Failing after 1m28s
CI / tests_linux (9.0.X, ubuntu-latest) (push) Has been skipped
SonarQube Scan / SonarQube Trigger (push) Successful in 1m40s
Upload Python Package / Create Release (push) Successful in 9s
Upload Python Package / deploy (push) Failing after 1m13s
This commit is contained in:
parent
683b03ffe0
commit
a4e4ee2b85
@ -2,6 +2,9 @@
|
|||||||
|
|
||||||
name: CI
|
name: CI
|
||||||
|
|
||||||
|
env:
|
||||||
|
SKIP_MAKE_SETUP_CHECK: 'true'
|
||||||
|
|
||||||
# Controls when the workflow will run
|
# Controls when the workflow will run
|
||||||
on:
|
on:
|
||||||
# Triggers the workflow on push or pull request events but only for the main branch
|
# Triggers the workflow on push or pull request events but only for the main branch
|
||||||
|
@ -2,6 +2,9 @@ name: Upload Python Package
|
|||||||
permissions:
|
permissions:
|
||||||
contents: write
|
contents: write
|
||||||
|
|
||||||
|
env:
|
||||||
|
SKIP_MAKE_SETUP_CHECK: 'true'
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
# Sequence of patterns matched against refs/tags
|
# Sequence of patterns matched against refs/tags
|
||||||
|
2
Makefile
2
Makefile
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
.PHONY: issetup
|
.PHONY: issetup
|
||||||
issetup:
|
issetup:
|
||||||
@[ -f .git/hooks/commit-msg ] || (echo "You must run 'make setup' first to initialize the repo!" && exit 1)
|
# Pass when the commit-msg hook is installed OR SKIP_MAKE_SETUP_CHECK is set (even to an empty value).
# `${VAR+x}` is the POSIX "is set" test; `[ -v VAR ]` only exists in bash and errors out under /bin/sh (dash).
@[ -f .git/hooks/commit-msg ] || [ -n "$${SKIP_MAKE_SETUP_CHECK+x}" ] || (echo "You must run 'make setup' first to initialize the repo!" && exit 1)
|
||||||
|
|
||||||
.PHONY: setup
|
.PHONY: setup
|
||||||
setup:
|
setup:
|
||||||
|
93
TextParser.Tests/TextParserTests.cs
Normal file
93
TextParser.Tests/TextParserTests.cs
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
namespace TextParser.Tests;
|
||||||
|
|
||||||
|
using Parsing;
|
||||||
|
using Parsing.Schema;
|
||||||
|
using Parsing.Schema.BuildingBlocks;
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// End-to-end tests: build a schema with <see cref="InputSchemaBuilder"/>, parse one line
/// with the text parser and verify the type and value of every produced token.
/// </summary>
public class TextParserTests
{
    private const string IntegerLine = "2 4 6 8";
    private const string MixedLine = "2 ab ba 8 cd dc";

    /// <summary>A fixed repetition of a single integer consumes one integer per repetition.</summary>
    [Fact]
    public void TestSimpleRepetition()
    {
        var schemaBuilder = new InputSchemaBuilder();
        var schema = schemaBuilder
            .Repeat(4)
                .Expect(InputType.Integer)
            .EndRepetition()
            .Build();

        var parser = new TextParser(schema);
        var tokens = parser.ParseLine(IntegerLine);

        Assert.Equal(4, tokens.Count);
        AssertInteger(2, tokens[0]);
        AssertInteger(4, tokens[1]);
        AssertInteger(6, tokens[2]);
        AssertInteger(8, tokens[3]);
    }

    /// <summary>Four consecutive integer expectations consume four integers.</summary>
    [Fact]
    public void TestSimpleInput()
    {
        var schemaBuilder = new InputSchemaBuilder();
        var schema = schemaBuilder
            .Expect(InputType.Integer)
            .Expect(InputType.Integer)
            .Expect(InputType.Integer)
            .Expect(InputType.Integer)
            .Build();

        var parser = new TextParser(schema);
        var tokens = parser.ParseLine(IntegerLine);

        Assert.Equal(4, tokens.Count);
        AssertInteger(2, tokens[0]);
        AssertInteger(4, tokens[1]);
        AssertInteger(6, tokens[2]);
        AssertInteger(8, tokens[3]);
    }

    /// <summary>A repetition nested inside another repetition is replayed for every outer pass.</summary>
    [Fact]
    public void TestNestedRepetition()
    {
        var schemaBuilder = new InputSchemaBuilder();
        var schema = schemaBuilder
            .Repeat(2)
                .Expect(InputType.Integer)
                .Repeat(2)
                    .Expect(InputType.String)
                .EndRepetition()
            .EndRepetition()
            .Build();

        var parser = new TextParser(schema);
        var tokens = parser.ParseLine(MixedLine);

        Assert.Equal(6, tokens.Count);
        AssertInteger(2, tokens[0]);
        AssertString("ab", tokens[1]);
        AssertString("ba", tokens[2]);
        AssertInteger(8, tokens[3]);
        AssertString("cd", tokens[4]);
        AssertString("dc", tokens[5]);
    }

    /// <summary>Verifies that <paramref name="token"/> is an integer token carrying <paramref name="expected"/>.</summary>
    private static void AssertInteger(int expected, IToken token)
    {
        Assert.Equal(InputType.Integer, token.GetInputType());

        // Assert.IsType reports the actual runtime type on mismatch; an `as` cast would
        // instead surface as a misleading "expected 2, actual null" value comparison.
        Assert.Equal(expected, Assert.IsType<IntegerToken>(token).GetValue());
    }

    /// <summary>Verifies that <paramref name="token"/> is a string token carrying <paramref name="expected"/>.</summary>
    private static void AssertString(string expected, IToken token)
    {
        Assert.Equal(InputType.String, token.GetInputType());
        Assert.Equal(expected, Assert.IsType<StringToken>(token).GetValue());
    }
}
|
@ -1,14 +0,0 @@
|
|||||||
namespace TextParser.Tests;
|
|
||||||
|
|
||||||
using TextParser;
|
|
||||||
|
|
||||||
/// <summary>
/// Template smoke test for <c>HelloWorldProvider</c>: checks the named and the default greeting.
/// </summary>
public class UnitTest1
{
    [Fact]
    public void Test1()
    {
        var provider = new HelloWorldProvider();

        var namedGreeting = provider.GetHelloWorld("Simon");
        Assert.Equal("Hello, Simon!", namedGreeting);

        var defaultGreeting = provider.GetHelloWorld();
        Assert.Equal("Hello world!", defaultGreeting);
    }
}
|
|
@ -1,16 +0,0 @@
|
|||||||
namespace TextParser;
|
|
||||||
|
|
||||||
/// <summary>
/// Produces greeting strings.
/// </summary>
public class HelloWorldProvider
{
    /// <summary>
    /// Builds a greeting for <paramref name="name"/>.
    /// </summary>
    /// <param name="name">Name to greet; <c>null</c> or empty selects the generic greeting.</param>
    /// <returns>"Hello world!" when no name is given, otherwise "Hello, {name}!".</returns>
    public string GetHelloWorld(string? name = null)
        => string.IsNullOrEmpty(name) ? "Hello world!" : $"Hello, {name}!";
}
|
|
6
TextParser/Schema/BlockType.cs
Normal file
6
TextParser/Schema/BlockType.cs
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
/// <summary>
/// Kinds of schema building blocks. The numeric values are reused by
/// <c>InputType</c> and <c>RepetitionType</c>, so the declaration order matters.
/// </summary>
public enum BlockType
{
    /// <summary>A single integer word.</summary>
    Integer = 0,

    /// <summary>A single string word.</summary>
    String = 1,

    /// <summary>A nested schema repeated a fixed number of times.</summary>
    FixedRepetition = 2,

    /// <summary>Greedy repetition.</summary>
    GreedyRepetition = 3,
}
|
28
TextParser/Schema/BuildingBlocks/BuildingBlockBase.cs
Normal file
28
TextParser/Schema/BuildingBlocks/BuildingBlockBase.cs
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
namespace Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// Common base for schema building blocks. Supplies the defaults for blocks that
/// are not repetitions; repetition blocks override the two virtual members.
/// </summary>
abstract class BuildingBlockBase : IBuildingBlock
{
    public BuildingBlockBase()
    {
    }

    /// <summary>Consumes input from <paramref name="inputs"/> and returns the resulting token.</summary>
    public abstract IToken ParseWord(InputProvider inputs);

    /// <summary>Reports whether this block can parse the upcoming input.</summary>
    public abstract bool CanParseWord(InputProvider inputs);

    /// <summary>Identifies the kind of this block.</summary>
    public abstract BlockType GetBlockType();

    /// <summary>Default: a block is not a repetition.</summary>
    public virtual bool IsRepetitionType() => false;

    /// <summary>
    /// Default: always done. Most blocks are finished after consuming a single token;
    /// repetition blocks, which can consume several, override this.
    /// </summary>
    public virtual bool CheckIsDoneParsingAndReset() => true;
}
|
72
TextParser/Schema/BuildingBlocks/FixedRepetitionBlock.cs
Normal file
72
TextParser/Schema/BuildingBlocks/FixedRepetitionBlock.cs
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
namespace Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
|
using System.IO.Pipelines;
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// Building block that replays a nested <see cref="InputSchema"/> a fixed number of times.
/// </summary>
/// <remarks>
/// The block keeps its own progress (remaining repetitions and the nested schema's context)
/// in instance fields and only rewinds them in <see cref="CheckIsDoneParsingAndReset"/>
/// once every repetition has been consumed.
/// </remarks>
class FixedRepetitionBlock : BuildingBlockBase
{
    private InputSchema inputSchema;
    private InputSchemaContext context;

    private int repetitionCount;
    private int initRepetitionCount;

    /// <param name="inputSchema">Nested schema to run on each repetition.</param>
    /// <param name="repetitionCount">How many times the nested schema is run.</param>
    public FixedRepetitionBlock(InputSchema inputSchema, int repetitionCount)
    {
        this.inputSchema = inputSchema;
        this.initRepetitionCount = repetitionCount;
        this.repetitionCount = repetitionCount;
        this.context = inputSchema.CreateContext();
    }

    /// <summary>
    /// Lets the nested schema parse one word. When that completes a pass over the nested
    /// schema, one repetition is used up and, if more remain, a fresh context is started.
    /// </summary>
    public override IToken ParseWord(InputProvider inputs)
    {
        var token = inputSchema.ProcessNextWord(context, inputs);
        if (!context.HasFinished)
        {
            return token;
        }

        repetitionCount--;
        if (repetitionCount > 0)
        {
            context = inputSchema.CreateContext();
        }

        return token;
    }

    /// <summary>
    /// False once no repetitions remain; otherwise whatever the nested schema reports.
    /// </summary>
    public override bool CanParseWord(InputProvider inputs)
        => repetitionCount != 0 && inputSchema.CanProcessNextWord(context, inputs);

    public override BlockType GetBlockType() => BlockType.FixedRepetition;

    public override bool IsRepetitionType() => true;

    /// <summary>
    /// Returns true when all repetitions are exhausted, and in that case restores the
    /// initial repetition count and a fresh nested context.
    /// </summary>
    public override bool CheckIsDoneParsingAndReset()
    {
        if (repetitionCount != 0)
        {
            return false;
        }

        repetitionCount = initRepetitionCount;
        context = inputSchema.CreateContext();
        return true;
    }
}
|
16
TextParser/Schema/BuildingBlocks/IBuildingBlock.cs
Normal file
16
TextParser/Schema/BuildingBlocks/IBuildingBlock.cs
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
namespace Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// One element of an input schema: decides whether it can handle the upcoming
/// input and turns that input into a token.
/// </summary>
public interface IBuildingBlock
{
    /// <summary>Consumes input from <paramref name="inputs"/> and returns the parsed token.</summary>
    IToken ParseWord(InputProvider inputs);

    /// <summary>Reports whether the block can parse the upcoming input.</summary>
    bool CanParseWord(InputProvider inputs);

    /// <summary>Identifies the kind of this block.</summary>
    BlockType GetBlockType();

    /// <summary>True for blocks that wrap a repeated nested schema.</summary>
    bool IsRepetitionType();

    /// <summary>
    /// Reports whether the block has finished parsing; the name indicates that
    /// implementations also reset their state when they report completion.
    /// </summary>
    bool CheckIsDoneParsingAndReset();
}
|
31
TextParser/Schema/BuildingBlocks/IntegerBlock.cs
Normal file
31
TextParser/Schema/BuildingBlocks/IntegerBlock.cs
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
namespace Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// Building block that matches a single word parseable as an <see cref="int"/>.
/// </summary>
class IntegerBlock : BuildingBlockBase
{
    public IntegerBlock()
    {
    }

    /// <summary>Consumes the next word and wraps it in an <see cref="IntegerToken"/>.</summary>
    public override IToken ParseWord(InputProvider inputs)
        => new IntegerToken(inputs.YieldWord());

    /// <summary>
    /// Peeks at the next word inside a lookahead scope (so the provider's position is
    /// restored afterwards) and reports whether <see cref="int.TryParse(string, out int)"/> accepts it.
    /// </summary>
    public override bool CanParseWord(InputProvider inputs)
    {
        using var lookahead = inputs.GetLookaheadContext();
        return int.TryParse(inputs.YieldWord(), out _);
    }

    public override BlockType GetBlockType() => BlockType.Integer;
}
|
26
TextParser/Schema/BuildingBlocks/StringBlock.cs
Normal file
26
TextParser/Schema/BuildingBlocks/StringBlock.cs
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
namespace Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// Building block that accepts any single word as a string.
/// </summary>
class StringBlock : BuildingBlockBase
{
    public StringBlock()
    {
    }

    /// <summary>Consumes the next word and wraps it in a <see cref="StringToken"/>.</summary>
    public override IToken ParseWord(InputProvider inputs)
        => new StringToken(inputs.YieldWord());

    /// <summary>Always true: the input is not inspected.</summary>
    public override bool CanParseWord(InputProvider inputs) => true;

    public override BlockType GetBlockType() => BlockType.String;
}
|
71
TextParser/Schema/InputSchema.cs
Normal file
71
TextParser/Schema/InputSchema.cs
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
using Parsing.Schema;
|
||||||
|
using Parsing.Schema.BuildingBlocks;
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
using System.Collections;
|
||||||
|
|
||||||
|
/// <summary>
/// Mutable progress marker for one pass over an <c>InputSchema</c>.
/// </summary>
public class InputSchemaContext
{
    /// <summary>Index of the building block that handles the next word; starts at 0.</summary>
    public int lastProcessedBlockIndex { get; set; }

    /// <summary>Set once the index has moved past the last building block; starts as false.</summary>
    public bool HasFinished { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Ordered list of building blocks describing the expected shape of a line of input.
/// Progress through the list is tracked in a caller-supplied <see cref="InputSchemaContext"/>.
/// </summary>
public class InputSchema
{
    private readonly List<IBuildingBlock> buildingBlocks;

    public InputSchema()
    {
        buildingBlocks = new List<IBuildingBlock>();
    }

    /// <summary>Appends <paramref name="buildingBlock"/> to the end of the schema.</summary>
    public void AddBuildingBlock(IBuildingBlock buildingBlock)
    {
        this.buildingBlocks.Add(buildingBlock);
    }

    /// <summary>
    /// Lets the block at the context's current index parse from <paramref name="inputs"/>.
    /// The index advances when the block is not a repetition, or when a repetition block
    /// reports that it is done; <see cref="InputSchemaContext.HasFinished"/> is set once
    /// the index has moved past the last block.
    /// </summary>
    /// <returns>The token produced by the current block.</returns>
    public IToken ProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
    {
        var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
        var token = nextBlock.ParseWord(inputs);

        // The short-circuit matters: CheckIsDoneParsingAndReset has side effects and
        // is only invoked for repetition blocks.
        if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset())
        {
            currentContext.lastProcessedBlockIndex++;
            currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count;
        }

        return token;
    }

    /// <summary>
    /// Reports whether the block at the context's current index can parse the upcoming
    /// input. The provider's position is restored before returning.
    /// </summary>
    /// <returns>
    /// False when the context has finished or there is no block at the current index
    /// (for example an empty schema); otherwise the current block's answer.
    /// </returns>
    public bool CanProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
    {
        // HasFinished is only ever set by ProcessNextWord, so an empty schema starts out
        // "unfinished" with index 0; the bounds check keeps that case from indexing past the list.
        if (currentContext.HasFinished
            || currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count)
        {
            return false;
        }

        using (inputs.GetLookaheadContext())
        {
            var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
            return nextBlock.CanParseWord(inputs);
        }
    }

    /// <summary>
    /// Runs the schema over <paramref name="words"/> with a fresh context, collecting
    /// tokens until the schema finishes or the current block cannot parse the next word.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
    public IList<IToken> ProcessWordList(string[] words)
    {
        ArgumentNullException.ThrowIfNull(words);

        var tokens = new List<IToken>();
        var inputs = new InputProvider(words);
        var overallContext = this.CreateContext();

        while (this.CanProcessNextWord(overallContext, inputs))
        {
            tokens.Add(this.ProcessNextWord(overallContext, inputs));
        }

        return tokens;
    }

    /// <summary>Creates a context positioned at the first building block.</summary>
    public InputSchemaContext CreateContext()
    {
        return new InputSchemaContext();
    }
}
|
68
TextParser/Schema/InputSchemaBuilder.cs
Normal file
68
TextParser/Schema/InputSchemaBuilder.cs
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
using Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
|
/// <summary>
/// Fluent builder for <see cref="InputSchema"/>. <see cref="Repeat"/> opens a nested
/// builder layer and <see cref="EndRepetition"/> closes it, returning to the enclosing layer.
/// </summary>
public class InputSchemaBuilder
{
    private readonly InputSchema schema = new InputSchema();

    public InputSchemaBuilder()
    {
    }

    /// <summary>Appends a block that expects one word of the given <paramref name="type"/>.</summary>
    /// <returns>This builder, for chaining.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="type"/> is not a known input type.</exception>
    public InputSchemaBuilder Expect(InputType type)
    {
        IBuildingBlock block = type switch
        {
            InputType.String => new StringBlock(),
            InputType.Integer => new IntegerBlock(),
            _ => throw new ArgumentOutOfRangeException(nameof(type), type, "Unrecognized InputType"),
        };

        schema.AddBuildingBlock(block);
        return this;
    }

    /// <summary>
    /// Opens a nested layer whose blocks will be repeated <paramref name="repetitionCount"/> times.
    /// </summary>
    /// <returns>The nested builder; call <see cref="EndRepetition"/> on it to return to this layer.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="repetitionCount"/> is less than 1.</exception>
    public InputSchemaBuilder Repeat(int repetitionCount)
    {
        // A fixed repetition block only reports completion after counting down to zero by
        // parsing, so a count below 1 would create a block that can never finish.
        if (repetitionCount < 1)
        {
            throw new ArgumentOutOfRangeException(
                nameof(repetitionCount), repetitionCount, "Repetition count must be at least 1.");
        }

        // add another layer of parsing
        return new RepetitionSchemaBuilder(this)
        {
            NumRepetition = repetitionCount,
            RepetitionType = RepetitionType.FixedRepetition,
        };
    }

    /// <summary>
    /// Closes the current repetition layer: wraps its schema in a repetition block, appends
    /// that block to the enclosing layer and returns the enclosing builder.
    /// </summary>
    /// <exception cref="InvalidOperationException">This builder was not opened by <see cref="Repeat"/>.</exception>
    /// <exception cref="NotSupportedException">The layer's repetition type has no block implementation.</exception>
    public InputSchemaBuilder EndRepetition()
    {
        // return back to upper layer of parsing
        if (this is not RepetitionSchemaBuilder currentBuilder)
        {
            throw new InvalidOperationException("Invalid repetition definitions!");
        }

        var oldInputSchemaBuilder = currentBuilder.UpperLayerBuilder;
        var currentSchema = currentBuilder.Build();

        IBuildingBlock repetitionBlock = currentBuilder.RepetitionType switch
        {
            RepetitionType.FixedRepetition => new FixedRepetitionBlock(currentSchema, currentBuilder.NumRepetition),
            _ => throw new NotSupportedException("Unrecognized RepetitionType"),
        };

        oldInputSchemaBuilder.schema.AddBuildingBlock(repetitionBlock);
        return oldInputSchemaBuilder;
    }

    /// <summary>Returns the schema assembled by this builder layer.</summary>
    public InputSchema Build()
    {
        return schema;
    }
}
|
7
TextParser/Schema/InputType.cs
Normal file
7
TextParser/Schema/InputType.cs
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
/// <summary>
/// Word types a schema can expect. Values mirror the matching <see cref="BlockType"/> members.
/// </summary>
public enum InputType
{
    /// <summary>An integer word.</summary>
    Integer = BlockType.Integer,

    /// <summary>A string word.</summary>
    String = BlockType.String,
}
|
15
TextParser/Schema/RepetitionSchemaBuilder.cs
Normal file
15
TextParser/Schema/RepetitionSchemaBuilder.cs
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
/// <summary>
/// Builder for the blocks inside a repetition. Remembers the builder of the enclosing
/// layer together with the repetition's settings.
/// </summary>
public class RepetitionSchemaBuilder : InputSchemaBuilder
{
    /// <param name="upperLayerBuilder">Builder of the layer that encloses this repetition.</param>
    public RepetitionSchemaBuilder(InputSchemaBuilder upperLayerBuilder)
        => UpperLayerBuilder = upperLayerBuilder;

    /// <summary>Builder of the enclosing layer.</summary>
    public InputSchemaBuilder UpperLayerBuilder { get; set; }

    /// <summary>Number of repetitions configured for this layer.</summary>
    public int NumRepetition { get; set; }

    /// <summary>Kind of repetition configured for this layer.</summary>
    public RepetitionType RepetitionType { get; set; }
}
|
7
TextParser/Schema/RepetitionType.cs
Normal file
7
TextParser/Schema/RepetitionType.cs
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
/// <summary>
/// Repetition kinds. Values mirror the matching <see cref="BlockType"/> members.
/// </summary>
public enum RepetitionType
{
    /// <summary>Repeat a fixed number of times.</summary>
    FixedRepetition = BlockType.FixedRepetition,

    /// <summary>Greedy repetition.</summary>
    GreedyRepetition = BlockType.GreedyRepetition,
}
|
36
TextParser/TextParser.cs
Normal file
36
TextParser/TextParser.cs
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
namespace Parsing;
|
||||||
|
|
||||||
|
using Parsing.Schema;
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// Splits a line of text into words and runs them through an <see cref="InputSchema"/>.
/// </summary>
public class TextParser
{
    private readonly string[] delimiters;
    private readonly bool removeEmptyEntries;
    private readonly InputSchema schema;

    /// <param name="schema">Schema the words of each line are matched against.</param>
    /// <param name="delimiters">Word separators; defaults to a single space when null.</param>
    /// <param name="removeEmptyEntries">Whether empty words produced by splitting are dropped.</param>
    /// <exception cref="ArgumentNullException"><paramref name="schema"/> is null.</exception>
    public TextParser(InputSchema schema, string[]? delimiters = null, bool removeEmptyEntries = true)
    {
        ArgumentNullException.ThrowIfNull(schema);

        this.schema = schema;
        this.delimiters = delimiters ?? new string[] { " " };
        this.removeEmptyEntries = removeEmptyEntries;
    }

    /// <summary>Splits <paramref name="line"/> on the configured delimiters, trimming every word.</summary>
    private string[] ParseLineIntoWords(string line)
    {
        var options = StringSplitOptions.TrimEntries;
        if (this.removeEmptyEntries)
        {
            options |= StringSplitOptions.RemoveEmptyEntries;
        }

        return line.Split(this.delimiters, options);
    }

    /// <summary>Parses one line of text into tokens according to the schema.</summary>
    /// <returns>The tokens produced by the schema, in input order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="line"/> is null.</exception>
    public IList<IToken> ParseLine(string line)
    {
        ArgumentNullException.ThrowIfNull(line);

        var words = this.ParseLineIntoWords(line);
        return this.schema.ProcessWordList(words);
    }
}
|
10
TextParser/Tokenization/IToken.cs
Normal file
10
TextParser/Tokenization/IToken.cs
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
namespace Parsing.Tokenization;
|
||||||
|
|
||||||
|
using Parsing.Schema;
|
||||||
|
|
||||||
|
/// <summary>
/// A parsed word together with the input type it was recognised as.
/// </summary>
public interface IToken
{
    /// <summary>Returns the token's text.</summary>
    string GetText();

    /// <summary>Returns the input type of the token.</summary>
    InputType GetInputType();
}
|
6
TextParser/Tokenization/IValueToken.cs
Normal file
6
TextParser/Tokenization/IValueToken.cs
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
namespace Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// A token that additionally exposes a typed value.
/// </summary>
/// <typeparam name="T">Type of the value.</typeparam>
public interface IValueToken<T> : IToken
{
    /// <summary>Returns the token's value as <typeparamref name="T"/>.</summary>
    T GetValue();
}
|
56
TextParser/Tokenization/InputProvider.cs
Normal file
56
TextParser/Tokenization/InputProvider.cs
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
namespace Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// Forward-only cursor over an array of words. A <see cref="LookaheadContext"/> lets
/// callers read ahead and have the cursor position restored afterwards.
/// </summary>
public class InputProvider
{
    /// <summary>
    /// Scope object that captures the provider's position on creation and restores it on
    /// disposal. Scopes may be nested.
    /// </summary>
    public class LookaheadContext : IDisposable
    {
        private readonly InputProvider contextedProvider;
        private readonly int initialPosition;
        private readonly bool wasBeingLookedAhead;

        /// <exception cref="ArgumentNullException"><paramref name="contextedProvider"/> is null.</exception>
        public LookaheadContext(InputProvider contextedProvider)
        {
            ArgumentNullException.ThrowIfNull(contextedProvider);

            this.contextedProvider = contextedProvider;
            this.initialPosition = contextedProvider.CurrentPosition;
            this.wasBeingLookedAhead = contextedProvider.IsBeingLookedAhead;
            contextedProvider.IsBeingLookedAhead = true;
        }

        /// <summary>Rewinds the provider to the position captured at construction.</summary>
        public void Dispose()
        {
            this.contextedProvider.CurrentPosition = this.initialPosition;

            // Restore the previous flag instead of clearing it, so disposing an inner
            // scope does not mark an outer, still-active scope as finished.
            this.contextedProvider.IsBeingLookedAhead = this.wasBeingLookedAhead;
        }
    }

    private readonly string[] words;

    private bool IsBeingLookedAhead { get; set; } = false;

    private int CurrentPosition { get; set; }

    /// <param name="words">Words to iterate over, in order.</param>
    /// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
    public InputProvider(string[] words)
    {
        ArgumentNullException.ThrowIfNull(words);

        this.words = words;
        this.CurrentPosition = 0;
    }

    /// <summary>Opens a lookahead scope; dispose it to rewind to the current position.</summary>
    public InputProvider.LookaheadContext GetLookaheadContext()
    {
        return new InputProvider.LookaheadContext(this);
    }

    /// <summary>
    /// Returns the word at the current position and advances by one.
    /// </summary>
    /// <returns>The next word, or <see cref="string.Empty"/> when all words have been consumed.</returns>
    public string YieldWord()
    {
        // `>=`, not `>`: a position equal to the length is already past the last word.
        if (this.CurrentPosition >= this.words.Length)
        {
            return string.Empty;
        }

        var wordToProcess = this.words[this.CurrentPosition];
        this.CurrentPosition++;
        return wordToProcess;
    }
}
|
28
TextParser/Tokenization/IntegerToken.cs
Normal file
28
TextParser/Tokenization/IntegerToken.cs
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
namespace Parsing.Tokenization;
|
||||||
|
|
||||||
|
using Parsing.Schema;
|
||||||
|
|
||||||
|
/// <summary>
/// Token for a word recognised as an integer. The raw text is stored and converted on demand.
/// </summary>
public class IntegerToken : IValueToken<int>
{
    private string word;

    /// <param name="word">Raw text of the token.</param>
    public IntegerToken(string word) => this.word = word;

    /// <summary>Returns the raw text.</summary>
    public string GetText() => word;

    /// <summary>Parses the raw text with <see cref="int.Parse(string)"/> on every call.</summary>
    public int GetValue() => int.Parse(word);

    /// <summary>Always <see cref="InputType.Integer"/>.</summary>
    public InputType GetInputType() => InputType.Integer;
}
|
28
TextParser/Tokenization/StringToken.cs
Normal file
28
TextParser/Tokenization/StringToken.cs
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
namespace Parsing.Tokenization;
|
||||||
|
|
||||||
|
using Parsing.Schema;
|
||||||
|
|
||||||
|
/// <summary>
/// Token for a word taken verbatim as a string.
/// </summary>
public class StringToken : IValueToken<string>
{
    private string word;

    /// <param name="word">Raw text of the token.</param>
    public StringToken(string word) => this.word = word;

    /// <summary>Returns the raw text.</summary>
    public string GetText() => word;

    /// <summary>Returns the raw text; for strings the value and the text are the same.</summary>
    public string GetValue() => word;

    /// <summary>Always <see cref="InputType.String"/>.</summary>
    public InputType GetInputType() => InputType.String;
}
|
Loading…
x
Reference in New Issue
Block a user