From d4ac62c59268ecf1a2d169f8e4fe9907e06afb3e Mon Sep 17 00:00:00 2001 From: Simon Diesenreiter Date: Fri, 13 Dec 2024 16:04:19 +0100 Subject: [PATCH] feat: add support for custom token types and longs, ref: NOISSUE --- TextParser/Data/TokenConverter.cs | 14 ++++--- TextParser/Schema/BlockType.cs | 2 + .../Schema/BuildingBlocks/CustomInputBlock.cs | 42 +++++++++++++++++++ TextParser/Schema/BuildingBlocks/LongBlock.cs | 35 ++++++++++++++++ .../Schema/BuildingBlocks/StringBlock.cs | 6 +++ TextParser/Schema/InputSchemaBuilder.cs | 18 ++++++++ TextParser/Schema/InputType.cs | 2 + TextParser/Tokenization/CustomToken.cs | 34 +++++++++++++++ TextParser/Tokenization/LongToken.cs | 28 +++++++++++++ 9 files changed, 176 insertions(+), 5 deletions(-) create mode 100644 TextParser/Schema/BuildingBlocks/CustomInputBlock.cs create mode 100644 TextParser/Schema/BuildingBlocks/LongBlock.cs create mode 100644 TextParser/Tokenization/CustomToken.cs create mode 100644 TextParser/Tokenization/LongToken.cs diff --git a/TextParser/Data/TokenConverter.cs b/TextParser/Data/TokenConverter.cs index da7e77b..ce89918 100644 --- a/TextParser/Data/TokenConverter.cs +++ b/TextParser/Data/TokenConverter.cs @@ -13,7 +13,7 @@ public class TokenConverter { } - private List AsGenericCollection() where T : ICollection, new() + private List AsGenericCollection() where T : List, new() { List returnData = new List(); foreach (var tokenRow in this.rawTokens) @@ -25,11 +25,15 @@ public class TokenConverter { throw new Exception("No token was provided, but token was expected!"); } - IValueToken? valueToken = token as IValueToken; - if (valueToken == null) + + if (!token.GetType().IsAssignableTo(typeof(IValueToken))) { - throw new Exception("Provided token is not a ValueToken"); + Console.WriteLine(token.GetText()); + Type t = token.GetType(); + throw new Exception("Provided token is not a ValueToken - type: " + t.ToString()); } + + IValueToken valueToken = token as IValueToken; newRow.Add(valueToken.GetValue()); } @@ -161,7 +165,7 @@ public class TokenConverter { var newTokenListList = new List>(); - foreach(var tokenList in rawTokens) + foreach(var tokenList in this.rawTokens) { var newTokenList = new List(); foreach(var token in tokenList) diff --git a/TextParser/Schema/BlockType.cs b/TextParser/Schema/BlockType.cs index cf2f494..e989444 100644 --- a/TextParser/Schema/BlockType.cs +++ b/TextParser/Schema/BlockType.cs @@ -11,4 +11,6 @@ public enum BlockType FixedRepetition = 16, GreedyRepetition = 32, NonZeroRepetition = 64, + Custom = 128, + Long = 256, } diff --git a/TextParser/Schema/BuildingBlocks/CustomInputBlock.cs b/TextParser/Schema/BuildingBlocks/CustomInputBlock.cs new file mode 100644 index 0000000..2952f39 --- /dev/null +++ b/TextParser/Schema/BuildingBlocks/CustomInputBlock.cs @@ -0,0 +1,42 @@ +namespace Parsing.Schema.BuildingBlocks; + +using Parsing.Tokenization; + +class CustomInputBlock : BuildingBlockBase +{ + + private InputType definedInputType; + private Func wordConverter; + + public CustomInputBlock(InputType definedInputType, Func wordConverter) + { + this.definedInputType = definedInputType; + this.wordConverter = wordConverter; + } + + public override List ParseWord(InputProvider inputs) + { + return new List() { new CustomToken(inputs.YieldWord(), this.definedInputType, this.wordConverter) }; + } + + public override bool CanParseWord(InputProvider inputs) + { + string word = string.Empty; + using (inputs.GetLookaheadContext()) + { + word = inputs.YieldWord(); + } + + return this.CanParseWord(word); + } + + public override bool CanParseWord(string word) + { + return true; + } + + public override BlockType GetBlockType() + { + return BlockType.Custom; + } +} \ No newline at end of file diff --git a/TextParser/Schema/BuildingBlocks/LongBlock.cs b/TextParser/Schema/BuildingBlocks/LongBlock.cs new file mode 100644 index 0000000..de0b57b --- /dev/null +++ b/TextParser/Schema/BuildingBlocks/LongBlock.cs @@ -0,0 +1,35 @@ +namespace Parsing.Schema.BuildingBlocks; + +using Parsing.Tokenization; + +class LongBlock : BuildingBlockBase +{ + + public LongBlock() + { + } + + public override List ParseWord(InputProvider inputs) + { + return new List() { new LongToken(inputs.YieldWord()) }; + } + + public override bool CanParseWord(InputProvider inputs) + { + using (inputs.GetLookaheadContext()) + { + return this.CanParseWord(inputs.YieldWord()); + } + } + + public override bool CanParseWord(string word) + { + long number = 0; + return long.TryParse(word, out number); + } + + public override BlockType GetBlockType() + { + return BlockType.Long; + } +} \ No newline at end of file diff --git a/TextParser/Schema/BuildingBlocks/StringBlock.cs b/TextParser/Schema/BuildingBlocks/StringBlock.cs index db67695..7688421 100644 --- a/TextParser/Schema/BuildingBlocks/StringBlock.cs +++ b/TextParser/Schema/BuildingBlocks/StringBlock.cs @@ -28,6 +28,12 @@ class StringBlock : BuildingBlockBase public override bool CanParseWord(string word) { // Here we need to ensure we are not matching any non-string tokens, since string can match pretty much anything + LongBlock longBlock = new LongBlock(); + if (longBlock.CanParseWord(word)) + { + return false; + } + IntegerBlock intBlock = new IntegerBlock(); if (intBlock.CanParseWord(word)) { diff --git a/TextParser/Schema/InputSchemaBuilder.cs b/TextParser/Schema/InputSchemaBuilder.cs index 645794f..94f26af 100644 --- a/TextParser/Schema/InputSchemaBuilder.cs +++ b/TextParser/Schema/InputSchemaBuilder.cs @@ -21,6 +21,9 @@ public class InputSchemaBuilder : RepetitionSchemaBuilder(InputType type, InputType definedInputType, Func wordConverter) + { + IBuildingBlock block; + switch (type) + { + case InputType.Custom: + block = new CustomInputBlock(definedInputType, wordConverter); + break; + default: + throw new Exception("Unrecognized InputType"); + } + schema.AddBuildingBlock(block); + return this; + } + public InputSchemaBuilder Repeat(int repetitionCount) { // add another layer of parsing diff --git a/TextParser/Schema/InputType.cs b/TextParser/Schema/InputType.cs index b4c4616..fef05eb 100644 --- a/TextParser/Schema/InputType.cs +++ b/TextParser/Schema/InputType.cs @@ -7,4 +7,6 @@ public enum InputType String = BlockType.String, Fragment = BlockType.Fragment, Char = BlockType.Char, + Custom = BlockType.Custom, + Long = BlockType.Long, } diff --git a/TextParser/Tokenization/CustomToken.cs b/TextParser/Tokenization/CustomToken.cs new file mode 100644 index 0000000..5b4b6c4 --- /dev/null +++ b/TextParser/Tokenization/CustomToken.cs @@ -0,0 +1,34 @@ +namespace Parsing.Tokenization; + +using Parsing.Schema; + +public class CustomToken : IValueToken +{ + private string word; + + private InputType definedInputType; + + private Func wordConverter; + + public CustomToken(string word, InputType definedInputType, Func wordConverter) + { + this.word = word; + this.wordConverter = wordConverter; + this.definedInputType = definedInputType; + } + + public string GetText() + { + return word; + } + + public T GetValue() + { + return wordConverter(word); + } + + public InputType GetInputType() + { + return this.definedInputType; + } +} diff --git a/TextParser/Tokenization/LongToken.cs b/TextParser/Tokenization/LongToken.cs new file mode 100644 index 0000000..8953dab --- /dev/null +++ b/TextParser/Tokenization/LongToken.cs @@ -0,0 +1,28 @@ +namespace Parsing.Tokenization; + +using Parsing.Schema; + +public class LongToken : IValueToken +{ + private string word; + + public LongToken(string word) + { + this.word = word; + } + + public string GetText() + { + return word; + } + + public long GetValue() + { + return long.Parse(word); + } + + public InputType GetInputType() + { + return InputType.Long; + } +}