fix: allow for parsing single chars as input, ref: NOISSUE

This commit is contained in:
Simon Diesenreiter 2024-12-05 23:58:11 +01:00
parent b261773b13
commit 550c8280a6
13 changed files with 325 additions and 21 deletions

View File

@ -30,6 +30,9 @@ public class TextParserTests
private const string testInput9 = @"2 4 6 4 1 private const string testInput9 = @"2 4 6 4 1
3 5 4 7 6 3 5 4 7 6
4 6 8 3 9"; 4 6 8 3 9";
private const string testInput10 = @"abc
bca
cab";
[Fact] [Fact]
public void LineParser_TestSimpleRepetition() public void LineParser_TestSimpleRepetition()
@ -322,7 +325,7 @@ public class TextParserTests
[Fact] [Fact]
public void DataManipulator_SimpleOneDimensionalTest() public void DataManipulator_SimpleOneDimensionalTest()
{ {
var schemaBuilder = new InputSchemaBuilder(); var schemaBuilder = new InputSchemaBuilder();
var schema = schemaBuilder var schema = schemaBuilder
.Repeat() .Repeat()
@ -351,7 +354,7 @@ public class TextParserTests
[Fact] [Fact]
public void DataManipulator_SimpleTwoDimensionalTest() public void DataManipulator_SimpleTwoDimensionalTest()
{ {
var schemaBuilder = new InputSchemaBuilder(); var schemaBuilder = new InputSchemaBuilder();
var schema = schemaBuilder var schema = schemaBuilder
.Repeat() .Repeat()
@ -389,4 +392,27 @@ public class TextParserTests
Assert.Equal(Direction.SE, searchResults[4].Direction); Assert.Equal(Direction.SE, searchResults[4].Direction);
Assert.Equal(Direction.W, searchResults[5].Direction); Assert.Equal(Direction.W, searchResults[5].Direction);
} }
[Fact]
public void TextPArser_TestReadingChars()
{
    // Schema under test: a repetition containing a single Char block.
    var schema = new InputSchemaBuilder()
        .Repeat()
        .Expect(InputType.Char)
        .EndRepetition()
        .Build();

    // Parse the three-line, three-letters-per-line fixture and read it back row by row.
    var rows = new TextParser<InputSchemaContext>(schema)
        .SetInputText(testInput10)
        .Parse()
        .AsListRows<string>();

    Assert.Equal(3, rows.Count);
    Assert.Equal("a", rows[0][0]);

    // Every input line is expected to yield one entry per character.
    foreach (var parsedRow in rows)
    {
        Assert.Equal(3, parsedRow.Count);
    }
}
} }

View File

@ -4,10 +4,11 @@
public enum BlockType public enum BlockType
{ {
Integer = 1, Integer = 1,
String = 2, Char = 2,
String = 4,
// technically not a block type but keeping it here for consistency/having all types in one place // technically not a block type but keeping it here for consistency/having all types in one place
Fragment = 4, Fragment = 8,
FixedRepetition = 8, FixedRepetition = 16,
GreedyRepetition = 16, GreedyRepetition = 32,
NonZeroRepetition = 32, NonZeroRepetition = 64,
} }

View File

@ -8,7 +8,7 @@ abstract class BuildingBlockBase : IBuildingBlock
{ {
} }
public abstract IToken ParseWord(InputProvider inputs); public abstract List<IToken> ParseWord(InputProvider inputs);
public abstract bool CanParseWord(InputProvider inputs); public abstract bool CanParseWord(InputProvider inputs);

View File

@ -0,0 +1,49 @@
namespace Parsing.Schema.BuildingBlocks;
using Parsing.Tokenization;
/// <summary>
/// Building block that breaks the next input word into its individual characters and
/// emits one single-character <see cref="StringToken"/> per character.
/// </summary>
class CharBlock : BuildingBlockBase
{
    public CharBlock()
    {
    }

    /// <summary>
    /// Consumes the next word from <paramref name="inputs"/> and returns one token per character.
    /// </summary>
    /// <param name="inputs">Provider the next word is read from.</param>
    /// <returns>A list holding a <see cref="StringToken"/> for every character of the word, in order.</returns>
    public override List<IToken> ParseWord(InputProvider inputs)
    {
        var tokenList = new List<IToken>();
        foreach (char c in inputs.YieldWord())
        {
            tokenList.Add(new StringToken(c.ToString()));
        }
        return tokenList;
    }

    /// <summary>
    /// Checks whether the next word of <paramref name="inputs"/> can be handled by this block.
    /// </summary>
    /// <param name="inputs">Provider the next word is read from inside a lookahead context.</param>
    /// <returns>The result of <see cref="CanParseWord(string)"/> for that word.</returns>
    public override bool CanParseWord(InputProvider inputs)
    {
        string word;
        // NOTE(review): the lookahead context presumably rewinds the provider on dispose so the
        // word is not consumed by this check - confirm against InputProvider.
        using (inputs.GetLookaheadContext())
        {
            word = inputs.YieldWord();
        }
        return this.CanParseWord(word);
    }

    /// <summary>
    /// Checks whether <paramref name="word"/> can be handled by this block.
    /// </summary>
    /// <param name="word">The candidate word.</param>
    /// <returns><c>false</c> when an <see cref="IntegerBlock"/> accepts the word; otherwise <c>true</c>.</returns>
    public override bool CanParseWord(string word)
    {
        // Characters can match pretty much anything, so words that an integer block would
        // accept are explicitly rejected here.
        return !new IntegerBlock().CanParseWord(word);
    }

    /// <summary>
    /// Identifies this block as a character block.
    /// </summary>
    /// <returns><see cref="BlockType.Char"/>.</returns>
    public override BlockType GetBlockType()
    {
        // Previously reported BlockType.String, which made this block indistinguishable
        // from StringBlock even though InputType.Char maps to BlockType.Char.
        return BlockType.Char;
    }
}

View File

@ -20,7 +20,7 @@ class FixedRepetitionBlock : BuildingBlockBase
this.context = this.inputSchema.CreateContext(); this.context = this.inputSchema.CreateContext();
} }
public override IToken ParseWord(InputProvider inputs) public override List<IToken> ParseWord(InputProvider inputs)
{ {
var result = inputSchema.ProcessNextWord(context, inputs); var result = inputSchema.ProcessNextWord(context, inputs);
if (context.HasFinished) if (context.HasFinished)
@ -31,7 +31,7 @@ class FixedRepetitionBlock : BuildingBlockBase
this.context = this.inputSchema.CreateContext(); this.context = this.inputSchema.CreateContext();
} }
} }
return result.Single(); return result;
} }
public override bool CanParseWord(InputProvider inputs) public override bool CanParseWord(InputProvider inputs)

View File

@ -15,14 +15,14 @@ class GreedyRepetitionBlock : BuildingBlockBase
this.context = this.inputSchema.CreateContext(); this.context = this.inputSchema.CreateContext();
} }
public override IToken ParseWord(InputProvider inputs) public override List<IToken> ParseWord(InputProvider inputs)
{ {
var result = inputSchema.ProcessNextWord(context, inputs); var result = inputSchema.ProcessNextWord(context, inputs);
if (!this.CanParseWord(inputs)) if (!this.CanParseWord(inputs))
{ {
this.context = this.inputSchema.CreateContext(); this.context = this.inputSchema.CreateContext();
} }
return result.Single(); return result;
} }
public override bool CanParseWord(InputProvider inputs) public override bool CanParseWord(InputProvider inputs)

View File

@ -4,7 +4,7 @@ using Parsing.Tokenization;
public interface IBuildingBlock public interface IBuildingBlock
{ {
public IToken ParseWord(InputProvider inputs); public List<IToken> ParseWord(InputProvider inputs);
public bool CanParseWord(InputProvider inputs); public bool CanParseWord(InputProvider inputs);

View File

@ -9,9 +9,9 @@ class IntegerBlock : BuildingBlockBase
{ {
} }
public override IToken ParseWord(InputProvider inputs) public override List<IToken> ParseWord(InputProvider inputs)
{ {
return new IntegerToken(inputs.YieldWord()); return new List<IToken>() { new IntegerToken(inputs.YieldWord()) };
} }
public override bool CanParseWord(InputProvider inputs) public override bool CanParseWord(InputProvider inputs)

View File

@ -9,9 +9,9 @@ class StringBlock : BuildingBlockBase
{ {
} }
public override IToken ParseWord(InputProvider inputs) public override List<IToken> ParseWord(InputProvider inputs)
{ {
return new StringToken(inputs.YieldWord()); return new List<IToken>() { new StringToken(inputs.YieldWord()) };
} }
public override bool CanParseWord(InputProvider inputs) public override bool CanParseWord(InputProvider inputs)

View File

@ -28,15 +28,13 @@ public class InputSchema : ISchema<InputSchemaContext>
public List<IToken> ProcessNextWord(InputSchemaContext currentContext, InputProvider inputs) public List<IToken> ProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
{ {
var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex]; var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
var token = nextBlock.ParseWord(inputs); var tokens = nextBlock.ParseWord(inputs);
if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset(inputs)) if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset(inputs))
{ {
currentContext.lastProcessedBlockIndex++; currentContext.lastProcessedBlockIndex++;
currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count; currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count;
} }
var newTokenList = new List<IToken>(); return tokens;
newTokenList.Add(token);
return newTokenList;
} }
public bool CanProcessNextWord(InputSchemaContext currentContext, InputProvider inputs) public bool CanProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)

View File

@ -21,6 +21,9 @@ public class InputSchemaBuilder : RepetitionSchemaBuilder<InputSchemaBuilder, In
case InputType.Integer: case InputType.Integer:
block = new IntegerBlock(); block = new IntegerBlock();
break; break;
case InputType.Char:
block = new CharBlock();
break;
default: default:
throw new Exception("Unrecognized InputType"); throw new Exception("Unrecognized InputType");
} }

View File

@ -6,4 +6,5 @@ public enum InputType
Integer = BlockType.Integer, Integer = BlockType.Integer,
String = BlockType.String, String = BlockType.String,
Fragment = BlockType.Fragment, Fragment = BlockType.Fragment,
Char = BlockType.Char,
} }

View File

@ -0,0 +1,226 @@
namespace Parsing;
using System;
using System.Collections.Generic;
using Parsing.Schema;
using Parsing.Tokenization;
/// <summary>
/// Extension methods that turn lists of tokens into lists of converted values.
/// </summary>
public static class DataConversionHelpers
{
    /// <summary>
    /// Reads the value of <paramref name="token"/> as <typeparamref name="TOldType"/>.
    /// </summary>
    /// <exception cref="Exception">The token is not an <see cref="IValueToken{T}"/> of the requested type.</exception>
    private static TOldType ReadTokenValue<TOldType>(IToken token)
    {
        if (token is IValueToken<TOldType> valueToken)
        {
            return valueToken.GetValue();
        }

        throw new Exception("Invalid Token type encountered during value conversion");
    }

    /// <summary>
    /// Converts every token value with <paramref name="converter"/>, producing one result per token.
    /// </summary>
    /// <param name="tokenList">Tokens to convert, processed in order.</param>
    /// <param name="converter">Maps a token value to the new type.</param>
    /// <returns>A new list with the converted values.</returns>
    /// <exception cref="Exception">A token is not a value token of <typeparamref name="TOldType"/>.</exception>
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        var converted = new List<TNewType>();
        foreach (IToken current in tokenList)
        {
            TOldType rawValue = ReadTokenValue<TOldType>(current);
            converted.Add(converter(rawValue));
        }

        return converted;
    }

    /// <summary>
    /// Converts every token value with <paramref name="converter"/> and concatenates the
    /// lists it returns into a single flat list.
    /// </summary>
    /// <param name="tokenList">Tokens to convert, processed in order.</param>
    /// <param name="converter">Maps a token value to zero or more values of the new type.</param>
    /// <returns>A new list with all converted values appended in token order.</returns>
    /// <exception cref="Exception">A token is not a value token of <typeparamref name="TOldType"/>.</exception>
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, List<TNewType>> converter) where TTokenType : IValueToken<TOldType>
    {
        var flattened = new List<TNewType>();
        foreach (IToken current in tokenList)
        {
            TOldType rawValue = ReadTokenValue<TOldType>(current);
            flattened.AddRange(converter(rawValue));
        }

        return flattened;
    }

    /// <summary>
    /// Applies the one-result-per-token conversion to each inner token list.
    /// </summary>
    /// <param name="tokenListList">Token lists to convert, processed in order.</param>
    /// <param name="converter">Maps a token value to the new type.</param>
    /// <returns>A new list holding one converted list per input list.</returns>
    public static List<List<TNewType>> ConvertData<TTokenType, TNewType, TOldType>(this List<List<IToken>> tokenListList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        var convertedRows = new List<List<TNewType>>();
        foreach (List<IToken> tokenRow in tokenListList)
        {
            List<TNewType> convertedRow = tokenRow.ConvertData<TTokenType, TNewType, TOldType>(converter);
            convertedRows.Add(convertedRow);
        }

        return convertedRows;
    }
}
/// <summary>
/// Extension methods that collapse a list of values into a single value.
/// </summary>
public static class DataManipulationHelpers
{
    /// <summary>
    /// Folds <paramref name="data"/> from left to right: starts with the first element and
    /// combines it with each following element through <paramref name="reducer"/>.
    /// </summary>
    /// <param name="data">Values to reduce; must contain at least one element.</param>
    /// <param name="reducer">Combines the running result (first argument) with the next element (second argument).</param>
    /// <returns>The first element when the list has exactly one entry, otherwise the folded result.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="data"/> is empty.</exception>
    public static TType ReduceData<TType>(this List<TType> data, Func<TType, TType, TType> reducer)
    {
        if (data is null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        if (data.Count == 0)
        {
            // Same exception type the list indexer raised before, but with a message that
            // tells the caller what actually went wrong.
            throw new ArgumentOutOfRangeException(nameof(data), "Cannot reduce an empty list.");
        }

        // With a single element the loop body never runs and that element is returned as-is.
        TType result = data[0];
        for (int i = 1; i < data.Count; i++)
        {
            result = reducer(result, data[i]);
        }
        return result;
    }

    /// <summary>
    /// Reduces <paramref name="data"/> by handing the whole list to <paramref name="reducer"/>.
    /// </summary>
    /// <param name="data">Values to reduce; passed to the reducer unchanged.</param>
    /// <param name="reducer">Computes the result from the complete list.</param>
    /// <returns>Whatever <paramref name="reducer"/> returns.</returns>
    public static TType ReduceData<TType>(this List<TType> data, Func<List<TType>, TType> reducer)
    {
        return reducer(data);
    }
}
/// <summary>
/// Exposes a two-dimensional token dataset (a list of token rows) in several shapes:
/// flat stream, rows, columns, grid, raw tokens, or fragments.
/// </summary>
public class TokenConverter
{
    // One inner list per row of tokens. Nothing in this class fills it;
    // NOTE(review): presumably populated by derived classes - confirm.
    protected List<List<IToken>> rawTokens = new List<List<IToken>>();

    public TokenConverter()
    {
    }

    /// <summary>
    /// Reads the value carried by <paramref name="token"/> as <typeparamref name="T"/>.
    /// </summary>
    /// <exception cref="Exception">The token is null or is not an <see cref="IValueToken{T}"/> of the requested type.</exception>
    private static T ExtractValue<T>(IToken token)
    {
        if (token is null)
        {
            throw new Exception("No token was provided, but token was expected!");
        }

        if (token is not IValueToken<T> valueToken)
        {
            throw new Exception("Provided token is not a ValueToken");
        }

        return valueToken.GetValue();
    }

    /// <summary>
    /// Builds one collection of type <typeparamref name="T"/> per token row, filled with the row's values.
    /// </summary>
    private List<T> AsGenericCollection<T, U>() where T : ICollection<U>, new()
    {
        List<T> returnData = new List<T>();
        foreach (var tokenRow in this.rawTokens)
        {
            T newRow = new T();
            foreach (IToken token in tokenRow)
            {
                newRow.Add(ExtractValue<U>(token));
            }
            returnData.Add(newRow);
        }
        return returnData;
    }

    /// <summary>
    /// Verifies that every row has the same length, which column and grid conversions require.
    /// An empty dataset trivially satisfies this.
    /// </summary>
    /// <exception cref="Exception">At least one row differs in length from the first row.</exception>
    private void CheckConversionPrerequisites()
    {
        if (this.rawTokens.Count == 0)
        {
            // Nothing to compare; avoids indexing into an empty list.
            return;
        }

        int rowLength = this.rawTokens[0].Count;
        foreach (var tokenRow in this.rawTokens)
        {
            if (tokenRow.Count != rowLength)
            {
                throw new Exception("Attempted to convert token dataset that is not able to be converted!");
            }
        }
    }

    /// <summary>
    /// Returns all token values as one flat list, row after row.
    /// </summary>
    /// <exception cref="Exception">A token is null or not a value token of <typeparamref name="T"/>.</exception>
    public List<T> AsSingleStream<T>()
    {
        List<T> returnData = new List<T>();
        foreach (var tokenRow in this.rawTokens)
        {
            foreach (IToken token in tokenRow)
            {
                returnData.Add(ExtractValue<T>(token));
            }
        }
        return returnData;
    }

    /// <summary>
    /// Returns the token values as a list of arrays, one array per row.
    /// </summary>
    public List<T[]> AsRows<T>()
    {
        return this.AsListRows<T>().ConvertAll(rowList => rowList.ToArray());
    }

    /// <summary>
    /// Returns the token values as a list of lists, one inner list per row.
    /// </summary>
    /// <exception cref="Exception">A token is null or not a value token of <typeparamref name="T"/>.</exception>
    public List<List<T>> AsListRows<T>()
    {
        return this.AsGenericCollection<List<T>, T>();
    }

    /// <summary>
    /// Returns the token values as a list of arrays, one array per column.
    /// </summary>
    /// <exception cref="Exception">Rows differ in length, or a token is not a value token of <typeparamref name="T"/>.</exception>
    public List<T[]> AsColumns<T>()
    {
        return this.AsListColumns<T>().ConvertAll(columnList => columnList.ToArray());
    }

    /// <summary>
    /// Returns the token values transposed: one inner list per column.
    /// An empty dataset yields an empty list.
    /// </summary>
    /// <exception cref="Exception">Rows differ in length, or a token is not a value token of <typeparamref name="T"/>.</exception>
    public List<List<T>> AsListColumns<T>()
    {
        this.CheckConversionPrerequisites();
        var rows = AsListRows<T>();
        var columns = new List<List<T>>();
        if (rows.Count == 0)
        {
            // No rows means no columns; avoids indexing rows[0] below.
            return columns;
        }

        for (int i = 0; i < rows[0].Count; i++)
        {
            columns.Add(new List<T>());
        }
        foreach (var row in rows)
        {
            // Safe: the prerequisite check guarantees every row has rows[0].Count entries.
            for (int i = 0; i < row.Count; i++)
            {
                columns[i].Add(row[i]);
            }
        }
        return columns;
    }

    /// <summary>
    /// Returns the token values as a jagged array indexed by row, then by position in the row.
    /// </summary>
    /// <exception cref="Exception">Rows differ in length, or a token is not a value token of <typeparamref name="T"/>.</exception>
    public T[][] AsGrid<T>()
    {
        this.CheckConversionPrerequisites();
        var rowsList = AsRows<T>();
        return rowsList.ToArray();
    }

    /// <summary>
    /// Returns the underlying token rows themselves (the same list instance, not a copy).
    /// </summary>
    public List<List<IToken>> AsRawData()
    {
        return this.rawTokens;
    }

    /// <summary>
    /// Returns all token values as a flat list of <see cref="Fragment"/> instances.
    /// </summary>
    /// <exception cref="Exception">A token is not a fragment value token, or it carries a null fragment.</exception>
    public List<Fragment> AsFragments()
    {
        var items = this.AsSingleStream<Fragment>();
        var newList = new List<Fragment>();
        foreach (var item in items)
        {
            // The stream is already typed; only a null value can still be invalid here.
            if (item is null)
            {
                throw new Exception("Invalid token type encountered");
            }
            newList.Add(item);
        }
        return newList;
    }
}