generated from Templates/Dotnet_Library
fix: allow for parsing single chars as input, ref: NOISSUE
This commit is contained in:
parent
b261773b13
commit
550c8280a6
@ -30,6 +30,9 @@ public class TextParserTests
|
|||||||
private const string testInput9 = @"2 4 6 4 1
|
private const string testInput9 = @"2 4 6 4 1
|
||||||
3 5 4 7 6
|
3 5 4 7 6
|
||||||
4 6 8 3 9";
|
4 6 8 3 9";
|
||||||
|
private const string testInput10 = @"abc
|
||||||
|
bca
|
||||||
|
cab";
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void LineParser_TestSimpleRepetition()
|
public void LineParser_TestSimpleRepetition()
|
||||||
@ -389,4 +392,27 @@ public class TextParserTests
|
|||||||
Assert.Equal(Direction.SE, searchResults[4].Direction);
|
Assert.Equal(Direction.SE, searchResults[4].Direction);
|
||||||
Assert.Equal(Direction.W, searchResults[5].Direction);
|
Assert.Equal(Direction.W, searchResults[5].Direction);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
public void TextPArser_TestReadingChars()
{
    // Arrange: a schema consisting of a single repeated Char expectation.
    var schema = new InputSchemaBuilder()
        .Repeat()
            .Expect(InputType.Char)
        .EndRepetition()
        .Build();

    // Act: parse the three-line sample and read it back as rows of strings.
    var rows = new TextParser<InputSchemaContext>(schema)
        .SetInputText(testInput10)
        .Parse()
        .AsListRows<string>();

    // Assert: three rows of three entries each, the very first entry being "a".
    Assert.Equal(3, rows.Count);
    Assert.Equal("a", rows[0][0]);
    Assert.Equal(3, rows[0].Count);
    Assert.Equal(3, rows[1].Count);
    Assert.Equal(3, rows[2].Count);
}
|
||||||
}
|
}
|
||||||
|
@ -4,10 +4,11 @@
|
|||||||
public enum BlockType
|
public enum BlockType
|
||||||
{
|
{
|
||||||
Integer = 1,
|
Integer = 1,
|
||||||
String = 2,
|
Char = 2,
|
||||||
|
String = 4,
|
||||||
// technically not a block type but keeping it here for consistency/having all types in one place
|
// technically not a block type but keeping it here for consistency/having all types in one place
|
||||||
Fragment = 4,
|
Fragment = 8,
|
||||||
FixedRepetition = 8,
|
FixedRepetition = 16,
|
||||||
GreedyRepetition = 16,
|
GreedyRepetition = 32,
|
||||||
NonZeroRepetition = 32,
|
NonZeroRepetition = 64,
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,7 @@ abstract class BuildingBlockBase : IBuildingBlock
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract IToken ParseWord(InputProvider inputs);
|
public abstract List<IToken> ParseWord(InputProvider inputs);
|
||||||
|
|
||||||
public abstract bool CanParseWord(InputProvider inputs);
|
public abstract bool CanParseWord(InputProvider inputs);
|
||||||
|
|
||||||
|
49
TextParser/Schema/BuildingBlocks/CharBlock.cs
Normal file
49
TextParser/Schema/BuildingBlocks/CharBlock.cs
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
namespace Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// Building block that breaks a single input word into its individual characters
/// and emits one <see cref="StringToken"/> per character.
/// </summary>
class CharBlock : BuildingBlockBase
{
    public CharBlock()
    {
    }

    /// <summary>
    /// Reads the next word from <paramref name="inputs"/> and returns one token per character,
    /// in the order the characters appear in the word.
    /// </summary>
    /// <param name="inputs">Provider the next word is taken from.</param>
    /// <returns>A list holding a single-character <see cref="StringToken"/> for every character of the word.</returns>
    public override List<IToken> ParseWord(InputProvider inputs)
    {
        var tokenList = new List<IToken>();
        foreach (char c in inputs.YieldWord())
        {
            tokenList.Add(new StringToken(c.ToString()));
        }

        return tokenList;
    }

    /// <summary>
    /// Checks whether the next word of <paramref name="inputs"/> can be handled by this block.
    /// </summary>
    /// <param name="inputs">Provider whose next word is inspected.</param>
    /// <returns>The result of <see cref="CanParseWord(string)"/> for that word.</returns>
    public override bool CanParseWord(InputProvider inputs)
    {
        string word;

        // The word is read inside a lookahead context.
        // NOTE(review): presumably disposing the context rewinds the provider so the word is not consumed - confirm.
        using (inputs.GetLookaheadContext())
        {
            word = inputs.YieldWord();
        }

        return this.CanParseWord(word);
    }

    /// <summary>
    /// Decides whether <paramref name="word"/> should be split into characters by this block.
    /// </summary>
    /// <param name="word">Candidate word.</param>
    /// <returns><c>false</c> when an <see cref="IntegerBlock"/> accepts the word; <c>true</c> otherwise.</returns>
    public override bool CanParseWord(string word)
    {
        // Characters can match pretty much anything, so explicitly refuse words that a
        // more specific (integer) block is able to parse.
        return !new IntegerBlock().CanParseWord(word);
    }

    /// <summary>
    /// Identifies this block as a character block.
    /// </summary>
    /// <returns><see cref="BlockType.Char"/>.</returns>
    public override BlockType GetBlockType()
    {
        // Previously reported BlockType.String (copy/paste from StringBlock); the schema
        // builder creates this block for InputType.Char, which maps to BlockType.Char.
        return BlockType.Char;
    }
}
|
@ -20,7 +20,7 @@ class FixedRepetitionBlock : BuildingBlockBase
|
|||||||
this.context = this.inputSchema.CreateContext();
|
this.context = this.inputSchema.CreateContext();
|
||||||
}
|
}
|
||||||
|
|
||||||
public override IToken ParseWord(InputProvider inputs)
|
public override List<IToken> ParseWord(InputProvider inputs)
|
||||||
{
|
{
|
||||||
var result = inputSchema.ProcessNextWord(context, inputs);
|
var result = inputSchema.ProcessNextWord(context, inputs);
|
||||||
if (context.HasFinished)
|
if (context.HasFinished)
|
||||||
@ -31,7 +31,7 @@ class FixedRepetitionBlock : BuildingBlockBase
|
|||||||
this.context = this.inputSchema.CreateContext();
|
this.context = this.inputSchema.CreateContext();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result.Single();
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public override bool CanParseWord(InputProvider inputs)
|
public override bool CanParseWord(InputProvider inputs)
|
||||||
|
@ -15,14 +15,14 @@ class GreedyRepetitionBlock : BuildingBlockBase
|
|||||||
this.context = this.inputSchema.CreateContext();
|
this.context = this.inputSchema.CreateContext();
|
||||||
}
|
}
|
||||||
|
|
||||||
public override IToken ParseWord(InputProvider inputs)
|
public override List<IToken> ParseWord(InputProvider inputs)
|
||||||
{
|
{
|
||||||
var result = inputSchema.ProcessNextWord(context, inputs);
|
var result = inputSchema.ProcessNextWord(context, inputs);
|
||||||
if (!this.CanParseWord(inputs))
|
if (!this.CanParseWord(inputs))
|
||||||
{
|
{
|
||||||
this.context = this.inputSchema.CreateContext();
|
this.context = this.inputSchema.CreateContext();
|
||||||
}
|
}
|
||||||
return result.Single();
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public override bool CanParseWord(InputProvider inputs)
|
public override bool CanParseWord(InputProvider inputs)
|
||||||
|
@ -4,7 +4,7 @@ using Parsing.Tokenization;
|
|||||||
|
|
||||||
public interface IBuildingBlock
|
public interface IBuildingBlock
|
||||||
{
|
{
|
||||||
public IToken ParseWord(InputProvider inputs);
|
public List<IToken> ParseWord(InputProvider inputs);
|
||||||
|
|
||||||
public bool CanParseWord(InputProvider inputs);
|
public bool CanParseWord(InputProvider inputs);
|
||||||
|
|
||||||
|
@ -9,9 +9,9 @@ class IntegerBlock : BuildingBlockBase
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
public override IToken ParseWord(InputProvider inputs)
|
public override List<IToken> ParseWord(InputProvider inputs)
|
||||||
{
|
{
|
||||||
return new IntegerToken(inputs.YieldWord());
|
return new List<IToken>() { new IntegerToken(inputs.YieldWord()) };
|
||||||
}
|
}
|
||||||
|
|
||||||
public override bool CanParseWord(InputProvider inputs)
|
public override bool CanParseWord(InputProvider inputs)
|
||||||
|
@ -9,9 +9,9 @@ class StringBlock : BuildingBlockBase
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
public override IToken ParseWord(InputProvider inputs)
|
public override List<IToken> ParseWord(InputProvider inputs)
|
||||||
{
|
{
|
||||||
return new StringToken(inputs.YieldWord());
|
return new List<IToken>() { new StringToken(inputs.YieldWord()) };
|
||||||
}
|
}
|
||||||
|
|
||||||
public override bool CanParseWord(InputProvider inputs)
|
public override bool CanParseWord(InputProvider inputs)
|
||||||
|
@ -28,15 +28,13 @@ public class InputSchema : ISchema<InputSchemaContext>
|
|||||||
public List<IToken> ProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
|
public List<IToken> ProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
|
||||||
{
|
{
|
||||||
var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
|
var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
|
||||||
var token = nextBlock.ParseWord(inputs);
|
var tokens = nextBlock.ParseWord(inputs);
|
||||||
if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset(inputs))
|
if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset(inputs))
|
||||||
{
|
{
|
||||||
currentContext.lastProcessedBlockIndex++;
|
currentContext.lastProcessedBlockIndex++;
|
||||||
currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count;
|
currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count;
|
||||||
}
|
}
|
||||||
var newTokenList = new List<IToken>();
|
return tokens;
|
||||||
newTokenList.Add(token);
|
|
||||||
return newTokenList;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public bool CanProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
|
public bool CanProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
|
||||||
|
@ -21,6 +21,9 @@ public class InputSchemaBuilder : RepetitionSchemaBuilder<InputSchemaBuilder, In
|
|||||||
case InputType.Integer:
|
case InputType.Integer:
|
||||||
block = new IntegerBlock();
|
block = new IntegerBlock();
|
||||||
break;
|
break;
|
||||||
|
case InputType.Char:
|
||||||
|
block = new CharBlock();
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
throw new Exception("Unrecognized InputType");
|
throw new Exception("Unrecognized InputType");
|
||||||
}
|
}
|
||||||
|
@ -6,4 +6,5 @@ public enum InputType
|
|||||||
Integer = BlockType.Integer,
|
Integer = BlockType.Integer,
|
||||||
String = BlockType.String,
|
String = BlockType.String,
|
||||||
Fragment = BlockType.Fragment,
|
Fragment = BlockType.Fragment,
|
||||||
|
Char = BlockType.Char,
|
||||||
}
|
}
|
||||||
|
226
TextParser/TokenConverter.cs
Normal file
226
TextParser/TokenConverter.cs
Normal file
@ -0,0 +1,226 @@
|
|||||||
|
namespace Parsing;
|
||||||
|
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using Parsing.Schema;
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// Extension methods that turn token lists into lists of converted values.
/// </summary>
public static class DataConversionHelpers
{
    /// <summary>
    /// Converts every token of <paramref name="tokenList"/> into exactly one value.
    /// </summary>
    /// <param name="tokenList">Tokens to convert; each must implement <see cref="IValueToken{TOldType}"/>.</param>
    /// <param name="converter">Maps a token's value to the resulting element.</param>
    /// <returns>A new list with one converted element per token, in token order.</returns>
    /// <exception cref="Exception">A token does not implement <see cref="IValueToken{TOldType}"/>.</exception>
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        var converted = new List<TNewType>();
        foreach (IToken current in tokenList)
        {
            if (current is not IValueToken<TOldType> valueToken)
            {
                throw new Exception("Invalid Token type encountered during value conversion");
            }

            TOldType rawValue = valueToken.GetValue();
            converted.Add(converter(rawValue));
        }

        return converted;
    }

    /// <summary>
    /// Converts every token of <paramref name="tokenList"/> into zero or more values and
    /// concatenates the results into one flat list.
    /// </summary>
    /// <param name="tokenList">Tokens to convert; each must implement <see cref="IValueToken{TOldType}"/>.</param>
    /// <param name="converter">Maps a token's value to the elements it contributes.</param>
    /// <returns>A new list holding all produced elements, in token order.</returns>
    /// <exception cref="Exception">A token does not implement <see cref="IValueToken{TOldType}"/>.</exception>
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, List<TNewType>> converter) where TTokenType : IValueToken<TOldType>
    {
        var flattened = new List<TNewType>();
        foreach (IToken current in tokenList)
        {
            if (current is not IValueToken<TOldType> valueToken)
            {
                throw new Exception("Invalid Token type encountered during value conversion");
            }

            List<TNewType> produced = converter(valueToken.GetValue());
            flattened.AddRange(produced);
        }

        return flattened;
    }

    /// <summary>
    /// Applies the one-value-per-token conversion to every row of <paramref name="tokenListList"/>.
    /// </summary>
    /// <param name="tokenListList">Rows of tokens to convert.</param>
    /// <param name="converter">Maps a token's value to the resulting element.</param>
    /// <returns>A new list with one converted row per input row, in row order.</returns>
    public static List<List<TNewType>> ConvertData<TTokenType, TNewType, TOldType>(this List<List<IToken>> tokenListList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        var convertedRows = new List<List<TNewType>>();
        foreach (List<IToken> row in tokenListList)
        {
            List<TNewType> convertedRow = row.ConvertData<TTokenType, TNewType, TOldType>(converter);
            convertedRows.Add(convertedRow);
        }

        return convertedRows;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Extension methods that collapse a list of values into a single value.
/// </summary>
public static class DataManipulationHelpers
{
    /// <summary>
    /// Folds <paramref name="data"/> from left to right: starts with the first element and
    /// combines the running result with each following element via <paramref name="reducer"/>.
    /// A single-element list yields that element without invoking <paramref name="reducer"/>.
    /// </summary>
    /// <param name="data">Values to reduce; must contain at least one element.</param>
    /// <param name="reducer">Combines the running result (first argument) with the next element (second argument).</param>
    /// <returns>The accumulated result.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="data"/> is empty.</exception>
    public static TType ReduceData<TType>(this List<TType> data, Func<TType, TType, TType> reducer)
    {
        if (data is null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        if (data.Count == 0)
        {
            // An empty list used to fail inside the list indexer with an unrelated "index" error.
            // The exception type is kept so existing handlers still match, but it now names the real cause.
            throw new ArgumentOutOfRangeException(nameof(data), "Cannot reduce an empty list.");
        }

        TType result = data[0];
        for (int i = 1; i < data.Count; i++)
        {
            result = reducer(result, data[i]);
        }

        return result;
    }

    /// <summary>
    /// Reduces <paramref name="data"/> by handing the whole list to <paramref name="reducer"/>.
    /// </summary>
    /// <param name="data">Values to reduce; passed to <paramref name="reducer"/> unchanged.</param>
    /// <param name="reducer">Computes the result from the complete list.</param>
    /// <returns>Whatever <paramref name="reducer"/> returns.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="reducer"/> is <c>null</c>.</exception>
    public static TType ReduceData<TType>(this List<TType> data, Func<List<TType>, TType> reducer)
    {
        if (reducer is null)
        {
            throw new ArgumentNullException(nameof(reducer));
        }

        return reducer(data);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Holds rows of raw tokens and exposes them in several typed shapes
/// (flat stream, rows, columns, grid, fragments).
/// </summary>
public class TokenConverter
{
    /// <summary>Raw token rows; every inner list is one row.</summary>
    protected List<List<IToken>> rawTokens = new List<List<IToken>>();

    public TokenConverter()
    {
    }

    /// <summary>
    /// Validates a single token and extracts its value as <typeparamref name="TValue"/>.
    /// </summary>
    /// <exception cref="Exception">The token is <c>null</c> or does not implement <see cref="IValueToken{TValue}"/>.</exception>
    private static TValue ExtractValue<TValue>(IToken token)
    {
        if (token is null)
        {
            throw new Exception("No token was provided, but token was expected!");
        }

        if (token is not IValueToken<TValue> valueToken)
        {
            throw new Exception("Provided token is not a ValueToken");
        }

        return valueToken.GetValue();
    }

    /// <summary>
    /// Builds one collection of type <typeparamref name="T"/> per token row, filled with the
    /// row's token values in order.
    /// </summary>
    private List<T> AsGenericCollection<T, U>() where T : ICollection<U>, new()
    {
        List<T> returnData = new List<T>();
        foreach (var tokenRow in this.rawTokens)
        {
            T newRow = new T();
            foreach (IToken token in tokenRow)
            {
                newRow.Add(ExtractValue<U>(token));
            }

            returnData.Add(newRow);
        }

        return returnData;
    }

    /// <summary>
    /// Ensures every row has the same number of tokens, which column and grid conversion rely on.
    /// An empty token set trivially satisfies this.
    /// </summary>
    /// <exception cref="Exception">At least one row differs in length from the first row.</exception>
    private void CheckConversionPrerequisites()
    {
        // Without this guard, rawTokens[0] would fail with an index error when nothing was parsed.
        if (this.rawTokens.Count == 0)
        {
            return;
        }

        // in order to convert rows to columns or grid we require every row to have the same length
        int rowLength = this.rawTokens[0].Count;

        foreach (var tokenRow in this.rawTokens)
        {
            if (tokenRow.Count != rowLength)
            {
                throw new Exception("Attempted to convert token dataset that is not able to be converted!");
            }
        }
    }

    /// <summary>
    /// Returns the values of all tokens as one flat list, row after row.
    /// </summary>
    /// <exception cref="Exception">A token is <c>null</c> or does not implement <see cref="IValueToken{T}"/>.</exception>
    public List<T> AsSingleStream<T>()
    {
        List<T> returnData = new List<T>();
        foreach (var tokenRow in this.rawTokens)
        {
            foreach (IToken token in tokenRow)
            {
                returnData.Add(ExtractValue<T>(token));
            }
        }

        return returnData;
    }

    /// <summary>
    /// Returns the token values as a list of arrays, one array per row.
    /// </summary>
    public List<T[]> AsRows<T>()
    {
        var listRows = this.AsListRows<T>();
        var newList = new List<T[]>();

        foreach (var rowList in listRows)
        {
            newList.Add(rowList.ToArray());
        }

        return newList;
    }

    /// <summary>
    /// Returns the token values as a list of lists, one inner list per row.
    /// </summary>
    /// <exception cref="Exception">A token is <c>null</c> or does not implement <see cref="IValueToken{T}"/>.</exception>
    public List<List<T>> AsListRows<T>()
    {
        return this.AsGenericCollection<List<T>, T>();
    }

    /// <summary>
    /// Returns the token values as a list of arrays, one array per column.
    /// </summary>
    /// <exception cref="Exception">Rows differ in length.</exception>
    public List<T[]> AsColumns<T>()
    {
        var listColumns = this.AsListColumns<T>();
        var newList = new List<T[]>();

        foreach (var columnList in listColumns)
        {
            newList.Add(columnList.ToArray());
        }

        return newList;
    }

    /// <summary>
    /// Transposes the rows: returns one list per column, each holding that column's values
    /// from the first row to the last. Returns an empty list when there are no rows.
    /// </summary>
    /// <exception cref="Exception">Rows differ in length.</exception>
    public List<List<T>> AsListColumns<T>()
    {
        this.CheckConversionPrerequisites();
        var rows = AsListRows<T>();

        var columns = new List<List<T>>();
        if (rows.Count == 0)
        {
            // Nothing to transpose; avoids indexing rows[0] on an empty result.
            return columns;
        }

        for (int i = 0; i < rows[0].Count; i++)
        {
            columns.Add(new List<T>());
        }

        foreach (var row in rows)
        {
            for (int i = 0; i < row.Count; i++)
            {
                columns[i].Add(row[i]);
            }
        }

        return columns;
    }

    /// <summary>
    /// Returns the token values as a jagged array with one inner array per row.
    /// Returns an empty array when there are no rows.
    /// </summary>
    /// <exception cref="Exception">Rows differ in length.</exception>
    public T[][] AsGrid<T>()
    {
        this.CheckConversionPrerequisites();
        var rowsList = AsRows<T>();
        return rowsList.ToArray();
    }

    /// <summary>
    /// Returns the underlying token rows. This is the internal list itself, not a copy.
    /// </summary>
    public List<List<IToken>> AsRawData()
    {
        return this.rawTokens;
    }

    /// <summary>
    /// Returns all token values as a flat list of <see cref="Fragment"/> instances.
    /// </summary>
    /// <exception cref="Exception">A token is not a fragment value token, or its value is <c>null</c>.</exception>
    public List<Fragment> AsFragments()
    {
        var items = this.AsSingleStream<Fragment>();
        var newList = new List<Fragment>();

        foreach (var item in items)
        {
            // The values are already typed as Fragment, so this effectively rejects null values.
            if (item is not Fragment fragment)
            {
                throw new Exception("Invalid token type encountered");
            }

            newList.Add(fragment);
        }

        return newList;
    }
}
|
Loading…
x
Reference in New Issue
Block a user