Compare commits
8 Commits
Author | SHA1 | Date
---|---|---
 | f8c00da2b8 | 
 | d4ac62c592 | 
 | 0f533c2018 | 
 | 2067fe06fc | 
 | cc0f0a24d9 | 
 | c41d665ab8 | 
 | 2fbdafa0e9 | 
 | f942954678 | 
40 HISTORY.md
@@ -4,6 +4,46 @@ Changelog

(unreleased)
------------
- Feat: add support for custom token types and longs, ref: NOISSUE.
  [Simon Diesenreiter]


0.9.3 (2024-12-13)
------------------

Fix
~~~
- Remove duplicate TokenConverter definition, ref: NOISSUE. [Simon
  Diesenreiter]

Other
~~~~~


0.9.2 (2024-12-13)
------------------

Fix
~~~
- More bugfixes, ref: NOISSUE. [Simon Diesenreiter]

Other
~~~~~


0.9.1 (2024-12-13)
------------------

Fix
~~~
- Fix build issues, ref: NOISSUE. [Simon Diesenreiter]

Other
~~~~~


0.9.0 (2024-12-13)
------------------
- Feat: add filter option to TokenConverter, ref: NOISSUE. [Simon
  Diesenreiter]
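The "(unreleased)" entry above corresponds to the long and custom token support added in this compare. As a rough illustration only (not part of the diff), a schema using the new InputType.Long could follow the same pattern as the existing tests; the input text and variable names below are made up, and the chain assumes Parse() returns the same TokenConverter-style result used in TextParser_TestFilter further down:

    // Illustrative sketch, not from the diff: parse whitespace-separated 64-bit values.
    var schemaBuilder = new InputSchemaBuilder();
    var schema = schemaBuilder
        .Repeat()
        .Expect(InputType.Long)       // new case added to the builder's switch in this compare
        .EndRepetition()
        .Build();

    var parser = new TextParser<InputSchemaContext>(schema);
    List<long> values = parser
        .SetInputText("10000000000 20000000000 30000000000")  // made-up input
        .Parse()
        .AsSingleStream<long>();      // LongToken implements IValueToken<long>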
@@ -33,6 +33,7 @@ public class TextParserTests
     private const string testInput10 = @"abc
 bca
 cab";
+    private const string testInput11 = @"2 x y 4 x y 6 x y 4 x y 1 x y";
 
     [Fact]
     public void LineParser_TestSimpleRepetition()
@@ -394,7 +395,7 @@ public class TextParserTests
     }
 
     [Fact]
-    public void TextPArser_TestReadingChars()
+    public void TextParser_TestReadingChars()
     {
         var schemaBuilder = new InputSchemaBuilder();
         var schema = schemaBuilder
@@ -415,4 +416,31 @@ public class TextParserTests
         Assert.Equal(3, row[1].Count);
         Assert.Equal(3, row[2].Count);
     }
+
+    [Fact]
+    public void TextParser_TestFilter()
+    {
+        var schemaBuilder = new InputSchemaBuilder();
+        var schema = schemaBuilder
+            .Repeat()
+            .Expect(InputType.Integer)
+            .Expect(InputType.Char)
+            .Expect(InputType.Char)
+            .EndRepetition()
+            .Build();
+
+        var parser = new TextParser<InputSchemaContext>(schema);
+        var numbers = parser
+            .SetInputText(testInput11)
+            .Parse()
+            .Filter(InputType.Integer)
+            .AsSingleStream<int>();
+
+        Assert.Equal(5, numbers.Count);
+        Assert.Equal(2, numbers[0]);
+        Assert.Equal(4, numbers[1]);
+        Assert.Equal(6, numbers[2]);
+        Assert.Equal(4, numbers[3]);
+        Assert.Equal(1, numbers[4]);
+    }
 }
@@ -36,7 +36,7 @@ public static class DataManipulationHelpers
         var newList = new List<TNewType>();
         foreach (List<TType> dataItemList in data)
         {
-            newList.Add(transformer(dataItem));
+            newList.Add(transformer(dataItemList));
         }
         return newList;
     }
@@ -13,7 +13,7 @@ public class TokenConverter
     {
     }
 
-    private List<T> AsGenericCollection<T, U>() where T : ICollection<U>, new()
+    private List<T> AsGenericCollection<T, U>() where T : List<U>, new()
     {
         List<T> returnData = new List<T>();
         foreach (var tokenRow in this.rawTokens)
@@ -25,11 +25,15 @@ public class TokenConverter
             {
                 throw new Exception("No token was provided, but token was expected!");
             }
-            IValueToken<U>? valueToken = token as IValueToken<U>;
-            if (valueToken == null)
+
+            if (!token.GetType().IsAssignableTo(typeof(IValueToken<U>)))
             {
-                throw new Exception("Provided token is not a ValueToken");
+                Console.WriteLine(token.GetText());
+                Type t = token.GetType();
+                throw new Exception("Provided token is not a ValueToken - type: " + t.ToString());
             }
+
+            IValueToken<U> valueToken = token as IValueToken<U>;
             newRow.Add(valueToken.GetValue());
         }
@@ -157,19 +161,24 @@ public class TokenConverter
         return newList;
     }
 
-    public TokenConverter Filter<T>(params InputType[] inputTypes)
+    public TokenConverter Filter(params InputType[] inputTypes)
    {
-        var newTokenList = new List<List<IToken>>()
+        var newTokenListList = new List<List<IToken>>();
 
-        foreach(var token in rawTokens)
+        foreach(var tokenList in this.rawTokens)
         {
-            if(inputTypes.Contains(token.GetInputType()))
+            var newTokenList = new List<IToken>();
+            foreach(var token in tokenList)
             {
-                newTokenList.Add(token);
+                if(inputTypes.Contains(token.GetInputType()))
+                {
+                    newTokenList.Add(token);
+                }
             }
+            newTokenListList.Add(newTokenList);
         }
 
-        this.rawTokens = newTokenList;
+        this.rawTokens = newTokenListList;
 
         return this;
     }
@@ -11,4 +11,6 @@ public enum BlockType
     FixedRepetition = 16,
     GreedyRepetition = 32,
     NonZeroRepetition = 64,
+    Custom = 128,
+    Long = 256,
 }
42 TextParser/Schema/BuildingBlocks/CustomInputBlock.cs (new file)
@@ -0,0 +1,42 @@
namespace Parsing.Schema.BuildingBlocks;

using Parsing.Tokenization;

class CustomInputBlock<T> : BuildingBlockBase
{

    private InputType definedInputType;
    private Func<string, T> wordConverter;

    public CustomInputBlock(InputType definedInputType, Func<string, T> wordConverter)
    {
        this.definedInputType = definedInputType;
        this.wordConverter = wordConverter;
    }

    public override List<IToken> ParseWord(InputProvider inputs)
    {
        return new List<IToken>() { new CustomToken<T>(inputs.YieldWord(), this.definedInputType, this.wordConverter) };
    }

    public override bool CanParseWord(InputProvider inputs)
    {
        string word = string.Empty;
        using (inputs.GetLookaheadContext())
        {
            word = inputs.YieldWord();
        }

        return this.CanParseWord(word);
    }

    public override bool CanParseWord(string word)
    {
        return true;
    }

    public override BlockType GetBlockType()
    {
        return BlockType.Custom;
    }
}
35 TextParser/Schema/BuildingBlocks/LongBlock.cs (new file)
@@ -0,0 +1,35 @@
namespace Parsing.Schema.BuildingBlocks;

using Parsing.Tokenization;

class LongBlock : BuildingBlockBase
{

    public LongBlock()
    {
    }

    public override List<IToken> ParseWord(InputProvider inputs)
    {
        return new List<IToken>() { new LongToken(inputs.YieldWord()) };
    }

    public override bool CanParseWord(InputProvider inputs)
    {
        using (inputs.GetLookaheadContext())
        {
            return this.CanParseWord(inputs.YieldWord());
        }
    }

    public override bool CanParseWord(string word)
    {
        long number = 0;
        return long.TryParse(word, out number);
    }

    public override BlockType GetBlockType()
    {
        return BlockType.Long;
    }
}
@@ -28,6 +28,12 @@ class StringBlock : BuildingBlockBase
     public override bool CanParseWord(string word)
     {
         // Here we need to ensure we are not matching any non-string tokens, since string can match pretty much anything
+        LongBlock longBlock = new LongBlock();
+        if (longBlock.CanParseWord(word))
+        {
+            return false;
+        }
+
         IntegerBlock intBlock = new IntegerBlock();
         if (intBlock.CanParseWord(word))
         {
@@ -21,6 +21,9 @@ public class InputSchemaBuilder : RepetitionSchemaBuilder<InputSchemaBuilder, In
             case InputType.Integer:
                 block = new IntegerBlock();
                 break;
+            case InputType.Long:
+                block = new LongBlock();
+                break;
             case InputType.Char:
                 block = new CharBlock();
                 break;
@@ -31,6 +34,21 @@ public class InputSchemaBuilder : RepetitionSchemaBuilder<InputSchemaBuilder, In
         return this;
     }
 
+    public InputSchemaBuilder Expect<T>(InputType type, InputType definedInputType, Func<string, T> wordConverter)
+    {
+        IBuildingBlock block;
+        switch (type)
+        {
+            case InputType.Custom:
+                block = new CustomInputBlock<T>(definedInputType, wordConverter);
+                break;
+            default:
+                throw new Exception("Unrecognized InputType");
+        }
+        schema.AddBuildingBlock(block);
+        return this;
+    }
+
     public InputSchemaBuilder Repeat(int repetitionCount)
     {
         // add another layer of parsing
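For context, the new Expect<T> overload above is what wires a CustomInputBlock<T> into the schema. A purely illustrative sketch (not part of the diff) of how it might be called; passing InputType.Custom as both arguments and the yes/no converter are assumptions, since CustomToken<T> simply reports whatever definedInputType it is given:

    // Illustrative sketch, not from the diff: a custom word-to-bool converter.
    var schemaBuilder = new InputSchemaBuilder();
    var schema = schemaBuilder
        .Expect<bool>(InputType.Custom, InputType.Custom, word => word == "yes")
        .Build();

    var parser = new TextParser<InputSchemaContext>(schema);
    List<bool> flags = parser
        .SetInputText("yes")          // made-up input
        .Parse()
        .AsSingleStream<bool>();      // CustomToken<bool> implements IValueToken<bool>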
@@ -7,4 +7,6 @@ public enum InputType
     String = BlockType.String,
     Fragment = BlockType.Fragment,
     Char = BlockType.Char,
+    Custom = BlockType.Custom,
+    Long = BlockType.Long,
 }
@@ -1,226 +0,0 @@
namespace Parsing;

using System;
using System.Collections.Generic;
using Parsing.Schema;
using Parsing.Tokenization;

public static class DataConversionHelpers
{
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        var newList = new List<TNewType>();
        foreach (var token in tokenList)
        {
            var typedToken = token as IValueToken<TOldType>;
            if (typedToken == null)
            {
                throw new Exception("Invalid Token type encountered during value conversion");
            }

            newList.Add(converter(typedToken.GetValue()));
        }
        return newList;
    }

    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, List<TNewType>> converter) where TTokenType : IValueToken<TOldType>
    {
        var newList = new List<TNewType>();
        foreach (var token in tokenList)
        {
            var typedToken = token as IValueToken<TOldType>;
            if (typedToken == null)
            {
                throw new Exception("Invalid Token type encountered during value conversion");
            }

            newList.AddRange(converter(typedToken.GetValue()));
        }
        return newList;
    }

    public static List<List<TNewType>> ConvertData<TTokenType, TNewType, TOldType>(this List<List<IToken>> tokenListList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        var newListList = new List<List<TNewType>>();
        foreach (var tokenList in tokenListList)
        {
            newListList.Add(tokenList.ConvertData<TTokenType, TNewType, TOldType>(converter));
        }
        return newListList;
    }
}

public static class DataManipulationHelpers
{
    public static TType ReduceData<TType>(this List<TType> data, Func<TType, TType, TType> reducer)
    {
        if (data.Count < 2)
        {
            return data[0];
        }
        TType result = data[0];
        for (int i = 1; i < data.Count; i++)
        {
            result = reducer(result, data[i]);
        }
        return result;
    }

    public static TType ReduceData<TType>(this List<TType> data, Func<List<TType>, TType> reducer)
    {
        return reducer(data);
    }
}

public class TokenConverter
{
    protected List<List<IToken>> rawTokens = new List<List<IToken>>();

    public TokenConverter()
    {
    }

    private List<T> AsGenericCollection<T, U>() where T : ICollection<U>, new()
    {
        List<T> returnData = new List<T>();
        foreach (var tokenRow in this.rawTokens)
        {
            T newRow = new T();
            foreach (IToken token in tokenRow)
            {
                if (token == null)
                {
                    throw new Exception("No token was provided, but token was expected!");
                }
                IValueToken<U>? valueToken = token as IValueToken<U>;
                if (valueToken == null)
                {
                    throw new Exception("Provided token is not a ValueToken");
                }
                newRow.Add(valueToken.GetValue());
            }

            returnData.Add(newRow);
        }
        return returnData;
    }

    private void CheckConversionPrerequisites()
    {
        // in order to convert rows to columns or grid we require every row to have the same length
        int rowLength = this.rawTokens[0].Count;

        foreach (var tokenRow in this.rawTokens)
        {
            if (tokenRow.Count != rowLength)
            {
                throw new Exception("Attempted to convert token dataset that is not able to be converted!");
            }
        }
    }

    public List<T> AsSingleStream<T>()
    {
        List<T> returnData = new List<T>();
        foreach (var tokenRow in this.rawTokens)
        {
            foreach (IToken token in tokenRow)
            {
                if (token == null)
                {
                    throw new Exception("No token was provided, but token was expected!");
                }
                IValueToken<T>? valueToken = token as IValueToken<T>;
                if (valueToken == null)
                {
                    throw new Exception("Provided token is not a ValueToken");
                }
                returnData.Add(valueToken.GetValue());
            }
        }
        return returnData;
    }

    public List<T[]> AsRows<T>()
    {
        var listRows = this.AsListRows<T>();
        var newList = new List<T[]>();

        foreach (var rowList in listRows)
        {
            newList.Add(rowList.ToArray());
        }

        return newList;
    }

    public List<List<T>> AsListRows<T>()
    {
        return this.AsGenericCollection<List<T>, T>();
    }

    public List<T[]> AsColumns<T>()
    {
        var listColumns = this.AsListColumns<T>();
        var newList = new List<T[]>();

        foreach (var columnList in listColumns)
        {
            newList.Add(columnList.ToArray());
        }

        return newList;
    }

    public List<List<T>> AsListColumns<T>()
    {
        this.CheckConversionPrerequisites();
        var rows = AsListRows<T>();

        var columns = new List<List<T>>();
        for (int i = 0; i < rows[0].Count; i++)
        {
            columns.Add(new List<T>());
        }

        foreach (var row in rows)
        {
            for (int i = 0; i < row.Count; i++)
            {
                columns[i].Add(row[i]);
            }
        }

        return columns;
    }

    public T[][] AsGrid<T>()
    {
        this.CheckConversionPrerequisites();
        var rowsList = AsRows<T>();
        return rowsList.ToArray();
    }

    public List<List<IToken>> AsRawData()
    {
        return this.rawTokens;
    }

    public List<Fragment> AsFragments()
    {
        var items = this.AsSingleStream<Fragment>();
        var newList = new List<Fragment>();

        foreach (var item in items)
        {
            var typedItem = item as Fragment;
            if (typedItem == null)
            {
                throw new Exception("Invalid token type encountered");
            }
            newList.Add(typedItem);
        }

        return newList;
    }
}
34 TextParser/Tokenization/CustomToken.cs (new file)
@@ -0,0 +1,34 @@
namespace Parsing.Tokenization;

using Parsing.Schema;

public class CustomToken<T> : IValueToken<T>
{
    private string word;

    private InputType definedInputType;

    private Func<string, T> wordConverter;

    public CustomToken(string word, InputType definedInputType, Func<string, T> wordConverter)
    {
        this.word = word;
        this.wordConverter = wordConverter;
        this.definedInputType = definedInputType;
    }

    public string GetText()
    {
        return word;
    }

    public T GetValue()
    {
        return wordConverter(word);
    }

    public InputType GetInputType()
    {
        return this.definedInputType;
    }
}
28 TextParser/Tokenization/LongToken.cs (new file)
@@ -0,0 +1,28 @@
namespace Parsing.Tokenization;

using Parsing.Schema;

public class LongToken : IValueToken<long>
{
    private string word;

    public LongToken(string word)
    {
        this.word = word;
    }

    public string GetText()
    {
        return word;
    }

    public long GetValue()
    {
        return long.Parse(word);
    }

    public InputType GetInputType()
    {
        return InputType.Long;
    }
}
@@ -1 +1 @@
-0.9.0
+0.10.0