generated from Templates/Dotnet_Library
Compare commits
30 Commits
Author | SHA1 | Date | |
---|---|---|---|
f8c00da2b8 | |||
d4ac62c592 | |||
0f533c2018 | |||
2067fe06fc | |||
cc0f0a24d9 | |||
c41d665ab8 | |||
2fbdafa0e9 | |||
f942954678 | |||
4c67e8efb0 | |||
e83e99758a | |||
81ac797b4c | |||
e9aa60524c | |||
7e5ab9f799 | |||
fc137ebd03 | |||
1b2e9ad1ee | |||
550c8280a6 | |||
|
b261773b13 | ||
|
85d94eca2f | ||
2ed103abbf | |||
fab5d2eee7 | |||
7be09140e2 | |||
09bbba1293 | |||
c1705d9f96 | |||
62092d0380 | |||
|
29f7aa37da | ||
|
5f07f16f1f | ||
|
d5cfe494d8 | ||
|
0d85132a32 | ||
|
e15190ecd6 | ||
|
0dc5fb14c5 |
@ -17,23 +17,7 @@ on:
|
|||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
linter:
|
|
||||||
strategy:
|
|
||||||
fail-fast: false
|
|
||||||
matrix:
|
|
||||||
dotnet-version: [9.0.X]
|
|
||||||
os: [ubuntu-latest]
|
|
||||||
runs-on: ${{ matrix.os }}
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v3
|
|
||||||
- uses: actions/setup-dotnet@v4
|
|
||||||
with:
|
|
||||||
dotnet-version: ${{ matrix.dotnet-version }}
|
|
||||||
- name: Run linter
|
|
||||||
run: make lint
|
|
||||||
|
|
||||||
tests_linux:
|
tests_linux:
|
||||||
needs: linter
|
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
|
@ -64,5 +64,5 @@ jobs:
|
|||||||
QUOTED_REPOSITORY_SOURCE_NAME=$(dotnet nuget config get all | grep "/packages/${REPOSITORY_OWNER}/nuget/index.json" | awk '{print $2}' | awk -F= '{print $2}')
|
QUOTED_REPOSITORY_SOURCE_NAME=$(dotnet nuget config get all | grep "/packages/${REPOSITORY_OWNER}/nuget/index.json" | awk '{print $2}' | awk -F= '{print $2}')
|
||||||
REPOSITORY_SOURCE_NAME=${QUOTED_REPOSITORY_SOURCE_NAME:1:-1}
|
REPOSITORY_SOURCE_NAME=${QUOTED_REPOSITORY_SOURCE_NAME:1:-1}
|
||||||
fi
|
fi
|
||||||
dotnet pack --include-symbols --include-source -p:PackageVersion=$(cat $REPOSITORY_NAME/VERSION) DotnetTestLib.sln
|
dotnet pack --include-symbols --include-source -p:PackageVersion=$(cat $REPOSITORY_NAME/VERSION) TextParser.sln
|
||||||
dotnet nuget push -k $GITEA_PAT --source $REPOSITORY_SOURCE_NAME $REPOSITORY_NAME/bin/Release/$REPOSITORY_NAME.$(cat $REPOSITORY_NAME/VERSION).symbols.nupkg
|
dotnet nuget push -k $GITEA_PAT --source $REPOSITORY_SOURCE_NAME $REPOSITORY_NAME/bin/Release/$REPOSITORY_NAME.$(cat $REPOSITORY_NAME/VERSION).symbols.nupkg
|
||||||
|
126
HISTORY.md
126
HISTORY.md
@ -4,11 +4,137 @@ Changelog
|
|||||||
|
|
||||||
(unreleased)
|
(unreleased)
|
||||||
------------
|
------------
|
||||||
|
- Feat: add support for custom token types and longs, ref: NOISSUE.
|
||||||
|
[Simon Diesenreiter]
|
||||||
|
|
||||||
|
|
||||||
|
0.9.3 (2024-12-13)
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Fix
|
||||||
|
~~~
|
||||||
|
- Remove duplicate TokenConverter definition, ref: NOISSUE. [Simon
|
||||||
|
Diesenreiter]
|
||||||
|
|
||||||
|
Other
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
|
||||||
|
0.9.2 (2024-12-13)
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Fix
|
||||||
|
~~~
|
||||||
|
- More bugfixes, ref: NOISSUE. [Simon Diesenreiter]
|
||||||
|
|
||||||
|
Other
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
|
||||||
|
0.9.1 (2024-12-13)
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Fix
|
||||||
|
~~~
|
||||||
|
- Fix build issues, ref: NOISSUE. [Simon Diesenreiter]
|
||||||
|
|
||||||
|
Other
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
|
||||||
|
0.9.0 (2024-12-13)
|
||||||
|
------------------
|
||||||
|
- Feat: add filter option to TokenConverter, ref: NOISSUE. [Simon
|
||||||
|
Diesenreiter]
|
||||||
|
|
||||||
|
|
||||||
|
0.8.0 (2024-12-12)
|
||||||
|
------------------
|
||||||
|
- Feat: adding sensible index constructors refs: NOISSUE. [Simon
|
||||||
|
Diesenreiter]
|
||||||
|
|
||||||
|
|
||||||
|
0.7.2 (2024-12-05)
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Fix
|
||||||
|
~~~
|
||||||
|
- Add some missing API methods, ref: NOISSUE. [Simon Diesenreiter]
|
||||||
|
|
||||||
|
Other
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
|
||||||
|
0.7.1 (2024-12-05)
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Fix
|
||||||
|
~~~
|
||||||
|
- Allow for parsing single chars as input, ref: NOISSUE. [Simon
|
||||||
|
Diesenreiter]
|
||||||
|
|
||||||
|
Other
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
|
||||||
|
0.7.0 (2024-12-05)
|
||||||
|
------------------
|
||||||
|
- Feat: implement generic data set manipulator, ref: NOISSUE. [Simon
|
||||||
|
Diesenreiter]
|
||||||
|
|
||||||
|
|
||||||
|
0.6.0 (2024-12-03)
|
||||||
|
------------------
|
||||||
|
- Feat: enable named literals, ref: NOISSUE. [Simon Diesenreiter]
|
||||||
|
|
||||||
|
|
||||||
|
0.5.1 (2024-12-03)
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Fix
|
||||||
|
~~~
|
||||||
|
- Some bugfixes with fragment parser logic, ref: NOISSUE. [Simon
|
||||||
|
Diesenreiter]
|
||||||
|
|
||||||
|
Other
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
|
||||||
|
0.5.0 (2024-12-03)
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Fix
|
||||||
|
~~~
|
||||||
|
- Fix bugs with fragment parsing support, refs: NOISSUE. [Simon
|
||||||
|
Diesenreiter]
|
||||||
|
|
||||||
|
Other
|
||||||
|
~~~~~
|
||||||
|
- Feat: initial support for fragment parsing, ref: NOISSUE. [Simon
|
||||||
|
Diesenreiter]
|
||||||
|
|
||||||
|
|
||||||
|
0.4.0 (2024-12-02)
|
||||||
|
------------------
|
||||||
|
- Style: fix linting errors, ref: NOISSUE. [Simon Diesenreiter]
|
||||||
|
- Feat: implement greedy repetition, ref: A24-13. [Simon Diesenreiter]
|
||||||
|
|
||||||
|
|
||||||
|
0.3.0 (2024-12-02)
|
||||||
|
------------------
|
||||||
|
- Feat: ci pipeline fix for releases, ref NOISSUE. [Simon Diesenreiter]
|
||||||
|
|
||||||
|
|
||||||
|
0.2.3 (2024-12-02)
|
||||||
|
------------------
|
||||||
|
|
||||||
Fix
|
Fix
|
||||||
~~~
|
~~~
|
||||||
- More release script fixes,ref: NOISSUE. [Simon Diesenreiter]
|
- More release script fixes,ref: NOISSUE. [Simon Diesenreiter]
|
||||||
|
|
||||||
|
Other
|
||||||
|
~~~~~
|
||||||
|
|
||||||
|
|
||||||
0.2.2 (2024-12-02)
|
0.2.2 (2024-12-02)
|
||||||
------------------
|
------------------
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
namespace TextParser.Tests;
|
namespace TextParser.Tests;
|
||||||
|
|
||||||
using Parsing;
|
using Parsing;
|
||||||
|
using Parsing.Data;
|
||||||
using Parsing.Schema;
|
using Parsing.Schema;
|
||||||
using Parsing.Schema.BuildingBlocks;
|
|
||||||
using Parsing.Tokenization;
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
public class TextParserTests
|
public class TextParserTests
|
||||||
@ -12,6 +12,28 @@ public class TextParserTests
|
|||||||
private const string testInput3 = @"2 4 6 1
|
private const string testInput3 = @"2 4 6 1
|
||||||
3 5 7 2
|
3 5 7 2
|
||||||
4 6 8 3";
|
4 6 8 3";
|
||||||
|
private const string testInput4 = @"2 ab ba fd er sd
|
||||||
|
8 cd dc
|
||||||
|
7 uh 6 yp rt";
|
||||||
|
private const string testInput5 = @"asdfnums(2,5,3)ght
|
||||||
|
|
||||||
|
cv strs(test) jh 4,3,2
|
||||||
|
|
||||||
|
34,54,2nums(2,8) strs(aa,ab,ba,bb)aa,bb";
|
||||||
|
private const string testInput6 = @"adfdf1()324ddf3()svsdvs
|
||||||
|
davnsldkvjs2()m23423()
|
||||||
|
mcsodkcn owdjnfj 1() asdfnad 23234 2() sdvsdv";
|
||||||
|
private const string testInput7 = @"adfdf1()324ddf3()()()svsdvs
|
||||||
|
davnsldkvjs2()()m23423()()()
|
||||||
|
mcsodkcn owdjnfj 1() asdfnad 23234 2()() sdvsdv";
|
||||||
|
private const string testInput8 = @"2 4 6 4 1 3 5 4 7 2 4 6 8 3";
|
||||||
|
private const string testInput9 = @"2 4 6 4 1
|
||||||
|
3 5 4 7 6
|
||||||
|
4 6 8 3 9";
|
||||||
|
private const string testInput10 = @"abc
|
||||||
|
bca
|
||||||
|
cab";
|
||||||
|
private const string testInput11 = @"2 x y 4 x y 6 x y 4 x y 1 x y";
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void LineParser_TestSimpleRepetition()
|
public void LineParser_TestSimpleRepetition()
|
||||||
@ -23,7 +45,7 @@ public class TextParserTests
|
|||||||
.EndRepetition()
|
.EndRepetition()
|
||||||
.Build();
|
.Build();
|
||||||
|
|
||||||
var parser = new LineParser(schema);
|
var parser = new LineParser<InputSchemaContext>(schema);
|
||||||
var tokens = parser.ParseLine(testInput1);
|
var tokens = parser.ParseLine(testInput1);
|
||||||
|
|
||||||
Assert.Equal(4, tokens.Count);
|
Assert.Equal(4, tokens.Count);
|
||||||
@ -48,7 +70,7 @@ public class TextParserTests
|
|||||||
.Expect(InputType.Integer)
|
.Expect(InputType.Integer)
|
||||||
.Build();
|
.Build();
|
||||||
|
|
||||||
var parser = new LineParser(schema);
|
var parser = new LineParser<InputSchemaContext>(schema);
|
||||||
var tokens = parser.ParseLine(testInput1);
|
var tokens = parser.ParseLine(testInput1);
|
||||||
|
|
||||||
Assert.Equal(4, tokens.Count);
|
Assert.Equal(4, tokens.Count);
|
||||||
@ -76,7 +98,7 @@ public class TextParserTests
|
|||||||
.EndRepetition()
|
.EndRepetition()
|
||||||
.Build();
|
.Build();
|
||||||
|
|
||||||
var parser = new LineParser(schema);
|
var parser = new LineParser<InputSchemaContext>(schema);
|
||||||
var tokens = parser.ParseLine(testInput2);
|
var tokens = parser.ParseLine(testInput2);
|
||||||
|
|
||||||
Assert.Equal(6, tokens.Count);
|
Assert.Equal(6, tokens.Count);
|
||||||
@ -104,7 +126,7 @@ public class TextParserTests
|
|||||||
.EndRepetition()
|
.EndRepetition()
|
||||||
.Build();
|
.Build();
|
||||||
|
|
||||||
var parser = new TextParser(schema);
|
var parser = new TextParser<InputSchemaContext>(schema);
|
||||||
var rows = parser
|
var rows = parser
|
||||||
.SetInputText(testInput3)
|
.SetInputText(testInput3)
|
||||||
.Parse()
|
.Parse()
|
||||||
@ -136,7 +158,7 @@ public class TextParserTests
|
|||||||
.EndRepetition()
|
.EndRepetition()
|
||||||
.Build();
|
.Build();
|
||||||
|
|
||||||
var parser = new TextParser(schema);
|
var parser = new TextParser<InputSchemaContext>(schema);
|
||||||
var columns = parser
|
var columns = parser
|
||||||
.SetInputText(testInput3)
|
.SetInputText(testInput3)
|
||||||
.Parse()
|
.Parse()
|
||||||
@ -157,4 +179,268 @@ public class TextParserTests
|
|||||||
Assert.Equal(2, columns[3][1]);
|
Assert.Equal(2, columns[3][1]);
|
||||||
Assert.Equal(3, columns[3][2]);
|
Assert.Equal(3, columns[3][2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parses testInput4 with a nested repetition (an integer followed by repeated strings)
// and checks the number of raw tokens per row as well as each token's input type.
[Fact]
public void TextParser_TestGreedyRepetitionAsRows()
{
    var schema = new InputSchemaBuilder()
        .Repeat()
        .Expect(InputType.Integer)
        .Repeat()
        .Expect(InputType.String)
        .EndRepetition()
        .EndRepetition()
        .Build();

    var rows = new TextParser<InputSchemaContext>(schema)
        .SetInputText(testInput4)
        .Parse()
        .AsRawData();

    // One entry per expected row; each inner array lists the expected token types in order.
    InputType[][] expectedTypes =
    {
        new[] { InputType.Integer, InputType.String, InputType.String, InputType.String, InputType.String, InputType.String },
        new[] { InputType.Integer, InputType.String, InputType.String },
        new[] { InputType.Integer, InputType.String, InputType.Integer, InputType.String, InputType.String },
    };

    // Shape first: row count, then token count per row.
    Assert.Equal(expectedTypes.Length, rows.Count);
    for (int rowIndex = 0; rowIndex < expectedTypes.Length; rowIndex++)
    {
        Assert.Equal(expectedTypes[rowIndex].Length, rows[rowIndex].Count);
    }

    // Then the type of every single token.
    for (int rowIndex = 0; rowIndex < expectedTypes.Length; rowIndex++)
    {
        for (int tokenIndex = 0; tokenIndex < expectedTypes[rowIndex].Length; tokenIndex++)
        {
            Assert.Equal(expectedTypes[rowIndex][tokenIndex], rows[rowIndex][tokenIndex].GetInputType());
        }
    }
}
|
||||||
|
|
||||||
|
// Parses testInput5 with a two-option fragment schema ("nums(...)" and "strs(...)") and checks
// both the captured values per fragment and a per-fragment aggregate computed via ConvertAll.
[Fact]
public void FragmentParser_SimpleTest()
{
    var schema = new FragmentSchemaBuilder()
        .StartOptions()
        .Option()
        .Expect("nums(")
        .Expect(InputType.Integer, "num")
        .Repeat()
        .Expect(",")
        .Expect(InputType.Integer, "num")
        .EndRepetition()
        .Expect(")")
        .Option()
        .Expect("strs(")
        .Expect(InputType.String, "str")
        .Repeat()
        .Expect(",")
        .Expect(InputType.String, "str")
        .EndRepetition()
        .Expect(")")
        .EndOptions()
        .Build();

    var fragmentData = TextParser.Create(schema)
        .SetInputText(testInput5)
        .Parse()
        .AsFragments();

    // Aggregate per fragment: sum of the parsed "num" values plus the number of "num" and "str" captures.
    var convertedData = fragmentData.ConvertAll((Fragment f) =>
    {
        int total = 0;
        foreach (var numberText in f["num"])
        {
            total += int.Parse(numberText);
        }

        total += f["num"].Count;
        total += f["str"].Count;
        return total;
    });

    Assert.Equal(4, fragmentData.Count);
    Assert.Equal(3, fragmentData[0]["num"].Count);
    Assert.Single(fragmentData[1]["str"]);
    Assert.Equal(2, fragmentData[2]["num"].Count);
    Assert.Equal(4, fragmentData[3]["str"].Count);

    int[] expectedTotals = { 13, 1, 12, 4 };
    for (int i = 0; i < expectedTotals.Length; i++)
    {
        Assert.Equal(expectedTotals[i], convertedData[i]);
    }
}
|
||||||
|
|
||||||
|
// Parses testInput6 with three named literal options ("1()", "2()", "3()") and checks which
// option each fragment matched by folding the matches into a small indicator number.
[Fact]
public void FragmentParser_LiteralTest()
{
    var schema = new FragmentSchemaBuilder()
        .StartOptions()
        .Option()
        .Expect("1()", "option1")
        .Option()
        .Expect("2()", "option2")
        .Option()
        .Expect("3()", "option3")
        .EndOptions()
        .Build();

    var fragmentData = TextParser.Create(schema)
        .SetInputText(testInput6)
        .Parse()
        .AsFragments();

    // True when the fragment has an entry for the key and that entry holds at least one value.
    static bool HasMatch(Fragment fragment, string key)
    {
        return fragment.ContainsKey(key) && fragment[key].Count > 0;
    }

    // option1 contributes 1, option2 contributes 2, option3 contributes 4.
    var convertedData = fragmentData.ConvertAll((Fragment f) =>
    {
        bool sawFirst = HasMatch(f, "option1");
        bool sawSecond = HasMatch(f, "option2");
        bool sawThird = HasMatch(f, "option3");

        return (sawFirst ? 1 : 0) + (sawSecond ? 2 : 0) + (sawThird ? 4 : 0);
    });

    int[] expectedIndicators = { 1, 4, 2, 4, 1, 2 };

    Assert.Equal(expectedIndicators.Length, convertedData.Count);
    for (int i = 0; i < expectedIndicators.Length; i++)
    {
        Assert.Equal(expectedIndicators[i], convertedData[i]);
    }
}
|
||||||
|
|
||||||
|
// Parses testInput8 into a single stream of integers and searches it for the sequence { 4, 6 },
// checking the start index and the direction reported for every hit.
[Fact]
public void DataManipulator_SimpleOneDimensionalTest()
{
    var schema = new InputSchemaBuilder()
        .Repeat()
        .Expect(InputType.Integer)
        .EndRepetition()
        .Build();

    var stream = new TextParser<InputSchemaContext>(schema)
        .SetInputText(testInput8)
        .Parse()
        .AsSingleStream<int>();

    var needle = new List<int> { 4, 6 };
    var searchResults = DefaultOneDimensionalManipulator.Create(stream).FindInSet(needle);

    int[] expectedStarts = { 1, 3, 10 };
    Direction[] expectedDirections = { Direction.Forward, Direction.Backward, Direction.Forward };

    Assert.Equal(expectedStarts.Length, searchResults.Count);
    for (int i = 0; i < expectedStarts.Length; i++)
    {
        Assert.Equal(expectedStarts[i], searchResults[i].DataIndex.GetIndices()[0]);
    }

    for (int i = 0; i < expectedDirections.Length; i++)
    {
        Assert.Equal(expectedDirections[i], searchResults[i].Direction);
    }
}
|
||||||
|
|
||||||
|
// Parses testInput9 into rows of integers and searches the resulting grid for the sequence { 4, 6 },
// checking both index components and the direction reported for every hit.
[Fact]
public void DataManipulator_SimpleTwoDimensionalTest()
{
    var schema = new InputSchemaBuilder()
        .Repeat()
        .Expect(InputType.Integer)
        .EndRepetition()
        .Build();

    var grid = new TextParser<InputSchemaContext>(schema)
        .SetInputText(testInput9)
        .Parse()
        .AsListRows<int>();

    var needle = new List<int> { 4, 6 };
    var searchResults = DefaultTwoDimensionalManipulator.Create(grid).FindInSet(needle);

    // Expected start index of each hit, given as (GetIndices()[0], GetIndices()[1]).
    (int First, int Second)[] expectedStarts =
    {
        (0, 0),
        (2, 1),
        (2, 1),
        (1, 2),
        (3, 2),
        (3, 2),
    };
    Direction[] expectedDirections =
    {
        Direction.E,
        Direction.N,
        Direction.SW,
        Direction.E,
        Direction.SE,
        Direction.W,
    };

    Assert.Equal(expectedStarts.Length, searchResults.Count);
    for (int i = 0; i < expectedStarts.Length; i++)
    {
        Assert.Equal(expectedStarts[i].First, searchResults[i].DataIndex.GetIndices()[0]);
        Assert.Equal(expectedStarts[i].Second, searchResults[i].DataIndex.GetIndices()[1]);
    }

    for (int i = 0; i < expectedDirections.Length; i++)
    {
        Assert.Equal(expectedDirections[i], searchResults[i].Direction);
    }
}
|
||||||
|
|
||||||
|
// Parses testInput10 character by character and checks that each of the three lines
// yields three single-character entries, the very first one being "a".
[Fact]
public void TextParser_TestReadingChars()
{
    var schema = new InputSchemaBuilder()
        .Repeat()
        .Expect(InputType.Char)
        .EndRepetition()
        .Build();

    var rows = new TextParser<InputSchemaContext>(schema)
        .SetInputText(testInput10)
        .Parse()
        .AsListRows<string>();

    const int expectedRowCount = 3;
    const int expectedRowLength = 3;

    Assert.Equal(expectedRowCount, rows.Count);
    Assert.Equal("a", rows[0][0]);
    for (int rowIndex = 0; rowIndex < expectedRowCount; rowIndex++)
    {
        Assert.Equal(expectedRowLength, rows[rowIndex].Count);
    }
}
|
||||||
|
|
||||||
|
// Parses testInput11 as repeated (integer, char, char) groups, filters the result down to the
// integer tokens and checks the remaining values in order.
[Fact]
public void TextParser_TestFilter()
{
    var schema = new InputSchemaBuilder()
        .Repeat()
        .Expect(InputType.Integer)
        .Expect(InputType.Char)
        .Expect(InputType.Char)
        .EndRepetition()
        .Build();

    var numbers = new TextParser<InputSchemaContext>(schema)
        .SetInputText(testInput11)
        .Parse()
        .Filter(InputType.Integer)
        .AsSingleStream<int>();

    int[] expectedNumbers = { 2, 4, 6, 4, 1 };

    Assert.Equal(expectedNumbers.Length, numbers.Count);
    for (int i = 0; i < expectedNumbers.Length; i++)
    {
        Assert.Equal(expectedNumbers[i], numbers[i]);
    }
}
|
||||||
}
|
}
|
||||||
|
49
TextParser/Data/DataConversionHelpers.cs
Normal file
49
TextParser/Data/DataConversionHelpers.cs
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
namespace Parsing.Data;
|
||||||
|
|
||||||
|
using Parsing;
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// Extension methods that turn lists of tokens into lists of plain values by reading each
/// token's value through <see cref="IValueToken{TOldType}"/> and passing it to a converter.
/// </summary>
public static class DataConversionHelpers
{
    /// <summary>
    /// Reads the value of a token that is expected to be an <see cref="IValueToken{TOldType}"/>.
    /// </summary>
    /// <exception cref="Exception">The token is null or does not implement the expected value token interface.</exception>
    private static TOldType ReadValue<TOldType>(IToken token)
    {
        if (token is not IValueToken<TOldType> valueToken)
        {
            throw new Exception("Invalid Token type encountered during value conversion");
        }

        return valueToken.GetValue();
    }

    /// <summary>
    /// Converts every token of a list into exactly one new value.
    /// </summary>
    /// <param name="tokenList">Tokens to convert; each must implement <see cref="IValueToken{TOldType}"/>.</param>
    /// <param name="converter">Maps a token's value to the new value.</param>
    /// <returns>A new list with one converted value per token, in token order.</returns>
    /// <exception cref="Exception">A token does not implement the expected value token interface.</exception>
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        var converted = new List<TNewType>();
        foreach (var token in tokenList)
        {
            converted.Add(converter(ReadValue<TOldType>(token)));
        }

        return converted;
    }

    /// <summary>
    /// Converts every token of a list into any number of new values and flattens the results.
    /// </summary>
    /// <param name="tokenList">Tokens to convert; each must implement <see cref="IValueToken{TOldType}"/>.</param>
    /// <param name="converter">Maps a token's value to a list of new values.</param>
    /// <returns>A new list holding all values produced by the converter, in token order.</returns>
    /// <exception cref="Exception">A token does not implement the expected value token interface.</exception>
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, List<TNewType>> converter) where TTokenType : IValueToken<TOldType>
    {
        var flattened = new List<TNewType>();
        foreach (var token in tokenList)
        {
            flattened.AddRange(converter(ReadValue<TOldType>(token)));
        }

        return flattened;
    }

    /// <summary>
    /// Converts a list of token lists by applying the single-value conversion to every inner list.
    /// </summary>
    /// <param name="tokenListList">Token lists to convert.</param>
    /// <param name="converter">Maps a token's value to the new value.</param>
    /// <returns>A new list of lists with the same shape as the input.</returns>
    /// <exception cref="Exception">A token does not implement the expected value token interface.</exception>
    public static List<List<TNewType>> ConvertData<TTokenType, TNewType, TOldType>(this List<List<IToken>> tokenListList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        return tokenListList.ConvertAll(tokenList => tokenList.ConvertData<TTokenType, TNewType, TOldType>(converter));
    }
}
|
43
TextParser/Data/DataManipulationHelpers.cs
Normal file
43
TextParser/Data/DataManipulationHelpers.cs
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
namespace Parsing.Data;
|
||||||
|
|
||||||
|
/// <summary>
/// Extension methods for reducing a list to a single value and for transforming lists
/// element by element.
/// </summary>
public static class DataManipulationHelpers
{
    /// <summary>
    /// Folds a list from left to right: starts with the first element and combines the running
    /// result with each following element.
    /// </summary>
    /// <param name="data">The values to reduce; must contain at least one element.</param>
    /// <param name="reducer">Combines the running result (first argument) with the next element (second argument).</param>
    /// <returns>The first element for a single-element list, otherwise the folded result.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> or <paramref name="reducer"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="data"/> is empty.</exception>
    public static TType ReduceData<TType>(this List<TType> data, Func<TType, TType, TType> reducer)
    {
        ArgumentNullException.ThrowIfNull(data);
        ArgumentNullException.ThrowIfNull(reducer);

        // There is no seed value, so an empty list cannot be reduced. The exception type matches
        // what the list indexer raised before, but now carries a descriptive message.
        if (data.Count == 0)
        {
            throw new ArgumentOutOfRangeException(nameof(data), "Cannot reduce an empty data set.");
        }

        TType result = data[0];
        for (int i = 1; i < data.Count; i++)
        {
            result = reducer(result, data[i]);
        }

        return result;
    }

    /// <summary>
    /// Reduces a list by handing the whole list to the given reducer.
    /// </summary>
    /// <param name="data">The values to reduce.</param>
    /// <param name="reducer">Computes the result from the complete list.</param>
    /// <returns>Whatever <paramref name="reducer"/> returns for <paramref name="data"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> or <paramref name="reducer"/> is null.</exception>
    public static TType ReduceData<TType>(this List<TType> data, Func<List<TType>, TType> reducer)
    {
        ArgumentNullException.ThrowIfNull(data);
        ArgumentNullException.ThrowIfNull(reducer);

        return reducer(data);
    }

    /// <summary>
    /// Applies a transformer to every element of a list.
    /// </summary>
    /// <param name="data">The values to transform.</param>
    /// <param name="transformer">Maps one element to its new value.</param>
    /// <returns>A new list with one transformed value per input element, in input order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> or <paramref name="transformer"/> is null.</exception>
    public static List<TNewType> TransformData<TType, TNewType>(this List<TType> data, Func<TType, TNewType> transformer)
    {
        ArgumentNullException.ThrowIfNull(data);
        ArgumentNullException.ThrowIfNull(transformer);

        var transformed = new List<TNewType>(data.Count);
        foreach (TType dataItem in data)
        {
            transformed.Add(transformer(dataItem));
        }

        return transformed;
    }

    /// <summary>
    /// Applies a transformer to every inner list of a list of lists.
    /// </summary>
    /// <param name="data">The inner lists to transform.</param>
    /// <param name="transformer">Maps one complete inner list to its new value.</param>
    /// <returns>A new list with one transformed value per inner list, in input order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> or <paramref name="transformer"/> is null.</exception>
    public static List<TNewType> TransformData<TType, TNewType>(this List<List<TType>> data, Func<List<TType>, TNewType> transformer)
    {
        ArgumentNullException.ThrowIfNull(data);
        ArgumentNullException.ThrowIfNull(transformer);

        var transformed = new List<TNewType>(data.Count);
        foreach (List<TType> dataItemList in data)
        {
            transformed.Add(transformer(dataItemList));
        }

        return transformed;
    }
}
|
154
TextParser/Data/DataSetManipulatorBase.cs
Normal file
154
TextParser/Data/DataSetManipulatorBase.cs
Normal file
@ -0,0 +1,154 @@
|
|||||||
|
using Parsing.Data;
|
||||||
|
|
||||||
|
namespace Parsing.Data;
|
||||||
|
|
||||||
|
/// <summary>
/// Result of a search in a data set: the index at which a match was found.
/// </summary>
/// <typeparam name="TIndexType">Type of the individual index components.</typeparam>
public class SearchResult<TIndexType>
{
    /// <summary>
    /// Creates a result that points at the given index.
    /// </summary>
    /// <param name="dataIndex">Index of the match.</param>
    public SearchResult(IDataIndex<TIndexType> dataIndex) => DataIndex = dataIndex;

    /// <summary>
    /// Index of the match. Declared nullable and settable, so it may be replaced or cleared after construction.
    /// </summary>
    public IDataIndex<TIndexType>? DataIndex { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// A <see cref="SearchResult{TIndexType}"/> that additionally records the direction in which
/// the match extends and how many elements it covers.
/// </summary>
/// <typeparam name="TIndexType">Type of the individual index components.</typeparam>
public class DirectionalSearchResult<TIndexType> : SearchResult<TIndexType>
{
    /// <summary>
    /// Creates a directional result.
    /// </summary>
    /// <param name="dataIndex">Index at which the match starts.</param>
    /// <param name="direction">Direction in which the match was followed.</param>
    /// <param name="length">Number of matched elements.</param>
    public DirectionalSearchResult(IDataIndex<TIndexType> dataIndex, Direction direction, int length)
        : base(dataIndex)
    {
        Direction = direction;
        Length = length;
    }

    /// <summary>Direction in which the match was followed.</summary>
    public Direction Direction { get; set; }

    /// <summary>Number of matched elements.</summary>
    public int Length { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Base class for searching and traversing a data set whose layout is only known to the
/// concrete implementation. Implementations supply the valid directions, index validation,
/// movement and single-item search; this class builds neighbor lookup and sequence search on top.
/// </summary>
/// <typeparam name="TCollectedType">Element type of the outermost list that holds the data set.</typeparam>
/// <typeparam name="TDataType">Type of the individual data items that are compared during a search.</typeparam>
/// <typeparam name="TIndexType">Type of the individual index components.</typeparam>
public abstract class DataSetManipulatorBase<TCollectedType, TDataType, TIndexType> where TDataType : IEquatable<TDataType>
{
    /// <summary>Reads single data items out of <see cref="dataSet"/> for a given index.</summary>
    protected IDataSetIndexer<TDataType, TIndexType> indexer;

    /// <summary>The data set this manipulator operates on.</summary>
    protected List<TCollectedType> dataSet;

    /// <summary>
    /// Creates a manipulator for the given data set.
    /// </summary>
    /// <param name="dataSet">The data set to operate on.</param>
    /// <param name="indexer">Indexer used to read single items from <paramref name="dataSet"/>.</param>
    public DataSetManipulatorBase(List<TCollectedType> dataSet, IDataSetIndexer<TDataType, TIndexType> indexer)
    {
        this.indexer = indexer;
        this.dataSet = dataSet;
    }

    // we do not know how to iterate a specific data set exactly, the implementation has to take care of validating directional input
    /// <summary>
    /// Returns the combination of all directions that are valid for this data set.
    /// </summary>
    protected abstract Direction ValidDirections();

    /// <summary>
    /// Checks that a direction value only contains directions from <see cref="ValidDirections"/>
    /// and shares at least one of them.
    /// </summary>
    /// <param name="d">Single direction or combination of directions to check.</param>
    /// <exception cref="ArgumentException"><paramref name="d"/> is not valid for this data set.</exception>
    protected void ValidateDirection(Direction d)
    {
        var allValidDirections = this.ValidDirections();

        // First half: d adds nothing outside the valid set. Second half: d overlaps the valid set at all.
        var isValid = ((d | allValidDirections) == allValidDirections) && ((d & allValidDirections) > 0);

        if (!isValid)
        {
            throw new ArgumentException("Invalid search direction provided for given data set!");
        }
    }

    /// <summary>
    /// Splits a combination of directions into the individual directions it contains.
    /// </summary>
    /// <param name="d">Single direction or combination of directions; validated first.</param>
    /// <returns>Every direction from <c>DirectionProvider.GetAllDirections()</c> that overlaps <paramref name="d"/>.</returns>
    /// <exception cref="ArgumentException"><paramref name="d"/> is not valid for this data set.</exception>
    protected List<Direction> SimplifyDirections(Direction d)
    {
        this.ValidateDirection(d);
        var allDirections = DirectionProvider.GetAllDirections();
        var singleDirections = new List<Direction>();

        foreach (Direction direction in allDirections)
        {
            if ((direction & d) > 0)
            {
                singleDirections.Add(direction);
            }
        }

        return singleDirections;
    }

    /// <summary>
    /// Returns the individual directions that are valid for this data set.
    /// </summary>
    /// <param name="d">Currently ignored; the result is always derived from <see cref="ValidDirections"/>.</param>
    /// <returns>The valid directions, split into individual entries.</returns>
    public List<Direction> GetValidDirectionList(Direction d)
    {
        return SimplifyDirections(this.ValidDirections());
    }

    // we do not know how to iterate a specific data set exactly, the implementation has to take care of ending traversal in any direction
    /// <summary>
    /// Tells whether the given position lies inside the data set.
    /// </summary>
    public abstract bool IsValidIndex(IDataIndex<TIndexType> queryPosition);

    // we do not know how to iterate a specific data set exactly, the implementation has to take care of traversing the set
    /// <summary>
    /// Returns the position reached by taking one step from <paramref name="currentPosition"/> in the given direction.
    /// </summary>
    public abstract IDataIndex<TIndexType> Move(IDataIndex<TIndexType> currentPosition, Direction direction);

    /// <summary>
    /// Collects the valid positions that are one step away from a position.
    /// </summary>
    /// <param name="currentPosition">Position to start from.</param>
    /// <param name="directions">Directions to look in; may be a combination.</param>
    /// <returns>One entry per requested direction whose target position passes <see cref="IsValidIndex"/>.</returns>
    /// <exception cref="ArgumentException"><paramref name="directions"/> is not valid for this data set.</exception>
    public List<IDataIndex<TIndexType>> GetNeighborIndices(IDataIndex<TIndexType> currentPosition, Direction directions)
    {
        var singleDirections = this.SimplifyDirections(directions);
        var neighbors = new List<IDataIndex<TIndexType>>();

        foreach (var direction in singleDirections)
        {
            var newPosition = this.Move(currentPosition, direction);
            if (this.IsValidIndex(newPosition))
            {
                neighbors.Add(newPosition);
            }
        }

        return neighbors;
    }

    // we do not know how to iterate a specific data set exactly, but we only need to find specific items to be able to continue with any other algorithm
    /// <summary>
    /// Finds all positions of a single data item in the data set.
    /// </summary>
    public abstract List<SearchResult<TIndexType>> FindInSet(TDataType data);

    /// <summary>
    /// Searches for a sequence starting at a position, in every direction that is valid for this data set.
    /// </summary>
    /// <param name="currentPosition">Position at which the sequence has to start.</param>
    /// <param name="data">The sequence to look for; must not be empty.</param>
    /// <returns>One result per direction in which the complete sequence was found.</returns>
    public List<DirectionalSearchResult<TIndexType>> FindAtPosition(IDataIndex<TIndexType> currentPosition, List<TDataType> data)
    {
        return this.FindAtPosition(currentPosition, data, this.ValidDirections());
    }

    /// <summary>
    /// Searches for a sequence starting at a position, in the given directions.
    /// </summary>
    /// <param name="currentPosition">Position at which the sequence has to start.</param>
    /// <param name="data">The sequence to look for; must not be empty.</param>
    /// <param name="directions">Directions to follow; may be a combination.</param>
    /// <returns>One result per direction in which the complete sequence was found; empty when the item at
    /// <paramref name="currentPosition"/> does not equal the first sequence element.</returns>
    /// <exception cref="ArgumentException"><paramref name="directions"/> is not valid for this data set.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="data"/> is empty.</exception>
    public List<DirectionalSearchResult<TIndexType>> FindAtPosition(IDataIndex<TIndexType> currentPosition, List<TDataType> data, Direction directions)
    {
        var results = new List<DirectionalSearchResult<TIndexType>>();
        var givenDirections = this.SimplifyDirections(directions);
        if (EqualityComparer<TDataType>.Default.Equals(this.indexer.Get(this.dataSet, currentPosition), data[0]))
        {
            // found valid search start point, now validate each given direction
            foreach (var direction in givenDirections)
            {
                int searchIndex = 1;
                var searchPosition = this.Move(currentPosition, direction);

                // Walk on while sequence elements remain, the position is still inside the set
                // and the item at the position equals the next sequence element.
                while (searchIndex < data.Count && this.IsValidIndex(searchPosition)
                    && EqualityComparer<TDataType>.Default.Equals(this.indexer.Get(this.dataSet, searchPosition), data[searchIndex]))
                {
                    searchPosition = this.Move(searchPosition, direction);
                    searchIndex++;
                }

                // Only a walk that consumed the whole sequence counts as a match.
                if (searchIndex == data.Count)
                {
                    var result = new DirectionalSearchResult<TIndexType>(currentPosition, direction, searchIndex);
                    results.Add(result);
                }
            }
        }

        return results;
    }

    /// <summary>
    /// Searches the whole data set for a sequence, following the given directions.
    /// </summary>
    /// <param name="data">The sequence to look for; must not be empty.</param>
    /// <param name="directions">Directions to follow; may be a combination.</param>
    /// <returns>All matches found from every position that holds the first sequence element.</returns>
    /// <exception cref="ArgumentException"><paramref name="directions"/> is not valid for this data set.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="data"/> is empty.</exception>
    public List<DirectionalSearchResult<TIndexType>> FindInSet(List<TDataType> data, Direction directions)
    {
        var result = new List<DirectionalSearchResult<TIndexType>>();

        // find valid starting points in set and perform search from there
        var startingPoints = this.FindInSet(data[0]);
        foreach (var startingPoint in startingPoints)
        {
            // DataIndex is declared nullable; a starting point without an index cannot be searched from.
            if (startingPoint.DataIndex is null)
            {
                continue;
            }

            // FindAtPosition already returns a list of matches, so it is appended as a whole.
            result.AddRange(this.FindAtPosition(startingPoint.DataIndex, data, directions));
        }

        return result;
    }

    /// <summary>
    /// Searches the whole data set for a sequence, in every direction that is valid for this data set.
    /// </summary>
    /// <param name="data">The sequence to look for; must not be empty.</param>
    /// <returns>All matches found from every position that holds the first sequence element.</returns>
    public List<DirectionalSearchResult<TIndexType>> FindInSet(List<TDataType> data)
    {
        return this.FindInSet(data, this.ValidDirections());
    }
}
|
62
TextParser/Data/DefaultDataSetIndexer.cs
Normal file
62
TextParser/Data/DefaultDataSetIndexer.cs
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Indexer for data sets stored as plain, nested <see cref="List{T}"/> instances with one, two
/// or three integer index components.
/// </summary>
/// <typeparam name="TDataType">Type of the individual data items.</typeparam>
public class DefaultDataSetIndexer<TDataType> : IDataSetIndexer<TDataType, int>
{
    /// <summary>
    /// Reads the item at the position described by a data index.
    /// </summary>
    /// <param name="collection">The outermost list of the data set.</param>
    /// <param name="index">Index whose components select the item.</param>
    /// <exception cref="ArgumentException">The number of index components is not 1, 2 or 3, or the
    /// collection is not nested the way that number of components requires.</exception>
    public TDataType Get<TGenericCollectionContentType>(List<TGenericCollectionContentType> collection, IDataIndex<int> index)
    {
        return this.GetInternal(collection, index.GetIndices().ToArray());
    }

    // Picks the accessor that matches the number of index components and reinterprets the
    // collection accordingly; a collection of the wrong shape becomes null and is rejected by the accessor.
    private TDataType GetInternal<TGenericCollectionContentType>(List<TGenericCollectionContentType> collection, int[] indices)
    {
        return indices.Length switch
        {
            3 => this.GetAtIndex(collection as List<List<List<TDataType>>>, indices[0], indices[1], indices[2]),
            2 => this.GetAtIndex(collection as List<List<TDataType>>, indices[0], indices[1]),
            1 => this.GetAtIndex(collection as List<TDataType>, indices[0]),
            _ => throw new ArgumentException("Invalid Data Set access!"),
        };
    }

    /// <summary>
    /// Reads the item at the position described by the given index components.
    /// </summary>
    /// <param name="collection">The outermost list of the data set.</param>
    /// <param name="indices">One, two or three index components.</param>
    /// <exception cref="ArgumentException">The number of index components is not 1, 2 or 3, or the
    /// collection is not nested the way that number of components requires.</exception>
    public TDataType Get<TGenericCollectionContentType>(List<TGenericCollectionContentType> collection, params int[] indices)
    {
        return this.GetInternal(collection, indices);
    }

    // Shared guard for the accessors below.
    private static void EnsureCollection(object? collection)
    {
        if (collection is null)
        {
            throw new ArgumentException("Invalid data set provided for access");
        }
    }

    /// <summary>
    /// Reads an item from a one-dimensional data set.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="collection"/> is null.</exception>
    public TDataType GetAtIndex(List<TDataType> collection, int index)
    {
        EnsureCollection(collection);
        return collection[index];
    }

    /// <summary>
    /// Reads an item from a two-dimensional data set. The second component is counted from the
    /// last inner list backwards: <c>y == 0</c> selects the last inner list.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="collection"/> is null.</exception>
    public TDataType GetAtIndex(List<List<TDataType>> collection, int x, int y)
    {
        EnsureCollection(collection);
        int rowFromTop = collection.Count - y - 1;
        return collection[rowFromTop][x];
    }

    /// <summary>
    /// Reads an item from a three-dimensional data set, addressed as <c>collection[z][y][x]</c>.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="collection"/> is null.</exception>
    public TDataType GetAtIndex(List<List<List<TDataType>>> collection, int x, int y, int z)
    {
        EnsureCollection(collection);
        return collection[z][y][x];
    }
}
|
59
TextParser/Data/DefaultOneDimensionalManipulator.cs
Normal file
59
TextParser/Data/DefaultOneDimensionalManipulator.cs
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
using System.Runtime.InteropServices;
|
||||||
|
using Parsing.Data;
|
||||||
|
|
||||||
|
namespace Parsing.Data;
|
||||||
|
|
||||||
|
/// <summary>
/// Non-generic factory so callers can write <c>DefaultOneDimensionalManipulator.Create(list)</c>
/// and have the element type inferred from the argument.
/// </summary>
public static class DefaultOneDimensionalManipulator
{
    /// <summary>Wraps <paramref name="dataSet"/> in a new <see cref="DefaultOneDimensionalManipulator{TDataType}"/>.</summary>
    public static DefaultOneDimensionalManipulator<TDataType> Create<TDataType>(List<TDataType> dataSet)
        where TDataType : IEquatable<TDataType>
        => new DefaultOneDimensionalManipulator<TDataType>(dataSet);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Manipulator for a flat list: positions are single integers and movement is limited to
/// <see cref="Direction.Left"/> / <see cref="Direction.Right"/>.
/// </summary>
/// <typeparam name="TDataType">Element type of the list.</typeparam>
public class DefaultOneDimensionalManipulator<TDataType> : DataSetManipulatorBase<TDataType, TDataType, int> where TDataType : IEquatable<TDataType>
{
    /// <summary>Creates a manipulator over <paramref name="dataSet"/> using a <see cref="DefaultDataSetIndexer{TDataType}"/>.</summary>
    public DefaultOneDimensionalManipulator(List<TDataType> dataSet) : base(dataSet, new DefaultDataSetIndexer<TDataType>())
    {
    }

    /// <summary>Returns <c>Direction.Left | Direction.Right</c>.</summary>
    protected override Direction ValidDirections()
    {
        return Direction.Left | Direction.Right;
    }

    /// <summary>Checks that the first index of <paramref name="queryPosition"/> lies within <c>[0, dataSet.Count)</c>.</summary>
    public override bool IsValidIndex(IDataIndex<int> queryPosition)
    {
        var index = queryPosition.GetIndices()[0];
        return (index >= 0) && (index < this.dataSet.Count);
    }

    /// <summary>
    /// Returns a new index one step away from <paramref name="currentPosition"/>:
    /// +1 for <see cref="Direction.Forward"/>, -1 for <see cref="Direction.Backward"/>.
    /// The result is not range-checked.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="direction"/> is any other value.</exception>
    public override IDataIndex<int> Move(IDataIndex<int> currentPosition, Direction direction)
    {
        switch (direction)
        {
            case Direction.Forward:
                return new DefaultPositionalDataIndex(currentPosition.GetIndices()[0] + 1);
            case Direction.Backward:
                return new DefaultPositionalDataIndex(currentPosition.GetIndices()[0] - 1);
            default:
                // The single-string constructor treats its argument as the parameter NAME,
                // so use the (paramName, actualValue, message) overload to carry the message.
                throw new ArgumentOutOfRangeException(nameof(direction), direction, "Direction was not accounted for move for current data set!");
        }
    }

    /// <summary>
    /// Scans the list front to back and returns one result per element equal to <paramref name="data"/>
    /// (compared with <see cref="EqualityComparer{T}.Default"/>), in ascending index order.
    /// </summary>
    public override List<SearchResult<int>> FindInSet(TDataType data)
    {
        var results = new List<SearchResult<int>>();
        var comparer = EqualityComparer<TDataType>.Default;

        for (int i = 0; i < this.dataSet.Count; i++)
        {
            if (comparer.Equals(this.dataSet[i], data))
            {
                results.Add(new SearchResult<int>(new DefaultPositionalDataIndex(i)));
            }
        }

        return results;
    }
}
|
14
TextParser/Data/DefaultPositionalDataIndex.cs
Normal file
14
TextParser/Data/DefaultPositionalDataIndex.cs
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
/// <summary>
/// <see cref="IDataIndex{TIndexType}"/> implementation that stores a list of integer indices.
/// </summary>
public class DefaultPositionalDataIndex : IDataIndex<int>
{
    private readonly List<int> indices;

    /// <summary>Copies <paramref name="indices"/> into the index, preserving their order.</summary>
    public DefaultPositionalDataIndex(params int[] indices)
    {
        this.indices = new List<int>(indices);
    }

    /// <summary>Returns the stored index list itself (not a copy).</summary>
    public IList<int> GetIndices() => this.indices;
}
|
82
TextParser/Data/DefaultTwoDimensionalManipulator.cs
Normal file
82
TextParser/Data/DefaultTwoDimensionalManipulator.cs
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
using System.Runtime.InteropServices;
|
||||||
|
using Parsing.Data;
|
||||||
|
|
||||||
|
namespace Parsing.Data;
|
||||||
|
|
||||||
|
/// <summary>
/// Non-generic factory so callers can write <c>DefaultTwoDimensionalManipulator.Create(rows)</c>
/// and have the element type inferred from the argument.
/// </summary>
public static class DefaultTwoDimensionalManipulator
{
    /// <summary>Wraps <paramref name="dataSet"/> in a new <see cref="DefaultTwoDimensionalManipulator{TDataType}"/>.</summary>
    public static DefaultTwoDimensionalManipulator<TDataType> Create<TDataType>(List<List<TDataType>> dataSet)
        where TDataType : IEquatable<TDataType>
        => new DefaultTwoDimensionalManipulator<TDataType>(dataSet);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Manipulator for a list of rows. Positions are (x, y) pairs where y is counted from the
/// LAST row of the outer list (y = 0 is the last row), matching the two-dimensional
/// accessor of <see cref="DefaultDataSetIndexer{TDataType}"/>.
/// </summary>
/// <typeparam name="TDataType">Element type of each row.</typeparam>
public class DefaultTwoDimensionalManipulator<TDataType> : DataSetManipulatorBase<List<TDataType>, TDataType, int> where TDataType : IEquatable<TDataType>
{
    /// <summary>Creates a manipulator over <paramref name="dataSet"/> using a <see cref="DefaultDataSetIndexer{TDataType}"/>.</summary>
    public DefaultTwoDimensionalManipulator(List<List<TDataType>> dataSet) : base(dataSet, new DefaultDataSetIndexer<TDataType>())
    {
    }

    /// <summary>Returns the union of the eight compass directions.</summary>
    protected override Direction ValidDirections()
    {
        return Direction.N
            | Direction.NE
            | Direction.E
            | Direction.SE
            | Direction.S
            | Direction.SW
            | Direction.W
            | Direction.NW;
    }

    /// <summary>
    /// Checks that (x, y) addresses an existing element: y must be a valid row number and x must
    /// fall inside the row that y maps to.
    /// </summary>
    public override bool IsValidIndex(IDataIndex<int> queryPosition)
    {
        var indices = queryPosition.GetIndices();
        var xIndex = indices[0];
        var yIndex = indices[1];

        if (yIndex < 0 || yIndex >= this.dataSet.Count)
        {
            return false;
        }

        // y counts rows from the end of the list, so bound x by that row's length.
        // (For rectangular data every row has the same length and this is equivalent
        // to checking any row; for jagged data it must be the addressed row.)
        var row = this.dataSet[this.dataSet.Count - yIndex - 1];
        return (xIndex >= 0) && (xIndex < row.Count);
    }

    /// <summary>
    /// Returns a new index one step from <paramref name="currentPosition"/>. North increases y,
    /// south decreases y, east increases x, west decreases x; diagonals combine both.
    /// The result is not range-checked.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="direction"/> is not exactly one of the eight compass directions.
    /// </exception>
    public override IDataIndex<int> Move(IDataIndex<int> currentPosition, Direction direction)
    {
        var (dx, dy) = direction switch
        {
            Direction.N => (0, 1),
            Direction.NE => (1, 1),
            Direction.E => (1, 0),
            Direction.SE => (1, -1),
            Direction.S => (0, -1),
            Direction.SW => (-1, -1),
            Direction.W => (-1, 0),
            Direction.NW => (-1, 1),
            // The single-string constructor treats its argument as the parameter NAME,
            // so use the (paramName, actualValue, message) overload to carry the message.
            _ => throw new ArgumentOutOfRangeException(nameof(direction), direction, "Direction was not accounted for move for current data set!"),
        };

        var indices = currentPosition.GetIndices();
        return new DefaultPositionalDataIndex(indices[0] + dx, indices[1] + dy);
    }

    /// <summary>
    /// Returns one result per element equal to <paramref name="data"/> (compared with
    /// <see cref="EqualityComparer{T}.Default"/>). Rows are visited from y = 0 upwards, i.e. starting
    /// with the last row of the outer list, and each row is scanned by ascending x.
    /// </summary>
    public override List<SearchResult<int>> FindInSet(TDataType data)
    {
        var results = new List<SearchResult<int>>();
        var comparer = EqualityComparer<TDataType>.Default;

        for (int y = 0; y < this.dataSet.Count; y++)
        {
            var row = this.dataSet[this.dataSet.Count - y - 1];
            for (int x = 0; x < row.Count; x++)
            {
                if (comparer.Equals(row[x], data))
                {
                    results.Add(new SearchResult<int>(new DefaultPositionalDataIndex(x, y)));
                }
            }
        }

        return results;
    }
}
|
43
TextParser/Data/Direction.cs
Normal file
43
TextParser/Data/Direction.cs
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
/// <summary>
/// Bit flags for the eight compass directions, plus named combinations and aliases.
/// </summary>
[Flags]
public enum Direction
{
    // Single-bit compass directions.
    N = 1 << 0,
    NE = 1 << 1,
    E = 1 << 2,
    SE = 1 << 3,
    S = 1 << 4,
    SW = 1 << 5,
    W = 1 << 6,
    NW = 1 << 7,

    // Combinations.
    Horizontal = E | W,
    Vertical = N | S,
    Cardinal = Horizontal | Vertical,
    RisingDiagonal = NE | SW,
    FallingDiagonal = NW | SE,
    Diagonal = RisingDiagonal | FallingDiagonal,
    All = Cardinal | Diagonal,

    // Aliases for the cardinal directions.
    Left = W,
    Right = E,
    Up = N,
    Down = S,
    Forward = Right,
    Backward = Left
}
|
||||||
|
|
||||||
|
/// <summary>Helper for enumerating the single-bit <see cref="Direction"/> values.</summary>
public static class DirectionProvider
{
    /// <summary>
    /// Returns a new array holding the eight compass directions in the order
    /// N, NE, E, SE, S, SW, W, NW.
    /// </summary>
    public static Direction[] GetAllDirections()
        => new[]
        {
            Direction.N,
            Direction.NE,
            Direction.E,
            Direction.SE,
            Direction.S,
            Direction.SW,
            Direction.W,
            Direction.NW,
        };
}
|
4
TextParser/Data/IDataIndex.cs
Normal file
4
TextParser/Data/IDataIndex.cs
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
/// <summary>A position expressed as a list of index values.</summary>
/// <typeparam name="TIndexType">Type of each index value.</typeparam>
public interface IDataIndex<TIndexType>
{
    /// <summary>Returns the index values that make up this position.</summary>
    IList<TIndexType> GetIndices();
}
|
10
TextParser/Data/IDataSetIndexer.cs
Normal file
10
TextParser/Data/IDataSetIndexer.cs
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
/// <summary>Reads single elements out of nested-list data sets.</summary>
/// <typeparam name="TDataType">Type of the elements returned.</typeparam>
/// <typeparam name="TIndexType">Type of each index value.</typeparam>
public interface IDataSetIndexer<TDataType, TIndexType>
{
    /// <summary>Returns the element of <paramref name="collection"/> addressed by <paramref name="index"/>.</summary>
    TDataType Get<TGenericCollectionContentType>(List<TGenericCollectionContentType> collection, IDataIndex<TIndexType> index);

    /// <summary>Returns the element of <paramref name="collection"/> addressed by <paramref name="indices"/>.</summary>
    TDataType Get<TGenericCollectionContentType>(List<TGenericCollectionContentType> collection, params TIndexType[] indices);

    /// <summary>Returns the element of a flat list addressed by a single index.</summary>
    TDataType GetAtIndex(List<TDataType> collection, TIndexType index);

    /// <summary>Returns the element of a list of lists addressed by (x, y).</summary>
    TDataType GetAtIndex(List<List<TDataType>> collection, TIndexType x, TIndexType y);

    /// <summary>Returns the element of a three-level nested list addressed by (x, y, z).</summary>
    TDataType GetAtIndex(List<List<List<TDataType>>> collection, TIndexType x, TIndexType y, TIndexType z);
}
|
0
TextParser/Data/ListIndexer.cs
Normal file
0
TextParser/Data/ListIndexer.cs
Normal file
185
TextParser/Data/TokenConverter.cs
Normal file
185
TextParser/Data/TokenConverter.cs
Normal file
@ -0,0 +1,185 @@
|
|||||||
|
namespace Parsing.Data;
|
||||||
|
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using Parsing.Schema;
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// Converts the raw token rows held in <see cref="rawTokens"/> into typed shapes
/// (flat stream, rows, columns, grid) by reading the value of each value token.
/// </summary>
public class TokenConverter
{
    /// <summary>Token rows to convert; one inner list per row.</summary>
    protected List<List<IToken>> rawTokens = new List<List<IToken>>();

    public TokenConverter()
    {
    }

    // Builds one T (a List<U>) per raw token row, filled with the value of each token.
    // Throws if a token is null or is not an IValueToken<U>.
    private List<T> AsGenericCollection<T, U>() where T : List<U>, new()
    {
        List<T> returnData = new List<T>();
        foreach (var tokenRow in this.rawTokens)
        {
            T newRow = new T();
            foreach (IToken token in tokenRow)
            {
                if (token == null)
                {
                    throw new Exception("No token was provided, but token was expected!");
                }

                if (token is not IValueToken<U> valueToken)
                {
                    // Report the offending text through the exception rather than writing to the console.
                    throw new Exception("Provided token is not a ValueToken - type: " + token.GetType().ToString() + " - text: " + token.GetText());
                }

                newRow.Add(valueToken.GetValue());
            }

            returnData.Add(newRow);
        }

        return returnData;
    }

    // Converting rows to columns or to a grid requires every row to have the same length.
    // An empty token set trivially satisfies this.
    private void CheckConversionPrerequisites()
    {
        if (this.rawTokens.Count == 0)
        {
            return;
        }

        int rowLength = this.rawTokens[0].Count;

        foreach (var tokenRow in this.rawTokens)
        {
            if (tokenRow.Count != rowLength)
            {
                throw new Exception("Attempted to convert token dataset that is not able to be converted!");
            }
        }
    }

    /// <summary>Returns the values of all tokens, row after row, as one flat list.</summary>
    /// <exception cref="Exception">A token is null or is not an <c>IValueToken&lt;T&gt;</c>.</exception>
    public List<T> AsSingleStream<T>()
    {
        List<T> returnData = new List<T>();
        foreach (var tokenRow in this.rawTokens)
        {
            foreach (IToken token in tokenRow)
            {
                if (token == null)
                {
                    throw new Exception("No token was provided, but token was expected!");
                }

                if (token is not IValueToken<T> valueToken)
                {
                    throw new Exception("Provided token is not a ValueToken");
                }

                returnData.Add(valueToken.GetValue());
            }
        }

        return returnData;
    }

    /// <summary>Returns one array of values per token row.</summary>
    public List<T[]> AsRows<T>()
    {
        return this.AsListRows<T>().ConvertAll(row => row.ToArray());
    }

    /// <summary>Returns one list of values per token row.</summary>
    public List<List<T>> AsListRows<T>()
    {
        return this.AsGenericCollection<List<T>, T>();
    }

    /// <summary>Returns one array of values per column (the rows transposed).</summary>
    /// <exception cref="Exception">The rows do not all have the same length.</exception>
    public List<T[]> AsColumns<T>()
    {
        return this.AsListColumns<T>().ConvertAll(column => column.ToArray());
    }

    /// <summary>
    /// Returns one list of values per column (the rows transposed). An empty token set yields an empty list.
    /// </summary>
    /// <exception cref="Exception">The rows do not all have the same length.</exception>
    public List<List<T>> AsListColumns<T>()
    {
        this.CheckConversionPrerequisites();
        var rows = AsListRows<T>();

        var columns = new List<List<T>>();
        if (rows.Count == 0)
        {
            // No rows means no columns; avoids indexing rows[0] below.
            return columns;
        }

        for (int i = 0; i < rows[0].Count; i++)
        {
            columns.Add(new List<T>());
        }

        foreach (var row in rows)
        {
            for (int i = 0; i < row.Count; i++)
            {
                columns[i].Add(row[i]);
            }
        }

        return columns;
    }

    /// <summary>Returns the values as a jagged array with one inner array per row.</summary>
    /// <exception cref="Exception">The rows do not all have the same length.</exception>
    public T[][] AsGrid<T>()
    {
        this.CheckConversionPrerequisites();
        var rowsList = AsRows<T>();
        return rowsList.ToArray();
    }

    /// <summary>Returns the underlying token rows themselves (not a copy).</summary>
    public List<List<IToken>> AsRawData()
    {
        return this.rawTokens;
    }

    /// <summary>Returns the values of all tokens as <c>Fragment</c> instances, row after row.</summary>
    /// <exception cref="Exception">A token is null, is not a fragment value token, or yields a null value.</exception>
    public List<Fragment> AsFragments()
    {
        var items = this.AsSingleStream<Fragment>();
        var newList = new List<Fragment>();

        foreach (var item in items)
        {
            var typedItem = item as Fragment;
            if (typedItem == null)
            {
                throw new Exception("Invalid token type encountered");
            }

            newList.Add(typedItem);
        }

        return newList;
    }

    /// <summary>
    /// Keeps only tokens whose input type is listed in <paramref name="inputTypes"/>. Rows are kept
    /// (possibly empty). This replaces the converter's token rows and returns the same instance for chaining.
    /// </summary>
    public TokenConverter Filter(params InputType[] inputTypes)
    {
        var newTokenListList = new List<List<IToken>>();

        foreach (var tokenList in this.rawTokens)
        {
            var newTokenList = new List<IToken>();
            foreach (var token in tokenList)
            {
                if (inputTypes.Contains(token.GetInputType()))
                {
                    newTokenList.Add(token);
                }
            }

            newTokenListList.Add(newTokenList);
        }

        this.rawTokens = newTokenListList;

        return this;
    }
}
|
@ -3,14 +3,14 @@ namespace Parsing;
|
|||||||
using Parsing.Schema;
|
using Parsing.Schema;
|
||||||
using Parsing.Tokenization;
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
public class LineParser
|
public class LineParser<T> where T : ISchemaContext
|
||||||
{
|
{
|
||||||
private string[] delimiters;
|
private string[] delimiters;
|
||||||
private bool removeEmptyEntries = false;
|
private bool removeEmptyEntries = false;
|
||||||
private InputSchema schema;
|
private ISchema<T> schema;
|
||||||
private InputSchemaContext context;
|
private T context;
|
||||||
|
|
||||||
public LineParser(InputSchema schema, string[]? delimiters = null, bool removeEmptyEntries = true)
|
public LineParser(ISchema<T> schema, string[]? delimiters = null, bool removeEmptyEntries = true)
|
||||||
{
|
{
|
||||||
this.delimiters = delimiters ?? new string[] { " " };
|
this.delimiters = delimiters ?? new string[] { " " };
|
||||||
this.removeEmptyEntries = removeEmptyEntries;
|
this.removeEmptyEntries = removeEmptyEntries;
|
||||||
|
@ -1,6 +1,16 @@
|
|||||||
namespace Parsing.Schema;
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
[Flags]
|
||||||
public enum BlockType
|
public enum BlockType
|
||||||
{
|
{
|
||||||
Integer, String, FixedRepetition, GreedyRepetition
|
Integer = 1,
|
||||||
|
Char = 2,
|
||||||
|
String = 4,
|
||||||
|
// technically not a block type but keeping it here for consistency/having all types in one place
|
||||||
|
Fragment = 8,
|
||||||
|
FixedRepetition = 16,
|
||||||
|
GreedyRepetition = 32,
|
||||||
|
NonZeroRepetition = 64,
|
||||||
|
Custom = 128,
|
||||||
|
Long = 256,
|
||||||
}
|
}
|
||||||
|
@ -8,10 +8,12 @@ abstract class BuildingBlockBase : IBuildingBlock
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract IToken ParseWord(InputProvider inputs);
|
public abstract List<IToken> ParseWord(InputProvider inputs);
|
||||||
|
|
||||||
public abstract bool CanParseWord(InputProvider inputs);
|
public abstract bool CanParseWord(InputProvider inputs);
|
||||||
|
|
||||||
|
public abstract bool CanParseWord(string word);
|
||||||
|
|
||||||
public abstract BlockType GetBlockType();
|
public abstract BlockType GetBlockType();
|
||||||
|
|
||||||
public virtual bool IsRepetitionType()
|
public virtual bool IsRepetitionType()
|
||||||
@ -19,7 +21,7 @@ abstract class BuildingBlockBase : IBuildingBlock
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public virtual bool CheckIsDoneParsingAndReset()
|
public virtual bool CheckIsDoneParsingAndReset(InputProvider inputs)
|
||||||
{
|
{
|
||||||
// most blocks are always done parsing after consuming a token
|
// most blocks are always done parsing after consuming a token
|
||||||
// repetition blocks can consume multiple tokens
|
// repetition blocks can consume multiple tokens
|
||||||
|
49
TextParser/Schema/BuildingBlocks/CharBlock.cs
Normal file
49
TextParser/Schema/BuildingBlocks/CharBlock.cs
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
namespace Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// Building block that splits a word into one <c>StringToken</c> per character.
/// </summary>
class CharBlock : BuildingBlockBase
{
    public CharBlock()
    {
    }

    /// <summary>Consumes the next word and returns a token for each of its characters, in order.</summary>
    public override List<IToken> ParseWord(InputProvider inputs)
    {
        var characters = inputs.YieldWord();
        var tokens = new List<IToken>();
        for (int i = 0; i < characters.Length; i++)
        {
            tokens.Add(new StringToken(characters[i].ToString()));
        }

        return tokens;
    }

    /// <summary>Reads the next word inside a lookahead context and reports whether this block accepts it.</summary>
    public override bool CanParseWord(InputProvider inputs)
    {
        string upcoming;
        using (inputs.GetLookaheadContext())
        {
            upcoming = inputs.YieldWord();
        }

        return this.CanParseWord(upcoming);
    }

    /// <summary>Accepts every word except those an <c>IntegerBlock</c> would accept.</summary>
    public override bool CanParseWord(string word)
    {
        // Character data can match almost anything, so explicitly refuse words that are integers.
        return !new IntegerBlock().CanParseWord(word);
    }

    /// <summary>Returns <c>BlockType.String</c>.</summary>
    public override BlockType GetBlockType()
    {
        // NOTE(review): reports String rather than a character-specific block type - confirm this is intended.
        return BlockType.String;
    }
}
|
42
TextParser/Schema/BuildingBlocks/CustomInputBlock.cs
Normal file
42
TextParser/Schema/BuildingBlocks/CustomInputBlock.cs
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
namespace Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// Building block that wraps each word in a <c>CustomToken&lt;T&gt;</c> carrying a caller-defined
/// input type and a caller-supplied string-to-<typeparamref name="T"/> converter.
/// </summary>
class CustomInputBlock<T> : BuildingBlockBase
{
    private readonly InputType definedInputType;
    private readonly Func<string, T> wordConverter;

    public CustomInputBlock(InputType definedInputType, Func<string, T> wordConverter)
    {
        this.wordConverter = wordConverter;
        this.definedInputType = definedInputType;
    }

    /// <summary>Consumes the next word and returns a single custom token built from it.</summary>
    public override List<IToken> ParseWord(InputProvider inputs)
    {
        var token = new CustomToken<T>(inputs.YieldWord(), this.definedInputType, this.wordConverter);
        return new List<IToken>() { token };
    }

    /// <summary>Reads the next word inside a lookahead context and reports whether this block accepts it.</summary>
    public override bool CanParseWord(InputProvider inputs)
    {
        string upcoming;
        using (inputs.GetLookaheadContext())
        {
            upcoming = inputs.YieldWord();
        }

        return this.CanParseWord(upcoming);
    }

    /// <summary>Always returns <c>true</c>: every word is accepted.</summary>
    public override bool CanParseWord(string word) => true;

    /// <summary>Returns <c>BlockType.Custom</c>.</summary>
    public override BlockType GetBlockType() => BlockType.Custom;
}
|
@ -1,6 +1,7 @@
|
|||||||
namespace Parsing.Schema.BuildingBlocks;
|
namespace Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
using System.IO.Pipelines;
|
using System.IO.Pipelines;
|
||||||
|
using System.Linq;
|
||||||
using Parsing.Tokenization;
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
class FixedRepetitionBlock : BuildingBlockBase
|
class FixedRepetitionBlock : BuildingBlockBase
|
||||||
@ -19,7 +20,7 @@ class FixedRepetitionBlock : BuildingBlockBase
|
|||||||
this.context = this.inputSchema.CreateContext();
|
this.context = this.inputSchema.CreateContext();
|
||||||
}
|
}
|
||||||
|
|
||||||
public override IToken ParseWord(InputProvider inputs)
|
public override List<IToken> ParseWord(InputProvider inputs)
|
||||||
{
|
{
|
||||||
var result = inputSchema.ProcessNextWord(context, inputs);
|
var result = inputSchema.ProcessNextWord(context, inputs);
|
||||||
if (context.HasFinished)
|
if (context.HasFinished)
|
||||||
@ -48,6 +49,21 @@ class FixedRepetitionBlock : BuildingBlockBase
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reports whether <paramref name="word"/> can be parsed: never when the repetition count is zero,
/// otherwise whatever the wrapped schema answers for the current context.
/// </summary>
public override bool CanParseWord(string word)
{
    if (this.repetitionCount == 0)
    {
        return false;
    }

    return inputSchema.CanProcessNextWord(context, word);
}
|
||||||
|
|
||||||
public override BlockType GetBlockType()
|
public override BlockType GetBlockType()
|
||||||
{
|
{
|
||||||
return BlockType.FixedRepetition;
|
return BlockType.FixedRepetition;
|
||||||
@ -58,7 +74,7 @@ class FixedRepetitionBlock : BuildingBlockBase
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public override bool CheckIsDoneParsingAndReset()
|
public override bool CheckIsDoneParsingAndReset(InputProvider inputs)
|
||||||
{
|
{
|
||||||
// we are done parsing once all repetitions are exhausted
|
// we are done parsing once all repetitions are exhausted
|
||||||
var result = this.repetitionCount == 0;
|
var result = this.repetitionCount == 0;
|
||||||
|
58
TextParser/Schema/BuildingBlocks/GreedyRepetitionBlock.cs
Normal file
58
TextParser/Schema/BuildingBlocks/GreedyRepetitionBlock.cs
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
namespace Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
|
using System.IO.Pipelines;
|
||||||
|
using System.Linq;
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>
/// Repetition block that keeps applying a wrapped schema for as long as the upcoming input
/// still matches it.
/// </summary>
class GreedyRepetitionBlock : BuildingBlockBase
{
    private InputSchema inputSchema;
    private InputSchemaContext context;

    public GreedyRepetitionBlock(InputSchema inputSchema)
    {
        this.inputSchema = inputSchema;
        this.context = this.inputSchema.CreateContext();
    }

    /// <summary>
    /// Lets the wrapped schema process the next word; if the input that follows can no longer be
    /// parsed, the schema context is replaced with a fresh one.
    /// </summary>
    public override List<IToken> ParseWord(InputProvider inputs)
    {
        var parsedTokens = inputSchema.ProcessNextWord(context, inputs);

        bool moreToParse = this.CanParseWord(inputs);
        if (!moreToParse)
        {
            this.StartFreshContext();
        }

        return parsedTokens;
    }

    /// <summary>True when the wrapped schema accepts the upcoming input and the provider can yield a word.</summary>
    public override bool CanParseWord(InputProvider inputs)
    {
        // Evaluation order is kept as-is: schema check first, then availability.
        return inputSchema.CanProcessNextWord(context, inputs) && inputs.CanYieldWord();
    }

    /// <summary>True when the wrapped schema accepts <paramref name="word"/> in the current context.</summary>
    public override bool CanParseWord(string word) => inputSchema.CanProcessNextWord(context, word);

    /// <summary>Returns <c>BlockType.GreedyRepetition</c>.</summary>
    public override BlockType GetBlockType() => BlockType.GreedyRepetition;

    /// <summary>Always <c>true</c>.</summary>
    public override bool IsRepetitionType() => true;

    /// <summary>
    /// Greedy parsing is done once the next input no longer matches; in that case the schema
    /// context is replaced with a fresh one before returning <c>true</c>.
    /// </summary>
    public override bool CheckIsDoneParsingAndReset(InputProvider inputs)
    {
        bool done = !this.CanParseWord(inputs);
        if (done)
        {
            this.StartFreshContext();
        }

        return done;
    }

    // Replaces the current schema context with a newly created one.
    private void StartFreshContext()
    {
        this.context = this.inputSchema.CreateContext();
    }
}
|
@ -4,13 +4,15 @@ using Parsing.Tokenization;
|
|||||||
|
|
||||||
public interface IBuildingBlock
|
public interface IBuildingBlock
|
||||||
{
|
{
|
||||||
public IToken ParseWord(InputProvider inputs);
|
public List<IToken> ParseWord(InputProvider inputs);
|
||||||
|
|
||||||
public bool CanParseWord(InputProvider inputs);
|
public bool CanParseWord(InputProvider inputs);
|
||||||
|
|
||||||
|
public bool CanParseWord(string word);
|
||||||
|
|
||||||
public BlockType GetBlockType();
|
public BlockType GetBlockType();
|
||||||
|
|
||||||
public bool IsRepetitionType();
|
public bool IsRepetitionType();
|
||||||
|
|
||||||
public bool CheckIsDoneParsingAndReset();
|
public bool CheckIsDoneParsingAndReset(InputProvider inputs);
|
||||||
}
|
}
|
@ -9,21 +9,25 @@ class IntegerBlock : BuildingBlockBase
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
public override IToken ParseWord(InputProvider inputs)
|
public override List<IToken> ParseWord(InputProvider inputs)
|
||||||
{
|
{
|
||||||
return new IntegerToken(inputs.YieldWord());
|
return new List<IToken>() { new IntegerToken(inputs.YieldWord()) };
|
||||||
}
|
}
|
||||||
|
|
||||||
public override bool CanParseWord(InputProvider inputs)
|
public override bool CanParseWord(InputProvider inputs)
|
||||||
{
|
{
|
||||||
using (inputs.GetLookaheadContext())
|
using (inputs.GetLookaheadContext())
|
||||||
{
|
{
|
||||||
int number = 0;
|
return this.CanParseWord(inputs.YieldWord());
|
||||||
var success = int.TryParse(inputs.YieldWord(), out number);
|
|
||||||
return success;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>Returns <c>true</c> when <paramref name="word"/> parses as an <see cref="int"/>.</summary>
public override bool CanParseWord(string word) => int.TryParse(word, out _);
|
||||||
|
|
||||||
public override BlockType GetBlockType()
|
public override BlockType GetBlockType()
|
||||||
{
|
{
|
||||||
return BlockType.Integer;
|
return BlockType.Integer;
|
||||||
|
35
TextParser/Schema/BuildingBlocks/LongBlock.cs
Normal file
35
TextParser/Schema/BuildingBlocks/LongBlock.cs
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
namespace Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
|
/// <summary>Building block that turns a word into a single <c>LongToken</c>.</summary>
class LongBlock : BuildingBlockBase
{
    public LongBlock()
    {
    }

    /// <summary>Consumes the next word and returns it wrapped in one <c>LongToken</c>.</summary>
    public override List<IToken> ParseWord(InputProvider inputs)
    {
        var token = new LongToken(inputs.YieldWord());
        return new List<IToken>() { token };
    }

    /// <summary>Reads the next word inside a lookahead context and reports whether it parses as a <see cref="long"/>.</summary>
    public override bool CanParseWord(InputProvider inputs)
    {
        string upcoming;
        using (inputs.GetLookaheadContext())
        {
            upcoming = inputs.YieldWord();
        }

        return this.CanParseWord(upcoming);
    }

    /// <summary>Returns <c>true</c> when <paramref name="word"/> parses as a <see cref="long"/>.</summary>
    public override bool CanParseWord(string word) => long.TryParse(word, out _);

    /// <summary>Returns <c>BlockType.Long</c>.</summary>
    public override BlockType GetBlockType() => BlockType.Long;
}
|
@ -9,13 +9,37 @@ class StringBlock : BuildingBlockBase
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
public override IToken ParseWord(InputProvider inputs)
|
public override List<IToken> ParseWord(InputProvider inputs)
|
||||||
{
|
{
|
||||||
return new StringToken(inputs.YieldWord());
|
return new List<IToken>() { new StringToken(inputs.YieldWord()) };
|
||||||
}
|
}
|
||||||
|
|
||||||
public override bool CanParseWord(InputProvider inputs)
|
public override bool CanParseWord(InputProvider inputs)
|
||||||
{
|
{
|
||||||
|
string word = string.Empty;
|
||||||
|
using (inputs.GetLookaheadContext())
|
||||||
|
{
|
||||||
|
word = inputs.YieldWord();
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.CanParseWord(word);
|
||||||
|
}
|
||||||
|
|
||||||
|
public override bool CanParseWord(string word)
|
||||||
|
{
|
||||||
|
// Here we need to ensure we are not matching any non-string tokens, since string can match pretty much anything
|
||||||
|
LongBlock longBlock = new LongBlock();
|
||||||
|
if (longBlock.CanParseWord(word))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
IntegerBlock intBlock = new IntegerBlock();
|
||||||
|
if (intBlock.CanParseWord(word))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
112
TextParser/Schema/FragmentSchema.cs
Normal file
112
TextParser/Schema/FragmentSchema.cs
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
using Parsing.Schema;
|
||||||
|
using Parsing.Schema.BuildingBlocks;
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
using System.Collections;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
|
/// <summary>Mutable progress state passed to <c>FragmentSchema</c> while processing words.</summary>
public class FragmentSchemaContext : ISchemaContext
{
    /// <summary>Starts at 0.</summary>
    public int lastProcessedBlockIndex { get; set; }

    /// <summary>Starts as <c>false</c>; once <c>true</c>, <c>FragmentSchema.CanProcessNextWord</c> reports <c>false</c>.</summary>
    public bool HasFinished { get; set; }
}
|
||||||
|
|
||||||
|
public class FragmentSchema : ISchema<FragmentSchemaContext>
|
||||||
|
{
|
||||||
|
private string fragmentRegex;
|
||||||
|
private List<string> namedGroups = new List<string>();
|
||||||
|
private List<string> namedLiterals = new List<string>();
|
||||||
|
|
||||||
|
/// <summary>Stores the pattern and the group/literal names used when tokenizing words.</summary>
/// <param name="fragmentRegex">Regular expression applied to each word.</param>
/// <param name="namedGroups">Group names whose captures are recorded on each token.</param>
/// <param name="namedLiterals">Group names for which the matched length is recorded on each token.</param>
public FragmentSchema(string fragmentRegex, List<string> namedGroups, List<string> namedLiterals)
{
    (this.fragmentRegex, this.namedGroups, this.namedLiterals) = (fragmentRegex, namedGroups, namedLiterals);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Consumes the next word and returns one <c>FragmentToken</c> per regex match in it.
/// </summary>
/// <remarks>
/// For every match, each named group contributes the list of its capture values, and each named
/// literal contributes a list that holds the matched length (as text) when the group exists in the
/// pattern and matched at least one character, or is empty otherwise.
/// After the word is consumed, <paramref name="currentContext"/> is marked finished if the
/// provider cannot yield another word.
/// </remarks>
/// <param name="currentContext">Progress state; only <c>HasFinished</c> is written here.</param>
/// <param name="inputs">Provider the word is taken from.</param>
/// <returns>The tokens for the consumed word; empty when the pattern does not match.</returns>
public List<IToken> ProcessNextWord(FragmentSchemaContext currentContext, InputProvider inputs)
{
    var tokenList = new List<IToken>();

    // The static Regex API reuses cached parsed patterns, so the pattern is not
    // re-parsed for every word as it would be with a fresh Regex instance.
    // One token per match.
    foreach (Match match in Regex.Matches(inputs.YieldWord(), this.fragmentRegex))
    {
        var newToken = new FragmentToken(match.Value);

        // Token contains data from all included matches.
        foreach (var groupName in this.namedGroups)
        {
            var captureList = new List<string>();
            foreach (Capture capture in match.Groups[groupName].Captures)
            {
                captureList.Add(capture.Value);
            }

            newToken.AddMatch(groupName, captureList);
        }

        foreach (var literalName in this.namedLiterals)
        {
            var captureList = new List<string>();

            // Single lookup instead of Keys.Contains followed by the indexer.
            if (match.Groups.TryGetValue(literalName, out var literalGroup) && literalGroup.Length > 0)
            {
                captureList.Add(literalGroup.Length.ToString());
            }

            newToken.AddMatch(literalName, captureList);
        }

        tokenList.Add(newToken);
    }

    if (!inputs.CanYieldWord())
    {
        currentContext.HasFinished = true;
    }

    return tokenList;
}
|
||||||
|
|
||||||
|
public bool CanProcessNextWord(FragmentSchemaContext currentContext, InputProvider inputs)
|
||||||
|
{
|
||||||
|
using (inputs.GetLookaheadContext())
|
||||||
|
{
|
||||||
|
return this.CanProcessNextWord(currentContext, inputs.YieldWord());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public bool CanProcessNextWord(FragmentSchemaContext currentContext, string word)
|
||||||
|
{
|
||||||
|
if (currentContext.HasFinished)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
Regex r = new Regex(this.fragmentRegex);
|
||||||
|
return r.Match(word).Success;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<IToken> ProcessWordList(string[] words)
|
||||||
|
{
|
||||||
|
List<IToken> tokens = new List<IToken>();
|
||||||
|
InputProvider inputs = new InputProvider(words);
|
||||||
|
var overallContext = this.CreateContext();
|
||||||
|
|
||||||
|
while (!overallContext.HasFinished && inputs.CanYieldWord())
|
||||||
|
{
|
||||||
|
if (this.CanProcessNextWord(overallContext, inputs))
|
||||||
|
{
|
||||||
|
tokens.AddRange(this.ProcessNextWord(overallContext, inputs));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
inputs.SkipCurrentWord();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
overallContext.HasFinished = true;
|
||||||
|
|
||||||
|
return tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FragmentSchemaContext CreateContext()
|
||||||
|
{
|
||||||
|
return new FragmentSchemaContext();
|
||||||
|
}
|
||||||
|
}
|
142
TextParser/Schema/FragmentSchemaBuilder.cs
Normal file
142
TextParser/Schema/FragmentSchemaBuilder.cs
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
using Parsing.Schema.BuildingBlocks;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
|
public class FragmentSchemaBuilder : RepetitionSchemaBuilder<FragmentSchemaBuilder, FragmentSchema, FragmentSchemaContext>, ISchemaBuilder<FragmentSchema, FragmentSchemaContext>
|
||||||
|
{
|
||||||
|
protected string fragmentRegex = @"";
|
||||||
|
|
||||||
|
private List<string> namedGroups = new List<string>();
|
||||||
|
private List<string> namedLiterals = new List<string>();
|
||||||
|
|
||||||
|
public FragmentSchemaBuilder()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
public FragmentSchemaBuilder StartOptions()
|
||||||
|
{
|
||||||
|
this.fragmentRegex += "((";
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FragmentSchemaBuilder EndOptions()
|
||||||
|
{
|
||||||
|
this.fragmentRegex += "))";
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FragmentSchemaBuilder Option()
|
||||||
|
{
|
||||||
|
// if we just started an options group there is no need to add an option separator
|
||||||
|
if (!this.fragmentRegex.EndsWith("(") || this.fragmentRegex.EndsWith("\\("))
|
||||||
|
{
|
||||||
|
this.fragmentRegex += ")|(";
|
||||||
|
}
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FragmentSchemaBuilder Expect(InputType type, string name = "")
|
||||||
|
{
|
||||||
|
string groupNamePrefix = "";
|
||||||
|
if (!string.IsNullOrEmpty(name))
|
||||||
|
{
|
||||||
|
groupNamePrefix = "?<" + name + ">";
|
||||||
|
namedGroups.Add(name);
|
||||||
|
}
|
||||||
|
switch (type)
|
||||||
|
{
|
||||||
|
case InputType.String:
|
||||||
|
this.fragmentRegex += "(" + groupNamePrefix + "\\w+)";
|
||||||
|
break;
|
||||||
|
case InputType.Integer:
|
||||||
|
this.fragmentRegex += "(" + groupNamePrefix + "\\d+)";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new Exception("Unrecognized InputType");
|
||||||
|
}
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FragmentSchemaBuilder Expect(string literal, string name = "")
|
||||||
|
{
|
||||||
|
string groupNamePrefix = "";
|
||||||
|
if (!string.IsNullOrEmpty(name))
|
||||||
|
{
|
||||||
|
groupNamePrefix = "?<" + name + ">";
|
||||||
|
namedLiterals.Add(name);
|
||||||
|
}
|
||||||
|
this.fragmentRegex += "(" + groupNamePrefix + Regex.Escape(literal) + ")";
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FragmentSchemaBuilder Repeat(int repetitionCount)
|
||||||
|
{
|
||||||
|
// add another layer of parsing
|
||||||
|
var newSchemaBuilder = this.GetNewRepetitionSchemaBuilder(this);
|
||||||
|
newSchemaBuilder.NumRepetition = repetitionCount;
|
||||||
|
newSchemaBuilder.RepetitionType = RepetitionType.FixedRepetition;
|
||||||
|
|
||||||
|
return newSchemaBuilder;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FragmentSchemaBuilder Repeat()
|
||||||
|
{
|
||||||
|
// add another layer of parsing
|
||||||
|
var newSchemaBuilder = this.GetNewRepetitionSchemaBuilder(this);
|
||||||
|
newSchemaBuilder.RepetitionType = RepetitionType.GreedyRepetition;
|
||||||
|
|
||||||
|
return newSchemaBuilder;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FragmentSchemaBuilder Repeat(RepetitionType repetitionType)
|
||||||
|
{
|
||||||
|
// add another layer of parsing
|
||||||
|
var newSchemaBuilder = this.GetNewRepetitionSchemaBuilder(this);
|
||||||
|
newSchemaBuilder.RepetitionType = repetitionType;
|
||||||
|
|
||||||
|
return newSchemaBuilder;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FragmentSchemaBuilder EndRepetition()
|
||||||
|
{
|
||||||
|
// return back to upper layer of parsing
|
||||||
|
var currentBuilder = this as FragmentSchemaBuilder;
|
||||||
|
if (currentBuilder == null)
|
||||||
|
{
|
||||||
|
throw new Exception("Invalid repetition definitions!");
|
||||||
|
}
|
||||||
|
var oldSchemaBuilder = currentBuilder.UpperLayerBuilder;
|
||||||
|
if (oldSchemaBuilder == null)
|
||||||
|
{
|
||||||
|
throw new Exception("Something went terribly wrong!");
|
||||||
|
}
|
||||||
|
|
||||||
|
var currentRegex = "(" + currentBuilder.fragmentRegex + ")";
|
||||||
|
switch (currentBuilder.RepetitionType)
|
||||||
|
{
|
||||||
|
case RepetitionType.FixedRepetition:
|
||||||
|
currentRegex += "{" + this.NumRepetition.ToString() + "}";
|
||||||
|
break;
|
||||||
|
case RepetitionType.GreedyRepetition:
|
||||||
|
currentRegex += "*";
|
||||||
|
break;
|
||||||
|
case RepetitionType.NonZeroRepetition:
|
||||||
|
case RepetitionType.NonZeroRepetition | RepetitionType.GreedyRepetition:
|
||||||
|
currentRegex += "+";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new Exception("Unrecognized RepetitionType");
|
||||||
|
}
|
||||||
|
|
||||||
|
oldSchemaBuilder.fragmentRegex += "(" + currentRegex + ")";
|
||||||
|
|
||||||
|
return oldSchemaBuilder;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FragmentSchema Build()
|
||||||
|
{
|
||||||
|
var schema = new FragmentSchema(this.fragmentRegex, this.namedGroups, this.namedLiterals);
|
||||||
|
return schema;
|
||||||
|
}
|
||||||
|
}
|
25
TextParser/Schema/ISchema.cs
Normal file
25
TextParser/Schema/ISchema.cs
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
using Parsing.Schema;
|
||||||
|
using Parsing.Schema.BuildingBlocks;
|
||||||
|
using Parsing.Tokenization;
|
||||||
|
using System.Collections;
|
||||||
|
|
||||||
|
public interface ISchemaContext
|
||||||
|
{
|
||||||
|
public int lastProcessedBlockIndex { get; set; }
|
||||||
|
public bool HasFinished { get; set; }
|
||||||
|
}
|
||||||
|
|
||||||
|
public interface ISchema<T> where T : ISchemaContext
|
||||||
|
{
|
||||||
|
public List<IToken> ProcessNextWord(T currentContext, InputProvider inputs);
|
||||||
|
|
||||||
|
public bool CanProcessNextWord(T currentContext, InputProvider inputs);
|
||||||
|
|
||||||
|
public bool CanProcessNextWord(T currentContext, string word);
|
||||||
|
|
||||||
|
public List<IToken> ProcessWordList(string[] words);
|
||||||
|
|
||||||
|
public T CreateContext();
|
||||||
|
}
|
8
TextParser/Schema/ISchemaBuilder.cs
Normal file
8
TextParser/Schema/ISchemaBuilder.cs
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
using Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
|
public interface ISchemaBuilder<T, U> where T : ISchema<U> where U : ISchemaContext
|
||||||
|
{
|
||||||
|
public T Build();
|
||||||
|
}
|
@ -5,13 +5,13 @@ using Parsing.Schema.BuildingBlocks;
|
|||||||
using Parsing.Tokenization;
|
using Parsing.Tokenization;
|
||||||
using System.Collections;
|
using System.Collections;
|
||||||
|
|
||||||
public class InputSchemaContext
|
public class InputSchemaContext : ISchemaContext
|
||||||
{
|
{
|
||||||
public int lastProcessedBlockIndex { get; set; } = 0;
|
public int lastProcessedBlockIndex { get; set; } = 0;
|
||||||
public bool HasFinished { get; set; } = false;
|
public bool HasFinished { get; set; } = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public class InputSchema
|
public class InputSchema : ISchema<InputSchemaContext>
|
||||||
{
|
{
|
||||||
private List<IBuildingBlock> buildingBlocks;
|
private List<IBuildingBlock> buildingBlocks;
|
||||||
|
|
||||||
@ -25,16 +25,16 @@ public class InputSchema
|
|||||||
this.buildingBlocks.Add(buildingBlock);
|
this.buildingBlocks.Add(buildingBlock);
|
||||||
}
|
}
|
||||||
|
|
||||||
public IToken ProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
|
public List<IToken> ProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
|
||||||
{
|
{
|
||||||
var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
|
var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
|
||||||
var token = nextBlock.ParseWord(inputs);
|
var tokens = nextBlock.ParseWord(inputs);
|
||||||
if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset())
|
if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset(inputs))
|
||||||
{
|
{
|
||||||
currentContext.lastProcessedBlockIndex++;
|
currentContext.lastProcessedBlockIndex++;
|
||||||
currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count;
|
currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count;
|
||||||
}
|
}
|
||||||
return token;
|
return tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
public bool CanProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
|
public bool CanProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
|
||||||
@ -50,6 +50,16 @@ public class InputSchema
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public bool CanProcessNextWord(InputSchemaContext currentContext, string word)
|
||||||
|
{
|
||||||
|
if (currentContext.HasFinished)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
|
||||||
|
return nextBlock.CanParseWord(word);
|
||||||
|
}
|
||||||
|
|
||||||
public List<IToken> ProcessWordList(string[] words)
|
public List<IToken> ProcessWordList(string[] words)
|
||||||
{
|
{
|
||||||
List<IToken> tokens = new List<IToken>();
|
List<IToken> tokens = new List<IToken>();
|
||||||
@ -58,7 +68,7 @@ public class InputSchema
|
|||||||
|
|
||||||
while (this.CanProcessNextWord(overallContext, inputs))
|
while (this.CanProcessNextWord(overallContext, inputs))
|
||||||
{
|
{
|
||||||
tokens.Add(this.ProcessNextWord(overallContext, inputs));
|
tokens.AddRange(this.ProcessNextWord(overallContext, inputs));
|
||||||
}
|
}
|
||||||
|
|
||||||
return tokens;
|
return tokens;
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
using Parsing.Schema.BuildingBlocks;
|
using Parsing.Schema.BuildingBlocks;
|
||||||
|
|
||||||
public class InputSchemaBuilder
|
public class InputSchemaBuilder : RepetitionSchemaBuilder<InputSchemaBuilder, InputSchema, InputSchemaContext>, ISchemaBuilder<InputSchema, InputSchemaContext>
|
||||||
{
|
{
|
||||||
private InputSchema schema = new InputSchema();
|
private InputSchema schema = new InputSchema();
|
||||||
|
|
||||||
@ -21,6 +21,27 @@ public class InputSchemaBuilder
|
|||||||
case InputType.Integer:
|
case InputType.Integer:
|
||||||
block = new IntegerBlock();
|
block = new IntegerBlock();
|
||||||
break;
|
break;
|
||||||
|
case InputType.Long:
|
||||||
|
block = new LongBlock();
|
||||||
|
break;
|
||||||
|
case InputType.Char:
|
||||||
|
block = new CharBlock();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new Exception("Unrecognized InputType");
|
||||||
|
}
|
||||||
|
schema.AddBuildingBlock(block);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public InputSchemaBuilder Expect<T>(InputType type, InputType definedInputType, Func<string, T> wordConverter)
|
||||||
|
{
|
||||||
|
IBuildingBlock block;
|
||||||
|
switch (type)
|
||||||
|
{
|
||||||
|
case InputType.Custom:
|
||||||
|
block = new CustomInputBlock<T>(definedInputType, wordConverter);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
throw new Exception("Unrecognized InputType");
|
throw new Exception("Unrecognized InputType");
|
||||||
}
|
}
|
||||||
@ -31,34 +52,50 @@ public class InputSchemaBuilder
|
|||||||
public InputSchemaBuilder Repeat(int repetitionCount)
|
public InputSchemaBuilder Repeat(int repetitionCount)
|
||||||
{
|
{
|
||||||
// add another layer of parsing
|
// add another layer of parsing
|
||||||
var newInputSchemaBuilder = new RepetitionSchemaBuilder(this);
|
var newInputSchemaBuilder = this.GetNewRepetitionSchemaBuilder(this);
|
||||||
newInputSchemaBuilder.NumRepetition = repetitionCount;
|
newInputSchemaBuilder.NumRepetition = repetitionCount;
|
||||||
newInputSchemaBuilder.RepetitionType = RepetitionType.FixedRepetition;
|
newInputSchemaBuilder.RepetitionType = RepetitionType.FixedRepetition;
|
||||||
|
|
||||||
return newInputSchemaBuilder;
|
return newInputSchemaBuilder;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public InputSchemaBuilder Repeat()
|
||||||
|
{
|
||||||
|
// add another layer of parsing
|
||||||
|
var newInputSchemaBuilder = this.GetNewRepetitionSchemaBuilder(this);
|
||||||
|
newInputSchemaBuilder.RepetitionType = RepetitionType.GreedyRepetition;
|
||||||
|
|
||||||
|
return newInputSchemaBuilder;
|
||||||
|
}
|
||||||
|
|
||||||
public InputSchemaBuilder EndRepetition()
|
public InputSchemaBuilder EndRepetition()
|
||||||
{
|
{
|
||||||
// return back to upper layer of parsing
|
// return back to upper layer of parsing
|
||||||
var currentBuilder = this as RepetitionSchemaBuilder;
|
var currentBuilder = this as InputSchemaBuilder;
|
||||||
if (currentBuilder == null)
|
if (currentBuilder == null)
|
||||||
{
|
{
|
||||||
throw new Exception("Invalid repetition definitions!");
|
throw new Exception("Invalid repetition definitions!");
|
||||||
}
|
}
|
||||||
var oldInputSchemaBuilder = currentBuilder.UpperLayerBuilder;
|
var oldSchemaBuilder = currentBuilder.UpperLayerBuilder;
|
||||||
|
if (oldSchemaBuilder == null)
|
||||||
|
{
|
||||||
|
throw new Exception("Something went terribly wrong!");
|
||||||
|
}
|
||||||
|
|
||||||
var currentSchema = currentBuilder.Build();
|
var currentSchema = currentBuilder.Build();
|
||||||
switch (currentBuilder.RepetitionType)
|
switch (currentBuilder.RepetitionType)
|
||||||
{
|
{
|
||||||
case RepetitionType.FixedRepetition:
|
case RepetitionType.FixedRepetition:
|
||||||
oldInputSchemaBuilder.schema.AddBuildingBlock(new FixedRepetitionBlock(currentSchema, currentBuilder.NumRepetition));
|
oldSchemaBuilder.schema.AddBuildingBlock(new FixedRepetitionBlock(currentSchema, currentBuilder.NumRepetition));
|
||||||
|
break;
|
||||||
|
case RepetitionType.GreedyRepetition:
|
||||||
|
oldSchemaBuilder.schema.AddBuildingBlock(new GreedyRepetitionBlock(currentSchema));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw new Exception("Unrecognized RepetitionType");
|
throw new Exception("Unrecognized RepetitionType");
|
||||||
}
|
}
|
||||||
|
|
||||||
return oldInputSchemaBuilder;
|
return oldSchemaBuilder;
|
||||||
}
|
}
|
||||||
|
|
||||||
public InputSchema Build()
|
public InputSchema Build()
|
||||||
|
@ -1,7 +1,12 @@
|
|||||||
namespace Parsing.Schema;
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
[Flags]
|
||||||
public enum InputType
|
public enum InputType
|
||||||
{
|
{
|
||||||
Integer = BlockType.Integer,
|
Integer = BlockType.Integer,
|
||||||
String = BlockType.String
|
String = BlockType.String,
|
||||||
|
Fragment = BlockType.Fragment,
|
||||||
|
Char = BlockType.Char,
|
||||||
|
Custom = BlockType.Custom,
|
||||||
|
Long = BlockType.Long,
|
||||||
}
|
}
|
||||||
|
@ -1,13 +1,15 @@
|
|||||||
namespace Parsing.Schema;
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
public class RepetitionSchemaBuilder : InputSchemaBuilder
|
public abstract class RepetitionSchemaBuilder<S, T, U> where S : RepetitionSchemaBuilder<S, T, U>, ISchemaBuilder<T, U>, new() where T : ISchema<U> where U : ISchemaContext
|
||||||
{
|
{
|
||||||
public RepetitionSchemaBuilder(InputSchemaBuilder upperLayerBuilder)
|
public S GetNewRepetitionSchemaBuilder(S upperLayerBuilder)
|
||||||
{
|
{
|
||||||
this.UpperLayerBuilder = upperLayerBuilder;
|
var newBuilder = new S();
|
||||||
|
newBuilder.UpperLayerBuilder = upperLayerBuilder;
|
||||||
|
return newBuilder;
|
||||||
}
|
}
|
||||||
|
|
||||||
public InputSchemaBuilder UpperLayerBuilder { get; set; }
|
public S? UpperLayerBuilder { get; set; }
|
||||||
|
|
||||||
public int NumRepetition { get; set; }
|
public int NumRepetition { get; set; }
|
||||||
|
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
namespace Parsing.Schema;
|
namespace Parsing.Schema;
|
||||||
|
|
||||||
|
[Flags]
|
||||||
public enum RepetitionType
|
public enum RepetitionType
|
||||||
{
|
{
|
||||||
FixedRepetition = BlockType.FixedRepetition,
|
FixedRepetition = BlockType.FixedRepetition,
|
||||||
GreedyRepetition = BlockType.GreedyRepetition
|
GreedyRepetition = BlockType.GreedyRepetition,
|
||||||
|
NonZeroRepetition = BlockType.NonZeroRepetition,
|
||||||
}
|
}
|
||||||
|
@ -2,23 +2,32 @@
|
|||||||
|
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
using Parsing.Data;
|
||||||
using Parsing.Schema;
|
using Parsing.Schema;
|
||||||
using Parsing.Tokenization;
|
using Parsing.Tokenization;
|
||||||
|
|
||||||
public class TextParser : TokenConverter
|
public static class TextParser
|
||||||
{
|
{
|
||||||
private LineParser lineParser;
|
public static TextParser<TContext> Create<TContext>(ISchema<TContext> schema, string[]? delimiters = null, bool removeEmptyEntries = true) where TContext : ISchemaContext
|
||||||
|
{
|
||||||
|
return new TextParser<TContext>(schema, delimiters, removeEmptyEntries);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public class TextParser<T> : TokenConverter where T : ISchemaContext
|
||||||
|
{
|
||||||
|
private LineParser<T> lineParser;
|
||||||
private string[] lines;
|
private string[] lines;
|
||||||
private bool removeEmptyEntries;
|
private bool removeEmptyEntries;
|
||||||
|
|
||||||
public TextParser(InputSchema schema, string[]? delimiters = null, bool removeEmptyEntries = true) : base()
|
public TextParser(ISchema<T> schema, string[]? delimiters = null, bool removeEmptyEntries = true) : base()
|
||||||
{
|
{
|
||||||
this.lineParser = new LineParser(schema, delimiters, removeEmptyEntries);
|
this.lineParser = new LineParser<T>(schema, delimiters, removeEmptyEntries);
|
||||||
this.lines = new string[] { };
|
this.lines = new string[] { };
|
||||||
this.removeEmptyEntries = removeEmptyEntries;
|
this.removeEmptyEntries = removeEmptyEntries;
|
||||||
}
|
}
|
||||||
|
|
||||||
public TextParser SetInputText(string text)
|
public TextParser<T> SetInputText(string text)
|
||||||
{
|
{
|
||||||
var options = StringSplitOptions.TrimEntries;
|
var options = StringSplitOptions.TrimEntries;
|
||||||
if (removeEmptyEntries)
|
if (removeEmptyEntries)
|
||||||
@ -29,7 +38,7 @@ public class TextParser : TokenConverter
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public TextParser Parse()
|
public TextParser<T> Parse()
|
||||||
{
|
{
|
||||||
foreach (var line in this.lines)
|
foreach (var line in this.lines)
|
||||||
{
|
{
|
||||||
|
@ -1,98 +0,0 @@
|
|||||||
namespace Parsing;
|
|
||||||
|
|
||||||
using System;
|
|
||||||
using System.Collections.Generic;
|
|
||||||
using Parsing.Schema;
|
|
||||||
using Parsing.Tokenization;
|
|
||||||
|
|
||||||
public class TokenConverter
|
|
||||||
{
|
|
||||||
protected List<List<IToken>> rawTokens = new List<List<IToken>>();
|
|
||||||
|
|
||||||
public TokenConverter()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<T> AsGenericCollection<T, U>() where T : ICollection<U>, new()
|
|
||||||
{
|
|
||||||
List<T> returnData = new List<T>();
|
|
||||||
foreach (var tokenRow in this.rawTokens)
|
|
||||||
{
|
|
||||||
T newRow = new T();
|
|
||||||
foreach (IToken token in tokenRow)
|
|
||||||
{
|
|
||||||
if (token == null)
|
|
||||||
{
|
|
||||||
throw new Exception("No token was provided, but token was expected!");
|
|
||||||
}
|
|
||||||
IValueToken<U>? valueToken = token as IValueToken<U>;
|
|
||||||
if (valueToken == null)
|
|
||||||
{
|
|
||||||
throw new Exception("Provided token is not a ValueToken");
|
|
||||||
}
|
|
||||||
newRow.Add(valueToken.GetValue());
|
|
||||||
}
|
|
||||||
|
|
||||||
returnData.Add(newRow);
|
|
||||||
}
|
|
||||||
return returnData;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<T[]> AsRows<T>()
|
|
||||||
{
|
|
||||||
var listRows = this.AsListRows<T>();
|
|
||||||
var newList = new List<T[]>();
|
|
||||||
|
|
||||||
foreach (var rowList in listRows)
|
|
||||||
{
|
|
||||||
newList.Add(rowList.ToArray());
|
|
||||||
}
|
|
||||||
|
|
||||||
return newList;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<List<T>> AsListRows<T>()
|
|
||||||
{
|
|
||||||
return this.AsGenericCollection<List<T>, T>();
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<T[]> AsColumns<T>()
|
|
||||||
{
|
|
||||||
var listColumns = this.AsListColumns<T>();
|
|
||||||
var newList = new List<T[]>();
|
|
||||||
|
|
||||||
foreach (var columnList in listColumns)
|
|
||||||
{
|
|
||||||
newList.Add(columnList.ToArray());
|
|
||||||
}
|
|
||||||
|
|
||||||
return newList;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<List<T>> AsListColumns<T>()
|
|
||||||
{
|
|
||||||
var rows = AsListRows<T>();
|
|
||||||
|
|
||||||
var columns = new List<List<T>>();
|
|
||||||
for (int i = 0; i < rows[0].Count; i++)
|
|
||||||
{
|
|
||||||
columns.Add(new List<T>());
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach (var row in rows)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < row.Count; i++)
|
|
||||||
{
|
|
||||||
columns[i].Add(row[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return columns;
|
|
||||||
}
|
|
||||||
|
|
||||||
public T[][] AsGrid<T>()
|
|
||||||
{
|
|
||||||
var rowsList = AsRows<T>();
|
|
||||||
return rowsList.ToArray();
|
|
||||||
}
|
|
||||||
}
|
|
34
TextParser/Tokenization/CustomToken.cs
Normal file
34
TextParser/Tokenization/CustomToken.cs
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
namespace Parsing.Tokenization;
|
||||||
|
|
||||||
|
using Parsing.Schema;
|
||||||
|
|
||||||
|
public class CustomToken<T> : IValueToken<T>
|
||||||
|
{
|
||||||
|
private string word;
|
||||||
|
|
||||||
|
private InputType definedInputType;
|
||||||
|
|
||||||
|
private Func<string, T> wordConverter;
|
||||||
|
|
||||||
|
public CustomToken(string word, InputType definedInputType, Func<string, T> wordConverter)
|
||||||
|
{
|
||||||
|
this.word = word;
|
||||||
|
this.wordConverter = wordConverter;
|
||||||
|
this.definedInputType = definedInputType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public string GetText()
|
||||||
|
{
|
||||||
|
return word;
|
||||||
|
}
|
||||||
|
|
||||||
|
public T GetValue()
|
||||||
|
{
|
||||||
|
return wordConverter(word);
|
||||||
|
}
|
||||||
|
|
||||||
|
public InputType GetInputType()
|
||||||
|
{
|
||||||
|
return this.definedInputType;
|
||||||
|
}
|
||||||
|
}
|
39
TextParser/Tokenization/FragmentToken.cs
Normal file
39
TextParser/Tokenization/FragmentToken.cs
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
namespace Parsing.Tokenization;
|
||||||
|
|
||||||
|
using System.Runtime.CompilerServices;
|
||||||
|
using Parsing.Schema;
|
||||||
|
|
||||||
|
public class Fragment : Dictionary<string, List<string>>
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
public class FragmentToken : IValueToken<Fragment>
|
||||||
|
{
|
||||||
|
private string word;
|
||||||
|
private Fragment matches = new Fragment();
|
||||||
|
|
||||||
|
public FragmentToken(string word)
|
||||||
|
{
|
||||||
|
this.word = word;
|
||||||
|
}
|
||||||
|
|
||||||
|
public string GetText()
|
||||||
|
{
|
||||||
|
return word;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void AddMatch(string name, List<string> values)
|
||||||
|
{
|
||||||
|
this.matches.Add(name, values);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Fragment GetValue()
|
||||||
|
{
|
||||||
|
return this.matches;
|
||||||
|
}
|
||||||
|
|
||||||
|
public InputType GetInputType()
|
||||||
|
{
|
||||||
|
return InputType.Fragment;
|
||||||
|
}
|
||||||
|
}
|
@ -32,14 +32,30 @@ public class InputProvider
|
|||||||
this.CurrentPosition = 0;
|
this.CurrentPosition = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public InputProvider(string text)
|
||||||
|
{
|
||||||
|
this.words = text.Split("\n");
|
||||||
|
this.CurrentPosition = 0;
|
||||||
|
}
|
||||||
|
|
||||||
public InputProvider.LookaheadContext GetLookaheadContext()
|
public InputProvider.LookaheadContext GetLookaheadContext()
|
||||||
{
|
{
|
||||||
return new InputProvider.LookaheadContext(this);
|
return new InputProvider.LookaheadContext(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public bool CanYieldWord()
|
||||||
|
{
|
||||||
|
return this.CurrentPosition < this.words.Length;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void SkipCurrentWord()
|
||||||
|
{
|
||||||
|
this.CurrentPosition += 1;
|
||||||
|
}
|
||||||
|
|
||||||
public string YieldWord()
|
public string YieldWord()
|
||||||
{
|
{
|
||||||
if (this.CurrentPosition > this.words.Length)
|
if (!this.CanYieldWord())
|
||||||
{
|
{
|
||||||
return string.Empty;
|
return string.Empty;
|
||||||
}
|
}
|
||||||
|
28
TextParser/Tokenization/LongToken.cs
Normal file
28
TextParser/Tokenization/LongToken.cs
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
namespace Parsing.Tokenization;
|
||||||
|
|
||||||
|
using Parsing.Schema;
|
||||||
|
|
||||||
|
public class LongToken : IValueToken<long>
|
||||||
|
{
|
||||||
|
private string word;
|
||||||
|
|
||||||
|
public LongToken(string word)
|
||||||
|
{
|
||||||
|
this.word = word;
|
||||||
|
}
|
||||||
|
|
||||||
|
public string GetText()
|
||||||
|
{
|
||||||
|
return word;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long GetValue()
|
||||||
|
{
|
||||||
|
return long.Parse(word);
|
||||||
|
}
|
||||||
|
|
||||||
|
public InputType GetInputType()
|
||||||
|
{
|
||||||
|
return InputType.Long;
|
||||||
|
}
|
||||||
|
}
|
@ -1 +1 @@
|
|||||||
0.2.3
|
0.10.0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user