Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 2ed103abbf | |||
| fab5d2eee7 | |||
| 7be09140e2 | |||
| 09bbba1293 | |||
| c1705d9f96 | |||
| 62092d0380 | |||
|
|
29f7aa37da | ||
|
|
5f07f16f1f | ||
|
|
d5cfe494d8 | ||
|
|
0d85132a32 | ||
|
|
e15190ecd6 | ||
|
|
0dc5fb14c5 | ||
|
|
308489fd01 | ||
|
|
6a0addf7a3 | ||
|
|
8b62259785 | ||
|
|
1c03622fd1 | ||
|
|
75af8ebc69 | ||
|
|
6d4b9315f4 | ||
|
|
263be78318 | ||
|
|
b74a8f5212 | ||
| 8707e0da3a |
@@ -46,7 +46,7 @@ create_file() {
|
||||
}
|
||||
|
||||
get_commit_range() {
|
||||
rm $TEMP_FILE_PATH/messages.txt
|
||||
rm -f $TEMP_FILE_PATH/messages.txt
|
||||
if [[ $LAST_TAG =~ $PATTERN ]]; then
|
||||
create_file true
|
||||
else
|
||||
@@ -66,17 +66,14 @@ start() {
|
||||
echo $message
|
||||
if echo $message | grep -Pq '(feat|style)(\([\w]+\))?!:([a-zA-Z0-9-_!\&\.\%\(\)\=\w\s]+)\s?(,?\s?)((ref(s?):?\s?)(([A-Z0-9]+\-[0-9]+)|(NOISSUE)))'; then
|
||||
increment_type="major"
|
||||
echo "a"
|
||||
break
|
||||
elif echo $message | grep -Pq '(feat|style)(\([\w]+\))?:([a-zA-Z0-9-_!\&\.\%\(\)\=\w\s]+)\s?(,?\s?)((ref(s?):?\s?)(([A-Z0-9]+\-[0-9]+)|(NOISSUE)))'; then
|
||||
if [ -z "$increment_type" ] || [ "$increment_type" == "patch" ]; then
|
||||
increment_type="minor"
|
||||
echo "b"
|
||||
fi
|
||||
elif echo $message | grep -Pq '(build|fix|perf|refactor|revert)(\(.+\))?:\s([a-zA-Z0-9-_!\&\.\%\(\)\=\w\s]+)\s?(,?\s?)((ref(s?):?\s?)(([A-Z0-9]+\-[0-9]+)|(NOISSUE)))'; then
|
||||
if [ -z "$increment_type" ]; then
|
||||
increment_type="patch"
|
||||
echo "c"
|
||||
fi
|
||||
fi
|
||||
done < $TEMP_FILE_PATH/messages.txt
|
||||
@@ -86,8 +83,9 @@ start() {
|
||||
echo "New version: $new_version"
|
||||
|
||||
gitchangelog | grep -v "[rR]elease:" > HISTORY.md
|
||||
git add DotnetTestLib/VERSION HISTORY.md
|
||||
echo $new_version > DotnetTestLib/VERSION
|
||||
echo $new_version > TextParser/VERSION
|
||||
git add TextParser/VERSION
|
||||
git add HISTORY.md
|
||||
git commit -m "release: version $new_version 🚀"
|
||||
echo "creating git tag : $new_version"
|
||||
git tag $new_version
|
||||
|
||||
@@ -17,23 +17,7 @@ on:
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
linter:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
dotnet-version: [9.0.X]
|
||||
os: [ubuntu-latest]
|
||||
runs-on: ${{ matrix.os }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: ${{ matrix.dotnet-version }}
|
||||
- name: Run linter
|
||||
run: make lint
|
||||
|
||||
tests_linux:
|
||||
needs: linter
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
|
||||
@@ -64,5 +64,5 @@ jobs:
|
||||
QUOTED_REPOSITORY_SOURCE_NAME=$(dotnet nuget config get all | grep "/packages/${REPOSITORY_OWNER}/nuget/index.json" | awk '{print $2}' | awk -F= '{print $2}')
|
||||
REPOSITORY_SOURCE_NAME=${QUOTED_REPOSITORY_SOURCE_NAME:1:-1}
|
||||
fi
|
||||
dotnet pack --include-symbols --include-source -p:PackageVersion=$(cat $REPOSITORY_NAME/VERSION) DotnetTestLib.sln
|
||||
dotnet pack --include-symbols --include-source -p:PackageVersion=$(cat $REPOSITORY_NAME/VERSION) TextParser.sln
|
||||
dotnet nuget push -k $GITEA_PAT --source $REPOSITORY_SOURCE_NAME $REPOSITORY_NAME/bin/Release/$REPOSITORY_NAME.$(cat $REPOSITORY_NAME/VERSION).symbols.nupkg
|
||||
|
||||
95
HISTORY.md
95
HISTORY.md
@@ -4,8 +4,97 @@ Changelog
|
||||
|
||||
(unreleased)
|
||||
------------
|
||||
- More bugfixes. [Simon Diesenreiter]
|
||||
- Fix linting errors. [Simon Diesenreiter]
|
||||
- Initial commit. [Simon Diesenreiter]
|
||||
- Feat: enable named literals, ref: NOISSUE. [Simon Diesenreiter]
|
||||
|
||||
|
||||
0.5.1 (2024-12-03)
|
||||
------------------
|
||||
|
||||
Fix
|
||||
~~~
|
||||
- Some bugfixes with fragment parser logic, ref: NOISSUE. [Simon
|
||||
Diesenreiter]
|
||||
|
||||
Other
|
||||
~~~~~
|
||||
|
||||
|
||||
0.5.0 (2024-12-03)
|
||||
------------------
|
||||
|
||||
Fix
|
||||
~~~
|
||||
- Fix bugs with fragment parsing support, refs: NOISSUE. [Simon
|
||||
Diesenreiter]
|
||||
|
||||
Other
|
||||
~~~~~
|
||||
- Feat: initial support for fragment parsing, ref: NOISSUE. [Simon
|
||||
Diesenreiter]
|
||||
|
||||
|
||||
0.4.0 (2024-12-02)
|
||||
------------------
|
||||
- Style: fix linting errors, ref: NOISSUE. [Simon Diesenreiter]
|
||||
- Feat: implement greedy repetition, ref: A24-13. [Simon Diesenreiter]
|
||||
|
||||
|
||||
0.3.0 (2024-12-02)
|
||||
------------------
|
||||
- Feat: ci pipeline fix for releases, ref NOISSUE. [Simon Diesenreiter]
|
||||
|
||||
|
||||
0.2.3 (2024-12-02)
|
||||
------------------
|
||||
|
||||
Fix
|
||||
~~~
|
||||
- More release script fixes,ref: NOISSUE. [Simon Diesenreiter]
|
||||
|
||||
Other
|
||||
~~~~~
|
||||
|
||||
|
||||
0.2.2 (2024-12-02)
|
||||
------------------
|
||||
|
||||
Fix
|
||||
~~~
|
||||
- Increment version,refs:NOISSUE. [Simon Diesenreiter]
|
||||
|
||||
Other
|
||||
~~~~~
|
||||
|
||||
|
||||
0.2.1 (2024-12-02)
|
||||
------------------
|
||||
|
||||
Fix
|
||||
~~~
|
||||
- Fix file inclusion in tag commits, ref: NOISSUE. [Simon Diesenreiter]
|
||||
|
||||
|
||||
0.2.0 (2024-12-02)
|
||||
------------------
|
||||
|
||||
Fix
|
||||
~~~
|
||||
- Fix some bugs and tests, ref: A24-3. [Simon Diesenreiter]
|
||||
|
||||
Other
|
||||
~~~~~
|
||||
- Ci: fix release shortcut, ref: NOISSUE. [Simon Diesenreiter]
|
||||
- Feat: add text parser and output format options, ref: A24-3. [Simon
|
||||
Diesenreiter]
|
||||
|
||||
|
||||
0.1.0 (2024-12-01)
|
||||
------------------
|
||||
- Feat: added initial implementation of TextParser, ref: A24-3. [Simon
|
||||
Diesenreiter]
|
||||
- ✅ Ready to clone and code. [simon]
|
||||
- Ci: initial commit for triggering migration, ref: NOISSUE. [Simon
|
||||
Diesenreiter]
|
||||
- Initial commit. [Projects <>]
|
||||
|
||||
|
||||
|
||||
12
Makefile
12
Makefile
@@ -2,7 +2,7 @@
|
||||
|
||||
.PHONY: issetup
|
||||
issetup:
|
||||
@[ -f .git/hooks/commit-msg ] || [ -v SKIP_MAKE_SETUP_CHECK ] || (echo "You must run 'make setup' first to initialize the repo!" && exit 1)
|
||||
# Pass when the commit-msg hook is installed or SKIP_MAKE_SETUP_CHECK is set to a non-empty value; otherwise abort with a hint. The variable must be expanded by the shell ($$...) - testing the bare word is always true.
@[ -f .git/hooks/commit-msg ] || [ -n "$$SKIP_MAKE_SETUP_CHECK" ] || (echo "You must run 'make setup' first to initialize the repo!" && exit 1)
|
||||
|
||||
.PHONY: setup
|
||||
setup:
|
||||
@@ -45,15 +45,7 @@ clean: issetup ## Clean unused files.
|
||||
|
||||
.PHONY: release
|
||||
release: issetup ## Create a new tag for release.
|
||||
@echo "WARNING: This operation will create a version tag and push to gitea"
|
||||
@read -p "Version? (provide the next x.y.z semver) : " TAG; echo "$$TAG" > TextParser/VERSION
|
||||
@gitchangelog > HISTORY.md
|
||||
@git add TextParser/VERSION HISTORY.md
|
||||
@git commit -m "release: version $$(cat TextParser/VERSION) 🚀"
|
||||
@echo "creating git tag : $$(cat TextParser/VERSION)"
|
||||
@git tag $$(cat TextParser/VERSION)
|
||||
@git push -u origin HEAD --tags
|
||||
@echo "Gitea Actions will detect the new tag and release the new version."
|
||||
@./.gitea/conventional_commits/generate-version.sh
|
||||
|
||||
.PHONY: docs
|
||||
docs: issetup ## Build the documentation.
|
||||
|
||||
@@ -9,9 +9,26 @@ public class TextParserTests
|
||||
{
|
||||
private const string testInput1 = "2 4 6 8";
|
||||
private const string testInput2 = "2 ab ba 8 cd dc";
|
||||
private const string testInput3 = @"2 4 6 1
|
||||
3 5 7 2
|
||||
4 6 8 3";
|
||||
private const string testInput4 = @"2 ab ba fd er sd
|
||||
8 cd dc
|
||||
7 uh 6 yp rt";
|
||||
private const string testInput5 = @"asdfnums(2,5,3)ght
|
||||
|
||||
cv strs(test) jh 4,3,2
|
||||
|
||||
34,54,2nums(2,8) strs(aa,ab,ba,bb)aa,bb";
|
||||
private const string testInput6 = @"adfdf1()324ddf3()svsdvs
|
||||
davnsldkvjs2()m23423()
|
||||
mcsodkcn owdjnfj 1() asdfnad 23234 2() sdvsdv";
|
||||
private const string testInput7 = @"adfdf1()324ddf3()()()svsdvs
|
||||
davnsldkvjs2()()m23423()()()
|
||||
mcsodkcn owdjnfj 1() asdfnad 23234 2()() sdvsdv";
|
||||
|
||||
[Fact]
|
||||
public void TestSimpleRepetition()
|
||||
public void LineParser_TestSimpleRepetition()
|
||||
{
|
||||
var schemaBuilder = new InputSchemaBuilder();
|
||||
var schema = schemaBuilder
|
||||
@@ -20,7 +37,7 @@ public class TextParserTests
|
||||
.EndRepetition()
|
||||
.Build();
|
||||
|
||||
var parser = new TextParser(schema);
|
||||
var parser = new LineParser<InputSchemaContext>(schema);
|
||||
var tokens = parser.ParseLine(testInput1);
|
||||
|
||||
Assert.Equal(4, tokens.Count);
|
||||
@@ -35,7 +52,7 @@ public class TextParserTests
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void TestSimpleInput()
|
||||
public void LineParser_TestSimpleInput()
|
||||
{
|
||||
var schemaBuilder = new InputSchemaBuilder();
|
||||
var schema = schemaBuilder
|
||||
@@ -45,7 +62,7 @@ public class TextParserTests
|
||||
.Expect(InputType.Integer)
|
||||
.Build();
|
||||
|
||||
var parser = new TextParser(schema);
|
||||
var parser = new LineParser<InputSchemaContext>(schema);
|
||||
var tokens = parser.ParseLine(testInput1);
|
||||
|
||||
Assert.Equal(4, tokens.Count);
|
||||
@@ -57,11 +74,11 @@ public class TextParserTests
|
||||
Assert.Equal(4, (tokens[1] as IntegerToken)?.GetValue());
|
||||
Assert.Equal(6, (tokens[2] as IntegerToken)?.GetValue());
|
||||
Assert.Equal(8, (tokens[3] as IntegerToken)?.GetValue());
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
[Fact]
|
||||
public void TestNestedRepetition()
|
||||
public void LineParser_TestNestedRepetition()
|
||||
{
|
||||
var schemaBuilder = new InputSchemaBuilder();
|
||||
var schema = schemaBuilder
|
||||
@@ -73,7 +90,7 @@ public class TextParserTests
|
||||
.EndRepetition()
|
||||
.Build();
|
||||
|
||||
var parser = new TextParser(schema);
|
||||
var parser = new LineParser<InputSchemaContext>(schema);
|
||||
var tokens = parser.ParseLine(testInput2);
|
||||
|
||||
Assert.Equal(6, tokens.Count);
|
||||
@@ -90,4 +107,212 @@ public class TextParserTests
|
||||
Assert.Equal("cd", (tokens[4] as StringToken)?.GetValue());
|
||||
Assert.Equal("dc", (tokens[5] as StringToken)?.GetValue());
|
||||
}
|
||||
|
||||
[Fact]
public void TextParser_TestRepetitionAsRows()
{
    // Schema: a group of four integers.
    var schema = new InputSchemaBuilder()
        .Repeat(4)
        .Expect(InputType.Integer)
        .EndRepetition()
        .Build();

    var rows = new TextParser<InputSchemaContext>(schema)
        .SetInputText(testInput3)
        .Parse()
        .AsRows<int>();

    // Values of testInput3 in row-major order.
    int[][] expectedRows =
    {
        new[] { 2, 4, 6, 1 },
        new[] { 3, 5, 7, 2 },
        new[] { 4, 6, 8, 3 },
    };

    Assert.Equal(3, rows.Count);
    Assert.Equal(4, rows[0].Length);
    for (int rowIndex = 0; rowIndex < expectedRows.Length; rowIndex++)
    {
        for (int columnIndex = 0; columnIndex < expectedRows[rowIndex].Length; columnIndex++)
        {
            Assert.Equal(expectedRows[rowIndex][columnIndex], rows[rowIndex][columnIndex]);
        }
    }
}
|
||||
|
||||
[Fact]
public void TextParser_TestRepetitionAsColumns()
{
    // Schema: a group of four integers.
    var schema = new InputSchemaBuilder()
        .Repeat(4)
        .Expect(InputType.Integer)
        .EndRepetition()
        .Build();

    var columns = new TextParser<InputSchemaContext>(schema)
        .SetInputText(testInput3)
        .Parse()
        .AsColumns<int>();

    // Values of testInput3 transposed: one entry per column, top to bottom.
    int[][] expectedColumns =
    {
        new[] { 2, 3, 4 },
        new[] { 4, 5, 6 },
        new[] { 6, 7, 8 },
        new[] { 1, 2, 3 },
    };

    Assert.Equal(4, columns.Count);
    Assert.Equal(3, columns[0].Length);
    for (int columnIndex = 0; columnIndex < expectedColumns.Length; columnIndex++)
    {
        for (int rowIndex = 0; rowIndex < expectedColumns[columnIndex].Length; rowIndex++)
        {
            Assert.Equal(expectedColumns[columnIndex][rowIndex], columns[columnIndex][rowIndex]);
        }
    }
}
|
||||
|
||||
[Fact]
public void TextParser_TestGreedyRepetitionAsRows()
{
    // Schema: greedily repeat "one integer followed by a greedy run of strings".
    var schema = new InputSchemaBuilder()
        .Repeat()
        .Expect(InputType.Integer)
        .Repeat()
        .Expect(InputType.String)
        .EndRepetition()
        .EndRepetition()
        .Build();

    var rows = new TextParser<InputSchemaContext>(schema)
        .SetInputText(testInput4)
        .Parse()
        .AsRawData();

    var integer = InputType.Integer;
    var text = InputType.String;

    // Expected token types for each line of testInput4.
    InputType[][] expectedTypes =
    {
        new[] { integer, text, text, text, text, text },
        new[] { integer, text, text },
        new[] { integer, text, integer, text, text },
    };

    Assert.Equal(3, rows.Count);
    Assert.Equal(6, rows[0].Count);
    Assert.Equal(3, rows[1].Count);
    Assert.Equal(5, rows[2].Count);
    for (int rowIndex = 0; rowIndex < expectedTypes.Length; rowIndex++)
    {
        for (int tokenIndex = 0; tokenIndex < expectedTypes[rowIndex].Length; tokenIndex++)
        {
            Assert.Equal(expectedTypes[rowIndex][tokenIndex], rows[rowIndex][tokenIndex].GetInputType());
        }
    }
}
|
||||
|
||||
[Fact]
public void FragmentParser_SimpleTest()
{
    // Two alternatives: "nums(<int>[,<int>]*)" capturing "num", and
    // "strs(<str>[,<str>]*)" capturing "str".
    var schema = new FragmentSchemaBuilder()
        .StartOptions()
        .Option()
        .Expect("nums(")
        .Expect(InputType.Integer, "num")
        .Repeat()
        .Expect(",")
        .Expect(InputType.Integer, "num")
        .EndRepetition()
        .Expect(")")
        .Option()
        .Expect("strs(")
        .Expect(InputType.String, "str")
        .Repeat()
        .Expect(",")
        .Expect(InputType.String, "str")
        .EndRepetition()
        .Expect(")")
        .EndOptions()
        .Build();

    var fragments = TextParser.Create(schema)
        .SetInputText(testInput5)
        .Parse()
        .AsFragments();

    // Per fragment: sum of the parsed "num" captures plus the number of
    // "num" and "str" captures.
    var scores = fragments.ConvertAll((Fragment f) =>
    {
        int sumOfNumbers = 0;
        foreach (var capturedNumber in f["num"])
        {
            sumOfNumbers += int.Parse(capturedNumber);
        }

        int captureCount = f["num"].Count + f["str"].Count;
        return captureCount + sumOfNumbers;
    });

    Assert.Equal(4, fragments.Count);
    Assert.Equal(3, fragments[0]["num"].Count);
    Assert.Single(fragments[1]["str"]);
    Assert.Equal(2, fragments[2]["num"].Count);
    Assert.Equal(4, fragments[3]["str"].Count);
    Assert.Equal(13, scores[0]);
    Assert.Equal(1, scores[1]);
    Assert.Equal(12, scores[2]);
    Assert.Equal(4, scores[3]);
}
|
||||
|
||||
[Fact]
public void FragmentParser_LiteralTest()
{
    // Three alternatives, each a named literal: "1()", "2()" and "3()".
    var schemaBuilder = new FragmentSchemaBuilder();
    var schema = schemaBuilder
        .StartOptions()
        .Option()
        .Expect("1()", "option1")
        .Option()
        .Expect("2()", "option2")
        .Option()
        .Expect("3()", "option3")
        .EndOptions()
        .Build();

    var parser = TextParser.Create(schema);
    var fragmentData = parser
        .SetInputText(testInput6)
        .Parse()
        .AsFragments();

    // True when the fragment has at least one capture recorded under optionName.
    // Uses the Count property for every option (the values are the same type for
    // all keys) instead of mixing it with the LINQ Count() extension.
    static bool Saw(Fragment fragment, string optionName)
    {
        return fragment.ContainsKey(optionName) && fragment[optionName].Count > 0;
    }

    // Encode the options seen in a fragment as a bit mask:
    // option1 -> 1, option2 -> 2, option3 -> 4.
    var convertedData = fragmentData
        .ConvertAll((Fragment f) =>
        {
            int indicator = 0;
            if (Saw(f, "option1"))
            {
                indicator += 1;
            }

            if (Saw(f, "option2"))
            {
                indicator += 2;
            }

            if (Saw(f, "option3"))
            {
                indicator += 4;
            }

            return indicator;
        });

    Assert.Equal(6, convertedData.Count);
    Assert.Equal(1, convertedData[0]);
    Assert.Equal(4, convertedData[1]);
    Assert.Equal(2, convertedData[2]);
    Assert.Equal(4, convertedData[3]);
    Assert.Equal(1, convertedData[4]);
    Assert.Equal(2, convertedData[5]);
}
|
||||
}
|
||||
|
||||
37
TextParser/LineParser.cs
Normal file
37
TextParser/LineParser.cs
Normal file
@@ -0,0 +1,37 @@
|
||||
namespace Parsing;
|
||||
|
||||
using Parsing.Schema;
|
||||
using Parsing.Tokenization;
|
||||
|
||||
/// <summary>
/// Splits a single line of text into words and hands the words to a schema
/// for tokenization.
/// </summary>
/// <typeparam name="T">Context type created by the schema.</typeparam>
public class LineParser<T> where T : ISchemaContext
{
    private string[] delimiters;
    private bool removeEmptyEntries;
    private ISchema<T> schema;
    private T context;

    /// <summary>
    /// Creates a parser for the given schema.
    /// </summary>
    /// <param name="schema">Schema that turns the word list into tokens.</param>
    /// <param name="delimiters">Word separators; a single space is used when null.</param>
    /// <param name="removeEmptyEntries">Whether empty words are dropped after splitting.</param>
    public LineParser(ISchema<T> schema, string[]? delimiters = null, bool removeEmptyEntries = true)
    {
        if (delimiters == null)
        {
            delimiters = new string[] { " " };
        }

        this.delimiters = delimiters;
        this.removeEmptyEntries = removeEmptyEntries;
        this.schema = schema;
        this.context = schema.CreateContext();
    }

    /// <summary>
    /// Splits the line on the configured delimiters; entries are always trimmed.
    /// </summary>
    private string[] ParseLineIntoWords(string line)
    {
        StringSplitOptions splitOptions = this.removeEmptyEntries
            ? StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries
            : StringSplitOptions.TrimEntries;

        return line.Split(this.delimiters, splitOptions);
    }

    /// <summary>
    /// Tokenizes one line of input.
    /// </summary>
    /// <param name="line">The text of the line.</param>
    /// <returns>The tokens the schema produced for the words of the line.</returns>
    public List<IToken> ParseLine(string line)
    {
        // A fresh context is stored for every line.
        this.context = this.schema.CreateContext();
        return this.schema.ProcessWordList(this.ParseLineIntoWords(line));
    }
}
|
||||
@@ -1,6 +1,13 @@
|
||||
namespace Parsing.Schema;
|
||||
|
||||
/// <summary>
/// Kinds of building blocks a schema can be composed of. Every member
/// occupies its own bit so values can be combined as flags.
/// </summary>
[Flags]
public enum BlockType
{
    Integer = 1 << 0,
    String = 1 << 1,
    // technically not a block type but keeping it here for consistency/having all types in one place
    Fragment = 1 << 2,
    FixedRepetition = 1 << 3,
    GreedyRepetition = 1 << 4,
    NonZeroRepetition = 1 << 5,
}
|
||||
|
||||
@@ -12,6 +12,8 @@ abstract class BuildingBlockBase : IBuildingBlock
|
||||
|
||||
public abstract bool CanParseWord(InputProvider inputs);
|
||||
|
||||
public abstract bool CanParseWord(string word);
|
||||
|
||||
public abstract BlockType GetBlockType();
|
||||
|
||||
public virtual bool IsRepetitionType()
|
||||
@@ -19,7 +21,7 @@ abstract class BuildingBlockBase : IBuildingBlock
|
||||
return false;
|
||||
}
|
||||
|
||||
public virtual bool CheckIsDoneParsingAndReset()
|
||||
public virtual bool CheckIsDoneParsingAndReset(InputProvider inputs)
|
||||
{
|
||||
// most blocks are always done parsing after consuming a token
|
||||
// repetition blocks can consume multiple tokens
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
namespace Parsing.Schema.BuildingBlocks;
|
||||
|
||||
using System.IO.Pipelines;
|
||||
using System.Linq;
|
||||
using Parsing.Tokenization;
|
||||
|
||||
class FixedRepetitionBlock : BuildingBlockBase
|
||||
@@ -30,7 +31,7 @@ class FixedRepetitionBlock : BuildingBlockBase
|
||||
this.context = this.inputSchema.CreateContext();
|
||||
}
|
||||
}
|
||||
return result;
|
||||
return result.Single();
|
||||
}
|
||||
|
||||
public override bool CanParseWord(InputProvider inputs)
|
||||
@@ -48,6 +49,21 @@ class FixedRepetitionBlock : BuildingBlockBase
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>
/// Checks whether this block can consume the given word.
/// </summary>
/// <param name="word">The candidate word.</param>
/// <returns>
/// False when the repetition count is zero; otherwise the answer of the
/// nested schema for the current context.
/// </returns>
public override bool CanParseWord(string word)
{
    if (this.repetitionCount == 0)
    {
        return false;
    }

    return inputSchema.CanProcessNextWord(context, word);
}
|
||||
|
||||
public override BlockType GetBlockType()
|
||||
{
|
||||
return BlockType.FixedRepetition;
|
||||
@@ -58,7 +74,7 @@ class FixedRepetitionBlock : BuildingBlockBase
|
||||
return true;
|
||||
}
|
||||
|
||||
public override bool CheckIsDoneParsingAndReset()
|
||||
public override bool CheckIsDoneParsingAndReset(InputProvider inputs)
|
||||
{
|
||||
// we are done parsing once all repetitions are exhausted
|
||||
var result = this.repetitionCount == 0;
|
||||
|
||||
58
TextParser/Schema/BuildingBlocks/GreedyRepetitionBlock.cs
Normal file
58
TextParser/Schema/BuildingBlocks/GreedyRepetitionBlock.cs
Normal file
@@ -0,0 +1,58 @@
|
||||
namespace Parsing.Schema.BuildingBlocks;
|
||||
|
||||
using System.IO.Pipelines;
|
||||
using System.Linq;
|
||||
using Parsing.Tokenization;
|
||||
|
||||
/// <summary>
/// Repetition block that keeps applying a nested schema for as long as the
/// upcoming input still matches it.
/// </summary>
class GreedyRepetitionBlock : BuildingBlockBase
{
    private InputSchema inputSchema;
    private InputSchemaContext context;

    /// <summary>
    /// Creates the block around the schema that is repeated.
    /// </summary>
    /// <param name="inputSchema">Schema applied on every repetition.</param>
    public GreedyRepetitionBlock(InputSchema inputSchema)
    {
        this.inputSchema = inputSchema;
        this.context = inputSchema.CreateContext();
    }

    /// <summary>
    /// Lets the nested schema process the next word and returns the single
    /// token it produced. The nested context is replaced by a fresh one when
    /// the block cannot parse the word that follows.
    /// </summary>
    public override IToken ParseWord(InputProvider inputs)
    {
        var producedTokens = inputSchema.ProcessNextWord(context, inputs);
        bool canContinue = this.CanParseWord(inputs);
        if (!canContinue)
        {
            this.context = this.inputSchema.CreateContext();
        }

        return producedTokens.Single();
    }

    /// <summary>
    /// True when the nested schema accepts the upcoming input and the
    /// provider can still yield a word.
    /// </summary>
    public override bool CanParseWord(InputProvider inputs)
    {
        if (!inputSchema.CanProcessNextWord(context, inputs))
        {
            return false;
        }

        return inputs.CanYieldWord();
    }

    /// <summary>
    /// True when the nested schema accepts the given word in the current context.
    /// </summary>
    public override bool CanParseWord(string word) => inputSchema.CanProcessNextWord(context, word);

    /// <summary>Identifies this block as a greedy repetition.</summary>
    public override BlockType GetBlockType() => BlockType.GreedyRepetition;

    /// <summary>This block is a repetition block.</summary>
    public override bool IsRepetitionType() => true;

    /// <summary>
    /// Reports whether greedy parsing is finished, i.e. the next token does
    /// not match anymore; in that case the nested context is replaced by a
    /// fresh one.
    /// </summary>
    public override bool CheckIsDoneParsingAndReset(InputProvider inputs)
    {
        if (this.CanParseWord(inputs))
        {
            return false;
        }

        this.context = this.inputSchema.CreateContext();
        return true;
    }
}
|
||||
@@ -8,9 +8,11 @@ public interface IBuildingBlock
|
||||
|
||||
public bool CanParseWord(InputProvider inputs);
|
||||
|
||||
public bool CanParseWord(string word);
|
||||
|
||||
public BlockType GetBlockType();
|
||||
|
||||
public bool IsRepetitionType();
|
||||
|
||||
public bool CheckIsDoneParsingAndReset();
|
||||
public bool CheckIsDoneParsingAndReset(InputProvider inputs);
|
||||
}
|
||||
@@ -18,12 +18,16 @@ class IntegerBlock : BuildingBlockBase
|
||||
{
|
||||
using (inputs.GetLookaheadContext())
|
||||
{
|
||||
int number = 0;
|
||||
var success = int.TryParse(inputs.YieldWord(), out number);
|
||||
return success;
|
||||
return this.CanParseWord(inputs.YieldWord());
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// True when the word can be parsed as an <see cref="int"/>.
/// </summary>
/// <param name="word">The candidate word.</param>
public override bool CanParseWord(string word) => int.TryParse(word, out _);
|
||||
|
||||
public override BlockType GetBlockType()
|
||||
{
|
||||
return BlockType.Integer;
|
||||
|
||||
@@ -16,6 +16,24 @@ class StringBlock : BuildingBlockBase
|
||||
|
||||
/// <summary>
/// Reads the next word inside a lookahead context and checks whether this
/// block accepts it.
/// </summary>
/// <param name="inputs">Provider the next word is read from.</param>
public override bool CanParseWord(InputProvider inputs)
{
    string upcomingWord;
    using (inputs.GetLookaheadContext())
    {
        upcomingWord = inputs.YieldWord();
    }

    return this.CanParseWord(upcomingWord);
}
|
||||
|
||||
/// <summary>
/// True for every word that an <see cref="IntegerBlock"/> does not accept.
/// </summary>
/// <param name="word">The candidate word.</param>
public override bool CanParseWord(string word)
{
    // Here we need to ensure we are not matching any non-string tokens, since string can match pretty much anything
    bool isInteger = new IntegerBlock().CanParseWord(word);
    return !isInteger;
}
|
||||
|
||||
|
||||
112
TextParser/Schema/FragmentSchema.cs
Normal file
112
TextParser/Schema/FragmentSchema.cs
Normal file
@@ -0,0 +1,112 @@
|
||||
namespace Parsing.Schema;
|
||||
|
||||
using Parsing.Schema;
|
||||
using Parsing.Schema.BuildingBlocks;
|
||||
using Parsing.Tokenization;
|
||||
using System.Collections;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
/// <summary>
/// Parsing state used together with <see cref="FragmentSchema"/>.
/// </summary>
public class FragmentSchemaContext : ISchemaContext
{
    /// <summary>Index of the last processed block; starts at 0.</summary>
    public int lastProcessedBlockIndex { get; set; }

    /// <summary>Whether processing has finished; starts as false.</summary>
    public bool HasFinished { get; set; }
}
|
||||
|
||||
/// <summary>
/// Schema that extracts fragments from words with a regular expression.
/// Every regex match becomes one <see cref="FragmentToken"/> carrying the
/// captures of the configured named groups and named literals.
/// </summary>
public class FragmentSchema : ISchema<FragmentSchemaContext>
{
    private readonly string fragmentRegex;
    private readonly List<string> namedGroups = new List<string>();
    private readonly List<string> namedLiterals = new List<string>();

    // Built on first use and then reused, instead of constructing a new Regex
    // for every processed word.
    private Regex? cachedRegex;

    /// <summary>
    /// Creates the schema.
    /// </summary>
    /// <param name="fragmentRegex">Pattern that matches one fragment.</param>
    /// <param name="namedGroups">Group names whose capture values are stored on each token.</param>
    /// <param name="namedLiterals">Group names of literals; the matched length is stored for them.</param>
    public FragmentSchema(string fragmentRegex, List<string> namedGroups, List<string> namedLiterals)
    {
        this.fragmentRegex = fragmentRegex;
        this.namedGroups = namedGroups;
        this.namedLiterals = namedLiterals;
    }

    // Lazily creates the regex so an invalid pattern still surfaces on first
    // use rather than at construction time.
    private Regex GetRegex()
    {
        return this.cachedRegex ??= new Regex(this.fragmentRegex);
    }

    /// <summary>
    /// Consumes the next word and returns one token per regex match in it.
    /// Marks the context as finished when no further word is available.
    /// </summary>
    /// <param name="currentContext">Context to update.</param>
    /// <param name="inputs">Provider the word is taken from.</param>
    /// <returns>The tokens found in the word; empty when nothing matched.</returns>
    public List<IToken> ProcessNextWord(FragmentSchemaContext currentContext, InputProvider inputs)
    {
        Regex r = this.GetRegex();

        var tokenList = new List<IToken>();
        // one token per match
        foreach (Match match in r.Matches(inputs.YieldWord()))
        {
            var newToken = new FragmentToken(match.Value);
            // token contains data from all included matches
            foreach (var groupName in this.namedGroups)
            {
                var captureList = new List<string>();
                foreach (Capture capture in match.Groups[groupName].Captures)
                {
                    captureList.Add(capture.Value);
                }

                newToken.AddMatch(groupName, captureList);
            }

            foreach (var literalName in this.namedLiterals)
            {
                var captureList = new List<string>();
                // A literal that took part in the match is recorded by the
                // length of its matched text; otherwise the list stays empty.
                if (match.Groups.TryGetValue(literalName, out Group? literalGroup) && literalGroup.Length > 0)
                {
                    captureList.Add(literalGroup.Length.ToString());
                }

                newToken.AddMatch(literalName, captureList);
            }

            tokenList.Add(newToken);
        }

        if (!inputs.CanYieldWord())
        {
            currentContext.HasFinished = true;
        }

        return tokenList;
    }

    /// <summary>
    /// Checks, inside a lookahead context, whether the next word contains a fragment.
    /// </summary>
    public bool CanProcessNextWord(FragmentSchemaContext currentContext, InputProvider inputs)
    {
        using (inputs.GetLookaheadContext())
        {
            return this.CanProcessNextWord(currentContext, inputs.YieldWord());
        }
    }

    /// <summary>
    /// Checks whether the given word contains a fragment. Always false once
    /// the context has finished.
    /// </summary>
    public bool CanProcessNextWord(FragmentSchemaContext currentContext, string word)
    {
        if (currentContext.HasFinished)
        {
            return false;
        }

        return this.GetRegex().IsMatch(word);
    }

    /// <summary>
    /// Processes all words in order: words containing a fragment are
    /// tokenized, all other words are skipped.
    /// </summary>
    /// <param name="words">The words to process.</param>
    /// <returns>All fragment tokens found, in input order.</returns>
    public List<IToken> ProcessWordList(string[] words)
    {
        List<IToken> tokens = new List<IToken>();
        InputProvider inputs = new InputProvider(words);
        var overallContext = this.CreateContext();

        while (!overallContext.HasFinished && inputs.CanYieldWord())
        {
            if (this.CanProcessNextWord(overallContext, inputs))
            {
                tokens.AddRange(this.ProcessNextWord(overallContext, inputs));
            }
            else
            {
                inputs.SkipCurrentWord();
            }
        }

        overallContext.HasFinished = true;

        return tokens;
    }

    /// <summary>Creates a fresh context.</summary>
    public FragmentSchemaContext CreateContext()
    {
        return new FragmentSchemaContext();
    }
}
|
||||
142
TextParser/Schema/FragmentSchemaBuilder.cs
Normal file
142
TextParser/Schema/FragmentSchemaBuilder.cs
Normal file
@@ -0,0 +1,142 @@
|
||||
namespace Parsing.Schema;
|
||||
|
||||
using Parsing.Schema.BuildingBlocks;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
/// <summary>
/// Fluent builder that assembles the regular expression and the lists of
/// group names for a <see cref="FragmentSchema"/>.
/// </summary>
public class FragmentSchemaBuilder : RepetitionSchemaBuilder<FragmentSchemaBuilder, FragmentSchema, FragmentSchemaContext>, ISchemaBuilder<FragmentSchema, FragmentSchemaContext>
{
    protected string fragmentRegex = @"";

    private List<string> namedGroups = new List<string>();
    private List<string> namedLiterals = new List<string>();

    public FragmentSchemaBuilder()
    {
    }

    /// <summary>Opens a group of alternatives.</summary>
    public FragmentSchemaBuilder StartOptions()
    {
        this.fragmentRegex += "((";
        return this;
    }

    /// <summary>Closes the group of alternatives opened by <see cref="StartOptions"/>.</summary>
    public FragmentSchemaBuilder EndOptions()
    {
        this.fragmentRegex += "))";
        return this;
    }

    /// <summary>Starts the next alternative inside an options group.</summary>
    public FragmentSchemaBuilder Option()
    {
        // if we just started an options group there is no need to add an option separator
        // (ordinal comparison: this inspects regex syntax, not linguistic text)
        if (!this.fragmentRegex.EndsWith("(", StringComparison.Ordinal)
            || this.fragmentRegex.EndsWith("\\(", StringComparison.Ordinal))
        {
            this.fragmentRegex += ")|(";
        }

        return this;
    }

    /// <summary>
    /// Appends a group matching a value of the given type.
    /// </summary>
    /// <param name="type">String matches <c>\w+</c>, Integer matches <c>\d+</c>.</param>
    /// <param name="name">Optional group name; when set, captures are reported under it.</param>
    /// <exception cref="ArgumentOutOfRangeException">The type is neither String nor Integer.</exception>
    public FragmentSchemaBuilder Expect(InputType type, string name = "")
    {
        // Resolve the pattern first so an unsupported type leaves the builder untouched.
        string valuePattern = type switch
        {
            InputType.String => "\\w+",
            InputType.Integer => "\\d+",
            _ => throw new ArgumentOutOfRangeException(nameof(type), type, "Unrecognized InputType"),
        };

        string groupNamePrefix = "";
        if (!string.IsNullOrEmpty(name))
        {
            groupNamePrefix = "?<" + name + ">";
            namedGroups.Add(name);
        }

        this.fragmentRegex += "(" + groupNamePrefix + valuePattern + ")";
        return this;
    }

    /// <summary>
    /// Appends a group matching the literal text exactly (the text is regex-escaped).
    /// </summary>
    /// <param name="literal">Text to match.</param>
    /// <param name="name">Optional group name; when set, the literal is reported under it.</param>
    public FragmentSchemaBuilder Expect(string literal, string name = "")
    {
        string groupNamePrefix = "";
        if (!string.IsNullOrEmpty(name))
        {
            groupNamePrefix = "?<" + name + ">";
            namedLiterals.Add(name);
        }

        this.fragmentRegex += "(" + groupNamePrefix + Regex.Escape(literal) + ")";
        return this;
    }

    /// <summary>Starts a nested builder whose content is repeated exactly <paramref name="repetitionCount"/> times.</summary>
    public FragmentSchemaBuilder Repeat(int repetitionCount)
    {
        // add another layer of parsing
        var newSchemaBuilder = this.GetNewRepetitionSchemaBuilder(this);
        newSchemaBuilder.NumRepetition = repetitionCount;
        newSchemaBuilder.RepetitionType = RepetitionType.FixedRepetition;

        return newSchemaBuilder;
    }

    /// <summary>Starts a nested builder whose content is repeated greedily (zero or more times).</summary>
    public FragmentSchemaBuilder Repeat()
    {
        // add another layer of parsing
        var newSchemaBuilder = this.GetNewRepetitionSchemaBuilder(this);
        newSchemaBuilder.RepetitionType = RepetitionType.GreedyRepetition;

        return newSchemaBuilder;
    }

    /// <summary>Starts a nested builder with the given repetition type.</summary>
    public FragmentSchemaBuilder Repeat(RepetitionType repetitionType)
    {
        // add another layer of parsing
        var newSchemaBuilder = this.GetNewRepetitionSchemaBuilder(this);
        newSchemaBuilder.RepetitionType = repetitionType;

        return newSchemaBuilder;
    }

    /// <summary>
    /// Closes the current repetition: wraps the nested regex, appends the
    /// quantifier and merges the result into the upper-layer builder.
    /// </summary>
    /// <returns>The upper-layer builder.</returns>
    /// <exception cref="InvalidOperationException">
    /// There is no upper-layer builder, or the repetition type is not recognized.
    /// </exception>
    public FragmentSchemaBuilder EndRepetition()
    {
        // return back to upper layer of parsing
        var oldSchemaBuilder = this.UpperLayerBuilder;
        if (oldSchemaBuilder == null)
        {
            throw new InvalidOperationException("Something went terribly wrong!");
        }

        var currentRegex = "(" + this.fragmentRegex + ")";
        switch (this.RepetitionType)
        {
            case RepetitionType.FixedRepetition:
                currentRegex += "{" + this.NumRepetition.ToString() + "}";
                break;
            case RepetitionType.GreedyRepetition:
                currentRegex += "*";
                break;
            case RepetitionType.NonZeroRepetition:
            case RepetitionType.NonZeroRepetition | RepetitionType.GreedyRepetition:
                currentRegex += "+";
                break;
            default:
                throw new InvalidOperationException("Unrecognized RepetitionType");
        }

        oldSchemaBuilder.fragmentRegex += "(" + currentRegex + ")";

        // Names registered inside the repetition were added to this builder's
        // lists only; hand them to the upper layer so Build() on the outer
        // builder reports them. NOTE(review): assumes the nested builder does
        // not already share the lists with its parent - the merge is
        // de-duplicated, so it is a no-op if it does.
        MergeNames(this.namedGroups, oldSchemaBuilder.namedGroups);
        MergeNames(this.namedLiterals, oldSchemaBuilder.namedLiterals);

        return oldSchemaBuilder;
    }

    /// <summary>Creates the schema from the regex and names collected so far.</summary>
    public FragmentSchema Build()
    {
        var schema = new FragmentSchema(this.fragmentRegex, this.namedGroups, this.namedLiterals);
        return schema;
    }

    // Appends every name of source that target does not contain yet.
    private static void MergeNames(List<string> source, List<string> target)
    {
        foreach (var name in source)
        {
            if (!target.Contains(name))
            {
                target.Add(name);
            }
        }
    }
}
|
||||
25
TextParser/Schema/ISchema.cs
Normal file
25
TextParser/Schema/ISchema.cs
Normal file
@@ -0,0 +1,25 @@
|
||||
namespace Parsing.Schema;
|
||||
|
||||
using Parsing.Schema;
|
||||
using Parsing.Schema.BuildingBlocks;
|
||||
using Parsing.Tokenization;
|
||||
using System.Collections;
|
||||
|
||||
/// <summary>
/// Mutable state that a schema carries from one processed word to the next.
/// </summary>
public interface ISchemaContext
{
    /// <summary>
    /// Position within the schema's building blocks. <c>InputSchema</c> reads the
    /// block at this index as the next block to use and increments it when a block is done.
    /// </summary>
    int lastProcessedBlockIndex { get; set; }

    /// <summary>
    /// Set once the schema has nothing left to process; <c>InputSchema</c> sets it
    /// when the block index reaches the number of building blocks.
    /// </summary>
    bool HasFinished { get; set; }
}
|
||||
|
||||
/// <summary>
/// A schema that turns input words into tokens while tracking its progress
/// in a context object of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Context type that holds the per-run parsing state.</typeparam>
public interface ISchema<T> where T : ISchemaContext
{
    /// <summary>Processes the next input and returns the token(s) produced for it.</summary>
    /// <param name="currentContext">Parsing state to read and update.</param>
    /// <param name="inputs">Provider the words are taken from.</param>
    List<IToken> ProcessNextWord(T currentContext, InputProvider inputs);

    /// <summary>Reports whether the next input from <paramref name="inputs"/> can be processed.</summary>
    /// <param name="currentContext">Current parsing state.</param>
    /// <param name="inputs">Provider the words are taken from.</param>
    bool CanProcessNextWord(T currentContext, InputProvider inputs);

    /// <summary>Reports whether <paramref name="word"/> can be processed in the current state.</summary>
    /// <param name="currentContext">Current parsing state.</param>
    /// <param name="word">Candidate word.</param>
    bool CanProcessNextWord(T currentContext, string word);

    /// <summary>Processes a whole list of words and returns the resulting tokens.</summary>
    /// <param name="words">Words to process, in order.</param>
    List<IToken> ProcessWordList(string[] words);

    /// <summary>Creates a context instance for use with this schema.</summary>
    T CreateContext();
}
|
||||
8
TextParser/Schema/ISchemaBuilder.cs
Normal file
8
TextParser/Schema/ISchemaBuilder.cs
Normal file
@@ -0,0 +1,8 @@
|
||||
namespace Parsing.Schema;
|
||||
|
||||
using Parsing.Schema.BuildingBlocks;
|
||||
|
||||
/// <summary>
/// Builder that produces a schema of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Schema type produced by <see cref="Build"/>.</typeparam>
/// <typeparam name="U">Context type used by the produced schema.</typeparam>
public interface ISchemaBuilder<T, U>
    where T : ISchema<U>
    where U : ISchemaContext
{
    /// <summary>Produces the schema described by this builder.</summary>
    T Build();
}
|
||||
@@ -5,13 +5,13 @@ using Parsing.Schema.BuildingBlocks;
|
||||
using Parsing.Tokenization;
|
||||
using System.Collections;
|
||||
|
||||
public class InputSchemaContext
|
||||
public class InputSchemaContext : ISchemaContext
|
||||
{
|
||||
public int lastProcessedBlockIndex { get; set; } = 0;
|
||||
public bool HasFinished { get; set; } = false;
|
||||
}
|
||||
|
||||
public class InputSchema
|
||||
public class InputSchema : ISchema<InputSchemaContext>
|
||||
{
|
||||
private List<IBuildingBlock> buildingBlocks;
|
||||
|
||||
@@ -25,16 +25,18 @@ public class InputSchema
|
||||
this.buildingBlocks.Add(buildingBlock);
|
||||
}
|
||||
|
||||
public IToken ProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
|
||||
public List<IToken> ProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
|
||||
{
|
||||
var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
|
||||
var token = nextBlock.ParseWord(inputs);
|
||||
if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset())
|
||||
if (!nextBlock.IsRepetitionType() || nextBlock.CheckIsDoneParsingAndReset(inputs))
|
||||
{
|
||||
currentContext.lastProcessedBlockIndex++;
|
||||
currentContext.HasFinished = currentContext.lastProcessedBlockIndex >= this.buildingBlocks.Count;
|
||||
}
|
||||
return token;
|
||||
var newTokenList = new List<IToken>();
|
||||
newTokenList.Add(token);
|
||||
return newTokenList;
|
||||
}
|
||||
|
||||
public bool CanProcessNextWord(InputSchemaContext currentContext, InputProvider inputs)
|
||||
@@ -50,7 +52,17 @@ public class InputSchema
|
||||
}
|
||||
}
|
||||
|
||||
public IList<IToken> ProcessWordList(string[] words)
|
||||
public bool CanProcessNextWord(InputSchemaContext currentContext, string word)
|
||||
{
|
||||
if (currentContext.HasFinished)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
var nextBlock = this.buildingBlocks[currentContext.lastProcessedBlockIndex];
|
||||
return nextBlock.CanParseWord(word);
|
||||
}
|
||||
|
||||
public List<IToken> ProcessWordList(string[] words)
|
||||
{
|
||||
List<IToken> tokens = new List<IToken>();
|
||||
InputProvider inputs = new InputProvider(words);
|
||||
@@ -58,7 +70,7 @@ public class InputSchema
|
||||
|
||||
while (this.CanProcessNextWord(overallContext, inputs))
|
||||
{
|
||||
tokens.Add(this.ProcessNextWord(overallContext, inputs));
|
||||
tokens.AddRange(this.ProcessNextWord(overallContext, inputs));
|
||||
}
|
||||
|
||||
return tokens;
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
using Parsing.Schema.BuildingBlocks;
|
||||
|
||||
public class InputSchemaBuilder
|
||||
public class InputSchemaBuilder : RepetitionSchemaBuilder<InputSchemaBuilder, InputSchema, InputSchemaContext>, ISchemaBuilder<InputSchema, InputSchemaContext>
|
||||
{
|
||||
private InputSchema schema = new InputSchema();
|
||||
|
||||
@@ -31,34 +31,50 @@ public class InputSchemaBuilder
|
||||
public InputSchemaBuilder Repeat(int repetitionCount)
|
||||
{
|
||||
// add another layer of parsing
|
||||
var newInputSchemaBuilder = new RepetitionSchemaBuilder(this);
|
||||
var newInputSchemaBuilder = this.GetNewRepetitionSchemaBuilder(this);
|
||||
newInputSchemaBuilder.NumRepetition = repetitionCount;
|
||||
newInputSchemaBuilder.RepetitionType = RepetitionType.FixedRepetition;
|
||||
|
||||
return newInputSchemaBuilder;
|
||||
}
|
||||
|
||||
public InputSchemaBuilder Repeat()
|
||||
{
|
||||
// add another layer of parsing
|
||||
var newInputSchemaBuilder = this.GetNewRepetitionSchemaBuilder(this);
|
||||
newInputSchemaBuilder.RepetitionType = RepetitionType.GreedyRepetition;
|
||||
|
||||
return newInputSchemaBuilder;
|
||||
}
|
||||
|
||||
public InputSchemaBuilder EndRepetition()
|
||||
{
|
||||
// return back to upper layer of parsing
|
||||
var currentBuilder = this as RepetitionSchemaBuilder;
|
||||
var currentBuilder = this as InputSchemaBuilder;
|
||||
if (currentBuilder == null)
|
||||
{
|
||||
throw new Exception("Invalid repetition definitions!");
|
||||
}
|
||||
var oldInputSchemaBuilder = currentBuilder.UpperLayerBuilder;
|
||||
var oldSchemaBuilder = currentBuilder.UpperLayerBuilder;
|
||||
if (oldSchemaBuilder == null)
|
||||
{
|
||||
throw new Exception("Something went terribly wrong!");
|
||||
}
|
||||
|
||||
var currentSchema = currentBuilder.Build();
|
||||
switch (currentBuilder.RepetitionType)
|
||||
{
|
||||
case RepetitionType.FixedRepetition:
|
||||
oldInputSchemaBuilder.schema.AddBuildingBlock(new FixedRepetitionBlock(currentSchema, currentBuilder.NumRepetition));
|
||||
oldSchemaBuilder.schema.AddBuildingBlock(new FixedRepetitionBlock(currentSchema, currentBuilder.NumRepetition));
|
||||
break;
|
||||
case RepetitionType.GreedyRepetition:
|
||||
oldSchemaBuilder.schema.AddBuildingBlock(new GreedyRepetitionBlock(currentSchema));
|
||||
break;
|
||||
default:
|
||||
throw new Exception("Unrecognized RepetitionType");
|
||||
}
|
||||
|
||||
return oldInputSchemaBuilder;
|
||||
return oldSchemaBuilder;
|
||||
}
|
||||
|
||||
public InputSchema Build()
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
namespace Parsing.Schema;
|
||||
|
||||
[Flags]
|
||||
public enum InputType
|
||||
{
|
||||
Integer = BlockType.Integer,
|
||||
String = BlockType.String
|
||||
String = BlockType.String,
|
||||
Fragment = BlockType.Fragment,
|
||||
}
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
namespace Parsing.Schema;
|
||||
|
||||
public class RepetitionSchemaBuilder : InputSchemaBuilder
|
||||
public abstract class RepetitionSchemaBuilder<S, T, U> where S : RepetitionSchemaBuilder<S, T, U>, ISchemaBuilder<T, U>, new() where T : ISchema<U> where U : ISchemaContext
|
||||
{
|
||||
public RepetitionSchemaBuilder(InputSchemaBuilder upperLayerBuilder)
|
||||
public S GetNewRepetitionSchemaBuilder(S upperLayerBuilder)
|
||||
{
|
||||
this.UpperLayerBuilder = upperLayerBuilder;
|
||||
var newBuilder = new S();
|
||||
newBuilder.UpperLayerBuilder = upperLayerBuilder;
|
||||
return newBuilder;
|
||||
}
|
||||
|
||||
public InputSchemaBuilder UpperLayerBuilder { get; set; }
|
||||
public S? UpperLayerBuilder { get; set; }
|
||||
|
||||
public int NumRepetition { get; set; }
|
||||
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
namespace Parsing.Schema;
|
||||
|
||||
[Flags]
|
||||
public enum RepetitionType
|
||||
{
|
||||
FixedRepetition = BlockType.FixedRepetition,
|
||||
GreedyRepetition = BlockType.GreedyRepetition
|
||||
GreedyRepetition = BlockType.GreedyRepetition,
|
||||
NonZeroRepetition = BlockType.NonZeroRepetition,
|
||||
}
|
||||
|
||||
@@ -1,36 +1,48 @@
|
||||
namespace Parsing;
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Parsing.Schema;
|
||||
using Parsing.Tokenization;
|
||||
|
||||
public class TextParser
|
||||
public static class TextParser
|
||||
{
|
||||
private string[] delimiters;
|
||||
private bool removeEmptyEntries = false;
|
||||
private InputSchema schema;
|
||||
private InputSchemaContext context;
|
||||
|
||||
public TextParser(InputSchema schema, string[]? delimiters = null, bool removeEmptyEntries = true)
|
||||
public static TextParser<TContext> Create<TContext>(ISchema<TContext> schema, string[]? delimiters = null, bool removeEmptyEntries = true) where TContext : ISchemaContext
|
||||
{
|
||||
this.delimiters = delimiters ?? new string[] { " " };
|
||||
return new TextParser<TContext>(schema, delimiters, removeEmptyEntries);
|
||||
}
|
||||
}
|
||||
|
||||
public class TextParser<T> : TokenConverter where T : ISchemaContext
|
||||
{
|
||||
private LineParser<T> lineParser;
|
||||
private string[] lines;
|
||||
private bool removeEmptyEntries;
|
||||
|
||||
public TextParser(ISchema<T> schema, string[]? delimiters = null, bool removeEmptyEntries = true) : base()
|
||||
{
|
||||
this.lineParser = new LineParser<T>(schema, delimiters, removeEmptyEntries);
|
||||
this.lines = new string[] { };
|
||||
this.removeEmptyEntries = removeEmptyEntries;
|
||||
this.schema = schema;
|
||||
this.context = this.schema.CreateContext();
|
||||
}
|
||||
|
||||
private string[] ParseLineIntoWords(string line)
|
||||
public TextParser<T> SetInputText(string text)
|
||||
{
|
||||
var options = StringSplitOptions.TrimEntries;
|
||||
if (this.removeEmptyEntries)
|
||||
if (removeEmptyEntries)
|
||||
{
|
||||
options = options | StringSplitOptions.RemoveEmptyEntries;
|
||||
}
|
||||
return line.Split(this.delimiters, options);
|
||||
this.lines = text.Split("\n", options);
|
||||
return this;
|
||||
}
|
||||
|
||||
public IList<IToken> ParseLine(string line)
|
||||
public TextParser<T> Parse()
|
||||
{
|
||||
var words = this.ParseLineIntoWords(line);
|
||||
return this.schema.ProcessWordList(words);
|
||||
foreach (var line in this.lines)
|
||||
{
|
||||
this.rawTokens.Add(this.lineParser.ParseLine(line));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
226
TextParser/TokenConverter.cs
Normal file
226
TextParser/TokenConverter.cs
Normal file
@@ -0,0 +1,226 @@
|
||||
namespace Parsing;
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Parsing.Schema;
|
||||
using Parsing.Tokenization;
|
||||
|
||||
/// <summary>
/// Extension methods that read the values out of value tokens and map them
/// through a caller-supplied converter.
/// </summary>
public static class DataConversionHelpers
{
    /// <summary>
    /// Converts every token's value with <paramref name="converter"/>, one result per token.
    /// </summary>
    /// <typeparam name="TTokenType">Token type; only used for its constraint.</typeparam>
    /// <typeparam name="TNewType">Element type of the result.</typeparam>
    /// <typeparam name="TOldType">Value type carried by the tokens.</typeparam>
    /// <exception cref="Exception">A token is not an <c>IValueToken&lt;TOldType&gt;</c>.</exception>
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        var converted = new List<TNewType>();
        foreach (var token in tokenList)
        {
            if (token is not IValueToken<TOldType> valueToken)
            {
                throw new Exception("Invalid Token type encountered during value conversion");
            }

            converted.Add(converter(valueToken.GetValue()));
        }

        return converted;
    }

    /// <summary>
    /// Converts every token's value with <paramref name="converter"/> and flattens
    /// the per-token result lists into a single list.
    /// </summary>
    /// <typeparam name="TTokenType">Token type; only used for its constraint.</typeparam>
    /// <typeparam name="TNewType">Element type of the result.</typeparam>
    /// <typeparam name="TOldType">Value type carried by the tokens.</typeparam>
    /// <exception cref="Exception">A token is not an <c>IValueToken&lt;TOldType&gt;</c>.</exception>
    public static List<TNewType> ConvertData<TTokenType, TNewType, TOldType>(this List<IToken> tokenList, Func<TOldType, List<TNewType>> converter) where TTokenType : IValueToken<TOldType>
    {
        var flattened = new List<TNewType>();
        foreach (var token in tokenList)
        {
            if (token is not IValueToken<TOldType> valueToken)
            {
                throw new Exception("Invalid Token type encountered during value conversion");
            }

            flattened.AddRange(converter(valueToken.GetValue()));
        }

        return flattened;
    }

    /// <summary>
    /// Applies the single-value conversion to each inner token list, keeping the row structure.
    /// </summary>
    /// <typeparam name="TTokenType">Token type; only used for its constraint.</typeparam>
    /// <typeparam name="TNewType">Element type of the result rows.</typeparam>
    /// <typeparam name="TOldType">Value type carried by the tokens.</typeparam>
    public static List<List<TNewType>> ConvertData<TTokenType, TNewType, TOldType>(this List<List<IToken>> tokenListList, Func<TOldType, TNewType> converter) where TTokenType : IValueToken<TOldType>
    {
        var convertedRows = new List<List<TNewType>>();
        foreach (var row in tokenListList)
        {
            convertedRows.Add(ConvertData<TTokenType, TNewType, TOldType>(row, converter));
        }

        return convertedRows;
    }
}
|
||||
|
||||
/// <summary>
/// Extension methods that collapse a list of values into a single value.
/// </summary>
public static class DataManipulationHelpers
{
    /// <summary>
    /// Folds <paramref name="data"/> from left to right: starts with the first
    /// element and combines it with each following element via <paramref name="reducer"/>.
    /// A single-element list yields that element without calling the reducer.
    /// </summary>
    /// <typeparam name="TType">Element and result type.</typeparam>
    /// <param name="data">Values to reduce; must contain at least one element.</param>
    /// <param name="reducer">Combines the running result (first argument) with the next element.</param>
    /// <returns>The accumulated value.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="data"/> is empty.</exception>
    public static TType ReduceData<TType>(this List<TType> data, Func<TType, TType, TType> reducer)
    {
        if (data is null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        // An empty list has no seed value. The exception type matches what the
        // list indexer used to throw here, but now states the actual problem.
        if (data.Count == 0)
        {
            throw new ArgumentOutOfRangeException(nameof(data), "Cannot reduce an empty list; at least one element is required.");
        }

        TType result = data[0];
        for (int i = 1; i < data.Count; i++)
        {
            result = reducer(result, data[i]);
        }

        return result;
    }

    /// <summary>
    /// Hands the whole list to <paramref name="reducer"/> and returns its result.
    /// </summary>
    /// <typeparam name="TType">Element and result type.</typeparam>
    /// <param name="data">Values passed unchanged to the reducer.</param>
    /// <param name="reducer">Function that computes a single value from the list.</param>
    /// <returns>Whatever <paramref name="reducer"/> returns.</returns>
    public static TType ReduceData<TType>(this List<TType> data, Func<List<TType>, TType> reducer)
    {
        return reducer(data);
    }
}
|
||||
|
||||
/// <summary>
/// Holds rows of parsed tokens and exposes their values in several shapes:
/// a flat stream, rows, columns, a grid, raw tokens, or fragments.
/// </summary>
public class TokenConverter
{
    /// <summary>Parsed tokens, one inner list per row.</summary>
    protected List<List<IToken>> rawTokens = new List<List<IToken>>();

    public TokenConverter()
    {
    }

    /// <summary>
    /// Reads the typed value out of a token.
    /// </summary>
    /// <exception cref="Exception">
    /// The token is null or is not an <c>IValueToken&lt;TValue&gt;</c>.
    /// </exception>
    private static TValue ExtractValue<TValue>(IToken token)
    {
        if (token == null)
        {
            throw new Exception("No token was provided, but token was expected!");
        }

        if (token is not IValueToken<TValue> valueToken)
        {
            throw new Exception("Provided token is not a ValueToken");
        }

        return valueToken.GetValue();
    }

    /// <summary>
    /// Converts every token row into a new collection of type <typeparamref name="T"/>
    /// holding the row's values.
    /// </summary>
    private List<T> AsGenericCollection<T, U>() where T : ICollection<U>, new()
    {
        List<T> returnData = new List<T>();
        foreach (var tokenRow in this.rawTokens)
        {
            T newRow = new T();
            foreach (IToken token in tokenRow)
            {
                newRow.Add(ExtractValue<U>(token));
            }

            returnData.Add(newRow);
        }

        return returnData;
    }

    /// <summary>
    /// Verifies that all rows have the same length, which column and grid conversion require.
    /// An empty dataset passes trivially.
    /// </summary>
    /// <exception cref="Exception">Rows have differing lengths.</exception>
    private void CheckConversionPrerequisites()
    {
        // Nothing parsed yet: there is no first row to compare against.
        if (this.rawTokens.Count == 0)
        {
            return;
        }

        int rowLength = this.rawTokens[0].Count;
        foreach (var tokenRow in this.rawTokens)
        {
            if (tokenRow.Count != rowLength)
            {
                throw new Exception("Attempted to convert token dataset that is not able to be converted!");
            }
        }
    }

    /// <summary>Returns all values of all rows as one flat list, in row order.</summary>
    /// <exception cref="Exception">A token is null or not an <c>IValueToken&lt;T&gt;</c>.</exception>
    public List<T> AsSingleStream<T>()
    {
        List<T> returnData = new List<T>();
        foreach (var tokenRow in this.rawTokens)
        {
            foreach (IToken token in tokenRow)
            {
                returnData.Add(ExtractValue<T>(token));
            }
        }

        return returnData;
    }

    /// <summary>Returns the values row by row, each row as an array.</summary>
    public List<T[]> AsRows<T>()
    {
        var newList = new List<T[]>();
        foreach (var rowList in this.AsListRows<T>())
        {
            newList.Add(rowList.ToArray());
        }

        return newList;
    }

    /// <summary>Returns the values row by row, each row as a list.</summary>
    public List<List<T>> AsListRows<T>()
    {
        return this.AsGenericCollection<List<T>, T>();
    }

    /// <summary>Returns the values column by column, each column as an array.</summary>
    /// <exception cref="Exception">Rows have differing lengths.</exception>
    public List<T[]> AsColumns<T>()
    {
        var newList = new List<T[]>();
        foreach (var columnList in this.AsListColumns<T>())
        {
            newList.Add(columnList.ToArray());
        }

        return newList;
    }

    /// <summary>
    /// Returns the values column by column, each column as a list.
    /// An empty dataset yields an empty list.
    /// </summary>
    /// <exception cref="Exception">Rows have differing lengths.</exception>
    public List<List<T>> AsListColumns<T>()
    {
        this.CheckConversionPrerequisites();
        var rows = this.AsListRows<T>();

        var columns = new List<List<T>>();
        if (rows.Count == 0)
        {
            // No rows means no columns; indexing rows[0] below would throw.
            return columns;
        }

        for (int i = 0; i < rows[0].Count; i++)
        {
            columns.Add(new List<T>());
        }

        foreach (var row in rows)
        {
            for (int i = 0; i < row.Count; i++)
            {
                columns[i].Add(row[i]);
            }
        }

        return columns;
    }

    /// <summary>Returns the values as a jagged array whose rows all have the same length.</summary>
    /// <exception cref="Exception">Rows have differing lengths.</exception>
    public T[][] AsGrid<T>()
    {
        this.CheckConversionPrerequisites();
        return this.AsRows<T>().ToArray();
    }

    /// <summary>Returns the stored token rows themselves (not a copy).</summary>
    public List<List<IToken>> AsRawData()
    {
        return this.rawTokens;
    }

    /// <summary>Returns the values of all tokens as fragments, in row order.</summary>
    /// <exception cref="Exception">
    /// A token is not a fragment value token, or a token's fragment value is null.
    /// </exception>
    public List<Fragment> AsFragments()
    {
        var newList = new List<Fragment>();
        foreach (var item in this.AsSingleStream<Fragment>())
        {
            if (item is null)
            {
                throw new Exception("Invalid token type encountered");
            }

            newList.Add(item);
        }

        return newList;
    }
}
|
||||
39
TextParser/Tokenization/FragmentToken.cs
Normal file
39
TextParser/Tokenization/FragmentToken.cs
Normal file
@@ -0,0 +1,39 @@
|
||||
namespace Parsing.Tokenization;
|
||||
|
||||
using System.Runtime.CompilerServices;
|
||||
using Parsing.Schema;
|
||||
|
||||
/// <summary>
/// Result of a fragment match: maps a name to the list of strings recorded for it.
/// </summary>
public class Fragment : Dictionary<string, List<string>>
{
}
|
||||
|
||||
/// <summary>
/// Value token whose value is a <see cref="Fragment"/> built up through <see cref="AddMatch"/>.
/// </summary>
public class FragmentToken : IValueToken<Fragment>
{
    private string sourceText;
    private Fragment collectedMatches = new Fragment();

    /// <summary>Creates a token for the given text with no matches recorded yet.</summary>
    /// <param name="word">Text returned by <see cref="GetText"/>.</param>
    public FragmentToken(string word)
    {
        this.sourceText = word;
    }

    /// <summary>Returns the text this token was created with.</summary>
    public string GetText() => sourceText;

    /// <summary>
    /// Records the values under the given name. Uses <c>Dictionary.Add</c>, so adding
    /// the same name twice throws <see cref="ArgumentException"/>.
    /// </summary>
    /// <param name="name">Key under which the values are stored.</param>
    /// <param name="values">Values stored for <paramref name="name"/>.</param>
    public void AddMatch(string name, List<string> values) => this.collectedMatches.Add(name, values);

    /// <summary>Returns the fragment holding all matches recorded so far (not a copy).</summary>
    public Fragment GetValue() => this.collectedMatches;

    /// <summary>Always returns <see cref="InputType.Fragment"/>.</summary>
    public InputType GetInputType() => InputType.Fragment;
}
|
||||
@@ -1,4 +1,6 @@
|
||||
namespace Parsing.Tokenization;
|
||||
using System;
|
||||
|
||||
namespace Parsing.Tokenization;
|
||||
|
||||
public interface IValueToken<T> : IToken
|
||||
{
|
||||
|
||||
@@ -32,19 +32,30 @@ public class InputProvider
|
||||
this.CurrentPosition = 0;
|
||||
}
|
||||
|
||||
public InputProvider(string text)
|
||||
{
|
||||
this.words = text.Split("\n");
|
||||
this.CurrentPosition = 0;
|
||||
}
|
||||
|
||||
public InputProvider.LookaheadContext GetLookaheadContext()
|
||||
{
|
||||
return new InputProvider.LookaheadContext(this);
|
||||
}
|
||||
|
||||
public bool CanYieldWord()
|
||||
{
|
||||
return this.CurrentPosition < this.words.Length;
|
||||
}
|
||||
|
||||
public void SkipCurrentWord()
|
||||
{
|
||||
this.CurrentPosition += 1;
|
||||
}
|
||||
|
||||
public string YieldWord()
|
||||
{
|
||||
Console.WriteLine("current words:");
|
||||
foreach (var word in words)
|
||||
{
|
||||
Console.WriteLine(word);
|
||||
}
|
||||
if (this.CurrentPosition > this.words.Length)
|
||||
if (!this.CanYieldWord())
|
||||
{
|
||||
return string.Empty;
|
||||
}
|
||||
|
||||
@@ -1 +1 @@
|
||||
0.1.1
|
||||
0.6.0
|
||||
|
||||
Reference in New Issue
Block a user