From 8707e0da3a7e5fc48974a32990a4c341ea065922 Mon Sep 17 00:00:00 2001 From: Simon Diesenreiter Date: Mon, 2 Dec 2024 01:22:20 -0800 Subject: [PATCH] feat: add text parser and output format options, ref: A24-3 --- TextParser.Tests/TextParserTests.cs | 53 ++++++++++++--- TextParser/LineParser.cs | 36 ++++++++++ TextParser/TextParser.cs | 35 +++++----- TextParser/TokenConverter.cs | 91 ++++++++++++++++++++++++++ TextParser/Tokenization/IValueToken.cs | 4 +- 5 files changed, 193 insertions(+), 26 deletions(-) create mode 100644 TextParser/LineParser.cs create mode 100644 TextParser/TokenConverter.cs diff --git a/TextParser.Tests/TextParserTests.cs b/TextParser.Tests/TextParserTests.cs index 16b125b..0cde1ab 100644 --- a/TextParser.Tests/TextParserTests.cs +++ b/TextParser.Tests/TextParserTests.cs @@ -9,9 +9,12 @@ public class TextParserTests { private const string testInput1 = "2 4 6 8"; private const string testInput2 = "2 ab ba 8 cd dc"; + private const string testInput3 = @"2 4 6 1 + 3 5 7 2 + 4 6 8 3"; [Fact] - public void TestSimpleRepetition() + public void LineParser_TestSimpleRepetition() { var schemaBuilder = new InputSchemaBuilder(); var schema = schemaBuilder @@ -20,7 +23,7 @@ public class TextParserTests .EndRepetition() .Build(); - var parser = new TextParser(schema); + var parser = new LineParser(schema); var tokens = parser.ParseLine(testInput1); Assert.Equal(4, tokens.Count); @@ -35,7 +38,7 @@ public class TextParserTests } [Fact] - public void TestSimpleInput() + public void LineParser_TestSimpleInput() { var schemaBuilder = new InputSchemaBuilder(); var schema = schemaBuilder @@ -45,7 +48,7 @@ public class TextParserTests .Expect(InputType.Integer) .Build(); - var parser = new TextParser(schema); + var parser = new LineParser(schema); var tokens = parser.ParseLine(testInput1); Assert.Equal(4, tokens.Count); @@ -57,11 +60,11 @@ public class TextParserTests Assert.Equal(4, (tokens[1] as IntegerToken)?.GetValue()); Assert.Equal(6, (tokens[2] as IntegerToken)?.GetValue()); Assert.Equal(8, (tokens[3] as IntegerToken)?.GetValue()); - } - - + } + + [Fact] - public void TestNestedRepetition() + public void LineParser_TestNestedRepetition() { var schemaBuilder = new InputSchemaBuilder(); var schema = schemaBuilder @@ -73,7 +76,7 @@ public class TextParserTests .EndRepetition() .Build(); - var parser = new TextParser(schema); + var parser = new LineParser(schema); var tokens = parser.ParseLine(testInput2); Assert.Equal(6, tokens.Count); @@ -90,4 +93,36 @@ public class TextParserTests Assert.Equal("cd", (tokens[4] as StringToken)?.GetValue()); Assert.Equal("dc", (tokens[5] as StringToken)?.GetValue()); } + + [Fact] + public void TextParser_TestRepetition() + { + var schemaBuilder = new InputSchemaBuilder(); + var schema = schemaBuilder + .Repeat(4) + .Expect(InputType.Integer) + .EndRepetition() + .Build(); + + var parser = new TextParser(schema); + var rows = parser + .SetInputText(testInput3) + .Parse() + .AsRows(); + + Assert.Equal(3, rows.Count); + Assert.Equal(4, rows[0].Length); + Assert.Equal(2, rows[0][0]); + Assert.Equal(4, rows[0][1]); + Assert.Equal(6, rows[0][2]); + Assert.Equal(1, rows[0][3]); + Assert.Equal(2, rows[1][0]); + Assert.Equal(4, rows[1][1]); + Assert.Equal(6, rows[1][2]); + Assert.Equal(1, rows[1][3]); + Assert.Equal(2, rows[2][0]); + Assert.Equal(4, rows[2][1]); + Assert.Equal(6, rows[2][2]); + Assert.Equal(1, rows[2][3]); + } } diff --git a/TextParser/LineParser.cs b/TextParser/LineParser.cs new file mode 100644 index 0000000..675005f --- /dev/null +++ b/TextParser/LineParser.cs @@ -0,0 +1,36 @@ +namespace Parsing; + +using Parsing.Schema; +using Parsing.Tokenization; + +public class LineParser +{ + private string[] delimiters; + private bool removeEmptyEntries = false; + private InputSchema schema; + private InputSchemaContext context; + + public LineParser(InputSchema schema, string[]? delimiters = null, bool removeEmptyEntries = true) + { + this.delimiters = delimiters ?? new string[] { " " }; + this.removeEmptyEntries = removeEmptyEntries; + this.schema = schema; + } + + private string[] ParseLineIntoWords(string line) + { + var options = StringSplitOptions.TrimEntries; + if (this.removeEmptyEntries) + { + options = options | StringSplitOptions.RemoveEmptyEntries; + } + return line.Split(this.delimiters, options); + } + + public IList ParseLine(string line) + { + this.context = this.schema.CreateContext(); + var words = this.ParseLineIntoWords(line); + return this.schema.ProcessWordList(words); + } +} diff --git a/TextParser/TextParser.cs b/TextParser/TextParser.cs index e770503..70bd5ca 100644 --- a/TextParser/TextParser.cs +++ b/TextParser/TextParser.cs @@ -1,36 +1,39 @@ namespace Parsing; +using System; +using System.Collections.Generic; using Parsing.Schema; using Parsing.Tokenization; -public class TextParser +public class TextParser : TokenConverter { - private string[] delimiters; - private bool removeEmptyEntries = false; - private InputSchema schema; - private InputSchemaContext context; + private LineParser lineParser; + private string[] lines; - public TextParser(InputSchema schema, string[]? delimiters = null, bool removeEmptyEntries = true) + private List> rawTokens = new List>(); + + public TextParser(InputSchema schema, string[]? delimiters = null, bool removeEmptyEntries = true) : base() { - this.delimiters = delimiters ?? new string[] { " " }; - this.removeEmptyEntries = removeEmptyEntries; - this.schema = schema; - this.context = this.schema.CreateContext(); + this.lineParser = new LineParser(schema ,delimiters, removeEmptyEntries); } - private string[] ParseLineIntoWords(string line) + public TextParser SetInputText(string text) { var options = StringSplitOptions.TrimEntries; - if (this.removeEmptyEntries) + if (removeEmptyEntries) { options = options | StringSplitOptions.RemoveEmptyEntries; } - return line.Split(this.delimiters, options); + this.lines = text.Split("\n", options); + return this; } - public IList ParseLine(string line) + public TextParser Parse() { - var words = this.ParseLineIntoWords(line); - return this.schema.ProcessWordList(words); + foreach(var line in this.lines) + { + this.rawTokens.Add(this.lineParser.ParseLine(line)); + } + return this; } } diff --git a/TextParser/TokenConverter.cs b/TextParser/TokenConverter.cs new file mode 100644 index 0000000..b548429 --- /dev/null +++ b/TextParser/TokenConverter.cs @@ -0,0 +1,91 @@ +namespace Parsing; + +using System; +using System.Collections.Generic; +using Parsing.Schema; +using Parsing.Tokenization; + +public class TokenConverter +{ + private List> rawTokens = new List>(); + + public TokenConverter() + { + } + + private List AsGenericCollection() where T : ICollection + { + List returnData = new List(); + foreach(var tokenRow in this.rawTokens) + { + T newRow = new T(); + foreach(IToken token in tokenRow) + { + IValueToken valueToken = token as IValueToken; + if (valueToken == null) + { + throw new Exception("Provided token is not a ValueToken"); + } + newRow.Add(valueToken.GetValue()); + } + } + } + + public List AsRows() + { + var listRows = this.AsListRows(); + var newList = new List(); + + foreach(var rowList in listRows) + { + newList.Add(rowList.ToArray()); + } + + return newList; + } + + public List> AsListRows() + { + return this.AsGenericCollection, T>(); + } + + public List AsColumns() + { + var listColumns = this.AsListColumns(); + var newList = new List(); + + foreach(var columnList in listColumns) + { + newList.Add(columnList.ToArray()); + } + + return newList; + } + + public List> AsListColumns() + { + var rows = AsListRows(); + + var columns = new List>(); + for(int i =0; i()); + } + + foreach(var row in rows) + { + for(int i = 0; i < row.Count; i++) + { + columns[i].Add(row[i]); + } + } + + return columns; + } + + public T[][] AsGrid() + { + var rowsList = AsRows(); + return rowsList.ToArray(); + } +} diff --git a/TextParser/Tokenization/IValueToken.cs b/TextParser/Tokenization/IValueToken.cs index a217488..3990c58 100644 --- a/TextParser/Tokenization/IValueToken.cs +++ b/TextParser/Tokenization/IValueToken.cs @@ -1,4 +1,6 @@ -namespace Parsing.Tokenization; +using System; + +namespace Parsing.Tokenization; public interface IValueToken : IToken {