feat: add text parser and output format options, ref: A24-3
Some checks failed
CI / linter (9.0.X, ubuntu-latest) (push) Failing after 1m51s
SonarQube Scan / SonarQube Trigger (push) Successful in 1m52s
CI / tests_linux (9.0.X, ubuntu-latest) (push) Has been skipped

This commit is contained in:
Simon Diesenreiter 2024-12-02 01:22:20 -08:00
parent a4e4ee2b85
commit 8707e0da3a
5 changed files with 193 additions and 26 deletions

View File

@ -9,9 +9,12 @@ public class TextParserTests
{ {
private const string testInput1 = "2 4 6 8"; private const string testInput1 = "2 4 6 8";
private const string testInput2 = "2 ab ba 8 cd dc"; private const string testInput2 = "2 ab ba 8 cd dc";
private const string testInput3 = @"2 4 6 1
3 5 7 2
4 6 8 3";
[Fact] [Fact]
public void TestSimpleRepetition() public void LineParser_TestSimpleRepetition()
{ {
var schemaBuilder = new InputSchemaBuilder(); var schemaBuilder = new InputSchemaBuilder();
var schema = schemaBuilder var schema = schemaBuilder
@ -20,7 +23,7 @@ public class TextParserTests
.EndRepetition() .EndRepetition()
.Build(); .Build();
var parser = new TextParser(schema); var parser = new LineParser(schema);
var tokens = parser.ParseLine(testInput1); var tokens = parser.ParseLine(testInput1);
Assert.Equal(4, tokens.Count); Assert.Equal(4, tokens.Count);
@ -35,7 +38,7 @@ public class TextParserTests
} }
[Fact] [Fact]
public void TestSimpleInput() public void LineParser_TestSimpleInput()
{ {
var schemaBuilder = new InputSchemaBuilder(); var schemaBuilder = new InputSchemaBuilder();
var schema = schemaBuilder var schema = schemaBuilder
@ -45,7 +48,7 @@ public class TextParserTests
.Expect(InputType.Integer) .Expect(InputType.Integer)
.Build(); .Build();
var parser = new TextParser(schema); var parser = new LineParser(schema);
var tokens = parser.ParseLine(testInput1); var tokens = parser.ParseLine(testInput1);
Assert.Equal(4, tokens.Count); Assert.Equal(4, tokens.Count);
@ -61,7 +64,7 @@ public class TextParserTests
[Fact] [Fact]
public void TestNestedRepetition() public void LineParser_TestNestedRepetition()
{ {
var schemaBuilder = new InputSchemaBuilder(); var schemaBuilder = new InputSchemaBuilder();
var schema = schemaBuilder var schema = schemaBuilder
@ -73,7 +76,7 @@ public class TextParserTests
.EndRepetition() .EndRepetition()
.Build(); .Build();
var parser = new TextParser(schema); var parser = new LineParser(schema);
var tokens = parser.ParseLine(testInput2); var tokens = parser.ParseLine(testInput2);
Assert.Equal(6, tokens.Count); Assert.Equal(6, tokens.Count);
@ -90,4 +93,36 @@ public class TextParserTests
Assert.Equal("cd", (tokens[4] as StringToken)?.GetValue()); Assert.Equal("cd", (tokens[4] as StringToken)?.GetValue());
Assert.Equal("dc", (tokens[5] as StringToken)?.GetValue()); Assert.Equal("dc", (tokens[5] as StringToken)?.GetValue());
} }
/// <summary>
/// Parses the three-line <c>testInput3</c> with a schema of four repeated integers
/// and checks that every line comes back as its own row of values.
/// </summary>
[Fact]
public void TextParser_TestRepetition()
{
    var schemaBuilder = new InputSchemaBuilder();
    var schema = schemaBuilder
        .Repeat(4)
        .Expect(InputType.Integer)
        .EndRepetition()
        .Build();

    var parser = new TextParser(schema);

    // AsRows is generic and has no argument to infer T from, so the element type
    // must be spelled out. Assumes integer tokens expose int values — TODO confirm.
    var rows = parser
        .SetInputText(testInput3)
        .Parse()
        .AsRows<int>();

    Assert.Equal(3, rows.Count);

    // Expected values mirror the three lines of testInput3, one array per line.
    Assert.Equal(new[] { 2, 4, 6, 1 }, rows[0]);
    Assert.Equal(new[] { 3, 5, 7, 2 }, rows[1]);
    Assert.Equal(new[] { 4, 6, 8, 3 }, rows[2]);
}
} }

36
TextParser/LineParser.cs Normal file
View File

@ -0,0 +1,36 @@
namespace Parsing;
using Parsing.Schema;
using Parsing.Tokenization;
/// <summary>
/// Splits a single line of text into words and passes them to an
/// <see cref="InputSchema"/> to be turned into tokens.
/// </summary>
public class LineParser
{
    private readonly string[] delimiters;
    private readonly bool removeEmptyEntries;
    private readonly InputSchema schema;

    // Created anew on each ParseLine call, so it is legitimately null until the first call.
    private InputSchemaContext? context;

    /// <summary>
    /// Creates a parser bound to <paramref name="schema"/>.
    /// </summary>
    /// <param name="schema">Schema whose <c>ProcessWordList</c> tokenizes the split words.</param>
    /// <param name="delimiters">Word separators; a single space is used when <c>null</c>.</param>
    /// <param name="removeEmptyEntries">When <c>true</c>, empty words produced by splitting are dropped.</param>
    /// <exception cref="ArgumentNullException"><paramref name="schema"/> is <c>null</c>.</exception>
    public LineParser(InputSchema schema, string[]? delimiters = null, bool removeEmptyEntries = true)
    {
        ArgumentNullException.ThrowIfNull(schema);

        this.delimiters = delimiters ?? new string[] { " " };
        this.removeEmptyEntries = removeEmptyEntries;
        this.schema = schema;
    }

    /// <summary>
    /// Splits <paramref name="line"/> on the configured delimiters, trimming every entry.
    /// </summary>
    private string[] ParseLineIntoWords(string line)
    {
        var options = StringSplitOptions.TrimEntries;
        if (this.removeEmptyEntries)
        {
            options |= StringSplitOptions.RemoveEmptyEntries;
        }

        return line.Split(this.delimiters, options);
    }

    /// <summary>
    /// Tokenizes one line of input.
    /// </summary>
    /// <param name="line">The text to split and tokenize.</param>
    /// <returns>The tokens returned by the schema's <c>ProcessWordList</c> for the words of the line.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="line"/> is <c>null</c>.</exception>
    public IList<IToken> ParseLine(string line)
    {
        ArgumentNullException.ThrowIfNull(line);

        // A new context is requested from the schema before every line is processed.
        this.context = this.schema.CreateContext();
        var words = this.ParseLineIntoWords(line);
        return this.schema.ProcessWordList(words);
    }
}

View File

@ -1,36 +1,39 @@
namespace Parsing; namespace Parsing;
using System;
using System.Collections.Generic;
using Parsing.Schema; using Parsing.Schema;
using Parsing.Tokenization; using Parsing.Tokenization;
public class TextParser public class TextParser : TokenConverter
{ {
private string[] delimiters; private LineParser lineParser;
private bool removeEmptyEntries = false; private string[] lines;
private InputSchema schema;
private InputSchemaContext context;
public TextParser(InputSchema schema, string[]? delimiters = null, bool removeEmptyEntries = true) private List<List<IToken>> rawTokens = new List<List<IToken>>();
public TextParser(InputSchema schema, string[]? delimiters = null, bool removeEmptyEntries = true) : base()
{ {
this.delimiters = delimiters ?? new string[] { " " }; this.lineParser = new LineParser(schema ,delimiters, removeEmptyEntries);
this.removeEmptyEntries = removeEmptyEntries;
this.schema = schema;
this.context = this.schema.CreateContext();
} }
private string[] ParseLineIntoWords(string line) public TextParser SetInputText(string text)
{ {
var options = StringSplitOptions.TrimEntries; var options = StringSplitOptions.TrimEntries;
if (this.removeEmptyEntries) if (removeEmptyEntries)
{ {
options = options | StringSplitOptions.RemoveEmptyEntries; options = options | StringSplitOptions.RemoveEmptyEntries;
} }
return line.Split(this.delimiters, options); this.lines = text.Split("\n", options);
return this;
} }
public IList<IToken> ParseLine(string line) public TextParser Parse()
{ {
var words = this.ParseLineIntoWords(line); foreach(var line in this.lines)
return this.schema.ProcessWordList(words); {
this.rawTokens.Add(this.lineParser.ParseLine(line));
}
return this;
} }
} }

View File

@ -0,0 +1,91 @@
namespace Parsing;
using System;
using System.Collections.Generic;
using Parsing.Schema;
using Parsing.Tokenization;
/// <summary>
/// Converts rows of parsed tokens into typed row, column and grid collections.
/// </summary>
public class TokenConverter
{
    // NOTE(review): this field is private and nothing in this class adds to it, so the
    // conversions below only ever see an empty list unless rows are supplied some other
    // way — confirm how derived parsers are meant to populate it.
    private readonly List<List<IToken>> rawTokens = new List<List<IToken>>();

    public TokenConverter()
    {
    }

    /// <summary>
    /// Builds one collection of type <typeparamref name="T"/> per token row, filled with
    /// the values of that row's tokens.
    /// </summary>
    /// <typeparam name="T">Collection type created for each row.</typeparam>
    /// <typeparam name="U">Value type every token in the row must carry.</typeparam>
    /// <exception cref="InvalidOperationException">A token is not an <c>IValueToken&lt;U&gt;</c>.</exception>
    private List<T> AsGenericCollection<T, U>() where T : ICollection<U>, new()
    {
        var returnData = new List<T>(this.rawTokens.Count);
        foreach (var tokenRow in this.rawTokens)
        {
            var newRow = new T();
            foreach (IToken token in tokenRow)
            {
                if (token is not IValueToken<U> valueToken)
                {
                    throw new InvalidOperationException("Provided token is not a ValueToken");
                }

                newRow.Add(valueToken.GetValue());
            }

            // Each finished row has to be collected, otherwise the result stays empty.
            returnData.Add(newRow);
        }

        return returnData;
    }

    /// <summary>
    /// Returns the token values as a list of arrays, one array per row.
    /// </summary>
    public List<T[]> AsRows<T>()
    {
        return this.AsListRows<T>().ConvertAll(rowList => rowList.ToArray());
    }

    /// <summary>
    /// Returns the token values as a list of lists, one inner list per row.
    /// </summary>
    public List<List<T>> AsListRows<T>()
    {
        return this.AsGenericCollection<List<T>, T>();
    }

    /// <summary>
    /// Returns the token values as a list of arrays, one array per column.
    /// </summary>
    public List<T[]> AsColumns<T>()
    {
        return this.AsListColumns<T>().ConvertAll(columnList => columnList.ToArray());
    }

    /// <summary>
    /// Returns the token values transposed: one inner list per column position.
    /// An empty input yields an empty list; rows of differing length contribute
    /// only to the columns they actually have.
    /// </summary>
    public List<List<T>> AsListColumns<T>()
    {
        var rows = this.AsListRows<T>();
        var columns = new List<List<T>>();
        foreach (var row in rows)
        {
            for (int i = 0; i < row.Count; i++)
            {
                // Grow on demand so neither an empty input nor a longer later row
                // causes an out-of-range access.
                if (i == columns.Count)
                {
                    columns.Add(new List<T>());
                }

                columns[i].Add(row[i]);
            }
        }

        return columns;
    }

    /// <summary>
    /// Returns the token values as a jagged array indexed by row, then column.
    /// </summary>
    public T[][] AsGrid<T>()
    {
        return this.AsRows<T>().ToArray();
    }
}

View File

@ -1,4 +1,6 @@
namespace Parsing.Tokenization; using System;
namespace Parsing.Tokenization;
public interface IValueToken<T> : IToken public interface IValueToken<T> : IToken
{ {