feat: add text parser and output format options, ref: A24-3
Some checks failed
CI / linter (9.0.X, ubuntu-latest) (push) Failing after 1m51s
SonarQube Scan / SonarQube Trigger (push) Successful in 1m52s
CI / tests_linux (9.0.X, ubuntu-latest) (push) Has been skipped

This commit is contained in:
Simon Diesenreiter 2024-12-02 01:22:20 -08:00
parent a4e4ee2b85
commit 8707e0da3a
5 changed files with 193 additions and 26 deletions

View File

@ -9,9 +9,12 @@ public class TextParserTests
{
private const string testInput1 = "2 4 6 8";
private const string testInput2 = "2 ab ba 8 cd dc";
private const string testInput3 = @"2 4 6 1
3 5 7 2
4 6 8 3";
[Fact]
public void TestSimpleRepetition()
public void LineParser_TestSimpleRepetition()
{
var schemaBuilder = new InputSchemaBuilder();
var schema = schemaBuilder
@ -20,7 +23,7 @@ public class TextParserTests
.EndRepetition()
.Build();
var parser = new TextParser(schema);
var parser = new LineParser(schema);
var tokens = parser.ParseLine(testInput1);
Assert.Equal(4, tokens.Count);
@ -35,7 +38,7 @@ public class TextParserTests
}
[Fact]
public void TestSimpleInput()
public void LineParser_TestSimpleInput()
{
var schemaBuilder = new InputSchemaBuilder();
var schema = schemaBuilder
@ -45,7 +48,7 @@ public class TextParserTests
.Expect(InputType.Integer)
.Build();
var parser = new TextParser(schema);
var parser = new LineParser(schema);
var tokens = parser.ParseLine(testInput1);
Assert.Equal(4, tokens.Count);
@ -57,11 +60,11 @@ public class TextParserTests
Assert.Equal(4, (tokens[1] as IntegerToken)?.GetValue());
Assert.Equal(6, (tokens[2] as IntegerToken)?.GetValue());
Assert.Equal(8, (tokens[3] as IntegerToken)?.GetValue());
}
}
[Fact]
public void TestNestedRepetition()
public void LineParser_TestNestedRepetition()
{
var schemaBuilder = new InputSchemaBuilder();
var schema = schemaBuilder
@ -73,7 +76,7 @@ public class TextParserTests
.EndRepetition()
.Build();
var parser = new TextParser(schema);
var parser = new LineParser(schema);
var tokens = parser.ParseLine(testInput2);
Assert.Equal(6, tokens.Count);
@ -90,4 +93,36 @@ public class TextParserTests
Assert.Equal("cd", (tokens[4] as StringToken)?.GetValue());
Assert.Equal("dc", (tokens[5] as StringToken)?.GetValue());
}
[Fact]
public void TextParser_TestRepetition()
{
    // Schema: every line consists of four integers.
    var schemaBuilder = new InputSchemaBuilder();
    var schema = schemaBuilder
        .Repeat(4)
        .Expect(InputType.Integer)
        .EndRepetition()
        .Build();

    var parser = new TextParser(schema);

    // AsRows is generic and takes no arguments, so the element type must be spelled out.
    // NOTE(review): assumes integer tokens expose their value as int — confirm against IntegerToken.
    var rows = parser
        .SetInputText(testInput3)
        .Parse()
        .AsRows<int>();

    // Expected values mirror testInput3 line by line:
    //   2 4 6 1
    //   3 5 7 2
    //   4 6 8 3
    Assert.Equal(3, rows.Count);

    Assert.Equal(4, rows[0].Length);
    Assert.Equal(2, rows[0][0]);
    Assert.Equal(4, rows[0][1]);
    Assert.Equal(6, rows[0][2]);
    Assert.Equal(1, rows[0][3]);

    Assert.Equal(4, rows[1].Length);
    Assert.Equal(3, rows[1][0]);
    Assert.Equal(5, rows[1][1]);
    Assert.Equal(7, rows[1][2]);
    Assert.Equal(2, rows[1][3]);

    Assert.Equal(4, rows[2].Length);
    Assert.Equal(4, rows[2][0]);
    Assert.Equal(6, rows[2][1]);
    Assert.Equal(8, rows[2][2]);
    Assert.Equal(3, rows[2][3]);
}
}

36
TextParser/LineParser.cs Normal file
View File

@ -0,0 +1,36 @@
namespace Parsing;
using Parsing.Schema;
using Parsing.Tokenization;
/// <summary>
/// Splits a single line of text into words and passes them to an
/// <see cref="InputSchema"/> to be turned into tokens.
/// </summary>
public class LineParser
{
    private readonly string[] delimiters;
    private readonly bool removeEmptyEntries;
    private readonly InputSchema schema;

    // Only assigned inside ParseLine, so it is nullable until the first line has been parsed.
    // NOTE(review): the stored context is never read in this class — confirm whether
    // ProcessWordList is supposed to receive it.
    private InputSchemaContext? context;

    /// <summary>
    /// Creates a parser for lines that follow <paramref name="schema"/>.
    /// </summary>
    /// <param name="schema">Schema that converts the split words into tokens.</param>
    /// <param name="delimiters">Word separators; a single space is used when null.</param>
    /// <param name="removeEmptyEntries">Whether empty words produced by the split are dropped.</param>
    /// <exception cref="ArgumentNullException"><paramref name="schema"/> is null.</exception>
    public LineParser(InputSchema schema, string[]? delimiters = null, bool removeEmptyEntries = true)
    {
        ArgumentNullException.ThrowIfNull(schema);

        this.delimiters = delimiters ?? new string[] { " " };
        this.removeEmptyEntries = removeEmptyEntries;
        this.schema = schema;
    }

    /// <summary>
    /// Splits <paramref name="line"/> on the configured delimiters. Words are always trimmed;
    /// empty words are removed only when the parser was configured to do so.
    /// </summary>
    private string[] ParseLineIntoWords(string line)
    {
        var options = StringSplitOptions.TrimEntries;
        if (this.removeEmptyEntries)
        {
            options |= StringSplitOptions.RemoveEmptyEntries;
        }

        return line.Split(this.delimiters, options);
    }

    /// <summary>
    /// Parses one line of input into tokens according to the schema.
    /// </summary>
    /// <param name="line">The text of a single line.</param>
    /// <returns>The tokens the schema produced for the words of the line.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="line"/> is null.</exception>
    public IList<IToken> ParseLine(string line)
    {
        ArgumentNullException.ThrowIfNull(line);

        // A fresh context is requested from the schema for every line.
        this.context = this.schema.CreateContext();
        var words = this.ParseLineIntoWords(line);
        return this.schema.ProcessWordList(words);
    }
}

View File

@ -1,36 +1,39 @@
namespace Parsing;
using System;
using System.Collections.Generic;
using Parsing.Schema;
using Parsing.Tokenization;
public class TextParser
public class TextParser : TokenConverter
{
private string[] delimiters;
private bool removeEmptyEntries = false;
private InputSchema schema;
private InputSchemaContext context;
private LineParser lineParser;
private string[] lines;
public TextParser(InputSchema schema, string[]? delimiters = null, bool removeEmptyEntries = true)
private List<List<IToken>> rawTokens = new List<List<IToken>>();
public TextParser(InputSchema schema, string[]? delimiters = null, bool removeEmptyEntries = true) : base()
{
this.delimiters = delimiters ?? new string[] { " " };
this.removeEmptyEntries = removeEmptyEntries;
this.schema = schema;
this.context = this.schema.CreateContext();
this.lineParser = new LineParser(schema ,delimiters, removeEmptyEntries);
}
private string[] ParseLineIntoWords(string line)
public TextParser SetInputText(string text)
{
var options = StringSplitOptions.TrimEntries;
if (this.removeEmptyEntries)
if (removeEmptyEntries)
{
options = options | StringSplitOptions.RemoveEmptyEntries;
}
return line.Split(this.delimiters, options);
this.lines = text.Split("\n", options);
return this;
}
public IList<IToken> ParseLine(string line)
public TextParser Parse()
{
var words = this.ParseLineIntoWords(line);
return this.schema.ProcessWordList(words);
foreach(var line in this.lines)
{
this.rawTokens.Add(this.lineParser.ParseLine(line));
}
return this;
}
}

View File

@ -0,0 +1,91 @@
namespace Parsing;
using System;
using System.Collections.Generic;
using Parsing.Schema;
using Parsing.Tokenization;
/// <summary>
/// Converts rows of parsed tokens into typed rows, columns or a grid.
/// </summary>
public class TokenConverter
{
    // One inner list per parsed input row.
    // NOTE(review): nothing in this class adds to this list, and because it is private a
    // derived class cannot fill it either. TextParser declares its own private rawTokens
    // field, so rows parsed there do not appear to reach the conversions below — confirm
    // and decide how the rows are meant to be handed over.
    private List<List<IToken>> rawTokens = new List<List<IToken>>();

    public TokenConverter()
    {
    }

    /// <summary>
    /// Builds one collection of type <typeparamref name="T"/> per token row, filled with the
    /// values of that row's tokens.
    /// </summary>
    /// <typeparam name="T">Collection type created for each row.</typeparam>
    /// <typeparam name="U">Value type every token in the row must provide.</typeparam>
    /// <exception cref="InvalidOperationException">
    /// A token does not implement <see cref="IValueToken{U}"/>.
    /// </exception>
    private List<T> AsGenericCollection<T, U>() where T : ICollection<U>, new()
    {
        var returnData = new List<T>(this.rawTokens.Count);
        foreach (var tokenRow in this.rawTokens)
        {
            var newRow = new T();
            foreach (IToken token in tokenRow)
            {
                if (token is not IValueToken<U> valueToken)
                {
                    throw new InvalidOperationException("Provided token is not a ValueToken");
                }

                newRow.Add(valueToken.GetValue());
            }

            // Each finished row must be collected, otherwise the result stays empty.
            returnData.Add(newRow);
        }

        return returnData;
    }

    /// <summary>Returns every token row as an array of <typeparamref name="T"/> values.</summary>
    public List<T[]> AsRows<T>()
    {
        return this.AsListRows<T>().ConvertAll(row => row.ToArray());
    }

    /// <summary>Returns every token row as a list of <typeparamref name="T"/> values.</summary>
    public List<List<T>> AsListRows<T>()
    {
        return this.AsGenericCollection<List<T>, T>();
    }

    /// <summary>Returns the transposed data: one array per column.</summary>
    public List<T[]> AsColumns<T>()
    {
        return this.AsListColumns<T>().ConvertAll(column => column.ToArray());
    }

    /// <summary>
    /// Returns the transposed data: one list per column. Returns an empty list when there
    /// are no rows. Columns are created on demand, so a row that is longer than the ones
    /// before it extends the result instead of indexing out of range.
    /// </summary>
    public List<List<T>> AsListColumns<T>()
    {
        var rows = this.AsListRows<T>();
        var columns = new List<List<T>>();
        foreach (var row in rows)
        {
            for (int i = 0; i < row.Count; i++)
            {
                // i can be at most columns.Count here, so a single Add keeps the index valid.
                if (i == columns.Count)
                {
                    columns.Add(new List<T>());
                }

                columns[i].Add(row[i]);
            }
        }

        return columns;
    }

    /// <summary>Returns all rows as a jagged array.</summary>
    public T[][] AsGrid<T>()
    {
        return this.AsRows<T>().ToArray();
    }
}

View File

@ -1,4 +1,6 @@
namespace Parsing.Tokenization;
using System;
namespace Parsing.Tokenization;
public interface IValueToken<T> : IToken
{