// File: TextParser/TextParser/Schema/FragmentSchema.cs

namespace Parsing.Schema;
using Parsing.Schema;
using Parsing.Schema.BuildingBlocks;
using Parsing.Tokenization;
using System.Collections;
using System.Text.RegularExpressions;
/// <summary>
/// Mutable progress state that accompanies a <see cref="FragmentSchema"/> while it
/// consumes words.
/// </summary>
public class FragmentSchemaContext : ISchemaContext
{
    /// <summary>
    /// Index of the last processed block. Starts at 0 (the default for <c>int</c>).
    /// </summary>
    public int lastProcessedBlockIndex { get; set; }

    /// <summary>
    /// Whether the schema is done with its input. Starts as <c>false</c> (the default
    /// for <c>bool</c>); once it is <c>true</c>, <see cref="FragmentSchema"/> refuses
    /// to process further words.
    /// </summary>
    public bool HasFinished { get; set; }
}
/// <summary>
/// Schema that tokenizes words by matching each one against a single regular expression.
/// Every match found in a word becomes one <see cref="FragmentToken"/> carrying the
/// captures of the configured named groups and the matched lengths of the configured
/// named literals.
/// </summary>
public class FragmentSchema : ISchema<FragmentSchemaContext>
{
    private readonly string fragmentRegex;
    private readonly List<string> namedGroups;
    private readonly List<string> namedLiterals;

    // The pattern is parsed once, on first use, and then reused for every word instead
    // of being rebuilt on each call. Construction is deferred so that a null or invalid
    // pattern still surfaces when a word is first processed, not in the constructor.
    private readonly System.Lazy<Regex> compiledRegex;

    /// <summary>
    /// Creates a schema for the given pattern.
    /// </summary>
    /// <param name="fragmentRegex">Regular expression applied to every word.</param>
    /// <param name="namedGroups">
    /// Names of the groups whose individual capture values are copied onto each token.
    /// </param>
    /// <param name="namedLiterals">
    /// Names of the groups for which only the matched length is recorded on each token.
    /// </param>
    public FragmentSchema(string fragmentRegex, List<string> namedGroups, List<string> namedLiterals)
    {
        this.fragmentRegex = fragmentRegex;
        this.namedGroups = namedGroups;
        this.namedLiterals = namedLiterals;
        this.compiledRegex = new System.Lazy<Regex>(() => new Regex(this.fragmentRegex));
    }

    /// <summary>
    /// Takes the next word from <paramref name="inputs"/> and produces one token per
    /// regex match found in it.
    /// </summary>
    /// <param name="currentContext">
    /// Context to update; <c>HasFinished</c> is set once <paramref name="inputs"/>
    /// reports that it cannot yield another word.
    /// </param>
    /// <param name="inputs">Provider the word is taken from.</param>
    /// <returns>The tokens for the word, in match order; empty when nothing matched.</returns>
    public List<IToken> ProcessNextWord(FragmentSchemaContext currentContext, InputProvider inputs)
    {
        var tokenList = new List<IToken>();

        // One token per match.
        foreach (Match match in this.compiledRegex.Value.Matches(inputs.YieldWord()))
        {
            var newToken = new FragmentToken(match.Value);

            // Named groups: the token receives every capture value of the group.
            foreach (var groupName in this.namedGroups)
            {
                var captureList = match.Groups[groupName].Captures
                    .Select(capture => capture.Value)
                    .ToList();
                newToken.AddMatch(groupName, captureList);
            }

            // Named literals: only the matched length is recorded, and only when the
            // group exists in the pattern and matched at least one character. An
            // entry (possibly empty) is still added for every literal name.
            foreach (var literalName in this.namedLiterals)
            {
                var captureList = new List<string>();
                if (match.Groups.TryGetValue(literalName, out var literalGroup) && literalGroup.Length > 0)
                {
                    captureList.Add(literalGroup.Length.ToString());
                }
                newToken.AddMatch(literalName, captureList);
            }

            tokenList.Add(newToken);
        }

        if (!inputs.CanYieldWord())
        {
            currentContext.HasFinished = true;
        }

        return tokenList;
    }

    /// <summary>
    /// Checks whether the upcoming word of <paramref name="inputs"/> can be processed,
    /// reading it inside a lookahead context.
    /// </summary>
    /// <param name="currentContext">Context whose <c>HasFinished</c> flag is consulted.</param>
    /// <param name="inputs">Provider whose upcoming word is inspected.</param>
    /// <returns><c>true</c> when the schema has not finished and the word matches.</returns>
    public bool CanProcessNextWord(FragmentSchemaContext currentContext, InputProvider inputs)
    {
        // NOTE(review): the lookahead context presumably restores the read position
        // when disposed, so the peeked word is not consumed - confirm in InputProvider.
        using (inputs.GetLookaheadContext())
        {
            return this.CanProcessNextWord(currentContext, inputs.YieldWord());
        }
    }

    /// <summary>
    /// Checks whether <paramref name="word"/> can be processed by this schema.
    /// </summary>
    /// <param name="currentContext">Context whose <c>HasFinished</c> flag is consulted.</param>
    /// <param name="word">Word to test against the pattern.</param>
    /// <returns>
    /// <c>false</c> when the context has finished; otherwise whether the pattern
    /// matches anywhere in <paramref name="word"/>.
    /// </returns>
    public bool CanProcessNextWord(FragmentSchemaContext currentContext, string word)
    {
        if (currentContext.HasFinished)
        {
            return false;
        }

        return this.compiledRegex.Value.IsMatch(word);
    }

    /// <summary>
    /// Runs the schema over <paramref name="words"/> with a fresh context, skipping
    /// words the pattern does not match.
    /// </summary>
    /// <param name="words">Words to tokenize, in order.</param>
    /// <returns>All tokens produced for the matching words, in input order.</returns>
    public List<IToken> ProcessWordList(string[] words)
    {
        var tokens = new List<IToken>();
        var inputs = new InputProvider(words);
        var overallContext = this.CreateContext();

        while (!overallContext.HasFinished && inputs.CanYieldWord())
        {
            if (this.CanProcessNextWord(overallContext, inputs))
            {
                tokens.AddRange(this.ProcessNextWord(overallContext, inputs));
            }
            else
            {
                inputs.SkipCurrentWord();
            }
        }

        return tokens;
    }

    /// <summary>
    /// Creates a new context in its initial state.
    /// </summary>
    /// <returns>A fresh <see cref="FragmentSchemaContext"/>.</returns>
    public FragmentSchemaContext CreateContext()
    {
        return new FragmentSchemaContext();
    }
}