113 lines
3.4 KiB
C#
113 lines
3.4 KiB
C#
namespace Parsing.Schema;
|
|
|
|
using Parsing.Schema;
|
|
using Parsing.Schema.BuildingBlocks;
|
|
using Parsing.Tokenization;
|
|
using System.Collections;
|
|
using System.Text.RegularExpressions;
|
|
|
|
/// <summary>
/// Mutable state carried between calls while a <c>FragmentSchema</c> works through its input.
/// </summary>
public class FragmentSchemaContext : ISchemaContext
{
    /// <summary>
    /// Starts at 0. Nothing in this file reads or writes it.
    /// NOTE(review): presumably the index of the most recently processed block; confirm
    /// against <c>ISchemaContext</c> and its other consumers.
    /// </summary>
    public int lastProcessedBlockIndex { get; set; }

    /// <summary>
    /// Starts as <c>false</c>; set to <c>true</c> once the schema has no further input to process.
    /// </summary>
    public bool HasFinished { get; set; }
}
|
|
|
|
/// <summary>
/// Schema that tokenizes input words with a single regular expression. Every match found in a
/// word becomes one <see cref="FragmentToken"/> annotated with the captures of the configured
/// groups.
/// </summary>
public class FragmentSchema : ISchema<FragmentSchemaContext>
{
    private readonly string fragmentRegex;
    private readonly List<string> namedGroups;
    private readonly List<string> namedLiterals;

    // The pattern never changes after construction, so the Regex is built once, on first use,
    // instead of on every call. Building it lazily means an invalid pattern fails at the first
    // matching call rather than inside the constructor.
    private readonly Lazy<Regex> compiledRegex;

    /// <summary>
    /// Creates a schema for the given pattern.
    /// </summary>
    /// <param name="fragmentRegex">Regular expression pattern applied to each word.</param>
    /// <param name="namedGroups">
    /// Names of groups whose captured text is attached to each token. <c>null</c> is treated as
    /// an empty list.
    /// </param>
    /// <param name="namedLiterals">
    /// Names of groups for which only the matched length is attached to each token. <c>null</c>
    /// is treated as an empty list.
    /// </param>
    public FragmentSchema(string fragmentRegex, List<string> namedGroups, List<string> namedLiterals)
    {
        this.fragmentRegex = fragmentRegex;
        this.namedGroups = namedGroups ?? new List<string>();
        this.namedLiterals = namedLiterals ?? new List<string>();
        this.compiledRegex = new Lazy<Regex>(() => new Regex(this.fragmentRegex));
    }

    /// <summary>
    /// Takes one word from <paramref name="inputs"/> via <c>YieldWord</c> and returns one token
    /// per regex match found in it.
    /// </summary>
    /// <param name="currentContext">
    /// Context whose <c>HasFinished</c> flag is set when <paramref name="inputs"/> reports that
    /// it cannot yield another word after this call.
    /// </param>
    /// <param name="inputs">Provider the word is taken from.</param>
    /// <returns>The tokens built from the word's matches; empty when nothing matched.</returns>
    public List<IToken> ProcessNextWord(FragmentSchemaContext currentContext, InputProvider inputs)
    {
        // Resolve the regex before taking a word so a bad pattern fails without consuming input.
        Regex regex = this.compiledRegex.Value;
        var tokenList = new List<IToken>();

        // one token per match
        foreach (Match match in regex.Matches(inputs.YieldWord()))
        {
            tokenList.Add(this.BuildToken(match));
        }

        if (!inputs.CanYieldWord())
        {
            currentContext.HasFinished = true;
        }

        return tokenList;
    }

    /// <summary>
    /// Checks whether the next word of <paramref name="inputs"/> can be processed, reading it
    /// inside the scope returned by <c>GetLookaheadContext</c>.
    /// NOTE(review): presumably disposing that scope rewinds the provider so the word is not
    /// consumed; confirm against <c>InputProvider</c>.
    /// </summary>
    /// <param name="currentContext">Context consulted for its <c>HasFinished</c> flag.</param>
    /// <param name="inputs">Provider the candidate word is read from.</param>
    /// <returns><c>true</c> when the context is unfinished and the word matches the pattern.</returns>
    public bool CanProcessNextWord(FragmentSchemaContext currentContext, InputProvider inputs)
    {
        using (inputs.GetLookaheadContext())
        {
            return this.CanProcessNextWord(currentContext, inputs.YieldWord());
        }
    }

    /// <summary>
    /// Checks whether <paramref name="word"/> can be processed by this schema.
    /// </summary>
    /// <param name="currentContext">Context consulted for its <c>HasFinished</c> flag.</param>
    /// <param name="word">Candidate word.</param>
    /// <returns>
    /// <c>false</c> when the context has finished; otherwise whether the pattern matches
    /// anywhere in <paramref name="word"/>.
    /// </returns>
    public bool CanProcessNextWord(FragmentSchemaContext currentContext, string word)
    {
        if (currentContext.HasFinished)
        {
            return false;
        }

        return this.compiledRegex.Value.IsMatch(word);
    }

    /// <summary>
    /// Tokenizes a whole word list using a fresh context: words that can be processed are
    /// tokenized, all others are skipped via <c>SkipCurrentWord</c>.
    /// </summary>
    /// <param name="words">Words to tokenize, in order.</param>
    /// <returns>All tokens produced, in input order.</returns>
    public List<IToken> ProcessWordList(string[] words)
    {
        var tokens = new List<IToken>();
        var inputs = new InputProvider(words);
        FragmentSchemaContext overallContext = this.CreateContext();

        while (!overallContext.HasFinished && inputs.CanYieldWord())
        {
            if (this.CanProcessNextWord(overallContext, inputs))
            {
                tokens.AddRange(this.ProcessNextWord(overallContext, inputs));
            }
            else
            {
                inputs.SkipCurrentWord();
            }
        }

        // The context is local to this call and discarded here, so there is no need to mark it
        // finished after the loop.
        return tokens;
    }

    /// <summary>
    /// Creates a new, unfinished context for this schema.
    /// </summary>
    /// <returns>A fresh <see cref="FragmentSchemaContext"/>.</returns>
    public FragmentSchemaContext CreateContext() => new FragmentSchemaContext();

    /// <summary>
    /// Builds the token for one regex match and attaches an entry for every configured group
    /// and literal name.
    /// </summary>
    private FragmentToken BuildToken(Match match)
    {
        var newToken = new FragmentToken(match.Value);

        // Named groups: attach the text of every capture. A group that is absent from the
        // pattern or did not participate in the match has no captures, giving an empty list.
        foreach (var groupName in this.namedGroups)
        {
            var captureList = new List<string>();
            foreach (Capture capture in match.Groups[groupName].Captures)
            {
                captureList.Add(capture.Value);
            }

            newToken.AddMatch(groupName, captureList);
        }

        // Named literals: attach only the matched length (as a string), and only when the
        // group matched a non-empty span. The indexer returns a failed, zero-length group for
        // unknown names, so no separate existence check is required.
        foreach (var literalName in this.namedLiterals)
        {
            var captureList = new List<string>();
            Group literalGroup = match.Groups[literalName];
            if (literalGroup.Length > 0)
            {
                captureList.Add(literalGroup.Length.ToString());
            }

            newToken.AddMatch(literalName, captureList);
        }

        return newToken;
    }
}
|