• Avalanche.Core
Search Results for

    Show / Hide Table of Contents
    • Avalanche.Accessor
      • Introduction
      • IAccessor
        • IAccessor
        • IListAccessor
        • IMapAccessor
        • IRecordAccessor
        • IContentAccessor
        • IOneOfAccessor
        • IAnyAccessor
      • .Net
        • Introduction
        • IList<T>
        • IDictionary<K,V>
        • FieldInfo
        • OneOfAttribute
        • StructLayoutAttribute
        • Class
      • Protobuf
        • Introduction
      • Articles
        • Dependency Injection
        • AccessorMessages
    • Avalanche.Binding
      • Introduction
    • Avalanche.Core
      • License
    • Avalanche.DataType
      • Introduction
      • DataType
        • IDataType
        • IListType
        • IMapType
        • IRecordType
        • IFieldType
        • IOneOfType
        • IAnyType
        • IStringType
        • IValueType
        • IIntegerType
        • IEnumerationType
        • IRealType
      • .Net
        • Introduction
        • IList<T>
        • IDictionary<K,V>
        • FieldInfo
        • Enum
        • OneOfAttribute
        • StructLayoutAttribute
        • Class
      • Protobuf
        • Introduction
      • Articles
        • DataTypeRequest
        • PrintTree
        • DataTypeMessages
    • Avalanche.Emit
      • Introduction
      • TypeBuilder
      • ConstructorBuilder
      • MethodBuilder
      • PropertyBuilder
      • FieldBuilder
      • Emit
      • Utilities
    • Avalanche.FileSystem
      • Introduction
      • Abstractions
        • IFileSystem
          • IFileSystemBrowse
          • IFileSystemCreateDirectory
          • IFileSystemDelete
          • IFileSystemFileAttribute
          • IFileSystemMount
          • IFileSystemMove
          • IFileSystemObserve
          • IFileSystemOpen
        • IEvent
        • IEntry
        • IOption
        • IToken
      • FileSystem
      • VirtualFileSystem
      • MemoryFileSystem
      • EmbeddedFileSystem
      • HttpFileSystem
      • Decoration
      • IFileProvider
      • Events
      • Utilities
        • Dispose
        • File Scanner
        • Visit Tree
        • File Operation
    • Avalanche.Identity
      • Introduction
      • Identity
      • IdentityParts
      • IdentityInterner
      • IdentityComparer
      • Print Tree
      • IdentityAccessors
        • Introduction
        • TypeName
    • Avalanche.Localization
      • Introduction
      • Localization
      • LocalizationFile
      • LocalizationFiles
      • LocalizationFileSystem
      • LocalizationFileFormat
      • LocalizationLine
      • LocalizationLines
      • TemplateFormat
      • CultureProvider
      • FallbackCultureProvider
      • ResourceManager
      • LocalizationError
      • Microsoft.Extensions
        • Introduction
        • DependencyInjection
        • FileProvider
        • Logging
        • ITextLocalizer
        • IFileLocalizer
        • Localization
      • Asp.Net
        • Introduction
        • Supplying localization
        • Inject to pages
        • Culture Assigned
        • Minimalistic Api
        • Diagnostics
      • Pluralization
        • Introduction
        • Multiple plural parameters
        • Custom PluralRules
        • Invariant Culture
        • Unit Prefix
        • IPluralRule
        • IPluralNumber
        • IPluralRules
        • CLDRs
        • Unicode.CLDR40
        • Unicode.CLDR41
        • Unicode.CLDR42
      • Articles
        • Alphabet localization
        • Benchmarks
        • Caching
        • Class Library
        • Demo
        • Diagnostics
        • Embedded resources
        • Emplacement
        • File localization
        • Text localization
        • Printing templates
    • Avalanche.Message
      • Introduction
      • IMessage
      • IMessageProvider
      • IMessageDescription
      • IMessageDescriptions
      • MessageLevel
      • Message printing
      • Messages and Exceptions
      • Microsoft.Extensions
        • DependencyInjection
      • Articles
        • Aggregate Messages
        • Localization
        • Logging
        • Validation
    • Avalanche.Service
      • Introduction
      • Service
        • Introduction
        • IService
        • IServiceDisposable
        • IServiceDecoration
        • IServiceCast
        • IServiceObservable
        • IServiceContainer
        • Construction
        • Query
        • CancellationToken
        • CachePolicy
        • Scope
      • Handler
        • Introduction
        • IHandler
        • IHandlerCast
        • IHandlerDecoration
        • IHandlerWithOrder
        • CancellationToken
        • Cyclicity
        • Delegates
        • Invokable
        • ExportAttribute
        • OrderAttribute
        • PrintTree
        • Recursion
      • Query
        • Introduction
        • IQuery
        • IQueryCast
        • IQueryDecoration
      • Entry
        • Introduction
        • IEntry
        • IEntryCast
        • IEntryDecoration
        • IEntryObservable
        • IEntryVisitable
        • EntryState
      • Request
        • Introduction
        • IRequest
        • IRequestFor
        • IRequestToBeCached
        • IRequestToBeDisposed
        • RequestAttribute
        • ContextParameterAttribute
        • Print Tree
      • Dependency Injection
        • Introduction
        • Asp.Net
        • ServiceRequest<T>
        • Decorating a service
        • Handler
        • CachePolicy
        • CancellationToken
        • QueryLogger
      • Examples
        • NodeCount
        • Expression
        • Mapper
      • Articles
        • Benchmarks
        • Error Handling
        • ServiceMessages
    • Avalanche.StatusCode
      • Introduction
      • HResult
        • Introduction
        • HResult.Facilities
        • BasicMessages
        • RpcMessages
        • DispatchMessages
        • ItfMessages
        • Win32Messages
        • ClrMessages
      • System
        • Introduction
        • AccessControlMessages
        • AggregateMessages
        • AppDomainMessages
        • ArgumentMessages
        • ArgumentNullMessages
        • ArgumentOutOfRangeMessages
        • ArithmeticMessages
        • ArrayMessages
        • AssemblyMessages
        • BadImageFormatMessages
        • CodeContractMessages
        • CodePageMessages
        • CollectionsMessages
        • CompilerServiceMessages
        • CryptographyMessages
        • CultureMessages
        • DiagnosticsMessages
        • EventSourceMessages
        • ExecutionEngineMessages
        • FormatMessages
        • HostProtectionMessages
        • IOMessages
        • IndexOutOfRangeMessages
        • InteropServiceMessages
        • InvalidCastMessages
        • InvalidOperationMessages
        • IsolatedStorageMessages
        • LazyMessages
        • MarshalerMessages
        • MemoryMessages
        • MiscellaneousMessages
        • NotImplementedMessages
        • NotSupportedMessages
        • ObjectDisposedMessages
        • OperationCanceledMessages
        • OverflowMessages
        • PlatformMessages
        • PolicyMessages
        • PrincipalMessages
        • ProgramMessages
        • ReferenceMessages
        • ReflectionMessages
        • RegionMessages
        • RemotingMessages
        • ResourcesMessages
        • SecurityMessages
        • SerializationMessages
        • StackMessages
        • TaskMessages
        • TextMessages
        • ThreadingMessages
        • TimeZoneMessages
        • TypeMessages
        • XmlMessages
      • HttpStatusCode
      • OpcUaStatusCode
    • Avalanche.Template
      • Introduction
      • TemplateFormats
      • ITemplatePrintable
      • ITemplateFormatPrintable
      • ITemplateText
      • ITemplateBreakdown
      • ITemplateFormat
      • ITemplateFormats
      • Extract Arguments
      • Emplacement
    • Avalanche.Tokenizer
      • Introduction
      • IToken
      • ITokenizer
      • Tokenizers
    • Avalanche.Utilities
      • Introduction
      • Collections
        • Tuples
        • StructList
        • ArrayList
        • BijectionMap
        • LockableDictionary
        • LockableList
        • MapList
        • Pipe
        • RingQueue
        • EnumerableExtensions
        • TupleUtilities
        • ArrayUtilities
      • Comparers
        • IGraphComparer
        • IGraphComparable
        • AlphaNumericComparer
        • EnumerableComparer
        • EnumerableGraphComparer
        • ReferenceComparer
        • KeyValuePairComparer
        • DefaultComparerProvider
        • RecordComparer
      • Cloners
        • ICloner
        • IGraphCloner
        • IGraphCloneable
        • ListCloner
        • DictionaryCloner
        • FieldCloner
        • PassthroughCloner
        • RecordCloner
        • ClonerProvider
      • Dispose
        • IDisposeAttachable
        • IDisposeBelatable
      • Provider
        • Introduction
        • ProviderBase
        • Delegate
        • Pipe
        • Cache
        • ResultCapture
        • AsReadOnly
        • AsService
        • IProviderEvent
      • Record
        • IRecordDescription
        • IFieldDescription
        • IConstructorDescription
        • IConstructionDescription
        • IParameterDescription
        • IRecordProviders
        • RecordDelegates
          • RecordCreate
          • RecordClone
          • RecordCopy
          • IRecordDelegates
        • FieldDelegates
          • FieldRead
          • FieldWrite
          • RecreateWith
          • IFieldDelegates
      • String
        • IEscaper
        • UnicodeString
      • Miscellaneous
        • IIdGenerator
        • Permutation
        • IReadOnly
        • IUserDataContainer
        • Void
    • Avalanche.Writer
      • Introduction
      • ConstantWriter
      • Context
      • ConvertWriter
      • DefaultConstructor
      • DelegateWriter
      • PassthroughWriter
      • Referer
      • TypeCast
      • Writer
      • WriterPipe
      • WriterMessages

    ITokenizer

    ITokenizer dismantles characters into token objects.

    /// <summary>Marker interface: indicates that the implementing type is a tokenizer.</summary>
    public interface ITokenizerBase { } 
    /// <summary>Dismantles tokens of type <typeparamref name="T"/> from char memory.</summary>
    /// <typeparam name="T">Type of token that is produced.</typeparam>
    public interface ITokenizer<T> : ITokenizerBase where T : IToken
    {
        /// <summary>Peek to test whether <paramref name="text"/> starts with <typeparamref name="T"/>.</summary>
        /// <param name="text">Characters to test.</param>
        /// <returns>true if <paramref name="text"/> starts with a <typeparamref name="T"/> token.</returns>
        bool Peek(ReadOnlyMemory<char> text);
        /// <summary>Try take a <typeparamref name="T"/> token from <paramref name="text"/>.</summary>
        /// <param name="text">Characters to take the token from.</param>
        /// <param name="token">Receives the token that was taken.</param>
        /// <returns>true if a token was taken.</returns>
        bool TryTake(ReadOnlyMemory<char> text, out T token);
    }
    

    .TryTake(memory, out token) tries to take the next token. IntegerTokenizer tokenizes integer characters. .SliceAfter(arg) is an extension method in Avalanche.Utilities.dll that returns the Memory region after 'arg'.

    // Text to tokenize
    String @string = "1, x, 3, 4, 5, x, 7";
    ReadOnlyMemory<char> text = @string.AsMemory();
    // Get tokenizer from singleton
    var valueTokenizer = IntegerTokenizer.Instance;
    // Try take integer token from the start of text
    if (valueTokenizer.TryTake(text, out DecimalToken token0))
    {
        // Print token
        WriteLine(token0); // "[0:1] DecimalToken "1""
        // Slice: continue with the region after the taken token
        text = text.SliceAfter(token0.Memory);
    }
    

    ConstantTokenizer<T>(string) tokenizes only a specific string as T.

    // Create comma tokenizer
    var commaTokenizer = new ConstantTokenizer<SeparatorToken>(",");
    // Try take ','
    if (commaTokenizer.TryTake(text, out SeparatorToken token2))
    {
        // Print token
        WriteLine(token2); // "[1:2] SeparatorToken ",""
        // Slice: continue with the region after the taken token
        text = text.SliceAfter(token2.Memory);
    }
    

    WhitespaceTokenizer.Any tokenizes all white-spaces, and WhitespaceTokenizer.AllButNewLine tokenizes all but new line, into WhitespaceToken.

    // Try take white-space
    if (WhitespaceTokenizer.Any.TryTake(text, out WhitespaceToken token1))
    {
        // Print token
        WriteLine(token1); // "[2:3] WhitespaceToken " ""
        // Slice: continue with the region after the taken token
        text = text.SliceAfter(token1.Memory);
    }
    

    UntilTokenizer(endCondition) tokenizes characters until end of stream or until end condition is found. End condition is determined by another tokenizer.

    // Create malformed tokenizer: end condition is either ',' or white-space
    var malformedTokenizer = new UntilTokenizer<MalformedToken>(new AnyTokenizer(new ConstantTokenizer(","), WhitespaceTokenizer.Any));
    // Try take 'x'
    if (malformedTokenizer.TryTake(text, out MalformedToken token3))
    {
        // Print token
        WriteLine(token3); // "[3:4] MalformedToken "x""
        // Slice: continue with the region after the taken token
        text = text.SliceAfter(token3.Memory);
    }
    

    .Take<T>() returns token or null.

    // Take ',' (Take returns token or null, '!' asserts that a token is expected here)
    IToken token4 = commaTokenizer.Take<IToken>(text)!;
    // Slice: continue with the region after the taken token
    text = text.SliceAfter(token4.Memory);
    // Take ' '
    IToken token5 = WhitespaceTokenizer.Any.Take<IToken>(text)!;
    // Slice: continue with the region after the taken token
    text = text.SliceAfter(token5.Memory);
    

    SequenceTokenizer( (tokenizer, required, yieldChildren), ... ) tokenizes a sequence of sub-tokenizers.

    // Create " , x " slot tokenizer
    // Each tuple is (tokenizer, required, yieldChildren): only the value part is required
    var slotTokenizer = new SequenceTokenizer<ValueToken>(
        (WhitespaceTokenizer.Any, false, false),
        (commaTokenizer, false, false),
        (WhitespaceTokenizer.Any, false, false),
        (new AnyTokenizer(valueTokenizer, malformedTokenizer), true, true), 
        (WhitespaceTokenizer.Any, false, false)
    );
    // Try take '3'
    if (slotTokenizer.TryTake(text, out IToken token6))
    {
        // Print token
        WriteLine(token6); // "[6:7] ValueToken "3""
        // Slice: continue with the region after the taken token
        text = text.SliceAfter(token6.Memory);
    }
    

    WhileTokenizer(subTokenizer, yieldChildren) repeats while sub-tokenizer provides tokens.

    // Put together a while tokenizer that takes all integer/malformed parts and repeats while content lasts
    // (second argument is yieldChildren)
    var whileTokenizer = new WhileTokenizer(slotTokenizer, false);
    // Try take all, and print the resulting token as a tree
    if (whileTokenizer.TryTake("1, x, 3, 4".AsMemory(), out IToken tokenAll)) WriteLine(tokenAll.PrintTree());
    

    This tokenizes the string "1, x, 3, 4" into the following tree.

    CompositeToken:  "1, x, 3, 4"
    ├── ValueToken:  "1"
    │   └── DecimalToken:  "1"
    ├── ValueToken:  ", x"
    │   ├── SeparatorToken:  ","
    │   ├── WhitespaceToken:  " "
    │   └── MalformedToken:  "x"
    ├── ValueToken:  ", 3"
    │   ├── SeparatorToken:  ","
    │   ├── WhitespaceToken:  " "
    │   └── DecimalToken:  "3"
    └── ValueToken:  ", 4"
        ├── SeparatorToken:  ","
        ├── WhitespaceToken:  " "
        └── DecimalToken:  "4"
    

    .PrintTree() prints tokenizer composition as a tree.

    // Print the composition of the tokenizer (not of a token) as a tree
    WriteLine(whileTokenizer.PrintTree());
    
    WhileTokenizer { YieldChildren = False }
    └── ElementTokenizer = SequenceTokenizer
        ├── Tokenizers[0] = WhitespaceTokenizer { IncludeNewLine = False }
        ├── Tokenizers[1] = ConstantTokenizer { Text = "," }
        ├── Tokenizers[2] = WhitespaceTokenizer { IncludeNewLine = False }
        ├── Tokenizers[3] = AnyTokenizer
        │   ├── Tokenizers[0] = IntegerTokenizer { NumberFormat = System.Globalization.NumberFormatInfo }
        │   └── Tokenizers[1] = UntilTokenizer { EndsWithEndCondition = False }
        │       └── EndConditionTokenizer = AnyTokenizer
        │           ├── Tokenizers[0] = ConstantTokenizer { Text = "," }
        │           └── Tokenizers[1] = WhitespaceTokenizer { IncludeNewLine = False }
        └── Tokenizers[4] = WhitespaceTokenizer { IncludeNewLine = False }
    

    .VisitTree() visits each tokenizer.

    // Visit each tokenizer of the composition and print one line per visited tokenizer
    foreach (var line in whileTokenizer.VisitTree())
        WriteLine(line);
    

    .TakeAll<T> takes the whole string as one token, or throws InvalidOperationException if it could not tokenize all content.

    // Take the whole string as one token
    IToken compositeToken = whileTokenizer.TakeAll<IToken>("1, x, 3, 4, 5, x, 7");
    // Print token tree in long format
    WriteLine(compositeToken.PrintTree(format: TokenPrintTreeExtensions.PrintFormat.DefaultLong));
    
    [0:19] CompositeToken:  "1, x, 3, 4, 5, x, 7"
    ├── [0:1] ValueToken:  "1"
    │   └── [0:1] DecimalToken:  "1"
    ├── [1:4] ValueToken:  ", x"
    │   ├── [1:2] SeparatorToken:  ","
    │   ├── [2:3] WhitespaceToken:  " "
    │   └── [3:4] MalformedToken:  "x"
    ├── [4:7] ValueToken:  ", 3"
    │   ├── [4:5] SeparatorToken:  ","
    │   ├── [5:6] WhitespaceToken:  " "
    │   └── [6:7] DecimalToken:  "3"
    ├── [7:10] ValueToken:  ", 4"
    │   ├── [7:8] SeparatorToken:  ","
    │   ├── [8:9] WhitespaceToken:  " "
    │   └── [9:10] DecimalToken:  "4"
    ├── [10:13] ValueToken:  ", 5"
    │   ├── [10:11] SeparatorToken:  ","
    │   ├── [11:12] WhitespaceToken:  " "
    │   └── [12:13] DecimalToken:  "5"
    ├── [13:16] ValueToken:  ", x"
    │   ├── [13:14] SeparatorToken:  ","
    │   ├── [14:15] WhitespaceToken:  " "
    │   └── [15:16] MalformedToken:  "x"
    └── [16:19] ValueToken:  ", 7"
        ├── [16:17] SeparatorToken:  ","
        ├── [17:18] WhitespaceToken:  " "
        └── [18:19] DecimalToken:  "7"
    

    Implementation

    TokenizerBase<T> is the base class for most tokenizers. It implements ITokenizer<IToken>, ITokenizer<T> and Peek.

    /// <summary>Tokenizes a leading run of letters and digits as <see cref="IdentifierToken"/>.</summary>
    public class IdentifierTokenizer : TokenizerBase<IdentifierToken>
    {
        /// <summary>Singleton</summary>
        static IdentifierTokenizer instance = new IdentifierTokenizer();
        /// <summary>Singleton</summary>
        public static IdentifierTokenizer Instance => instance;

        /// <summary>Try take the letters and digits at the start of <paramref name="text"/> as one token.</summary>
        /// <param name="text">Characters to tokenize.</param>
        /// <param name="token">The taken token, or default value if nothing was taken.</param>
        /// <returns>true if at least one character was accepted.</returns>
        public override bool TryTake(ReadOnlyMemory<char> text, out IdentifierToken token)
        {
            ReadOnlySpan<char> chars = text.Span;
            // Advance over the leading letters and digits
            int length = 0;
            while (length < chars.Length && char.IsLetterOrDigit(chars[length]))
            {
                length++;
            }
            // Nothing accepted
            if (length == 0)
            {
                token = default!;
                return false;
            }
            // Token covers the accepted prefix
            token = new IdentifierToken { Memory = text.Slice(0, length) };
            return true;
        }
    }
    
    // Text to tokenize
    ReadOnlyMemory<char> text = "ID001, ID002, ID003".AsMemory();
    // Get token
    if (IdentifierTokenizer.Instance.TryTake(text, out IdentifierToken identifierToken))
    {
        // Slice text: continue with the region after the taken token
        text = text.SliceAfter(identifierToken.Memory);
        // Print token
        WriteLine(identifierToken); // '[0:5] IdentifierToken "ID001"'
    }
    

    Full Example

    Full example
    using Avalanche.Tokenizer;
    using Avalanche.Utilities;
    using static System.Console;
    
    /// <summary>Tokenizer usage examples; sections are delimited by numbered marker comments.</summary>
    public class tokenizer
    {
        /// <summary>Runs the examples and writes the resulting tokens and trees to console.</summary>
        public static void Run()
        {
            {
                // <01>
                // Text to tokenize
                String @string = "1, x, 3, 4, 5, x, 7";
                ReadOnlyMemory<char> text = @string.AsMemory();
                // Get tokenizer from singleton
                var valueTokenizer = IntegerTokenizer.Instance;
                // Try take integer token from the start of text
                if (valueTokenizer.TryTake(text, out DecimalToken token0))
                {
                    // Print token
                    WriteLine(token0); // "[0:1] DecimalToken "1""
                    // Slice: continue with the region after the taken token
                    text = text.SliceAfter(token0.Memory);
                }
                // </01>
                // <02>
                // Create comma tokenizer
                var commaTokenizer = new ConstantTokenizer<SeparatorToken>(",");
                // Try take ','
                if (commaTokenizer.TryTake(text, out SeparatorToken token2))
                {
                    // Print token
                    WriteLine(token2); // "[1:2] SeparatorToken ",""
                    // Slice: continue with the region after the taken token
                    text = text.SliceAfter(token2.Memory);
                }
                // </02>
                // <03>
                // Try take white-space
                if (WhitespaceTokenizer.Any.TryTake(text, out WhitespaceToken token1))
                {
                    // Print token
                    WriteLine(token1); // "[2:3] WhitespaceToken " ""
                    // Slice: continue with the region after the taken token
                    text = text.SliceAfter(token1.Memory);
                }
                // </03>
                // <04>
                // Create malformed tokenizer: end condition is either ',' or white-space
                var malformedTokenizer = new UntilTokenizer<MalformedToken>(new AnyTokenizer(new ConstantTokenizer(","), WhitespaceTokenizer.Any));
                // Try take 'x'
                if (malformedTokenizer.TryTake(text, out MalformedToken token3))
                {
                    // Print token
                    WriteLine(token3); // "[3:4] MalformedToken "x""
                    // Slice: continue with the region after the taken token
                    text = text.SliceAfter(token3.Memory);
                }
                // </04>
                // <05>
                // Take ',' (Take returns token or null, '!' asserts that a token is expected here)
                IToken token4 = commaTokenizer.Take<IToken>(text)!;
                text = text.SliceAfter(token4.Memory);
                // Take ' '
                IToken token5 = WhitespaceTokenizer.Any.Take<IToken>(text)!;
                text = text.SliceAfter(token5.Memory);
                // </05>
                // <06>
                // Create " , x " slot tokenizer
                // Each tuple is (tokenizer, required, yieldChildren): only the value part is required
                var slotTokenizer = new SequenceTokenizer<ValueToken>(
                    (WhitespaceTokenizer.Any, false, false),
                    (commaTokenizer, false, false),
                    (WhitespaceTokenizer.Any, false, false),
                    (new AnyTokenizer(valueTokenizer, malformedTokenizer), true, true), 
                    (WhitespaceTokenizer.Any, false, false)
                );
                // Try take '3'
                if (slotTokenizer.TryTake(text, out IToken token6))
                {
                    // Print token
                    WriteLine(token6); // "[6:7] ValueToken "3""
                    // Slice: continue with the region after the taken token
                    text = text.SliceAfter(token6.Memory);
                }
                // </06>
                // <07>
                // Put together a while tokenizer that takes all integer/malformed parts and repeats while content lasts
                // (second argument is yieldChildren)
                var whileTokenizer = new WhileTokenizer(slotTokenizer, false);
                // Try take all, and print the resulting token as a tree
                if (whileTokenizer.TryTake("1, x, 3, 4".AsMemory(), out IToken tokenAll)) WriteLine(tokenAll.PrintTree());
                // </07>
                // <08>
                // Print the composition of the tokenizer (not of a token) as a tree
                WriteLine(whileTokenizer.PrintTree());
                // </08>
                // <09>
                // Visit each tokenizer of the composition and print one line per visited tokenizer
                foreach (var line in whileTokenizer.VisitTree())
                    WriteLine(line);
                // </09>
                // <10>
                // Take the whole string as one token
                IToken compositeToken = whileTokenizer.TakeAll<IToken>("1, x, 3, 4, 5, x, 7");
                // Print token tree in long format
                WriteLine(compositeToken.PrintTree(format: TokenPrintTreeExtensions.PrintFormat.DefaultLong));
                // </10>
            }
    
            {
                // <98>
                // Text to tokenize
                ReadOnlyMemory<char> text = "ID001, ID002, ID003".AsMemory();
                // Get token
                if (IdentifierTokenizer.Instance.TryTake(text, out IdentifierToken identifierToken))
                {
                    // Slice text: continue with the region after the taken token
                    text = text.SliceAfter(identifierToken.Memory);
                    // Print token
                    WriteLine(identifierToken); // '[0:5] IdentifierToken "ID001"'
                }
                // </98>
            }
        }
    
        // <99>
        /// <summary>Tokenizes letters and digits as <see cref="IdentifierToken"/>.</summary>
        public class IdentifierTokenizer : TokenizerBase<IdentifierToken>
        {
            /// <summary>Singleton</summary>
            static IdentifierTokenizer instance = new IdentifierTokenizer();
            /// <summary>Singleton</summary>
            public static IdentifierTokenizer Instance => instance;
    
            /// <summary>Try take the letters and digits at the start of <paramref name="text"/> as one token.</summary>
            /// <param name="text">Characters to tokenize.</param>
            /// <param name="token">The taken token, or default value if nothing was taken.</param>
            /// <returns>true if at least one character was accepted.</returns>
            public override bool TryTake(ReadOnlyMemory<char> text, out IdentifierToken token)
            {
                // Get span
                ReadOnlySpan<char> span = text.Span;
                // Number of accepted chars
                int ix = 0;
                // Count leading letters and digits
                for (int i=0; i<span.Length; i++)
                {
                    // Stop at the first char that is neither letter nor digit
                    if (!char.IsLetterOrDigit(span[i])) break;
                    // Accept char
                    ix++;
                }
                // No chars were accepted
                if (ix == 0) { token = default!; return false; }
                // Return result: token covers the accepted prefix
                token = new IdentifierToken { Memory = text.Slice(0, ix) };
                return true;
            }
        }
        // </99>
    }
    
    
    In This Article
    Back to top Copyright © Toni Kalajainen