aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/ext_depends/D-YAML/source/dyaml/scanner.d
diff options
context:
space:
mode:
authorRalph Amissah <ralph.amissah@gmail.com>2021-02-19 17:10:51 -0500
committerRalph Amissah <ralph.amissah@gmail.com>2021-02-24 16:46:47 -0500
commit02ca32ae0a5bc290918d2b2a3288e385b9cc6b11 (patch)
tree06379785e8a0165a7deb981c2eba362894820634 /src/ext_depends/D-YAML/source/dyaml/scanner.d
parentbuild from static source-tree pre fetch depends (diff)
external & build dependences in src tree
- external & build dependences boost licensed - ext_depends (external depends) - D-YAML - tinyendian - d2sqlite3 - imageformats - build_depends - dub2nix
Diffstat (limited to 'src/ext_depends/D-YAML/source/dyaml/scanner.d')
-rw-r--r--src/ext_depends/D-YAML/source/dyaml/scanner.d1788
1 files changed, 1788 insertions, 0 deletions
diff --git a/src/ext_depends/D-YAML/source/dyaml/scanner.d b/src/ext_depends/D-YAML/source/dyaml/scanner.d
new file mode 100644
index 0000000..2009521
--- /dev/null
+++ b/src/ext_depends/D-YAML/source/dyaml/scanner.d
@@ -0,0 +1,1788 @@
+
+// Copyright Ferdinand Majerech 2011-2014.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+/// YAML scanner.
+/// Code based on PyYAML: http://www.pyyaml.org
+module dyaml.scanner;
+
+
+import core.stdc.string;
+
+import std.algorithm;
+import std.array;
+import std.conv;
+import std.ascii : isAlphaNum, isDigit, isHexDigit;
+import std.exception;
+import std.string;
+import std.typecons;
+import std.traits : Unqual;
+import std.utf;
+
+import dyaml.escapes;
+import dyaml.exception;
+import dyaml.queue;
+import dyaml.reader;
+import dyaml.style;
+import dyaml.token;
+
+package:
+/// Scanner produces tokens of the following types:
+/// STREAM-START
+/// STREAM-END
+/// DIRECTIVE(name, value)
+/// DOCUMENT-START
+/// DOCUMENT-END
+/// BLOCK-SEQUENCE-START
+/// BLOCK-MAPPING-START
+/// BLOCK-END
+/// FLOW-SEQUENCE-START
+/// FLOW-MAPPING-START
+/// FLOW-SEQUENCE-END
+/// FLOW-MAPPING-END
+/// BLOCK-ENTRY
+/// FLOW-ENTRY
+/// KEY
+/// VALUE
+/// ALIAS(value)
+/// ANCHOR(value)
+/// TAG(value)
+/// SCALAR(value, plain, style)
+
+/// Matches '\0' and the line break characters LF, CR, NEL (U+0085), LS (U+2028)
+/// and PS (U+2029).
+///
+/// Like every `among` alias below, the result is the 1-based position of the
+/// matching character, or 0 when nothing matches, so it can be used as a condition.
+alias isBreak = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
+
+/// Same set as isBreak, plus the space character.
+alias isBreakOrSpace = among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
+
+/// Same set as isBreakOrSpace, plus the tab character.
+alias isWhiteSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
+
+/// Space or tab only.
+alias isNonLinebreakWhitespace = among!(' ', '\t');
+
+/// Characters that cannot begin a plain scalar: the indicator characters plus
+/// every isWhiteSpace character.
+alias isNonScalarStartCharacter = among!('-', '?', ':', ',', '[', ']', '{', '}',
+ '#', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`', ' ', '\t', '\0', '\n',
+ '\r', '\u0085', '\u2028', '\u2029');
+
+/// Punctuation characters of a URI.
+/// NOTE(review): alphanumerics are not listed; presumably the caller tests them
+/// separately - the use site is not visible in this part of the file.
+alias isURIChar = among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',',
+ '_', '.', '!', '~', '*', '\'', '(', ')', '[', ']', '%');
+
+/// Space and the line break characters (no '\0', no tab).
+/// NOTE(review): the name resembles YAML's "ns-char" (non-space character), yet
+/// the set is space plus line breaks - confirm the intent at the use sites.
+alias isNSChar = among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029');
+
+/// Line break characters only (isBreak without '\0').
+alias isBChar = among!('\n', '\r', '\u0085', '\u2028', '\u2029');
+
+/// Every isWhiteSpace character plus the single quote, double quote and backslash.
+alias isFlowScalarBreakSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029', '\'', '"', '\\');
+
+/// Marked exception thrown at scanner errors.
+///
+/// Constructors are supplied by the MarkedExceptionCtors mixin. In this file it
+/// is built either with two arguments (message, mark) or with four (context,
+/// context mark, problem, problem mark); the argument roles are inferred from
+/// the call sites.
+///
+/// See_Also: MarkedYAMLException
+class ScannerException : MarkedYAMLException
+{
+ mixin MarkedExceptionCtors;
+}
+
+/// Generates tokens from data provided by a Reader.
+struct Scanner
+{
+ private:
+ /// A simple key is a key that is not denoted by the '?' indicator.
+ /// For example:
+ /// ---
+ /// block simple key: value
+ /// ? not a simple key:
+ /// : { flow simple key: value }
+ /// We emit the KEY token before all keys, so when we find a potential simple
+ /// key, we try to locate the corresponding ':' indicator. Simple keys should be
+ /// limited to a single line and 1024 characters.
+ ///
+ /// 16 bytes on 64-bit.
+ ///
+ /// Note that a default-initialized SimpleKey has isNull == false, i.e.
+ /// SimpleKey.init is NOT a "null" key.
+ static struct SimpleKey
+ {
+ /// Character index in reader where the key starts.
+ uint charIndex = uint.max;
+ /// Index of the key token from start (first token scanned being 0).
+ uint tokenIndex;
+ /// Line the key starts at.
+ uint line;
+ /// Column the key starts at (the writer clamps it to ushort.max).
+ ushort column;
+ /// Is this required to be a simple key?
+ bool required;
+ /// Is this struct "null" (invalid)?.
+ bool isNull;
+ }
+
+ /// Block chomping types: what happens to the line breaks that trail a block scalar.
+ enum Chomping
+ {
+ /// Strip all trailing line breaks. '-' indicator.
+ strip,
+ /// Line break of the last line is preserved, others discarded. Default.
+ clip,
+ /// All trailing line breaks are preserved. '+' indicator.
+ keep
+ }
+
+ /// Reader used to read from a file/stream.
+ Reader reader_;
+ /// Are we done scanning? Set once the STREAM-END token has been queued.
+ bool done_;
+
+ /// Level of nesting in flow context. If 0, we're in block context.
+ uint flowLevel_;
+ /// Current indentation level. Starts at -1, i.e. below every real column.
+ int indent_ = -1;
+ /// Past indentation levels. Used as a stack.
+ Appender!(int[]) indents_;
+
+ /// Processed tokens not yet emitted. Used as a queue.
+ Queue!Token tokens_;
+
+ /// Number of tokens already emitted; incremented by popFront.
+ uint tokensTaken_;
+
+ /// Can a simple key start at the current position? A simple key may start:
+ /// - at the beginning of the line, not counting indentation spaces
+ /// (in block context),
+ /// - after '{', '[', ',' (in the flow context),
+ /// - after '?', ':', '-' (in the block context).
+ /// In the block context, this flag also signifies if a block collection
+ /// may start at the current position.
+ bool allowSimpleKey_ = true;
+
+ /// Possible simple keys indexed by flow levels.
+ SimpleKey[] possibleSimpleKeys_;
+
+ public:
+ /// Construct a Scanner using specified Reader.
+ ///
+ /// Stores the reader and immediately queues the STREAM-START token.
+ this(Reader reader) @safe nothrow
+ {
+ // Keep the reader, then queue STREAM-START as the very first token.
+ reader_ = reader;
+ fetchStreamStart();
+ }
+
+ /// Advance to the next token.
+ ///
+ /// Counts the token as taken and removes it from the front of the queue.
+ void popFront() @safe
+ {
+ ++tokensTaken_;
+ tokens_.pop();
+ }
+
+ /// Return the current token without removing it from the queue.
+ ///
+ /// Calling this may scan further input, because the `empty` check fetches
+ /// tokens on demand.
+ ///
+ /// Throws: an exception (via enforce) when no token is left.
+ const(Token) front() @safe
+ {
+ enforce(!empty, "No token left to peek");
+ return tokens_.peek();
+ }
+
+ /// Return whether there are any more tokens left.
+ ///
+ /// Tokens are fetched on demand: scanning continues for as long as
+ /// needMoreTokens() asks for it, and only then is the queue inspected.
+ bool empty() @safe
+ {
+     for(;;)
+     {
+         if(!needMoreTokens()) { return tokens_.empty; }
+         fetchToken();
+     }
+ }
+
+ private:
+ /// Most scanning error messages have the same format; so build them with this
+ /// function.
+ ///
+ /// Params: expected = Description of what the scanner was looking for.
+ ///         found    = What was actually encountered (any type `text` can format).
+ ///
+ /// Returns: "expected <expected>, but found <found>".
+ string expected(T)(string expected, T found)
+ {
+ return text("expected ", expected, ", but found ", found);
+ }
+
+ /// Determine whether or not we need to fetch more tokens before peeking/getting a token.
+ ///
+ /// No more tokens are needed once scanning is done. Otherwise more are needed
+ /// when the queue is empty, or when the token at the front of the queue is
+ /// still a candidate simple key (its ':' may lie further ahead).
+ bool needMoreTokens() @safe pure
+ {
+     // done_ wins over an empty queue: false when done, true when merely empty.
+     if(done_ || tokens_.empty) { return !done_; }
+
+     // Drop candidates that can no longer be simple keys, then see whether the
+     // nearest remaining candidate is the token about to be handed out.
+     stalePossibleSimpleKeys();
+     const nearestKeyToken = nextPossibleSimpleKey();
+     return nearestKeyToken == tokensTaken_;
+ }
+
+ /// Fetch a token, adding it to tokens_.
+ ///
+ /// Skips to the next token start, expires stale simple key candidates, unwinds
+ /// indentation, and then dispatches on the next character to the matching
+ /// fetch* method.
+ ///
+ /// Throws: ScannerException if the next character cannot start any token.
+ void fetchToken() @safe
+ {
+ // Eat whitespaces and comments until we reach the next token.
+ scanToNextToken();
+
+ // Remove obsolete possible simple keys.
+ stalePossibleSimpleKeys();
+
+ // Compare current indentation and column. It may add some tokens
+ // and decrease the current indentation level.
+ unwindIndent(reader_.column);
+
+ // Get the next character.
+ const dchar c = reader_.peekByte();
+
+ // Fetch the token.
+ // These four checks run before the switch, so '%', '---' and '...' at
+ // column 0 take priority over the per-character cases below.
+ if(c == '\0') { return fetchStreamEnd(); }
+ if(checkDirective()) { return fetchDirective(); }
+ if(checkDocumentStart()) { return fetchDocumentStart(); }
+ if(checkDocumentEnd()) { return fetchDocumentEnd(); }
+ // Order of the following checks is NOT significant.
+ switch(c)
+ {
+ case '[': return fetchFlowSequenceStart();
+ case '{': return fetchFlowMappingStart();
+ case ']': return fetchFlowSequenceEnd();
+ case '}': return fetchFlowMappingEnd();
+ case ',': return fetchFlowEntry();
+ case '!': return fetchTag();
+ case '\'': return fetchSingle();
+ case '\"': return fetchDouble();
+ case '*': return fetchAlias();
+ case '&': return fetchAnchor();
+ // '?', ':' and '-' are indicators only in some positions; otherwise
+ // they may begin a plain scalar, hence the jump to the default case.
+ case '?': if(checkKey()) { return fetchKey(); } goto default;
+ case ':': if(checkValue()) { return fetchValue(); } goto default;
+ case '-': if(checkBlockEntry()) { return fetchBlockEntry(); } goto default;
+ // '|' and '>' start block scalars only in block context; in flow
+ // context they fall out of the switch and hit the error below.
+ case '|': if(flowLevel_ == 0) { return fetchLiteral(); } break;
+ case '>': if(flowLevel_ == 0) { return fetchFolded(); } break;
+ default: if(checkPlain()) { return fetchPlain(); }
+ }
+
+ // Note: .format applies to the second literal only, which is where both
+ // format specifiers are.
+ throw new ScannerException("While scanning for the next token, found character " ~
+ "\'%s\', index %s that cannot start any token"
+ .format(c, to!int(c)), reader_.mark);
+ }
+
+
+ /// Return the token number of the nearest possible simple key.
+ ///
+ /// Returns: the smallest tokenIndex among the non-null candidates, or
+ ///          uint.max when there is no candidate at all.
+ uint nextPossibleSimpleKey() @safe pure nothrow @nogc
+ {
+     uint nearest = uint.max;
+     foreach(ref candidate; possibleSimpleKeys_)
+     {
+         if(!candidate.isNull && candidate.tokenIndex < nearest)
+         {
+             nearest = candidate.tokenIndex;
+         }
+     }
+     return nearest;
+ }
+
+ /// Remove entries that are no longer possible simple keys.
+ ///
+ /// According to the YAML specification, simple keys
+ /// - should be limited to a single line,
+ /// - should be no longer than 1024 characters.
+ /// Disabling this will allow simple keys of any length and
+ /// height (may cause problems if indentation is broken though).
+ ///
+ /// Throws: ScannerException if a candidate that has expired was required
+ ///         to be a simple key.
+ void stalePossibleSimpleKeys() @safe pure
+ {
+     foreach(ref key; possibleSimpleKeys_)
+     {
+         if(key.isNull) { continue; }
+
+         // A candidate stays alive while the reader is on the key's line and
+         // at most 1024 characters past its start.
+         const stillPossible = key.line == reader_.line &&
+                               reader_.charIndex - key.charIndex <= 1024;
+         if(stillPossible) { continue; }
+
+         if(key.required)
+         {
+             throw new ScannerException("While scanning a simple key",
+                                        Mark(reader_.name, key.line, key.column),
+                                        "could not find expected ':'", reader_.mark);
+         }
+         key.isNull = true;
+     }
+ }
+
+ /// Check if the next token starts a possible simple key and if so, save its position.
+ ///
+ /// This function is called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
+ ///
+ /// Does nothing when a simple key is not allowed at the current position.
+ /// Otherwise the candidate stored for the current flow level is replaced,
+ /// growing possibleSimpleKeys_ when the level has no slot yet.
+ ///
+ /// Throws: ScannerException (from removePossibleSimpleKey) if the candidate
+ ///         being replaced was required to be a simple key.
+ void savePossibleSimpleKey() @safe pure
+ {
+ // Check if a simple key is required at the current position.
+ const required = (flowLevel_ == 0 && indent_ == reader_.column);
+ assert(allowSimpleKey_ || !required, "A simple key is required only if it is " ~
+ "the first token in the current line. Therefore it is always allowed.");
+
+ if(!allowSimpleKey_) { return; }
+
+ // The next token might be a simple key, so save its number and position.
+ removePossibleSimpleKey();
+ // Index the next token will have: tokens already taken plus tokens still queued.
+ const tokenCount = tokensTaken_ + cast(uint)tokens_.length;
+
+ const line = reader_.line;
+ const column = reader_.column;
+ // The column is clamped so that it fits the ushort field.
+ const key = SimpleKey(cast(uint)reader_.charIndex, tokenCount, line,
+ cast(ushort)min(column, ushort.max), required);
+
+ if(possibleSimpleKeys_.length <= flowLevel_)
+ {
+ const oldLength = possibleSimpleKeys_.length;
+ possibleSimpleKeys_.length = flowLevel_ + 1;
+ //No need to initialize the last element, it's already done in the next line.
+ // NOTE(review): SimpleKey.init has isNull == false, so the gap entries
+ // written here are not marked null; presumably stalePossibleSimpleKeys
+ // discards them later - confirm this is intended.
+ possibleSimpleKeys_[oldLength .. flowLevel_] = SimpleKey.init;
+ }
+ possibleSimpleKeys_[flowLevel_] = key;
+ }
+
+ /// Remove the saved possible key position at the current flow level.
+ ///
+ /// Nothing happens when the current flow level has no slot or its candidate
+ /// is already null.
+ ///
+ /// Throws: ScannerException if the candidate was required to be a simple key.
+ void removePossibleSimpleKey() @safe pure
+ {
+     if(flowLevel_ >= possibleSimpleKeys_.length) { return; }
+     if(possibleSimpleKeys_[flowLevel_].isNull) { return; }
+
+     const key = possibleSimpleKeys_[flowLevel_];
+     enforce(!key.required,
+             new ScannerException("While scanning a simple key",
+                                  Mark(reader_.name, key.line, key.column),
+                                  "could not find expected ':'", reader_.mark));
+     possibleSimpleKeys_[flowLevel_].isNull = true;
+ }
+
+ /// Decrease indentation, removing entries in indents_.
+ ///
+ /// In block context, saved indentation levels are popped until the current
+ /// level is no deeper than `column`; one BLOCK-END token is queued per popped
+ /// level. In flow context nothing is done.
+ ///
+ /// Params: column = Current column in the file/stream.
+ void unwindIndent(const int column) @safe
+ {
+     if(flowLevel_ > 0)
+     {
+         // In flow context, tokens should respect indentation.
+         // The condition should be `indent >= column` according to the spec.
+         // But this condition will prohibit intuitively correct
+         // constructions such as
+         // key : {
+         // }
+
+         // In the flow context, indentation is ignored. We make the scanner less
+         // restrictive than what the specification requires.
+         // if(pedantic_ && flowLevel_ > 0 && indent_ > column)
+         // {
+         //     throw new ScannerException("Invalid indentation or unclosed '[' or '{'",
+         //                                reader_.mark)
+         // }
+         return;
+     }
+
+     // In block context, we may need to issue the BLOCK-END tokens.
+     while(indent_ > column)
+     {
+         // Check the stack BEFORE reading its top element, so that a broken
+         // invariant is reported here and not by the array access itself.
+         assert(indents_.data.length, "Indentation stack is empty");
+         indent_ = indents_.data.back;
+         indents_.shrinkTo(indents_.data.length - 1);
+         tokens_.push(blockEndToken(reader_.mark, reader_.mark));
+     }
+ }
+
+ /// Increase indentation if needed.
+ ///
+ /// When `column` is deeper than the current level, the current level is
+ /// pushed onto indents_ and `column` becomes the new level.
+ ///
+ /// Params: column = Current column in the file/stream.
+ ///
+ /// Returns: true if the indentation was increased, false otherwise.
+ bool addIndent(int column) @safe
+ {
+     const deeper = column > indent_;
+     if(deeper)
+     {
+         indents_ ~= indent_;
+         indent_ = column;
+     }
+     return deeper;
+ }
+
+
+ /// Add STREAM-START token.
+ ///
+ /// The token starts and ends at the reader's current mark and carries the
+ /// reader's encoding.
+ void fetchStreamStart() @safe nothrow
+ {
+ tokens_.push(streamStartToken(reader_.mark, reader_.mark, reader_.encoding));
+ }
+
+ /// Add STREAM-END token and mark the scanner as done.
+ void fetchStreamEnd() @safe
+ {
+ // Set indentation to -1, queueing BLOCK-END for every open block collection.
+ unwindIndent(-1);
+ // No simple key can be pending or start any more.
+ removePossibleSimpleKey();
+ allowSimpleKey_ = false;
+ possibleSimpleKeys_.destroy;
+
+ tokens_.push(streamEndToken(reader_.mark, reader_.mark));
+ done_ = true;
+ }
+
+ /// Add DIRECTIVE token.
+ ///
+ /// Before the directive is scanned, all block indentation is unwound and the
+ /// simple key state is reset.
+ void fetchDirective() @safe
+ {
+     // Set indentation to -1.
+     unwindIndent(-1);
+
+     // Reset simple keys.
+     removePossibleSimpleKey();
+     allowSimpleKey_ = false;
+
+     tokens_.push(scanDirective());
+ }
+
+ /// Add DOCUMENT-START or DOCUMENT-END token.
+ ///
+ /// Params: id = Which of the two tokens to add (enforced by the template constraint).
+ void fetchDocumentIndicator(TokenID id)()
+ if(id == TokenID.documentStart || id == TokenID.documentEnd)
+ {
+ // Set indentation to -1 .
+ unwindIndent(-1);
+ // Reset simple keys. Note that there can't be a block collection after '---'.
+ removePossibleSimpleKey();
+ allowSimpleKey_ = false;
+
+ Mark startMark = reader_.mark;
+ // Both indicators ('---' and '...') are three characters long.
+ reader_.forward(3);
+ tokens_.push(simpleToken!id(startMark, reader_.mark));
+ }
+
+ /// Aliases to add DOCUMENT-START or DOCUMENT-END token.
+ alias fetchDocumentStart = fetchDocumentIndicator!(TokenID.documentStart);
+ alias fetchDocumentEnd = fetchDocumentIndicator!(TokenID.documentEnd);
+
+ /// Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
+ ///
+ /// Params: id = Token type to add.
+ void fetchFlowCollectionStart(TokenID id)() @safe
+ {
+ // '[' and '{' may start a simple key.
+ // The candidate is saved at the CURRENT flow level, i.e. before the
+ // level is incremented below.
+ savePossibleSimpleKey();
+ // Simple keys are allowed after '[' and '{'.
+ allowSimpleKey_ = true;
+ ++flowLevel_;
+
+ Mark startMark = reader_.mark;
+ reader_.forward();
+ tokens_.push(simpleToken!id(startMark, reader_.mark));
+ }
+
+ /// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
+ alias fetchFlowSequenceStart = fetchFlowCollectionStart!(TokenID.flowSequenceStart);
+ alias fetchFlowMappingStart = fetchFlowCollectionStart!(TokenID.flowMappingStart);
+
+ /// Add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
+ ///
+ /// The token is queued even when no flow collection is open (a stray ']' or
+ /// '}' in block context); reporting that is left to the consumer of the tokens.
+ ///
+ /// Params: id = Token type to add.
+ void fetchFlowCollectionEnd(TokenID id)()
+ {
+     // Reset possible simple key on the current level.
+     removePossibleSimpleKey();
+     // No simple keys after ']' and '}'.
+     allowSimpleKey_ = false;
+
+     // flowLevel_ is unsigned: decrementing it at 0 would wrap around to
+     // uint.max, and `flowLevel_ + 1` in savePossibleSimpleKey would then wrap
+     // back to 0 and break the sizing of possibleSimpleKeys_. Stay in block
+     // context instead.
+     if(flowLevel_ > 0) { --flowLevel_; }
+
+     Mark startMark = reader_.mark;
+     reader_.forward();
+     tokens_.push(simpleToken!id(startMark, reader_.mark));
+ }
+
+ /// Aliases to add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
+ alias fetchFlowSequenceEnd = fetchFlowCollectionEnd!(TokenID.flowSequenceEnd);
+ alias fetchFlowMappingEnd = fetchFlowCollectionEnd!(TokenID.flowMappingEnd);
+
+ /// Add FLOW-ENTRY token (the ',' separator).
+ void fetchFlowEntry() @safe
+ {
+ // Reset possible simple key on the current level.
+ removePossibleSimpleKey();
+ // Simple keys are allowed after ','.
+ allowSimpleKey_ = true;
+
+ Mark startMark = reader_.mark;
+ reader_.forward();
+ tokens_.push(flowEntryToken(startMark, reader_.mark));
+ }
+
+ /// Additional checks used in block context in fetchBlockEntry and fetchKey.
+ ///
+ /// Queues the collection start token `id` when the current column opens a
+ /// new, deeper indentation level.
+ ///
+ /// Params: type = String representing the token type we might need to add.
+ /// id = Token type we might need to add.
+ ///
+ /// Throws: ScannerException if a key is not allowed at the current position.
+ void blockChecks(string type, TokenID id)()
+ {
+ enum context = type ~ " keys are not allowed here";
+ // Are we allowed to start a key (not necessarily a simple one)?
+ enforce(allowSimpleKey_, new ScannerException(context, reader_.mark));
+
+ if(addIndent(reader_.column))
+ {
+ tokens_.push(simpleToken!id(reader_.mark, reader_.mark));
+ }
+ }
+
+ /// Add BLOCK-ENTRY token. Might add BLOCK-SEQUENCE-START in the process.
+ ///
+ /// Throws: ScannerException (from blockChecks) if, in block context, an
+ ///         entry is not allowed at the current position.
+ void fetchBlockEntry() @safe
+ {
+ if(flowLevel_ == 0) { blockChecks!("Sequence", TokenID.blockSequenceStart)(); }
+
+ // It's an error for the block entry to occur in the flow context,
+ // but we let the parser detect this.
+
+ // Reset possible simple key on the current level.
+ removePossibleSimpleKey();
+ // Simple keys are allowed after '-'.
+ allowSimpleKey_ = true;
+
+ Mark startMark = reader_.mark;
+ reader_.forward();
+ tokens_.push(blockEntryToken(startMark, reader_.mark));
+ }
+
+ /// Add KEY token. Might add BLOCK-MAPPING-START in the process.
+ ///
+ /// Throws: ScannerException (from blockChecks) if, in block context, a key
+ ///         is not allowed at the current position.
+ void fetchKey() @safe
+ {
+ if(flowLevel_ == 0) { blockChecks!("Mapping", TokenID.blockMappingStart)(); }
+
+ // Reset possible simple key on the current level.
+ removePossibleSimpleKey();
+ // Simple keys are allowed after '?' in the block context.
+ allowSimpleKey_ = (flowLevel_ == 0);
+
+ Mark startMark = reader_.mark;
+ reader_.forward();
+ tokens_.push(keyToken(startMark, reader_.mark));
+ }
+
+ /// Add VALUE token. Might add KEY and/or BLOCK-MAPPING-START in the process.
+ ///
+ /// If a simple key candidate is pending on the current flow level, the ':'
+ /// confirms it: a KEY token (and, when the key opens a new block mapping, a
+ /// BLOCK-MAPPING-START token) is inserted into the queue at the position of
+ /// the key. Otherwise the ':' belongs to a complex key.
+ ///
+ /// Throws: ScannerException if a mapping value is not allowed here.
+ void fetchValue() @safe
+ {
+     // Do we determine a simple key?
+     if(possibleSimpleKeys_.length > flowLevel_ &&
+        !possibleSimpleKeys_[flowLevel_].isNull)
+     {
+         const key = possibleSimpleKeys_[flowLevel_];
+         possibleSimpleKeys_[flowLevel_].isNull = true;
+         Mark keyMark = Mark(reader_.name, key.line, key.column);
+
+         // Both operands are unsigned, so the difference can never be
+         // negative; the invariant has to be checked BEFORE subtracting.
+         assert(key.tokenIndex >= tokensTaken_,
+                "Simple key token has already been taken from the queue");
+         const idx = key.tokenIndex - tokensTaken_;
+
+         // Add KEY.
+         // Manually inserting since tokens are immutable (need linked list).
+         tokens_.insert(keyToken(keyMark, keyMark), idx);
+
+         // If this key starts a new block mapping, we need to add BLOCK-MAPPING-START.
+         // It is inserted at the same index, so it ends up in front of KEY.
+         if(flowLevel_ == 0 && addIndent(key.column))
+         {
+             tokens_.insert(blockMappingStartToken(keyMark, keyMark), idx);
+         }
+
+         // There cannot be two simple keys in a row.
+         allowSimpleKey_ = false;
+     }
+     // Part of a complex key
+     else
+     {
+         // We can start a complex value if and only if we can start a simple key.
+         enforce(flowLevel_ > 0 || allowSimpleKey_,
+                 new ScannerException("Mapping values are not allowed here", reader_.mark));
+
+         // If this value starts a new block mapping, we need to add
+         // BLOCK-MAPPING-START. It'll be detected as an error later by the parser.
+         if(flowLevel_ == 0 && addIndent(reader_.column))
+         {
+             tokens_.push(blockMappingStartToken(reader_.mark, reader_.mark));
+         }
+
+         // Reset possible simple key on the current level.
+         removePossibleSimpleKey();
+         // Simple keys are allowed after ':' in the block context.
+         allowSimpleKey_ = (flowLevel_ == 0);
+     }
+
+     // Add VALUE.
+     Mark startMark = reader_.mark;
+     reader_.forward();
+     tokens_.push(valueToken(startMark, reader_.mark));
+ }
+
+ /// Add ALIAS or ANCHOR token.
+ ///
+ /// Params: id = Which of the two tokens to add (enforced by the template constraint).
+ void fetchAnchor_(TokenID id)() @safe
+     if(id == TokenID.alias_ || id == TokenID.anchor)
+ {
+     // An ALIAS/ANCHOR may itself be a simple key ...
+     savePossibleSimpleKey();
+     // ... but no simple key may follow it.
+     allowSimpleKey_ = false;
+
+     tokens_.push(scanAnchor(id));
+ }
+
+ /// Aliases to add ALIAS or ANCHOR token.
+ alias fetchAlias = fetchAnchor_!(TokenID.alias_);
+ alias fetchAnchor = fetchAnchor_!(TokenID.anchor);
+
+ /// Add TAG token.
+ void fetchTag() @safe
+ {
+ // TAG could start a simple key.
+ savePossibleSimpleKey();
+ // No simple keys after TAG.
+ allowSimpleKey_ = false;
+
+ tokens_.push(scanTag());
+ }
+
+ /// Add block SCALAR token.
+ ///
+ /// Params: style = Literal ('|') or folded ('>') style (enforced by the template constraint).
+ void fetchBlockScalar(ScalarStyle style)() @safe
+ if(style == ScalarStyle.literal || style == ScalarStyle.folded)
+ {
+ // Reset possible simple key on the current level.
+ removePossibleSimpleKey();
+ // A simple key may follow a block scalar.
+ allowSimpleKey_ = true;
+
+ auto blockScalar = scanBlockScalar(style);
+ tokens_.push(blockScalar);
+ }
+
+ /// Aliases to add literal or folded block scalar.
+ alias fetchLiteral = fetchBlockScalar!(ScalarStyle.literal);
+ alias fetchFolded = fetchBlockScalar!(ScalarStyle.folded);
+
+ /// Add quoted flow SCALAR token.
+ ///
+ /// Params: quotes = Quoting style of the scalar.
+ void fetchFlowScalar(ScalarStyle quotes)()
+ {
+ // A flow scalar could be a simple key.
+ savePossibleSimpleKey();
+ // No simple keys after flow scalars.
+ allowSimpleKey_ = false;
+
+ // Scan and add SCALAR.
+ auto scalar = scanFlowScalar(quotes);
+ tokens_.push(scalar);
+ }
+
+ /// Aliases to add single or double quoted flow scalar.
+ alias fetchSingle = fetchFlowScalar!(ScalarStyle.singleQuoted);
+ alias fetchDouble = fetchFlowScalar!(ScalarStyle.doubleQuoted);
+
+ /// Add plain SCALAR token.
+ void fetchPlain() @safe
+ {
+ // A plain scalar could be a simple key
+ savePossibleSimpleKey();
+ // No simple keys after plain scalars. But note that scanPlain() will
+ // change this flag if the scan is finished at the beginning of the line.
+ allowSimpleKey_ = false;
+ // Scan the SCALAR. May change allowSimpleKey_
+ auto plain = scanPlain();
+
+ // Add the scanned SCALAR to the queue.
+ tokens_.push(plain);
+ }
+
+ pure:
+
+ /// Check if the next token is DIRECTIVE: ^ '%' ...
+ ///
+ /// A '%' counts as a directive only at column 0.
+ bool checkDirective() @safe
+ {
+ return reader_.peekByte() == '%' && reader_.column == 0;
+ }
+
+ /// Check if the next token is DOCUMENT-START: ^ '---' (' '|'\n')
+ ///
+ /// The character after '---' may be any isWhiteSpace character (space, tab,
+ /// a line break or '\0').
+ bool checkDocumentStart() @safe
+ {
+ // Check one char first, then all 3, to prevent reading outside the buffer.
+ return reader_.column == 0 &&
+ reader_.peekByte() == '-' &&
+ reader_.prefix(3) == "---" &&
+ reader_.peek(3).isWhiteSpace;
+ }
+
+ /// Check if the next token is DOCUMENT-END: ^ '...' (' '|'\n')
+ ///
+ /// The character after '...' may be any isWhiteSpace character (space, tab,
+ /// a line break or '\0').
+ bool checkDocumentEnd() @safe
+ {
+ // Check one char first, then all 3, to prevent reading outside the buffer.
+ return reader_.column == 0 &&
+ reader_.peekByte() == '.' &&
+ reader_.prefix(3) == "..." &&
+ reader_.peek(3).isWhiteSpace;
+ }
+
+ /// Check if the next token is BLOCK-ENTRY: '-' (' '|'\n')
+ ///
+ /// Only the character AFTER the current one is examined; the caller has
+ /// already seen the '-'.
+ bool checkBlockEntry() @safe
+ {
+ // `among` yields an index, hence the `!!` to turn it into a bool.
+ return !!reader_.peek(1).isWhiteSpace;
+ }
+
+ /// Check if the next token is KEY(flow context): '?'
+ ///
+ /// or KEY(block context): '?' (' '|'\n')
+ ///
+ /// Only the character after the current one is examined; the caller has
+ /// already seen the '?'.
+ bool checkKey() @safe
+ {
+ return (flowLevel_ > 0 || reader_.peek(1).isWhiteSpace);
+ }
+
+ /// Check if the next token is VALUE(flow context): ':'
+ ///
+ /// or VALUE(block context): ':' (' '|'\n')
+ ///
+ /// Only the character after the current one is examined; the caller has
+ /// already seen the ':'.
+ bool checkValue() @safe
+ {
+ return flowLevel_ > 0 || reader_.peek(1).isWhiteSpace;
+ }
+
+ /// Check if the next token is a plain scalar.
+ ///
+ /// A plain scalar may start with any non-space character except:
+ /// '-', '?', ':', ',', '[', ']', '{', '}',
+ /// '#', '&', '*', '!', '|', '>', '\'', '\"',
+ /// '%', '@', '`'.
+ ///
+ /// It may also start with
+ /// '-', '?', ':'
+ /// if it is followed by a non-space character.
+ ///
+ /// Note that we limit the last rule to the block context (except the
+ /// '-' character) because we want the flow context to be space
+ /// independent.
+ bool checkPlain() @safe
+ {
+     const first = reader_.peek();
+     if(first.isNonScalarStartCharacter)
+     {
+         // An indicator may still open a plain scalar: '-' anywhere, '?' and
+         // ':' in block context only - provided a non-space character follows.
+         const allowedIndicator =
+             first == '-' || (flowLevel_ == 0 && (first == '?' || first == ':'));
+         return !reader_.peek(1).isWhiteSpace && allowedIndicator;
+     }
+     return true;
+ }
+
+ /// Move to the next non-space character.
+ ///
+ /// Only the space character ' ' is skipped; tabs and line breaks are not.
+ void findNextNonSpace() @safe
+ {
+ while(reader_.peekByte() == ' ') { reader_.forward(); }
+ }
+
+ /// Scan a string of alphanumeric or "-_" characters.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the scanned
+ /// characters into that slice.
+ ///
+ /// Params: name      = What is being scanned; used in the error message.
+ ///         startMark = Start of the enclosing token; used in the error message.
+ ///
+ /// Throws: ScannerException if not a single matching character is found.
+ void scanAlphaNumericToSlice(string name)(const Mark startMark)
+ {
+     size_t count = 0;
+     dchar current = reader_.peek();
+     // Count matching characters by peeking ahead; nothing is consumed yet.
+     for(; current.isAlphaNum || current == '-' || current == '_';
+           current = reader_.peek(count))
+     {
+         ++count;
+     }
+
+     enforce(count > 0, new ScannerException("While scanning " ~ name,
+         startMark, expected("alphanumeric, '-' or '_'", current), reader_.mark));
+
+     reader_.sliceBuilder.write(reader_.get(count));
+ }
+
+ /// Scan and throw away all characters until next line break.
+ ///
+ /// Stops in front of the first isBreak character (a line break or '\0'),
+ /// which is not consumed.
+ void scanToNextBreak() @safe
+ {
+ while(!reader_.peek().isBreak) { reader_.forward(); }
+ }
+
+ /// Scan all characters until next line break.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the scanned
+ /// characters into that slice. The break character itself is not consumed.
+ void scanToNextBreakToSlice() @safe
+ {
+     // Measure the distance to the next isBreak character by peeking ahead ...
+     uint count = 0;
+     for(; !reader_.peek(count).isBreak; ++count) {}
+
+     // ... then consume exactly that many characters into the slice.
+     reader_.sliceBuilder.write(reader_.get(count));
+ }
+
+
+ /// Move to next token in the file/stream.
+ ///
+ /// Spaces, line breaks and comments are skipped. Whenever a line break is
+ /// consumed in the block context, allowSimpleKey_ is switched on.
+ ///
+ /// We do not yet support BOM inside the stream as the
+ /// specification requires. Any such mark will be considered as a part
+ /// of the document.
+ void scanToNextToken() @safe
+ {
+     // TODO(PyYAML): We need to make tab handling rules more sane. A good rule is:
+     //   Tabs cannot precede tokens
+     //     BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
+     //     KEY(block), VALUE(block), BLOCK-ENTRY
+     // So the checking code is
+     //   if <TAB>:
+     //       allowSimpleKey_ = false
+     // We also need to add the check for `allowSimpleKey_ == true` to
+     // `unwindIndent` before issuing BLOCK-END.
+     // Scanners for block, flow, and plain scalars need to be modified.
+
+     while(true)
+     {
+         // Block context skips spaces only. Flow context ignores all blank
+         // characters, tabs included, even where other contexts forbid them.
+         if(flowLevel_ == 0)
+         {
+             findNextNonSpace();
+         }
+         else
+         {
+             while(reader_.peekByte().isNonLinebreakWhitespace) { reader_.forward(); }
+         }
+
+         // A comment runs up to the end of the line.
+         if(reader_.peekByte() == '#') { scanToNextBreak(); }
+
+         // No line break was consumed: the next token starts right here.
+         if(scanLineBreak() == '\0') { return; }
+
+         // A new line began; in block context a simple key may start on it.
+         if(flowLevel_ == 0) { allowSimpleKey_ = true; }
+     }
+ }
+
+ /// Scan directive token.
+ ///
+ /// Handles the YAML and TAG directives; a directive with any other name is
+ /// classified as reserved, gets an empty value, and the rest of its line is
+ /// skipped.
+ ///
+ /// Throws: ScannerException (from the helper scanners) on a malformed directive.
+ Token scanDirective() @safe
+ {
+ Mark startMark = reader_.mark;
+ // Skip the '%'.
+ reader_.forward();
+
+ // Scan directive name
+ reader_.sliceBuilder.begin();
+ scanDirectiveNameToSlice(startMark);
+ const name = reader_.sliceBuilder.finish();
+
+ // A second slice collects the directive's value.
+ reader_.sliceBuilder.begin();
+
+ // Index where tag handle ends and suffix starts in a tag directive value.
+ // Stays at uint.max for anything that is not a TAG directive.
+ uint tagHandleEnd = uint.max;
+ if(name == "YAML") { scanYAMLDirectiveValueToSlice(startMark); }
+ else if(name == "TAG") { tagHandleEnd = scanTagDirectiveValueToSlice(startMark); }
+ char[] value = reader_.sliceBuilder.finish();
+
+ // Taken before the remainder of the line is skipped.
+ Mark endMark = reader_.mark;
+
+ DirectiveType directive;
+ if(name == "YAML") { directive = DirectiveType.yaml; }
+ else if(name == "TAG") { directive = DirectiveType.tag; }
+ else
+ {
+ directive = DirectiveType.reserved;
+ scanToNextBreak();
+ }
+
+ scanDirectiveIgnoredLine(startMark);
+
+ return directiveToken(startMark, endMark, value, directive, tagHandleEnd);
+ }
+
+ /// Scan name of a directive token.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the scanned
+ /// characters into that slice.
+ ///
+ /// Throws: ScannerException if the name is empty, or is not followed by a
+ ///         space, a line break or '\0'.
+ void scanDirectiveNameToSlice(const Mark startMark) @safe
+ {
+ // Scan directive name.
+ scanAlphaNumericToSlice!"a directive"(startMark);
+
+ enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'),
+ new ScannerException("While scanning a directive", startMark,
+ expected("alphanumeric, '-' or '_'", reader_.peek()), reader_.mark));
+ }
+
+ /// Scan value of a YAML directive token. Writes the major and minor version,
+ /// separated by '.', into the slice; nothing is returned.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the scanned
+ /// characters into that slice.
+ ///
+ /// Throws: ScannerException if the value does not have the form
+ ///         digits '.' digits followed by a space, a line break or '\0'.
+ void scanYAMLDirectiveValueToSlice(const Mark startMark) @safe
+ {
+ findNextNonSpace();
+
+ // Major version.
+ scanYAMLDirectiveNumberToSlice(startMark);
+
+ enforce(reader_.peekByte() == '.',
+ new ScannerException("While scanning a directive", startMark,
+ expected("digit or '.'", reader_.peek()), reader_.mark));
+ // Skip the '.'.
+ reader_.forward();
+
+ // The '.' was skipped and not copied, so it is written explicitly.
+ reader_.sliceBuilder.write('.');
+ // Minor version.
+ scanYAMLDirectiveNumberToSlice(startMark);
+
+ enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'),
+ new ScannerException("While scanning a directive", startMark,
+ expected("digit or '.'", reader_.peek()), reader_.mark));
+ }
+
+ /// Scan a number from a YAML directive.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the scanned
+ /// characters into that slice.
+ ///
+ /// Throws: ScannerException if the next character is not a digit.
+ void scanYAMLDirectiveNumberToSlice(const Mark startMark) @safe
+ {
+ enforce(isDigit(reader_.peek()),
+ new ScannerException("While scanning a directive", startMark,
+ expected("digit", reader_.peek()), reader_.mark));
+
+ // Already found the first digit in the enforce(), so set length to 1.
+ uint length = 1;
+ // Peek ahead over the remaining digits; they are consumed in one go below.
+ while(reader_.peek(length).isDigit) { ++length; }
+
+ reader_.sliceBuilder.write(reader_.get(length));
+ }
+
+ /// Scan value of a tag directive.
+ ///
+ /// The handle and the prefix are written into the slice back to back; the
+ /// spaces between them are skipped and not copied.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the scanned
+ /// characters into that slice.
+ ///
+ /// Returns: Length of tag handle (which is before tag prefix) in scanned data
+ uint scanTagDirectiveValueToSlice(const Mark startMark) @safe
+ {
+ findNextNonSpace();
+ // The handle length is measured as the growth of the slice.
+ const startLength = reader_.sliceBuilder.length;
+ scanTagDirectiveHandleToSlice(startMark);
+ const handleLength = cast(uint)(reader_.sliceBuilder.length - startLength);
+ findNextNonSpace();
+ scanTagDirectivePrefixToSlice(startMark);
+
+ return handleLength;
+ }
+
+ /// Scan handle of a tag directive.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the scanned
+ /// characters into that slice.
+ ///
+ /// Throws: ScannerException if the handle is not followed by a space.
+ void scanTagDirectiveHandleToSlice(const Mark startMark) @safe
+ {
+ scanTagHandleToSlice!"directive"(startMark);
+ enforce(reader_.peekByte() == ' ',
+ new ScannerException("While scanning a directive handle", startMark,
+ expected("' '", reader_.peek()), reader_.mark));
+ }
+
+ /// Scan prefix of a tag directive.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the scanned
+ /// characters into that slice.
+ ///
+ /// Throws: ScannerException if the prefix is not followed by a space, a
+ ///         line break or '\0'.
+ void scanTagDirectivePrefixToSlice(const Mark startMark) @safe
+ {
+ scanTagURIToSlice!"directive"(startMark);
+ enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'),
+ new ScannerException("While scanning a directive prefix", startMark,
+ expected("' '", reader_.peek()), reader_.mark));
+ }
+
+ /// Scan (and ignore) ignored line after a directive.
+ ///
+ /// Skips trailing spaces and an optional comment, then consumes the line break.
+ ///
+ /// Throws: ScannerException if anything other than a comment or a line
+ ///         break follows the directive.
+ void scanDirectiveIgnoredLine(const Mark startMark) @safe
+ {
+ findNextNonSpace();
+ if(reader_.peekByte() == '#') { scanToNextBreak(); }
+ enforce(reader_.peek().isBreak,
+ new ScannerException("While scanning a directive", startMark,
+ expected("comment or a line break", reader_.peek()), reader_.mark));
+ scanLineBreak();
+ }
+
+
+ /// Scan an alias or an anchor.
+ ///
+ /// The specification does not restrict characters for anchors and
+ /// aliases. This may lead to problems, for instance, the document:
+ /// [ *alias, value ]
+ /// can be interpreted in two ways, as
+ /// [ "value" ]
+ /// and
+ /// [ *alias , "value" ]
+ /// Therefore we restrict aliases to ASCII alphanumeric characters.
+ ///
+ /// Params: id = Type of the token to produce (alias or anchor).
+ ///
+ /// Throws: ScannerException if the name is empty or is followed by a
+ ///         character that may not terminate it.
+ Token scanAnchor(const TokenID id) @safe
+ {
+     const startMark = reader_.mark;
+     // The indicator character ('*' for an alias) is consumed here.
+     const bool isAlias = reader_.get() == '*';
+
+     reader_.sliceBuilder.begin();
+     if(isAlias)
+     {
+         scanAlphaNumericToSlice!"an alias"(startMark);
+     }
+     else
+     {
+         scanAlphaNumericToSlice!"an anchor"(startMark);
+     }
+     // On error, value is discarded as we return immediately
+     char[] value = reader_.sliceBuilder.finish();
+
+     // The name has to be followed by whitespace or one of these indicators.
+     const bool terminated = reader_.peek().isWhiteSpace ||
+         reader_.peekByte().among!('?', ':', ',', ']', '}', '%', '@');
+     enforce(terminated,
+         new ScannerException(isAlias ? "While scanning an alias" : "While scanning an anchor",
+             startMark,
+             expected("alphanumeric, '-' or '_'", reader_.peek()), reader_.mark));
+
+     switch(id)
+     {
+         case TokenID.alias_: return aliasToken(startMark, reader_.mark, value);
+         case TokenID.anchor: return anchorToken(startMark, reader_.mark, value);
+         default: assert(false, "This code should never be reached");
+     }
+ }
+
+ /// Scan a tag token.
+ ///
+ /// Three forms are told apart by the character after the leading '!':
+ /// - '<': a verbatim tag, '!<uri>';
+ /// - whitespace: the lone '!' tag;
+ /// - anything else: a tag with a handle ('!handle!suffix') if a second '!'
+ ///   occurs before the next space or line break, otherwise '!suffix'.
+ ///
+ /// Throws: ScannerException if the tag is malformed or is not followed by a
+ ///         space or a line break.
+ Token scanTag() @safe
+ {
+ const startMark = reader_.mark;
+ // Character right after the leading '!'.
+ dchar c = reader_.peek(1);
+
+ reader_.sliceBuilder.begin();
+ // Close the slice if anything below throws.
+ scope(failure) { reader_.sliceBuilder.finish(); }
+ // Index where tag handle ends and tag suffix starts in the tag value
+ // (slice) we will produce.
+ uint handleEnd;
+
+ if(c == '<')
+ {
+ // Skip the "!<".
+ reader_.forward(2);
+
+ handleEnd = 0;
+ scanTagURIToSlice!"tag"(startMark);
+ enforce(reader_.peekByte() == '>',
+ new ScannerException("While scanning a tag", startMark,
+ expected("'>'", reader_.peek()), reader_.mark));
+ reader_.forward();
+ }
+ else if(c.isWhiteSpace)
+ {
+ // A lone '!': the value is just "!".
+ reader_.forward();
+ handleEnd = 0;
+ reader_.sliceBuilder.write('!');
+ }
+ else
+ {
+ uint length = 1;
+ bool useHandle;
+
+ // Look ahead for a second '!' before the next space or line break.
+ while(!c.isBreakOrSpace)
+ {
+ if(c == '!')
+ {
+ useHandle = true;
+ break;
+ }
+ ++length;
+ c = reader_.peek(length);
+ }
+
+ if(useHandle)
+ {
+ scanTagHandleToSlice!"tag"(startMark);
+ handleEnd = cast(uint)reader_.sliceBuilder.length;
+ }
+ else
+ {
+ // No second '!': the handle is the leading '!' alone.
+ reader_.forward();
+ reader_.sliceBuilder.write('!');
+ handleEnd = cast(uint)reader_.sliceBuilder.length;
+ }
+
+ scanTagURIToSlice!"tag"(startMark);
+ }
+
+ enforce(reader_.peek().isBreakOrSpace,
+ new ScannerException("While scanning a tag", startMark, expected("' '", reader_.peek()),
+ reader_.mark));
+
+ char[] slice = reader_.sliceBuilder.finish();
+ return tagToken(startMark, reader_.mark, slice, handleEnd);
+ }
+
+ /// Scan a block scalar token with specified style.
+ ///
+ /// Skips the block scalar indicator character, reads the optional chomping and
+ /// indentation indicators and the rest of the header line, then collects the
+ /// scalar's content into the Reader's slice, applying folding and chomping.
+ ///
+ /// Params:  style = Scalar style; compared against ScalarStyle.folded to decide
+ ///                  line folding, and passed on to the produced token.
+ ///
+ /// Returns: Token built by scalarToken() from the start mark, the end mark
+ ///          reported by the break-scanning helpers, the slice and the style.
+ ///
+ /// Throws:  ScannerException (from the header-scanning helpers) on a malformed
+ ///          block scalar header.
+ Token scanBlockScalar(const ScalarStyle style) @safe
+ {
+ const startMark = reader_.mark;
+
+ // Scan the header.
+ reader_.forward();
+
+ const indicators = scanBlockScalarIndicators(startMark);
+
+ const chomping = indicators[0];
+ const increment = indicators[1];
+ scanBlockScalarIgnoredLine(startMark);
+
+ // Determine the indentation level and go to the first non-empty line.
+ Mark endMark;
+ uint indent = max(1, indent_ + 1);
+
+ reader_.sliceBuilder.begin();
+ alias Transaction = SliceBuilder.Transaction;
+ // Used to strip the last line breaks written to the slice at the end of the
+ // scalar, which may be needed based on chomping.
+ Transaction breaksTransaction = Transaction(&reader_.sliceBuilder);
+ // Read the first indentation/line breaks before the scalar.
+ size_t startLen = reader_.sliceBuilder.length;
+ // int.min is the value scanBlockScalarIndicators() reports when no
+ // indentation indicator was present; the indentation is then detected from
+ // the content.
+ if(increment == int.min)
+ {
+ auto indentation = scanBlockScalarIndentationToSlice();
+ endMark = indentation[1];
+ indent = max(indent, indentation[0]);
+ }
+ else
+ {
+ indent += increment - 1;
+ endMark = scanBlockScalarBreaksToSlice(indent);
+ }
+
+ // int.max means there's no line break (int.max is outside UTF-32).
+ dchar lineBreak = cast(dchar)int.max;
+
+ // Scan the inner part of the block scalar.
+ while(reader_.column == indent && reader_.peekByte() != '\0')
+ {
+ // More content follows, so the breaks scanned before it are kept.
+ breaksTransaction.commit();
+ const bool leadingNonSpace = !reader_.peekByte().among!(' ', '\t');
+ // This is where the 'interesting' non-whitespace data gets read.
+ scanToNextBreakToSlice();
+ lineBreak = scanLineBreak();
+
+
+ // This transaction serves to rollback data read in the
+ // scanBlockScalarBreaksToSlice() call.
+ breaksTransaction = Transaction(&reader_.sliceBuilder);
+ startLen = reader_.sliceBuilder.length;
+ // The line breaks should actually be written _after_ the if() block
+ // below. We work around that by inserting lineBreak at position startLen
+ // (i.e. in front of the breaks scanned here) inside that if() block.
+ endMark = scanBlockScalarBreaksToSlice(indent);
+
+ // This will not run during the last iteration (see the if() vs the
+ // while()), hence breaksTransaction rollback (which happens after this
+ // loop) will never roll back data written in this if() block.
+ if(reader_.column == indent && reader_.peekByte() != '\0')
+ {
+ // Unfortunately, folding rules are ambiguous.
+
+ // This is the folding according to the specification:
+ if(style == ScalarStyle.folded && lineBreak == '\n' &&
+ leadingNonSpace && !reader_.peekByte().among!(' ', '\t'))
+ {
+ // No breaks were scanned; no need to insert the space in the
+ // middle of slice.
+ if(startLen == reader_.sliceBuilder.length)
+ {
+ reader_.sliceBuilder.write(' ');
+ }
+ }
+ else
+ {
+ // We need to insert in the middle of the slice in case any line
+ // breaks were scanned.
+ reader_.sliceBuilder.insert(lineBreak, startLen);
+ }
+
+ ////this is Clark Evans's interpretation (also in the spec
+ ////examples):
+ //
+ //if(style == ScalarStyle.folded && lineBreak == '\n')
+ //{
+ // if(startLen == endLen)
+ // {
+ // if(!" \t"d.canFind(reader_.peekByte()))
+ // {
+ // reader_.sliceBuilder.write(' ');
+ // }
+ // else
+ // {
+ // chunks ~= lineBreak;
+ // }
+ // }
+ //}
+ //else
+ //{
+ // reader_.sliceBuilder.insertBack(lineBreak, endLen - startLen);
+ //}
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ // If chomping is Keep, we keep (commit) the last scanned line breaks
+ // (which are at the end of the scalar). Otherwise we remove them (end the
+ // transaction).
+ if(chomping == Chomping.keep) { breaksTransaction.commit(); }
+ else { breaksTransaction.end(); }
+ // lineBreak still equals int.max if the loop above never ran, i.e. the
+ // scalar has no content line.
+ if(chomping != Chomping.strip && lineBreak != int.max)
+ {
+ // If chomping is Keep, we keep the line break but the first line break
+ // that isn't stripped (since chomping isn't Strip in this branch) must
+ // be inserted _before_ the other line breaks.
+ if(chomping == Chomping.keep)
+ {
+ reader_.sliceBuilder.insert(lineBreak, startLen);
+ }
+ // If chomping is not Keep, breaksTransaction was cancelled so we can
+ // directly write the first line break (as it isn't stripped - chomping
+ // is not Strip)
+ else
+ {
+ reader_.sliceBuilder.write(lineBreak);
+ }
+ }
+
+ char[] slice = reader_.sliceBuilder.finish();
+ return scalarToken(startMark, endMark, slice, style);
+ }
+
+ /// Scan the chomping and indentation indicators of a block scalar header.
+ ///
+ /// Either indicator may be absent and they may appear in either order.
+ ///
+ /// Params:  startMark = Start of the block scalar, used in error messages.
+ ///
+ /// Returns: Tuple of the chomping mode (Chomping.clip if no indicator was
+ ///          found) and the indentation increment (int.min if no indicator
+ ///          was found).
+ ///
+ /// Throws:  ScannerException if the indicators are followed by anything
+ ///          other than a space, a line break or '\0'.
+ Tuple!(Chomping, int) scanBlockScalarIndicators(const Mark startMark) @safe
+ {
+     Chomping chompMode = Chomping.clip;
+     int indentStep = int.min;
+     dchar next = reader_.peek();
+
+     // Try "chomping then increment" first, else "increment then chomping".
+     const bool chompingFirst = getChomping(next, chompMode);
+     if(chompingFirst)
+     {
+         getIncrement(next, indentStep, startMark);
+     }
+     else if(getIncrement(next, indentStep, startMark))
+     {
+         getChomping(next, chompMode);
+     }
+
+     if(!next.among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'))
+     {
+         throw new ScannerException("While scanning a block scalar", startMark,
+             expected("chomping or indentation indicator", next), reader_.mark);
+     }
+
+     return tuple(chompMode, indentStep);
+ }
+
+ /// Read a chomping indicator ('+' or '-') if one is present.
+ ///
+ /// Used in scanBlockScalarIndicators.
+ ///
+ /// Params:
+ ///
+ /// c        = The character that may be a chomping indicator.
+ ///            If an indicator is detected, this is updated to the next
+ ///            character in the Reader.
+ /// chomping = Receives Chomping.keep for '+' or Chomping.strip for '-';
+ ///            left untouched otherwise.
+ ///
+ /// Returns: true if an indicator was consumed, false otherwise.
+ bool getChomping(ref dchar c, ref Chomping chomping) @safe
+ {
+     switch(c)
+     {
+         case '+':
+             chomping = Chomping.keep;
+             break;
+         case '-':
+             chomping = Chomping.strip;
+             break;
+         default:
+             return false;
+     }
+
+     // Consume the indicator and hand the following character back.
+     reader_.forward();
+     c = reader_.peek();
+     return true;
+ }
+
+ /// Read an indentation increment indicator (a digit) if one is present.
+ ///
+ /// Used in scanBlockScalarIndicators.
+ ///
+ /// Params:
+ ///
+ /// c         = The character that may be an increment indicator.
+ ///             If an increment indicator is detected, this will be updated to
+ ///             the next character in the Reader.
+ /// increment = Receives the digit's value, if a digit is detected.
+ /// startMark = Mark for error messages.
+ ///
+ /// Returns: true if an indicator was consumed, false if c is not a digit.
+ ///
+ /// Throws:  ScannerException if the digit is '0'.
+ bool getIncrement(ref dchar c, ref int increment, const Mark startMark) @safe
+ {
+     const bool digit = c.isDigit;
+     if(!digit) { return false; }
+
+     // Numeric value of the ASCII digit.
+     increment = c - '0';
+     assert(increment >= 0 && increment < 10, "Digit has invalid value");
+
+     if(increment <= 0)
+     {
+         throw new ScannerException("While scanning a block scalar", startMark,
+             expected("indentation indicator in range 1-9", "0"), reader_.mark);
+     }
+
+     reader_.forward();
+     c = reader_.peek();
+     return true;
+ }
+
+ /// Skip whatever follows the indicators on a block scalar header line.
+ ///
+ /// Advances with findNextNonSpace(), skips an optional '#' comment up to the
+ /// next break, then requires a character accepted by isBreak and consumes the
+ /// line break.
+ ///
+ /// Params:  startMark = Start of the block scalar, used in error messages.
+ ///
+ /// Throws:  ScannerException if something other than a comment or a line
+ ///          break follows the header.
+ void scanBlockScalarIgnoredLine(const Mark startMark) @safe
+ {
+     findNextNonSpace();
+
+     const bool atComment = reader_.peekByte() == '#';
+     if(atComment) { scanToNextBreak(); }
+
+     if(!reader_.peek().isBreak)
+     {
+         throw new ScannerException("While scanning a block scalar", startMark,
+             expected("comment or line break", reader_.peek()), reader_.mark);
+     }
+
+     scanLineBreak();
+ }
+
+ /// Scan leading indentation and empty lines of a block scalar.
+ ///
+ /// Assumes that the caller is building a slice in Reader; the scanned line
+ /// breaks are written into that slice (spaces are only skipped).
+ ///
+ /// Returns: Tuple of the largest column reached while skipping spaces and the
+ ///          mark after the last scanned line break (or the starting mark if
+ ///          there was none).
+ Tuple!(uint, Mark) scanBlockScalarIndentationToSlice() @safe
+ {
+     uint deepest = 0;
+     Mark lastBreakEnd = reader_.mark;
+
+     for(;;)
+     {
+         if(!reader_.peek().among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029'))
+         {
+             break;
+         }
+
+         if(reader_.peekByte() == ' ')
+         {
+             // Indentation: skip it and remember how far right we got.
+             reader_.forward();
+             deepest = max(reader_.column, deepest);
+         }
+         else
+         {
+             // A line break: record it in the slice.
+             reader_.sliceBuilder.write(scanLineBreak());
+             lastBreakEnd = reader_.mark;
+         }
+     }
+
+     return tuple(deepest, lastBreakEnd);
+ }
+
+ /// Scan line breaks at lower or specified indentation in a block scalar.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the scanned
+ /// line breaks into that slice.
+ ///
+ /// Params:  indent = Column up to which leading spaces are skipped.
+ ///
+ /// Returns: Mark after the last scanned line break (or the starting mark if
+ ///          there was none).
+ Mark scanBlockScalarBreaksToSlice(const uint indent) @safe
+ {
+     Mark lastBreakEnd = reader_.mark;
+
+     while(true)
+     {
+         // Skip spaces, but never past the scalar's indentation column.
+         for(; reader_.column < indent && reader_.peekByte() == ' '; reader_.forward()) {}
+
+         const bool atBreak =
+             reader_.peek().among!('\n', '\r', '\u0085', '\u2028', '\u2029') != 0;
+         if(!atBreak) { break; }
+
+         reader_.sliceBuilder.write(scanLineBreak());
+         lastBreakEnd = reader_.mark;
+     }
+
+     return lastBreakEnd;
+ }
+
+ /// Scan a quoted flow scalar token with specified quotes.
+ ///
+ /// Params:  quotes = Scalar style (single or double quoted), forwarded to the
+ ///                   content scanner and to the produced token.
+ ///
+ /// Returns: Scalar token spanning from the opening quote to just past the
+ ///          closing quote.
+ Token scanFlowScalar(const ScalarStyle quotes) @safe
+ {
+     const begin = reader_.mark;
+     // The opening quote character; the scalar ends at its next unescaped
+     // occurrence.
+     const closing = reader_.get();
+
+     reader_.sliceBuilder.begin();
+
+     // Alternate between non-space runs and space runs until the closing
+     // quote is the next character.
+     for(;;)
+     {
+         scanFlowScalarNonSpacesToSlice(quotes, begin);
+         if(reader_.peek() == closing) { break; }
+         scanFlowScalarSpacesToSlice(begin);
+     }
+     // Consume the closing quote.
+     reader_.forward();
+
+     auto content = reader_.sliceBuilder.finish();
+     return scalarToken(begin, reader_.mark, content, quotes);
+ }
+
+ /// Scan nonspace characters in a flow scalar.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the scanned
+ /// characters into that slice.
+ ///
+ /// Repeatedly copies a run of ordinary characters, then handles one special
+ /// character (quote, backslash or escape sequence); returns as soon as the
+ /// character after a run is none of those.
+ ///
+ /// Params:
+ ///
+ /// quotes    = Quoting style of the scalar; decides which characters are
+ ///             special.
+ /// startMark = Start of the scalar, used in error messages.
+ ///
+ /// Throws:  ScannerException on a malformed hexadecimal escape or an
+ ///          unsupported escape character in a double quoted scalar.
+ void scanFlowScalarNonSpacesToSlice(const ScalarStyle quotes, const Mark startMark)
+ @safe
+ {
+ for(;;)
+ {
+ // NOTE(review): this initial value is overwritten below before it is read.
+ dchar c = reader_.peek();
+
+ // Count code points up to the next character matched by
+ // isFlowScalarBreakSpace and copy them to the slice in one go.
+ size_t numCodePoints;
+ while(!reader_.peek(numCodePoints).isFlowScalarBreakSpace) { ++numCodePoints; }
+
+ if (numCodePoints > 0) { reader_.sliceBuilder.write(reader_.get(numCodePoints)); }
+
+ c = reader_.peek();
+ // In a single quoted scalar, '' stands for one literal quote.
+ if(quotes == ScalarStyle.singleQuoted && c == '\'' && reader_.peek(1) == '\'')
+ {
+ reader_.forward(2);
+ reader_.sliceBuilder.write('\'');
+ }
+ // The other style's quote (and a backslash in a single quoted scalar) is
+ // copied as an ordinary character.
+ else if((quotes == ScalarStyle.doubleQuoted && c == '\'') ||
+ (quotes == ScalarStyle.singleQuoted && c.among!('"', '\\')))
+ {
+ reader_.forward();
+ reader_.sliceBuilder.write(c);
+ }
+ // A backslash in a double quoted scalar starts an escape sequence.
+ else if(quotes == ScalarStyle.doubleQuoted && c == '\\')
+ {
+ reader_.forward();
+ c = reader_.peek();
+ if(c.among!(escapes))
+ {
+ reader_.forward();
+ // Escaping has been moved to Parser as it can't be done in
+ // place (in a slice) in case of '\P' and '\L' (very uncommon,
+ // but we don't want to break the spec)
+ char[2] escapeSequence = ['\\', cast(char)c];
+ reader_.sliceBuilder.write(escapeSequence);
+ }
+ else if(c.among!(escapeHexCodeList))
+ {
+ const hexLength = dyaml.escapes.escapeHexLength(c);
+ reader_.forward();
+
+ // Validate all hex digits before consuming any of them.
+ foreach(i; 0 .. hexLength) {
+ enforce(reader_.peek(i).isHexDigit,
+ new ScannerException("While scanning a double quoted scalar", startMark,
+ expected("escape sequence of hexadecimal numbers",
+ reader_.peek(i)), reader_.mark));
+ }
+ char[] hex = reader_.get(hexLength);
+
+ enforce((hex.length > 0) && (hex.length <= 8),
+ new ScannerException("While scanning a double quoted scalar", startMark,
+ "overflow when parsing an escape sequence of " ~
+ "hexadecimal numbers.", reader_.mark));
+
+ // The escape is written unresolved: backslash, code letter, hex digits.
+ char[2] escapeStart = ['\\', cast(char) c];
+ reader_.sliceBuilder.write(escapeStart);
+ reader_.sliceBuilder.write(hex);
+
+ }
+ // Backslash directly followed by a line break: the first break is
+ // consumed without being written; any further breaks go to the slice.
+ else if(c.among!('\n', '\r', '\u0085', '\u2028', '\u2029'))
+ {
+ scanLineBreak();
+ scanFlowScalarBreaksToSlice(startMark);
+ }
+ else
+ {
+ throw new ScannerException("While scanning a double quoted scalar", startMark,
+ text("found unsupported escape character ", c),
+ reader_.mark);
+ }
+ }
+ // Anything else (space, break, closing quote, ...) is left to the caller.
+ else { return; }
+ }
+ }
+
+ /// Scan space characters in a flow scalar.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the scanned
+ /// spaces into that slice.
+ ///
+ /// Spaces/tabs not followed by a line break are copied verbatim. If a line
+ /// break follows, the spaces are dropped and the break(s) are folded: a single
+ /// '\n' becomes one space, otherwise the breaks themselves are written.
+ ///
+ /// Params:  startMark = Start of the scalar, used in error messages.
+ ///
+ /// Throws:  ScannerException if '\0' follows the spaces.
+ void scanFlowScalarSpacesToSlice(const Mark startMark) @safe
+ {
+     // Measure the run of spaces and tabs.
+     size_t blankCount = 0;
+     while(reader_.peekByte(blankCount).among!(' ', '\t')) { ++blankCount; }
+     auto blanks = reader_.prefixBytes(blankCount);
+
+     // The character right after the run decides what happens to it.
+     const following = reader_.peek(blankCount);
+     if(following == '\0')
+     {
+         throw new ScannerException("While scanning a quoted scalar", startMark,
+             "found unexpected end of buffer", reader_.mark);
+     }
+
+     reader_.forward(blankCount);
+
+     // Spaces not followed by a line break are kept as they are.
+     const bool breakFollows =
+         following.among!('\n', '\r', '\u0085', '\u2028', '\u2029') != 0;
+     if(!breakFollows)
+     {
+         reader_.sliceBuilder.write(blanks);
+         return;
+     }
+
+     // There's a line break after the spaces.
+     const firstBreak = scanLineBreak();
+     const bool plainNewline = firstBreak == '\n';
+
+     if(!plainNewline) { reader_.sliceBuilder.write(firstBreak); }
+
+     // Any further line breaks are scanned into the slice.
+     const bool moreBreaks = scanFlowScalarBreaksToSlice(startMark);
+
+     // Exactly one ordinary line break folds into a single space.
+     if(plainNewline && !moreBreaks) { reader_.sliceBuilder.write(' '); }
+ }
+
+ /// Scan line breaks in a flow scalar.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the scanned
+ /// line breaks into that slice.
+ ///
+ /// Params:  startMark = Start of the scalar, used in error messages.
+ ///
+ /// Returns: true if at least one line break was scanned.
+ ///
+ /// Throws:  ScannerException if a line starts with a document separator
+ ///          ("---" or "...") followed by whitespace.
+ bool scanFlowScalarBreaksToSlice(const Mark startMark) @safe
+ {
+     bool sawBreak = false;
+
+     while(true)
+     {
+         // Instead of checking indentation, we check for document separators.
+         const lead = reader_.prefix(3);
+         const bool atSeparator =
+             (lead == "---" || lead == "...") && reader_.peek(3).isWhiteSpace;
+         if(atSeparator)
+         {
+             throw new ScannerException("While scanning a quoted scalar", startMark,
+                 "found unexpected document separator", reader_.mark);
+         }
+
+         // Skip spaces and tabs at the start of the line.
+         for(; reader_.peekByte().among!(' ', '\t'); reader_.forward()) {}
+
+         // Anything that is not a line break ends the run of breaks.
+         if(!reader_.peek().among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029'))
+         {
+             break;
+         }
+
+         const scannedBreak = scanLineBreak();
+         sawBreak = true;
+         reader_.sliceBuilder.write(scannedBreak);
+     }
+
+     return sawBreak;
+ }
+
+ /// Scan plain scalar token (no block, no quotes).
+ ///
+ /// Alternates between reading a run of scalar characters and the whitespace
+ /// that follows it, until a comment, an indicator that ends the scalar, or
+ /// (in block context) a line indented less than required is reached.
+ ///
+ /// Returns: Plain scalar token from the start mark to the end of the last
+ ///          run of scalar characters.
+ ///
+ /// Throws:  ScannerException on a ':' in flow context that is followed by
+ ///          neither whitespace nor one of ',', '[', ']', '{', '}'.
+ Token scanPlain() @safe
+ {
+ // We keep track of the allowSimpleKey_ flag here.
+ // Indentation rules are loosened for the flow context
+ const startMark = reader_.mark;
+ Mark endMark = startMark;
+ const indent = indent_ + 1;
+
+ // We allow zero indentation for scalars, but then we need to check for
+ // document separators at the beginning of the line.
+ // if(indent == 0) { indent = 1; }
+
+ reader_.sliceBuilder.begin();
+
+ alias Transaction = SliceBuilder.Transaction;
+ // Wraps the whitespace scanned after each run of scalar characters, so it
+ // can be committed if more scalar content follows or ended otherwise.
+ Transaction spacesTransaction;
+ // Stop at a comment.
+ while(reader_.peekByte() != '#')
+ {
+ // Scan the entire plain scalar.
+ // length counts the code points that belong to this run; c is the
+ // character at that offset.
+ size_t length;
+ dchar c = reader_.peek(length);
+ for(;;)
+ {
+ const cNext = reader_.peek(length + 1);
+ // The run ends at whitespace, at ':' followed by whitespace in block
+ // context, or at any flow indicator in flow context.
+ if(c.isWhiteSpace ||
+ (flowLevel_ == 0 && c == ':' && cNext.isWhiteSpace) ||
+ (flowLevel_ > 0 && c.among!(',', ':', '?', '[', ']', '{', '}')))
+ {
+ break;
+ }
+ ++length;
+ c = cNext;
+ }
+
+ // It's not clear what we should do with ':' in the flow context.
+ enforce(flowLevel_ == 0 || c != ':' ||
+ reader_.peek(length + 1).isWhiteSpace ||
+ reader_.peek(length + 1).among!(',', '[', ']', '{', '}'),
+ new ScannerException("While scanning a plain scalar", startMark,
+ "found unexpected ':' . Please check " ~
+ "http://pyyaml.org/wiki/YAMLColonInFlowContext for details.",
+ reader_.mark));
+
+ // Nothing more to read for this scalar.
+ if(length == 0) { break; }
+
+ allowSimpleKey_ = false;
+
+ reader_.sliceBuilder.write(reader_.get(length));
+
+ endMark = reader_.mark;
+
+ // More scalar content was read, so the whitespace scanned in the previous
+ // iteration is kept; a fresh transaction covers the whitespace scanned next.
+ spacesTransaction.commit();
+ spacesTransaction = Transaction(&reader_.sliceBuilder);
+
+ const startLength = reader_.sliceBuilder.length;
+ scanPlainSpacesToSlice();
+ // Stop if no whitespace was written, or (in block context) the next line
+ // is indented less than this scalar requires.
+ if(startLength == reader_.sliceBuilder.length ||
+ (flowLevel_ == 0 && reader_.column < indent))
+ {
+ break;
+ }
+ }
+
+ // Ends the transaction without committing; presumably this drops any
+ // trailing whitespace written after the last run - confirm against
+ // SliceBuilder.Transaction.
+ spacesTransaction.end();
+ char[] slice = reader_.sliceBuilder.finish();
+
+ return scalarToken(startMark, endMark, slice, ScalarStyle.plain);
+ }
+
+ /// Scan spaces in a plain scalar.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the spaces
+ /// into that slice.
+ ///
+ /// Spaces that are not followed by a line break are copied verbatim. When a
+ /// line break follows, allowSimpleKey_ is set, the spaces are dropped and the
+ /// break(s) are folded: a single '\n' becomes one space, otherwise the breaks
+ /// themselves are written. Scanning stops early at a document separator.
+ void scanPlainSpacesToSlice() @safe
+ {
+ // The specification is really confusing about tabs in plain scalars.
+ // We just forbid them completely. Do not use tabs in YAML!
+
+ // Get as many plain spaces as there are.
+ size_t length;
+ while(reader_.peekByte(length) == ' ') { ++length; }
+ char[] whitespaces = reader_.prefixBytes(length);
+ reader_.forward(length);
+
+ const dchar c = reader_.peek();
+ // NOTE(review): isNSChar is defined outside this section; from its use here
+ // it presumably matches a space or a line break character - confirm.
+ if(!c.isNSChar)
+ {
+ // We have spaces, but no newline.
+ if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); }
+ return;
+ }
+
+ // Newline after the spaces (if any)
+ const lineBreak = scanLineBreak();
+ allowSimpleKey_ = true;
+
+ // True if the reader is at "---" or "..." followed by whitespace, a line
+ // break or '\0'.
+ static bool end(Reader reader_) @safe pure
+ {
+ const prefix = reader_.prefix(3);
+ return ("---" == prefix || "..." == prefix)
+ && reader_.peek(3).among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
+ }
+
+ if(end(reader_)) { return; }
+
+ bool extraBreaks;
+
+ alias Transaction = SliceBuilder.Transaction;
+ auto transaction = Transaction(&reader_.sliceBuilder);
+ if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
+ while(reader_.peek().isNSChar)
+ {
+ if(reader_.peekByte() == ' ') { reader_.forward(); }
+ else
+ {
+ const lBreak = scanLineBreak();
+ extraBreaks = true;
+ reader_.sliceBuilder.write(lBreak);
+
+ // NOTE(review): this returns without committing the transaction; what
+ // happens to the breaks written so far depends on Transaction - confirm.
+ if(end(reader_)) { return; }
+ }
+ }
+ transaction.commit();
+
+ // No line breaks, only a space.
+ if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); }
+ }
+
+ /// Scan handle of a tag token.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the scanned
+ /// characters into that slice.
+ ///
+ /// A handle is either a single '!' (when a space follows it) or
+ /// '!' + word characters (alphanumeric, '-', '_') + '!'.
+ ///
+ /// Params:  name      = What is being scanned, used in error messages.
+ ///          startMark = Start of the enclosing token, used in error messages.
+ ///
+ /// Throws:  ScannerException if the handle does not start with '!' or is not
+ ///          terminated by '!'.
+ void scanTagHandleToSlice(string name)(const Mark startMark)
+ {
+     enum contextMsg = "While scanning a " ~ name;
+
+     dchar current = reader_.peek();
+     if(current != '!')
+     {
+         throw new ScannerException(contextMsg, startMark,
+             expected("'!'", current), reader_.mark);
+     }
+
+     // Number of characters belonging to the handle, including the first '!'.
+     uint handleLen = 1;
+     current = reader_.peek(handleLen);
+     if(current != ' ')
+     {
+         // Walk over the word characters between the two '!'.
+         for(; current.isAlphaNum || current.among!('-', '_');
+             current = reader_.peek(++handleLen)) {}
+
+         if(current != '!')
+         {
+             throw new ScannerException(contextMsg, startMark,
+                 expected("'!'", current), reader_.mark);
+         }
+         // Include the closing '!'.
+         ++handleLen;
+     }
+
+     reader_.sliceBuilder.write(reader_.get(handleLen));
+ }
+
+ /// Scan URI in a tag token.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the scanned
+ /// characters into that slice. '%' escape sequences are decoded by
+ /// scanURIEscapesToSlice; all other URI characters are copied as they are.
+ ///
+ /// Params:  name      = What is being scanned, used in error messages.
+ ///          startMark = Start of the enclosing token, used in error messages.
+ ///
+ /// Throws:  ScannerException if nothing was written to the slice.
+ void scanTagURIToSlice(string name)(const Mark startMark)
+ {
+     // Note: we do not check if URI is well-formed.
+     enum contextMsg = "While parsing a " ~ name;
+     const lengthBefore = reader_.sliceBuilder.length;
+
+     // Count of plain URI characters peeked but not yet copied to the slice.
+     uint pending = 0;
+     dchar current = reader_.peek();
+     for(; current.isAlphaNum || current.isURIChar; current = reader_.peek(pending))
+     {
+         if(current != '%')
+         {
+             ++pending;
+             continue;
+         }
+
+         // Flush what we have so far, then decode the escape sequence(s).
+         reader_.sliceBuilder.write(reader_.get(pending));
+         pending = 0;
+         scanURIEscapesToSlice!name(startMark);
+     }
+
+     if(pending > 0)
+     {
+         reader_.sliceBuilder.write(reader_.get(pending));
+     }
+
+     // OK if we scanned something, error otherwise.
+     if(!(reader_.sliceBuilder.length > lengthBefore))
+     {
+         throw new ScannerException(contextMsg, startMark,
+             expected("URI", current), reader_.mark);
+     }
+ }
+
+ // Not @nogc yet because std.utf.decode is not @nogc
+ /// Scan URI escape sequences.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the scanned
+ /// characters into that slice.
+ ///
+ /// Consumes consecutive "%XX" sequences, treats the resulting bytes as UTF-8
+ /// and writes the decoded code points to the slice.
+ ///
+ /// Params:  name      = What is being scanned, used in error messages.
+ ///          startMark = Start of the enclosing token, used in error messages.
+ ///
+ /// Throws:  ScannerException if '%' is not followed by two hexadecimal digits
+ ///          or the bytes are not valid UTF-8.
+ void scanURIEscapesToSlice(string name)(const Mark startMark)
+ {
+     import core.exception : UnicodeException;
+
+     enum contextMsg = "While scanning a " ~ name;
+
+     // URI escapes encode a UTF-8 string. The code units are collected here
+     // and decoded into UTF-32 once all escapes have been read.
+     Appender!string utf8Units;
+
+     while(reader_.peekByte() == '%')
+     {
+         // Skip the '%'.
+         reader_.forward();
+         char[2] hexPair = [reader_.peekByte(), reader_.peekByte(1)];
+
+         const bool bothHex = hexPair[0].isHexDigit && hexPair[1].isHexDigit;
+         if(!bothHex)
+         {
+             throw new ScannerException(contextMsg, startMark,
+                 expected("URI escape sequence of 2 hexadecimal numbers", hexPair),
+                 reader_.mark);
+         }
+
+         utf8Units ~= hexPair[].to!ubyte(16);
+
+         reader_.forward(2);
+     }
+
+     try
+     {
+         // Iterating by dchar decodes the UTF-8 code units.
+         foreach (dchar codePoint; utf8Units.data)
+         {
+             reader_.sliceBuilder.write(codePoint);
+         }
+     }
+     catch (UnicodeException)
+     {
+         throw new ScannerException(contextMsg, startMark,
+             "Invalid UTF-8 data encoded in URI escape sequence",
+             reader_.mark);
+     }
+ }
+
+
+ /// Scan a line break, if any.
+ ///
+ /// Transforms:
+ ///   '\r\n'   : '\n'
+ ///   '\r'     : '\n'
+ ///   '\n'     : '\n'
+ ///   '\u0085' : '\n'
+ ///   '\u2028' : '\u2028'
+ ///   '\u2029' : '\u2029'
+ ///   no break : '\0'
+ ///
+ /// Returns: The (normalized) line break that was consumed, or '\0' if the
+ ///          reader is not at a line break, in which case nothing is consumed.
+ dchar scanLineBreak() @safe
+ {
+     // Fast path for ASCII line breaks.
+     const firstByte = reader_.peekByte();
+     if(firstByte < 0x80)
+     {
+         if(firstByte != '\n' && firstByte != '\r') { return '\0'; }
+
+         // "\r\n" counts as a single break.
+         const bool crlf = reader_.prefix(2) == "\r\n";
+         if(crlf) { reader_.forward(2); }
+         else     { reader_.forward(); }
+         return '\n';
+     }
+
+     // Non-ASCII line breaks need the full code point.
+     const next = reader_.peek();
+     switch(next)
+     {
+         case '\u0085':
+             reader_.forward();
+             return '\n';
+         case '\u2028':
+         case '\u2029':
+             reader_.forward();
+             return next;
+         default:
+             return '\0';
+     }
+ }
+}