import { all, char, seq, text, until } from 'tokenizer-dsl';
// White space as defined by the XML `S` production: https://www.w3.org/TR/xml/#NT-S
/**
 * Checks whether a char code is a space, a tab, a carriage return or a line feed.
 *
 * @param charCode The char code to check.
 * @returns `true` for one of the four white space char codes, `false` otherwise.
 */
var isSpaceChar = function (charCode) {
    switch (charCode) {
        case 32 /* ' ' */:
        case 9 /* '\t' */:
        case 13 /* '\r' */:
        case 10 /* '\n' */:
            return true;
        default:
            return false;
    }
};
// Ranges follow the XML `NameStartChar` production: https://www.w3.org/TR/xml/#NT-NameStartChar
/**
 * Checks whether a char code may start a tag name.
 *
 * NOTE(review): if the caller passes UTF-16 code units (e.g. from `String.prototype.charCodeAt`), values
 * never exceed 0xffff and the last range can't match — confirm how `tokenizer-dsl` invokes this predicate.
 *
 * @param charCode The char code to check.
 * @returns `true` if the char code falls into one of the name start ranges.
 */
var isTagNameStartChar = function (charCode) {
    // ASCII letters
    if (charCode >= 97 /* 'a' */ && charCode <= 122 /* 'z' */) {
        return true;
    }
    if (charCode >= 65 /* 'A' */ && charCode <= 90 /* 'Z' */) {
        return true;
    }
    // Punctuation that is allowed at the start of a name
    if (charCode === 95 /* '_' */ || charCode === 58 /* ':' */) {
        return true;
    }
    // Latin-1 letters and beyond; 0xd7 and 0xf7 are intentionally left out by the ranges
    if (charCode >= 0xc0 && charCode <= 0xd6) {
        return true;
    }
    if (charCode >= 0xd8 && charCode <= 0xf6) {
        return true;
    }
    if (charCode >= 0xf8 && charCode <= 0x2ff) {
        return true;
    }
    if (charCode >= 0x370 && charCode <= 0x37d) {
        return true;
    }
    if (charCode >= 0x37f && charCode <= 0x1fff) {
        return true;
    }
    if (charCode >= 0x200c && charCode <= 0x200d) {
        return true;
    }
    if (charCode >= 0x2070 && charCode <= 0x218f) {
        return true;
    }
    if (charCode >= 0x2c00 && charCode <= 0x2fef) {
        return true;
    }
    if (charCode >= 0x3001 && charCode <= 0xd7ff) {
        return true;
    }
    if (charCode >= 0xf900 && charCode <= 0xfdcf) {
        return true;
    }
    if (charCode >= 0xfdf0 && charCode <= 0xfffd) {
        return true;
    }
    // Supplementary planes
    return charCode >= 0x10000 && charCode <= 0xeffff;
};
/**
 * Checks whether a char is skipped like white space inside a tag: a space, a tab, a carriage return,
 * a line feed or a slash.
 *
 * @param charCode The char code to check.
 * @returns `true` if the char code is one of the listed chars.
 */
var isTagSpaceChar = function (charCode) {
    switch (charCode) {
        // Same chars as isSpaceChar
        case 32 /* ' ' */:
        case 9 /* '\t' */:
        case 13 /* '\r' */:
        case 10 /* '\n' */:
        // A slash between attributes is skipped as well
        case 47 /* '/' */:
            return true;
        default:
            return false;
    }
};
/**
 * Checks whether a char code is one of the chars that can't be a part of a tag name: white space,
 * a slash or a greater-than sign.
 *
 * @param charCode The char code to check.
 * @returns `true` if the char code is one of the listed chars.
 */
var isNotTagNameChar = function (charCode) {
    switch (charCode) {
        // Same chars as isSpaceChar
        case 32 /* ' ' */:
        case 9 /* '\t' */:
        case 13 /* '\r' */:
        case 10 /* '\n' */:
        // Tag delimiters
        case 47 /* '/' */:
        case 62 /* '>' */:
            return true;
        default:
            return false;
    }
};
/**
 * Checks whether a char code is one of the chars that can't be a part of an attribute name: white space,
 * a slash, a greater-than sign or an equals sign.
 *
 * @param charCode The char code to check.
 * @returns `true` if the char code is one of the listed chars.
 */
var isNotAttributeNameChar = function (charCode) {
    switch (charCode) {
        // Same chars as isSpaceChar
        case 32 /* ' ' */:
        case 9 /* '\t' */:
        case 13 /* '\r' */:
        case 10 /* '\n' */:
        // Tag delimiters and the name/value separator
        case 47 /* '/' */:
        case 62 /* '>' */:
        case 61 /* '=' */:
            return true;
        default:
            return false;
    }
};
/**
 * Checks whether a char code is one of the chars that can't be a part of an unquoted attribute value:
 * white space or a greater-than sign.
 *
 * @param charCode The char code to check.
 * @returns `true` if the char code is one of the listed chars.
 */
var isNotUnquotedValueChar = function (charCode) {
    switch (charCode) {
        // Same chars as isSpaceChar
        case 32 /* ' ' */:
        case 9 /* '\t' */:
        case 13 /* '\r' */:
        case 10 /* '\n' */:
        // End of the tag
        case 62 /* '>' */:
            return true;
        default:
            return false;
    }
};
// Readers ("takers") composed from `tokenizer-dsl` primitives. In this file every taker is called as
// `take(chunk, index)` and its result is compared with -1 (NO_MATCH) or used as the index at which
// reading stopped.
// NOTE(review): the exact semantics of the `inclusive`, `openEnded` and `endOffset` options are defined by
// `tokenizer-dsl` and aren't visible here. Callers in this file treat a result greater than `chunk.length`
// as "the terminator was not found" — confirm against the library documentation.

// Reads text up to the next '<'
var takeText = until(text('<'));
// Reads up to and including the next '>'
var takeUntilGt = until(text('>'), { inclusive: true });
// The first char of a tag name
var takeTagNameStartChar = char(isTagNameStartChar);
// The rest of a tag name: everything up to white space, '/' or '>'
var takeTagNameChars = until(char(isNotTagNameChar), { openEnded: true, endOffset: 1 });
// Start tag opening: <…
var takeStartTagOpening = seq(text('<'), takeTagNameStartChar, takeTagNameChars);
// End tag opening: </…
var takeEndTagOpening = seq(text('</'), takeTagNameStartChar, takeTagNameChars);
// Attribute name: everything up to white space, '/', '>' or '='
var takeAttributeName = until(char(isNotAttributeNameChar), { openEnded: true });
// White space and slashes between attributes
var takeTagSpace = all(char(isTagSpaceChar));
// White space only
var takeSpace = all(char(isSpaceChar));
// Equals sign with optional white space around it
var takeEq = seq(takeSpace, text('='), takeSpace);
// Double-quoted attribute value: "…"
var takeQuotValue = seq(text('"'), until(text('"'), { inclusive: true, openEnded: true, endOffset: 1 }));
// Single-quoted attribute value: '…'
var takeAposValue = seq(text('\''), until(text('\''), { inclusive: true, openEnded: true, endOffset: 1 }));
// Unquoted attribute value: everything up to white space or '>'
var takeUnquotedValue = until(char(isNotUnquotedValueChar), { openEnded: true });
// Comment: <!-- … -->
var takeComment = seq(text('<!--'), until(text('-->'), { inclusive: true, openEnded: true, endOffset: 3 }));
// DTD: <! … >
var takeDtd = seq(text('<!'), until(text('>'), { inclusive: true, openEnded: true, endOffset: 1 }));
// Processing instruction: <? … ?>
var takeProcessingInstruction = seq(text('<?'), until(text('?>'), { inclusive: true, openEnded: true, endOffset: 2 }));
// CDATA section: <![CDATA[ … ]]>
var takeCdata = seq(text('<![CDATA['), until(text(']]>'), { inclusive: true, openEnded: true, endOffset: 3 }));
// Doctype (the keyword is matched case-insensitively): <!DOCTYPE … >
var takeDoctype = seq(text('<!DOCTYPE', { caseInsensitive: true }), until(text('>'), { inclusive: true, openEnded: true, endOffset: 1 }));
/**
 * Reads a sequence of attributes from the source and stores a token for each of them in `attributes`.
 *
 * For every attribute a token is taken from `options.attributeTokenPool`. If an attribute has no equals
 * sign then its `rawValue` and `value` are `undefined`; if there's an equals sign that isn't followed by
 * a value then they are `null`. `valueStart` and `valueEnd` are -1 when there's no value.
 *
 * @param chunk The string to read attributes from.
 * @param index The index in `chunk` from which to start reading.
 * @param chunkOffset The offset of the `chunk` in scope of the whole input.
 * @param attributes An array-like object to which {@link IAttributeToken} objects are added.
 * @param options Tokenization options.
 * @param parserOptions Parsing options.
 * @returns The index in `chunk` at which reading was completed.
 */
export function tokenizeAttributes(chunk, index, chunkOffset, attributes, options, parserOptions) {
    var tokenPool = options.attributeTokenPool;
    var decode = parserOptions.decodeAttribute;
    var rename = parserOptions.renameAttribute;
    var chunkLength = chunk.length;
    var count = 0;
    var cursor = index;
    while (cursor < chunkLength) {
        var nameStart = takeTagSpace(chunk, cursor);
        var nameEnd = takeAttributeName(chunk, nameStart);
        if (nameEnd === nameStart) {
            // An empty name means that there are no more attributes
            break;
        }
        var token = tokenPool.take();
        attributes[count] = token;
        var rawName = chunk.substring(nameStart, nameEnd);
        var absoluteNameStart = chunkOffset + nameStart;
        token.rawName = rawName;
        token.name = rename != null ? rename(rawName) : rawName;
        token.start = absoluteNameStart;
        token.nameStart = absoluteNameStart;
        token.nameEnd = chunkOffset + nameEnd;
        // `var` bindings are shared by all iterations, so the state of the value is reset explicitly
        var rawValue = undefined;
        var value = undefined;
        var valueStart = -1;
        var valueEnd = -1;
        var quoted = false;
        // Index in chunk at which the attribute ends, moved forward as the equals sign and the value are read
        var attributeEnd = nameEnd;
        var valueIndex = takeEq(chunk, nameEnd);
        if (valueIndex !== -1 /* NO_MATCH */) {
            // There's an equals sign, so the value is null until proven otherwise
            rawValue = null;
            value = null;
            attributeEnd = valueIndex;
            var quotedEnd = takeQuotValue(chunk, valueIndex);
            if (quotedEnd === -1 /* NO_MATCH */) {
                quotedEnd = takeAposValue(chunk, valueIndex);
            }
            if (quotedEnd === -1 /* NO_MATCH */) {
                // Unquoted value, may be empty
                var unquotedEnd = takeUnquotedValue(chunk, valueIndex);
                if (unquotedEnd !== valueIndex) {
                    valueStart = valueIndex;
                    valueEnd = unquotedEnd;
                    attributeEnd = unquotedEnd;
                }
            }
            else {
                // Quoted value, quotes are excluded from the value range
                valueStart = valueIndex + 1;
                valueEnd = quotedEnd - 1;
                quoted = true;
                // The taker may report an index past the end of the chunk
                attributeEnd = Math.min(quotedEnd, chunkLength);
            }
            if (valueStart !== -1) {
                rawValue = chunk.substring(valueStart, valueEnd);
                value = decode != null ? decode(rawValue) : rawValue;
                valueStart += chunkOffset;
                valueEnd += chunkOffset;
            }
        }
        token.rawValue = rawValue;
        token.value = value;
        token.valueStart = valueStart;
        token.valueEnd = valueEnd;
        token.quoted = quoted;
        token.end = chunkOffset + attributeEnd;
        count += 1;
        cursor = attributeEnd;
    }
    // Drop the tokens that were left in the array-like object by previous usages
    for (var staleIndex = count; staleIndex < attributes.length; ++staleIndex) {
        attributes[staleIndex] = undefined;
    }
    attributes.length = count;
    return cursor;
}
/**
 * Reads markup tokens from the string.
 *
 * **Note:** Tokenizer doesn't return allocated tokens back to pools.
 *
 * Text that precedes an incomplete token (an unterminated start or end tag, or in streaming mode an
 * unterminated comment, doctype, CDATA section, processing instruction or DTD) is never dropped: in
 * streaming mode the returned index points to the start of that text, so it is read again when more
 * input is available; otherwise the text is passed to the handler before tokenization stops.
 *
 * NOTE(review): `tagParsingEnabled` and `startTagName` are local to a single call, so the "inside of a
 * CDATA content tag" state isn't carried over between calls — confirm that callers account for this in
 * streaming mode.
 *
 * @param chunk The chunk of the input to read tokens from.
 * @param streaming If set to `true` then tokenizer stops when an ambiguous char sequence is met.
 * @param chunkOffset The offset of the `chunk` in scope of the whole input.
 * @param options Tokenization options.
 * @param parserOptions Parsing options.
 * @param handler SAX handler that is notified about parsed tokens.
 * @returns The index in `chunk` right after the last char that was reported to the handler or skipped.
 * Chars starting from this index were not processed.
 */
export function tokenize(chunk, streaming, chunkOffset, options, parserOptions, handler) {
    var startTagTokenPool = options.startTagTokenPool, endTagToken = options.endTagToken, dataToken = options.dataToken;
    var cdataEnabled = parserOptions.cdataEnabled, processingInstructionsEnabled = parserOptions.processingInstructionsEnabled, selfClosingEnabled = parserOptions.selfClosingEnabled, decodeText = parserOptions.decodeText, renameTag = parserOptions.renameTag, checkCdataTag = parserOptions.checkCdataTag;
    var startTagCallback = handler.startTag, endTagCallback = handler.endTag, textCallback = handler.text, commentCallback = handler.comment, processingInstructionCallback = handler.processingInstruction, cdataCallback = handler.cdata, doctypeCallback = handler.doctype;
    // The range of the pending text that wasn't passed to the handler yet; textStart is -1 if there's none
    var textStart = -1;
    var textEnd = 0;
    // Set to false while the content of a tag approved by checkCdataTag is being read
    var tagParsingEnabled = true;
    var startTagName;
    var charCount = chunk.length;
    var i = 0;
    var j;
    // This function is inlined by Terser
    var triggerTextCallback = function () {
        if (textStart !== -1) {
            triggerDataCallback(chunk, chunkOffset, 3 /* TEXT */, dataToken, textCallback, textStart, textEnd, 0, 0, decodeText);
            textStart = -1;
        }
    };
    // Stops tokenization at an incomplete token that starts at `i` and returns the index to resume from
    var stopAtIncompleteToken = function () {
        if (streaming) {
            // Pending text isn't emitted in streaming mode (same as at the end of the function),
            // so it must be re-read together with the incomplete token
            return textStart !== -1 ? textStart : i;
        }
        // No more input is expected, so pending text must be emitted now or it would be lost
        /*@__INLINE__*/
        triggerTextCallback();
        return i;
    };
    while (i < charCount) {
        // Text
        if (textStart === -1) {
            var k = takeText(chunk, i);
            // No '<' ahead: the rest of the chunk is text, which is ambiguous in streaming mode
            if (k === -1 /* NO_MATCH */ && (k = charCount) && streaming) {
                break;
            }
            if (k !== i) {
                textStart = i;
                textEnd = i = k;
                continue;
            }
        }
        if (tagParsingEnabled) {
            // Start tag
            j = takeStartTagOpening(chunk, i);
            if (j !== -1 /* NO_MATCH */) {
                var token = startTagTokenPool.take();
                var attributes = token.attributes;
                var nameStart = i + 1;
                var nameEnd = j;
                var rawTagName = chunk.substring(nameStart, nameEnd);
                var tagName = renameTag != null ? renameTag(rawTagName) : rawTagName;
                j = tokenizeAttributes(chunk, j, chunkOffset, attributes, options, parserOptions);
                // Skip malformed content and excessive whitespaces
                var k = takeUntilGt(chunk, j);
                if (k === -1 /* NO_MATCH */) {
                    // Unterminated start tag
                    return stopAtIncompleteToken();
                }
                // Self-closing if the char right before '>' is a slash that follows the attributes
                var selfClosing = selfClosingEnabled && k - j >= 2 && chunk.charCodeAt(k - 2) === 47 /* '/' */ || false;
                /*@__INLINE__*/
                triggerTextCallback();
                token.rawName = rawTagName;
                token.name = tagName;
                token.selfClosing = selfClosing;
                token.start = chunkOffset + i;
                token.end = chunkOffset + k;
                token.nameStart = chunkOffset + nameStart;
                token.nameEnd = chunkOffset + nameEnd;
                if (!selfClosing) {
                    startTagName = tagName;
                    // Tags aren't parsed inside of a CDATA content tag until the matching end tag is met
                    tagParsingEnabled = !(checkCdataTag === null || checkCdataTag === void 0 ? void 0 : checkCdataTag(token));
                }
                i = k;
                startTagCallback === null || startTagCallback === void 0 ? void 0 : startTagCallback(token);
                // Start tag token and its attributes must be returned to the pool owner
                continue;
            }
        }
        // End tag
        j = takeEndTagOpening(chunk, i);
        if (j !== -1 /* NO_MATCH */) {
            var nameStart = i + 2;
            var nameEnd = j;
            var rawTagName = chunk.substring(nameStart, nameEnd);
            var tagName = renameTag != null ? renameTag(rawTagName) : rawTagName;
            if (tagParsingEnabled || startTagName === tagName) {
                // Resume tag parsing if CDATA content tag has ended
                tagParsingEnabled = true;
                // Skip malformed content and excessive whitespaces
                var k = takeUntilGt(chunk, j);
                if (k === -1 /* NO_MATCH */) {
                    // Unterminated end tag
                    return stopAtIncompleteToken();
                }
                /*@__INLINE__*/
                triggerTextCallback();
                if (endTagCallback) {
                    endTagToken.rawName = rawTagName;
                    endTagToken.name = tagName;
                    endTagToken.start = chunkOffset + i;
                    endTagToken.end = chunkOffset + k;
                    endTagToken.nameStart = chunkOffset + nameStart;
                    endTagToken.nameEnd = chunkOffset + nameEnd;
                    endTagCallback(endTagToken);
                }
                i = k;
                continue;
            }
        }
        if (tagParsingEnabled) {
            // In the branches below `j > charCount` means that the token terminator wasn't found in the chunk
            // Comment
            j = takeComment(chunk, i);
            if (j !== -1 /* NO_MATCH */) {
                if (j > charCount && streaming) {
                    return stopAtIncompleteToken();
                }
                /*@__INLINE__*/
                triggerTextCallback();
                i = triggerDataCallback(chunk, chunkOffset, 8 /* COMMENT */, dataToken, commentCallback, i, j, 4, 3, decodeText);
                continue;
            }
            // Doctype
            j = takeDoctype(chunk, i);
            if (j !== -1 /* NO_MATCH */) {
                if (j > charCount && streaming) {
                    return stopAtIncompleteToken();
                }
                /*@__INLINE__*/
                triggerTextCallback();
                i = triggerDataCallback(chunk, chunkOffset, 10 /* DOCTYPE */, dataToken, doctypeCallback, i, j, 9, 1);
                continue;
            }
            // CDATA section
            j = takeCdata(chunk, i);
            if (j !== -1 /* NO_MATCH */) {
                if (j > charCount && streaming) {
                    return stopAtIncompleteToken();
                }
                /*@__INLINE__*/
                triggerTextCallback();
                if (cdataEnabled) {
                    i = triggerDataCallback(chunk, chunkOffset, 4 /* CDATA_SECTION */, dataToken, cdataCallback, i, j, 9, 3);
                }
                else {
                    // CDATA sections are reported as comments when they are disabled
                    i = triggerDataCallback(chunk, chunkOffset, 8 /* COMMENT */, dataToken, commentCallback, i, j, 2, 1);
                }
                continue;
            }
            // Processing instruction
            j = takeProcessingInstruction(chunk, i);
            if (j !== -1 /* NO_MATCH */) {
                if (j > charCount && streaming) {
                    return stopAtIncompleteToken();
                }
                /*@__INLINE__*/
                triggerTextCallback();
                if (processingInstructionsEnabled) {
                    i = triggerDataCallback(chunk, chunkOffset, 7 /* PROCESSING_INSTRUCTION */, dataToken, processingInstructionCallback, i, j, 2, 2);
                }
                else {
                    // Processing instructions are reported as comments when they are disabled
                    i = triggerDataCallback(chunk, chunkOffset, 8 /* COMMENT */, dataToken, commentCallback, i, j, 1, 1);
                }
                continue;
            }
            // DTD
            j = takeDtd(chunk, i);
            if (j !== -1 /* NO_MATCH */) {
                if (j > charCount && streaming) {
                    return stopAtIncompleteToken();
                }
                /*@__INLINE__*/
                triggerTextCallback();
                if (cdataEnabled) {
                    // DTD is skipped without notifying the handler
                    i = Math.min(j, charCount);
                }
                else {
                    i = triggerDataCallback(chunk, chunkOffset, 8 /* COMMENT */, dataToken, commentCallback, i, j, 2, 1, decodeText);
                }
                continue;
            }
        }
        // Concat with existing text
        if (textStart === -1) {
            textStart = i;
        }
        textEnd = takeText(chunk, i + 1);
        if (textEnd === -1) {
            textEnd = charCount;
            break;
        }
        i = textEnd;
    }
    if (streaming) {
        // Trailing text may continue in the next chunk, so it isn't emitted yet
        if (textStart !== -1) {
            return textStart;
        }
        return i;
    }
    /*@__INLINE__*/
    triggerTextCallback();
    return i;
}
/**
 * Populates `dataToken` with the range `[start, end)` of the `chunk` and passes it to `dataCallback`.
 *
 * `end` may exceed the length of the chunk, in which case both the token end and the data end are
 * clamped to the chunk length.
 *
 * @param chunk The chunk that is being tokenized.
 * @param chunkOffset The offset of the `chunk` in scope of the whole input, added to all token offsets.
 * @param tokenType The type to assign to the token.
 * @param dataToken The token object that is populated; it isn't touched if there's no callback.
 * @param dataCallback The callback that receives the token, may be absent.
 * @param start The index in `chunk` where the token starts.
 * @param end The index in `chunk` where the token ends.
 * @param offsetStart The number of chars between the token start and the data start.
 * @param offsetEnd The number of chars between the data end and the token end.
 * @param decodeData The optional callback that converts raw data into `dataToken.data`.
 * @returns The index in `chunk` at which the token ends, never greater than the chunk length.
 */
function triggerDataCallback(chunk, chunkOffset, tokenType, dataToken, dataCallback, start, end, offsetStart, offsetEnd, decodeData) {
    var chunkLength = chunk.length;
    var tokenEnd = Math.min(end, chunkLength);
    if (dataCallback) {
        var from = start + offsetStart;
        var to = Math.min(end - offsetEnd, chunkLength);
        var raw = chunk.substring(from, to);
        dataToken.tokenType = tokenType;
        dataToken.rawData = raw;
        dataToken.data = decodeData != null ? decodeData(raw) : raw;
        dataToken.start = chunkOffset + start;
        dataToken.end = chunkOffset + tokenEnd;
        dataToken.dataStart = chunkOffset + from;
        dataToken.dataEnd = chunkOffset + to;
        dataCallback(dataToken);
    }
    return tokenEnd;
}
