diff options
Diffstat (limited to 'node_modules/xss/lib/parser.js')
-rw-r--r-- | node_modules/xss/lib/parser.js | 239 |
1 files changed, 239 insertions, 0 deletions
// Source: node_modules/xss/lib/parser.js (added as a new file, blob 7c15def).

/**
 * Simple HTML Parser
 *
 * @author Zongmin Lei<leizongmin@gmail.com>
 */

var _ = require("./util");

/**
 * Get the tag name from a raw tag string.
 *
 * The name is the text between the leading "<" and the position reported by
 * `_.spaceIndex` (presumably the first whitespace character; -1 when there is
 * none -- confirm against ./util). It is trimmed, lower-cased, and stripped of
 * one leading "/" (closing tag) and one trailing "/" (self-closing tag).
 *
 * @param {String} html e.g. '<a href="#">'
 * @return {String}
 */
function getTagName(html) {
  var i = _.spaceIndex(html);
  // Without a separator the name runs up to the closing ">".
  var tagName = i === -1 ? html.slice(1, -1) : html.slice(1, i + 1);
  tagName = _.trim(tagName).toLowerCase();
  if (tagName.slice(0, 1) === "/") tagName = tagName.slice(1);
  if (tagName.slice(-1) === "/") tagName = tagName.slice(0, -1);
  return tagName;
}

/**
 * Is this a closing tag, i.e. does it start with "</"?
 *
 * @param {String} html e.g. '<a href="#">'
 * @return {Boolean}
 */
function isClosing(html) {
  return html.slice(0, 2) === "</";
}

/**
 * Parse the input HTML and return the processed HTML.
 *
 * Text outside of tags is passed through `escapeHtml`. Every complete
 * `<...>` run is handed to `onTag`, and whatever `onTag` returns is appended
 * to the output in place of the tag.
 *
 * @param {String} html
 * @param {Function} onTag e.g.
 *   function (sourcePosition, position, tag, html, isClosing)
 *   - sourcePosition: index of the tag's "<" in the input
 *   - position: length of the output built so far
 *   - tag: lower-cased tag name
 *   - html: the raw tag text, including "<" and ">"
 *   - isClosing: true when the raw tag starts with "</"
 * @param {Function} escapeHtml called with each chunk of non-tag text
 * @return {String}
 */
function parseTag(html, onTag, escapeHtml) {
  "use strict";

  var rethtml = "";
  var lastPos = 0; // start of the input that has not been emitted yet
  var tagStart = false; // index of the pending "<", or false outside a tag
  var quoteStart = false; // the open quote character, or false
  var currentPos = 0;
  var len = html.length;
  var currentTagName = "";
  var currentHtml = "";

  for (currentPos = 0; currentPos < len; currentPos++) {
    var c = html.charAt(currentPos);
    if (tagStart === false) {
      if (c === "<") {
        tagStart = currentPos;
        continue;
      }
    } else {
      if (quoteStart === false) {
        if (c === "<") {
          // A second "<" before any ">": the pending run was not a tag, so
          // emit it as escaped text and restart the tag at this position.
          rethtml += escapeHtml(html.slice(lastPos, currentPos));
          tagStart = currentPos;
          lastPos = currentPos;
          continue;
        }
        if (c === ">") {
          rethtml += escapeHtml(html.slice(lastPos, tagStart));
          currentHtml = html.slice(tagStart, currentPos + 1);
          currentTagName = getTagName(currentHtml);
          rethtml += onTag(
            tagStart,
            rethtml.length,
            currentTagName,
            currentHtml,
            isClosing(currentHtml)
          );
          lastPos = currentPos + 1;
          tagStart = false;
          continue;
        }
        // A quote only opens an attribute value when it directly follows "=".
        if ((c === '"' || c === "'") && html.charAt(currentPos - 1) === "=") {
          quoteStart = c;
          continue;
        }
      } else {
        // Inside a quoted value only the matching quote is significant, so
        // "<" and ">" within it do not start or end a tag.
        if (c === quoteStart) {
          quoteStart = false;
          continue;
        }
      }
    }
  }
  // Whatever is left (trailing text or an unterminated tag) is escaped.
  if (lastPos < html.length) {
    rethtml += escapeHtml(html.slice(lastPos));
  }

  return rethtml;
}

// Matches every character that is not allowed in an attribute name.
var REGEXP_ILLEGAL_ATTR_NAME = /[^a-zA-Z0-9_:\.\-]/gim;

/**
 * Parse an attribute string and return the processed attributes.
 *
 * Each attribute found is reported through `onAttr(name, value)`; the name
 * is trimmed, stripped of illegal characters and lower-cased, and the value
 * defaults to "". Truthy return values of `onAttr` are joined with a single
 * space to form the result.
 *
 * @param {String} html e.g. `href="#" target="_blank"`
 * @param {Function} onAttr e.g. `function (name, value)`
 * @return {String}
 */
function parseAttr(html, onAttr) {
  "use strict";

  var lastPos = 0; // start of the attribute text not consumed yet
  var retAttrs = [];
  var tmpName = false; // pending attribute name once "=" was seen, else false
  var len = html.length;
  // Whitespace is rewritten to plain spaces lazily, on the first whitespace
  // character the scan reaches. Doing it once instead of on every whitespace
  // character keeps the result identical while avoiding quadratic work.
  var whitespaceNormalized = false;

  function addAttr(name, value) {
    name = _.trim(name);
    name = name.replace(REGEXP_ILLEGAL_ATTR_NAME, "").toLowerCase();
    if (name.length < 1) return;
    var ret = onAttr(name, value || "");
    if (ret) retAttrs.push(ret);
  }

  // Analyze the input character by character.
  for (var i = 0; i < len; i++) {
    var c = html.charAt(i);
    var v, j;
    if (tmpName === false && c === "=") {
      tmpName = html.slice(lastPos, i);
      lastPos = i + 1;
      continue;
    }
    if (tmpName !== false) {
      // Quoted value: the quote must come immediately after the "=".
      if (
        i === lastPos &&
        (c === '"' || c === "'") &&
        html.charAt(i - 1) === "="
      ) {
        j = html.indexOf(c, i + 1);
        if (j === -1) {
          // Unterminated quote: stop scanning; the remainder is handled
          // after the loop.
          break;
        } else {
          v = _.trim(html.slice(lastPos + 1, j));
          addAttr(tmpName, v);
          tmpName = false;
          i = j;
          lastPos = i + 1;
          continue;
        }
      }
    }
    if (/\s|\n|\t/.test(c)) {
      if (!whitespaceNormalized) {
        html = html.replace(/\s|\n|\t/g, " ");
        whitespaceNormalized = true;
      }
      if (tmpName === false) {
        j = findNextEqual(html, i);
        if (j === -1) {
          // No "=" follows the spaces: this was a value-less attribute.
          v = _.trim(html.slice(lastPos, i));
          addAttr(v);
          tmpName = false;
          lastPos = i + 1;
          continue;
        } else {
          // Spaces between the name and "=": jump so the next iteration
          // lands on the "=".
          i = j - 1;
          continue;
        }
      } else {
        j = findBeforeEqual(html, i - 1);
        if (j === -1) {
          // The whitespace ends an unquoted (or quote-wrapped) value.
          v = _.trim(html.slice(lastPos, i));
          v = stripQuoteWrap(v);
          addAttr(tmpName, v);
          tmpName = false;
          lastPos = i + 1;
          continue;
        } else {
          // Spaces between "=" and the value: keep scanning.
          continue;
        }
      }
    }
  }

  // Flush whatever is left after the last consumed position.
  if (lastPos < html.length) {
    if (tmpName === false) {
      addAttr(html.slice(lastPos));
    } else {
      addAttr(tmpName, stripQuoteWrap(_.trim(html.slice(lastPos))));
    }
  }

  return _.trim(retAttrs.join(" "));
}

/**
 * Scan forward from index `i`, skipping spaces, for an "=".
 *
 * @param {String} str
 * @param {Number} i index to start from
 * @return {Number} index of the "=", or -1 when the first non-space
 *   character is something else or the end of the string is reached
 */
function findNextEqual(str, i) {
  for (; i < str.length; i++) {
    var c = str[i];
    if (c === " ") continue;
    if (c === "=") return i;
    return -1;
  }
  // Only spaces up to the end of the string.
  return -1;
}

/**
 * Scan backward from index `i`, skipping spaces, for an "=".
 *
 * @param {String} str
 * @param {Number} i index to start from
 * @return {Number} index of the "=", or -1 when the first non-space
 *   character is something else or the start of the string is passed
 */
function findBeforeEqual(str, i) {
  // Index 0 is examined too, so an "=" at the very start is found.
  for (; i >= 0; i--) {
    var c = str[i];
    if (c === " ") continue;
    if (c === "=") return i;
    return -1;
  }
  return -1;
}

/**
 * Does the text start and end with the same quote character (" or ')?
 *
 * @param {String} text
 * @return {Boolean}
 */
function isQuoteWrapString(text) {
  var first = text[0];
  var last = text[text.length - 1];
  return (first === '"' && last === '"') || (first === "'" && last === "'");
}

/**
 * Remove one pair of wrapping quotes, if present.
 *
 * @param {String} text
 * @return {String}
 */
function stripQuoteWrap(text) {
  return isQuoteWrapString(text) ? text.slice(1, -1) : text;
}

exports.parseTag = parseTag;
exports.parseAttr = parseAttr;