sax.js 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413
  1. ;(function (sax) { // wrapper for non-node envs
  // Factory alternative to `new sax.SAXParser(strict, opt)`.
  sax.parser = function (strict, opt) {
    return new SAXParser(strict, opt)
  }
  sax.SAXParser = SAXParser

  // Buffer-overrun guard.
  // Checking starts once the parse position passes MAX_BUFFER_LENGTH. Every
  // check schedules the next one at MAX_BUFFER_LENGTH - (longest buffer), the
  // earliest point at which any buffer could overflow, so checks stay rare
  // without letting a buffer cross the bound unnoticed.
  // Buffers are inspected at most once per write(), so one very large chunk
  // may end up copied once in full; keeping chunks reasonable is left to the
  // caller. Set to Infinity to have unlimited buffers.
  sax.MAX_BUFFER_LENGTH = 65536 // 64 * 1024

  // Parser properties that accumulate characters. These are the fields that
  // clearBuffers() resets and checkBufferLength() measures.
  var buffers = [
    'comment',
    'sgmlDecl',
    'textNode',
    'tagName',
    'doctype',
    'procInstName',
    'procInstBody',
    'entity',
    'attribName',
    'attribValue',
    'cdata',
    'script'
  ]

  // Names of the events a parser can emit.
  sax.EVENTS = [
    'text', 'processinginstruction', 'sgmldeclaration', 'doctype', 'comment',
    'opentagstart', 'attribute', 'opentag', 'closetag',
    'opencdata', 'cdata', 'closecdata',
    'error', 'end', 'ready', 'script',
    'opennamespace', 'closenamespace'
  ]
  // Parser constructor. Works with or without `new`, and is also re-applied
  // to an existing instance (SAXParser.call) by end() to reset it.
  //   strict - truthy for strict mode (also disables <script> handling)
  //   opt    - options object; note that its `lowercase` field is normalised
  //            in place from `lowercase` / `lowercasetags`
  function SAXParser (strict, opt) {
    if (!(this instanceof SAXParser)) {
      return new SAXParser(strict, opt)
    }

    var parser = this
    var options = opt || {}

    clearBuffers(parser)
    parser.c = ''
    parser.q = ''
    parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH

    parser.opt = options
    options.lowercase = options.lowercase || options.lowercasetags
    // name of the String method used to fold tag/attribute case in loose mode
    parser.looseCase = options.lowercase ? 'toLowerCase' : 'toUpperCase'

    parser.tags = []
    parser.sawRoot = false
    parser.closedRoot = false
    parser.closed = false
    parser.error = null
    parser.tag = null
    parser.strict = Boolean(strict)
    parser.noscript = Boolean(strict || options.noscript)
    parser.state = S.BEGIN

    parser.strictEntities = options.strictEntities
    // per-parser entity table, inheriting from the shared one
    parser.ENTITIES = Object.create(
      parser.strictEntities ? sax.XML_ENTITIES : sax.ENTITIES
    )
    parser.attribList = []

    // Namespaces form a prototype chain: parser.ns is the root scope and
    // each tag's ns object protos to that of its parent tag.
    if (options.xmlns) {
      parser.ns = Object.create(rootNS)
    }

    // Position tracking is mostly used for error reporting.
    parser.trackPosition = options.position !== false
    if (parser.trackPosition) {
      parser.column = 0
      parser.line = 0
      parser.position = 0
    }

    emit(parser, 'onready')
  }
  // Minimal Object.create fallback for engines that lack it: returns a new
  // object whose prototype is `proto` (property descriptors are not supported).
  if (!Object.create) {
    Object.create = function (proto) {
      var Ctor = function () {}
      Ctor.prototype = proto
      return new Ctor()
    }
  }
  // Object.keys fallback for engines that lack it: collects the enumerable
  // own property names of `obj`.
  if (!Object.keys) {
    Object.keys = function (obj) {
      var names = []
      for (var key in obj) {
        if (obj.hasOwnProperty(key)) {
          names.push(key)
        }
      }
      return names
    }
  }
  // Inspect every accumulation buffer and react to any that exceeds the
  // allowed size, then schedule the next inspection.
  //
  // Oversized textNode / cdata / script buffers are flushed to their handlers
  // (they can legitimately grow large); any other oversized buffer is
  // reported through error().
  //
  // Buffers that are not strings are measured as length 0. This matters for
  // parser.doctype, which write() replaces with `true` once a doctype has been
  // emitted: reading `.length` from it gave undefined, which turned the
  // maximum into NaN and the next check position into NaN, silently turning
  // the guard off for the rest of the document.
  function checkBufferLength (parser) {
    var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
    var maxActual = 0
    for (var i = 0, l = buffers.length; i < l; i++) {
      var name = buffers[i]
      var value = parser[name]
      var len = typeof value === 'string' ? value.length : 0
      if (len > maxAllowed) {
        // Text/cdata nodes can get big, and since they're buffered,
        // we can get here under normal conditions.
        // Avoid issues by emitting the text node now,
        // so at least it won't get any bigger.
        switch (name) {
          case 'textNode':
            closeText(parser)
            break
          case 'cdata':
            emitNode(parser, 'oncdata', parser.cdata)
            parser.cdata = ''
            break
          case 'script':
            emitNode(parser, 'onscript', parser.script)
            parser.script = ''
            break
          default:
            error(parser, 'Max buffer length exceeded: ' + name)
        }
      }
      maxActual = Math.max(maxActual, len)
    }
    // schedule the next check for the earliest possible buffer overrun.
    var m = sax.MAX_BUFFER_LENGTH - maxActual
    parser.bufferCheckPosition = m + parser.position
  }
  // Reset every accumulation buffer listed in `buffers` to the empty string.
  function clearBuffers (parser) {
    buffers.forEach(function (name) {
      parser[name] = ''
    })
  }
  // Push out whatever is currently buffered: pending text first, then any
  // non-empty cdata and script content, clearing each buffer after emitting.
  function flushBuffers (parser) {
    closeText(parser)

    var pending = [
      ['cdata', 'oncdata'],
      ['script', 'onscript']
    ]
    pending.forEach(function (pair) {
      var field = pair[0]
      if (parser[field] !== '') {
        emitNode(parser, pair[1], parser[field])
        parser[field] = ''
      }
    })
  }
  // Public instance API. Most methods delegate to module-level functions.
  SAXParser.prototype = {
    // finish parsing; returns nothing
    end: function () {
      end(this)
    },

    write: write,

    // clear a previously recorded error so write() can be called again
    resume: function () {
      this.error = null
      return this
    },

    // writing null is how the end of input is signalled
    close: function () {
      return this.write(null)
    },

    // emit any buffered text/cdata/script immediately
    flush: function () {
      flushBuffers(this)
    }
  }
  // Literal markers matched (case-insensitively) after "<!".
  var CDATA = '[CDATA['
  var DOCTYPE = 'DOCTYPE'

  // The two namespace bindings that always exist; rootNS is the root of the
  // namespace prototype chain.
  var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
  var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
  var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }

  // Character classes for XML names, see
  // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
  // The parser looks at one UTF-16 code unit at a time, so astral-plane
  // characters (10000-EFFFF) cannot be supported without a significant
  // breaking change to this parser.
  var NAME_START_CHARS = ':_A-Za-z' +
    '\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02FF' +
    '\\u0370-\\u037D\\u037F-\\u1FFF\\u200C-\\u200D' +
    '\\u2070-\\u218F\\u2C00-\\u2FEF\\u3001-\\uD7FF' +
    '\\uF900-\\uFDCF\\uFDF0-\\uFFFD'
  // additional characters allowed after the first one
  var NAME_EXTRA_CHARS = '\\u00B7\\u0300-\\u036F\\u203F-\\u2040.\\d-'

  var nameStart = new RegExp('[' + NAME_START_CHARS + ']')
  var nameBody = new RegExp('[' + NAME_START_CHARS + NAME_EXTRA_CHARS + ']')
  // entity names additionally accept '#' (numeric character references)
  var entityStart = new RegExp('[#' + NAME_START_CHARS + ']')
  var entityBody = new RegExp('[#' + NAME_START_CHARS + NAME_EXTRA_CHARS + ']')
  // True when `c` is a space, line feed, carriage return or tab.
  function isWhitespace (c) {
    switch (c) {
      case ' ':
      case '\n':
      case '\r':
      case '\t':
        return true
      default:
        return false
    }
  }
  // True when `c` is a double or single quote character.
  function isQuote (c) {
    if (c === '"') {
      return true
    }
    return c === '\''
  }
  // True when `c` is '>' or whitespace.
  function isAttribEnd (c) {
    if (c === '>') {
      return true
    }
    return isWhitespace(c)
  }
  // Test character `c` against `regex`; returns the result of regex.test(c).
  function isMatch (regex, c) {
    var matched = regex.test(c)
    return matched
  }
  // Negation of isMatch(regex, c).
  function notMatch (regex, c) {
    var matched = isMatch(regex, c)
    return !matched
  }
  // Tokenizer states. Each name is numbered in list order, starting at 0;
  // S serves as the running counter while the table is being filled.
  var S = 0
  var stateNames = [
    'BEGIN', // leading byte order mark or whitespace
    'BEGIN_WHITESPACE', // leading whitespace
    'TEXT', // general stuff
    'TEXT_ENTITY', // &amp and such.
    'OPEN_WAKA', // <
    'SGML_DECL', // <!BLARG
    'SGML_DECL_QUOTED', // <!BLARG foo "bar
    'DOCTYPE', // <!DOCTYPE
    'DOCTYPE_QUOTED', // <!DOCTYPE "//blah
    'DOCTYPE_DTD', // <!DOCTYPE "//blah" [ ...
    'DOCTYPE_DTD_QUOTED', // <!DOCTYPE "//blah" [ "foo
    'COMMENT_STARTING', // <!-
    'COMMENT', // <!--
    'COMMENT_ENDING', // <!-- blah -
    'COMMENT_ENDED', // <!-- blah --
    'CDATA', // <![CDATA[ something
    'CDATA_ENDING', // ]
    'CDATA_ENDING_2', // ]]
    'PROC_INST', // <?hi
    'PROC_INST_BODY', // <?hi there
    'PROC_INST_ENDING', // <?hi "there" ?
    'OPEN_TAG', // <strong
    'OPEN_TAG_SLASH', // <strong /
    'ATTRIB', // <a
    'ATTRIB_NAME', // <a foo
    'ATTRIB_NAME_SAW_WHITE', // <a foo _
    'ATTRIB_VALUE', // <a foo=
    'ATTRIB_VALUE_QUOTED', // <a foo="bar
    'ATTRIB_VALUE_CLOSED', // <a foo="bar"
    'ATTRIB_VALUE_UNQUOTED', // <a foo=bar
    'ATTRIB_VALUE_ENTITY_Q', // <foo bar="&quot;"
    'ATTRIB_VALUE_ENTITY_U', // <foo bar=&quot
    'CLOSE_TAG', // </a
    'CLOSE_TAG_SAW_WHITE', // </a >
    'SCRIPT', // <script> ...
    'SCRIPT_ENDING' // <script> ... <
  ]
  sax.STATE = {}
  stateNames.forEach(function (name) {
    sax.STATE[name] = S++
  })
  // The five named entities; used as the parser's entity table when the
  // strictEntities option is set.
  sax.XML_ENTITIES = { amp: '&', gt: '>', lt: '<', quot: '"', apos: "'" }
  // Named entity table used when strictEntities is not set. Numeric values
  // are character codes; the statement following this table replaces them
  // with the corresponding one-character strings.
  sax.ENTITIES = {
    'amp': '&', 'gt': '>', 'lt': '<', 'quot': '"', 'apos': "'",
    'AElig': 198, 'Aacute': 193, 'Acirc': 194, 'Agrave': 192, 'Aring': 197, 'Atilde': 195, 'Auml': 196, 'Ccedil': 199,
    'ETH': 208, 'Eacute': 201, 'Ecirc': 202, 'Egrave': 200, 'Euml': 203,
    'Iacute': 205, 'Icirc': 206, 'Igrave': 204, 'Iuml': 207, 'Ntilde': 209,
    'Oacute': 211, 'Ocirc': 212, 'Ograve': 210, 'Oslash': 216, 'Otilde': 213, 'Ouml': 214,
    'THORN': 222, 'Uacute': 218, 'Ucirc': 219, 'Ugrave': 217, 'Uuml': 220, 'Yacute': 221,
    'aacute': 225, 'acirc': 226, 'aelig': 230, 'agrave': 224, 'aring': 229, 'atilde': 227, 'auml': 228, 'ccedil': 231,
    'eacute': 233, 'ecirc': 234, 'egrave': 232, 'eth': 240, 'euml': 235,
    'iacute': 237, 'icirc': 238, 'igrave': 236, 'iuml': 239, 'ntilde': 241,
    'oacute': 243, 'ocirc': 244, 'ograve': 242, 'oslash': 248, 'otilde': 245, 'ouml': 246,
    'szlig': 223, 'thorn': 254, 'uacute': 250, 'ucirc': 251, 'ugrave': 249, 'uuml': 252, 'yacute': 253, 'yuml': 255,
    'copy': 169, 'reg': 174, 'nbsp': 160, 'iexcl': 161, 'cent': 162, 'pound': 163, 'curren': 164, 'yen': 165,
    'brvbar': 166, 'sect': 167, 'uml': 168, 'ordf': 170, 'laquo': 171, 'not': 172, 'shy': 173, 'macr': 175,
    'deg': 176, 'plusmn': 177, 'sup1': 185, 'sup2': 178, 'sup3': 179, 'acute': 180, 'micro': 181, 'para': 182,
    'middot': 183, 'cedil': 184, 'ordm': 186, 'raquo': 187, 'frac14': 188, 'frac12': 189, 'frac34': 190, 'iquest': 191,
    'times': 215, 'divide': 247,
    'OElig': 338, 'oelig': 339, 'Scaron': 352, 'scaron': 353, 'Yuml': 376, 'fnof': 402, 'circ': 710, 'tilde': 732,
    'Alpha': 913, 'Beta': 914, 'Gamma': 915, 'Delta': 916, 'Epsilon': 917, 'Zeta': 918, 'Eta': 919, 'Theta': 920,
    'Iota': 921, 'Kappa': 922, 'Lambda': 923, 'Mu': 924, 'Nu': 925, 'Xi': 926, 'Omicron': 927, 'Pi': 928,
    'Rho': 929, 'Sigma': 931, 'Tau': 932, 'Upsilon': 933, 'Phi': 934, 'Chi': 935, 'Psi': 936, 'Omega': 937,
    'alpha': 945, 'beta': 946, 'gamma': 947, 'delta': 948, 'epsilon': 949, 'zeta': 950, 'eta': 951, 'theta': 952,
    'iota': 953, 'kappa': 954, 'lambda': 955, 'mu': 956, 'nu': 957, 'xi': 958, 'omicron': 959, 'pi': 960,
    'rho': 961, 'sigmaf': 962, 'sigma': 963, 'tau': 964, 'upsilon': 965, 'phi': 966, 'chi': 967, 'psi': 968, 'omega': 969,
    'thetasym': 977, 'upsih': 978, 'piv': 982,
    'ensp': 8194, 'emsp': 8195, 'thinsp': 8201, 'zwnj': 8204, 'zwj': 8205, 'lrm': 8206, 'rlm': 8207,
    'ndash': 8211, 'mdash': 8212, 'lsquo': 8216, 'rsquo': 8217, 'sbquo': 8218, 'ldquo': 8220, 'rdquo': 8221, 'bdquo': 8222,
    'dagger': 8224, 'Dagger': 8225, 'bull': 8226, 'hellip': 8230, 'permil': 8240, 'prime': 8242, 'Prime': 8243,
    'lsaquo': 8249, 'rsaquo': 8250, 'oline': 8254, 'frasl': 8260, 'euro': 8364,
    'image': 8465, 'weierp': 8472, 'real': 8476, 'trade': 8482, 'alefsym': 8501,
    'larr': 8592, 'uarr': 8593, 'rarr': 8594, 'darr': 8595, 'harr': 8596, 'crarr': 8629,
    'lArr': 8656, 'uArr': 8657, 'rArr': 8658, 'dArr': 8659, 'hArr': 8660,
    'forall': 8704, 'part': 8706, 'exist': 8707, 'empty': 8709, 'nabla': 8711, 'isin': 8712, 'notin': 8713, 'ni': 8715,
    'prod': 8719, 'sum': 8721, 'minus': 8722, 'lowast': 8727, 'radic': 8730, 'prop': 8733, 'infin': 8734, 'ang': 8736,
    'and': 8743, 'or': 8744, 'cap': 8745, 'cup': 8746, 'int': 8747, 'there4': 8756, 'sim': 8764, 'cong': 8773,
    'asymp': 8776, 'ne': 8800, 'equiv': 8801, 'le': 8804, 'ge': 8805, 'sub': 8834, 'sup': 8835, 'nsub': 8836,
    'sube': 8838, 'supe': 8839, 'oplus': 8853, 'otimes': 8855, 'perp': 8869, 'sdot': 8901,
    'lceil': 8968, 'rceil': 8969, 'lfloor': 8970, 'rfloor': 8971, 'lang': 9001, 'rang': 9002, 'loz': 9674,
    'spades': 9824, 'clubs': 9827, 'hearts': 9829, 'diams': 9830
  }
  // Replace numeric character codes in the entity table with the matching
  // one-character strings; entries that are already strings are kept.
  Object.keys(sax.ENTITIES).forEach(function (name) {
    var value = sax.ENTITIES[name]
    if (typeof value === 'number') {
      sax.ENTITIES[name] = String.fromCharCode(value)
    }
  })
  // Add the reverse mapping (number -> state name) to the state table so a
  // numeric state can be turned back into its name.
  Object.keys(sax.STATE).forEach(function (name) {
    sax.STATE[sax.STATE[name]] = name
  })

  // From here on S is shorthand for the state table itself.
  S = sax.STATE
  // Call the handler stored on the parser under `event` (e.g. 'ontext') with
  // `data`, if such a handler is set. The handler runs with the parser as `this`.
  function emit (parser, event, data) {
    if (parser[event]) {
      parser[event](data)
    }
  }
  // Emit a node event, first flushing any pending text so that events reach
  // the handlers in document order.
  function emitNode (parser, nodeType, data) {
    var hasPendingText = Boolean(parser.textNode)
    if (hasPendingText) {
      closeText(parser)
    }
    emit(parser, nodeType, data)
  }
  // Apply the trim/normalize options to the buffered text, emit it as an
  // 'ontext' event when anything is left, and empty the buffer.
  function closeText (parser) {
    var text = textopts(parser.opt, parser.textNode)
    parser.textNode = text
    if (text) {
      emit(parser, 'ontext', text)
    }
    parser.textNode = ''
  }
  // Post-process a text node according to the parser options:
  //   opt.trim      - strip leading/trailing whitespace
  //   opt.normalize - collapse every whitespace run into a single space
  // Returns the processed text.
  function textopts (opt, text) {
    var result = text
    if (opt.trim) {
      result = result.trim()
    }
    if (opt.normalize) {
      result = result.replace(/\s+/g, ' ')
    }
    return result
  }
  // Record a parse error on the parser and report it via 'onerror'.
  // Pending text is flushed first. The Error carries the original `reason`
  // plus the parser's current line and column, and its message also includes
  // the current character. Returns the parser.
  function error (parser, reason) {
    closeText(parser)

    let message = reason + '\nLine: ' + parser.line
    message += '\nColumn: ' + parser.column
    message += '\nChar: ' + parser.c

    const er = new Error(message)
    er.reason = reason
    er.line = parser.line
    er.column = parser.column

    parser.error = er
    emit(parser, 'onerror', er)
    return parser
  }
  // Finish parsing: report an unclosed root (strict mode only) or an input
  // that stopped in the middle of a construct, flush pending text, emit
  // 'onend' and then re-initialise the parser with its original settings.
  // Returns the parser.
  function end (parser) {
    var rootLeftOpen = parser.sawRoot && !parser.closedRoot
    if (rootLeftOpen) {
      strictFail(parser, 'Unclosed root tag')
    }

    // the only states in which input may legitimately stop
    var restingStates = [S.BEGIN, S.BEGIN_WHITESPACE, S.TEXT]
    if (restingStates.indexOf(parser.state) === -1) {
      error(parser, 'Unexpected end')
    }

    closeText(parser)
    parser.c = ''
    parser.closed = true
    emit(parser, 'onend')

    // reset all parser state by running the constructor on this instance
    SAXParser.call(parser, parser.strict, parser.opt)
    return parser
  }
  // Report `message` as an error, but only when the parser is in strict mode.
  // Throws if the first argument is not a SAXParser instance, which guards
  // against calls with the arguments in the wrong order.
  function strictFail (parser, message) {
    var isParser = typeof parser === 'object' && parser instanceof SAXParser
    if (!isParser) {
      throw new Error('bad call to strictFail')
    }
    if (parser.strict) {
      error(parser, message)
    }
  }
  // Start a new tag object from the buffered tag name and announce it with
  // 'onopentagstart'. In loose mode the name is case-folded first.
  function newTag (parser) {
    if (!parser.strict) {
      parser.tagName = parser.tagName[parser.looseCase]()
    }

    var tag = { name: parser.tagName, attributes: {} }
    parser.tag = tag

    // Inherit the enclosing namespace scope; this will be overridden if the
    // tag contains an xmlns="foo" or xmlns:foo="bar" attribute.
    if (parser.opt.xmlns) {
      var enclosing = parser.tags[parser.tags.length - 1] || parser
      tag.ns = enclosing.ns
    }

    parser.attribList.length = 0
    emitNode(parser, 'onopentagstart', tag)
  }
  // Split a qualified name into { prefix, local }.
  // A name without a colon gets an empty prefix. The attribute name 'xmlns'
  // itself (when `attribute` is truthy) is reported as prefix 'xmlns' with an
  // empty local part, e.g. <x xmlns="http://foo">.
  function qname (name, attribute) {
    if (attribute && name === 'xmlns') {
      return { prefix: 'xmlns', local: '' }
    }
    var parts = name.indexOf(':') < 0 ? ['', name] : name.split(':')
    return { prefix: parts[0], local: parts[1] }
  }
  // Commit the attribute currently held in parser.attribName/attribValue to
  // the tag being opened, then reset both buffers.
  //
  // A repeated attribute name on the same tag is ignored (first one wins).
  // In xmlns mode attributes are only queued on parser.attribList until the
  // tag is complete, so the duplicate check has to look at the queued
  // [name, value] pairs; the previous indexOf(name) on that list compared a
  // string against arrays and therefore never matched. The own-property test
  // goes through Object.prototype because an attribute called
  // "hasOwnProperty" would otherwise shadow the method and make the next
  // attribute throw.
  function attrib (parser) {
    if (!parser.strict) {
      parser.attribName = parser.attribName[parser.looseCase]()
    }

    var attribName = parser.attribName
    var alreadyQueued = parser.attribList.some(function (nv) {
      return nv[0] === attribName
    })
    if (alreadyQueued ||
      Object.prototype.hasOwnProperty.call(parser.tag.attributes, attribName)) {
      parser.attribName = parser.attribValue = ''
      return
    }

    if (parser.opt.xmlns) {
      var qn = qname(parser.attribName, true)
      var prefix = qn.prefix
      var local = qn.local

      if (prefix === 'xmlns') {
        // namespace binding attribute. push the binding into scope
        if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
          strictFail(parser,
            'xml: prefix must be bound to ' + XML_NAMESPACE + '\n' +
            'Actual: ' + parser.attribValue)
        } else if (local === 'xmlns' && parser.attribValue !== XMLNS_NAMESPACE) {
          strictFail(parser,
            'xmlns: prefix must be bound to ' + XMLNS_NAMESPACE + '\n' +
            'Actual: ' + parser.attribValue)
        } else {
          var tag = parser.tag
          var parent = parser.tags[parser.tags.length - 1] || parser
          // give the tag its own scope the first time it declares a binding
          if (tag.ns === parent.ns) {
            tag.ns = Object.create(parent.ns)
          }
          tag.ns[local] = parser.attribValue
        }
      }

      // defer onattribute events until all attributes have been seen
      // so any new bindings can take effect. preserve attribute order
      // so deferred events can be emitted in document order
      parser.attribList.push([parser.attribName, parser.attribValue])
    } else {
      // in non-xmlns mode, we can emit the event right away
      parser.tag.attributes[parser.attribName] = parser.attribValue
      emitNode(parser, 'onattribute', {
        name: parser.attribName,
        value: parser.attribValue
      })
    }

    parser.attribName = parser.attribValue = ''
  }
  // Complete the tag that is currently being opened.
  // In xmlns mode this resolves the tag's namespace, announces newly declared
  // bindings and emits the attribute events that attrib() deferred. The tag is
  // then pushed onto the open-tag stack and 'onopentag' is emitted.
  //   selfClosing - truthy for <tag/>; such a tag is left in place
  //                 (parser.tag / parser.tagName / parser.state untouched here)
  function openTag (parser, selfClosing) {
    if (parser.opt.xmlns) {
      var tag = parser.tag

      // add namespace info to tag
      var tagQn = qname(parser.tagName)
      tag.prefix = tagQn.prefix
      tag.local = tagQn.local
      tag.uri = tag.ns[tagQn.prefix] || ''
      if (tag.prefix && !tag.uri) {
        strictFail(parser, 'Unbound namespace prefix: ' +
          JSON.stringify(parser.tagName))
        tag.uri = tagQn.prefix
      }

      // emit namespace binding events; tag.ns differs from the parent's
      // scope only when this tag declared bindings of its own
      var enclosing = parser.tags[parser.tags.length - 1] || parser
      if (tag.ns && enclosing.ns !== tag.ns) {
        var declared = Object.keys(tag.ns)
        for (var d = 0; d < declared.length; d++) {
          emitNode(parser, 'onopennamespace', {
            prefix: declared[d],
            uri: tag.ns[declared[d]]
          })
        }
      }

      // handle deferred onattribute events
      // Note: do not apply default ns to attributes:
      // http://www.w3.org/TR/REC-xml-names/#defaulting
      var count = parser.attribList.length
      for (var n = 0; n < count; n++) {
        var pair = parser.attribList[n]
        var attrQn = qname(pair[0], true)
        var attrUri = ''
        if (attrQn.prefix !== '') {
          attrUri = tag.ns[attrQn.prefix] || ''
        }
        var attribute = {
          name: pair[0],
          value: pair[1],
          prefix: attrQn.prefix,
          local: attrQn.local,
          uri: attrUri
        }

        // if there's any attributes with an undefined namespace,
        // then fail on them now.
        if (attrQn.prefix && attrQn.prefix !== 'xmlns' && !attrUri) {
          strictFail(parser, 'Unbound namespace prefix: ' +
            JSON.stringify(attrQn.prefix))
          attribute.uri = attrQn.prefix
        }
        parser.tag.attributes[pair[0]] = attribute
        emitNode(parser, 'onattribute', attribute)
      }
      parser.attribList.length = 0
    }

    parser.tag.isSelfClosing = Boolean(selfClosing)

    // process the tag
    parser.sawRoot = true
    parser.tags.push(parser.tag)
    emitNode(parser, 'onopentag', parser.tag)

    if (!selfClosing) {
      // special case for <script> in non-strict mode.
      var entersScript = !parser.noscript &&
        parser.tagName.toLowerCase() === 'script'
      parser.state = entersScript ? S.SCRIPT : S.TEXT
      parser.tag = null
      parser.tagName = ''
    }

    parser.attribValue = ''
    parser.attribName = ''
    parser.attribList.length = 0
  }
  // Handle a complete closing tag whose name is in parser.tagName.
  //
  // - An empty name ("</>") is kept as literal text.
  // - While script content is buffered, only </script> ends it; any other
  //   closing tag is appended to the script text. The name is compared
  //   case-insensitively, matching the case-insensitive test openTag() uses
  //   to enter script mode, so <SCRIPT>...</SCRIPT> terminates as well.
  // - Otherwise the open-tag stack is searched from the top for a matching
  //   name. When found, every tag above it is closed too ('onclosetag', plus
  //   'onclosenamespace' for bindings a tag introduced). When not found the
  //   closing tag is kept as literal text.
  function closeTag (parser) {
    if (!parser.tagName) {
      strictFail(parser, 'Weird empty close tag.')
      parser.textNode += '</>'
      parser.state = S.TEXT
      return
    }

    if (parser.script) {
      if (parser.tagName.toLowerCase() !== 'script') {
        parser.script += '</' + parser.tagName + '>'
        parser.tagName = ''
        parser.state = S.SCRIPT
        return
      }
      emitNode(parser, 'onscript', parser.script)
      parser.script = ''
    }

    // first make sure that the closing tag actually exists.
    // <a><b></c></b></a> will close everything, otherwise.
    var t = parser.tags.length
    var tagName = parser.tagName
    if (!parser.strict) {
      tagName = tagName[parser.looseCase]()
    }
    var closeTo = tagName
    while (t--) {
      var close = parser.tags[t]
      if (close.name !== closeTo) {
        // fail the first time in strict mode
        strictFail(parser, 'Unexpected close tag')
      } else {
        break
      }
    }

    // didn't find it. we already failed for strict, so just abort.
    if (t < 0) {
      strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
      parser.textNode += '</' + parser.tagName + '>'
      parser.state = S.TEXT
      return
    }
    parser.tagName = tagName

    // pop and close everything down to (and including) the matched tag
    var s = parser.tags.length
    while (s-- > t) {
      var tag = parser.tag = parser.tags.pop()
      parser.tagName = parser.tag.name
      emitNode(parser, 'onclosetag', parser.tagName)

      var parent = parser.tags[parser.tags.length - 1] || parser
      if (parser.opt.xmlns && tag.ns !== parent.ns) {
        // remove namespace bindings introduced by tag
        Object.keys(tag.ns).forEach(function (p) {
          emitNode(parser, 'onclosenamespace', { prefix: p, uri: tag.ns[p] })
        })
      }
    }

    if (t === 0) parser.closedRoot = true
    parser.tagName = parser.attribValue = parser.attribName = ''
    parser.attribList.length = 0
    parser.state = S.TEXT
  }
  // Resolve the entity name buffered in parser.entity (without '&' and ';').
  //
  // Lookup order: exact name in parser.ENTITIES, then the lower-cased name,
  // then a numeric character reference (#NNN decimal or #xHHH hex).
  // Returns the replacement text, or the original '&name;' text after a
  // strict-mode failure when the entity cannot be resolved.
  //
  // Two guards against malformed input:
  // - table hits must be strings or numbers, so names such as "constructor"
  //   or "__proto__" no longer resolve to members inherited from
  //   Object.prototype / the table's prototype chain;
  // - numeric references outside 0..0x10FFFF (e.g. &#x110000; or &#-5;) are
  //   treated as invalid instead of letting String.fromCodePoint throw a
  //   RangeError.
  function parseEntity (parser) {
    var entity = parser.entity
    var entityLC = entity.toLowerCase()
    var num
    var numStr = ''

    var isEntityValue = function (value) {
      var type = typeof value
      return Boolean(value) && (type === 'string' || type === 'number')
    }

    if (isEntityValue(parser.ENTITIES[entity])) {
      return parser.ENTITIES[entity]
    }
    if (isEntityValue(parser.ENTITIES[entityLC])) {
      return parser.ENTITIES[entityLC]
    }

    entity = entityLC
    if (entity.charAt(0) === '#') {
      if (entity.charAt(1) === 'x') {
        entity = entity.slice(2)
        num = parseInt(entity, 16)
        numStr = num.toString(16)
      } else {
        entity = entity.slice(1)
        num = parseInt(entity, 10)
        numStr = num.toString(10)
      }
    }

    // Leading zeros are allowed; apart from them the digits must round-trip
    // exactly, which rejects references with trailing garbage.
    entity = entity.replace(/^0+/, '')
    if (isNaN(num) || numStr.toLowerCase() !== entity ||
      num < 0 || num > 0x10FFFF) {
      strictFail(parser, 'Invalid character entity')
      return '&' + parser.entity + ';'
    }

    return String.fromCodePoint(num)
  }
  // Handle one character seen before the first tag.
  // '<' starts a tag, whitespace is skipped, and anything else is a
  // strict-mode failure that begins a text node with that character.
  function beginWhiteSpace (parser, c) {
    if (c === '<') {
      parser.state = S.OPEN_WAKA
      parser.startTagPosition = parser.position
      return
    }
    if (isWhitespace(c)) {
      return
    }
    // have to process this as a text node.
    // weird, but happens.
    strictFail(parser, 'Non-whitespace before first tag.')
    parser.textNode = c
    parser.state = S.TEXT
  }
  // Character of `chunk` at index `i`, or '' once `i` is not below the
  // chunk length.
  function charAt (chunk, i) {
    return i < chunk.length ? chunk.charAt(i) : ''
  }
  792. function write (chunk) {
  793. var parser = this
  794. if (this.error) {
  795. throw this.error
  796. }
  797. if (parser.closed) {
  798. return error(parser,
  799. 'Cannot write after close. Assign an onready handler.')
  800. }
  801. if (chunk === null) {
  802. return end(parser)
  803. }
  804. if (typeof chunk === 'object') {
  805. chunk = chunk.toString()
  806. }
  807. var i = 0
  808. var c = ''
  809. while (true) {
  810. c = charAt(chunk, i++)
  811. parser.c = c
  812. if (!c) {
  813. break
  814. }
  815. if (parser.trackPosition) {
  816. parser.position++
  817. if (c === '\n') {
  818. parser.line++
  819. parser.column = 0
  820. } else {
  821. parser.column++
  822. }
  823. }
  824. switch (parser.state) {
  825. case S.BEGIN:
  826. parser.state = S.BEGIN_WHITESPACE
  827. if (c === '\uFEFF') {
  828. continue
  829. }
  830. beginWhiteSpace(parser, c)
  831. continue
  832. case S.BEGIN_WHITESPACE:
  833. beginWhiteSpace(parser, c)
  834. continue
  835. case S.TEXT:
  836. if (parser.sawRoot && !parser.closedRoot) {
  837. var starti = i - 1
  838. while (c && c !== '<' && c !== '&') {
  839. c = charAt(chunk, i++)
  840. if (c && parser.trackPosition) {
  841. parser.position++
  842. if (c === '\n') {
  843. parser.line++
  844. parser.column = 0
  845. } else {
  846. parser.column++
  847. }
  848. }
  849. }
  850. parser.textNode += chunk.substring(starti, i - 1)
  851. }
  852. if (c === '<' && !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
  853. parser.state = S.OPEN_WAKA
  854. parser.startTagPosition = parser.position
  855. } else {
  856. if (!isWhitespace(c) && (!parser.sawRoot || parser.closedRoot)) {
  857. strictFail(parser, 'Text data outside of root node.')
  858. }
  859. if (c === '&') {
  860. parser.state = S.TEXT_ENTITY
  861. } else {
  862. parser.textNode += c
  863. }
  864. }
  865. continue
  866. case S.SCRIPT:
  867. // only non-strict
  868. if (c === '<') {
  869. parser.state = S.SCRIPT_ENDING
  870. } else {
  871. parser.script += c
  872. }
  873. continue
  874. case S.SCRIPT_ENDING:
  875. if (c === '/') {
  876. parser.state = S.CLOSE_TAG
  877. } else {
  878. parser.script += '<' + c
  879. parser.state = S.SCRIPT
  880. }
  881. continue
  882. case S.OPEN_WAKA:
  883. // either a /, ?, !, or text is coming next.
  884. if (c === '!') {
  885. parser.state = S.SGML_DECL
  886. parser.sgmlDecl = ''
  887. } else if (isWhitespace(c)) {
  888. // wait for it...
  889. } else if (isMatch(nameStart, c)) {
  890. parser.state = S.OPEN_TAG
  891. parser.tagName = c
  892. } else if (c === '/') {
  893. parser.state = S.CLOSE_TAG
  894. parser.tagName = ''
  895. } else if (c === '?') {
  896. parser.state = S.PROC_INST
  897. parser.procInstName = parser.procInstBody = ''
  898. } else {
  899. strictFail(parser, 'Unencoded <')
  900. // if there was some whitespace, then add that in.
  901. if (parser.startTagPosition + 1 < parser.position) {
  902. var pad = parser.position - parser.startTagPosition
  903. c = new Array(pad).join(' ') + c
  904. }
  905. parser.textNode += '<' + c
  906. parser.state = S.TEXT
  907. }
  908. continue
  909. case S.SGML_DECL:
  910. if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
  911. emitNode(parser, 'onopencdata')
  912. parser.state = S.CDATA
  913. parser.sgmlDecl = ''
  914. parser.cdata = ''
  915. } else if (parser.sgmlDecl + c === '--') {
  916. parser.state = S.COMMENT
  917. parser.comment = ''
  918. parser.sgmlDecl = ''
  919. } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
  920. parser.state = S.DOCTYPE
  921. if (parser.doctype || parser.sawRoot) {
  922. strictFail(parser,
  923. 'Inappropriately located doctype declaration')
  924. }
  925. parser.doctype = ''
  926. parser.sgmlDecl = ''
  927. } else if (c === '>') {
  928. emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
  929. parser.sgmlDecl = ''
  930. parser.state = S.TEXT
  931. } else if (isQuote(c)) {
  932. parser.state = S.SGML_DECL_QUOTED
  933. parser.sgmlDecl += c
  934. } else {
  935. parser.sgmlDecl += c
  936. }
  937. continue
  938. case S.SGML_DECL_QUOTED:
  939. if (c === parser.q) {
  940. parser.state = S.SGML_DECL
  941. parser.q = ''
  942. }
  943. parser.sgmlDecl += c
  944. continue
  945. case S.DOCTYPE:
  946. if (c === '>') {
  947. parser.state = S.TEXT
  948. emitNode(parser, 'ondoctype', parser.doctype)
  949. parser.doctype = true // just remember that we saw it.
  950. } else {
  951. parser.doctype += c
  952. if (c === '[') {
  953. parser.state = S.DOCTYPE_DTD
  954. } else if (isQuote(c)) {
  955. parser.state = S.DOCTYPE_QUOTED
  956. parser.q = c
  957. }
  958. }
  959. continue
  960. case S.DOCTYPE_QUOTED:
  961. parser.doctype += c
  962. if (c === parser.q) {
  963. parser.q = ''
  964. parser.state = S.DOCTYPE
  965. }
  966. continue
  967. case S.DOCTYPE_DTD:
  968. parser.doctype += c
  969. if (c === ']') {
  970. parser.state = S.DOCTYPE
  971. } else if (isQuote(c)) {
  972. parser.state = S.DOCTYPE_DTD_QUOTED
  973. parser.q = c
  974. }
  975. continue
  976. case S.DOCTYPE_DTD_QUOTED:
  977. parser.doctype += c
  978. if (c === parser.q) {
  979. parser.state = S.DOCTYPE_DTD
  980. parser.q = ''
  981. }
  982. continue
  983. case S.COMMENT:
  984. if (c === '-') {
  985. parser.state = S.COMMENT_ENDING
  986. } else {
  987. parser.comment += c
  988. }
  989. continue
  990. case S.COMMENT_ENDING:
  991. if (c === '-') {
  992. parser.state = S.COMMENT_ENDED
  993. parser.comment = textopts(parser.opt, parser.comment)
  994. if (parser.comment) {
  995. emitNode(parser, 'oncomment', parser.comment)
  996. }
  997. parser.comment = ''
  998. } else {
  999. parser.comment += '-' + c
  1000. parser.state = S.COMMENT
  1001. }
  1002. continue
  1003. case S.COMMENT_ENDED:
  1004. if (c !== '>') {
  1005. strictFail(parser, 'Malformed comment')
  1006. // allow <!-- blah -- bloo --> in non-strict mode,
  1007. // which is a comment of " blah -- bloo "
  1008. parser.comment += '--' + c
  1009. parser.state = S.COMMENT
  1010. } else {
  1011. parser.state = S.TEXT
  1012. }
  1013. continue
  1014. case S.CDATA:
  1015. if (c === ']') {
  1016. parser.state = S.CDATA_ENDING
  1017. } else {
  1018. parser.cdata += c
  1019. }
  1020. continue
  1021. case S.CDATA_ENDING:
  1022. if (c === ']') {
  1023. parser.state = S.CDATA_ENDING_2
  1024. } else {
  1025. parser.cdata += ']' + c
  1026. parser.state = S.CDATA
  1027. }
  1028. continue
  1029. case S.CDATA_ENDING_2:
  1030. if (c === '>') {
  1031. if (parser.cdata) {
  1032. emitNode(parser, 'oncdata', parser.cdata)
  1033. }
  1034. emitNode(parser, 'onclosecdata')
  1035. parser.cdata = ''
  1036. parser.state = S.TEXT
  1037. } else if (c === ']') {
  1038. parser.cdata += ']'
  1039. } else {
  1040. parser.cdata += ']]' + c
  1041. parser.state = S.CDATA
  1042. }
  1043. continue
  1044. case S.PROC_INST:
  1045. if (c === '?') {
  1046. parser.state = S.PROC_INST_ENDING
  1047. } else if (isWhitespace(c)) {
  1048. parser.state = S.PROC_INST_BODY
  1049. } else {
  1050. parser.procInstName += c
  1051. }
  1052. continue
  1053. case S.PROC_INST_BODY:
  1054. if (!parser.procInstBody && isWhitespace(c)) {
  1055. continue
  1056. } else if (c === '?') {
  1057. parser.state = S.PROC_INST_ENDING
  1058. } else {
  1059. parser.procInstBody += c
  1060. }
  1061. continue
  1062. case S.PROC_INST_ENDING:
  1063. if (c === '>') {
  1064. emitNode(parser, 'onprocessinginstruction', {
  1065. name: parser.procInstName,
  1066. body: parser.procInstBody
  1067. })
  1068. parser.procInstName = parser.procInstBody = ''
  1069. parser.state = S.TEXT
  1070. } else {
  1071. parser.procInstBody += '?' + c
  1072. parser.state = S.PROC_INST_BODY
  1073. }
  1074. continue
  1075. case S.OPEN_TAG:
  1076. if (isMatch(nameBody, c)) {
  1077. parser.tagName += c
  1078. } else {
  1079. newTag(parser)
  1080. if (c === '>') {
  1081. openTag(parser)
  1082. } else if (c === '/') {
  1083. parser.state = S.OPEN_TAG_SLASH
  1084. } else {
  1085. if (!isWhitespace(c)) {
  1086. strictFail(parser, 'Invalid character in tag name')
  1087. }
  1088. parser.state = S.ATTRIB
  1089. }
  1090. }
  1091. continue
  1092. case S.OPEN_TAG_SLASH:
  1093. if (c === '>') {
  1094. openTag(parser, true)
  1095. closeTag(parser)
  1096. } else {
  1097. strictFail(parser, 'Forward-slash in opening tag not followed by >')
  1098. parser.state = S.ATTRIB
  1099. }
  1100. continue
  1101. case S.ATTRIB:
  1102. // haven't read the attribute name yet.
  1103. if (isWhitespace(c)) {
  1104. continue
  1105. } else if (c === '>') {
  1106. openTag(parser)
  1107. } else if (c === '/') {
  1108. parser.state = S.OPEN_TAG_SLASH
  1109. } else if (isMatch(nameStart, c)) {
  1110. parser.attribName = c
  1111. parser.attribValue = ''
  1112. parser.state = S.ATTRIB_NAME
  1113. } else {
  1114. strictFail(parser, 'Invalid attribute name')
  1115. }
  1116. continue
  1117. case S.ATTRIB_NAME:
  1118. if (c === '=') {
  1119. parser.state = S.ATTRIB_VALUE
  1120. } else if (c === '>') {
  1121. strictFail(parser, 'Attribute without value')
  1122. parser.attribValue = parser.attribName
  1123. attrib(parser)
  1124. openTag(parser)
  1125. } else if (isWhitespace(c)) {
  1126. parser.state = S.ATTRIB_NAME_SAW_WHITE
  1127. } else if (isMatch(nameBody, c)) {
  1128. parser.attribName += c
  1129. } else {
  1130. strictFail(parser, 'Invalid attribute name')
  1131. }
  1132. continue
  1133. case S.ATTRIB_NAME_SAW_WHITE:
  1134. if (c === '=') {
  1135. parser.state = S.ATTRIB_VALUE
  1136. } else if (isWhitespace(c)) {
  1137. continue
  1138. } else {
  1139. strictFail(parser, 'Attribute without value')
  1140. parser.tag.attributes[parser.attribName] = ''
  1141. parser.attribValue = ''
  1142. emitNode(parser, 'onattribute', {
  1143. name: parser.attribName,
  1144. value: ''
  1145. })
  1146. parser.attribName = ''
  1147. if (c === '>') {
  1148. openTag(parser)
  1149. } else if (isMatch(nameStart, c)) {
  1150. parser.attribName = c
  1151. parser.state = S.ATTRIB_NAME
  1152. } else {
  1153. strictFail(parser, 'Invalid attribute name')
  1154. parser.state = S.ATTRIB
  1155. }
  1156. }
  1157. continue
  1158. case S.ATTRIB_VALUE:
  1159. if (isWhitespace(c)) {
  1160. continue
  1161. } else if (isQuote(c)) {
  1162. parser.q = c
  1163. parser.state = S.ATTRIB_VALUE_QUOTED
  1164. } else {
  1165. strictFail(parser, 'Unquoted attribute value')
  1166. parser.state = S.ATTRIB_VALUE_UNQUOTED
  1167. parser.attribValue = c
  1168. }
  1169. continue
  1170. case S.ATTRIB_VALUE_QUOTED:
  1171. if (c !== parser.q) {
  1172. if (c === '&') {
  1173. parser.state = S.ATTRIB_VALUE_ENTITY_Q
  1174. } else {
  1175. parser.attribValue += c
  1176. }
  1177. continue
  1178. }
  1179. attrib(parser)
  1180. parser.q = ''
  1181. parser.state = S.ATTRIB_VALUE_CLOSED
  1182. continue
  1183. case S.ATTRIB_VALUE_CLOSED:
  1184. if (isWhitespace(c)) {
  1185. parser.state = S.ATTRIB
  1186. } else if (c === '>') {
  1187. openTag(parser)
  1188. } else if (c === '/') {
  1189. parser.state = S.OPEN_TAG_SLASH
  1190. } else if (isMatch(nameStart, c)) {
  1191. strictFail(parser, 'No whitespace between attributes')
  1192. parser.attribName = c
  1193. parser.attribValue = ''
  1194. parser.state = S.ATTRIB_NAME
  1195. } else {
  1196. strictFail(parser, 'Invalid attribute name')
  1197. }
  1198. continue
  1199. case S.ATTRIB_VALUE_UNQUOTED:
  1200. if (!isAttribEnd(c)) {
  1201. if (c === '&') {
  1202. parser.state = S.ATTRIB_VALUE_ENTITY_U
  1203. } else {
  1204. parser.attribValue += c
  1205. }
  1206. continue
  1207. }
  1208. attrib(parser)
  1209. if (c === '>') {
  1210. openTag(parser)
  1211. } else {
  1212. parser.state = S.ATTRIB
  1213. }
  1214. continue
  1215. case S.CLOSE_TAG:
  1216. if (!parser.tagName) {
  1217. if (isWhitespace(c)) {
  1218. continue
  1219. } else if (notMatch(nameStart, c)) {
  1220. if (parser.script) {
  1221. parser.script += '</' + c
  1222. parser.state = S.SCRIPT
  1223. } else {
  1224. strictFail(parser, 'Invalid tagname in closing tag.')
  1225. }
  1226. } else {
  1227. parser.tagName = c
  1228. }
  1229. } else if (c === '>') {
  1230. closeTag(parser)
  1231. } else if (isMatch(nameBody, c)) {
  1232. parser.tagName += c
  1233. } else if (parser.script) {
  1234. parser.script += '</' + parser.tagName
  1235. parser.tagName = ''
  1236. parser.state = S.SCRIPT
  1237. } else {
  1238. if (!isWhitespace(c)) {
  1239. strictFail(parser, 'Invalid tagname in closing tag')
  1240. }
  1241. parser.state = S.CLOSE_TAG_SAW_WHITE
  1242. }
  1243. continue
  1244. case S.CLOSE_TAG_SAW_WHITE:
  1245. if (isWhitespace(c)) {
  1246. continue
  1247. }
  1248. if (c === '>') {
  1249. closeTag(parser)
  1250. } else {
  1251. strictFail(parser, 'Invalid characters in closing tag')
  1252. }
  1253. continue
  1254. case S.TEXT_ENTITY:
  1255. case S.ATTRIB_VALUE_ENTITY_Q:
  1256. case S.ATTRIB_VALUE_ENTITY_U:
  1257. var returnState
  1258. var buffer
  1259. switch (parser.state) {
  1260. case S.TEXT_ENTITY:
  1261. returnState = S.TEXT
  1262. buffer = 'textNode'
  1263. break
  1264. case S.ATTRIB_VALUE_ENTITY_Q:
  1265. returnState = S.ATTRIB_VALUE_QUOTED
  1266. buffer = 'attribValue'
  1267. break
  1268. case S.ATTRIB_VALUE_ENTITY_U:
  1269. returnState = S.ATTRIB_VALUE_UNQUOTED
  1270. buffer = 'attribValue'
  1271. break
  1272. }
  1273. if (c === ';') {
  1274. var parsedEntity = parseEntity(parser)
  1275. // Custom entities can contain tags, so we potentially need to parse the result
  1276. if (parser.state === S.TEXT_ENTITY && !sax.ENTITIES[parser.entity] && parsedEntity !== '&' + parser.entity + ';') {
  1277. chunk = chunk.slice(0, i) + parsedEntity + chunk.slice(i)
  1278. } else {
  1279. parser[buffer] += parsedEntity
  1280. }
  1281. parser.entity = ''
  1282. parser.state = returnState
  1283. } else if (isMatch(parser.entity.length ? entityBody : entityStart, c)) {
  1284. parser.entity += c
  1285. } else {
  1286. strictFail(parser, 'Invalid character in entity name')
  1287. parser[buffer] += '&' + parser.entity + c
  1288. parser.entity = ''
  1289. parser.state = returnState
  1290. }
  1291. continue
  1292. default:
  1293. throw new Error(parser, 'Unknown state: ' + parser.state)
  1294. }
  1295. } // while
  1296. if (parser.position >= parser.bufferCheckPosition) {
  1297. checkBufferLength(parser)
  1298. }
  1299. return parser
  1300. }
  1301. })(typeof exports === 'undefined' ? this.sax = {} : exports)