diff --git a/README.md b/README.md index 9ac4e7f..0179dfb 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,8 @@ const result = parse(soapResponse, { | `attributeNamePrefix` | `string` | `'@_'` | Prefix prepended to attribute names in the output object | | `parseTagValue` | `boolean` | `true` | Coerce numeric/boolean text values to their JS types | | `processEntities` | `boolean` | `true` | Decode XML entities (`&` → `&`, `<` → `<`, etc.) | +| `maxDepth` | `number` | `256` | Maximum nesting depth allowed. Throws if exceeded | +| `strict` | `boolean` | `false` | Throw on malformed XML (unclosed, mismatched, or extra closing tags) | ### `build(obj: Record, options?: BuildOptions): string` @@ -105,7 +107,7 @@ The parser produces the same object shape as `fast-xml-parser`: - Processing instructions (skipped) - Self-closing tags - XML declaration (skipped) -- Built-in XML entities (`&`, `<`, `>`, `"`, `'`) +- XML entities: named (`&`, `<`, `>`, `"`, `'`) and numeric (`{`, `{`) ## License diff --git a/src/__tests__/build.test.ts b/src/__tests__/build.test.ts index bad76b6..7272aa2 100644 --- a/src/__tests__/build.test.ts +++ b/src/__tests__/build.test.ts @@ -123,6 +123,51 @@ describe('build', () => { }); }); + describe('builder edge cases', () => { + it('builds elements with special characters in text that need escaping', () => { + expect(build({ root: 'Tom & Jerry <"friends">' })).toBe( + 'Tom & Jerry <"friends">', + ); + }); + + it('handles deeply nested objects', () => { + const deep = { a: { b: { c: { d: { e: 'val' } } } } }; + expect(build(deep)).toBe('val'); + }); + + it('handles arrays of objects with mixed content', () => { + const obj = { + root: { + item: [ + { '@_id': '1', '#text': 'first' }, + { '@_id': '2', child: 'nested' }, + 'plain', + ], + }, + }; + const result = build(obj); + expect(result).toContain('first'); + expect(result).toContain('nested'); + expect(result).toContain('plain'); + }); + + it('handles numeric zero values', () => { + expect(build({ root: { count: 0 } })).toBe('0'); + }); + + it('handles false boolean values', () => { + expect(build({ root: { flag: false } })).toBe('false'); + }); + + it('handles empty object as self-closing', () => { + expect(build({ root: {} })).toBe(''); + }); + + it('handles empty array (no output for key)', () => { + expect(build({ root: { items: [] } })).toBe(''); + }); + }); + describe('round-trip', () => { it('parse -> build -> parse produces equivalent structure', () => { const xml = 'firstsecond'; diff --git a/src/__tests__/parse.test.ts b/src/__tests__/parse.test.ts index 55c83f3..c003d61 100644 --- a/src/__tests__/parse.test.ts +++ b/src/__tests__/parse.test.ts @@ -294,6 +294,12 @@ describe('parse', () => { expect(() => parse('text', { strict: true })).not.toThrow(); }); + it('throws on extra closing tags in strict mode', () => { + expect(() => parse('text', { strict: true })).toThrow( + 'Unexpected closing tag ', + ); + }); + it('silently ignores unclosed tags when strict is false (default)', () => { const result = parse('text'); expect(result).toEqual({}); @@ -351,4 +357,151 @@ describe('parse', () => { expect(execResult.exceptionStackTrace).toEqual({ '@_nil': 'true' }); }); }); + + describe('numeric entity handling', () => { + it('decodes decimal numeric entities', () => { + expect(parse('Hello')).toEqual({ root: 'Hello' }); + }); + + it('decodes hex numeric entities', () => { + expect(parse('Hello')).toEqual({ root: 'Hello' }); + }); + + it('decodes mixed named and numeric entities', () => { + expect(parse('<div class="test">')).toEqual({ root: '
' }); + }); + + it('leaves numeric entities as-is when processEntities is false', () => { + expect(parse('Hello', { processEntities: false })).toEqual({ root: 'Hello' }); + }); + + it('handles numeric entities in attribute values', () => { + const result = parse('text'); + expect(result).toEqual({ root: { '#text': 'text', '@_title': 'Hello' } }); + }); + + it('leaves invalid numeric entities as-is instead of crashing', () => { + expect(parse('')).toEqual({ root: '�' }); + expect(parse('')).toEqual({ root: '�' }); + }); + }); + + describe('malformed XML handling', () => { + it('handles unclosed tags gracefully in non-strict mode', () => { + expect(() => parse('text')).not.toThrow(); + }); + + it('handles extra closing tags gracefully in non-strict mode', () => { + const result = parse('text'); + expect(result).toEqual({ root: 'text' }); + }); + + it('handles completely empty input', () => { + expect(parse('')).toEqual({}); + }); + + it('handles whitespace-only input', () => { + expect(parse(' \n\t ')).toEqual({}); + }); + + it('handles XML with only a declaration', () => { + expect(parse('')).toEqual({}); + }); + + it('handles XML with only comments', () => { + expect(parse('')).toEqual({}); + }); + + it('handles unescaped ampersand in text content without crashing', () => { + expect(() => parse('AT&T')).not.toThrow(); + }); + }); + + describe('whitespace edge cases', () => { + it('handles whitespace-only text nodes between elements', () => { + const result = parse('\n 1\n 2\n'); + expect(result).toEqual({ root: { a: 1, b: 2 } }); + }); + + it('preserves significant whitespace in text content with trimValues false', () => { + const result = parse(' line1\n line2 ', { trimValues: false }); + expect(result).toEqual({ root: ' line1\n line2 ' }); + }); + + it('does not crash on malformed tags with whitespace in names', () => { + // < root > is not valid XML — just verify no crash + const result = parse('< root >value'); + expect(result).toEqual({ '': 'value' }); + }); + }); + + describe('CDATA edge cases', () => { + it('handles multiple adjacent CDATA sections', () => { + const result = parse(''); + expect(result).toEqual({ root: 'part1part2' }); + }); + + it('handles CDATA mixed with regular text', () => { + const result = parse('before]]>after'); + expect(result).toEqual({ root: 'beforeafter' }); + }); + + it('handles empty CDATA', () => { + const result = parse(''); + expect(result).toEqual({ root: '' }); + }); + + it('handles CDATA with newlines and special characters', () => { + const result = parse(']]>'); + expect(result).toEqual({ root: 'line1\nline2\t& ' }); + }); + }); + + describe('attribute edge cases', () => { + it('handles attributes with empty values', () => { + const result = parse('text'); + expect(result).toEqual({ root: { '#text': 'text', '@_attr': '' } }); + }); + + it('handles attributes with encoded entities in values', () => { + const result = parse('text'); + expect(result).toEqual({ root: { '#text': 'text', '@_title': 'a & b' } }); + }); + + it('handles attributes with encoded entities when processEntities is false', () => { + const result = parse('text', { processEntities: false }); + expect(result).toEqual({ root: { '#text': 'text', '@_title': 'a & b' } }); + }); + + it('handles many attributes on one element', () => { + const result = parse('text'); + expect(result).toEqual({ + root: { '#text': 'text', '@_a': '1', '@_b': '2', '@_c': '3', '@_d': '4', '@_e': '5' }, + }); + }); + + it('handles duplicate attributes (last wins)', () => { + const result = parse('text'); + expect(result).toEqual({ root: { '#text': 'text', '@_attr': 'second' } }); + }); + }); + + describe('large payloads', () => { + it('handles XML with many sibling elements', () => { + const items = Array.from({ length: 1000 }, (_, i) => `${i}`).join(''); + const xml = `${items}`; + const result = parse(xml); + const arr = (result.root as any).item as number[]; + expect(arr).toHaveLength(1000); + expect(arr[0]).toBe(0); + expect(arr[999]).toBe(999); + }); + + it('handles XML with very long text content', () => { + const longText = 'x'.repeat(100_000); + const xml = `${longText}`; + const result = parse(xml); + expect(result.root).toBe(longText); + }); + }); }); diff --git a/src/parse.ts b/src/parse.ts index c75d5ed..184c8d9 100644 --- a/src/parse.ts +++ b/src/parse.ts @@ -8,10 +8,19 @@ const ENTITY_MAP: Record = { ''': "'", }; -const ENTITY_RE = /&(?:amp|lt|gt|quot|apos);/g; +const ENTITY_RE = /&(?:amp|lt|gt|quot|apos|#x[0-9a-fA-F]+|#[0-9]+);/g; function decodeEntities(str: string): string { - return str.replace(ENTITY_RE, match => ENTITY_MAP[match] || match); + return str.replace(ENTITY_RE, match => { + if (match in ENTITY_MAP) return ENTITY_MAP[match]!; + try { + if (match.startsWith('&#x')) return String.fromCodePoint(parseInt(match.slice(3, -1), 16)); + if (match.startsWith('&#')) return String.fromCodePoint(parseInt(match.slice(2, -1), 10)); + } catch { + return match; + } + return match; + }); } function removeNS(name: string): string { @@ -137,6 +146,17 @@ export function parse(xml: string, options?: ParseOptions): Record', i + 2); if (end === -1) break; + // Guard against extra closing tags that would pop the root sentinel + if (stack.length <= 1) { + if (strict) { + let closingName = xml.slice(i + 2, end).trim(); + if (rmNSPrefix) closingName = removeNS(closingName); + throw new Error(`Unexpected closing tag `); + } + i = end + 1; + continue; + } + if (strict) { let closingName = xml.slice(i + 2, end).trim(); if (rmNSPrefix) closingName = removeNS(closingName);