diff --git a/apps/server/src/integrations/import/utils/confluence-emoji.spec.ts b/apps/server/src/integrations/import/utils/confluence-emoji.spec.ts new file mode 100644 index 000000000..5718ab374 --- /dev/null +++ b/apps/server/src/integrations/import/utils/confluence-emoji.spec.ts @@ -0,0 +1,46 @@ +import { parseConfluenceEmojiId } from './confluence-emoji'; + +describe('parseConfluenceEmojiId', () => { + it('parses a single code point id', () => { + expect(parseConfluenceEmojiId('1f600')).toBe('πŸ˜€'); + expect(parseConfluenceEmojiId('1F600')).toBe('πŸ˜€'); + }); + + it('parses a country flag (two regional indicator code points)', () => { + expect(parseConfluenceEmojiId('1f1f3-1f1ec')).toBe('πŸ‡³πŸ‡¬'); + expect(parseConfluenceEmojiId('1f1fa-1f1f8')).toBe('πŸ‡ΊπŸ‡Έ'); + }); + + it('parses a ZWJ sequence (three code points)', () => { + expect(parseConfluenceEmojiId('1f468-200d-1f4bb')).toBe('πŸ‘¨β€πŸ’»'); + }); + + it('parses a five-component family ZWJ sequence', () => { + // πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦ = man, ZWJ, woman, ZWJ, girl, ZWJ, boy + expect(parseConfluenceEmojiId('1f468-200d-1f469-200d-1f467-200d-1f466')).toBe( + 'πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦', + ); + }); + + it('returns null for missing input', () => { + expect(parseConfluenceEmojiId(undefined)).toBeNull(); + expect(parseConfluenceEmojiId(null)).toBeNull(); + expect(parseConfluenceEmojiId('')).toBeNull(); + }); + + it('returns null when any segment is not pure hex', () => { + expect(parseConfluenceEmojiId('1f600-NG')).toBeNull(); + expect(parseConfluenceEmojiId('not-hex')).toBeNull(); + expect(parseConfluenceEmojiId('1f600--1f1ec')).toBeNull(); + expect(parseConfluenceEmojiId('1f600 1f1ec')).toBeNull(); + }); + + it('returns null when a segment parses to a non-positive value', () => { + expect(parseConfluenceEmojiId('0')).toBeNull(); + }); + + it('returns null for code points outside the valid Unicode range', () => { + // 0x110000 is one past the highest valid code point. 
/**
 * Parse a Confluence emoji id (hex code points joined by hyphens) into a
 * Unicode string. Confluence emits ids in both single- and multi-code-point
 * forms:
 *
 *   "1f600"            → "😀"
 *   "1f1f3-1f1ec"      → "🇳🇬"  (flag: Nigeria)
 *   "1f468-200d-1f4bb" → "👨‍💻"  (man technologist, ZWJ sequence)
 *
 * @param raw - The emoji id; hex digits are accepted in either case.
 * @returns The decoded string, or null when the input is missing, empty,
 *   contains a segment that is not pure hex, or names a value that is not a
 *   Unicode scalar value (zero, above U+10FFFF, or a surrogate in
 *   U+D800–U+DFFF).
 */
export function parseConfluenceEmojiId(
  raw: string | undefined | null,
): string | null {
  if (!raw) return null;

  const hexSegment = /^[0-9a-fA-F]+$/;
  const codePoints: number[] = [];

  for (const segment of raw.split('-')) {
    // Validate the text before parsing: parseInt on its own stops at the
    // first non-hex character and would accept inputs such as "1fzz".
    // This also rejects the empty segments produced by "--" or a
    // leading/trailing hyphen.
    if (!hexSegment.test(segment)) return null;

    const codePoint = parseInt(segment, 16);

    // Zero is never a valid emoji component; anything above U+10FFFF is
    // outside Unicode (this also covers overlong hex that parses to Infinity).
    if (codePoint <= 0 || codePoint > 0x10ffff) return null;

    // Surrogates are UTF-16 encoding artefacts, not characters.
    // String.fromCodePoint accepts them without throwing and would return an
    // ill-formed string containing a lone surrogate, so reject them here.
    if (codePoint >= 0xd800 && codePoint <= 0xdfff) return null;

    codePoints.push(codePoint);
  }

  try {
    return String.fromCodePoint(...codePoints);
  } catch {
    // Every value is already range-checked, so this is only a backstop for
    // pathological inputs (e.g. so many segments that spreading them as call
    // arguments fails with a RangeError).
    return null;
  }
}