feat(import): add Confluence emoji-id parser with tests

This commit is contained in:
Philipinho
2026-05-16 01:28:10 +01:00
parent 3e66aff1e2
commit bbb1b5eb26
2 changed files with 74 additions and 0 deletions
@@ -0,0 +1,46 @@
import { parseConfluenceEmojiId } from './confluence-emoji';
// Specification for parseConfluenceEmojiId: hyphen-separated hex code points
// are turned into the corresponding Unicode string, and anything that is
// missing or malformed yields null.
describe('parseConfluenceEmojiId', () => {
  it('parses a single code point id', () => {
    // Hex digits are accepted in either case.
    expect(parseConfluenceEmojiId('1f600')).toBe('😀');
    expect(parseConfluenceEmojiId('1F600')).toBe('😀');
  });

  it('parses a country flag (two regional indicator code points)', () => {
    expect(parseConfluenceEmojiId('1f1f3-1f1ec')).toBe('🇳🇬');
    expect(parseConfluenceEmojiId('1f1fa-1f1f8')).toBe('🇺🇸');
  });

  it('parses a ZWJ sequence (three code points)', () => {
    expect(parseConfluenceEmojiId('1f468-200d-1f4bb')).toBe('👨‍💻');
  });

  it('parses a seven-component family ZWJ sequence', () => {
    // 👨‍👩‍👧‍👦 = man, ZWJ, woman, ZWJ, girl, ZWJ, boy (seven code points).
    expect(parseConfluenceEmojiId('1f468-200d-1f469-200d-1f467-200d-1f466')).toBe(
      '👨‍👩‍👧‍👦',
    );
  });

  it('parses the highest valid Unicode code point', () => {
    // 0x10FFFF is the inclusive upper bound of the Unicode code space.
    expect(parseConfluenceEmojiId('10ffff')).toBe('\u{10FFFF}');
  });

  it('returns null for missing input', () => {
    expect(parseConfluenceEmojiId(undefined)).toBeNull();
    expect(parseConfluenceEmojiId(null)).toBeNull();
    expect(parseConfluenceEmojiId('')).toBeNull();
  });

  it('returns null when any segment is not pure hex', () => {
    expect(parseConfluenceEmojiId('1f600-NG')).toBeNull();
    expect(parseConfluenceEmojiId('not-hex')).toBeNull();
    // A doubled, leading, or trailing hyphen produces an empty segment.
    expect(parseConfluenceEmojiId('1f600--1f1ec')).toBeNull();
    expect(parseConfluenceEmojiId('-1f600')).toBeNull();
    expect(parseConfluenceEmojiId('1f600-')).toBeNull();
    expect(parseConfluenceEmojiId('1f600 1f1ec')).toBeNull();
    // A "0x" prefix is not part of the id format.
    expect(parseConfluenceEmojiId('0x1f600')).toBeNull();
  });

  it('returns null when a segment parses to a non-positive value', () => {
    expect(parseConfluenceEmojiId('0')).toBeNull();
  });

  it('returns null for code points outside the valid Unicode range', () => {
    // 0x110000 is one past the highest valid code point.
    expect(parseConfluenceEmojiId('110000')).toBeNull();
  });
});
@@ -0,0 +1,28 @@
/**
 * Parse a Confluence emoji id (hex code points joined by hyphens) into a
 * Unicode string. Confluence emits ids in both single- and multi-code-point
 * forms:
 *
 *   "1f600"            → "😀"
 *   "1f1f3-1f1ec"      → "🇳🇬"  (flag: Nigeria)
 *   "1f468-200d-1f4bb" → "👨‍💻"  (man technologist, ZWJ sequence)
 *
 * Hex digits are matched case-insensitively.
 *
 * @param raw - The emoji id to parse; may be missing.
 * @returns The decoded string, or null when the input is missing, empty,
 *   contains a segment that is not pure hex, contains a code point outside
 *   1..0x10FFFF, or would decode to a string containing an unpaired
 *   surrogate (which is not well-formed Unicode text).
 */
export function parseConfluenceEmojiId(
  raw: string | undefined | null,
): string | null {
  if (!raw) return null;

  // An empty segment (leading, trailing, or doubled hyphen) fails the
  // one-or-more-hex-digits test below, so no separate emptiness check is needed.
  const parts = raw.split('-');
  if (!parts.every((p) => /^[0-9a-fA-F]+$/.test(p))) return null;

  const codePoints = parts.map((p) => parseInt(p, 16));

  // Written as a negated in-range test so that NaN and Infinity (the latter is
  // what parseInt yields for extremely long digit runs) are rejected as well.
  if (codePoints.some((cp) => !(cp > 0 && cp <= 0x10ffff))) return null;

  let decoded: string;
  try {
    decoded = String.fromCodePoint(...codePoints);
  } catch {
    // Code points are already range-checked; this guards the remaining
    // failure mode of spreading an excessive number of arguments.
    return null;
  }

  // With the `u` flag the string is scanned by code point, so a correctly
  // paired surrogate is seen as one astral character and only an unpaired
  // surrogate (e.g. from "d800") matches this class.
  if (/[\uD800-\uDFFF]/u.test(decoded)) return null;

  return decoded;
}