feat: add unaccent support for accent-insensitive search (#1402)

- Add PostgreSQL unaccent and pg_trgm extensions
- Create immutable f_unaccent wrapper function for performance
- Update all search queries to use f_unaccent for accent-insensitive matching
- Truncate text_content to its first 1,000,000 characters when building the tsvector to prevent errors on large documents
- Update full-text search trigger to use f_unaccent
- Fix MultiSelect client-side filtering to show server results properly
This commit is contained in:
Philip Okugbe
2025-07-29 22:47:13 +01:00
committed by GitHub
parent f90c5a636b
commit 5da92a538a
10 changed files with 154 additions and 64 deletions
@@ -0,0 +1,50 @@
import { type Kysely, sql } from 'kysely';
export async function up(db: Kysely<any>): Promise<void> {
// Create unaccent extension
await sql`CREATE EXTENSION IF NOT EXISTS unaccent`.execute(db);
// Create pg_trgm extension
await sql`CREATE EXTENSION IF NOT EXISTS pg_trgm`.execute(db);
// Create IMMUTABLE wrapper function for unaccent
// This allows us to create indexes on unaccented columns for better performance
// https://stackoverflow.com/a/11007216/8299075
await sql`
CREATE OR REPLACE FUNCTION f_unaccent(text) RETURNS text
AS $$
SELECT unaccent('unaccent', $1);
$$ LANGUAGE sql IMMUTABLE PARALLEL SAFE STRICT;
`.execute(db);
// Update the pages tsvector trigger to use the immutable function
await sql`
CREATE OR REPLACE FUNCTION pages_tsvector_trigger() RETURNS trigger AS $$
begin
new.tsv :=
setweight(to_tsvector('english', f_unaccent(coalesce(new.title, ''))), 'A') ||
setweight(to_tsvector('english', f_unaccent(substring(coalesce(new.text_content, ''), 1, 1000000))), 'B');
return new;
end;
$$ LANGUAGE plpgsql;
`.execute(db);
}
export async function down(db: Kysely<any>): Promise<void> {
await sql`
CREATE OR REPLACE FUNCTION pages_tsvector_trigger() RETURNS trigger AS $$
begin
new.tsv :=
setweight(to_tsvector('english', coalesce(new.title, '')), 'A') ||
setweight(to_tsvector('english', coalesce(new.text_content, '')), 'B');
return new;
end;
$$ LANGUAGE plpgsql;
`.execute(db);
await sql`DROP FUNCTION IF EXISTS f_unaccent(text)`.execute(db);
await sql`DROP EXTENSION IF EXISTS pg_trgm`.execute(db);
await sql`DROP EXTENSION IF EXISTS unaccent`.execute(db);
}
@@ -6,6 +6,7 @@ import {
import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
import { dbOrTx, executeTx } from '@docmost/db/utils';
import { sql } from 'kysely';
import { GroupUser, InsertableGroupUser } from '@docmost/db/types/entity.types';
import { PaginationOptions } from '../../pagination/pagination-options';
import { executeWithPagination } from '@docmost/db/pagination/pagination';
@@ -56,7 +57,7 @@ export class GroupUserRepo {
if (pagination.query) {
query = query.where((eb) =>
eb('users.name', 'ilike', `%${pagination.query}%`),
eb(sql`f_unaccent(users.name)`, 'ilike', sql`f_unaccent(${'%' + pagination.query + '%'})`),
);
}
@@ -114,10 +114,10 @@ export class GroupRepo {
if (pagination.query) {
query = query.where((eb) =>
eb('name', 'ilike', `%${pagination.query}%`).or(
'description',
eb(sql`f_unaccent(name)`, 'ilike', sql`f_unaccent(${'%' + pagination.query + '%'})`).or(
sql`f_unaccent(description)`,
'ilike',
`%${pagination.query}%`,
sql`f_unaccent(${'%' + pagination.query + '%'})`,
),
);
}
@@ -2,6 +2,7 @@ import { BadRequestException, Injectable } from '@nestjs/common';
import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
import { dbOrTx } from '@docmost/db/utils';
import { sql } from 'kysely';
import {
InsertableSpaceMember,
SpaceMember,
@@ -119,9 +120,21 @@ export class SpaceMemberRepo {
if (pagination.query) {
query = query.where((eb) =>
eb('users.name', 'ilike', `%${pagination.query}%`)
.or('users.email', 'ilike', `%${pagination.query}%`)
.or('groups.name', 'ilike', `%${pagination.query}%`),
eb(
sql`f_unaccent(users.name)`,
'ilike',
sql`f_unaccent(${'%' + pagination.query + '%'})`,
)
.or(
sql`users.email`,
'ilike',
sql`f_unaccent(${'%' + pagination.query + '%'})`,
)
.or(
sql`f_unaccent(groups.name)`,
'ilike',
sql`f_unaccent(${'%' + pagination.query + '%'})`,
),
);
}
@@ -228,10 +241,14 @@ export class SpaceMemberRepo {
if (pagination.query) {
query = query.where((eb) =>
eb('name', 'ilike', `%${pagination.query}%`).or(
'description',
eb(
sql`f_unaccent(name)`,
'ilike',
`%${pagination.query}%`,
sql`f_unaccent(${'%' + pagination.query + '%'})`,
).or(
sql`f_unaccent(description)`,
'ilike',
sql`f_unaccent(${'%' + pagination.query + '%'})`,
),
);
}
@@ -110,10 +110,10 @@ export class SpaceRepo {
if (pagination.query) {
query = query.where((eb) =>
eb('name', 'ilike', `%${pagination.query}%`).or(
'description',
eb(sql`f_unaccent(name)`, 'ilike', sql`f_unaccent(${'%' + pagination.query + '%'})`).or(
sql`f_unaccent(description)`,
'ilike',
`%${pagination.query}%`,
sql`f_unaccent(${'%' + pagination.query + '%'})`,
),
);
}
@@ -4,6 +4,7 @@ import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
import { DB, Users } from '@docmost/db/types/db';
import { hashPassword } from '../../../common/helpers';
import { dbOrTx } from '@docmost/db/utils';
import { sql } from 'kysely';
import {
InsertableUser,
UpdatableUser,
@@ -149,10 +150,14 @@ export class UserRepo {
if (pagination.query) {
query = query.where((eb) =>
eb('users.name', 'ilike', `%${pagination.query}%`).or(
'users.email',
eb(
sql`f_unaccent(users.name)`,
'ilike',
`%${pagination.query}%`,
sql`f_unaccent(${'%' + pagination.query + '%'})`,
).or(
sql`users.email`,
'ilike',
sql`f_unaccent(${'%' + pagination.query + '%'})`,
),
);
}