V2 - WIP

2026-05-07 06:23:06 +08:00 · 2026-01-21 11:45:41 +00:00
parent 669ff0435f
commit f44bb3f121
13 changed files with 578 additions and 2 deletions
@@ -8,5 +8,10 @@
  },
  "main": "dist/index.js",
  "module": "./src/index.ts",
-  "types": "dist/index.d.ts"
+  "types": "dist/index.d.ts",
+  "dependencies": {
+    "diff": "^8.0.3",
+    "prosemirror-changeset": "^2.3.1",
+    "rfc6902": "^5.1.2"
+  }
 }
@@ -23,3 +23,4 @@ export * from "./lib/subpages";
 export * from "./lib/highlight";
 export * from "./lib/heading/heading";
 export * from "./lib/unique-id";
+export * from "./lib/recreate-transform";
@@ -0,0 +1,145 @@
+# prosemirror-changeset
+
+This is a helper module that can turn a sequence of document changes
+into a set of insertions and deletions, for example to display them in
+a change-tracking interface. Such a set can be built up incrementally,
+in order to do such change tracking in a halfway performant way during
+live editing.
+
+This code is licensed under an [MIT
+licence](https://github.com/ProseMirror/prosemirror-changeset/blob/master/LICENSE).
+
+## Programming interface
+
+Insertions and deletions are represented as ‘spans’—ranges in the
+document. The deleted spans refer to the original document, whereas
+the inserted ones point into the current document.
+
+It is possible to associate arbitrary data values with such spans, for
+example to track the user that made the change, the timestamp at which
+it was made, or the step data necessary to invert it again.
+
+### class Change`<Data = any>`
+
+A replaced range with metadata associated with it.
+
+* **`fromA`**`: number`\
+  The start of the range deleted/replaced in the old document.
+
+* **`toA`**`: number`\
+  The end of the range in the old document.
+
+* **`fromB`**`: number`\
+  The start of the range inserted in the new document.
+
+* **`toB`**`: number`\
+  The end of the range in the new document.
+
+* **`deleted`**`: readonly Span[]`\
+  Data associated with the deleted content. The length of these
+  spans adds up to `this.toA - this.fromA`.
+
+* **`inserted`**`: readonly Span[]`\
+  Data associated with the inserted content. Length adds up to
+  `this.toB - this.fromB`.
+
+* `static `**`merge`**`<Data>(x: readonly Change[], y: readonly Change[], combine: fn(dataA: Data, dataB: Data) → Data) → readonly Change[]`\
+  This merges two changesets (the end document of x should be the
+  start document of y) into a single one spanning the start of x to
+  the end of y.
+
+
+### class Span`<Data = any>`
+
+Stores metadata for a part of a change.
+
+* **`length`**`: number`\
+  The length of this span.
+
+* **`data`**`: Data`\
+  The data associated with this span.
+
+
+### class ChangeSet`<Data = any>`
+
+A change set tracks the changes to a document from a given point
+in the past. It condenses a number of step maps down to a flat
+sequence of replacements, and simplifies replacements that
+partially undo themselves by comparing their content.
+
+* **`changes`**`: readonly Change[]`\
+  Replaced regions.
+
+* **`addSteps`**`(newDoc: Node, maps: readonly StepMap[], data: Data | readonly Data[]) → ChangeSet`\
+  Computes a new changeset by adding the given step maps and
+  metadata (either as an array, per-map, or as a single value to be
+  associated with all maps) to the current set. Will not mutate the
+  old set.
+
+  Note that due to simplification that happens after each add,
+  incrementally adding steps might create a different final set
+  than adding all those changes at once, since different document
+  tokens might be matched during simplification depending on the
+  boundaries of the current changed ranges.
+
+* **`startDoc`**`: Node`\
+  The starting document of the change set.
+
+* **`map`**`(f: fn(range: Span) → Data) → ChangeSet`\
+  Map the span's data values in the given set through a function
+  and construct a new set with the resulting data.
+
+* **`changedRange`**`(b: ChangeSet, maps?: readonly StepMap[]) → {from: number, to: number}`\
+  Compare two changesets and return the range in which they are
+  changed, if any. If the document changed between the maps, pass
+  the maps for the steps that changed it as second argument, and
+  make sure the method is called on the old set and passed the new
+  set. The returned positions will be in new document coordinates.
+
+* `static `**`create`**`<Data = any>(doc: Node, combine?: fn(dataA: Data, dataB: Data) → Data = (a, b) => a === b ? a : null as any, tokenEncoder?: TokenEncoder = DefaultEncoder) → ChangeSet`\
+  Create a changeset with the given base object and configuration.
+
+  The `combine` function is used to compare and combine metadata—it
+  should return null when metadata isn't compatible, and a combined
+  version for a merged range when it is.
+
+  When given, a token encoder determines how document tokens are
+  serialized and compared when diffing the content produced by
+  changes. The default is to just compare nodes by name and text
+  by character, ignoring marks and attributes.
+
+
+* **`simplifyChanges`**`(changes: readonly Change[], doc: Node) → Change[]`\
+  Simplifies a set of changes for presentation. This makes the
+  assumption that having both insertions and deletions within a word
+  is confusing, and, when such changes occur without a word boundary
+  between them, they should be expanded to cover the entire set of
+  words (in the new document) they touch. An exception is made for
+  single-character replacements.
+
+
+### interface TokenEncoder`<T>`
+
+A token encoder can be passed when creating a `ChangeSet` in order
+to influence the way the library runs its diffing algorithm. The
+encoder determines how document tokens (such as nodes and
+characters) are encoded and compared.
+
+Note that both the encoding and the comparison may run a lot, and
+doing non-trivial work in these functions could impact
+performance.
+
+* **`encodeCharacter`**`(char: number, marks: readonly Mark[]) → T`\
+  Encode a given character, with the given marks applied.
+
+* **`encodeNodeStart`**`(node: Node) → T`\
+  Encode the start of a node or, if this is a leaf node, the
+  entire node.
+
+* **`encodeNodeEnd`**`(node: Node) → T`\
+  Encode the end token for the given node. It is valid to encode
+  every end token in the same way.
+
+* **`compareTokens`**`(a: T, b: T) → boolean`\
+  Compare the given tokens. Should return true when they count as
+  equal.
@@ -0,0 +1,30 @@
+# prosemirror-recreate-transform
+
+> reduced and modified fork of https://gitlab.com/mpapp-public/prosemirror-recreate-steps
+
+This is a non-core module of [ProseMirror](http://prosemirror.net).
+ProseMirror is a well-behaved rich semantic content editor based on
+contentEditable, with support for collaborative editing and custom
+document schemas.
+
+Every change to the document is recorded by ProseMirror as a step.
+This module allows recreating the steps needed to go from document
+A to B should these not be available otherwise. Recreating steps
+can be interesting for example in order to show the changes between
+two document versions without having access to the original steps.
+
+Recreating a `Transform` works this way:
+
+```js
+import { recreateTransform } from "@technik-sde/prosemirror-recreate-transform";
+
+let tr = recreateTransform(
+    startDoc, 
+    endDoc, 
+    {
+        complexSteps: true, // Whether step types other than ReplaceStep are allowed.
+        wordDiffs: false, // Whether diffs in text nodes should cover entire words.
+        simplifyDiff: true // Whether steps should be merged, where possible
+    }
+);
+```
@@ -0,0 +1,3 @@
+/**
+ * Deep-clone a JSON-compatible value by serialising it to a JSON string and
+ * parsing that string again. The result shares no references with `value`.
+ */
+export function copy<T>(value: T): T {
+  const serialized = JSON.stringify(value);
+  return JSON.parse(serialized);
+}
@@ -0,0 +1,17 @@
+import { AnyObject } from "./types";
+
+/**
+ * Resolve a JSON Pointer (RFC 6901, e.g. `/content/0/content`) against an object.
+ *
+ * Each reference token is unescaped before the lookup (`~1` -> `/`, `~0` -> `~`),
+ * so keys containing those characters resolve correctly.
+ *
+ * @param obj  object to resolve the pointer into
+ * @param path JSON Pointer; the empty string addresses `obj` itself
+ * @returns the addressed value, or `undefined` when a segment along the way is missing
+ */
+export function getFromPath(obj: AnyObject, path: string): any {
+  // Everything in front of the leading "/" is the (empty) root entry; drop it.
+  const segments = path.split("/").slice(1);
+  let target: any = obj;
+
+  for (const segment of segments) {
+    if (target == null) {
+      // An intermediate value is missing: nothing left to descend into.
+      return undefined;
+    }
+    // Order matters: decode "~1" first so that "~01" ends up as "~1", not "/".
+    const key = segment.replace(/~1/g, "/").replace(/~0/g, "~");
+    target = target[key];
+  }
+
+  return target;
+}
@@ -0,0 +1,29 @@
+import { ReplaceStep } from "@tiptap/pm/transform";
+import { Node } from "@tiptap/pm/model";
+
+/**
+ * Build a single ReplaceStep that turns `fromDoc` into `toDoc`, using the first and
+ * last positions at which the content of the two documents differs.
+ *
+ * @param fromDoc document the step is meant to be applied to
+ * @param toDoc   document the replacement slice is taken from
+ * @returns the ReplaceStep, or `false` when no difference is found
+ */
+export function getReplaceStep(fromDoc: Node, toDoc: Node) {
+  let start = toDoc.content.findDiffStart(fromDoc.content);
+  if (start === null) {
+    return false;
+  }
+
+  // findDiffEnd is nullable as well; guard it explicitly instead of silencing the
+  // compiler, and treat a missing end like a missing start.
+  const diffEnd = toDoc.content.findDiffEnd(fromDoc.content);
+  if (!diffEnd) {
+    return false;
+  }
+
+  // `a` is the end position in toDoc, `b` the end position in fromDoc.
+  let { a: endA, b: endB } = diffEnd;
+  const overlap = start - Math.min(endA, endB);
+  if (overlap > 0) {
+    // If there is an overlap, there is some freedom of choice in how to calculate the
+    // start/end boundary for an inserted/removed slice. We choose the extreme with
+    // the lowest depth value.
+    if (
+      fromDoc.resolve(start - overlap).depth <
+      toDoc.resolve(endA + overlap).depth
+    ) {
+      start -= overlap;
+    } else {
+      endA += overlap;
+      endB += overlap;
+    }
+  }
+
+  return new ReplaceStep(start, endB, toDoc.slice(start, endA));
+}
@@ -0,0 +1,4 @@
+// https://gitlab.com/mpapp-public/prosemirror-recreate-steps
+// https://github.com/sueddeutsche/prosemirror-recreate-transform
+export { recreateTransform, RecreateTransform } from "./recreateTransform";
+export type { Options } from "./recreateTransform";
@@ -0,0 +1,279 @@
+import { Transform } from "@tiptap/pm/transform";
+import { Node, Schema } from "@tiptap/pm/model";
+import { applyPatch, createPatch, Operation } from "rfc6902";
+import { diffWordsWithSpace, diffChars } from "diff";
+import { AnyObject } from "./types";
+import { getReplaceStep } from "./getReplaceStep";
+import { simplifyTransform } from "./simplifyTransform";
+import { removeMarks } from "./removeMarks";
+import { getFromPath } from "./getFromPath";
+import { copy } from "./copy";
+
+/**
+ * Options accepted by `RecreateTransform` and `recreateTransform`.
+ * Defaults applied by the `RecreateTransform` constructor: complexSteps `true`,
+ * wordDiffs `false`, simplifyDiff `true`.
+ */
+export interface Options {
+  /** Whether to return steps other than ReplaceSteps. */
+  complexSteps?: boolean;
+  /** Whether to make text diffs cover entire words. */
+  wordDiffs?: boolean;
+  /** Whether the finished transform is passed through `simplifyTransform`. */
+  simplifyDiff?: boolean;
+}
+
+/**
+ * Recreates the steps that lead from `fromDoc` to `toDoc`.
+ *
+ * Both documents are serialised to JSON and diffed into JSON-patch operations
+ * (`createPatch`); `init()` converts those operations into steps on `tr`.
+ */
+export class RecreateTransform {
+  fromDoc: Node;
+  toDoc: Node;
+  complexSteps: boolean;
+  wordDiffs: boolean;
+  simplifyDiff: boolean;
+  schema: Schema;
+  tr: Transform;
+  /* current working document data, may get updated while recalculating node steps */
+  currentJSON: AnyObject;
+  /* final document as json data */
+  finalJSON: AnyObject;
+  ops: Array<Operation>;
+
+  /**
+   * @param fromDoc starting document; its schema is used for every JSON conversion
+   * @param toDoc   target document
+   * @param options missing keys default to complexSteps `true`, wordDiffs `false`,
+   *                simplifyDiff `true`
+   */
+  constructor(fromDoc: Node, toDoc: Node, options: Options = {}) {
+    const o = {
+      complexSteps: true,
+      wordDiffs: false,
+      simplifyDiff: true,
+      ...options,
+    };
+
+    this.fromDoc = fromDoc;
+    this.toDoc = toDoc;
+    this.complexSteps = o.complexSteps; // Whether to return steps other than ReplaceSteps
+    this.wordDiffs = o.wordDiffs; // Whether to make text diffs cover entire words
+    this.simplifyDiff = o.simplifyDiff;
+    this.schema = fromDoc.type.schema;
+    this.tr = new Transform(fromDoc);
+  }
+
+  /**
+   * Run the recreation and return the resulting transform.
+   *
+   * @returns `this.tr`, replaced by its simplified version when `simplifyDiff` is set
+   */
+  init() {
+    if (this.complexSteps) {
+      // For First steps: we create versions of the documents without marks as
+      // these will only confuse the diffing mechanism and marks won't cause
+      // any mapping changes anyway.
+      this.currentJSON = removeMarks(this.fromDoc).toJSON();
+      this.finalJSON = removeMarks(this.toDoc).toJSON();
+      this.ops = createPatch(this.currentJSON, this.finalJSON);
+      this.recreateChangeContentSteps();
+      this.recreateChangeMarkSteps();
+    } else {
+      // We don't differentiate between mark changes and other changes.
+      this.currentJSON = this.fromDoc.toJSON();
+      this.finalJSON = this.toDoc.toJSON();
+      this.ops = createPatch(this.currentJSON, this.finalJSON);
+      this.recreateChangeContentSteps();
+    }
+
+    if (this.simplifyDiff) {
+      // simplifyTransform returns undefined for a transform without steps.
+      this.tr = simplifyTransform(this.tr) || this.tr;
+    }
+
+    return this.tr;
+  }
+
+  /** convert json-diff to prosemirror steps */
+  recreateChangeContentSteps() {
+    // First step: find content changing steps.
+    // `ops` collects the operations consumed since the last step was emitted.
+    let ops: Operation[] = [];
+    while (this.ops.length) {
+      // get next
+      let op = this.ops.shift();
+      ops.push(op);
+
+      let toDoc;
+      const afterStepJSON = copy(this.currentJSON); // working document receiving patches
+      // Path of the FIRST operation of this round; only consulted when ops.length === 1.
+      const pathParts = op.path.split("/");
+
+      // collect operations until we receive a valid document:
+      // apply ops-patches until a valid prosemirror document is retrieved,
+      // then try to create a transformation step or retry with next operation
+      while (toDoc == null) {
+        applyPatch(afterStepJSON, [op]);
+
+        try {
+          toDoc = this.schema.nodeFromJSON(afterStepJSON);
+          toDoc.check();
+        } catch (error) {
+          toDoc = null;
+          if (this.ops.length > 0) {
+            op = this.ops.shift();
+            ops.push(op);
+          } else {
+            throw new Error(`No valid diff possible applying ${op.path}`);
+          }
+        }
+      }
+
+      // apply operation (ignoring afterStepJSON)
+      if (
+        this.complexSteps &&
+        ops.length === 1 &&
+        (pathParts.includes("attrs") || pathParts.includes("type"))
+      ) {
+        // Node markup is changing
+        this.addSetNodeMarkup(); // a lost update is ignored
+        ops = [];
+        // console.log("%cop", logStyle, "- update node", ops);
+      } else if (
+        ops.length === 1 &&
+        op.op === "replace" &&
+        pathParts[pathParts.length - 1] === "text"
+      ) {
+        // Text is being replaced, we apply text diffing to find the smallest possible diffs.
+        this.addReplaceTextSteps(op, afterStepJSON);
+        ops = [];
+        // console.log("%cop", logStyle, "- replace", ops);
+      } else if (this.addReplaceStep(toDoc, afterStepJSON)) {
+        // operations have been applied
+        ops = [];
+        // console.log("%cop", logStyle, "- other", ops);
+      }
+    }
+  }
+
+  /**
+   * update node with attrs and marks, may also change type
+   *
+   * @returns `true` when a step was added and the remaining ops were recalculated,
+   *          `false` when the current and the final document show no difference
+   * @throws when no node can be found at the first differing position
+   */
+  addSetNodeMarkup() {
+    // first diff in document is supposed to be a node-change (in type and/or attributes)
+    // thus simply find the first change and apply a node change step, then recalculate the diff
+    // after updating the document
+    const fromDoc = this.schema.nodeFromJSON(this.currentJSON);
+    const toDoc = this.schema.nodeFromJSON(this.finalJSON);
+    const start = toDoc.content.findDiffStart(fromDoc.content);
+
+    // Check for "no difference" BEFORE resolving nodes: nodeAt() must not be
+    // called with a null position.
+    if (start == null) {
+      return false;
+    }
+
+    // @note start is the same (first) position for current and target document
+    const fromNode = fromDoc.nodeAt(start);
+    const toNode = toDoc.nodeAt(start);
+    if (!fromNode || !toNode) {
+      throw new Error(`No node found at diff position ${start}`);
+    }
+
+    // @note this completely updates all attributes in one step, by completely replacing node
+    const nodeType = fromNode.type === toNode.type ? null : toNode.type;
+    try {
+      this.tr.setNodeMarkup(start, nodeType, toNode.attrs, toNode.marks);
+    } catch (e) {
+      // if nodetypes differ, the updated node-type and contents might not be compatible
+      // with schema and requires a replace
+      if (
+        nodeType &&
+        e instanceof Error &&
+        e.message.includes("Invalid content")
+      ) {
+        // @todo add test-case for this scenario
+        this.tr.replaceWith(start, start + fromNode.nodeSize, toNode);
+      } else {
+        throw e;
+      }
+    }
+    this.currentJSON = removeMarks(this.tr.doc).toJSON();
+    // setting the node markup may have invalidated the following ops, so we calculate them again.
+    this.ops = createPatch(this.currentJSON, this.finalJSON);
+    return true;
+  }
+
+  /** align the marks of `tr.doc` with those of `toDoc` by adding remove-/add-mark steps */
+  recreateChangeMarkSteps() {
+    // Now the documents should be the same, except their marks, so everything should map 1:1.
+    // Second step: Iterate through the toDoc and make sure all marks are the same in tr.doc
+    this.toDoc.descendants((tNode, tPos) => {
+      if (!tNode.isInline) {
+        return true;
+      }
+
+      this.tr.doc.nodesBetween(tPos, tPos + tNode.nodeSize, (fNode, fPos) => {
+        if (!fNode.isInline) {
+          return true;
+        }
+        // Only touch the range where the two inline nodes overlap.
+        const from = Math.max(tPos, fPos);
+        const to = Math.min(tPos + tNode.nodeSize, fPos + fNode.nodeSize);
+        fNode.marks.forEach((nodeMark) => {
+          if (!nodeMark.isInSet(tNode.marks)) {
+            this.tr.removeMark(from, to, nodeMark);
+          }
+        });
+        tNode.marks.forEach((nodeMark) => {
+          if (!nodeMark.isInSet(fNode.marks)) {
+            this.tr.addMark(from, to, nodeMark);
+          }
+        });
+      });
+    });
+  }
+
+  /**
+   * retrieve and possibly apply replace-step based from doc changes
+   * From http://prosemirror.net/examples/footnote/
+   *
+   * @param toDoc         document to reach from the current working document
+   * @param afterStepJSON JSON that becomes `currentJSON` once the step is applied
+   * @returns `true` when a step was applied, `false` when no difference was found
+   * @throws when the calculated step cannot be applied to `tr`
+   */
+  addReplaceStep(toDoc: Node, afterStepJSON: AnyObject) {
+    const fromDoc = this.schema.nodeFromJSON(this.currentJSON);
+    const step = getReplaceStep(fromDoc, toDoc);
+
+    if (!step) {
+      return false;
+    } else if (!this.tr.maybeStep(step).failed) {
+      this.currentJSON = afterStepJSON;
+      return true; // @change previously null
+    }
+
+    throw new Error("No valid step found.");
+  }
+
+  /**
+   * retrieve and possibly apply text replace-steps based from doc changes
+   *
+   * @param op            "replace" operation targeting a `text` property
+   * @param afterStepJSON JSON that becomes `currentJSON` after the steps are added
+   */
+  addReplaceTextSteps(op, afterStepJSON) {
+    // We find the position number of the first character in the string
+    // by patching in two different placeholder texts and locating where the
+    // resulting documents start to differ.
+    const op1 = { ...op, value: "xx" };
+    const op2 = { ...op, value: "yy" };
+    const afterOP1JSON = copy(this.currentJSON);
+    const afterOP2JSON = copy(this.currentJSON);
+    applyPatch(afterOP1JSON, [op1]);
+    applyPatch(afterOP2JSON, [op2]);
+    const op1Doc = this.schema.nodeFromJSON(afterOP1JSON);
+    const op2Doc = this.schema.nodeFromJSON(afterOP2JSON);
+
+    // get text diffs
+    const finalText = op.value;
+    const currentText = getFromPath(this.currentJSON, op.path);
+    const textDiffs = this.wordDiffs
+      ? diffWordsWithSpace(currentText, finalText)
+      : diffChars(currentText, finalText);
+
+    let offset = op1Doc.content.findDiffStart(op2Doc.content);
+    const marks = op1Doc.resolve(offset + 1).marks();
+
+    while (textDiffs.length) {
+      const diff = textDiffs.shift();
+
+      if (diff.added) {
+        const textNode = this.schema
+          .nodeFromJSON({ type: "text", text: diff.value })
+          .mark(marks);
+
+        if (textDiffs.length && textDiffs[0].removed) {
+          // An insertion directly followed by a removal becomes one replacement.
+          const nextDiff = textDiffs.shift();
+          this.tr.replaceWith(offset, offset + nextDiff.value.length, textNode);
+        } else {
+          this.tr.insert(offset, textNode);
+        }
+        offset += diff.value.length;
+      } else if (diff.removed) {
+        if (textDiffs.length && textDiffs[0].added) {
+          // A removal directly followed by an insertion becomes one replacement.
+          const nextDiff = textDiffs.shift();
+          const textNode = this.schema
+            .nodeFromJSON({ type: "text", text: nextDiff.value })
+            .mark(marks);
+          this.tr.replaceWith(offset, offset + diff.value.length, textNode);
+          offset += nextDiff.value.length;
+        } else {
+          this.tr.delete(offset, offset + diff.value.length);
+        }
+      } else {
+        // Unchanged text: just move past it.
+        offset += diff.value.length;
+      }
+    }
+
+    this.currentJSON = afterStepJSON;
+  }
+}
+
+/**
+ * Convenience wrapper: creates a `RecreateTransform` for the two documents and
+ * returns the result of its `init()` call.
+ *
+ * @param fromDoc starting document
+ * @param toDoc   target document
+ * @param options forwarded unchanged to the `RecreateTransform` constructor
+ */
+export function recreateTransform(
+  fromDoc: Node,
+  toDoc: Node,
+  options: Options = {},
+): Transform {
+  return new RecreateTransform(fromDoc, toDoc, options).init();
+}
@@ -0,0 +1,8 @@
+import { Transform } from "@tiptap/pm/transform";
+import { Node } from "@tiptap/pm/model";
+
+/**
+ * Return the document that results from a mark-removal step over the range
+ * `0 .. doc.nodeSize - 2`. No specific mark is passed to `removeMark`.
+ */
+export function removeMarks(doc: Node) {
+  const rangeEnd = doc.nodeSize - 2;
+  const stripped = new Transform(doc);
+  stripped.removeMark(0, rangeEnd);
+  return stripped.doc;
+}
@@ -0,0 +1,30 @@
+import { Transform, ReplaceStep, Step } from "@tiptap/pm/transform";
+import { getReplaceStep } from "./getReplaceStep";
+
+/**
+ * Join adjacent steps of a transform where possible.
+ *
+ * Steps are replayed onto a fresh transform that starts at `tr.docs[0]`. While
+ * the current step can be merged with the next one (`Step.merge`), the two are
+ * combined; two ReplaceSteps are combined by recomputing one ReplaceStep from the
+ * document before both steps to the document after both.
+ *
+ * @param tr transform to simplify
+ * @returns a new transform, or `undefined` when `tr` contains no steps
+ */
+export function simplifyTransform(tr: Transform) {
+  if (!tr.steps.length) {
+    return undefined;
+  }
+
+  const newTr = new Transform(tr.docs[0]);
+  const oldSteps = tr.steps.slice();
+
+  while (oldSteps.length) {
+    // `null` means: the steps combined so far cancel each other out.
+    let step: Step | null = oldSteps.shift() || null;
+
+    while (step && oldSteps.length) {
+      // Evaluate merge() once; it is both the "can be joined" test and the
+      // fallback result.
+      const merged = step.merge(oldSteps[0]);
+      if (!merged) {
+        break;
+      }
+
+      const addedStep = oldSteps.shift();
+      if (step instanceof ReplaceStep && addedStep instanceof ReplaceStep) {
+        const afterFirst = step.apply(newTr.doc).doc;
+        const afterBoth = afterFirst ? addedStep.apply(afterFirst).doc : null;
+        if (afterBoth) {
+          // getReplaceStep returns false when both steps together leave the
+          // document unchanged; in that case nothing must be added at all.
+          step = getReplaceStep(newTr.doc, afterBoth) || null;
+        } else {
+          // The steps could not be replayed here; keep the plain merge result.
+          step = merged;
+        }
+      } else {
+        step = merged;
+      }
+    }
+
+    if (step) {
+      newTr.step(step);
+    }
+  }
+  return newTr;
+}
@@ -0,0 +1,3 @@
+/** Loosely typed object: any string key, values of any type. */
+export interface AnyObject {
+    [p: string]: any;
+}
@@ -745,7 +745,17 @@ importers:
        specifier: ^8.24.1
        version: 8.24.1(eslint@9.20.1(jiti@1.21.0))(typescript@5.7.3)

-  packages/editor-ext: {}
+  packages/editor-ext:
+    dependencies:
+      diff:
+        specifier: ^8.0.3
+        version: 8.0.3
+      prosemirror-changeset:
+        specifier: ^2.3.1
+        version: 2.3.1
+      rfc6902:
+        specifier: ^5.1.2
+        version: 5.1.2

 packages:

@@ -6130,6 +6140,10 @@ packages:
    resolution: {integrity: sha512-uIFDxqpRZGZ6ThOk84hEfqWoHx2devRFvpTZcTHur85vImfaxUbTW9Ryh4CpCuDnToOP1CEtXKIgytHBPVff5A==}
    engines: {node: '>=0.3.1'}

+  diff@8.0.3:
+    resolution: {integrity: sha512-qejHi7bcSD4hQAZE0tNAawRK1ZtafHDmMTMkrrIGgSLl7hTnQHmKCeB45xAcbfTqK2zowkM3j3bHt/4b/ARbYQ==}
+    engines: {node: '>=0.3.1'}
+
  dijkstrajs@1.0.3:
    resolution: {integrity: sha512-qiSlmBq9+BCdCA/L46dw8Uy93mloxsPSbwnm5yrKn2vMPiy8KyAskTF6zuV/j5BMsmOGZDPs7KjU+mjb670kfA==}

@@ -8036,6 +8050,7 @@ packages:
  next@14.2.10:
    resolution: {integrity: sha512-sDDExXnh33cY3RkS9JuFEKaS4HmlWmDKP1VJioucCG6z5KuA008DPsDZOzi8UfqEk3Ii+2NCQSJrfbEWtZZfww==}
    engines: {node: '>=18.17.0'}
+    deprecated: This version has a security vulnerability. Please upgrade to a patched version. See https://nextjs.org/blog/security-update-2025-12-11 for more details.
    hasBin: true
    peerDependencies:
      '@opentelemetry/api': ^1.1.0
@@ -9085,6 +9100,9 @@ packages:
    resolution: {integrity: sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==}
    engines: {iojs: '>=1.0.0', node: '>=0.10.0'}

+  rfc6902@5.1.2:
+    resolution: {integrity: sha512-zxcb+PWlE8PwX0tiKE6zP97THQ8/lHmeiwucRrJ3YFupWEmp25RmFSlB1dNTqjkovwqG4iq+u1gzJMBS3um8mA==}
+
  rfdc@1.3.1:
    resolution: {integrity: sha512-r5a3l5HzYlIC68TpmYKlxWjmOP6wiPJ1vWv2HeLhNsRZMrCkxeqxiHlQ21oXmQ4F3SiryXBHhAD7JZqvOJjFmg==}

@@ -16788,6 +16806,8 @@ snapshots:

  diff@5.2.0: {}

+  diff@8.0.3: {}
+
  dijkstrajs@1.0.3: {}

  dingbat-to-unicode@1.0.1: {}
@@ -20355,6 +20375,8 @@ snapshots:

  reusify@1.1.0: {}

+  rfc6902@5.1.2: {}
+
  rfdc@1.3.1: {}

  rimraf@3.0.2: