Specialized Structures
Level 4: Advanced Mastery - Master specialized data structures for complex applications
Specialized Structures
Advanced Segment Trees
Segment Tree with Lazy Propagation
Segment trees with lazy propagation efficiently handle range updates and queries in O(log n) time by deferring updates until necessary.
🏢 Real-Life Analogy:
Think of lazy propagation like a corporate announcement: instead of calling each employee individually, you tell department heads, and they pass the message down only when needed.
⚡ Advanced Operations:
- Range Updates: Add/multiply values in range [l, r]
- Range Queries: Sum, min, max in range [l, r]
- Lazy Propagation: Defer updates for efficiency
- Persistent Variants: Keep all versions of the tree
TypeScript
// Segment Tree with Lazy Propagation class LazySegmentTree { private tree: number[]; private lazy: number[]; private n: number; constructor(arr: number[]) { this.n = arr.length; this.tree = new Array(4 * this.n).fill(0); this.lazy = new Array(4 * this.n).fill(0); this.build(arr, 1, 0, this.n - 1); } private build(arr: number[], node: number, start: number, end: number): void { if (start === end) { this.tree[node] = arr[start]; } else { const mid = Math.floor((start + end) / 2); this.build(arr, 2 * node, start, mid); this.build(arr, 2 * node + 1, mid + 1, end); this.tree[node] = this.tree[2 * node] + this.tree[2 * node + 1]; } } private updateLazy(node: number, start: number, end: number): void { if (this.lazy[node] !== 0) { this.tree[node] += this.lazy[node] * (end - start + 1); if (start !== end) { this.lazy[2 * node] += this.lazy[node]; this.lazy[2 * node + 1] += this.lazy[node]; } this.lazy[node] = 0; } } private updateRangeHelper(node: number, start: number, end: number, l: number, r: number, val: number): void { this.updateLazy(node, start, end); if (start > r || end < l) return; if (start >= l && end <= r) { this.tree[node] += val * (end - start + 1); if (start !== end) { this.lazy[2 * node] += val; this.lazy[2 * node + 1] += val; } return; } const mid = Math.floor((start + end) / 2); this.updateRangeHelper(2 * node, start, mid, l, r, val); this.updateRangeHelper(2 * node + 1, mid + 1, end, l, r, val); this.updateLazy(2 * node, start, mid); this.updateLazy(2 * node + 1, mid + 1, end); this.tree[node] = this.tree[2 * node] + this.tree[2 * node + 1]; } private queryRangeHelper(node: number, start: number, end: number, l: number, r: number): number { if (start > r || end < l) return 0; this.updateLazy(node, start, end); if (start >= l && end <= r) { return this.tree[node]; } const mid = Math.floor((start + end) / 2); return this.queryRangeHelper(2 * node, start, mid, l, r) + this.queryRangeHelper(2 * node + 1, mid + 1, end, l, r); } updateRange(l: number, r: number, val: number): void { this.updateRangeHelper(1, 0, this.n - 1, l, r, val); } queryRange(l: number, r: number): number { return this.queryRangeHelper(1, 0, this.n - 1, l, r); } } // Fenwick Tree (Binary Indexed Tree) class FenwickTree { private tree: number[]; private n: number; constructor(size: number) { this.n = size; this.tree = new Array(size + 1).fill(0); } // Add val to index i update(i: number, val: number): void { for (++i; i <= this.n; i += i & (-i)) { this.tree[i] += val; } } // Get prefix sum from 0 to i query(i: number): number { let sum = 0; for (++i; i > 0; i -= i & (-i)) { sum += this.tree[i]; } return sum; } // Get range sum from l to r rangeQuery(l: number, r: number): number { return this.query(r) - (l > 0 ? this.query(l - 1) : 0); } // Find kth element (0-indexed) kthElement(k: number): number { let pos = 0; for (let i = Math.floor(Math.log2(this.n)); i >= 0; i--) { const newPos = pos + (1 << i); if (newPos <= this.n && this.tree[newPos] <= k) { k -= this.tree[newPos]; pos = newPos; } } return pos; } } // 2D Fenwick Tree class FenwickTree2D { private tree: number[][]; private n: number; private m: number; constructor(rows: number, cols: number) { this.n = rows; this.m = cols; this.tree = Array(rows + 1).fill(0).map(() => Array(cols + 1).fill(0)); } update(r: number, c: number, val: number): void { for (let i = r + 1; i <= this.n; i += i & (-i)) { for (let j = c + 1; j <= this.m; j += j & (-j)) { this.tree[i][j] += val; } } } query(r: number, c: number): number { let sum = 0; for (let i = r + 1; i > 0; i -= i & (-i)) { for (let j = c + 1; j > 0; j -= j & (-j)) { sum += this.tree[i][j]; } } return sum; } rangeQuery(r1: number, c1: number, r2: number, c2: number): number { return this.query(r2, c2) - (r1 > 0 ? this.query(r1 - 1, c2) : 0) - (c1 > 0 ? this.query(r2, c1 - 1) : 0) + (r1 > 0 && c1 > 0 ? this.query(r1 - 1, c1 - 1) : 0); } }
Python
# Segment Tree with Lazy Propagation class LazySegmentTree: def __init__(self, arr): self.n = len(arr) self.tree = [0] * (4 * self.n) self.lazy = [0] * (4 * self.n) self.build(arr, 1, 0, self.n - 1) def build(self, arr, node, start, end): if start == end: self.tree[node] = arr[start] else: mid = (start + end) // 2 self.build(arr, 2 * node, start, mid) self.build(arr, 2 * node + 1, mid + 1, end) self.tree[node] = self.tree[2 * node] + self.tree[2 * node + 1] def update_lazy(self, node, start, end): if self.lazy[node] != 0: self.tree[node] += self.lazy[node] * (end - start + 1) if start != end: self.lazy[2 * node] += self.lazy[node] self.lazy[2 * node + 1] += self.lazy[node] self.lazy[node] = 0 def update_range_helper(self, node, start, end, l, r, val): self.update_lazy(node, start, end) if start > r or end < l: return if start >= l and end <= r: self.tree[node] += val * (end - start + 1) if start != end: self.lazy[2 * node] += val self.lazy[2 * node + 1] += val return mid = (start + end) // 2 self.update_range_helper(2 * node, start, mid, l, r, val) self.update_range_helper(2 * node + 1, mid + 1, end, l, r, val) self.update_lazy(2 * node, start, mid) self.update_lazy(2 * node + 1, mid + 1, end) self.tree[node] = self.tree[2 * node] + self.tree[2 * node + 1] def query_range_helper(self, node, start, end, l, r): if start > r or end < l: return 0 self.update_lazy(node, start, end) if start >= l and end <= r: return self.tree[node] mid = (start + end) // 2 return (self.query_range_helper(2 * node, start, mid, l, r) + self.query_range_helper(2 * node + 1, mid + 1, end, l, r)) def update_range(self, l, r, val): self.update_range_helper(1, 0, self.n - 1, l, r, val) def query_range(self, l, r): return self.query_range_helper(1, 0, self.n - 1, l, r) # Fenwick Tree (Binary Indexed Tree) class FenwickTree: def __init__(self, size): self.n = size self.tree = [0] * (size + 1) def update(self, i, val): """Add val to index i""" i += 1 # Convert to 1-indexed while i <= self.n: self.tree[i] += val i += i & (-i) def query(self, i): """Get prefix sum from 0 to i""" i += 1 # Convert to 1-indexed result = 0 while i > 0: result += self.tree[i] i -= i & (-i) return result def range_query(self, l, r): """Get range sum from l to r""" return self.query(r) - (self.query(l - 1) if l > 0 else 0) def kth_element(self, k): """Find kth smallest element (0-indexed)""" pos = 0 bit_mask = 1 while bit_mask <= self.n: bit_mask <<= 1 bit_mask >>= 1 while bit_mask > 0: new_pos = pos + bit_mask if new_pos <= self.n and self.tree[new_pos] <= k: k -= self.tree[new_pos] pos = new_pos bit_mask >>= 1 return pos # Range Fenwick Tree (for range updates and point queries) class RangeFenwickTree: def __init__(self, size): self.n = size self.tree = [0] * (size + 1) def update_range(self, l, r, val): """Add val to range [l, r]""" self._update(l, val) self._update(r + 1, -val) def _update(self, i, val): """Internal update function""" i += 1 # Convert to 1-indexed while i <= self.n: self.tree[i] += val i += i & (-i) def query(self, i): """Get value at index i""" i += 1 # Convert to 1-indexed result = 0 while i > 0: result += self.tree[i] i -= i & (-i) return result # 2D Fenwick Tree class FenwickTree2D: def __init__(self, rows, cols): self.n = rows self.m = cols self.tree = [[0] * (cols + 1) for _ in range(rows + 1)] def update(self, r, c, val): """Add val to position (r, c)""" i = r + 1 while i <= self.n: j = c + 1 while j <= self.m: self.tree[i][j] += val j += j & (-j) i += i & (-i) def query(self, r, c): """Get sum of rectangle from (0,0) to (r,c)""" result = 0 i = r + 1 while i > 0: j = c + 1 while j > 0: result += self.tree[i][j] j -= j & (-j) i -= i & (-i) return result def range_query(self, r1, c1, r2, c2): """Get sum of rectangle from (r1,c1) to (r2,c2)""" return (self.query(r2, c2) - (self.query(r1 - 1, c2) if r1 > 0 else 0) - (self.query(r2, c1 - 1) if c1 > 0 else 0) + (self.query(r1 - 1, c1 - 1) if r1 > 0 and c1 > 0 else 0)) # Order Statistics Tree (using Fenwick Tree) class OrderStatisticsTree: def __init__(self, max_val): self.max_val = max_val self.fenwick = FenwickTree(max_val + 1) self.count = 0 def insert(self, val): """Insert a value""" self.fenwick.update(val, 1) self.count += 1 def delete(self, val): """Delete a value""" self.fenwick.update(val, -1) self.count -= 1 def kth_smallest(self, k): """Find kth smallest element (1-indexed)""" return self.fenwick.kth_element(k - 1) def count_less(self, val): """Count elements less than val""" if val <= 0: return 0 return self.fenwick.query(val - 1) def count_range(self, l, r): """Count elements in range [l, r]""" return self.fenwick.range_query(l, r)
OCaml
(* Lazy Segment Tree *) module LazySegmentTree = struct type t = { tree: int array; lazy: int array; n: int; } let create arr = let n = Array.length arr in let tree = Array.make (4 * n) 0 in let lazy = Array.make (4 * n) 0 in let rec build node start end_ = if start = end_ then tree.(node) <- arr.(start) else let mid = (start + end_) / 2 in build (2 * node) start mid; build (2 * node + 1) (mid + 1) end_; tree.(node) <- tree.(2 * node) + tree.(2 * node + 1) in build 1 0 (n - 1); { tree; lazy; n } let update_lazy st node start end_ = if st.lazy.(node) <> 0 then begin st.tree.(node) <- st.tree.(node) + st.lazy.(node) * (end_ - start + 1); if start <> end_ then begin st.lazy.(2 * node) <- st.lazy.(2 * node) + st.lazy.(node); st.lazy.(2 * node + 1) <- st.lazy.(2 * node + 1) + st.lazy.(node) end; st.lazy.(node) <- 0 end let rec update_range st node start end_ l r val = update_lazy st node start end_; if start > r || end_ < l then () else if start >= l && end_ <= r then begin st.tree.(node) <- st.tree.(node) + val * (end_ - start + 1); if start <> end_ then begin st.lazy.(2 * node) <- st.lazy.(2 * node) + val; st.lazy.(2 * node + 1) <- st.lazy.(2 * node + 1) + val end end else begin let mid = (start + end_) / 2 in update_range st (2 * node) start mid l r val; update_range st (2 * node + 1) (mid + 1) end_ l r val; update_lazy st (2 * node) start mid; update_lazy st (2 * node + 1) (mid + 1) end_; st.tree.(node) <- st.tree.(2 * node) + st.tree.(2 * node + 1) end let rec query_range st node start end_ l r = if start > r || end_ < l then 0 else begin update_lazy st node start end_; if start >= l && end_ <= r then st.tree.(node) else let mid = (start + end_) / 2 in query_range st (2 * node) start mid l r + query_range st (2 * node + 1) (mid + 1) end_ l r end let update st l r val = update_range st 1 0 (st.n - 1) l r val let query st l r = query_range st 1 0 (st.n - 1) l r end (* Fenwick Tree *) module FenwickTree = struct type t = { tree: int array; n: int; } let create size = { tree = Array.make (size + 1) 0; n = size } let update ft i val = let i = ref (i + 1) in while !i <= ft.n do ft.tree.(!i) <- ft.tree.(!i) + val; i := !i + (!i land (-!i)) done let query ft i = let i = ref (i + 1) in let sum = ref 0 in while !i > 0 do sum := !sum + ft.tree.(!i); i := !i - (!i land (-!i)) done; !sum let range_query ft l r = query ft r - (if l > 0 then query ft (l - 1) else 0) end
Operation | Segment Tree | Fenwick Tree | Notes |
---|---|---|---|
Point Update | O(log n) | O(log n) | Single element modification |
Range Update | O(log n) | O(log n)* | *Requires difference array |
Range Query | O(log n) | O(log n) | Sum, min, max, etc. |
Space | O(n) | O(n) | Fenwick uses less memory |
Suffix Structures
Suffix Arrays and Suffix Trees
Suffix structures are powerful data structures for string processing that allow efficient pattern matching, longest common substring, and many other string operations.
📚 Real-Life Analogy:
Think of a suffix tree as an index at the back of a book, but instead of just words, it indexes every possible ending of every word, allowing you to find any substring instantly.
🔤 Suffix Structure Applications:
- Pattern Matching: Find all occurrences in O(m + occ)
- Longest Repeated Substring: Find longest substring that appears twice
- Longest Common Substring: Between multiple strings
- Palindrome Finding: All palindromic substrings
TypeScript
// Suffix Array Construction using DC3 Algorithm class SuffixArray { private text: string; private suffixArray: number[]; private lcp: number[]; // Longest Common Prefix array constructor(text: string) { this.text = text + '$'; // Add sentinel this.suffixArray = this.buildSuffixArray(); this.lcp = this.buildLCPArray(); } // Build suffix array using naive O(n log n) approach private buildSuffixArray(): number[] { const n = this.text.length; const suffixes: [string, number][] = []; for (let i = 0; i < n; i++) { suffixes.push([this.text.substring(i), i]); } suffixes.sort((a, b) => a[0].localeCompare(b[0])); return suffixes.map(s => s[1]); } // Build LCP array using Kasai's algorithm private buildLCPArray(): number[] { const n = this.text.length; const lcp = new Array(n).fill(0); const rank = new Array(n).fill(0); // Build rank array (inverse of suffix array) for (let i = 0; i < n; i++) { rank[this.suffixArray[i]] = i; } let h = 0; for (let i = 0; i < n; i++) { if (rank[i] > 0) { const j = this.suffixArray[rank[i] - 1]; while (i + h < n && j + h < n && this.text[i + h] === this.text[j + h]) { h++; } lcp[rank[i]] = h; if (h > 0) h--; } } return lcp; } // Find all occurrences of pattern findPattern(pattern: string): number[] { const n = this.suffixArray.length; let left = 0, right = n - 1; // Binary search for leftmost occurrence while (left < right) { const mid = Math.floor((left + right) / 2); const suffix = this.text.substring(this.suffixArray[mid]); if (suffix < pattern) { left = mid + 1; } else { right = mid; } } const positions: number[] = []; // Collect all occurrences while (left < n) { const suffix = this.text.substring(this.suffixArray[left]); if (!suffix.startsWith(pattern)) break; positions.push(this.suffixArray[left]); left++; } return positions; } // Find longest repeated substring longestRepeatedSubstring(): string { let maxLen = 0; let maxIndex = 0; for (let i = 1; i < this.lcp.length; i++) { if (this.lcp[i] > maxLen) { maxLen = this.lcp[i]; maxIndex = i; } } if (maxLen === 0) return ""; return this.text.substring( this.suffixArray[maxIndex], this.suffixArray[maxIndex] + maxLen ); } } // Suffix Tree implementation class SuffixTreeNode { children: Map= new Map(); start: number; end: number; suffixLink?: SuffixTreeNode; constructor(start: number, end: number) { this.start = start; this.end = end; } } class SuffixTree { private root: SuffixTreeNode; private text: string; constructor(text: string) { this.text = text + '$'; this.root = new SuffixTreeNode(-1, -1); this.build(); } private build(): void { // Ukkonen's algorithm implementation const n = this.text.length; let activeNode = this.root; let activeEdge = -1; let activeLength = 0; let remainingSuffixCount = 0; let leafEnd = -1; for (let i = 0; i < n; i++) { leafEnd = i; remainingSuffixCount++; while (remainingSuffixCount > 0) { if (activeLength === 0) { activeEdge = i; } const edge = this.text[activeEdge]; const next = activeNode.children.get(edge); if (!next) { // Create new leaf activeNode.children.set(edge, new SuffixTreeNode(i, n - 1)); if (activeNode !== this.root) { activeNode = activeNode.suffixLink || this.root; } remainingSuffixCount--; } else { // Walk down the edge const edgeLength = next.end - next.start + 1; if (activeLength >= edgeLength) { activeEdge += edgeLength; activeLength -= edgeLength; activeNode = next; continue; } if (this.text[next.start + activeLength] === this.text[i]) { activeLength++; break; } // Split edge const split = new SuffixTreeNode(next.start, next.start + activeLength - 1); activeNode.children.set(edge, split); split.children.set(this.text[i], new SuffixTreeNode(i, n - 1)); next.start += activeLength; split.children.set(this.text[next.start], next); if (activeNode !== this.root) { activeNode = activeNode.suffixLink || this.root; } remainingSuffixCount--; } } } } // Check if pattern exists contains(pattern: string): boolean { let node = this.root; let i = 0; while (i < pattern.length) { const edge = node.children.get(pattern[i]); if (!edge) return false; let j = 0; while (j < edge.end - edge.start + 1 && i < pattern.length) { if (this.text[edge.start + j] !== pattern[i]) { return false; } i++; j++; } if (i < pattern.length) { node = edge; } } return true; } }
Python
# Suffix Array and LCP Construction class SuffixArray: def __init__(self, text): self.text = text + '$' # Add sentinel self.n = len(self.text) self.suffix_array = self._build_suffix_array() self.lcp = self._build_lcp_array() def _build_suffix_array(self): """Build suffix array using improved O(n log n) approach""" # Create initial suffix array with positions suffixes = [(self.text[i:], i) for i in range(self.n)] suffixes.sort() return [suffix[1] for suffix in suffixes] def _build_lcp_array(self): """Build LCP array using Kasai's algorithm""" lcp = [0] * self.n rank = [0] * self.n # Build rank array (inverse of suffix array) for i in range(self.n): rank[self.suffix_array[i]] = i h = 0 for i in range(self.n): if rank[i] > 0: j = self.suffix_array[rank[i] - 1] while i + h < self.n and j + h < self.n and \ self.text[i + h] == self.text[j + h]: h += 1 lcp[rank[i]] = h if h > 0: h -= 1 return lcp def find_pattern(self, pattern): """Find all occurrences of pattern""" left, right = 0, self.n - 1 # Binary search for leftmost occurrence while left < right: mid = (left + right) // 2 suffix = self.text[self.suffix_array[mid]:] if suffix < pattern: left = mid + 1 else: right = mid positions = [] # Collect all occurrences while left < self.n: suffix = self.text[self.suffix_array[left]:] if not suffix.startswith(pattern): break positions.append(self.suffix_array[left]) left += 1 return positions def longest_repeated_substring(self): """Find the longest substring that appears at least twice""" max_len = 0 max_index = 0 for i in range(1, len(self.lcp)): if self.lcp[i] > max_len: max_len = self.lcp[i] max_index = i if max_len == 0: return "" return self.text[self.suffix_array[max_index]: self.suffix_array[max_index] + max_len] def longest_common_substring(self, other_text): """Find longest common substring with another text""" # Concatenate texts with different separators combined = self.text[:-1] + '#' + other_text + '$' sa = SuffixArray(combined[:-1]) # Remove the $ we'll add n1 = len(self.text) - 1 max_len = 0 result = "" for i in range(1, len(sa.suffix_array)): pos1 = sa.suffix_array[i-1] pos2 = sa.suffix_array[i] # Check if suffixes are from different strings if (pos1 < n1 and pos2 > n1) or (pos1 > n1 and pos2 < n1): lcp_len = sa.lcp[i] if lcp_len > max_len: max_len = lcp_len result = sa.text[pos1:pos1 + lcp_len] return result # Generalized Suffix Tree for multiple strings class GeneralizedSuffixTree: class Node: def __init__(self): self.children = {} self.is_leaf = False self.string_indices = set() def __init__(self): self.root = self.Node() self.strings = [] def add_string(self, s): """Add a string to the generalized suffix tree""" string_index = len(self.strings) self.strings.append(s) # Add all suffixes for i in range(len(s)): self._add_suffix(s[i:], string_index) def _add_suffix(self, suffix, string_index): """Add a suffix to the tree""" node = self.root for char in suffix: if char not in node.children: node.children[char] = self.Node() node = node.children[char] node.string_indices.add(string_index) node.is_leaf = True def find_common_substrings(self, min_length=1): """Find all common substrings of minimum length""" common_substrings = [] def dfs(node, path): # Check if this node represents a common substring if len(node.string_indices) == len(self.strings) and \ len(path) >= min_length: common_substrings.append(path) # Continue DFS for char, child in node.children.items(): dfs(child, path + char) dfs(self.root, "") return common_substrings def longest_common_substring_all(self): """Find longest common substring among all strings""" common = self.find_common_substrings() if not common: return "" return max(common, key=len) # Suffix Automaton (DAWG - Directed Acyclic Word Graph) class SuffixAutomaton: class State: def __init__(self): self.len = 0 self.link = None self.next = {} def __init__(self, s): self.states = [self.State()] self.last = 0 for char in s: self._extend(char) def _extend(self, char): """Extend automaton with a new character""" cur = len(self.states) self.states.append(self.State()) self.states[cur].len = self.states[self.last].len + 1 p = self.last while p is not None and char not in self.states[p].next: self.states[p].next[char] = cur p = self.states[p].link if p > 0 else None if p is None: self.states[cur].link = 0 else: q = self.states[p].next[char] if self.states[p].len + 1 == self.states[q].len: self.states[cur].link = q else: clone = len(self.states) self.states.append(self.State()) self.states[clone].len = self.states[p].len + 1 self.states[clone].next = self.states[q].next.copy() self.states[clone].link = self.states[q].link while p is not None and self.states[p].next.get(char) == q: self.states[p].next[char] = clone p = self.states[p].link if p > 0 else None self.states[q].link = self.states[cur].link = clone self.last = cur def contains(self, pattern): """Check if pattern exists in the string""" state = 0 for char in pattern: if char not in self.states[state].next: return False state = self.states[state].next[char] return True def count_distinct_substrings(self): """Count number of distinct substrings""" count = 0 for state in self.states[1:]: # Skip initial state count += self.states[state].len if state > 0 and self.states[state].link is not None: count -= self.states[self.states[state].link].len return count
OCaml
(* Suffix Array Implementation *) module SuffixArray = struct type t = { text: string; suffix_array: int array; lcp: int array; } (* Build suffix array using naive approach *) let build_suffix_array text = let n = String.length text in let suffixes = Array.init n (fun i -> (i, String.sub text i (n - i))) in Array.sort (fun (_, s1) (_, s2) -> String.compare s1 s2) suffixes; Array.map fst suffixes (* Build LCP array using Kasai's algorithm *) let build_lcp_array text suffix_array = let n = Array.length suffix_array in let lcp = Array.make n 0 in let rank = Array.make n 0 in (* Build rank array *) for i = 0 to n - 1 do rank.(suffix_array.(i)) <- i done; let h = ref 0 in for i = 0 to n - 1 do if rank.(i) > 0 then begin let j = suffix_array.(rank.(i) - 1) in while i + !h < n && j + !h < n && text.[i + !h] = text.[j + !h] do incr h done; lcp.(rank.(i)) <- !h; if !h > 0 then decr h end done; lcp let create text = let text = text ^ "$" in let suffix_array = build_suffix_array text in let lcp = build_lcp_array text suffix_array in { text; suffix_array; lcp } (* Find all occurrences of pattern *) let find_pattern sa pattern = let n = Array.length sa.suffix_array in let rec binary_search_left left right = if left >= right then left else let mid = (left + right) / 2 in let suffix = String.sub sa.text sa.suffix_array.(mid) (String.length sa.text - sa.suffix_array.(mid)) in if suffix < pattern then binary_search_left (mid + 1) right else binary_search_left left mid in let start_pos = binary_search_left 0 n in let positions = ref [] in let rec collect_occurrences i = if i < n then let suffix = String.sub sa.text sa.suffix_array.(i) (min (String.length pattern) (String.length sa.text - sa.suffix_array.(i))) in if suffix = pattern then begin positions := sa.suffix_array.(i) :: !positions; collect_occurrences (i + 1) end in collect_occurrences start_pos; List.rev !positions (* Find longest repeated substring *) let longest_repeated_substring sa = let max_len = ref 0 in let max_index = ref 0 in for i = 1 to Array.length sa.lcp - 1 do if sa.lcp.(i) > !max_len then begin max_len := sa.lcp.(i); max_index := i end done; if !max_len = 0 then "" else String.sub sa.text sa.suffix_array.(!max_index) !max_len end (* Trie-based Suffix Tree *) module SuffixTree = struct type node = { mutable children: (char, node) Hashtbl.t; mutable start: int; mutable end_: int; mutable suffix_link: node option; } type t = { root: node; text: string; } let create_node start end_ = { children = Hashtbl.create 26; start; end_; suffix_link = None; } let create text = let text = text ^ "$" in let root = create_node (-1) (-1) in { root; text } (* Check if pattern exists *) let contains tree pattern = let rec search node pos pattern_pos = if pattern_pos >= String.length pattern then true else match Hashtbl.find_opt node.children pattern.[pattern_pos] with | None -> false | Some child -> let edge_len = child.end_ - child.start + 1 in let rec check_edge i = if i >= edge_len || pattern_pos + i >= String.length pattern then if pattern_pos + i >= String.length pattern then true else search child (child.start + i) (pattern_pos + i) else if tree.text.[child.start + i] = pattern.[pattern_pos + i] then check_edge (i + 1) else false in check_edge 0 in search tree.root 0 0 (* Count occurrences of pattern *) let count_occurrences tree pattern = let rec count_leaves node = if Hashtbl.length node.children = 0 then 1 else Hashtbl.fold (fun _ child acc -> acc + count_leaves child ) node.children 0 in let rec find_node node pos pattern_pos = if pattern_pos >= String.length pattern then Some node else match Hashtbl.find_opt node.children pattern.[pattern_pos] with | None -> None | Some child -> let edge_len = child.end_ - child.start + 1 in let remaining = String.length pattern - pattern_pos in if remaining <= edge_len then (* Pattern ends within this edge *) let rec verify i = if i >= remaining then Some child else if tree.text.[child.start + i] = pattern.[pattern_pos + i] then verify (i + 1) else None in verify 0 else (* Continue to next node *) let rec verify i = if i >= edge_len then find_node child (child.start + edge_len) (pattern_pos + edge_len) else if tree.text.[child.start + i] = pattern