Specialized Structures
Level 4: Advanced Mastery - Master specialized data structures for complex applications
Specialized Structures
Advanced Segment Trees
Segment Tree with Lazy Propagation
Segment trees with lazy propagation efficiently handle range updates and queries in O(log n) time by deferring updates until necessary.
🏢 Real-Life Analogy:
Think of lazy propagation like a corporate announcement: instead of calling each employee individually, you tell department heads, and they pass the message down only when needed.
⚡ Advanced Operations:
- Range Updates: Add/multiply values in range [l, r]
- Range Queries: Sum, min, max in range [l, r]
- Lazy Propagation: Defer updates for efficiency
- Persistent Variants: Keep all versions of the tree
TypeScript
// Segment Tree with Lazy Propagation
class LazySegmentTree {
private tree: number[];
private lazy: number[];
private n: number;
constructor(arr: number[]) {
this.n = arr.length;
this.tree = new Array(4 * this.n).fill(0);
this.lazy = new Array(4 * this.n).fill(0);
this.build(arr, 1, 0, this.n - 1);
}
private build(arr: number[], node: number, start: number, end: number): void {
if (start === end) {
this.tree[node] = arr[start];
} else {
const mid = Math.floor((start + end) / 2);
this.build(arr, 2 * node, start, mid);
this.build(arr, 2 * node + 1, mid + 1, end);
this.tree[node] = this.tree[2 * node] + this.tree[2 * node + 1];
}
}
private updateLazy(node: number, start: number, end: number): void {
if (this.lazy[node] !== 0) {
this.tree[node] += this.lazy[node] * (end - start + 1);
if (start !== end) {
this.lazy[2 * node] += this.lazy[node];
this.lazy[2 * node + 1] += this.lazy[node];
}
this.lazy[node] = 0;
}
}
private updateRangeHelper(node: number, start: number, end: number,
l: number, r: number, val: number): void {
this.updateLazy(node, start, end);
if (start > r || end < l) return;
if (start >= l && end <= r) {
this.tree[node] += val * (end - start + 1);
if (start !== end) {
this.lazy[2 * node] += val;
this.lazy[2 * node + 1] += val;
}
return;
}
const mid = Math.floor((start + end) / 2);
this.updateRangeHelper(2 * node, start, mid, l, r, val);
this.updateRangeHelper(2 * node + 1, mid + 1, end, l, r, val);
this.updateLazy(2 * node, start, mid);
this.updateLazy(2 * node + 1, mid + 1, end);
this.tree[node] = this.tree[2 * node] + this.tree[2 * node + 1];
}
private queryRangeHelper(node: number, start: number, end: number,
l: number, r: number): number {
if (start > r || end < l) return 0;
this.updateLazy(node, start, end);
if (start >= l && end <= r) {
return this.tree[node];
}
const mid = Math.floor((start + end) / 2);
return this.queryRangeHelper(2 * node, start, mid, l, r) +
this.queryRangeHelper(2 * node + 1, mid + 1, end, l, r);
}
updateRange(l: number, r: number, val: number): void {
this.updateRangeHelper(1, 0, this.n - 1, l, r, val);
}
queryRange(l: number, r: number): number {
return this.queryRangeHelper(1, 0, this.n - 1, l, r);
}
}
// Fenwick Tree (Binary Indexed Tree)
class FenwickTree {
private tree: number[];
private n: number;
constructor(size: number) {
this.n = size;
this.tree = new Array(size + 1).fill(0);
}
// Add val to index i
update(i: number, val: number): void {
for (++i; i <= this.n; i += i & (-i)) {
this.tree[i] += val;
}
}
// Get prefix sum from 0 to i
query(i: number): number {
let sum = 0;
for (++i; i > 0; i -= i & (-i)) {
sum += this.tree[i];
}
return sum;
}
// Get range sum from l to r
rangeQuery(l: number, r: number): number {
return this.query(r) - (l > 0 ? this.query(l - 1) : 0);
}
// Find kth element (0-indexed)
kthElement(k: number): number {
let pos = 0;
for (let i = Math.floor(Math.log2(this.n)); i >= 0; i--) {
const newPos = pos + (1 << i);
if (newPos <= this.n && this.tree[newPos] <= k) {
k -= this.tree[newPos];
pos = newPos;
}
}
return pos;
}
}
// 2D Fenwick Tree
class FenwickTree2D {
private tree: number[][];
private n: number;
private m: number;
constructor(rows: number, cols: number) {
this.n = rows;
this.m = cols;
this.tree = Array(rows + 1).fill(0).map(() => Array(cols + 1).fill(0));
}
update(r: number, c: number, val: number): void {
for (let i = r + 1; i <= this.n; i += i & (-i)) {
for (let j = c + 1; j <= this.m; j += j & (-j)) {
this.tree[i][j] += val;
}
}
}
query(r: number, c: number): number {
let sum = 0;
for (let i = r + 1; i > 0; i -= i & (-i)) {
for (let j = c + 1; j > 0; j -= j & (-j)) {
sum += this.tree[i][j];
}
}
return sum;
}
rangeQuery(r1: number, c1: number, r2: number, c2: number): number {
return this.query(r2, c2) -
(r1 > 0 ? this.query(r1 - 1, c2) : 0) -
(c1 > 0 ? this.query(r2, c1 - 1) : 0) +
(r1 > 0 && c1 > 0 ? this.query(r1 - 1, c1 - 1) : 0);
}
}
Python
# Segment Tree with Lazy Propagation
class LazySegmentTree:
def __init__(self, arr):
self.n = len(arr)
self.tree = [0] * (4 * self.n)
self.lazy = [0] * (4 * self.n)
self.build(arr, 1, 0, self.n - 1)
def build(self, arr, node, start, end):
if start == end:
self.tree[node] = arr[start]
else:
mid = (start + end) // 2
self.build(arr, 2 * node, start, mid)
self.build(arr, 2 * node + 1, mid + 1, end)
self.tree[node] = self.tree[2 * node] + self.tree[2 * node + 1]
def update_lazy(self, node, start, end):
if self.lazy[node] != 0:
self.tree[node] += self.lazy[node] * (end - start + 1)
if start != end:
self.lazy[2 * node] += self.lazy[node]
self.lazy[2 * node + 1] += self.lazy[node]
self.lazy[node] = 0
def update_range_helper(self, node, start, end, l, r, val):
self.update_lazy(node, start, end)
if start > r or end < l:
return
if start >= l and end <= r:
self.tree[node] += val * (end - start + 1)
if start != end:
self.lazy[2 * node] += val
self.lazy[2 * node + 1] += val
return
mid = (start + end) // 2
self.update_range_helper(2 * node, start, mid, l, r, val)
self.update_range_helper(2 * node + 1, mid + 1, end, l, r, val)
self.update_lazy(2 * node, start, mid)
self.update_lazy(2 * node + 1, mid + 1, end)
self.tree[node] = self.tree[2 * node] + self.tree[2 * node + 1]
def query_range_helper(self, node, start, end, l, r):
if start > r or end < l:
return 0
self.update_lazy(node, start, end)
if start >= l and end <= r:
return self.tree[node]
mid = (start + end) // 2
return (self.query_range_helper(2 * node, start, mid, l, r) +
self.query_range_helper(2 * node + 1, mid + 1, end, l, r))
def update_range(self, l, r, val):
self.update_range_helper(1, 0, self.n - 1, l, r, val)
def query_range(self, l, r):
return self.query_range_helper(1, 0, self.n - 1, l, r)
# Fenwick Tree (Binary Indexed Tree)
class FenwickTree:
def __init__(self, size):
self.n = size
self.tree = [0] * (size + 1)
def update(self, i, val):
"""Add val to index i"""
i += 1 # Convert to 1-indexed
while i <= self.n:
self.tree[i] += val
i += i & (-i)
def query(self, i):
"""Get prefix sum from 0 to i"""
i += 1 # Convert to 1-indexed
result = 0
while i > 0:
result += self.tree[i]
i -= i & (-i)
return result
def range_query(self, l, r):
"""Get range sum from l to r"""
return self.query(r) - (self.query(l - 1) if l > 0 else 0)
def kth_element(self, k):
"""Find kth smallest element (0-indexed)"""
pos = 0
bit_mask = 1
while bit_mask <= self.n:
bit_mask <<= 1
bit_mask >>= 1
while bit_mask > 0:
new_pos = pos + bit_mask
if new_pos <= self.n and self.tree[new_pos] <= k:
k -= self.tree[new_pos]
pos = new_pos
bit_mask >>= 1
return pos
# Range Fenwick Tree (for range updates and point queries)
class RangeFenwickTree:
def __init__(self, size):
self.n = size
self.tree = [0] * (size + 1)
def update_range(self, l, r, val):
"""Add val to range [l, r]"""
self._update(l, val)
self._update(r + 1, -val)
def _update(self, i, val):
"""Internal update function"""
i += 1 # Convert to 1-indexed
while i <= self.n:
self.tree[i] += val
i += i & (-i)
def query(self, i):
"""Get value at index i"""
i += 1 # Convert to 1-indexed
result = 0
while i > 0:
result += self.tree[i]
i -= i & (-i)
return result
# 2D Fenwick Tree
class FenwickTree2D:
def __init__(self, rows, cols):
self.n = rows
self.m = cols
self.tree = [[0] * (cols + 1) for _ in range(rows + 1)]
def update(self, r, c, val):
"""Add val to position (r, c)"""
i = r + 1
while i <= self.n:
j = c + 1
while j <= self.m:
self.tree[i][j] += val
j += j & (-j)
i += i & (-i)
def query(self, r, c):
"""Get sum of rectangle from (0,0) to (r,c)"""
result = 0
i = r + 1
while i > 0:
j = c + 1
while j > 0:
result += self.tree[i][j]
j -= j & (-j)
i -= i & (-i)
return result
def range_query(self, r1, c1, r2, c2):
"""Get sum of rectangle from (r1,c1) to (r2,c2)"""
return (self.query(r2, c2) -
(self.query(r1 - 1, c2) if r1 > 0 else 0) -
(self.query(r2, c1 - 1) if c1 > 0 else 0) +
(self.query(r1 - 1, c1 - 1) if r1 > 0 and c1 > 0 else 0))
# Order Statistics Tree (using Fenwick Tree)
class OrderStatisticsTree:
def __init__(self, max_val):
self.max_val = max_val
self.fenwick = FenwickTree(max_val + 1)
self.count = 0
def insert(self, val):
"""Insert a value"""
self.fenwick.update(val, 1)
self.count += 1
def delete(self, val):
"""Delete a value"""
self.fenwick.update(val, -1)
self.count -= 1
def kth_smallest(self, k):
"""Find kth smallest element (1-indexed)"""
return self.fenwick.kth_element(k - 1)
def count_less(self, val):
"""Count elements less than val"""
if val <= 0:
return 0
return self.fenwick.query(val - 1)
def count_range(self, l, r):
"""Count elements in range [l, r]"""
return self.fenwick.range_query(l, r)
OCaml
(* Lazy Segment Tree *)
module LazySegmentTree = struct
type t = {
tree: int array;
lazy: int array;
n: int;
}
let create arr =
let n = Array.length arr in
let tree = Array.make (4 * n) 0 in
let lazy = Array.make (4 * n) 0 in
let rec build node start end_ =
if start = end_ then
tree.(node) <- arr.(start)
else
let mid = (start + end_) / 2 in
build (2 * node) start mid;
build (2 * node + 1) (mid + 1) end_;
tree.(node) <- tree.(2 * node) + tree.(2 * node + 1)
in
build 1 0 (n - 1);
{ tree; lazy; n }
let update_lazy st node start end_ =
if st.lazy.(node) <> 0 then begin
st.tree.(node) <- st.tree.(node) + st.lazy.(node) * (end_ - start + 1);
if start <> end_ then begin
st.lazy.(2 * node) <- st.lazy.(2 * node) + st.lazy.(node);
st.lazy.(2 * node + 1) <- st.lazy.(2 * node + 1) + st.lazy.(node)
end;
st.lazy.(node) <- 0
end
let rec update_range st node start end_ l r val =
update_lazy st node start end_;
if start > r || end_ < l then ()
else if start >= l && end_ <= r then begin
st.tree.(node) <- st.tree.(node) + val * (end_ - start + 1);
if start <> end_ then begin
st.lazy.(2 * node) <- st.lazy.(2 * node) + val;
st.lazy.(2 * node + 1) <- st.lazy.(2 * node + 1) + val
end
end else begin
let mid = (start + end_) / 2 in
update_range st (2 * node) start mid l r val;
update_range st (2 * node + 1) (mid + 1) end_ l r val;
update_lazy st (2 * node) start mid;
update_lazy st (2 * node + 1) (mid + 1) end_;
st.tree.(node) <- st.tree.(2 * node) + st.tree.(2 * node + 1)
end
let rec query_range st node start end_ l r =
if start > r || end_ < l then 0
else begin
update_lazy st node start end_;
if start >= l && end_ <= r then st.tree.(node)
else
let mid = (start + end_) / 2 in
query_range st (2 * node) start mid l r +
query_range st (2 * node + 1) (mid + 1) end_ l r
end
let update st l r val =
update_range st 1 0 (st.n - 1) l r val
let query st l r =
query_range st 1 0 (st.n - 1) l r
end
(* Fenwick Tree *)
module FenwickTree = struct
type t = {
tree: int array;
n: int;
}
let create size =
{ tree = Array.make (size + 1) 0; n = size }
let update ft i val =
let i = ref (i + 1) in
while !i <= ft.n do
ft.tree.(!i) <- ft.tree.(!i) + val;
i := !i + (!i land (-!i))
done
let query ft i =
let i = ref (i + 1) in
let sum = ref 0 in
while !i > 0 do
sum := !sum + ft.tree.(!i);
i := !i - (!i land (-!i))
done;
!sum
let range_query ft l r =
query ft r - (if l > 0 then query ft (l - 1) else 0)
end
| Operation | Segment Tree | Fenwick Tree | Notes |
|---|---|---|---|
| Point Update | O(log n) | O(log n) | Single element modification |
| Range Update | O(log n) | O(log n)* | *Requires difference array |
| Range Query | O(log n) | O(log n) | Sum, min, max, etc. |
| Space | O(n) | O(n) | Fenwick uses less memory |
Suffix Structures
Suffix Arrays and Suffix Trees
Suffix structures are powerful data structures for string processing that allow efficient pattern matching, longest common substring, and many other string operations.
📚 Real-Life Analogy:
Think of a suffix tree as an index at the back of a book, but instead of just words, it indexes every possible ending of every word, allowing you to find any substring instantly.
🔤 Suffix Structure Applications:
- Pattern Matching: Find all occurrences in O(m + occ)
- Longest Repeated Substring: Find longest substring that appears twice
- Longest Common Substring: Between multiple strings
- Palindrome Finding: All palindromic substrings
TypeScript
// Suffix Array Construction using DC3 Algorithm
class SuffixArray {
private text: string;
private suffixArray: number[];
private lcp: number[]; // Longest Common Prefix array
constructor(text: string) {
this.text = text + '$'; // Add sentinel
this.suffixArray = this.buildSuffixArray();
this.lcp = this.buildLCPArray();
}
// Build suffix array using naive O(n log n) approach
private buildSuffixArray(): number[] {
const n = this.text.length;
const suffixes: [string, number][] = [];
for (let i = 0; i < n; i++) {
suffixes.push([this.text.substring(i), i]);
}
suffixes.sort((a, b) => a[0].localeCompare(b[0]));
return suffixes.map(s => s[1]);
}
// Build LCP array using Kasai's algorithm
private buildLCPArray(): number[] {
const n = this.text.length;
const lcp = new Array(n).fill(0);
const rank = new Array(n).fill(0);
// Build rank array (inverse of suffix array)
for (let i = 0; i < n; i++) {
rank[this.suffixArray[i]] = i;
}
let h = 0;
for (let i = 0; i < n; i++) {
if (rank[i] > 0) {
const j = this.suffixArray[rank[i] - 1];
while (i + h < n && j + h < n &&
this.text[i + h] === this.text[j + h]) {
h++;
}
lcp[rank[i]] = h;
if (h > 0) h--;
}
}
return lcp;
}
// Find all occurrences of pattern
findPattern(pattern: string): number[] {
const n = this.suffixArray.length;
let left = 0, right = n - 1;
// Binary search for leftmost occurrence
while (left < right) {
const mid = Math.floor((left + right) / 2);
const suffix = this.text.substring(this.suffixArray[mid]);
if (suffix < pattern) {
left = mid + 1;
} else {
right = mid;
}
}
const positions: number[] = [];
// Collect all occurrences
while (left < n) {
const suffix = this.text.substring(this.suffixArray[left]);
if (!suffix.startsWith(pattern)) break;
positions.push(this.suffixArray[left]);
left++;
}
return positions;
}
// Find longest repeated substring
longestRepeatedSubstring(): string {
let maxLen = 0;
let maxIndex = 0;
for (let i = 1; i < this.lcp.length; i++) {
if (this.lcp[i] > maxLen) {
maxLen = this.lcp[i];
maxIndex = i;
}
}
if (maxLen === 0) return "";
return this.text.substring(
this.suffixArray[maxIndex],
this.suffixArray[maxIndex] + maxLen
);
}
}
// Suffix Tree implementation
class SuffixTreeNode {
children: Map = new Map();
start: number;
end: number;
suffixLink?: SuffixTreeNode;
constructor(start: number, end: number) {
this.start = start;
this.end = end;
}
}
class SuffixTree {
private root: SuffixTreeNode;
private text: string;
constructor(text: string) {
this.text = text + '$';
this.root = new SuffixTreeNode(-1, -1);
this.build();
}
private build(): void {
// Ukkonen's algorithm implementation
const n = this.text.length;
let activeNode = this.root;
let activeEdge = -1;
let activeLength = 0;
let remainingSuffixCount = 0;
let leafEnd = -1;
for (let i = 0; i < n; i++) {
leafEnd = i;
remainingSuffixCount++;
while (remainingSuffixCount > 0) {
if (activeLength === 0) {
activeEdge = i;
}
const edge = this.text[activeEdge];
const next = activeNode.children.get(edge);
if (!next) {
// Create new leaf
activeNode.children.set(edge,
new SuffixTreeNode(i, n - 1));
if (activeNode !== this.root) {
activeNode = activeNode.suffixLink || this.root;
}
remainingSuffixCount--;
} else {
// Walk down the edge
const edgeLength = next.end - next.start + 1;
if (activeLength >= edgeLength) {
activeEdge += edgeLength;
activeLength -= edgeLength;
activeNode = next;
continue;
}
if (this.text[next.start + activeLength] === this.text[i]) {
activeLength++;
break;
}
// Split edge
const split = new SuffixTreeNode(next.start,
next.start + activeLength - 1);
activeNode.children.set(edge, split);
split.children.set(this.text[i],
new SuffixTreeNode(i, n - 1));
next.start += activeLength;
split.children.set(this.text[next.start], next);
if (activeNode !== this.root) {
activeNode = activeNode.suffixLink || this.root;
}
remainingSuffixCount--;
}
}
}
}
// Check if pattern exists
contains(pattern: string): boolean {
let node = this.root;
let i = 0;
while (i < pattern.length) {
const edge = node.children.get(pattern[i]);
if (!edge) return false;
let j = 0;
while (j < edge.end - edge.start + 1 && i < pattern.length) {
if (this.text[edge.start + j] !== pattern[i]) {
return false;
}
i++;
j++;
}
if (i < pattern.length) {
node = edge;
}
}
return true;
}
}
Python
# Suffix Array and LCP Construction
class SuffixArray:
def __init__(self, text):
self.text = text + '$' # Add sentinel
self.n = len(self.text)
self.suffix_array = self._build_suffix_array()
self.lcp = self._build_lcp_array()
def _build_suffix_array(self):
"""Build suffix array using improved O(n log n) approach"""
# Create initial suffix array with positions
suffixes = [(self.text[i:], i) for i in range(self.n)]
suffixes.sort()
return [suffix[1] for suffix in suffixes]
def _build_lcp_array(self):
"""Build LCP array using Kasai's algorithm"""
lcp = [0] * self.n
rank = [0] * self.n
# Build rank array (inverse of suffix array)
for i in range(self.n):
rank[self.suffix_array[i]] = i
h = 0
for i in range(self.n):
if rank[i] > 0:
j = self.suffix_array[rank[i] - 1]
while i + h < self.n and j + h < self.n and \
self.text[i + h] == self.text[j + h]:
h += 1
lcp[rank[i]] = h
if h > 0:
h -= 1
return lcp
def find_pattern(self, pattern):
"""Find all occurrences of pattern"""
left, right = 0, self.n - 1
# Binary search for leftmost occurrence
while left < right:
mid = (left + right) // 2
suffix = self.text[self.suffix_array[mid]:]
if suffix < pattern:
left = mid + 1
else:
right = mid
positions = []
# Collect all occurrences
while left < self.n:
suffix = self.text[self.suffix_array[left]:]
if not suffix.startswith(pattern):
break
positions.append(self.suffix_array[left])
left += 1
return positions
def longest_repeated_substring(self):
"""Find the longest substring that appears at least twice"""
max_len = 0
max_index = 0
for i in range(1, len(self.lcp)):
if self.lcp[i] > max_len:
max_len = self.lcp[i]
max_index = i
if max_len == 0:
return ""
return self.text[self.suffix_array[max_index]:
self.suffix_array[max_index] + max_len]
def longest_common_substring(self, other_text):
"""Find longest common substring with another text"""
# Concatenate texts with different separators
combined = self.text[:-1] + '#' + other_text + '$'
sa = SuffixArray(combined[:-1]) # Remove the $ we'll add
n1 = len(self.text) - 1
max_len = 0
result = ""
for i in range(1, len(sa.suffix_array)):
pos1 = sa.suffix_array[i-1]
pos2 = sa.suffix_array[i]
# Check if suffixes are from different strings
if (pos1 < n1 and pos2 > n1) or (pos1 > n1 and pos2 < n1):
lcp_len = sa.lcp[i]
if lcp_len > max_len:
max_len = lcp_len
result = sa.text[pos1:pos1 + lcp_len]
return result
# Generalized Suffix Tree for multiple strings
class GeneralizedSuffixTree:
class Node:
def __init__(self):
self.children = {}
self.is_leaf = False
self.string_indices = set()
def __init__(self):
self.root = self.Node()
self.strings = []
def add_string(self, s):
"""Add a string to the generalized suffix tree"""
string_index = len(self.strings)
self.strings.append(s)
# Add all suffixes
for i in range(len(s)):
self._add_suffix(s[i:], string_index)
def _add_suffix(self, suffix, string_index):
"""Add a suffix to the tree"""
node = self.root
for char in suffix:
if char not in node.children:
node.children[char] = self.Node()
node = node.children[char]
node.string_indices.add(string_index)
node.is_leaf = True
def find_common_substrings(self, min_length=1):
"""Find all common substrings of minimum length"""
common_substrings = []
def dfs(node, path):
# Check if this node represents a common substring
if len(node.string_indices) == len(self.strings) and \
len(path) >= min_length:
common_substrings.append(path)
# Continue DFS
for char, child in node.children.items():
dfs(child, path + char)
dfs(self.root, "")
return common_substrings
def longest_common_substring_all(self):
"""Find longest common substring among all strings"""
common = self.find_common_substrings()
if not common:
return ""
return max(common, key=len)
# Suffix Automaton (DAWG - Directed Acyclic Word Graph)
class SuffixAutomaton:
class State:
def __init__(self):
self.len = 0
self.link = None
self.next = {}
def __init__(self, s):
self.states = [self.State()]
self.last = 0
for char in s:
self._extend(char)
def _extend(self, char):
"""Extend automaton with a new character"""
cur = len(self.states)
self.states.append(self.State())
self.states[cur].len = self.states[self.last].len + 1
p = self.last
while p is not None and char not in self.states[p].next:
self.states[p].next[char] = cur
p = self.states[p].link if p > 0 else None
if p is None:
self.states[cur].link = 0
else:
q = self.states[p].next[char]
if self.states[p].len + 1 == self.states[q].len:
self.states[cur].link = q
else:
clone = len(self.states)
self.states.append(self.State())
self.states[clone].len = self.states[p].len + 1
self.states[clone].next = self.states[q].next.copy()
self.states[clone].link = self.states[q].link
while p is not None and self.states[p].next.get(char) == q:
self.states[p].next[char] = clone
p = self.states[p].link if p > 0 else None
self.states[q].link = self.states[cur].link = clone
self.last = cur
def contains(self, pattern):
"""Check if pattern exists in the string"""
state = 0
for char in pattern:
if char not in self.states[state].next:
return False
state = self.states[state].next[char]
return True
def count_distinct_substrings(self):
"""Count number of distinct substrings"""
count = 0
for state in self.states[1:]: # Skip initial state
count += self.states[state].len
if state > 0 and self.states[state].link is not None:
count -= self.states[self.states[state].link].len
return count
OCaml
(* Suffix Array Implementation *)
module SuffixArray = struct
type t = {
text: string;
suffix_array: int array;
lcp: int array;
}
(* Build suffix array using naive approach *)
let build_suffix_array text =
let n = String.length text in
let suffixes = Array.init n (fun i -> (i, String.sub text i (n - i))) in
Array.sort (fun (_, s1) (_, s2) -> String.compare s1 s2) suffixes;
Array.map fst suffixes
(* Build LCP array using Kasai's algorithm *)
let build_lcp_array text suffix_array =
let n = Array.length suffix_array in
let lcp = Array.make n 0 in
let rank = Array.make n 0 in
(* Build rank array *)
for i = 0 to n - 1 do
rank.(suffix_array.(i)) <- i
done;
let h = ref 0 in
for i = 0 to n - 1 do
if rank.(i) > 0 then begin
let j = suffix_array.(rank.(i) - 1) in
while i + !h < n && j + !h < n &&
text.[i + !h] = text.[j + !h] do
incr h
done;
lcp.(rank.(i)) <- !h;
if !h > 0 then decr h
end
done;
lcp
let create text =
let text = text ^ "$" in
let suffix_array = build_suffix_array text in
let lcp = build_lcp_array text suffix_array in
{ text; suffix_array; lcp }
(* Find all occurrences of pattern *)
let find_pattern sa pattern =
let n = Array.length sa.suffix_array in
let rec binary_search_left left right =
if left >= right then left
else
let mid = (left + right) / 2 in
let suffix = String.sub sa.text sa.suffix_array.(mid)
(String.length sa.text - sa.suffix_array.(mid)) in
if suffix < pattern then
binary_search_left (mid + 1) right
else
binary_search_left left mid
in
let start_pos = binary_search_left 0 n in
let positions = ref [] in
let rec collect_occurrences i =
if i < n then
let suffix = String.sub sa.text sa.suffix_array.(i)
(min (String.length pattern)
(String.length sa.text - sa.suffix_array.(i))) in
if suffix = pattern then begin
positions := sa.suffix_array.(i) :: !positions;
collect_occurrences (i + 1)
end
in
collect_occurrences start_pos;
List.rev !positions
(* Find longest repeated substring *)
let longest_repeated_substring sa =
let max_len = ref 0 in
let max_index = ref 0 in
for i = 1 to Array.length sa.lcp - 1 do
if sa.lcp.(i) > !max_len then begin
max_len := sa.lcp.(i);
max_index := i
end
done;
if !max_len = 0 then ""
else String.sub sa.text sa.suffix_array.(!max_index) !max_len
end
(* Trie-based Suffix Tree *)
module SuffixTree = struct
type node = {
mutable children: (char, node) Hashtbl.t;
mutable start: int;
mutable end_: int;
mutable suffix_link: node option;
}
type t = {
root: node;
text: string;
}
let create_node start end_ =
{
children = Hashtbl.create 26;
start;
end_;
suffix_link = None;
}
let create text =
let text = text ^ "$" in
let root = create_node (-1) (-1) in
{ root; text }
(* Check if pattern exists *)
let contains tree pattern =
let rec search node pos pattern_pos =
if pattern_pos >= String.length pattern then true
else
match Hashtbl.find_opt node.children pattern.[pattern_pos] with
| None -> false
| Some child ->
let edge_len = child.end_ - child.start + 1 in
let rec check_edge i =
if i >= edge_len || pattern_pos + i >= String.length pattern then
if pattern_pos + i >= String.length pattern then true
else search child (child.start + i) (pattern_pos + i)
else if tree.text.[child.start + i] = pattern.[pattern_pos + i] then
check_edge (i + 1)
else false
in
check_edge 0
in
search tree.root 0 0
(* Count occurrences of pattern *)
let count_occurrences tree pattern =
let rec count_leaves node =
if Hashtbl.length node.children = 0 then 1
else
Hashtbl.fold (fun _ child acc ->
acc + count_leaves child
) node.children 0
in
let rec find_node node pos pattern_pos =
if pattern_pos >= String.length pattern then Some node
else
match Hashtbl.find_opt node.children pattern.[pattern_pos] with
| None -> None
| Some child ->
let edge_len = child.end_ - child.start + 1 in
let remaining = String.length pattern - pattern_pos in
if remaining <= edge_len then
(* Pattern ends within this edge *)
let rec verify i =
if i >= remaining then Some child
else if tree.text.[child.start + i] = pattern.[pattern_pos + i] then
verify (i + 1)
else None
in
verify 0
else
(* Continue to next node *)
let rec verify i =
if i >= edge_len then
find_node child (child.start + edge_len) (pattern_pos + edge_len)
else if tree.text.[child.start + i] = pattern