Advanced Data Structures

Level 3: Intermediate Advanced - Master sophisticated data structures for complex problems

Advanced Data Structures
Advanced Trees (AVL, Red-Black, B-Trees)
Self-Balancing Trees
Self-balancing trees automatically maintain their height to ensure O(log n) operations. AVL trees maintain strict balance, Red-Black trees offer more relaxed balancing, and B-Trees are optimized for disk storage.
🏢 Real-Life Analogy:
Think of organizing a corporate hierarchy: AVL is like strict equal distribution of employees per manager, Red-Black allows some flexibility in team sizes, and B-Trees are like organizing departments across multiple office buildings.

🌲 Tree Types Comparison:

  • AVL Trees: Height difference ≤ 1, more rotations, faster lookups
  • Red-Black Trees: Relaxed balancing, fewer rotations, faster insertions
  • B-Trees: Multiple keys per node, optimized for disk I/O
Operation AVL Tree Red-Black Tree B-Tree
Search O(log n) O(log n) O(log n)
Insert O(log n) O(log n) O(log n)
Delete O(log n) O(log n) O(log n)
Height Guarantee 1.44 log n 2 log n log_m n
Trie (Prefix Trees)
Efficient String Storage and Retrieval
A Trie is a tree-like data structure that stores strings character by character, enabling efficient prefix searches, autocomplete functionality, and word validation.
📖 Real-Life Analogy:
Think of a dictionary where words sharing the same prefix are grouped together. Instead of storing "cat", "cats", "car" separately, a trie shares the common prefix "ca" and branches only where words differ.

🔤 Trie Applications:

  • Autocomplete: Suggest words based on prefix
  • Spell Checkers: Validate word existence
  • IP Routing: Longest prefix matching
  • Word Games: Scrabble, Boggle solvers
Operation Time Complexity Space Complexity
Insert O(m) O(m)
Search O(m) O(1)
Delete O(m) O(1)
Prefix Search O(p + n) O(1)

m = length of word, p = length of prefix, n = number of words with prefix

Union-Find (Disjoint Set Union)
Efficient Connected Component Management
Union-Find is a data structure that tracks a set of elements partitioned into disjoint subsets. It efficiently determines whether two elements belong to the same set and can merge sets.
🏝️ Real-Life Analogy:
Imagine islands being connected by bridges. Union-Find helps track which islands are connected (in the same component) and can efficiently build new bridges (union operations) while checking if two islands are already connected.

🔗 Union-Find Applications:

  • Kruskal's MST: Detect cycles in graph
  • Network Connectivity: Track connected computers
  • Image Processing: Find connected regions
  • Social Networks: Friend groups and communities
TypeScript
class UnionFind {
    private parent: number[];
    private rank: number[];
    private count: number;
    
    constructor(n: number) {
        this.parent = Array(n).fill(0).map((_, i) => i);
        this.rank = Array(n).fill(0);
        this.count = n;
    }
    
    // Find with path compression
    find(x: number): number {
        if (this.parent[x] !== x) {
            this.parent[x] = this.find(this.parent[x]); // Path compression
        }
        return this.parent[x];
    }
    
    // Union by rank
    union(x: number, y: number): boolean {
        const rootX = this.find(x);
        const rootY = this.find(y);
        
        if (rootX === rootY) return false; // Already in same set
        
        // Union by rank
        if (this.rank[rootX] < this.rank[rootY]) {
            this.parent[rootX] = rootY;
        } else if (this.rank[rootX] > this.rank[rootY]) {
            this.parent[rootY] = rootX;
        } else {
            this.parent[rootY] = rootX;
            this.rank[rootX]++;
        }
        
        this.count--;
        return true;
    }
    
    // Check if connected
    connected(x: number, y: number): boolean {
        return this.find(x) === this.find(y);
    }
    
    // Get number of disjoint sets
    getCount(): number {
        return this.count;
    }
}

// Example: Finding number of islands
function numIslands(grid: string[][]): number {
    if (!grid.length || !grid[0].length) return 0;
    
    const rows = grid.length;
    const cols = grid[0].length;
    const uf = new UnionFind(rows * cols);
    let landCount = 0;
    
    // Convert 2D position to 1D
    const getIndex = (r: number, c: number) => r * cols + c;
    
    // Directions: right, down
    const directions = [[0, 1], [1, 0]];
    
    for (let r = 0; r < rows; r++) {
        for (let c = 0; c < cols; c++) {
            if (grid[r][c] === '1') {
                landCount++;
                
                // Union with adjacent land cells
                for (const [dr, dc] of directions) {
                    const nr = r + dr;
                    const nc = c + dc;
                    
                    if (nr < rows && nc < cols && grid[nr][nc] === '1') {
                        const idx1 = getIndex(r, c);
                        const idx2 = getIndex(nr, nc);
                        
                        if (uf.union(idx1, idx2)) {
                            landCount--;
                        }
                    }
                }
            }
        }
    }
    
    return landCount;
}

// Weighted Union-Find for Minimum Spanning Tree
class WeightedUnionFind extends UnionFind {
    private weights: Map = new Map();
    
    addEdge(x: number, y: number, weight: number): boolean {
        if (this.union(x, y)) {
            const key = `${Math.min(x, y)}-${Math.max(x, y)}`;
            this.weights.set(key, weight);
            return true;
        }
        return false;
    }
    
    getTotalWeight(): number {
        let total = 0;
        for (const weight of this.weights.values()) {
            total += weight;
        }
        return total;
    }
}
                                
Python
class UnionFind:
    def __init__(self, n):
        self.parent = list(range(n))
        self.rank = [0] * n
        self.count = n
    
    def find(self, x):
        """Find with path compression"""
        if self.parent[x] != x:
            self.parent[x] = self.find(self.parent[x])
        return self.parent[x]
    
    def union(self, x, y):
        """Union by rank"""
        root_x = self.find(x)
        root_y = self.find(y)
        
        if root_x == root_y:
            return False
        
        # Union by rank
        if self.rank[root_x] < self.rank[root_y]:
            self.parent[root_x] = root_y
        elif self.rank[root_x] > self.rank[root_y]:
            self.parent[root_y] = root_x
        else:
            self.parent[root_y] = root_x
            self.rank[root_x] += 1
        
        self.count -= 1
        return True
    
    def connected(self, x, y):
        """Check if two elements are in same set"""
        return self.find(x) == self.find(y)
    
    def get_count(self):
        """Get number of disjoint sets"""
        return self.count

# Example: Accounts Merge (Group accounts by common email)
def accounts_merge(accounts):
    from collections import defaultdict
    
    email_to_id = {}
    email_to_name = {}
    
    # Assign unique ID to each email
    email_id = 0
    for account in accounts:
        name = account[0]
        for email in account[1:]:
            if email not in email_to_id:
                email_to_id[email] = email_id
                email_id += 1
            email_to_name[email] = name
    
    # Create Union-Find structure
    uf = UnionFind(email_id)
    
    # Union emails in same account
    for account in accounts:
        if len(account) > 2:
            first_email_id = email_to_id[account[1]]
            for email in account[2:]:
                uf.union(first_email_id, email_to_id[email])
    
    # Group emails by component
    component_to_emails = defaultdict(list)
    for email, email_id in email_to_id.items():
        root = uf.find(email_id)
        component_to_emails[root].append(email)
    
    # Build result
    result = []
    for emails in component_to_emails.values():
        name = email_to_name[emails[0]]
        result.append([name] + sorted(emails))
    
    return sorted(result)

# Example: Minimum Cost to Connect All Points (Kruskal's Algorithm)
def min_cost_connect_points(points):
    n = len(points)
    if n <= 1:
        return 0
    
    # Calculate all edges with Manhattan distance
    edges = []
    for i in range(n):
        for j in range(i + 1, n):
            dist = abs(points[i][0] - points[j][0]) + abs(points[i][1] - points[j][1])
            edges.append((dist, i, j))
    
    # Sort edges by weight
    edges.sort()
    
    # Kruskal's algorithm using Union-Find
    uf = UnionFind(n)
    total_cost = 0
    edges_used = 0
    
    for cost, u, v in edges:
        if uf.union(u, v):
            total_cost += cost
            edges_used += 1
            if edges_used == n - 1:
                break
    
    return total_cost
                                
OCaml
(* Union-Find with path compression and union by rank *)
module type UNION_FIND = sig
  type t
  
  val create : int -> t
  val find : t -> int -> int
  val union : t -> int -> int -> bool
  val connected : t -> int -> int -> bool
  val get_count : t -> int
  val components : t -> int -> int list list
end

module UnionFind : UNION_FIND = struct
  type t = {
    mutable parent: int array;
    mutable rank: int array;
    mutable count: int;
    size: int;
  }
  
  let create n = {
    parent = Array.init n (fun i -> i);
    rank = Array.make n 0;
    count = n;
    size = n;
  }
  
  let rec find uf x =
    if uf.parent.(x) <> x then
      uf.parent.(x) <- find uf uf.parent.(x);  (* Path compression *)
    uf.parent.(x)
  
  let union uf x y =
    let root_x = find uf x in
    let root_y = find uf y in
    
    if root_x = root_y then false
    else begin
      (* Union by rank - functional approach to rank comparison *)
      let (new_parent, new_child, rank_increment) = 
        if uf.rank.(root_x) < uf.rank.(root_y) then
          (root_y, root_x, 0)
        else if uf.rank.(root_x) > uf.rank.(root_y) then
          (root_x, root_y, 0)
        else
          (root_x, root_y, 1)
      in
      uf.parent.(new_child) <- new_parent;
      uf.rank.(new_parent) <- uf.rank.(new_parent) + rank_increment;
      uf.count <- uf.count - 1;
      true
    end
  
  let connected uf x y =
    find uf x = find uf y
  
  let get_count uf = uf.count
  
  let components uf n =
    let component_map = Array.make n [] in
    for i = 0 to n - 1 do
      let root = find uf i in
      component_map.(root) <- i :: component_map.(root)
    done;
    Array.fold_right (fun comp acc -> 
      if comp = [] then acc else comp :: acc
    ) component_map []
end

(* Functional Union-Find using persistent data structures *)
module type PERSISTENT_UNION_FIND = sig
  type t
  
  val empty : int -> t
  val find : t -> int -> int * t
  val union : t -> int -> int -> t
  val connected : t -> int -> int -> bool
end

module PersistentUnionFind : PERSISTENT_UNION_FIND = struct
  type t = {
    parent: int array;
    rank: int array;
  }
  
  let empty n = {
    parent = Array.init n (fun i -> i);
    rank = Array.make n 0;
  }
  
  let rec find_with_path uf x path =
    if uf.parent.(x) = x then (x, path)
    else find_with_path uf uf.parent.(x) (x :: path)
  
  let find uf x =
    let (root, path) = find_with_path uf x [] in
    let new_parent = Array.copy uf.parent in
    List.iter (fun node -> new_parent.(node) <- root) path;
    (root, { uf with parent = new_parent })
  
  let union uf x y =
    let (root_x, uf1) = find uf x in
    let (root_y, uf2) = find uf1 y in
    
    if root_x = root_y then uf2
    else
      let new_parent = Array.copy uf2.parent in
      let new_rank = Array.copy uf2.rank in
      
      if uf2.rank.(root_x) < uf2.rank.(root_y) then
        new_parent.(root_x) <- root_y
      else if uf2.rank.(root_x) > uf2.rank.(root_y) then
        new_parent.(root_y) <- root_x
      else begin
        new_parent.(root_y) <- root_x;
        new_rank.(root_x) <- new_rank.(root_x) + 1
      end;
      
      { parent = new_parent; rank = new_rank }
  
  let connected uf x y =
    let (root_x, uf1) = find uf x in
    let (root_y, _) = find uf1 y in
    root_x = root_y
end

(* Example: Find connected components in a graph *)
let find_connected_components n edges =
  let uf = UnionFind.create n in
  
  (* Process all edges *)
  List.iter (fun (u, v) ->
    ignore (UnionFind.union uf u v)
  ) edges;
  
  (* Group vertices by component *)
  let components = Array.make n [] in
  for i = 0 to n - 1 do
    let root = UnionFind.find uf i in
    components.(root) <- i :: components.(root)
  done;
  
  (* Return non-empty components *)
  Array.to_list components
  |> List.filter (fun comp -> comp <> [])
  |> List.map List.rev

(* Example: Detect cycle in undirected graph *)
let has_cycle n edges =
  let uf = UnionFind.create n in
  
  List.exists (fun (u, v) ->
    if UnionFind.connected uf u v then
      true  (* Cycle detected *)
    else begin
      UnionFind.union uf u v;
      false
    end
  ) edges
                                
Operation Without Optimization With Path Compression With Both Optimizations
Find O(n) O(log n) O(α(n)) ≈ O(1)
Union O(n) O(log n) O(α(n)) ≈ O(1)
Connected O(n) O(log n) O(α(n)) ≈ O(1)

α(n) is the inverse Ackermann function, which grows extremely slowly

Segment Trees & Fenwick Trees
Efficient Range Query Data Structures
Segment Trees and Fenwick Trees (Binary Indexed Trees) are data structures designed for efficient range queries and updates. They excel at problems involving cumulative operations over array segments.
📊 Real-Life Analogy:
Think of a company's hierarchical reporting structure where each manager knows the total sales of their team. To find total sales for any department range, you can quickly combine pre-calculated subtotals instead of adding individual sales.

🎯 Applications:

  • Range Sum Queries: Sum of elements in a range
  • Range Min/Max: Minimum or maximum in a range
  • Range Updates: Update all elements in a range
  • Counting Inversions: Efficient inversion counting
TypeScript
// Segment Tree for Range Sum Queries
class SegmentTree {
    private tree: number[];
    private n: number;
    
    constructor(nums: number[]) {
        this.n = nums.length;
        this.tree = new Array(4 * this.n).fill(0);
        this.build(nums, 0, 0, this.n - 1);
    }
    
    private build(nums: number[], node: number, start: number, end: number): void {
        if (start === end) {
            this.tree[node] = nums[start];
        } else {
            const mid = Math.floor((start + end) / 2);
            const leftChild = 2 * node + 1;
            const rightChild = 2 * node + 2;
            
            this.build(nums, leftChild, start, mid);
            this.build(nums, rightChild, mid + 1, end);
            
            this.tree[node] = this.tree[leftChild] + this.tree[rightChild];
        }
    }
    
    update(index: number, value: number): void {
        this.updateHelper(0, 0, this.n - 1, index, value);
    }
    
    private updateHelper(node: number, start: number, end: number, 
                        index: number, value: number): void {
        if (start === end) {
            this.tree[node] = value;
        } else {
            const mid = Math.floor((start + end) / 2);
            const leftChild = 2 * node + 1;
            const rightChild = 2 * node + 2;
            
            if (index <= mid) {
                this.updateHelper(leftChild, start, mid, index, value);
            } else {
                this.updateHelper(rightChild, mid + 1, end, index, value);
            }
            
            this.tree[node] = this.tree[leftChild] + this.tree[rightChild];
        }
    }
    
    sumRange(left: number, right: number): number {
        return this.queryHelper(0, 0, this.n - 1, left, right);
    }
    
    private queryHelper(node: number, start: number, end: number, 
                       left: number, right: number): number {
        if (right < start || left > end) {
            return 0; // Out of range
        }
        
        if (left <= start && end <= right) {
            return this.tree[node]; // Complete overlap
        }
        
        // Partial overlap
        const mid = Math.floor((start + end) / 2);
        const leftChild = 2 * node + 1;
        const rightChild = 2 * node + 2;
        
        const leftSum = this.queryHelper(leftChild, start, mid, left, right);
        const rightSum = this.queryHelper(rightChild, mid + 1, end, left, right);
        
        return leftSum + rightSum;
    }
}

// Fenwick Tree (Binary Indexed Tree) for Range Sum
class FenwickTree {
    private tree: number[];
    private n: number;
    
    constructor(n: number) {
        this.n = n;
        this.tree = new Array(n + 1).fill(0);
    }
    
    // Update value at index i by delta
    update(i: number, delta: number): void {
        i++; // Fenwick tree is 1-indexed
        while (i <= this.n) {
            this.tree[i] += delta;
            i += i & (-i); // Add last set bit
        }
    }
    
    // Get prefix sum from index 0 to i
    query(i: number): number {
        i++; // Fenwick tree is 1-indexed
        let sum = 0;
        while (i > 0) {
            sum += this.tree[i];
            i -= i & (-i); // Remove last set bit
        }
        return sum;
    }
    
    // Get range sum from left to right
    rangeQuery(left: number, right: number): number {
        return this.query(right) - (left > 0 ? this.query(left - 1) : 0);
    }
}

// Example: Count smaller numbers after self
function countSmaller(nums: number[]): number[] {
    if (!nums.length) return [];
    
    // Coordinate compression
    const sorted = [...new Set(nums)].sort((a, b) => a - b);
    const rank = new Map();
    sorted.forEach((num, i) => rank.set(num, i));
    
    const n = sorted.length;
    const fenwick = new FenwickTree(n);
    const result: number[] = [];
    
    // Process from right to left
    for (let i = nums.length - 1; i >= 0; i--) {
        const r = rank.get(nums[i])!;
        result[i] = r > 0 ? fenwick.query(r - 1) : 0;
        fenwick.update(r, 1);
    }
    
    return result;
}
                                
Python
# Segment Tree for Range Sum Queries
class SegmentTree:
    def __init__(self, nums):
        self.n = len(nums)
        self.tree = [0] * (4 * self.n)
        if self.n > 0:
            self.build(nums, 0, 0, self.n - 1)
    
    def build(self, nums, node, start, end):
        if start == end:
            self.tree[node] = nums[start]
        else:
            mid = (start + end) // 2
            left_child = 2 * node + 1
            right_child = 2 * node + 2
            
            self.build(nums, left_child, start, mid)
            self.build(nums, right_child, mid + 1, end)
            
            self.tree[node] = self.tree[left_child] + self.tree[right_child]
    
    def update(self, index, value):
        self._update_helper(0, 0, self.n - 1, index, value)
    
    def _update_helper(self, node, start, end, index, value):
        if start == end:
            self.tree[node] = value
        else:
            mid = (start + end) // 2
            left_child = 2 * node + 1
            right_child = 2 * node + 2
            
            if index <= mid:
                self._update_helper(left_child, start, mid, index, value)
            else:
                self._update_helper(right_child, mid + 1, end, index, value)
            
            self.tree[node] = self.tree[left_child] + self.tree[right_child]
    
    def sum_range(self, left, right):
        return self._query_helper(0, 0, self.n - 1, left, right)
    
    def _query_helper(self, node, start, end, left, right):
        if right < start or left > end:
            return 0  # Out of range
        
        if left <= start and end <= right:
            return self.tree[node]  # Complete overlap
        
        # Partial overlap
        mid = (start + end) // 2
        left_child = 2 * node + 1
        right_child = 2 * node + 2
        
        left_sum = self._query_helper(left_child, start, mid, left, right)
        right_sum = self._query_helper(right_child, mid + 1, end, left, right)
        
        return left_sum + right_sum

# Fenwick Tree (Binary Indexed Tree)
class FenwickTree:
    def __init__(self, n):
        self.n = n
        self.tree = [0] * (n + 1)
    
    def update(self, i, delta):
        """Update value at index i by delta"""
        i += 1  # Fenwick tree is 1-indexed
        while i <= self.n:
            self.tree[i] += delta
            i += i & (-i)  # Add last set bit
    
    def query(self, i):
        """Get prefix sum from index 0 to i"""
        i += 1  # Fenwick tree is 1-indexed
        total = 0
        while i > 0:
            total += self.tree[i]
            i -= i & (-i)  # Remove last set bit
        return total
    
    def range_query(self, left, right):
        """Get range sum from left to right"""
        return self.query(right) - (self.query(left - 1) if left > 0 else 0)

# Example: Range Update and Point Query using Fenwick Tree
class RangeUpdateFenwick:
    def __init__(self, n):
        self.fenwick = FenwickTree(n)
    
    def range_update(self, left, right, delta):
        """Add delta to all elements from left to right"""
        self.fenwick.update(left, delta)
        if right + 1 < self.fenwick.n:
            self.fenwick.update(right + 1, -delta)
    
    def point_query(self, index):
        """Get value at index after all range updates"""
        return self.fenwick.query(index)

# Example: Count inversions using Fenwick Tree
def count_inversions(nums):
    if not nums:
        return 0
    
    # Coordinate compression
    sorted_nums = sorted(set(nums))
    rank = {num: i + 1 for i, num in enumerate(sorted_nums)}
    
    n = len(sorted_nums)
    fenwick = FenwickTree(n)
    inversions = 0
    
    for num in nums:
        r = rank[num]
        # Count elements greater than current element
        inversions += fenwick.query(n - 1) - fenwick.query(r - 1)
        fenwick.update(r - 1, 1)
    
    return inversions
                                
OCaml
(* Segment Tree for Range Sum Queries *)
module SegmentTree = struct
  type t = {
    tree: int array;
    n: int;
  }
  
  let create nums =
    let n = Array.length nums in
    let tree = Array.make (4 * n) 0 in
    let rec build node start end_ =
      if start = end_ then
        tree.(node) <- nums.(start)
      else
        let mid = (start + end_) / 2 in
        let left_child = 2 * node + 1 in
        let right_child = 2 * node + 2 in
        build left_child start mid;
        build right_child (mid + 1) end_;
        tree.(node) <- tree.(left_child) + tree.(right_child)
    in
    if n > 0 then build 0 0 (n - 1);
    { tree; n }
  
  let update seg_tree index value =
    let rec update_helper node start end_ =
      if start = end_ then
        seg_tree.tree.(node) <- value
      else
        let mid = (start + end_) / 2 in
        let left_child = 2 * node + 1 in
        let right_child = 2 * node + 2 in
        if index <= mid then
          update_helper left_child start mid
        else
          update_helper right_child (mid + 1) end_;
        seg_tree.tree.(node) <- seg_tree.tree.(left_child) + seg_tree.tree.(right_child)
    in
    update_helper 0 0 (seg_tree.n - 1)
  
  let query seg_tree left right =
    let rec query_helper node start end_ =
      if right < start || left > end_ then
        0  (* Out of range *)
      else if left <= start && end_ <= right then
        seg_tree.tree.(node)  (* Complete overlap *)
      else
        let mid = (start + end_) / 2 in
        let left_child = 2 * node + 1 in
        let right_child = 2 * node + 2 in
        let left_sum = query_helper left_child start mid in
        let right_sum = query_helper right_child (mid + 1) end_ in
        left_sum + right_sum
    in
    query_helper 0 0 (seg_tree.n - 1)
end

(* Fenwick Tree (Binary Indexed Tree) *)
module FenwickTree = struct
  type t = {
    tree: int array;
    n: int;
  }
  
  let create n = {
    tree = Array.make (n + 1) 0;
    n
  }
  
  let update fenwick i delta =
    let i = ref (i + 1) in  (* 1-indexed *)
    while !i <= fenwick.n do
      fenwick.tree.(!i) <- fenwick.tree.(!i) + delta;
      i := !i + (!i land (-(!i)))  (* Add last set bit *)
    done
  
  let query fenwick i =
    let i = ref (i + 1) in  (* 1-indexed *)
    let sum = ref 0 in
    while !i > 0 do
      sum := !sum + fenwick.tree.(!i);
      i := !i - (!i land (-(!i)))  (* Remove last set bit *)
    done;
    !sum
  
  let range_query fenwick left right =
    query fenwick right - (if left > 0 then query fenwick (left - 1) else 0)
end

(* Example: Count inversions *)
let count_inversions nums =
  let sorted_unique = 
    Array.to_list nums
    |> List.sort_uniq compare
    |> Array.of_list in
  
  let n = Array.length sorted_unique in
  let rank = Hashtbl.create n in
  Array.iteri (fun i num -> Hashtbl.add rank num i) sorted_unique;
  
  let fenwick = FenwickTree.create n in
  let inversions = ref 0 in
  
  Array.iter (fun num ->
    let r = Hashtbl.find rank num in
    inversions := !inversions + FenwickTree.query fenwick (n - 1) - FenwickTree.query fenwick r;
    FenwickTree.update fenwick r 1
  ) nums;
  
  !inversions
                                
Operation Segment Tree Fenwick Tree Space
Build O(n) O(n log n) O(n)
Point Update O(log n) O(log n) -
Range Query O(log n) O(log n) -
Range Update O(log n)* O(log n)** -

*With lazy propagation, **Using difference array technique