Modern Topics
Master cutting-edge algorithms used in modern distributed systems, big data processing, and machine learning applications
Modern software systems handle massive datasets, real-time streams, and complex optimization problems that require specialized algorithmic approaches. This section covers cutting-edge algorithms that are increasingly relevant in technical interviews at top-tier companies.
π Why Modern Topics Matter:
- Industry Relevance: Used in real production systems at scale
- Interview Differentiation: Demonstrates advanced knowledge and problem-solving
- Practical Applications: Solve real-world constraints like memory and time limits
- Career Growth: Essential for senior engineering roles and system design
Hash Functions: hβ, hβ, hβ, hβ β β β β [0][1][2][3][4][5][6][7][8][9] β Row 1 [0][1][2][3][4][5][6][7][8][9] β Row 2 [0][1][2][3][4][5][6][7][8][9] β Row 3 [0][1][2][3][4][5][6][7][8][9] β Row 4 Stream: a, b, a, c, a, b, d, a... For element 'a': increment counters at hβ(a), hβ(a), hβ(a), hβ(a) Query frequency of 'a': min(counter[hβ(a)], counter[hβ(a)], counter[hβ(a)], counter[hβ(a)]) Reservoir Sampling (size k=3): Stream: [1, 2, 3, 4, 5, 6, 7, 8, ...] β Reservoir: [1, 2, 3] β [1, 4, 3] β [1, 4, 5] β [1, 4, 7] β ...
// Count-Min Sketch Implementation class CountMinSketch { private table: number[][]; private hashFunctions: ((x: string) => number)[]; private width: number; private depth: number; constructor(epsilon: number, delta: number) { this.width = Math.ceil(Math.E / epsilon); this.depth = Math.ceil(Math.log(1 / delta)); this.table = Array(this.depth).fill(null).map(() => Array(this.width).fill(0)); this.hashFunctions = this.generateHashFunctions(); } private generateHashFunctions(): ((x: string) => number)[] { const functions: ((x: string) => number)[] = []; for (let i = 0; i < this.depth; i++) { const a = Math.floor(Math.random() * 1000) + 1; const b = Math.floor(Math.random() * 1000); functions.push((x: string) => { let hash = 0; for (let j = 0; j < x.length; j++) { hash = (hash * a + x.charCodeAt(j) + b) % this.width; } return Math.abs(hash); }); } return functions; } add(element: string, count: number = 1): void { for (let i = 0; i < this.depth; i++) { const j = this.hashFunctions[i](element); this.table[i][j] += count; } } query(element: string): number { let minCount = Infinity; for (let i = 0; i < this.depth; i++) { const j = this.hashFunctions[i](element); minCount = Math.min(minCount, this.table[i][j]); } return minCount; } } // Reservoir Sampling Implementation class ReservoirSampling { private reservoir: any[]; private sampleSize: number; private itemsSeen: number; constructor(sampleSize: number) { this.reservoir = []; this.sampleSize = sampleSize; this.itemsSeen = 0; } addItem(item: any): void { this.itemsSeen++; if (this.reservoir.length < this.sampleSize) { this.reservoir.push(item); } else { const randomIndex = Math.floor(Math.random() * this.itemsSeen); if (randomIndex < this.sampleSize) { this.reservoir[randomIndex] = item; } } } getSample(): any[] { return [...this.reservoir]; } }
import random import math from typing import List, Any class CountMinSketch: def __init__(self, epsilon: float, delta: float): self.width = math.ceil(math.e / epsilon) self.depth = math.ceil(math.log(1 / delta)) self.table = [[0] * self.width for _ in range(self.depth)] self.hash_params = [(random.randint(1, 1000), random.randint(0, 1000)) for _ in range(self.depth)] def _hash(self, element: str, row: int) -> int: a, b = self.hash_params[row] hash_value = 0 for char in element: hash_value = (hash_value * a + ord(char) + b) % self.width return abs(hash_value) def add(self, element: str, count: int = 1) -> None: for i in range(self.depth): j = self._hash(element, i) self.table[i][j] += count def query(self, element: str) -> int: return min(self.table[i][self._hash(element, i)] for i in range(self.depth)) class ReservoirSampling: def __init__(self, sample_size: int): self.reservoir = [] self.sample_size = sample_size self.items_seen = 0 def add_item(self, item: Any) -> None: self.items_seen += 1 if len(self.reservoir) < self.sample_size: self.reservoir.append(item) else: random_index = random.randint(0, self.items_seen - 1) if random_index < self.sample_size: self.reservoir[random_index] = item def get_sample(self) -> List[Any]: return self.reservoir.copy()
(* Count-Min Sketch in OCaml *) module CountMinSketch = struct type t = { table: int array array; width: int; depth: int; hash_params: (int * int) array; } let create epsilon delta = let width = int_of_float (ceil (exp 1.0 /. epsilon)) in let depth = int_of_float (ceil (log (1.0 /. delta))) in let table = Array.make_matrix depth width 0 in let hash_params = Array.init depth (fun _ -> (Random.int 1000 + 1, Random.int 1000)) in { table; width; depth; hash_params } let hash element row sketch = let (a, b) = sketch.hash_params.(row) in let hash_value = ref 0 in String.iter (fun char -> hash_value := (!hash_value * a + Char.code char + b) mod sketch.width ) element; abs !hash_value let add sketch element count = for i = 0 to sketch.depth - 1 do let j = hash element i sketch in sketch.table.(i).(j) <- sketch.table.(i).(j) + count done let query sketch element = let min_count = ref max_int in for i = 0 to sketch.depth - 1 do let j = hash element i sketch in min_count := min !min_count sketch.table.(i).(j) done; !min_count end (* Reservoir Sampling *) module ReservoirSampling = struct type 'a t = { mutable reservoir: 'a array; mutable size: int; sample_size: int; mutable items_seen: int; } let create sample_size = { reservoir = Array.make sample_size (Obj.magic 0); size = 0; sample_size; items_seen = 0; } let add_item sampler item = sampler.items_seen <- sampler.items_seen + 1; if sampler.size < sampler.sample_size then ( sampler.reservoir.(sampler.size) <- item; sampler.size <- sampler.size + 1 ) else ( let random_index = Random.int sampler.items_seen in if random_index < sampler.sample_size then sampler.reservoir.(random_index) <- item ) end
π Streaming Algorithm Applications:
- Count-Min Sketch: Network traffic analysis, frequent itemset mining
- Reservoir Sampling: Social media feed sampling, database query sampling
- HyperLogLog: Unique visitor counting, database cardinality estimation
- Bloom Filter: Cache filtering, duplicate detection
Algorithm | Space Complexity | Time per Operation | Accuracy | Use Case |
---|---|---|---|---|
Count-Min Sketch | O(Ξ΅β»ΒΉ log Ξ΄β»ΒΉ) | O(log Ξ΄β»ΒΉ) | Ξ΅-Ξ΄ guarantee | Frequency estimation |
Reservoir Sampling | O(k) | O(1) | Exact uniform sample | Random sampling |
HyperLogLog | O(log log n) | O(1) | Β±2% typically | Cardinality estimation |
Bloom Filter | O(m) | O(k) | No false negatives | Membership testing |
Exact Solution Approximate Solutions β β OPT = 100 Ξ±-approximation β ALG β€ Ξ± Γ OPT Ξ± = 2: ALG β€ 200 (2-approximation) Ξ± = 1.5: ALG β€ 150 (1.5-approximation) Ξ± = 1.1: ALG β€ 110 (1.1-approximation) Lower Ξ± = Better approximation = Higher cost
π― When Exact Solutions Are Too Expensive:
- NP-Hard Problems: TSP, Vertex Cover, Set Cover
- Large-Scale Optimization: Resource allocation, scheduling
- Real-Time Requirements: Online decision making
- Memory Constraints: Embedded systems, mobile devices
Problem | Approximation Ratio | Time Complexity | Algorithm | Applications |
---|---|---|---|---|
Vertex Cover | 2 | O(E) | Greedy Edge Selection | Network monitoring |
Set Cover | ln n | O(|Sets| Γ |Universe|) | Greedy Coverage | Facility location |
TSP (Metric) | 1.5 | O(nΒ³) | Christofides | Route planning |
Knapsack | 1-Ξ΅ | O(nΒ³/Ξ΅) | FPTAS | Resource allocation |
Ski Rental Problem (B = 10): Day 1: Rent ($1) or Buy ($10)? β Rent Strategy: Pay $1 each day Days: 1 2 3 4 5 6 7 8 9 10 11 12 ... Cost: 1 2 3 4 5 6 7 8 9 10 11 12 ... Buy Strategy: Pay $10 upfront Days: 1 2 3 4 5 6 7 8 9 10 11 12 ... Cost: 10 10 10 10 10 10 10 10 10 10 10 10 ... Threshold Strategy: Rent for B-1 days, then buy Days: 1 2 3 4 5 6 7 8 9 10 11 12 ... Cost: 1 2 3 4 5 6 7 8 9 19 19 19 ... β Buy at day 10 Competitive Ratio: 2 (never worse than 2Γ optimal)
// Online Algorithms Implementation // Ski Rental Problem class SkiRental { private buyPrice: number; private rentPrice: number; private strategy: 'rent' | 'buy' | 'threshold'; private currentCost: number; private daysSinceDecision: number; private hasWontBuy: boolean; constructor(buyPrice: number, rentPrice: number = 1, strategy: 'rent' | 'buy' | 'threshold' = 'threshold') { this.buyPrice = buyPrice; this.rentPrice = rentPrice; this.strategy = strategy; this.currentCost = 0; this.daysSinceDecision = 0; this.hasWontBuy = false; } // Make decision for next ski day skiDay(): { action: string; cost: number; totalCost: number } { this.daysSinceDecision++; let action: string; let dayCost: number; switch (this.strategy) { case 'rent': action = 'rent'; dayCost = this.rentPrice; break; case 'buy': if (this.daysSinceDecision === 1) { action = 'buy'; dayCost = this.buyPrice; } else { action = 'use_owned_skis'; dayCost = 0; } break; case 'threshold': // Rent for (buyPrice - 1) days, then buy const threshold = this.buyPrice - 1; if (this.daysSinceDecision <= threshold && !this.hasWontBuy) { action = 'rent'; dayCost = this.rentPrice; } else if (!this.hasWontBuy) { action = 'buy'; dayCost = this.buyPrice; this.hasWontBuy = true; } else { action = 'use_owned_skis'; dayCost = 0; } break; } this.currentCost += dayCost; return { action, cost: dayCost, totalCost: this.currentCost }; } // Calculate competitive ratio against optimal offline solution static competitiveAnalysis(skiDays: number, buyPrice: number, rentPrice: number = 1): { optimalCost: number; thresholdCost: number; competitiveRatio: number; } { // Optimal offline solution (knows future) const optimalCost = Math.min(skiDays * rentPrice, buyPrice); // Threshold strategy cost const threshold = buyPrice - 1; let thresholdCost: number; if (skiDays <= threshold) { thresholdCost = skiDays * rentPrice; } else { thresholdCost = threshold * rentPrice + buyPrice; } const competitiveRatio = thresholdCost / optimalCost; return { optimalCost, thresholdCost, competitiveRatio }; } } // Online Caching Algorithm - Least Recently Used (LRU) class OnlineLRUCache { private capacity: number; private cache: Map; private accessOrder: string[]; private hits: number; private misses: number; constructor(capacity: number) { this.capacity = capacity; this.cache = new Map(); this.accessOrder = []; this.hits = 0; this.misses = 0; } get(key: string): any | null { if (this.cache.has(key)) { this.hits++; // Move to end (most recently used) this.accessOrder = this.accessOrder.filter(k => k !== key); this.accessOrder.push(key); return this.cache.get(key); } else { this.misses++; return null; } } put(key: string, value: any): void { if (this.cache.has(key)) { // Update existing key this.cache.set(key, value); this.accessOrder = this.accessOrder.filter(k => k !== key); this.accessOrder.push(key); } else { // Add new key if (this.cache.size >= this.capacity) { // Evict least recently used const lru = this.accessOrder.shift()!; this.cache.delete(lru); } this.cache.set(key, value); this.accessOrder.push(key); } } getStats(): { hits: number; misses: number; hitRate: number } { const total = this.hits + this.misses; return { hits: this.hits, misses: this.misses, hitRate: total > 0 ? this.hits / total : 0 }; } }
# Online Algorithms in Python class SkiRental: def __init__(self, buy_price: int, rent_price: int = 1): self.buy_price = buy_price self.rent_price = rent_price self.current_cost = 0 self.days = 0 self.has_bought = False def ski_day_threshold_strategy(self): self.days += 1 threshold = self.buy_price - 1 if self.days <= threshold and not self.has_bought: cost = self.rent_price action = 'rent' elif not self.has_bought: cost = self.buy_price action = 'buy' self.has_bought = True else: cost = 0 action = 'use_owned' self.current_cost += cost return {'action': action, 'cost': cost, 'total': self.current_cost} class OnlineLRUCache: def __init__(self, capacity: int): self.capacity = capacity self.cache = {} self.access_order = [] def get(self, key: str): if key in self.cache: self.access_order.remove(key) self.access_order.append(key) return self.cache[key] return None def put(self, key: str, value): if key in self.cache: self.access_order.remove(key) elif len(self.cache) >= self.capacity: lru = self.access_order.pop(0) del self.cache[lru] self.cache[key] = value self.access_order.append(key)
(* Online Algorithms in OCaml *) module SkiRental = struct type t = { buy_price: int; rent_price: int; mutable current_cost: int; mutable days: int; mutable has_bought: bool; } let create buy_price rent_price = { buy_price; rent_price; current_cost = 0; days = 0; has_bought = false; } let ski_day_threshold_strategy rental = rental.days <- rental.days + 1; let threshold = rental.buy_price - 1 in let (action, cost) = if rental.days <= threshold && not rental.has_bought then ("rent", rental.rent_price) else if not rental.has_bought then ( rental.has_bought <- true; ("buy", rental.buy_price) ) else ("use_owned", 0) in rental.current_cost <- rental.current_cost + cost; (action, cost, rental.current_cost) end
β‘ Online Algorithm Applications:
- Ski Rental: Infrastructure provisioning, capacity planning
- Caching: Web caches, CPU caches, database buffers
- Load Balancing: Request routing, resource allocation
- Paging: Virtual memory management, page replacement
Algorithm | Competitive Ratio | Problem Type | Real-World Use | Best Strategy |
---|---|---|---|---|
Ski Rental | 2 | Buy vs Rent | Cloud provisioning | Threshold at B-1 |
LRU Caching | k (cache size) | Page replacement | Memory management | Exploit locality |
Load Balancing | 2 | Task assignment | Server scheduling | Greedy assignment |
K-means Clustering (k=3): Step 1: Initialize centroids Step 2: Assign to closest Step 3: Update centroids C1 C2 C3 β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ * * * β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ C1* C2* C3* *C1 *C2 *C3 Decision Tree Structure: Age β€ 25? / \ / \ Salary β€ 50K? Experience β₯ 5? / \ / \ Accept Reject Accept Consider
π§ ML Algorithm Essentials:
- K-means: Unsupervised learning, clustering similar data points
- Decision Trees: Supervised learning, creating decision rules
- Computational Focus: Understanding time/space complexity, not statistics
- Implementation Details: How algorithms work under the hood
Algorithm | Time Complexity | Space Complexity | Type | Use Case |
---|---|---|---|---|
K-means | O(tknd) | O(kd) | Unsupervised | Customer segmentation |
Decision Tree | O(n log n Γ d) | O(n) | Supervised | Classification/Regression |
Linear Regression | O(ndΒ²) | O(d) | Supervised | Prediction/Forecasting |
Where: t = iterations, k = clusters, n = samples, d = dimensions
Identify the Context
Recognize when modern algorithms are needed: large data, real-time constraints, approximation acceptable
Choose the Right Tool
Streaming for memory limits, approximation for speed, online for uncertainty, ML for pattern recognition
Explain Trade-offs
Discuss accuracy vs efficiency, memory vs time, exact vs approximate solutions
Implement Core Logic
Focus on key algorithmic insights rather than complete implementation
Analyze Performance
Provide complexity analysis and discuss real-world scalability
π Ready for Expert-Level Practice!
You've mastered cutting-edge algorithms used in modern distributed systems. Time for comprehensive practice and interview preparation!
Continue to Practice Focus β