Modern Topics
Master cutting-edge algorithms used in modern distributed systems, big data processing, and machine learning applications
Modern software systems handle massive datasets, real-time streams, and complex optimization problems that require specialized algorithmic approaches. This section covers cutting-edge algorithms that are increasingly relevant in technical interviews at top-tier companies.
π Why Modern Topics Matter:
- Industry Relevance: Used in real production systems at scale
- Interview Differentiation: Demonstrates advanced knowledge and problem-solving
- Practical Applications: Solve real-world constraints like memory and time limits
- Career Growth: Essential for senior engineering roles and system design
Hash Functions: hβ, hβ, hβ, hβ
β β β β
[0][1][2][3][4][5][6][7][8][9] β Row 1
[0][1][2][3][4][5][6][7][8][9] β Row 2
[0][1][2][3][4][5][6][7][8][9] β Row 3
[0][1][2][3][4][5][6][7][8][9] β Row 4
Stream: a, b, a, c, a, b, d, a...
For element 'a': increment counters at hβ(a), hβ(a), hβ(a), hβ(a)
Query frequency of 'a': min(counter[hβ(a)], counter[hβ(a)], counter[hβ(a)], counter[hβ(a)])
Reservoir Sampling (size k=3):
Stream: [1, 2, 3, 4, 5, 6, 7, 8, ...]
β
Reservoir: [1, 2, 3] β [1, 4, 3] β [1, 4, 5] β [1, 4, 7] β ...
// Count-Min Sketch Implementation
class CountMinSketch {
private table: number[][];
private hashFunctions: ((x: string) => number)[];
private width: number;
private depth: number;
constructor(epsilon: number, delta: number) {
this.width = Math.ceil(Math.E / epsilon);
this.depth = Math.ceil(Math.log(1 / delta));
this.table = Array(this.depth).fill(null).map(() => Array(this.width).fill(0));
this.hashFunctions = this.generateHashFunctions();
}
private generateHashFunctions(): ((x: string) => number)[] {
const functions: ((x: string) => number)[] = [];
for (let i = 0; i < this.depth; i++) {
const a = Math.floor(Math.random() * 1000) + 1;
const b = Math.floor(Math.random() * 1000);
functions.push((x: string) => {
let hash = 0;
for (let j = 0; j < x.length; j++) {
hash = (hash * a + x.charCodeAt(j) + b) % this.width;
}
return Math.abs(hash);
});
}
return functions;
}
add(element: string, count: number = 1): void {
for (let i = 0; i < this.depth; i++) {
const j = this.hashFunctions[i](element);
this.table[i][j] += count;
}
}
query(element: string): number {
let minCount = Infinity;
for (let i = 0; i < this.depth; i++) {
const j = this.hashFunctions[i](element);
minCount = Math.min(minCount, this.table[i][j]);
}
return minCount;
}
}
// Reservoir Sampling Implementation
class ReservoirSampling {
private reservoir: any[];
private sampleSize: number;
private itemsSeen: number;
constructor(sampleSize: number) {
this.reservoir = [];
this.sampleSize = sampleSize;
this.itemsSeen = 0;
}
addItem(item: any): void {
this.itemsSeen++;
if (this.reservoir.length < this.sampleSize) {
this.reservoir.push(item);
} else {
const randomIndex = Math.floor(Math.random() * this.itemsSeen);
if (randomIndex < this.sampleSize) {
this.reservoir[randomIndex] = item;
}
}
}
getSample(): any[] {
return [...this.reservoir];
}
}
import random
import math
from typing import List, Any
class CountMinSketch:
def __init__(self, epsilon: float, delta: float):
self.width = math.ceil(math.e / epsilon)
self.depth = math.ceil(math.log(1 / delta))
self.table = [[0] * self.width for _ in range(self.depth)]
self.hash_params = [(random.randint(1, 1000), random.randint(0, 1000))
for _ in range(self.depth)]
def _hash(self, element: str, row: int) -> int:
a, b = self.hash_params[row]
hash_value = 0
for char in element:
hash_value = (hash_value * a + ord(char) + b) % self.width
return abs(hash_value)
def add(self, element: str, count: int = 1) -> None:
for i in range(self.depth):
j = self._hash(element, i)
self.table[i][j] += count
def query(self, element: str) -> int:
return min(self.table[i][self._hash(element, i)] for i in range(self.depth))
class ReservoirSampling:
def __init__(self, sample_size: int):
self.reservoir = []
self.sample_size = sample_size
self.items_seen = 0
def add_item(self, item: Any) -> None:
self.items_seen += 1
if len(self.reservoir) < self.sample_size:
self.reservoir.append(item)
else:
random_index = random.randint(0, self.items_seen - 1)
if random_index < self.sample_size:
self.reservoir[random_index] = item
def get_sample(self) -> List[Any]:
return self.reservoir.copy()
(* Count-Min Sketch in OCaml *)
module CountMinSketch = struct
type t = {
table: int array array;
width: int;
depth: int;
hash_params: (int * int) array;
}
let create epsilon delta =
let width = int_of_float (ceil (exp 1.0 /. epsilon)) in
let depth = int_of_float (ceil (log (1.0 /. delta))) in
let table = Array.make_matrix depth width 0 in
let hash_params = Array.init depth (fun _ ->
(Random.int 1000 + 1, Random.int 1000)) in
{ table; width; depth; hash_params }
let hash element row sketch =
let (a, b) = sketch.hash_params.(row) in
let hash_value = ref 0 in
String.iter (fun char ->
hash_value := (!hash_value * a + Char.code char + b) mod sketch.width
) element;
abs !hash_value
let add sketch element count =
for i = 0 to sketch.depth - 1 do
let j = hash element i sketch in
sketch.table.(i).(j) <- sketch.table.(i).(j) + count
done
let query sketch element =
let min_count = ref max_int in
for i = 0 to sketch.depth - 1 do
let j = hash element i sketch in
min_count := min !min_count sketch.table.(i).(j)
done;
!min_count
end
(* Reservoir Sampling *)
module ReservoirSampling = struct
type 'a t = {
mutable reservoir: 'a array;
mutable size: int;
sample_size: int;
mutable items_seen: int;
}
let create sample_size = {
reservoir = Array.make sample_size (Obj.magic 0);
size = 0;
sample_size;
items_seen = 0;
}
let add_item sampler item =
sampler.items_seen <- sampler.items_seen + 1;
if sampler.size < sampler.sample_size then (
sampler.reservoir.(sampler.size) <- item;
sampler.size <- sampler.size + 1
) else (
let random_index = Random.int sampler.items_seen in
if random_index < sampler.sample_size then
sampler.reservoir.(random_index) <- item
)
end
π Streaming Algorithm Applications:
- Count-Min Sketch: Network traffic analysis, frequent itemset mining
- Reservoir Sampling: Social media feed sampling, database query sampling
- HyperLogLog: Unique visitor counting, database cardinality estimation
- Bloom Filter: Cache filtering, duplicate detection
| Algorithm | Space Complexity | Time per Operation | Accuracy | Use Case |
|---|---|---|---|---|
| Count-Min Sketch | O(Ξ΅β»ΒΉ log Ξ΄β»ΒΉ) | O(log Ξ΄β»ΒΉ) | Ξ΅-Ξ΄ guarantee | Frequency estimation |
| Reservoir Sampling | O(k) | O(1) | Exact uniform sample | Random sampling |
| HyperLogLog | O(log log n) | O(1) | Β±2% typically | Cardinality estimation |
| Bloom Filter | O(m) | O(k) | No false negatives | Membership testing |
Exact Solution Approximate Solutions
β β
OPT = 100 Ξ±-approximation
β
ALG β€ Ξ± Γ OPT
Ξ± = 2: ALG β€ 200 (2-approximation)
Ξ± = 1.5: ALG β€ 150 (1.5-approximation)
Ξ± = 1.1: ALG β€ 110 (1.1-approximation)
Lower Ξ± = Better approximation = Higher cost
π― When Exact Solutions Are Too Expensive:
- NP-Hard Problems: TSP, Vertex Cover, Set Cover
- Large-Scale Optimization: Resource allocation, scheduling
- Real-Time Requirements: Online decision making
- Memory Constraints: Embedded systems, mobile devices
| Problem | Approximation Ratio | Time Complexity | Algorithm | Applications |
|---|---|---|---|---|
| Vertex Cover | 2 | O(E) | Greedy Edge Selection | Network monitoring |
| Set Cover | ln n | O(|Sets| Γ |Universe|) | Greedy Coverage | Facility location |
| TSP (Metric) | 1.5 | O(nΒ³) | Christofides | Route planning |
| Knapsack | 1-Ξ΅ | O(nΒ³/Ξ΅) | FPTAS | Resource allocation |
Ski Rental Problem (B = 10):
Day 1: Rent ($1) or Buy ($10)?
β
Rent Strategy: Pay $1 each day
Days: 1 2 3 4 5 6 7 8 9 10 11 12 ...
Cost: 1 2 3 4 5 6 7 8 9 10 11 12 ...
Buy Strategy: Pay $10 upfront
Days: 1 2 3 4 5 6 7 8 9 10 11 12 ...
Cost: 10 10 10 10 10 10 10 10 10 10 10 10 ...
Threshold Strategy: Rent for B-1 days, then buy
Days: 1 2 3 4 5 6 7 8 9 10 11 12 ...
Cost: 1 2 3 4 5 6 7 8 9 19 19 19 ...
β Buy at day 10
Competitive Ratio: 2 (never worse than 2Γ optimal)
// Online Algorithms Implementation
// Ski Rental Problem
class SkiRental {
private buyPrice: number;
private rentPrice: number;
private strategy: 'rent' | 'buy' | 'threshold';
private currentCost: number;
private daysSinceDecision: number;
private hasWontBuy: boolean;
constructor(buyPrice: number, rentPrice: number = 1, strategy: 'rent' | 'buy' | 'threshold' = 'threshold') {
this.buyPrice = buyPrice;
this.rentPrice = rentPrice;
this.strategy = strategy;
this.currentCost = 0;
this.daysSinceDecision = 0;
this.hasWontBuy = false;
}
// Make decision for next ski day
skiDay(): { action: string; cost: number; totalCost: number } {
this.daysSinceDecision++;
let action: string;
let dayCost: number;
switch (this.strategy) {
case 'rent':
action = 'rent';
dayCost = this.rentPrice;
break;
case 'buy':
if (this.daysSinceDecision === 1) {
action = 'buy';
dayCost = this.buyPrice;
} else {
action = 'use_owned_skis';
dayCost = 0;
}
break;
case 'threshold':
// Rent for (buyPrice - 1) days, then buy
const threshold = this.buyPrice - 1;
if (this.daysSinceDecision <= threshold && !this.hasWontBuy) {
action = 'rent';
dayCost = this.rentPrice;
} else if (!this.hasWontBuy) {
action = 'buy';
dayCost = this.buyPrice;
this.hasWontBuy = true;
} else {
action = 'use_owned_skis';
dayCost = 0;
}
break;
}
this.currentCost += dayCost;
return {
action,
cost: dayCost,
totalCost: this.currentCost
};
}
// Calculate competitive ratio against optimal offline solution
static competitiveAnalysis(skiDays: number, buyPrice: number, rentPrice: number = 1): {
optimalCost: number;
thresholdCost: number;
competitiveRatio: number;
} {
// Optimal offline solution (knows future)
const optimalCost = Math.min(skiDays * rentPrice, buyPrice);
// Threshold strategy cost
const threshold = buyPrice - 1;
let thresholdCost: number;
if (skiDays <= threshold) {
thresholdCost = skiDays * rentPrice;
} else {
thresholdCost = threshold * rentPrice + buyPrice;
}
const competitiveRatio = thresholdCost / optimalCost;
return { optimalCost, thresholdCost, competitiveRatio };
}
}
// Online Caching Algorithm - Least Recently Used (LRU)
class OnlineLRUCache {
private capacity: number;
private cache: Map;
private accessOrder: string[];
private hits: number;
private misses: number;
constructor(capacity: number) {
this.capacity = capacity;
this.cache = new Map();
this.accessOrder = [];
this.hits = 0;
this.misses = 0;
}
get(key: string): any | null {
if (this.cache.has(key)) {
this.hits++;
// Move to end (most recently used)
this.accessOrder = this.accessOrder.filter(k => k !== key);
this.accessOrder.push(key);
return this.cache.get(key);
} else {
this.misses++;
return null;
}
}
put(key: string, value: any): void {
if (this.cache.has(key)) {
// Update existing key
this.cache.set(key, value);
this.accessOrder = this.accessOrder.filter(k => k !== key);
this.accessOrder.push(key);
} else {
// Add new key
if (this.cache.size >= this.capacity) {
// Evict least recently used
const lru = this.accessOrder.shift()!;
this.cache.delete(lru);
}
this.cache.set(key, value);
this.accessOrder.push(key);
}
}
getStats(): { hits: number; misses: number; hitRate: number } {
const total = this.hits + this.misses;
return {
hits: this.hits,
misses: this.misses,
hitRate: total > 0 ? this.hits / total : 0
};
}
}
# Online Algorithms in Python
class SkiRental:
def __init__(self, buy_price: int, rent_price: int = 1):
self.buy_price = buy_price
self.rent_price = rent_price
self.current_cost = 0
self.days = 0
self.has_bought = False
def ski_day_threshold_strategy(self):
self.days += 1
threshold = self.buy_price - 1
if self.days <= threshold and not self.has_bought:
cost = self.rent_price
action = 'rent'
elif not self.has_bought:
cost = self.buy_price
action = 'buy'
self.has_bought = True
else:
cost = 0
action = 'use_owned'
self.current_cost += cost
return {'action': action, 'cost': cost, 'total': self.current_cost}
class OnlineLRUCache:
def __init__(self, capacity: int):
self.capacity = capacity
self.cache = {}
self.access_order = []
def get(self, key: str):
if key in self.cache:
self.access_order.remove(key)
self.access_order.append(key)
return self.cache[key]
return None
def put(self, key: str, value):
if key in self.cache:
self.access_order.remove(key)
elif len(self.cache) >= self.capacity:
lru = self.access_order.pop(0)
del self.cache[lru]
self.cache[key] = value
self.access_order.append(key)
(* Online Algorithms in OCaml *)
module SkiRental = struct
type t = {
buy_price: int;
rent_price: int;
mutable current_cost: int;
mutable days: int;
mutable has_bought: bool;
}
let create buy_price rent_price = {
buy_price; rent_price;
current_cost = 0; days = 0; has_bought = false;
}
let ski_day_threshold_strategy rental =
rental.days <- rental.days + 1;
let threshold = rental.buy_price - 1 in
let (action, cost) =
if rental.days <= threshold && not rental.has_bought then
("rent", rental.rent_price)
else if not rental.has_bought then (
rental.has_bought <- true;
("buy", rental.buy_price)
) else
("use_owned", 0)
in
rental.current_cost <- rental.current_cost + cost;
(action, cost, rental.current_cost)
end
β‘ Online Algorithm Applications:
- Ski Rental: Infrastructure provisioning, capacity planning
- Caching: Web caches, CPU caches, database buffers
- Load Balancing: Request routing, resource allocation
- Paging: Virtual memory management, page replacement
| Algorithm | Competitive Ratio | Problem Type | Real-World Use | Best Strategy |
|---|---|---|---|---|
| Ski Rental | 2 | Buy vs Rent | Cloud provisioning | Threshold at B-1 |
| LRU Caching | k (cache size) | Page replacement | Memory management | Exploit locality |
| Load Balancing | 2 | Task assignment | Server scheduling | Greedy assignment |
K-means Clustering (k=3):
Step 1: Initialize centroids Step 2: Assign to closest Step 3: Update centroids
C1 C2 C3 β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ β’
* * * β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ β’ β’
C1* C2* C3* *C1 *C2 *C3
Decision Tree Structure:
Age β€ 25?
/ \
/ \
Salary β€ 50K? Experience β₯ 5?
/ \ / \
Accept Reject Accept Consider
π§ ML Algorithm Essentials:
- K-means: Unsupervised learning, clustering similar data points
- Decision Trees: Supervised learning, creating decision rules
- Computational Focus: Understanding time/space complexity, not statistics
- Implementation Details: How algorithms work under the hood
| Algorithm | Time Complexity | Space Complexity | Type | Use Case |
|---|---|---|---|---|
| K-means | O(tknd) | O(kd) | Unsupervised | Customer segmentation |
| Decision Tree | O(n log n Γ d) | O(n) | Supervised | Classification/Regression |
| Linear Regression | O(ndΒ²) | O(d) | Supervised | Prediction/Forecasting |
Where: t = iterations, k = clusters, n = samples, d = dimensions
Identify the Context
Recognize when modern algorithms are needed: large data, real-time constraints, approximation acceptable
Choose the Right Tool
Streaming for memory limits, approximation for speed, online for uncertainty, ML for pattern recognition
Explain Trade-offs
Discuss accuracy vs efficiency, memory vs time, exact vs approximate solutions
Implement Core Logic
Focus on key algorithmic insights rather than complete implementation
Analyze Performance
Provide complexity analysis and discuss real-world scalability
π Ready for Expert-Level Practice!
You've mastered cutting-edge algorithms used in modern distributed systems. Time for comprehensive practice and interview preparation!
Continue to Practice Focus β