System Integration
Bridge the gap between algorithmic knowledge and real-world system design by mastering scalability, real-world constraints, and design patterns
Level 5: Expert Interview Preparation - System Integration
Scalability Considerations
Understanding how algorithms behave at scale, designing for horizontal and vertical scaling, and making informed trade-offs between performance, consistency, and availability.
🏗️ Real-Life Analogy:
Think of designing a transportation system - a bicycle path works for a few people, but to handle city-wide traffic you need highways, traffic lights, alternate routes, and real-time monitoring. Scalable systems require similar infrastructure thinking.
TypeScript
// Type definitions for better type safety
interface ServerRequest {
id: string;
method: 'GET' | 'POST' | 'PUT' | 'DELETE';
path: string;
  headers: Record<string, string>;
body?: unknown;
timestamp: number;
}
interface ServerResponse {
serverId: string;
response: string;
status?: number;
  headers?: Record<string, string>;
processingTime?: number;
}
// Scalable Rate Limiter Design
interface RateLimiter {
isAllowed(userId: string): boolean;
reset(userId: string): void;
}
// Token Bucket Implementation - Good for burst traffic
class TokenBucketRateLimiter implements RateLimiter {
  private buckets = new Map<string, TokenBucket>();
private capacity: number;
private refillRate: number; // tokens per second
constructor(capacity: number, refillRate: number) {
this.capacity = capacity;
this.refillRate = refillRate;
}
isAllowed(userId: string): boolean {
let bucket = this.buckets.get(userId);
if (!bucket) {
bucket = new TokenBucket(this.capacity, this.refillRate);
this.buckets.set(userId, bucket);
}
return bucket.consume();
}
reset(userId: string): void {
this.buckets.delete(userId);
}
}
class TokenBucket {
private tokens: number;
private lastRefill: number;
constructor(
private capacity: number,
private refillRate: number
) {
this.tokens = capacity;
this.lastRefill = Date.now();
}
consume(): boolean {
this.refill();
    if (this.tokens >= 1) {
this.tokens--;
return true;
}
return false;
}
private refill(): void {
const now = Date.now();
const timePassed = (now - this.lastRefill) / 1000;
const tokensToAdd = timePassed * this.refillRate;
this.tokens = Math.min(this.capacity, this.tokens + tokensToAdd);
this.lastRefill = now;
}
}
// Distributed Cache with Consistent Hashing
class ConsistentHashRing {
  private ring = new Map<number, string>();
private sortedHashes: number[] = [];
private virtualNodes = 150; // Virtual nodes per server
addServer(serverId: string): void {
for (let i = 0; i < this.virtualNodes; i++) {
const hash = this.hash(`${serverId}:${i}`);
this.ring.set(hash, serverId);
}
this.sortedHashes = Array.from(this.ring.keys()).sort((a, b) => a - b);
}
removeServer(serverId: string): void {
for (let i = 0; i < this.virtualNodes; i++) {
const hash = this.hash(`${serverId}:${i}`);
this.ring.delete(hash);
}
this.sortedHashes = Array.from(this.ring.keys()).sort((a, b) => a - b);
}
getServer(key: string): string | null {
if (this.sortedHashes.length === 0) return null;
const hash = this.hash(key);
// Binary search for the first server hash >= key hash
let left = 0, right = this.sortedHashes.length;
while (left < right) {
const mid = Math.floor((left + right) / 2);
if (this.sortedHashes[mid] < hash) {
left = mid + 1;
} else {
right = mid;
}
}
// Wrap around if we've gone past the end
const serverHash = this.sortedHashes[left % this.sortedHashes.length];
return this.ring.get(serverHash) || null;
}
private hash(key: string): number {
// Simple hash function - in practice use SHA-1 or similar
let hash = 0;
for (let i = 0; i < key.length; i++) {
hash = ((hash << 5) - hash + key.charCodeAt(i)) & 0xffffffff;
}
return Math.abs(hash);
}
}
// Scalable Search System Design
class SearchIndex {
  private invertedIndex = new Map<string, Set<number>>();
  private documents = new Map<number, string>();
private docCount = 0;
addDocument(content: string): number {
const docId = this.docCount++;
this.documents.set(docId, content);
// Tokenize and index
const tokens = this.tokenize(content);
tokens.forEach(token => {
if (!this.invertedIndex.has(token)) {
this.invertedIndex.set(token, new Set());
}
this.invertedIndex.get(token)!.add(docId);
});
return docId;
}
search(query: string, limit: number = 10): SearchResult[] {
const queryTokens = this.tokenize(query);
// Get candidate documents
    let candidates: Set<number> | null = null;
for (const token of queryTokens) {
const docs = this.invertedIndex.get(token);
if (!docs) return []; // No results if any token not found
if (candidates === null) {
candidates = new Set(docs);
} else {
// Intersection - documents containing all terms
candidates = new Set([...candidates].filter(x => docs.has(x)));
}
}
if (!candidates) return [];
// Score and rank results
const results: SearchResult[] = [];
for (const docId of candidates) {
const score = this.calculateTFIDF(docId, queryTokens);
results.push({
docId,
content: this.documents.get(docId)!,
score
});
}
return results
.sort((a, b) => b.score - a.score)
.slice(0, limit);
}
private tokenize(text: string): string[] {
return text.toLowerCase()
.replace(/[^\w\s]/g, '')
.split(/\s+/)
.filter(token => token.length > 0);
}
private calculateTFIDF(docId: number, queryTokens: string[]): number {
const doc = this.documents.get(docId)!;
const docTokens = this.tokenize(doc);
const docLength = docTokens.length;
let score = 0;
for (const token of queryTokens) {
const tf = docTokens.filter(t => t === token).length / docLength;
const df = this.invertedIndex.get(token)?.size || 0;
const idf = Math.log(this.documents.size / (df + 1));
score += tf * idf;
}
return score;
}
}
interface SearchResult {
docId: number;
content: string;
score: number;
}
// Load Balancer with Health Checks
class LoadBalancer {
private servers: Server[] = [];
private currentIndex = 0;
addServer(server: Server): void {
this.servers.push(server);
}
removeServer(serverId: string): void {
this.servers = this.servers.filter(s => s.id !== serverId);
}
// Round Robin with Health Checks
getHealthyServer(): Server | null {
const healthyServers = this.servers.filter(s => s.isHealthy());
if (healthyServers.length === 0) return null;
const server = healthyServers[this.currentIndex % healthyServers.length];
this.currentIndex++;
return server;
}
// Weighted Round Robin
getWeightedServer(): Server | null {
const weightedServers: Server[] = [];
this.servers.forEach(server => {
if (server.isHealthy()) {
for (let i = 0; i < server.weight; i++) {
weightedServers.push(server);
}
}
});
if (weightedServers.length === 0) return null;
const server = weightedServers[this.currentIndex % weightedServers.length];
this.currentIndex++;
return server;
}
}
class Server {
constructor(
public id: string,
public weight: number = 1,
private healthCheckUrl: string = ''
) {}
isHealthy(): boolean {
// Simulate health check - in practice, make HTTP request
return Math.random() > 0.1; // 90% healthy
}
  async handleRequest(request: ServerRequest): Promise<ServerResponse> {
// Simulate request processing
await new Promise(resolve => setTimeout(resolve, Math.random() * 100));
return { serverId: this.id, response: "OK" };
}
}
Python
import time
import math
import random
import re
import hashlib
import threading
import asyncio
from typing import Dict, List, Set, Optional, Tuple
from collections import defaultdict
from dataclasses import dataclass
# Scalable Rate Limiter Design
class TokenBucketRateLimiter:
"""Token bucket rate limiter for handling burst traffic"""
def __init__(self, capacity: int, refill_rate: float):
self.capacity = capacity
self.refill_rate = refill_rate # tokens per second
self.buckets: Dict[str, 'TokenBucket'] = {}
self.lock = threading.RLock()
def is_allowed(self, user_id: str) -> bool:
with self.lock:
if user_id not in self.buckets:
self.buckets[user_id] = TokenBucket(self.capacity, self.refill_rate)
return self.buckets[user_id].consume()
def reset(self, user_id: str) -> None:
with self.lock:
self.buckets.pop(user_id, None)
class TokenBucket:
def __init__(self, capacity: int, refill_rate: float):
self.capacity = capacity
self.tokens = capacity
self.refill_rate = refill_rate
self.last_refill = time.time()
def consume(self) -> bool:
self._refill()
        if self.tokens >= 1:
self.tokens -= 1
return True
return False
def _refill(self) -> None:
now = time.time()
time_passed = now - self.last_refill
tokens_to_add = time_passed * self.refill_rate
self.tokens = min(self.capacity, self.tokens + tokens_to_add)
self.last_refill = now
# Distributed Cache with Consistent Hashing
class ConsistentHashRing:
"""Consistent hashing for distributed cache"""
def __init__(self, virtual_nodes: int = 150):
self.virtual_nodes = virtual_nodes
self.ring: Dict[int, str] = {}
self.sorted_hashes: List[int] = []
def add_server(self, server_id: str) -> None:
for i in range(self.virtual_nodes):
key = f"{server_id}:{i}"
hash_value = self._hash(key)
self.ring[hash_value] = server_id
self.sorted_hashes = sorted(self.ring.keys())
def remove_server(self, server_id: str) -> None:
for i in range(self.virtual_nodes):
key = f"{server_id}:{i}"
hash_value = self._hash(key)
if hash_value in self.ring:
del self.ring[hash_value]
self.sorted_hashes = sorted(self.ring.keys())
def get_server(self, key: str) -> Optional[str]:
if not self.sorted_hashes:
return None
hash_value = self._hash(key)
# Binary search for the first server hash >= key hash
idx = self._binary_search(hash_value)
server_hash = self.sorted_hashes[idx % len(self.sorted_hashes)]
return self.ring[server_hash]
def _hash(self, key: str) -> int:
return int(hashlib.md5(key.encode()).hexdigest(), 16)
def _binary_search(self, target: int) -> int:
left, right = 0, len(self.sorted_hashes)
while left < right:
mid = (left + right) // 2
if self.sorted_hashes[mid] < target:
left = mid + 1
else:
right = mid
return left
# Scalable Search System
@dataclass
class SearchResult:
doc_id: int
content: str
score: float
class SearchIndex:
"""Inverted index for scalable search"""
def __init__(self):
self.inverted_index: Dict[str, Set[int]] = defaultdict(set)
self.documents: Dict[int, str] = {}
self.doc_count = 0
self.lock = threading.RLock()
def add_document(self, content: str) -> int:
with self.lock:
doc_id = self.doc_count
self.doc_count += 1
self.documents[doc_id] = content
tokens = self._tokenize(content)
for token in tokens:
self.inverted_index[token].add(doc_id)
return doc_id
def search(self, query: str, limit: int = 10) -> List[SearchResult]:
query_tokens = self._tokenize(query)
if not query_tokens:
return []
# Get candidate documents (intersection of all terms)
candidates = None
for token in query_tokens:
docs = self.inverted_index.get(token, set())
if not docs:
return [] # No results if any token not found
if candidates is None:
candidates = docs.copy()
else:
candidates &= docs
if not candidates:
return []
# Score and rank results
results = []
for doc_id in candidates:
score = self._calculate_tf_idf(doc_id, query_tokens)
results.append(SearchResult(
doc_id=doc_id,
content=self.documents[doc_id],
score=score
))
return sorted(results, key=lambda x: x.score, reverse=True)[:limit]
    def _tokenize(self, text: str) -> List[str]:
        return [token.lower() for token in re.findall(r'\b\w+\b', text)]
def _calculate_tf_idf(self, doc_id: int, query_tokens: List[str]) -> float:
doc_tokens = self._tokenize(self.documents[doc_id])
doc_length = len(doc_tokens)
score = 0.0
for token in query_tokens:
tf = doc_tokens.count(token) / doc_length
df = len(self.inverted_index[token])
idf = math.log(len(self.documents) / (df + 1))
score += tf * idf
return score
# Load Balancer with Health Checks
class Server:
def __init__(self, server_id: str, weight: int = 1):
self.id = server_id
self.weight = weight
self.healthy = True
self.last_health_check = time.time()
async def handle_request(self, request) -> dict:
# Simulate request processing
await asyncio.sleep(0.1) # Simulate work
return {"server_id": self.id, "response": "OK"}
def is_healthy(self) -> bool:
# Simulate health check
if time.time() - self.last_health_check > 30: # Check every 30 seconds
self.healthy = random.random() > 0.05 # 95% healthy
self.last_health_check = time.time()
return self.healthy
class LoadBalancer:
def __init__(self):
self.servers: List[Server] = []
self.current_index = 0
self.lock = threading.Lock()
def add_server(self, server: Server) -> None:
with self.lock:
self.servers.append(server)
def remove_server(self, server_id: str) -> None:
with self.lock:
self.servers = [s for s in self.servers if s.id != server_id]
def get_healthy_server(self) -> Optional[Server]:
"""Round robin with health checks"""
healthy_servers = [s for s in self.servers if s.is_healthy()]
if not healthy_servers:
return None
with self.lock:
server = healthy_servers[self.current_index % len(healthy_servers)]
self.current_index += 1
return server
def get_weighted_server(self) -> Optional[Server]:
"""Weighted round robin"""
weighted_servers = []
for server in self.servers:
if server.is_healthy():
weighted_servers.extend([server] * server.weight)
if not weighted_servers:
return None
with self.lock:
server = weighted_servers[self.current_index % len(weighted_servers)]
self.current_index += 1
return server
# Circuit Breaker Pattern for Fault Tolerance
class CircuitBreaker:
def __init__(self, failure_threshold: int = 5, timeout: int = 60):
self.failure_threshold = failure_threshold
self.timeout = timeout
self.failure_count = 0
self.last_failure_time = None
self.state = 'CLOSED' # CLOSED, OPEN, HALF_OPEN
self.lock = threading.Lock()
def call(self, func, *args, **kwargs):
with self.lock:
if self.state == 'OPEN':
if time.time() - self.last_failure_time > self.timeout:
self.state = 'HALF_OPEN'
else:
raise Exception("Circuit breaker is OPEN")
try:
result = func(*args, **kwargs)
self._on_success()
return result
except Exception as e:
self._on_failure()
raise e
def _on_success(self):
self.failure_count = 0
self.state = 'CLOSED'
def _on_failure(self):
self.failure_count += 1
self.last_failure_time = time.time()
if self.failure_count >= self.failure_threshold:
self.state = 'OPEN'
# Database Sharding Strategy
class ShardManager:
def __init__(self, num_shards: int):
self.num_shards = num_shards
self.shards = {i: f"shard_{i}" for i in range(num_shards)}
def get_shard(self, key: str) -> str:
"""Hash-based sharding"""
hash_value = int(hashlib.md5(key.encode()).hexdigest(), 16)
shard_id = hash_value % self.num_shards
return self.shards[shard_id]
def get_range_shard(self, key: int, ranges: List[Tuple[int, int]]) -> str:
"""Range-based sharding"""
for i, (start, end) in enumerate(ranges):
if start <= key <= end:
return self.shards[i]
raise ValueError(f"Key {key} not in any range")
def add_shard(self) -> None:
"""Add new shard (requires resharding)"""
new_shard_id = self.num_shards
self.shards[new_shard_id] = f"shard_{new_shard_id}"
self.num_shards += 1
# In practice, this would trigger data migration
OCaml
open Printf
(* Rate Limiter with Token Bucket *)
module TokenBucket = struct
type t = {
mutable tokens: float;
capacity: float;
refill_rate: float; (* tokens per second *)
mutable last_refill: float;
}
let create capacity refill_rate = {
tokens = capacity;
capacity;
refill_rate;
last_refill = Unix.gettimeofday ();
}
let refill bucket =
let now = Unix.gettimeofday () in
let time_passed = now -. bucket.last_refill in
let tokens_to_add = time_passed *. bucket.refill_rate in
bucket.tokens <- min bucket.capacity (bucket.tokens +. tokens_to_add);
bucket.last_refill <- now
let consume bucket =
refill bucket;
    if bucket.tokens >= 1.0 then (
bucket.tokens <- bucket.tokens -. 1.0;
true
) else false
end
module RateLimiter = struct
type t = {
buckets: (string, TokenBucket.t) Hashtbl.t;
capacity: float;
refill_rate: float;
}
let create capacity refill_rate = {
buckets = Hashtbl.create 1000;
capacity;
refill_rate;
}
let is_allowed limiter user_id =
let bucket =
match Hashtbl.find_opt limiter.buckets user_id with
| Some b -> b
| None ->
let b = TokenBucket.create limiter.capacity limiter.refill_rate in
Hashtbl.add limiter.buckets user_id b;
b
in
TokenBucket.consume bucket
let reset limiter user_id =
Hashtbl.remove limiter.buckets user_id
end
(* Consistent Hashing for Distributed Systems *)
module ConsistentHash = struct
type t = {
ring: (int, string) Hashtbl.t;
mutable sorted_hashes: int list;
virtual_nodes: int;
}
let create virtual_nodes = {
ring = Hashtbl.create (virtual_nodes * 10);
sorted_hashes = [];
virtual_nodes;
}
  let hash_string s =
    let hash = ref 0 in
    String.iter (fun c ->
      hash := (!hash lsl 5) - !hash + Char.code c
    ) s;
    abs !hash
let add_server ring server_id =
for i = 0 to ring.virtual_nodes - 1 do
let key = sprintf "%s:%d" server_id i in
let hash_val = hash_string key in
Hashtbl.add ring.ring hash_val server_id
done;
ring.sorted_hashes <- List.sort compare (Hashtbl.fold (fun k _ acc -> k :: acc) ring.ring [])
let remove_server ring server_id =
for i = 0 to ring.virtual_nodes - 1 do
let key = sprintf "%s:%d" server_id i in
let hash_val = hash_string key in
Hashtbl.remove ring.ring hash_val
done;
ring.sorted_hashes <- List.sort compare (Hashtbl.fold (fun k _ acc -> k :: acc) ring.ring [])
let rec find_server_hash hashes target =
match hashes with
| [] -> None
| h :: _ when h >= target -> Some h
| _ :: rest -> find_server_hash rest target
let get_server ring key =
if ring.sorted_hashes = [] then None
else
let hash_val = hash_string key in
match find_server_hash ring.sorted_hashes hash_val with
| Some h -> Hashtbl.find_opt ring.ring h
| None ->
(* Wrap around to first server *)
match ring.sorted_hashes with
| h :: _ -> Hashtbl.find_opt ring.ring h
| [] -> None
end
(* Load Balancer with Health Checks *)
module LoadBalancer = struct
type server = {
id: string;
weight: int;
mutable healthy: bool;
mutable last_check: float;
}
type t = {
mutable servers: server list;
mutable current_index: int;
}
let create () = {
servers = [];
current_index = 0;
}
let add_server lb server =
lb.servers <- server :: lb.servers
let remove_server lb server_id =
lb.servers <- List.filter (fun s -> s.id <> server_id) lb.servers
let is_healthy server =
let now = Unix.gettimeofday () in
if now -. server.last_check > 30.0 then (
server.healthy <- Random.float 1.0 > 0.05; (* 95% healthy *)
server.last_check <- now
);
server.healthy
let get_healthy_server lb =
let healthy_servers = List.filter is_healthy lb.servers in
match healthy_servers with
| [] -> None
| servers ->
let server = List.nth servers (lb.current_index mod List.length servers) in
lb.current_index <- lb.current_index + 1;
Some server
end
(* Circuit Breaker Pattern *)
module CircuitBreaker = struct
type state = Closed | Open | HalfOpen
type t = {
mutable failure_count: int;
mutable last_failure_time: float;
mutable state: state;
failure_threshold: int;
timeout: float;
}
let create ?(failure_threshold=5) ?(timeout=60.0) () = {
failure_count = 0;
last_failure_time = 0.0;
state = Closed;
failure_threshold;
timeout;
}
let call_with_breaker breaker f arg =
let now = Unix.gettimeofday () in
match breaker.state with
| Open when now -. breaker.last_failure_time > breaker.timeout ->
breaker.state <- HalfOpen;
(try
let result = f arg in
breaker.failure_count <- 0;
breaker.state <- Closed;
result
with e ->
breaker.failure_count <- breaker.failure_count + 1;
breaker.last_failure_time <- now;
breaker.state <- Open;
raise e)
| Open -> failwith "Circuit breaker is OPEN"
| Closed | HalfOpen ->
(try
let result = f arg in
breaker.failure_count <- 0;
breaker.state <- Closed;
result
with e ->
breaker.failure_count <- breaker.failure_count + 1;
breaker.last_failure_time <- now;
if breaker.failure_count >= breaker.failure_threshold then
breaker.state <- Open;
raise e)
end
🏗️ Scalability Patterns:
- Horizontal Scaling: Add more servers to handle increased load
- Vertical Scaling: Increase resources on existing servers
- Load Distribution: Distribute requests across multiple servers
- Caching Strategies: Reduce database load with intelligent caching (see the cache sketch after this list)
- Sharding: Partition data across multiple databases
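Of the patterns above, caching is the only one not already illustrated in the code samples, so here is a minimal sketch, assuming an in-memory LRU cache with a per-entry TTL that would sit in front of a database or alongside the consistent-hash ring. The class name and the capacity and TTL defaults are illustrative, not part of any specific library.
Python
import time
from collections import OrderedDict

class LRUCache:
    """In-memory LRU cache with a per-entry TTL (illustrative sketch)."""
    def __init__(self, capacity: int = 1000, ttl_seconds: float = 60.0):
        self.capacity = capacity
        self.ttl = ttl_seconds
        self.store = OrderedDict()  # key -> (expires_at, value)

    def get(self, key: str):
        entry = self.store.get(key)
        if entry is None:
            return None
        expires_at, value = entry
        if time.time() > expires_at:
            # Entry expired: drop it and report a miss
            del self.store[key]
            return None
        # Mark as most recently used
        self.store.move_to_end(key)
        return value

    def put(self, key: str, value) -> None:
        if key in self.store:
            self.store.move_to_end(key)
        self.store[key] = (time.time() + self.ttl, value)
        if len(self.store) > self.capacity:
            # Evict the least recently used entry
            self.store.popitem(last=False)
A cache like this is normally paired with an explicit invalidation strategy (write-through, write-behind, or TTL-only), chosen by how much staleness the system can tolerate.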
Real-World Constraints
Production systems operate under constraints that textbook algorithms don't consider: memory limits, network latency, hardware failures, security requirements, and business logic complexities.
🌍 Real-Life Analogy:
Think of building a bridge - in theory, you might design the perfect suspension bridge, but real-world constraints like budget, environmental regulations, existing infrastructure, and local materials force practical compromises. Software systems face similar real-world limitations.
⚖️ Common Real-World Constraints:
- Memory Limits: Working with limited RAM and avoiding memory leaks
- Network Latency: Dealing with slow or unreliable network connections (see the retry sketch after this list)
- Hardware Failures: Designing for server crashes and data corruption
- Security Requirements: Implementing authentication, authorization, encryption
- Compliance: Meeting regulatory requirements (GDPR, HIPAA, etc.)
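As one way to design around network latency and transient hardware failures, here is a minimal sketch of a retry helper with exponential backoff and jitter; the function name, the exception types caught, and the default delays are illustrative assumptions rather than part of any particular library.
Python
import random
import time

def call_with_retries(operation, max_attempts: int = 4,
                      base_delay: float = 0.1, max_delay: float = 2.0):
    """Retry a flaky operation with exponential backoff plus jitter (sketch)."""
    for attempt in range(1, max_attempts + 1):
        try:
            return operation()
        except (ConnectionError, TimeoutError):
            if attempt == max_attempts:
                raise  # Give up after the final attempt
            # Exponential backoff: base_delay, 2x, 4x, ... capped at max_delay
            delay = min(max_delay, base_delay * (2 ** (attempt - 1)))
            # Jitter spreads retries out so clients do not retry in lockstep
            time.sleep(delay + random.uniform(0, delay))
In production this kind of retry loop is usually combined with the circuit breaker shown earlier, so a dependency that is already failing is not hammered with repeated attempts.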
Design Patterns in Algorithm Implementation
Applying software design patterns to algorithm implementation improves code maintainability, testability, and scalability while making systems more robust and easier to extend.
🏗️ Real-Life Analogy:
Think of architectural blueprints - successful building designs follow proven patterns (foundation, frame, utilities, finishing). Software design patterns provide similar proven templates for organizing code and solving common programming challenges.
🎨 Key Design Patterns for Algorithms:
- Strategy Pattern: Choose algorithms at runtime based on context (see the sketch after this list)
- Template Method: Define algorithm skeleton, customize specific steps
- Observer Pattern: Notify components of algorithm state changes
- Factory Pattern: Create algorithm instances based on input type
- Decorator Pattern: Add features like caching, logging, metrics
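To make the Strategy pattern concrete, here is a minimal sketch, assuming a sorter that picks an algorithm by input size; the class name, the size threshold, and the strategy names are illustrative choices rather than a prescribed design.
Python
from typing import Callable, Dict, List

class Sorter:
    """Strategy pattern: select a sorting strategy at runtime (sketch)."""
    def __init__(self):
        # Each strategy is a callable that takes a list and returns a sorted copy
        self.strategies: Dict[str, Callable[[List[int]], List[int]]] = {
            "insertion": self._insertion_sort,
            "builtin": sorted,  # Timsort, a solid general-purpose default
        }

    def sort(self, data: List[int]) -> List[int]:
        # Small inputs: insertion sort has low constant factors
        name = "insertion" if len(data) <= 16 else "builtin"
        return self.strategies[name](data)

    @staticmethod
    def _insertion_sort(data: List[int]) -> List[int]:
        result = data[:]
        for i in range(1, len(result)):
            key, j = result[i], i - 1
            while j >= 0 and result[j] > key:
                result[j + 1] = result[j]
                j -= 1
            result[j + 1] = key
        return result
The same shape extends naturally to the Decorator pattern: wrap a strategy callable with caching, logging, or metrics without changing its interface.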
Performance Profiling and Bottleneck Identification
Expert engineers use profiling tools and techniques to identify performance bottlenecks, measure actual vs theoretical performance, and optimize based on real-world usage patterns.
🔧 Real-Life Analogy:
Think of a race car engineer who doesn't just design the car but also uses telemetry data during races to identify which components are limiting performance. Similarly, software engineers use profiling to find and fix performance bottlenecks.
📊 Profiling Techniques:
- CPU Profiling: Identify functions consuming the most processor time (see the timing sketch after this list)
- Memory Profiling: Track memory allocation, leaks, and usage patterns
- I/O Profiling: Measure disk and network operation performance
- Concurrency Profiling: Analyze thread contention and synchronization
- End-to-End Tracing: Track requests across distributed systems
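As a small starting point for CPU profiling, here is a sketch that times a function with a decorator and then profiles a call with the standard-library cProfile module; the workload function and corpus are made up for illustration.
Python
import cProfile
import functools
import time

def timed(func):
    """Decorator that reports wall-clock time for each call (sketch)."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            elapsed = time.perf_counter() - start
            print(f"{func.__name__} took {elapsed * 1000:.2f} ms")
    return wrapper

@timed
def tokenize_corpus(docs):
    # Stand-in workload: tokenize a batch of documents
    return [doc.lower().split() for doc in docs]

if __name__ == "__main__":
    docs = ["distributed systems scale out", "caching reduces latency"] * 5000
    tokenize_corpus(docs)
    # cProfile gives a per-function breakdown instead of a single total
    cProfile.run("tokenize_corpus(docs)", sort="cumulative")
Decorator-style timing answers "how long did this take", while cProfile (or a sampling profiler in production) answers "where did the time go", which is what bottleneck hunting actually needs.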
Ready for the final challenge?
You've mastered system integration! Now let's focus on comprehensive practice and interview preparation strategies.
Continue to Practice Focus →