Data Structures & Algorithms in Python
Difficulty: Medium-Hard | Companies: Google, Meta, Amazon, Netflix, Stripe
Array Manipulation Techniques
Python lists are dynamic arrays. Understanding their underlying implementation helps optimize performance.
Flattening Nested Lists
def flatten_nested(nested_list):
"""Recursively flatten arbitrarily nested lists."""
result = []
for item in nested_list:
if isinstance(item, list):
result.extend(flatten_nested(item))
else:
result.append(item)
return result
# Using generator for memory efficiency
def flatten_generator(nested_list):
"""Generator-based flattening for large datasets."""
for item in nested_list:
if isinstance(item, list):
yield from flatten_generator(item)
else:
yield item
# Test cases
nested = [1, [2, 3, [4, 5]], [6, 7], 8]
print(flatten_nested(nested)) # [1, 2, 3, 4, 5, 6, 7, 8]
print(list(flatten_generator(nested))) # Same result
βΉοΈ
Generator-based solutions use O(1) memory for the result structure, while list-based approaches use O(n).
Two-Pointer Technique
def two_sum_sorted(nums, target):
"""Find two numbers that sum to target in sorted array."""
left, right = 0, len(nums) - 1
while left < right:
current_sum = nums[left] + nums[right]
if current_sum == target:
return [left, right]
elif current_sum < target:
left += 1
else:
right -= 1
return []
def remove_duplicates(nums):
"""Remove duplicates in-place from sorted array."""
if not nums:
return 0
write_pointer = 1
for read_pointer in range(1, len(nums)):
if nums[read_pointer] != nums[read_pointer - 1]:
nums[write_pointer] = nums[read_pointer]
write_pointer += 1
return write_pointer
Linked List Operations
Singly Linked List Implementation
class ListNode:
def __init__(self, val=0, next=None):
self.val = val
self.next = next
class LinkedList:
def __init__(self):
self.head = None
self.size = 0
def append(self, val):
"""Add element to end - O(n) without tail pointer."""
new_node = ListNode(val)
if not self.head:
self.head = new_node
self.size += 1
return
current = self.head
while current.next:
current = current.next
current.next = new_node
self.size += 1
def reverse(self):
"""Reverse linked list iteratively - O(n) time, O(1) space."""
prev = None
current = self.head
while current:
next_temp = current.next
current.next = prev
prev = current
current = next_temp
self.head = prev
def has_cycle(self):
"""Detect cycle using Floyd's algorithm - O(n) time, O(1) space."""
if not self.head or not self.head.next:
return False
slow = self.head
fast = self.head.next
while slow != fast:
if not fast or not fast.next:
return False
slow = slow.next
fast = fast.next.next
return True
def find_middle(self):
"""Find middle node using two pointers."""
if not self.head:
return None
slow = fast = self.head
while fast.next and fast.next.next:
slow = slow.next
fast = fast.next.next
return slow
# Usage example
ll = LinkedList()
for i in range(1, 6):
ll.append(i)
ll.reverse()
print(ll.head.val) # 5
β οΈ
Always consider edge cases: empty list, single node, and cycles in linked list problems.
Hash Table Patterns
Consistent Hashing Implementation
import hashlib
from bisect import bisect_right
class ConsistentHash:
"""Implementation of consistent hashing for distributed systems."""
def __init__(self, nodes=None, virtual_nodes=150):
self.ring = {}
self.sorted_keys = []
self.virtual_nodes = virtual_nodes
if nodes:
for node in nodes:
self.add_node(node)
def _hash(self, key):
"""Generate hash for a given key."""
return int(hashlib.md5(key.encode()).hexdigest(), 16)
def add_node(self, node):
"""Add a node with virtual nodes to the ring."""
for i in range(self.virtual_nodes):
virtual_key = f"{node}:v{i}"
hash_val = self._hash(virtual_key)
self.ring[hash_val] = node
self.sorted_keys.append(hash_val)
self.sorted_keys.sort()
def remove_node(self, node):
"""Remove a node and its virtual nodes from the ring."""
for i in range(self.virtual_nodes):
virtual_key = f"{node}:v{i}"
hash_val = self._hash(virtual_key)
del self.ring[hash_val]
self.sorted_keys.remove(hash_val)
def get_node(self, key):
"""Get the node responsible for the given key."""
if not self.ring:
return None
hash_val = self._hash(key)
idx = bisect_right(self.sorted_keys, hash_val) % len(self.sorted_keys)
return self.ring[self.sorted_keys[idx]]
# Usage
ch = ConsistentHash(["server1", "server2", "server3"])
print(ch.get_node("user:123")) # Returns a server
Stack and Queue Patterns
Min Stack Implementation
class MinStack:
"""Stack that supports getMin() in O(1) time."""
def __init__(self):
self.stack = []
self.min_stack = []
def push(self, val):
self.stack.append(val)
# Push to min_stack if it's empty or val <= current min
if not self.min_stack or val <= self.min_stack[-1]:
self.min_stack.append(val)
def pop(self):
if not self.stack:
raise IndexError("Stack is empty")
val = self.stack.pop()
if val == self.min_stack[-1]:
self.min_stack.pop()
return val
def top(self):
if not self.stack:
raise IndexError("Stack is empty")
return self.stack[-1]
def get_min(self):
if not self.min_stack:
raise IndexError("Stack is empty")
return self.min_stack[-1]
# Monotonic Stack for Next Greater Element
def next_greater_element(nums):
"""Find next greater element for each position."""
n = len(nums)
result = [-1] * n
stack = []
for i in range(n):
while stack and nums[i] > nums[stack[-1]]:
idx = stack.pop()
result[idx] = nums[i]
stack.append(i)
return result
# Test
print(next_greater_element([2, 1, 2, 4, 3])) # [4, 2, 4, -1, -1]
Tree Traversal Patterns
Binary Search Tree with Validation
class TreeNode:
def __init__(self, val=0, left=None, right=None):
self.val = val
self.left = left
self.right = right
class BST:
def __init__(self):
self.root = None
def insert(self, val):
"""Insert value maintaining BST property."""
self.root = self._insert_recursive(self.root, val)
def _insert_recursive(self, node, val):
if not node:
return TreeNode(val)
if val < node.val:
node.left = self._insert_recursive(node.left, val)
elif val > node.val:
node.right = self._insert_recursive(node.right, val)
return node
def is_valid_bst(self):
"""Validate BST using range checking."""
return self._validate(self.root, float('-inf'), float('inf'))
def _validate(self, node, min_val, max_val):
if not node:
return True
if node.val <= min_val or node.val >= max_val:
return False
return (self._validate(node.left, min_val, node.val) and
self._validate(node.right, node.val, max_val))
def lowest_common_ancestor(self, p, q):
"""Find LCA of two nodes in BST."""
current = self.root
while current:
if p.val < current.val and q.val < current.val:
current = current.left
elif p.val > current.val and q.val > current.val:
current = current.right
else:
return current
return None
# Level-order traversal with level separation
def level_order_traversal(root):
"""BFS traversal returning list of lists per level."""
if not root:
return []
from collections import deque
result = []
queue = deque([root])
while queue:
level_size = len(queue)
current_level = []
for _ in range(level_size):
node = queue.popleft()
current_level.append(node.val)
if node.left:
queue.append(node.left)
if node.right:
queue.append(node.right)
result.append(current_level)
return result
βΉοΈ
For BST problems, always clarify if the tree is balanced. An unbalanced BST degrades to O(n) for search operations.
Graph Algorithms
Graph Representations and BFS/DFS
from collections import defaultdict, deque
class Graph:
def __init__(self, directed=False):
self.graph = defaultdict(list)
self.directed = directed
def add_edge(self, u, v, weight=1):
self.graph[u].append((v, weight))
if not self.directed:
self.graph[v].append((u, weight))
def bfs(self, start):
"""BFS traversal - O(V + E) time, O(V) space."""
visited = set([start])
queue = deque([start])
traversal = []
while queue:
vertex = queue.popleft()
traversal.append(vertex)
for neighbor, _ in self.graph[vertex]:
if neighbor not in visited:
visited.add(neighbor)
queue.append(neighbor)
return traversal
def dfs_iterative(self, start):
"""Iterative DFS using stack."""
visited = set([start])
stack = [start]
traversal = []
while stack:
vertex = stack.pop()
traversal.append(vertex)
for neighbor, _ in self.graph[vertex]:
if neighbor not in visited:
visited.add(neighbor)
stack.append(neighbor)
return traversal
def has_path(self, source, destination):
"""Check if path exists between two nodes."""
if source == destination:
return True
visited = set([source])
queue = deque([source])
while queue:
node = queue.popleft()
for neighbor, _ in self.graph[node]:
if neighbor == destination:
return True
if neighbor not in visited:
visited.add(neighbor)
queue.append(neighbor)
return False
def detect_cycle(self):
"""Detect cycle in undirected graph."""
visited = set()
def dfs(node, parent):
visited.add(node)
for neighbor, _ in self.graph[node]:
if neighbor not in visited:
if dfs(neighbor, node):
return True
elif neighbor != parent:
return True
return False
for node in self.graph:
if node not in visited:
if dfs(node, None):
return True
return False
# Topological Sort using Kahn's Algorithm
def topological_sort_kahn(graph):
"""Topological sort using BFS (Kahn's algorithm)."""
in_degree = defaultdict(int)
nodes = set()
for node in graph:
nodes.add(node)
for neighbor, _ in graph[node]:
in_degree[neighbor] += 1
nodes.add(neighbor)
queue = deque([node for node in nodes if in_degree[node] == 0])
result = []
while queue:
node = queue.popleft()
result.append(node)
for neighbor, _ in graph[node]:
in_degree[neighbor] -= 1
if in_degree[neighbor] == 0:
queue.append(neighbor)
if len(result) != len(nodes):
raise ValueError("Graph has a cycle")
return result
Dynamic Programming Foundations
Classic DP Problems
def fibonacci_memoized(n, memo={}):
"""Fibonacci with memoization - O(n) time, O(n) space."""
if n in memo:
return memo[n]
if n <= 1:
return n
memo[n] = fibonacci_memoized(n - 1, memo) + fibonacci_memoized(n - 2, memo)
return memo[n]
def coin_change(coins, amount):
"""Minimum coins to make amount - O(amount * coins) time."""
dp = [float('inf')] * (amount + 1)
dp[0] = 0
for i in range(1, amount + 1):
for coin in coins:
if coin <= i and dp[i - coin] + 1 < dp[i]:
dp[i] = dp[i - coin] + 1
return dp[amount] if dp[amount] != float('inf') else -1
def longest_common_subsequence(text1, text2):
"""LCS using bottom-up DP."""
m, n = len(text1), len(text2)
dp = [[0] * (n + 1) for _ in range(m + 1)]
for i in range(1, m + 1):
for j in range(1, n + 1):
if text1[i - 1] == text2[j - 1]:
dp[i][j] = dp[i - 1][j - 1] + 1
else:
dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
return dp[m][n]
def knapsack_01(weights, values, capacity):
"""0/1 Knapsack problem."""
n = len(weights)
dp = [[0] * (capacity + 1) for _ in range(n + 1)]
for i in range(1, n + 1):
for w in range(capacity + 1):
dp[i][w] = dp[i - 1][w]
if weights[i - 1] <= w:
dp[i][w] = max(dp[i][w],
dp[i - 1][w - weights[i - 1]] + values[i - 1])
return dp[n][capacity]
Follow-Up Questions
-
Explain the time complexity differences between Python lists and linked lists for various operations.
-
How would you implement a LRU cache using OrderedDict versus a custom doubly-linked list?
-
When would you choose BFS over DFS, and vice versa, in graph problems?
-
Explain how Python's dictionary implementation handles hash collisions.
-
How does the two-pointer technique apply to problems involving sorted data?