Complete Data Structures & Algorithms Reference Guide
Master the fundamentals and advanced techniques
Arrays are the most fundamental data structure. They store elements in contiguous memory locations, allowing O(1) access by index.
# Python array operations
arr = [1, 2, 3, 4, 5]
# Access: O(1)
first = arr[0] # 1
# Search: O(n)
if 3 in arr:
print("Found")
# Insertion: O(n) - shifts elements
arr.insert(2, 99) # [1, 2, 99, 3, 4, 5]
# Deletion: O(n)
arr.remove(99) # [1, 2, 3, 4, 5]
# String operations
s = "hello"
reversed_s = s[::-1] # "olleh"
substring = s[1:4] # "ell"
Use two pointers moving through the array (usually from both ends or at different speeds) to solve problems efficiently.
# Problem: Find two numbers in sorted array that sum to target
def two_sum(arr, target):
left, right = 0, len(arr) - 1
while left < right:
current_sum = arr[left] + arr[right]
if current_sum == target:
return [left, right]
elif current_sum < target:
left += 1 # Need larger sum
else:
right -= 1 # Need smaller sum
return None
# Time: O(n), Space: O(1)
def reverse_string(s):
s = list(s) # Strings are immutable
left, right = 0, len(s) - 1
while left < right:
s[left], s[right] = s[right], s[left] # Swap
left += 1
right -= 1
return ''.join(s)
Maintain a "window" of elements and slide it through the array. Perfect for substring/subarray problems.
def max_sum_subarray(arr, k):
# Fixed window size
window_sum = sum(arr[:k])
max_sum = window_sum
for i in range(k, len(arr)):
# Slide window: remove left, add right
window_sum = window_sum - arr[i - k] + arr[i]
max_sum = max(max_sum, window_sum)
return max_sum
# Time: O(n), Space: O(1)
def longest_unique_substring(s):
# Variable window
char_map = {}
left = 0
max_len = 0
for right in range(len(s)):
# Shrink window if duplicate found
if s[right] in char_map:
left = max(left, char_map[s[right]] + 1)
char_map[s[right]] = right
max_len = max(max_len, right - left + 1)
return max_len
Store key-value pairs with average O(1) access time. Uses a hash function to map keys to array indices.
def two_sum_hash(arr, target):
# Store complement -> index
seen = {}
for i, num in enumerate(arr):
complement = target - num
if complement in seen:
return [seen[complement], i]
seen[num] = i
return None
# Time: O(n), Space: O(n)
A linear data structure where elements are stored in nodes, each pointing to the next node.
class ListNode:
def __init__(self, val=0, next=None):
self.val = val
self.next = next
# Example: Reverse Linked List
def reverse_list(head):
prev = None
current = head
while current:
next_node = current.next # Store next
current.next = prev # Reverse link
prev = current # Move prev
current = next_node # Move current
return prev # New head
# Stack implementation
class Stack:
def __init__(self):
self.items = []
def push(self, item):
self.items.append(item)
def pop(self):
return self.items.pop()
def peek(self):
return self.items[-1] if self.items else None
# Example: Valid Parentheses
def is_valid(s):
stack = []
pairs = {')': '(', '}': '{', ']': '['}
for char in s:
if char in pairs:
if not stack or stack.pop() != pairs[char]:
return False
else:
stack.append(char)
return not stack
# Queue implementation
from collections import deque
class Queue:
def __init__(self):
self.items = deque()
def enqueue(self, item):
self.items.append(item)
def dequeue(self):
return self.items.popleft()
Efficiently search in a sorted array by repeatedly dividing the search space in half.
def binary_search(arr, target):
left, right = 0, len(arr) - 1
while left <= right:
mid = (left + right) // 2
if arr[mid] == target:
return mid
elif arr[mid] < target:
left = mid + 1
else:
right = mid - 1
return -1 # Not found
Comparison of common sorting algorithms:
| Algorithm | Best | Average | Worst | Space | Stable? |
|---|---|---|---|---|---|
| Quick Sort | O(n log n) | O(n log n) | O(n²) | O(log n) | No |
| Merge Sort | O(n log n) | O(n log n) | O(n log n) | O(n) | Yes |
| Heap Sort | O(n log n) | O(n log n) | O(n log n) | O(1) | No |
| Bubble Sort | O(n) | O(n²) | O(n²) | O(1) | Yes |
def quicksort(arr):
if len(arr) <= 1:
return arr
pivot = arr[len(arr) // 2]
left = [x for x in arr if x < pivot]
middle = [x for x in arr if x == pivot]
right = [x for x in arr if x > pivot]
return quicksort(left) + middle + quicksort(right)
def merge_sort(arr):
if len(arr) <= 1:
return arr
mid = len(arr) // 2
left = merge_sort(arr[:mid])
right = merge_sort(arr[mid:])
return merge(left, right)
def merge(left, right):
result = []
i = j = 0
while i < len(left) and j < len(right):
if left[i] <= right[j]:
result.append(left[i])
i += 1
else:
result.append(right[j])
j += 1
result.extend(left[i:])
result.extend(right[j:])
return result
A tree is a hierarchical data structure. A Binary Search Tree (BST) maintains order: left < root < right.
class TreeNode:
def __init__(self, val=0, left=None, right=None):
self.val = val
self.left = left
self.right = right
# Inorder: Left -> Root -> Right
def inorder(root):
if not root:
return
inorder(root.left)
print(root.val)
inorder(root.right)
# Preorder: Root -> Left -> Right
def preorder(root):
if not root:
return
print(root.val)
preorder(root.left)
preorder(root.right)
# Postorder: Left -> Right -> Root
def postorder(root):
if not root:
return
postorder(root.left)
postorder(root.right)
print(root.val)
The two fundamental ways to explore nodes in a graph or tree. Choosing the right one depends entirely on the problem structure.
| Feature | BFS (Breadth-First) | DFS (Depth-First) |
|---|---|---|
| Data Structure | Queue (FIFO) | Stack (LIFO) or Recursion |
| Movement | Level by level (ripples) | Deep branch exploration |
| Best For | Shortest path in unweighted graphs | Maze solving, Cycle detection |
| Complexity | O(V + E) | O(V + E) |
from collections import deque
def bfs(graph, start_node):
queue = deque([start_node])
visited = set([start_node])
while queue:
vertex = queue.popleft() # Dequeue (First In, First Out)
print(vertex)
for neighbor in graph[vertex]:
if neighbor not in visited:
visited.add(neighbor)
queue.append(neighbor)
def dfs(graph, node, visited=None):
if visited is None:
visited = set()
visited.add(node)
print(node)
for neighbor in graph[node]:
if neighbor not in visited:
dfs(graph, neighbor, visited)
def dfs_iterative(graph, start_node):
stack = [start_node]
visited = set([start_node])
while stack:
vertex = stack.pop() # Pop from stack (Last In, First Out)
print(vertex)
for neighbor in graph[vertex]:
if neighbor not in visited:
visited.add(neighbor)
stack.append(neighbor)
An optimization technique. It is essentially Recursion + Memory. It stores results of expensive function calls and reuses them.
Start from the big problem and break it down. Store results in a dictionary/map.
def fib_memo(n, memo={}):
if n in memo: return memo[n] # Return cached result
if n <= 1: return n
# Store result before returning
memo[n] = fib_memo(n-1, memo) + fib_memo(n-2, memo)
return memo[n]
# Time: O(n), Space: O(n)
Start from the smallest subproblem and iteratively build up the table. Often saves memory (no recursion stack).
def fib_tab(n):
if n <= 1: return n
table = [0] * (n + 1)
table[1] = 1
for i in range(2, n + 1):
# Current depends on previous two
table[i] = table[i-1] + table[i-2]
return table[n]
# Time: O(n), Space: O(n)
# Problem: You can climb 1 or 2 steps. How many ways to reach top?
def climb_stairs(n):
if n <= 2:
return n
dp = [0] * (n + 1)
dp[1] = 1
dp[2] = 2
for i in range(3, n + 1):
dp[i] = dp[i-1] + dp[i-2]
return dp[n]
Make the locally optimal choice at each step, hoping it leads to a globally optimal solution.
# Problem: Select maximum non-overlapping activities
def activity_selection(activities):
# Sort by end time
activities.sort(key=lambda x: x[1])
selected = [activities[0]]
last_end = activities[0][1]
for start, end in activities[1:]:
if start >= last_end:
selected.append((start, end))
last_end = end
return selected
# activities = [(start, end), ...]
# Works for standard coin systems (1, 5, 10, 25)
def coin_change_greedy(coins, amount):
coins.sort(reverse=True)
result = []
for coin in coins:
while amount >= coin:
result.append(coin)
amount -= coin
return result if amount == 0 else None
Systematically explore all possibilities by building solutions incrementally and abandoning paths that can't lead to a solution.
def solve_n_queens(n):
result = []
board = ['.' * n for _ in range(n)]
def is_safe(row, col):
# Check column
for i in range(row):
if board[i][col] == 'Q':
return False
# Check diagonals
for i, j in zip(range(row-1, -1, -1),
range(col-1, -1, -1)):
if board[i][j] == 'Q':
return False
for i, j in zip(range(row-1, -1, -1),
range(col+1, n)):
if board[i][j] == 'Q':
return False
return True
def backtrack(row):
if row == n:
result.append([row[:] for row in board])
return
for col in range(n):
if is_safe(row, col):
board[row] = board[row][:col] + 'Q' + board[row][col+1:]
backtrack(row + 1)
board[row] = board[row][:col] + '.' + board[row][col+1:] # Backtrack
backtrack(0)
return result
A complete binary tree that maintains heap property: parent nodes are always greater (max-heap) or smaller (min-heap) than children.
import heapq
# Min-heap (default)
heap = []
heapq.heappush(heap, 3)
heapq.heappush(heap, 1)
heapq.heappush(heap, 2)
min_val = heapq.heappop(heap) # 1
# Max-heap (negate values)
max_heap = []
heapq.heappush(max_heap, -3)
heapq.heappush(max_heap, -1)
max_val = -heapq.heappop(max_heap) # 3
# Example: Find K largest elements
def k_largest(arr, k):
return heapq.nlargest(k, arr)
A tree-like data structure for storing strings. Each node represents a character, and paths from root to leaf represent words.
class TrieNode:
def __init__(self):
self.children = {}
self.is_end = False
class Trie:
def __init__(self):
self.root = TrieNode()
def insert(self, word):
node = self.root
for char in word:
if char not in node.children:
node.children[char] = TrieNode()
node = node.children[char]
node.is_end = True
def search(self, word):
node = self.root
for char in word:
if char not in node.children:
return False
node = node.children[char]
return node.is_end
def starts_with(self, prefix):
node = self.root
for char in prefix:
if char not in node.children:
return False
node = node.children[char]
return True
The "Try Everything" approach. While rarely the most efficient, it is the foundation for verifying correctness and benchmarking optimized solutions.
# Problem: Find two numbers in arr that sum to target
def brute_force_two_sum(arr, target):
n = len(arr)
# Nested loops check every pair
for i in range(n):
for j in range(i + 1, n):
if arr[i] + arr[j] == target:
return [i, j]
return None
# Time: O(n²), Space: O(1)