Using sorted()

sorted() is a built-in function in Python.

  • The original order of values is unchanged once it’s called.

  • It returns an ordered list.

    numbers = [1, 5, 3, 2, 4]
    sorted_numbers = sorted(numbers)
    print(sorted_numbers)
    print(numbers)

    Output:

    [1, 2, 3, 4, 5]

    [1, 5, 3, 2, 4]

Using sort()

The sort() is a method of list class. It orders in place and return nothing.

sorted_numbers = numbers.sort()
print(sorted_numbers)
print(numbers)

# Output:
# None
# [1, 2, 3, 4, 5]

Sort in reverse order

numbers = [1, 5, 3, 2, 4]
sorted_numbers = sorted(numbers, reverse=True)
print(sorted_numbers)
print(numbers)

# Output:
# [5, 4, 3, 2, 1]
# [1, 5, 3, 2, 4]


numbers.sort(reverse=True)
print(numbers)

# Output:
# [5, 4, 3, 2, 1]

Sort dictionary

Note that the keys in a dictionary must be unique. In the following example, even if two 5s are initialized in the number list, they will be deduplicated.

numbers_set = {20, 5, 3, 2, 4, 5}
sorted_numbers_set = sorted(numbers_set)
print(numbers_set)
print(sorted_numbers_set)
print(set(sorted_numbers_set))

# Output:
# {2, 3, 4, 5, 20}
# [2, 3, 4, 5, 20]
# {2, 3, 4, 5, 20}

Sort tuple

Example 1:

numbers_tuple = (10, 5, 3, 2, 4, 5)
sorted_numbers_tuple = sorted(numbers_tuple)
print(numbers_tuple)
print(sorted_numbers_tuple)
print(tuple(sorted_numbers_tuple))

# Output:
# (10, 5, 3, 2, 4, 5)
# [2, 3, 4, 5, 5, 10]
# (2, 3, 4, 5, 5, 10)

Example 2:

fruits = [("apple", 10), ("orange", 6), ("banana", 9), ("pear", 3)]
sorted_fruits = sorted(fruits, key=lambda x: x[1], reverse=True)
print(sorted_fruits)

# Output:
# [('apple', 10), ('banana', 9), ('orange', 6), ('pear', 3)]

Example 3:

from collections import namedtuple

Student = namedtuple("Student", "name, age, grade")
students = [
    Student("bill", 11, 97),
    Student("Andy", 12, 95),
    Student("Harry", 10, 100),
    Student("Thor", 11, 96),
]
sorted_students = sorted(students, key=lambda x: getattr(x, "grade"), reverse=True)
print(sorted_students)
print(students)

# Output:
# [Student(name='Harry', age=10, grade=100), Student(name='bill', age=11, grade=97), Student(name='Thor', age=11, grade=96), Student(name='Andy', age=12, grade=95)]
# [Student(name='bill', age=11, grade=97), Student(name='Andy', age=12, grade=95), Student(name='Harry', age=10, grade=100), Student(name='Thor', age=11, grade=96)]

Tree traversal (also known as tree search and walking the tree) is a form of graph traversal and refers to the process of visiting (e.g. retrieving, updating, or deleting) each node in a tree data structure, exactly once. Such traversals are classified by the order in which the nodes are visited. - Wikipedia

Tree traversal algorithms can be classified in the following two categories:

  • Depth-First Search (DFS): It starts at the root node (selecting some arbitrary node as the root node in the case of a graph) and explores as far as possible along each branch before backtracking.
  • Breadth-First Search (BFS): It starts at the tree’s root (selecting some arbitrary node as the root node in the case of a graph) and searches all nodes at the current depth level before moving on to the nodes at the next depth level.

Depth-First Search (DFS) algorithms have three variants:

  • Preorder Traversal (current-left-right): Visit the current node before visiting any nodes inside left or right subtrees.
  • Inorder Traversal (left-current-right): Visit the current node after visiting all nodes inside left subtree but before visiting any node within the right subtree.
  • Postorder Traversal (left-right-current): Visit the current node after visiting all the nodes of left and right subtrees.

Breadth-First Search (BFS) Algorithm has one variant:

  • Level Order Traversal: Visit nodes level-by-level and left-to-right fashion at the same level.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
class Node:
def __init__(self, val):
self.val = val
self.left = None
self.right = None


def pre_order_traverse(node):
if node == None:
return

print(f"{node.val} ", end="")
pre_order_traverse(node.left)
pre_order_traverse(node.right)


def in_order_traverse(node):
if node == None:
return

in_order_traverse(node.left)
print(f"{node.val} ", end="")
in_order_traverse(node.right)


def post_order_traverse(node):
if node == None:
return

post_order_traverse(node.left)
post_order_traverse(node.right)
print(f"{node.val} ", end="")


def level_order_traverse(node):
if node == None:
return

queue = []
queue.append(node)

while len(queue) > 0:
first = queue.pop(0)

print(f"{first.val} ", end="")

if first.left != None:
queue.append(first.left)

if first.right != None:
queue.append(first.right)

root = Node(0)
n1 = Node(1)
n2 = Node(2)
root.left = n1
root.right = n2
n3 = Node(3)
n4 = Node(4)
n1.left = n3
n1.right = n4
n5 = Node(5)
n6 = Node(6)
n2.left = n5
n2.right = n6
n7 = Node(7)
n8 = Node(8)
n3.left = n7
n3.right = n8

pre_order_traverse(root)
print()
in_order_traverse(root)
print()
post_order_traverse(root)
print()
level_order_traverse(root)
print()

# Output:
# 0 1 3 7 8 4 2 5 6
# 7 3 8 1 4 0 5 2 6
# 7 8 3 4 1 5 6 2 0
# 0 1 2 3 4 5 6 7 8

Selection sort

The selection sort algorithm sorts array by repeatedly finding the minimum element from the unsorted subarray and moving it to the sorted subarray.

For ascending order, we can implement the algorithm as below.

def selection_sort(my_list):
    for i in range(len(my_list) - 1):
        min_idx = i
        for j in range(i + 1, len(my_list)):
            if my_list[j] < my_list[min_idx]:
                min_idx = j

        # swap the minimum element with the current element
        temp = my_list[i]
        my_list[i] = my_list[min_idx]
        my_list[min_idx] = temp


test_list_1 = [4, 3, 2, 10, 12, 1, 5, 6]
selection_sort(test_list_1)
print(test_list_1)

# Time complexity: O(n^2)
# Space complexity: O(1)

The following steps elaborate how it works. The left arrow indicates the current element position and the right arrow indicates the minimum element position in the unsorted sublist.

i=0: [1, 3, 2, 10, 12, 4, 5, 6]
      ^                ^
i=1: [1, 2, 3, 10, 12, 4, 5, 6]
         ^  ^
i=2: [1, 2, 3, 10, 12, 4, 5, 6]
            ^^
i=3: [1, 2, 3, 4, 12, 10, 5, 6]
               ^      ^
i=4: [1, 2, 3, 4, 5, 10, 12, 6]
                  ^      ^
i=5: [1, 2, 3, 4, 5, 6, 12, 10]
                     ^      ^
i=6: [1, 2, 3, 4, 5, 6, 10, 12]
                        ^   ^

Insertion sort

Just like playing a card game, to re-organize the cards, we pick an element from the unsorted sublist(on the right side), search a spot backwards from the sorted sublist(on the left side), and insert the element by shifting greater elements to the right.

def insert_sort(my_list):
    # pick an element from the unsorted sublist [i:n-1]
    for i in range(1, len(my_list)):
        current = my_list[i]

        # search a spot in the sorted sublist [0:i-1]
        j = i - 1
        while j >= 0:
            if my_list[j] > current:
                my_list[j + 1] = my_list[j]
                j -= 1
            else:
                break

        # insert the current element to the spot
        my_list[j + 1] = current


test_list_2 = [4, 3, 2, 10, 12, 1, 5, 6]
insert_sort(test_list_2)
print(test_list_2)

# Time complexity: O(n^2)
# Space complexity: O(1)

The following steps elaborate how it works. The left arrow indicates where to insert and the right arrow indicates the current picked unsorted element.

i=1: [3, 4, 2, 10, 12, 1, 5, 6]
      ^  ^
i=2: [2, 3, 4, 10, 12, 1, 5, 6]
      ^     ^
i=3: [2, 3, 4, 10, 12, 1, 5, 6]
               ^^
i=4: [2, 3, 4, 10, 12, 1, 5, 6]
                   ^^ 
i=5: [1, 2, 3, 4, 10, 12, 5, 6]
      ^               ^
i=6: [1, 2, 3, 4, 5, 10, 12, 6]
                  ^      ^
i=7: [1, 2, 3, 4, 5, 6, 10, 12]
                     ^      ^

Bubble sort

Bubble sort repeatedly swapping the adjacent elements if they are in wrong order.

def bubble_sort(my_list):
    n = len(my_list)

    # bubble up the maximum element for each round, totally (n-1) rounds
    for i in range(n - 1):
        swapped = False
        # walk through the unsorted sublist [0:n-i-2]
        for j in range(n - i - 1):
            # bubble up if the current item is greater
            if my_list[j] > my_list[j + 1]:
                temp = my_list[j]
                my_list[j] = my_list[j + 1]
                my_list[j + 1] = temp
                swapped = True

        # already sorted
        if not swapped:
            break


test_list_3 = [4, 3, 2, 10, 12, 1, 5, 6]
bubble_sort(test_list_3)
print(test_list_3)

# Time complexity: O(n^2)
# Space complexity: O(1)

The following steps elaborate how it works.

i=0: [3, 4, 2, 10, 12, 1, 5, 6]
     [3, 2, 4, 10, 12, 1, 5, 6]
     [3, 2, 4, 10, 12, 1, 5, 6]
     [3, 2, 4, 10, 12, 1, 5, 6]
     [3, 2, 4, 10, 1, 12, 5, 6]
     [3, 2, 4, 10, 1, 5, 12, 6]
     [3, 2, 4, 10, 1, 5, 6, 12]
                            ^
i=1: [2, 3, 4, 10, 1, 5, 6, 12]
     [2, 3, 4, 10, 1, 5, 6, 12]
     [2, 3, 4, 10, 1, 5, 6, 12]
     [2, 3, 4, 1, 10, 5, 6, 12]
     [2, 3, 4, 1, 5, 10, 6, 12]
     [2, 3, 4, 1, 5, 6, 10, 12]
                        ^
i=2: [2, 3, 4, 1, 5, 6, 10, 12]
     [2, 3, 4, 1, 5, 6, 10, 12]
     [2, 3, 1, 4, 5, 6, 10, 12]
     [2, 3, 1, 4, 5, 6, 10, 12]
     [2, 3, 1, 4, 5, 6, 10, 12]
                     ^
i=3: [2, 3, 1, 4, 5, 6, 10, 12]
     [2, 3, 1, 4, 5, 6, 10, 12]
     [2, 1, 3, 4, 5, 6, 10, 12]
     [2, 1, 3, 4, 5, 6, 10, 12]
                  ^
i=4: [2, 1, 3, 4, 5, 6, 10, 12]
     [1, 2, 3, 4, 5, 6, 10, 12]
     [1, 2, 3, 4, 5, 6, 10, 12]
               ^
i=5: [1, 2, 3, 4, 5, 6, 10, 12]
     [1, 2, 3, 4, 5, 6, 10, 12]
            ^
For loop ends early since there is no swap in this round!

Merge sort

Merge sort is Divide-and-Conquer algorithm. It divides list into two halves, recursively calls itself, and then merges the two sorted halves. The merge function is the key process for the merging two halves.

def merge_sort(my_list, low, high):
    if low < high:
        mid = int((low + high) / 2)

        # sort each half of list
        merge_sort(my_list, low, mid)
        merge_sort(my_list, mid + 1, high)

        # merge the sorted two halves
        merge(my_list, low, mid, high)


def merge(my_list, low, mid, high):
    n1 = mid - low + 1
    n2 = high - mid

    # temporary list to store each half of list
    my_list1 = [None] * n1
    my_list2 = [None] * n2
    for i in range(n1):
        my_list1[i] = my_list[low + i]
    for i in range(n2):
        my_list2[i] = my_list[mid + 1 + i]

    # merge the two halves using running pointers on each half
    i = j = 0
    k = low
    while i < n1 and j < n2:
        if my_list1[i] < my_list2[j]:
            my_list[k] = my_list1[i]
            i += 1
        else:
            my_list[k] = my_list2[j]
            j += 1
        k += 1

    # merge the remaining items from each sorted list
    while i < n1:
        my_list[k] = my_list1[i]
        i += 1
        k += 1
    while j < n2:
        my_list[k] = my_list2[j]
        j += 1
        k += 1


test_list_4 = [4, 3, 2, 10, 12, 1, 5, 6]
merge_sort(test_list_4, 0, len(test_list_4) - 1)
print(test_list_4)

The following process simulates how it works.

1.      [4, 3, 2, 10, 12, 1, 5, 6]
2.     [4,3,2,10]       [12,1,15,6]
3.   [4,3]   [2,10]   [12,1]   [15,6]
4.  [4] [3] [2] [10] [12] [1] [15] [6]
5.   [3,4]   [2,10]   [1,12]   [6,15]
6.     [2,3,4,10]       [1,6,12,15]
7.        [1,2,3,4,6,10,12,15]

# Time complexity: O(n*logn)
# Space complexity: O(n)

Quick sort

Like merge sort, quick sort is also a Divide-and-Conquer algorithm. It picks an element as pivot and partitions the list around the pivot. The pivot can be picked in different ways(first, last, random, or median position). The partition function is the key process. Its target is to put the pivot in the correct position, put all smaller elements to the left, and put all greater elements to the right of it.

def partition(my_list, low, high):
    pivot = my_list[high]
    curr = low - 1

    # move smaller items to the very left
    for i in range(low, high):
        if my_list[i] < pivot:
            curr += 1
            temp = my_list[curr]
            my_list[curr] = my_list[i]
            my_list[i] = temp

    # move pivot to the final place
    temp = my_list[curr + 1]
    my_list[curr + 1] = pivot
    my_list[high] = temp

    # return pivot index
    return curr + 1


def quick_sort(my_list, low, high):
    if low < high:
        pivot = partition(my_list, low, high)

        quick_sort(my_list, low, pivot - 1)
        quick_sort(my_list, pivot + 1, high)


test_list_5 = [4, 3, 2, 10, 12, 1, 5, 6]
quick_sort(test_list_5, 0, len(test_list_5) - 1)
print(test_list_5)

# Time complexity: O(n*logn)
# Space complexity: O(n)

The following process simulates how it works.

1. [4, 3, 2, 1, 5] 6 [12, 10]
                   ^ 
2. [4, 3, 2, 1] 5  6 [12, 10]
                ^
3. 1 [3, 2, 4]  5  6 [12, 10]
   ^
4. 1 [3, 2] 4   5  6 [12, 10]
            ^
5. 1 2 [3]  4   5  6 [12, 10]
     ^
6. 1 2 [3]  4   5  6 10  [12]
                     ^

Heap sort

Heap sort is a comparison based sorting algorithm based on binary heap. It follows the below idea:

  1. build a max heap with the given list
  2. repeat the following steps until the heap contains only one item:

2.1 swap the root of heap with last item in the heap

2.2 remove the last item of the heap which is already sorted to expected position

2.3 heapify the remaining items in the heap

def heapify(my_List, n, i):
    largest = i

    # get left and right child
    l = 2 * i + 1
    r = 2 * i + 2

    # check if left child exists and greater than parent
    if l < n and my_List[largest] < my_List[l]:
        largest = l

    # check if right child exist and greater than parent
    if r < n and my_List[largest] < my_List[r]:
        largest = r

    # change parent if needed
    if largest != i:
        # swap parent with largest child
        my_List[i], my_List[largest] = my_List[largest], my_List[i]

        # heapify the subtree of largest child
        heapify(my_List, n, largest)


def heap_sort(my_list):
    n = len(my_list)

    # 1. build max heap
    for i in range(n // 2 - 1, -1, -1):
        heapify(my_list, n, i)

    # extract one by one
    for i in range(n - 1, 0, -1):
        # 2.1 swap the root of heap with the last item in the heap
        my_list[0], my_list[i] = my_list[i], my_list[0]

        # 2.3 heapify the remaining items
        heapify(my_list, i, 0)


test_list_6 = [4, 3, 2, 10, 12, 1, 5, 6]
heap_sort(test_list_6)
print(test_list_6)

# Time complexity: O(n*logn)
# Space complexity: O(1)

In this post, we’ll try to understand the difference between shadow copy and deep copy in Python. We study the copy behavior with list data structure in Python.

Problem statement

Assuming we have the following list:

test_list = [1, 2, [3, 6], 4, 5]

We want to create a duplicate list so that we can modify the contents independently without affecting the original list.

test_list_clone = [1, 2, [3, 6], 4, 5]
test_list_clone[0] = 11
test_list_clone[2].append(11)
print(test_list)
print(test_list_clone)

The expected output should be:

[1, 2, [3, 6], 4, 5]

[11, 2, [3, 6, 11], 4, 5]

How should we copy the original list?

Duplicate reference

It’s not uncommon that you may want to copy the list as below.

test_list_clone = test_list
test_list_clone[0] = 11
test_list_clone[2].append(11)
print(test_list)
print(test_list_clone)

Output:

[11, 2, [3, 6, 11], 4, 5]

[11, 2, [3, 6, 11], 4, 5]

This doesn’t work since it simply stores the list reference to another variable. When the duplciate list is modified, the original list is also modified.

Copy list by brute force

In this solution, we append each item in the original list to the duplicate list by using a list comprehensions(a short form of for loop).

test_list_clone = [item for item in test_list]
test_list_clone[0] = 22
test_list_clone[2][-1] = 22
print(test_list)
print(test_list_clone)

Output:

[11, 2, [3, 6, 22], 4, 5]

[22, 2, [3, 6, 22], 4, 5]

This works partially but not for the nested list. This is because it only duplicates the outer list. But the inner list is still duplicated as a reference of original object. That’s why the number 22 is appended to both the duplciate list and original list.

Copy list with slice

We can get a subset of the list with slice in Python. The colon means to get the whole copy as original. Again, this only duplicates the outer list.

test_list_clone = test_list[:]
test_list_clone[0] = 33
test_list_clone[2][-1] = 33
print(test_list)
print(test_list_clone)

Output:

[11, 2, [3, 6, 33], 4, 5]

[33, 2, [3, 6, 33], 4, 5]

Copy list with list constructor

A more readable form might be using list constructor to duplciate the list. But it has the same issue without deep copy of inner list.

test_list_clone = list(test_list)
test_list_clone[0] = 44
test_list_clone[2][-1] = 44
print(test_list)
print(test_list_clone)

Output:

[11, 2, [3, 6, 44], 4, 5]

[44, 2, [3, 6, 44], 4, 5]

Copy list with starred expression

Another way is to use starred expression to duplciate the list.

test_list_clone = [*test_list]
test_list_clone[0] = 55
test_list_clone[2][-1] = 55
print(test_list)
print(test_list_clone)

Output:

[11, 2, [3, 6, 55], 4, 5]

[55, 2, [3, 6, 55], 4, 5]

Copy list with copy function, python 3.3+

In Python 3.3+, a copy function of the list is available. But it doesn’t do the deep copy either.

test_list_clone = test_list.copy()
test_list_clone[0] = 66
test_list_clone[2][-1] = 66
print(test_list)
print(test_list_clone)

Output:

[11, 2, [3, 6, 66], 4, 5]

[66, 2, [3, 6, 66], 4, 5]

Copy list with multiplication

A tricky way to duplciate the list is to use multiplication style.

test_list_clone = test_list * 1
test_list_clone[0] = 77
test_list_clone[2][-1] = 77
print(test_list)
print(test_list_clone)

Output:

[11, 2, [3, 6, 77], 4, 5]

[77, 2, [3, 6, 77], 4, 5]

Copy list with copy package

Fortunately, a copy package is built in Python to do the shadow copy and deep copy for us.

Shadow copy

import copy

test_list_clone = copy.copy(test_list)
test_list_clone[0] = 88
test_list_clone[2][-1] = 88
print(test_list)
print(test_list_clone)

Output:

[11, 2, [3, 6, 88], 4, 5]

[88, 2, [3, 6, 88], 4, 5]

Deep copy

test_list_clone = copy.deepcopy(test_list)
test_list_clone[0] = 99
test_list_clone[2][-1] = 99
print(test_list)
print(test_list_clone)

Output:

[11, 2, [3, 6, 88], 4, 5]

[99, 2, [3, 6, 99], 4, 5]

Finally, we got a way to do the deep copy for the nested list. Now, the nested list is modified without affecting the original list content.

Tree node definition

1
2
3
4
5
class Node:
def __init__(self, val):
self.val = val
self.left = None
self.right = None

To build the tree nodes:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
root = Node(0)
n1 = Node(1)
n2 = Node(2)
root.left = n1
root.right = n2
n3 = Node(3)
n4 = Node(4)
n1.left = n3
n1.right = n4
n5 = Node(5)
n6 = Node(6)
n2.left = n5
n2.right = n6
n7 = Node(7)
n8 = Node(8)
n3.left = n7
n3.right = n8

The tree looks like below:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
               0
/ \
1 2
/ \ / \
3 4 5 6
/ \
7 8

class Node:
def __init__(self, val):
self.val = val
self.left = None
self.right = None
```

To build the tree nodes:

```python
root = Node(0)
n1 = Node(1)
n2 = Node(2)
root.left = n1
root.right = n2
n3 = Node(3)
n4 = Node(4)
n1.left = n3
n1.right = n4
n5 = Node(5)
n6 = Node(6)
n2.left = n5
n2.right = n6
n7 = Node(7)
n8 = Node(8)
n3.left = n7
n3.right = n8

The tree looks like below:

1
2
3
4
5
6
7
       0
/ \
1 2
/ \ / \
3 4 5 6
/ \
7 8

Pre-order traversal

1
2
3
4
5
6
7
8
9
10
11
12
def pre_order_traverse(node):
if node == None:
return

print(f"{node.val} ", end="")
pre_order_traverse(node.left)
pre_order_traverse(node.right)

pre_order_traverse(root)
print()

#Output: 0 1 3 7 8 4 2 5 6

In-order traversal

1
2
3
4
5
6
7
8
9
10
11
12
def in_order_traverse(node):
if node == None:
return

in_order_traverse(node.left)
print(f"{node.val} ", end="")
in_order_traverse(node.right)

in_order_traverse(root)
print()

#Output: 7 3 8 1 4 0 5 2 6

Post-order traversal

1
2
3
4
5
6
7
8
9
10
11
12
def post_order_traverse(node):
if node == None:
return

post_order_traverse(node.left)
post_order_traverse(node.right)
print(f"{node.val} ", end="")

post_order_traverse(root)
print()

#Output: 7 8 3 4 1 5 6 2 0

Get tree size

1
2
3
4
5
6
7
8
9
10
def size(node):
if node == None:
return 0

return size(node.left) + 1 + size(node.right)

print(f"size: {size(root)}")

#Output:
#size: 9

Get tree depth

1
2
3
4
5
6
7
8
9
10
def depth(node):
if node == None:
return 0

return max(depth(node.left), depth(node.right)) + 1

print(f"depth: {depth(root)}")

#Output:
#depth: 4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
def printPaths(node, path):
if node == None:
return

# add node to the path
path.append(node.val)

if node.left == None and node.right == None:
# this is leaf node
print("path : ", end="")
for n in path:
print(f"{n} ", end="")
print()
else:
# search left subtree
printPaths(node.left, path)

# search right subtree
printPaths(node.right, path)

# remove node from path
path.pop()

printPaths(root, list())

#Output:
#path : 0 1 3 7
#path : 0 1 3 8
#path : 0 1 4
#path : 0 2 5
#path : 0 2 6

Check if path exists for target sum

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
def hasPathSum(node, sum):
if node == None:
return sum == 0

sum -= node.val

return hasPathSum(node.left, sum) or hasPathSum(node.right, sum)

print(hasPathSum(root, 7))
print(hasPathSum(root, 12))
print(hasPathSum(root, 10))

#Output:
#True
#True
#False

[Leetcode 114] Flatten Binary Tree to Linked List

he root of a binary tree, flatten the tree into a “linked list”:

  • The “linked list” should use the same TreeNode class where the right child pointer points to the next node in the list and the left child pointer is always null.
  • The “linked list” should be in the same order as a pre-order traversal of the binary tree.

Constraints:

  • The number of nodes in the tree is in the range [0, 2000].
  • -100 <= Node.val <= 100

Follow up: Can you flatten the tree in-place (with O(1) extra space)?

Solution:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Definition for a binary tree node.
# class TreeNode:
# def __init__(self, val=0, left=None, right=None):
# self.val = val
# self.left = left
# self.right = right
class Solution:
# Refer to https://leetcode.com/problems/flatten-binary-tree-to-linked-list/solutions/1208004/extremely-intuitive-o-1-space-solution-with-simple-explanation-python/?envType=study-plan-v2&envId=top-interview-150
# 1(curr) 1 1
# / \ / \
# 2 5 --> 2 --> 2
# / \ \ / \ / \
# 3 4(p) 6 3 4(p) 3 4
# \ \
# 5 5
# \ \
# 6 6
def flatten(self, root: Optional[TreeNode]) -> None:
"""
Do not return anything, modify root in-place instead.
"""
curr = root

while curr:
if curr.left != None:
p = curr.left

# find the right most node of currnt left subtree
while p.right != None:
p = p.right

# shift the contents of currnt right subtree to p.right
p.right = curr.right

# shift the contents of current left subtree to curr.right
curr.right = curr.left

# set curr.left to None
curr.left = None

# repeat the above process
curr = curr.right

Intro

A graph is a data structure that consists of vertices that are connected via edges.

Using an adjacency matrix

# implement a graph using adjacency matrix
class Graph:
    def __init__(self, size):
        self.adj_matrix = []
        for i in range(size):
            self.adj_matrix.append([0 for i in range(size)])
        self.size = size

    def add_edge(self, v1, v2):
        if v1 != v2:
            self.adj_matrix[v1][v2] = 1
            self.adj_matrix[v2][v1] = 1

    def remove_edge(self, v1, v2):
        if self.adj_matrix[v1][v2] != 0:
            self.adj_matrix[v1][v2] = 0

    def print_matrix(self):
        for row in self.adj_matrix:
            for col in row:
                print(f"{col} ", end="")
            print()


g = Graph(5)
g.add_edge(0, 1)
g.add_edge(0, 2)
g.add_edge(1, 2)
g.add_edge(2, 0)
g.add_edge(2, 3)
g.print_matrix()

# Output:
# 0 1 1 0 0
# 1 0 1 0 0
# 1 1 0 1 0
# 0 0 1 0 0
# 0 0 0 0 0

Using adjacency matrix has a drawback to implement graph. Memory is allocated for all the edges whether it is present or not. This can be avoided by using the adjacency list.

Using an adjacency list

# Using linked list based deque to improve vertice removal efficiency

from collections import deque

class Graph:
    def __init__(self):
        self.adj_list = {}

    def add_edge(self, v1, v2):
        if v1 not in self.adj_list:
            self.adj_list[v1] = deque()

        if v2 not in self.adj_list[v1]:
            self.adj_list[v1].append(v2)

        if v2 not in self.adj_list:
            self.adj_list[v2] = deque()

        if v1 not in self.adj_list[v2]:
            self.adj_list[v2].append(v1)

    def remove_edge(self, v1, v2):
        if v1 in self.adj_list and v2 in self.adj_list:
            self.adj_list[v1].remove(v2)
            self.adj_list[v2].remove(v1)

    def print_graph(self):
        for v1 in self.adj_list:
            for v2 in self.adj_list[v1]:
                print(f"({v1},{v2}) ", end="")
            print()


g = Graph()
g.add_edge(0, 1)
g.add_edge(0, 2)
g.add_edge(1, 2)
g.add_edge(2, 0)
g.add_edge(2, 3)
g.print_graph()
print()
g.remove_edge(0, 1)
g.remove_edge(1, 2)
g.print_graph()

# Output:
# (0,1) (0,2)
# (1,0) (1,2)
# (2,0) (2,1) (2,3)
# (3,2)
#
# (0,2)
#
# (2,0) (2,3)
# (3,2)

Intro

Hash Table is a data structure which maps keys to values for highly efficient data search. It stores data in an array by using a hash function to generate a slot to insert the data value.

The following are the general steps to insert a key/value pair to the Hash Table.

  1. Compute the key’s hash code with a hash function
  2. Find the available slot by mapping the hash code to an index of array
  3. Insert the value to the available slot

Initial implementation

Create an empty hash table of size 10:

hash_table = [None] * 10

To create a hash function, we simply return the modulus of the array length. The modulo operator(%) yields the remainder from the division of the key by the length of hash table. This ensures the hash key always falls into the range of [0,len(hash_table)-1]. So a slot in the array can be always retrieved.

def hash_func(key):
    return key % len(hash_table)

To insert key/value pair to the hash table, we firstly get the hash key and then stores the value to the retrieved slot.

def insert(key, value):
    hash_key = hash_func(key)
    hash_table[hash_key] = value

To test the code, we insert two key/value pairs (5,6) and (5,8). Both values are stored in the slot 5 of the array. However, the value 8 replaces the existing value 6 at the same slot(with the same key 5).

hash_table = [None] * 10
print(hash_table)
insert(5,6)
print(hash_table)

insert(5,8)
print(hash_table)

# Output:
# [None, None, None, None, None, None, None, None, None, None]
# [None, None, None, None, None, 6, None, None, None, None]
# [None, None, None, None, None, 8, None, None, None, None]

Solve the collision

A hash key collision would occur when the multiple keys hit the same slot(index) in the hash table(array).

There are generally two techniques to resolve a collision:

  • Linear probing(open addressing)
  • Separate chaining(open hashing)

Linear probing

In linear probing(aka open addressing), all the entries are stored in the array itself instead of linked list. If the slot to insert entry is already occupied, the next slot will be searched sequentially.

hash_key = hash_func(key)
while hash_table[hash_key]:
    hash_key = (hash_key + 1) % len(hash_table)

Separate chaining

Separate chaining is a very commonly used technique for collision resolution. It is usually implemented with linked list. All the entries will be inserted into a specific linked list.

In python, we create a hash table as nested list below. Each hash table bucket contains a list to store data entries.

hash_table = [[] for _ in range(10)] # init with empty list

The same hash function can be used.

def hash_func(key):
    return key % len(hash_table)

To insert an entry to the hash table, we simply append it to the list positioned by the hash key.

def insert(key, value):
    hash_key = hash_func(key)
    hash_table[hash_key].append(value)

To test the code, we still insert two key/value pairs (5,6) and (5,8). Now, the two values 6 and 8 are stored in the slot 5 of the same list.

hash_table = [[] for _ in range(10)] # init with empty list
print(hash_table)
insert(5,6)
print(hash_table)
insert(5,8)
print(hash_table)

# Output:
# [[], [], [], [], [], [], [], [], [], []]
# [[], [], [], [], [], [6], [], [], [], []]
# [[], [], [], [], [], [6, 8], [], [], [], []]

Final implementation

Now that we know about the hash function and how to resolve hash collision, we can implement the hash table with insert, delete and search functions. We use Python built-in function hash() to generate hash code from an generic object. This allows the hash table to support generic types like integer, string and so on.

def hash_func(key):
    hash_key = hash(key)
    print("Hash key is {}".format(hash_key))
    return hash_key % len(hash_table)


def insert(key, value):
    hash_key = hash_func(key)
    bucket = hash_table[hash_key]

    # check if the key already exists
    existed = False
    for idx, pair in enumerate(bucket):
        k, v = pair
        if key == k:
            existed = True
            break

    if existed:
        # update the existing value
        bucket[idx] = (key, value)
    else:
        # add the new key/value pair
        bucket.append((key, value))


def delete(key):
    hash_key = hash_func(key)
    bucket = hash_table[hash_key]

    # check if the key already exists
    existed = False
    for idx, pair in enumerate(bucket):
        k, v = pair
        if key == k:
            existed = True
            break

    if existed:
        # delete the key/value pair
        del bucket[idx]
        print("key {} deleted".format(key))
    else:
        # key does not exist
        print("key {} not found".format(key))


def search(key):
    hash_key = hash_func(key)
    bucket = hash_table[hash_key]

    # check if the key already exists
    for idx, pair in enumerate(bucket):
        k, v = pair
        if key == k:
            return v


hash_table = [[] for _ in range(10)]  # init with empty list
print(hash_table)
insert(5, 6)
print(hash_table)
insert(5, 8)
print(hash_table)
insert(3, 4)
print(hash_table)
insert(9, 999)
print(hash_table)
insert(11, 111)
print(hash_table)
insert(20, 20)
print(hash_table)
insert(1000, 1000)
print(hash_table)
print(search(11))
delete(11)
insert("11", 222)
print(hash_table)
insert("aa", "val")
print(hash_table)

# Output:
# [[], [], [], [], [], [], [], [], [], []]
# Hash key is 5
# [[], [], [], [], [], [(5, 6)], [], [], [], []]
# Hash key is 5
# [[], [], [], [], [], [(5, 8)], [], [], [], []]
# Hash key is 3
# [[], [], [], [(3, 4)], [], [(5, 8)], [], [], [], []]
# Hash key is 9
# [[], [], [], [(3, 4)], [], [(5, 8)], [], [], [], [(9, 999)]]
# Hash key is 11
# [[], [(11, 111)], [], [(3, 4)], [], [(5, 8)], [], [], [], [(9, 999)]]
# Hash key is 20
#[[(20, 20)], [(11, 111)], [], [(3, 4)], [], [(5, 8)], [], [], [], [(9, 999)]]
# Hash key is 1000
# [[(20, 20), (1000, 1000)], [(11, 111)], [], [(3, 4)], [], [(5, 8)], [], [], [], [(9, 999)]]
# Hash key is 11
# 111
# Hash key is 11
# key 11 deleted
# Hash key is 3453933796215988004
# [[(20, 20), (1000, 1000)], [], [], [(3, 4)], [('11', 222)], [(5, 8)], [], [], [], [(9, 999)]]
# Hash key is -185426594550641729
# [[(20, 20), (1000, 1000)], [('aa', 'val')], [], [(3, 4)], [('11', 222)], [(5, 8)], [], [], [], [(9, 999)]]

In languages like Python, array(aka list) is automatially resizable. The array will grow as you append items. You don’t have to resize it manually.

In some languages like Java, array comes with fixed length. The size is defined when you create it. When you need a data structure with dynamic resizing, you can use ArrayList.

In this post, we learn to implement the dynamic array in Python. This is to understand how dynamic array works regardless of any programming language.

class ArrayList:
    def __init__(self, size=10):
        self.capacity = size  # max size
        self.list = [None] * self.capacity
        self.size = 0  # current size

    def add(self, value):
        # check if there is enough capacity
        if self.size >= self.capacity:
            self._increase_capacity()

        # add value to the list
        self.list[self.size] = value
        self.size += 1

        print("{} is added to list".format(value))

    def _increase_capacity(self):
        # double the capacity
        new_capacity = self.capacity * 2

        # create a new list with new capacity
        new_list = [None] * new_capacity

        # copy the items from old to new list
        for i in range(self.capacity):
            new_list[i] = self.list[i]

        # update new capacity and list
        self.capacity = new_capacity
        self.list = new_list

        print("capacity is doubled to {}".format(self.capacity))

    def get(self, index):
        if index < 0 or index >= self.size:
            raise Exception("invalid index")

        return self.list[index]

    def delete(self, index):
        if index < 0 or index >= self.size:
            raise Exception("invalid index")

        # check if this is the last item to be deleted
        if index == self.size - 1:
            self.list[index] = None
            return

        # shift items left
        for i in range(index, self.size - 1):
            self.list[i] = self.list[i + 1]

        # update the last item to None
        self.list[self.size - 1] = None

        # update the size
        self.size -= 1

To test the code:

list1 = ArrayList(5)
list1.add(1)
list1.add(2)
list1.add(3)
list1.add(4)
list1.add(5)
list1.add(6)
list1.add(7)
print(list1.list)
list1.delete(6)
print(list1.list)

Output:

1 is added to list
2 is added to list
3 is added to list
4 is added to list
5 is added to list
capacity is doubled to 10
6 is added to list
7 is added to list
[1, 2, 3, 4, 5, 6, 7, None, None, None]
[1, 2, 3, 4, 5, 6, None, None, None, None]

Intro

A string is a collection of characters. In Java, StringBuilder class is used to create mutable string. But there isn’t a built-in StringBuilder in Python. In Python, string is an immutable object. New memory has to be allocated for every string.

In Pythin, there are following ways to implement a StringBuilder.

  1. string concatenation(append)
  2. join() function
  3. string IO module

String concatenation(append)

def method1():
    string = ""
    for i in range(count):
        string += str(i)
    return string

join() function

def method2():
    strings = []
    for i in range(count):
        strings.append(str(i))
    return "".join(strings)

StringIO module

def method3():
    si = StringIO()
    for i in range(count):
        si.write(str(i))

    return si.getvalue()

Efficiency measurement

I’m interested to see which method is more efficient. So, we are building a long string with one million numbers in order to measure the run time of each method.

from io import StringIO
import time
from array import array


# using string concatenation(append)
def method1():
    string = ""
    for i in range(count):
        string += str(i)
    return string


# using join()
def method2():
    strings = []
    for i in range(count):
        strings.append(str(i))
    return "".join(strings)


# using StringIO module
def method3():
    si = StringIO()
    for i in range(count):
        si.write(str(i))

    return si.getvalue()


# implement a StringBuilder in Python
class StringBuilder:
    def __init__(self):
        self.sb = StringIO()

    def add(self, str):
        self.sb.write(str)

    def toString(self):
        return self.sb.getvalue()


# using array
def method4():
    char_array = array("L")
    for i in range(count):
        char_array.append(i)
    return char_array


# test function
def run_test(method):
    start_time = time.perf_counter()
    eval("method" + str(method))()
    end_time = time.perf_counter()
    print("elapsed time {}".format(round(end_time - start_time, 2)))

We run the tests for the four different methods twice to measure the efficiency.

# tests
count = 1000000
run_test(1)
run_test(2)
run_test(3)
run_test(4)


run_test(4)
run_test(3)
run_test(2)
run_test(1)

# Output:
# elapsed time 0.29
# elapsed time 0.16
# elapsed time 0.19
# elapsed time 0.09

# elapsed time 0.08
# elapsed time 0.19
# elapsed time 0.15
# elapsed time 0.31

Conclusion

The string appending method is the slowest. The join() function is relative efficient. Using an array seems efficient as well. However, we don’t count the array to string convention time yet.

In general, the join() functions seems fast enough for long string construction.

Intro

A linked list is a data structure which represents a sequence of nodes. In a singly linked list, each node points to the next node. In a doubly linked list, each node points to both the next and previous node.

To find nth element in the linked list, you have to iterate through n nodes which results in linear time complexity O(n). This is less efficient than array which has constant time complexity O(1) to access an element.

However, the advantage to use linked list is for insert and delete operation since it can be achieved in the constant time without shifting nodes around.

The following are the key methods to be implemented:

  • insert(): add an item to the linked list
  • remove(): remove a specified item from the linked list
  • insertAt(index): insert an item at a given index
  • deleteAt(index): delete an item at a given index
  • find(): search an item in the linked list
  • is_empty(): return if the linked list is empty or not
  • count(): return the number of items in the linked list

Construct a node class

Before we implement a singly linked list, we need to construct a node class. The linked list would contain one or more nodes. Each node has a value and a pointer linked to the next node.

class Node:
    def __init__(self):
        self.val = None
        self.next = None

    def set_val(self, val):
        self.val = val

    def get_val(self):
        return self.val

    def set_next(self, next):
        self.next = next

    def get_next(self):
        return self.next

Build a singly Linked List

The first function in the linked list class is the constructor, which will be called whenever a linked list is initialized.

It can include three attributes:

  • head: the first node in the linked list
  • tail: the last node in the linked list
  • count: the number of nodes in the linked list

Then we can build the other required functions according to the node pointer manipulations.

class LinkedList:
    def __init__(self):
        self.head = None
        self.tail = None
        self.count = 0

    def count_nodes(self):
        return self.count

    # insert at the tail of list
    def insert(self, val):
        new_node = Node()
        new_node.set_val(val)

        if self.head == None:
            self.head = new_node
            self.tail = new_node
        else:
            self.tail.next = new_node
            self.tail = new_node

        self.count += 1

    # insert at the given index
    def insertAt(self, val, index):
        new_node = Node()
        new_node.set_val(val)

        current_idx = 0
        current_node = self.head
        previous_node = current_node

        while current_idx < index and current_node.next != None:
            previous_node = current_node
            current_node = current_node.next
            current_idx += 1

        if current_idx == index:
            # found the index
            new_node.next = current_node
            previous_node.next = new_node

            # increase counter
            self.count += 1
        else:
            print("the specified index {} is invalid".format(index))

    # delete the specified node
    def deleteAt(self, index):
        current_idx = 0
        current_node = self.head
        previous_node = current_node

        while True:
            if current_idx < index:
                if current_node.next != None:
                    # move on to the next node
                    previous_node = current_node
                    current_node = current_node.next
                    current_idx += 1
                else:
                    print("invalid index {}".format(index))
                    break
            else:
                # found the index, delete it
                if current_node.next == None:
                    # this is tail
                    previous_node.next = None
                    self.tail = previous_node
                else:
                    # this is not tail
                    previous_node.next = current_node.next

                # decrease the counter
                self.count -= 1

                break

    def find(self, index):
        current_node = self.head
        current_idx = 0

        while current_node.next != None:
            if current_idx == index:
                return current_node.get_val()
            else:
                current_node = current_node.next

        print("invalid index {}".format(index))

    def print_list(self):
        current_node = self.head
        current_idx = 0

        while current_node.next != None:
            print(current_node.get_val())
            current_node = current_node.next

        # print the last node
        print(current_node.get_val())
        print("count: {}".format(self.count_nodes()))
        print()

Test the Linked List class

ll = LinkedList()
ll.insert(1)
ll.insert(2)
ll.insert(5)
ll.print_list()

ll.insertAt(4, 2)
ll.print_list()

ll.insertAt(3, 3)
ll.print_list()

ll.deleteAt(3)
ll.print_list()

Output:

1
2
5
count: 3

1
2
4
5
count: 4

1
2
4
3
5
count: 5

1
2
4
5
count: 4
0%