Data Structures - Space and Time Complexity
Why do you think a programmer should care about space and time complexity?
Because space complexity determines how much memory a program uses and time complexity determines how long it takes to run. Caring about both keeps code efficient as inputs grow, and faster code also makes testing and iterating more convenient.
from IPython.display import Image, display
from pathlib import Path

# prepares a series of images
def image_data(path=Path("images/"), images=None):  # path of static images is defaulted
    for image in images:
        # file to open
        image['filename'] = path / image['file']  # file with path
    return images

def image_display(images):
    for image in images:
        display(Image(filename=image['filename']))

if __name__ == "__main__":
    lassen_volcano = image_data(images=[{'source': "Peter Carolin", 'label': "Lassen Volcano", 'file': "lassen-volcano.jpg"}])
    image_display(lassen_volcano)
from IPython.display import HTML, display
from pathlib import Path
from PIL import Image as pilImage
from io import BytesIO
import base64

# prepares a series of images
def image_data(path=Path("images/"), images=None):  # path of static images is defaulted
    for image in images:
        # file to open
        image['filename'] = path / image['file']  # file with path
    return images

def scale_image(img):
    baseWidth = 625
    #baseWidth = 1250
    #baseWidth = 2500
    #baseWidth = 5000 # see the effect of doubling or halving the baseWidth
    #baseWidth = 10000
    #baseWidth = 20000
    #baseWidth = 40000
    scalePercent = (baseWidth / float(img.size[0]))
    scaleHeight = int((float(img.size[1]) * float(scalePercent)))
    scale = (baseWidth, scaleHeight)
    return img.resize(scale)

def image_to_base64(img, format):
    with BytesIO() as buffer:
        img.save(buffer, format)
        return base64.b64encode(buffer.getvalue()).decode()  # encode before the buffer is closed

def image_management(image):  # computes metadata and a scaled rendering for one image
    # Image.open returns a PIL image object
    img = pilImage.open(image['filename'])
    # Python Image Library operations
    image['format'] = img.format
    image['mode'] = img.mode
    image['size'] = img.size
    image['width'], image['height'] = img.size
    image['pixels'] = image['width'] * image['height']
    # scale the image
    img = scale_image(img)
    image['pil'] = img
    image['scaled_size'] = img.size
    image['scaled_width'], image['scaled_height'] = img.size
    image['scaled_pixels'] = image['scaled_width'] * image['scaled_height']
    # scaled HTML
    image['html'] = '<img src="data:image/png;base64,%s">' % image_to_base64(image['pil'], image['format'])
if __name__ == "__main__":
    # prepare the image metadata
    images = image_data(images=[{'source': "Peter Carolin", 'label': "Lassen Volcano", 'file': "lassen-volcano.jpg"}])
    # display meta data and the scaled view for each image
    for image in images:
        image_management(image)
        print("---- meta data -----")
        print(image['label'])
        print(image['source'])
        print(image['format'])
        print(image['mode'])
        print("Original size: ", image['size'], " pixels: ", f"{image['pixels']:,}")
        print("Scaled size: ", image['scaled_size'], " pixels: ", f"{image['scaled_pixels']:,}")
        print("-- scaled image --")
        display(HTML(image['html']))
I think it's both a time and a space complexity problem. Each doubling of baseWidth roughly quadruples the number of pixels, so the code creates much more data and takes much longer to execute.
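To make that concrete, here is a quick sketch of why doubling baseWidth quadruples the work (the original dimensions below are hypothetical; the real photo's size appears in the metadata printout above):

# scaled height grows in proportion to baseWidth, so total pixels
# grow with the SQUARE of baseWidth: doubling the width quadruples the work
original_w, original_h = 4032, 3024  # hypothetical original dimensions

for base_width in [625, 1250, 2500, 5000, 10000, 20000, 40000]:
    scale = base_width / original_w
    pixels = base_width * int(original_h * scale)
    print(f"baseWidth {base_width:>6}: {pixels:,} pixels")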
Big O Notation
- Constant O(1)
- Linear O(n)
- Quadratic O(n^2)
- Logarithmic O(log n)
- Exponential O(2^n)
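To make these growth rates concrete, here is a rough illustration (a sketch, not a benchmark) of how the operation count scales for each class as n grows:

import math

# rough operation counts for each complexity class at a few input sizes
for n in [8, 16, 32]:
    print(f"n={n:>2}: O(1)=1, O(log n)={math.log2(n):.0f}, O(n)={n}, "
          f"O(n^2)={n**2}, O(2^n)={2**n:,}")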
numbers = list(range(1000))
print(numbers)
Constant O(1)
print(numbers[263])
ncaa_bb_ranks = {1:"Alabama",2:"Houston", 3:"Purdue", 4:"Kansas"}
#look up a value in a dictionary given a key
print(ncaa_bb_ranks[1]) #0.0s
def sum(a, b):  # note: this shadows Python's built-in sum()
    return a + b

print(sum(90, 88))
print(sum(.9, .88))  # 0.1s
Linear O(n)
for i in numbers:
    print(i)  # 0.4s
def reverse_list(arr):
    n = len(arr)
    reversed_arr = [None] * n  # create a list of None values with the same length as arr
    for i in range(n):
        reversed_arr[n - i - 1] = arr[i]  # copy each element to the mirrored index: front of arr to back of reversed_arr
    return reversed_arr

print(numbers)
print(reverse_list(numbers))  # 0.0s
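Python's built-in slicing and reversed() do the same job and are also O(n); the loop is just hidden inside the interpreter. A quick sanity check:

# slicing and reversed() also take O(n): every element must be moved once
assert reverse_list(numbers) == numbers[::-1] == list(reversed(numbers))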
Quadratic O(n^2)
for i in numbers:
    for j in numbers:
        print(i, j)  # 8m 40s
def multiply_matrices(matrix1, matrix2):
    m = len(matrix1)
    n = len(matrix2[0])
    result = [[0] * n for _ in range(m)]  # build m independent rows of n zeros (see the note below)
    for i in range(m):
        for j in range(n):
            for k in range(len(matrix2)):
                result[i][j] += matrix1[i][k] * matrix2[k][j]
    return result

print(multiply_matrices([[1, 2], [3, 4]], [[3, 4], [1, 2]]))  # 0.0s
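One pitfall worth noting: building the result with [[0] * n] * m (a common first attempt) repeats references to a single inner list, so every row aliases the same memory and the products accumulate into one row. The list comprehension above avoids this. A quick demonstration:

aliased = [[0] * 2] * 2          # two references to ONE inner list
aliased[0][0] = 99
print(aliased)                   # [[99, 0], [99, 0]] -- both "rows" changed

independent = [[0] * 2 for _ in range(2)]  # two separate inner lists
independent[0][0] = 99
print(independent)               # [[99, 0], [0, 0]]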
Logarithmic O(log n)
def binary_search(arr, low, high, target):
    # assumes arr is sorted; each iteration halves the remaining search range
    while low <= high:
        mid = (low + high) // 2  # integer division
        if arr[mid] == target:
            return mid
        elif arr[mid] < target:
            low = mid + 1
        else:
            high = mid - 1
    return -1  # target not found

target = 263
result = binary_search(numbers, 0, len(numbers) - 1, target)
print(result)  # 0.1s
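The standard library's bisect module performs the same O(log n) search on a sorted list; a minimal sketch of the equivalent lookup:

import bisect

i = bisect.bisect_left(numbers, target)  # insertion point for target in the sorted list
if i < len(numbers) and numbers[i] == target:
    print(i)  # 263, same result as the hand-written binary search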
Exponential O(2^n)
def fibonacci(n):
    # naive recursion: each call spawns two more, so the work roughly doubles with every increment of n
    if n <= 1:
        return n
    else:
        return fibonacci(n - 1) + fibonacci(n - 2)

print(fibonacci(5))  # 0.1s
#print(fibonacci(10)) #0.2s
#print(fibonacci(20)) #0.1s
#print(fibonacci(30)) #3.1s
#print(fibonacci(40)) #38.7s
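The exponential blow-up comes from recomputing the same subproblems over and over; caching each result brings the time down to roughly O(n), at the cost of O(n) extra space. A sketch using functools.lru_cache:

from functools import lru_cache

@lru_cache(maxsize=None)  # remember every result so each n is computed only once
def fib_memo(n):
    if n <= 1:
        return n
    return fib_memo(n - 1) + fib_memo(n - 2)

print(fib_memo(40))  # returns almost instantly, versus ~38s for the naive version above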
def generate_subsets(s):
    if not s:
        return [[]]  # the empty set has exactly one subset: itself
    subsets = generate_subsets(s[1:])  # all subsets that exclude the first element
    # each of those subsets appears twice: once with s[0] added, once without
    return [[s[0]] + subset for subset in subsets] + subsets

#print(generate_subsets([1,2,3])) #0.1s
print(generate_subsets([1, 2, 3, 4, 5, 6]))  # 0.0s
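generate_subsets is exponential for a more fundamental reason: a list of n elements has exactly 2^n subsets, so the size of the output alone forces O(2^n) time and space. A quick check:

# the output size alone is exponential: n elements produce 2**n subsets
for n in range(7):
    assert len(generate_subsets(list(range(n)))) == 2 ** n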
import time
start_time = time.time()
print(fibonacci(34))
end_time = time.time()
total_time = end_time - start_time
print("Time taken:", total_time, "seconds")
start_time = time.time()
print(fibonacci(35))
end_time = time.time()
total_time = end_time - start_time
print("Time taken:", total_time, "seconds")
Why is time and space complexity important when choosing an algorithm?
- They determine how fast the code runs and how much memory it needs; an algorithm that is fine on small inputs can become unusably slow or memory-hungry as the input grows.
Should you always use a constant time algorithm / Should you never use an exponential time algorithm? Explain.
- While constant time algorithms are very fast and exponential time algorithms aren't, it might be necessary to use both. Many problems simply have no constant time solution, and for small inputs an exponential algorithm can be the simplest (or only) option.
What are some general patterns that you noticed to determine each algorithm's time and space complexity?
- Constant time tends to be one or a few statements with no loops. Linear usually walks over a list once, so it looks like a single for loop. Quadratic is typically a for loop nested inside another for loop. Logarithmic shows up in algorithms like binary search that halve the input each step. Exponential usually comes from recursion that branches into multiple calls, like the naive fibonacci.