Conversation
|
|
||
|
|
||
| def main(): | ||
| if len(sys.argv) == 5: |
There was a problem hiding this comment.
#add
argparse is a better module to handle with command line arguments.
|
|
||
|
|
||
| def main(): | ||
| if len(sys.argv) == 5: |
There was a problem hiding this comment.
#style
Instead of pushing the entire function code by one intent level. You can skip instead.
if len(sys.argv) < 5:
continue
| k = int(sys.argv[1]) # number of nearest neighbor | ||
| d = int(sys.argv[2]) # number of pca dimension | ||
| n = int(sys.argv[3]) # number of test samples to consider | ||
| input_data = sys.argv[4] |
|
|
||
| # convert the image to gray scale | ||
| train_grayed = convert_gray(train_data, 1000 - n, 1024) | ||
| test_grayed = convert_gray(test_data, n, 1024) |
There was a problem hiding this comment.
#fix
convert entire data matrix to gray and then split into train and test subset
|
|
||
|
|
||
| def convert_gray(data, row, column): | ||
| img = data[:] |
There was a problem hiding this comment.
why creating a copy? what is the use of this line?
img = data is creating a reference
img = data[:] is creating a copy of matrix
|
|
||
| def knn(train_data, test_data, test_labels, train_labels, k, d): | ||
| # do pca and reduce dimension for train data | ||
| pca_obj, train_pca = do_pca(train_data, d) |
There was a problem hiding this comment.
do pca on train data also here. give the do_pca single responsiblity.
There was a problem hiding this comment.
you can remove a lot of functions here
| return grayed | ||
|
|
||
|
|
||
| def do_pca(gray_data, d): |
| distance = [] | ||
| neighbors = [] | ||
| for i in range(len(train_pca)): | ||
| distance.append((train_pca[i], calculate_distance(train_pca[i], test_pca), train_labels[i])) |
There was a problem hiding this comment.
#fix
just a distance array of (index, distance) is enough. you are keeping copy of each record in a tuple again.
| for i in range(len(train_pca)): | ||
| distance.append((train_pca[i], calculate_distance(train_pca[i], test_pca), train_labels[i])) | ||
| distance.sort(key=lambda x: x[1]) | ||
| for x in range(k): |
There was a problem hiding this comment.
#clean
return distance[:k]
| distance = [] | ||
| neighbors = [] | ||
| for i in range(len(train_pca)): | ||
| distance.append((train_pca[i], calculate_distance(train_pca[i], test_pca), train_labels[i])) |
There was a problem hiding this comment.
you can calculate the distance of the entire train_pca without for loop.
distance = np.sqrt(np.sum((train_mat - test_rec) ** 2))
No description provided.