-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathalp.py
More file actions
81 lines (55 loc) · 2.38 KB
/
alp.py
File metadata and controls
81 lines (55 loc) · 2.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import argparse
import numpy as np
import sys
from blob import get_annotations_images
from dataset import Dataset
from annotation import Annotation
from image_segmentation.picture import Picture
from image_segmentation.preprocessor import Preprocessor
def get_image_chars(image):
    """Flatten the segment hierarchy of *image* into one list of characters.

    The hierarchy is walked through three nested ``get_segments()`` calls:
    the image yields lines, each line yields words, and each word yields
    its character segments.

    Args:
        image: Object exposing ``get_segments()`` whose results expose
            ``get_segments()`` two further levels down.

    Returns:
        A new list holding every character segment, ordered line by line
        and, within a line, word by word.
    """
    return [
        character
        for line in image.get_segments()
        for word in line.get_segments()
        for character in word.get_segments()
    ]
def parse_arguments(argv=None):
    """Parse the command-line options of the Active Learning Platform.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so
            existing zero-argument callers behave as before.

    Returns:
        An ``argparse.Namespace`` with two string attributes:
        ``image_container`` (default ``'pictures'``) and
        ``annotation_container`` (default ``'code'``).
    """
    # The banner is a description of the program. Passing it as `usage`
    # replaced the synopsis argparse generates, so `--help` never showed
    # how the script is actually invoked.
    parser = argparse.ArgumentParser(
        description='Active Learning Platform to keep on learning')
    parser.add_argument('-i', '--image-container', type=str,
                        help='azure blob image container', default='pictures')
    parser.add_argument('-a', '--annotation-container', type=str,
                        help='azure blob annotation container', default='code')
    return parser.parse_args(argv)
if __name__ == '__main__':
    # Script entry point: download annotated images, cut them into
    # characters, pair each character with its annotation label and save
    # the result as a dataset.
    print("Welcome to the Active Learning Platform")
    args = parse_arguments()
    dataset = Dataset()

    # Length of the flat vector every character image is reshaped to
    # before it is added to the dataset.
    flat_char_size = 28 * 28

    # Get all the (image, annotation) pairs from Azure Blob Storage
    annotated_images = get_annotations_images(args.image_container, args.annotation_container)

    # Generate a dataset from the blob data
    # - Segment images into characters
    # - Match lines, words and character to the annotations
    # - Add the images to the dataset
    for pre_image, annotation in annotated_images:
        height, width, _ = pre_image.shape
        picture = Picture(pre_image, 0, 0, width, height, None)
        # The return value was never read; characters are taken from
        # `picture` below.
        # NOTE(review): presumably process() segments `picture` in place -
        # confirm, otherwise the processed result should be used instead.
        Preprocessor().process(picture)
        code = Annotation(annotation)
        print(code._annotation)
        image_characters = get_image_chars(picture)

        # Labels are matched to characters purely by position, so an image
        # whose character count differs from its annotation cannot be
        # labelled. Report the skip instead of dropping the image silently.
        if len(image_characters) != len(code):
            print("Skipping image: found {} characters but the annotation has {}.".format(
                len(image_characters), len(code)), file=sys.stderr)
            continue

        for idx, character in enumerate(image_characters):
            pixels = character.get_segments()
            label = code[idx]
            img = np.reshape(pixels, flat_char_size).astype('float32')
            dataset.add_image(img, label)

    dataset_name = dataset.save()
    if not dataset_name:
        print("No new additions to the dataset.")
        # NOTE(review): 69 equals EX_UNAVAILABLE from sysexits.h -
        # presumably chosen on purpose; confirm with whatever consumes
        # this exit status.
        sys.exit(69)

    # Remaining pipeline steps (not implemented in this block):
    # Move the dataset to the training directory
    # Run training and collect the model yaml and weights
    # Benchmark the results and upload the artifacts to Azure Blob Storage with a summary
    # Wait for 10 secs and try a clean shutdown