Sumit Kumar
Published © GPL3+

OtoNix - Otoscopy using Neural Image Classification

Xilinx AI-based otitis media detection to assist clinicians in ear-disease diagnosis, using deep-learning-based annotated image retrieval.

Intermediate · Full instructions provided · Over 1 day · 761

Things used in this project

Hardware components

Zynq UltraScale+ MPSoC ZCU104
×1

Software apps and online services

AMD PYNQ Framework
AMD Xilinx Software Development Kit

Story


Schematics

Circuit

OtoNix Inception V3 Customized Layers

DPU Optimized OtoNix Transfer Learning Network

SweetViz - Dataset Profiling

Code

OtoNix - Quantization

BatchFile
TF_NETWORK_PATH="/home/sumit/Desktop/OtoNix"
FROZEN_MODEL="model_incv3_3.pb"
INPUT_NODES="input_1"
OUTPUT_NODES="dense_3/Softmax"
INPUT_FN="otonix_input_fn.calib_input"

vai_q_tensorflow inspect \
    --input_frozen_graph ${TF_NETWORK_PATH}/${FROZEN_MODEL}

vai_q_tensorflow quantize \
    --input_frozen_graph ${TF_NETWORK_PATH}/${FROZEN_MODEL} \
    --input_fn ${INPUT_FN} \
    --input_nodes ${INPUT_NODES} \
    --output_nodes ${OUTPUT_NODES} \
    --input_shapes ?,224,224,3 \
    --calib_iter 10 \
    --method 1 \
    --gpu 0 \
    --output_dir ${TF_NETWORK_PATH}/qoutput
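
# Expected quantizer outputs (a sketch of what to look for; names follow the
# Vitis AI TensorFlow quantizer convention):
#   ${TF_NETWORK_PATH}/qoutput/quantize_eval_model.pb  - quantized graph for accuracy checks
#   ${TF_NETWORK_PATH}/qoutput/deploy_model.pb         - input to the compile step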

Output - Kernel Graph

Plain text
otonix_kernel_graph.gv
digraph G {
ordering = out
ranksep = 1
node [shape = box3d, fontcolor = gray18, bgcolor = oldlace, style = filled];
0[label = "kernel id: 0, type: DPUKernel", fillcolor = wheat]
1[label = "kernel id: 1, type: CPUKernel", fillcolor = wheat]
0->1[label = "(dense_3_MatMul, 0) -> (dense_3_Softmax, 0)"]
}
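
The kernel graph shows how the compiler partitions the network: everything up to the final matmul runs on the DPU (kernel 0), while the softmax falls back to the CPU (kernel 1). A quick way to visualize the .gv file is the Python graphviz package (a sketch; assumes the graphviz package and Graphviz binaries are installed):

Python
import graphviz

# Render the compiler's kernel graph to a PNG for inspection
src = graphviz.Source.from_file('otonix_kernel_graph.gv')
src.render('otonix_kernel_graph', format='png', cleanup=True)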

OtoNix-DataAug

Python
Data preprocessing and augmentation
# -*- coding: utf-8 -*-
"""OtoNix-DataAug

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1cNtvviGOAh4uwxxKlli5qzn9WS5O3WnO
"""

!pip install pandas-profiling==2.7.1
!pip install sweetviz

# Tympanic membrane dataset preprocessing

# Import the libraries
import pandas as pd
import pandas_profiling as pp
import sweetviz as sv
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
import os
from sklearn.model_selection import train_test_split
import shutil

# Read the metadata
pdf = pd.read_csv('/content/drive/MyDrive/tympanic_membrane_dataset/metadata.csv')

# Explore the dataset using pandas profiling
pp.ProfileReport(pdf)

# Explore the dataset using sweetviz
sweet_report = sv.analyze(pdf)
sweet_report.show_html()

# Set y as the labels
y = pdf['dx']

# Split the metadata into training and validation
df_train, df_val = train_test_split(pdf, test_size=0.33, random_state=1, stratify=y)

# Print the shape of the training and validation split
print(df_train.shape)
print(df_val.shape)

# Find the number of values in the training and validation set
df_train['dx'].value_counts()
df_val['dx'].value_counts()

# Transfer the images into folders
# Set the image id as the index
pdf.set_index('image_id', inplace=True)

# Get a list of images in dataset folder
folder= os.listdir('/content/drive/MyDrive/tympanic_membrane_dataset/images')

# Get a list of training and validation images
train_list = list(df_train['image_id'])
val_list = list(df_val['image_id'])

print(train_list)
print(val_list)

# Create new directories for the images for training and validation
base_dir = '/content/drive/MyDrive/tympanic_membrane_dataset/train_val'
os.mkdir(base_dir)

# Training file directory
train_dir = os.path.join(base_dir, 'train_dir')
os.mkdir(train_dir)

# Validation file directory
val_dir = os.path.join(base_dir, 'val_dir')
os.mkdir(val_dir)

# Test/quantization file directory
test_dir = os.path.join(base_dir, 'test_dir')
os.mkdir(test_dir)

# Create one subfolder per class inside the train, validation and test directories
class_names = ['aom', 'csom', 'earVentilationTube', 'earwax', 'normal',
               'otitisexterna', 'tympanoskleros']
for parent_dir in [train_dir, val_dir, test_dir]:
    for cls in class_names:
        os.mkdir(os.path.join(parent_dir, cls))

# Transfer the training images
for image in train_list:

    fname = image + '.png'
    label = pdf.loc[image, 'dx']
    
    if fname in folder:
        # source path to image
        src = os.path.join('/content/drive/MyDrive/tympanic_membrane_dataset/images', fname)
        # destination path to image
        dst = os.path.join(train_dir, label, fname)
        # copy the image from the source to the destination
        shutil.copyfile(src, dst)
  
# Transfer the validation and test/quantizer images
testimgcount=0 
#open the testlabelfile
f = open(os.path.join(base_dir, 'testlabelfile.txt'), 'a+')
fcal = open(os.path.join(base_dir, 'calibration.txt'), 'a+')

    
for image in val_list:

    fname = image + '.png'
    label = pdf.loc[image, 'dx']

    if fname in folder:
        # source path to image
        src = os.path.join('/content/drive/MyDrive/tympanic_membrane_dataset/images', fname)
        # destination path to image
        dst = os.path.join(val_dir, label, fname)
        # copy the image from the source to the destination
        shutil.copyfile(src, dst)
        #copy for testing and dnndk quantization
        if testimgcount<50:
            dst = os.path.join(test_dir,label, fname)
            shutil.copyfile(src, dst)
            #append the testlabel file
            f.write(label+'\n')
            #append in the calibration.txt file
            imgloc =  os.path.join(label,fname)
            fcal.write('{0}\n'.format(imgloc))
            # increment the file#
            testimgcount+=1
  
f.close()
fcal.close()

# Check how many training images are in each folder
print(len(os.listdir(base_dir +'/train_dir/aom')))
print(len(os.listdir(base_dir +'/train_dir/csom')))
print(len(os.listdir(base_dir +'/train_dir/earwax')))
print(len(os.listdir(base_dir +'/train_dir/normal')))
print(len(os.listdir(base_dir +'/train_dir/earVentilationTube')))
print(len(os.listdir(base_dir +'/train_dir/otitisexterna')))
print(len(os.listdir(base_dir +'/train_dir/tympanoskleros')))

# Check how many validation images are in each folder
print(len(os.listdir(base_dir +'/val_dir/aom')))
print(len(os.listdir(base_dir +'/val_dir/csom')))
print(len(os.listdir(base_dir +'/val_dir/earwax')))
print(len(os.listdir(base_dir +'/val_dir/normal')))
print(len(os.listdir(base_dir +'/val_dir/earVentilationTube')))
print(len(os.listdir(base_dir +'/val_dir/otitisexterna')))
print(len(os.listdir(base_dir +'/val_dir/tympanoskleros')))

# check how many test/quantization images are
print(len(os.listdir(base_dir + '/test_dir')))

# Import pyplot
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(12, 2))

image_samples_label = ['aom','csom','earwax','normal','earVentilationTube','otitisexterna','tympanoskleros']

# histogram for available images in training folder
nt_aom = len(os.listdir(base_dir +'/train_dir/aom'))
nt_csom = len(os.listdir(base_dir +'/train_dir/csom'))
nt_earwax = len(os.listdir(base_dir +'/train_dir/earwax'))
nt_normal = len(os.listdir(base_dir +'/train_dir/normal'))
nt_earVentilationTube = len(os.listdir(base_dir +'/train_dir/earVentilationTube'))
nt_otitisexterna = len(os.listdir(base_dir +'/train_dir/otitisexterna'))
nt_tympanoskleros = len(os.listdir(base_dir +'/train_dir/tympanoskleros'))

train_dir_samples = [nt_aom,nt_csom,nt_earwax,nt_normal,nt_earVentilationTube,nt_otitisexterna,nt_tympanoskleros]

ax.bar(image_samples_label,train_dir_samples,color = 'green',edgecolor = 'cyan')

ax.set(title = "Training Image Directory",
       xlabel = "Labels",
       ylabel = "Training Samples")
plt.show()

fig2, ax2 = plt.subplots(figsize=(12, 2))
# histogram for available images in validation folder
nv_aom = len(os.listdir(base_dir +'/val_dir/aom'))
nv_csom = len(os.listdir(base_dir +'/val_dir/csom'))
nv_earwax = len(os.listdir(base_dir +'/val_dir/earwax'))
nv_normal = len(os.listdir(base_dir +'/val_dir/normal'))
nv_earVentilationTube = len(os.listdir(base_dir +'/val_dir/earVentilationTube'))
nv_otitisexterna = len(os.listdir(base_dir +'/val_dir/otitisexterna'))
nv_tympanoskleros = len(os.listdir(base_dir +'/val_dir/tympanoskleros'))

val_dir_samples = [nv_aom,nv_csom,nv_earwax,nv_normal,nv_earVentilationTube,nv_otitisexterna,nv_tympanoskleros]

ax2.bar(image_samples_label,val_dir_samples,color = 'red',edgecolor = 'cyan')

ax2.set(title = "Validation Image Directory",
       xlabel = "Labels",
       ylabel = "Validation Samples")

plt.show()

# Create a data generator to augment the images in real time
datagen = ImageDataGenerator(
        rotation_range=180,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        vertical_flip=True,
        # brightness_range=(0.9,1.1),
        fill_mode='nearest')
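
# (Optional sanity check -- an added sketch, not part of the original pipeline.)
# Pull one augmented batch from the training directory and plot it, so the
# transform settings can be eyeballed before writing files to disk.
preview_gen = datagen.flow_from_directory(train_dir, target_size=(224, 224), batch_size=9)
preview_imgs, _ = next(preview_gen)
fig_p, axes_p = plt.subplots(3, 3, figsize=(6, 6))
for im, axp in zip(preview_imgs, axes_p.ravel()):
    axp.imshow(im.astype('uint8'))
    axp.axis('off')
plt.show()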

# Augment the data
# Class 'normal' is not going to be augmented, it already has more than 350 images
class_list = ['aom', 'csom', 'earwax', 'earVentilationTube', 'otitisexterna', 'tympanoskleros']

for item in class_list:

    # Create a temporary directory for the augmented images
    aug_dir = '/content/drive/MyDrive/tympanic_membrane_dataset/train_val/aug_dir'
    os.mkdir(aug_dir)

    # Create a directory within the base dir to store images of the same class
    img_dir = os.path.join(aug_dir, 'img_dir')
    os.mkdir(img_dir)

    # Choose a class
    img_class = item

    # List all the images in the directory
    img_list = os.listdir(base_dir +'/train_dir/' + img_class)

    # Copy images from the class train dir to the img_dir
    for fname in img_list:
        # source path to image
        src = os.path.join(base_dir +'/train_dir/' + img_class, fname)
        # destination path to image
        dst = os.path.join(img_dir, fname)
        # copy the image from the source to the destination
        shutil.copyfile(src, dst)

    # point to a dir containing the images and not to the images themselves
    path = aug_dir
    
    save_path = base_dir + '/train_dir/' + img_class

    batch_size = 50

    aug_datagen = datagen.flow_from_directory(path,
                                              save_to_dir=save_path,
                                              save_format='png',
                                              target_size=(500, 500),
                                              batch_size=batch_size)

    # Generate the augmented images and add them to the training folders
    num_aug_images_wanted = 350  # total number of images we want to have in each class
    num_files = len(os.listdir(img_dir))
    num_batches = int(np.ceil((num_aug_images_wanted - num_files) / batch_size))

    # run the generator and create about 350 augmented images
    for i in range(0, num_batches):
        imgs, labels = next(aug_datagen)

    # delete temporary directory with the raw image files
    shutil.rmtree(aug_dir)

# Check how many training images are in each folder
print(len(os.listdir(base_dir +'/train_dir/aom')))
print(len(os.listdir(base_dir +'/train_dir/csom')))
print(len(os.listdir(base_dir +'/train_dir/earwax')))
print(len(os.listdir(base_dir +'/train_dir/normal')))
print(len(os.listdir(base_dir +'/train_dir/earVentilationTube')))
print(len(os.listdir(base_dir +'/train_dir/otitisexterna')))
print(len(os.listdir(base_dir +'/train_dir/tympanoskleros')))

# Check how many validation images are in each folder
print(len(os.listdir(base_dir +'/val_dir/aom')))
print(len(os.listdir(base_dir +'/val_dir/csom')))
print(len(os.listdir(base_dir +'/val_dir/earwax')))
print(len(os.listdir(base_dir +'/val_dir/normal')))
print(len(os.listdir(base_dir +'/val_dir/earVentilationTube')))
print(len(os.listdir(base_dir +'/val_dir/otitisexterna')))
print(len(os.listdir(base_dir +'/val_dir/tympanoskleros')))

fig, ax = plt.subplots(figsize=(12, 2))

image_samples_label = ['aom','csom','earwax','normal','earVentilationTube','otitisexterna','tympanoskleros']

# histogram for available images in training folder
nt_aom = len(os.listdir(base_dir +'/train_dir/aom'))
nt_csom = len(os.listdir(base_dir +'/train_dir/csom'))
nt_earwax = len(os.listdir(base_dir +'/train_dir/earwax'))
nt_normal = len(os.listdir(base_dir +'/train_dir/normal'))
nt_earVentilationTube = len(os.listdir(base_dir +'/train_dir/earVentilationTube'))
nt_otitisexterna = len(os.listdir(base_dir +'/train_dir/otitisexterna'))
nt_tympanoskleros = len(os.listdir(base_dir +'/train_dir/tympanoskleros'))

train_dir_samples = [nt_aom,nt_csom,nt_earwax,nt_normal,nt_earVentilationTube,nt_otitisexterna,nt_tympanoskleros]

ax.bar(image_samples_label,train_dir_samples,color = 'green',edgecolor = 'black')

ax.set(title = "Training Image Directory",
       xlabel = "Labels",
       ylabel = "Training Samples")
plt.show()

fig2, ax2 = plt.subplots(figsize=(12, 2))
# histogram for available images in validation folder
nv_aom = len(os.listdir(base_dir +'/val_dir/aom'))
nv_csom = len(os.listdir(base_dir +'/val_dir/csom'))
nv_earwax = len(os.listdir(base_dir +'/val_dir/earwax'))
nv_normal = len(os.listdir(base_dir +'/val_dir/normal'))
nv_earVentilationTube = len(os.listdir(base_dir +'/val_dir/earVentilationTube'))
nv_otitisexterna = len(os.listdir(base_dir +'/val_dir/otitisexterna'))
nv_tympanoskleros = len(os.listdir(base_dir +'/val_dir/tympanoskleros'))

val_dir_samples = [nv_aom,nv_csom,nv_earwax,nv_normal,nv_earVentilationTube,nv_otitisexterna,nv_tympanoskleros]

ax2.bar(image_samples_label,val_dir_samples,color = 'orange',edgecolor = 'black')

ax2.set(title = "Validation Image Directory",
       xlabel = "Labels",
       ylabel = "Validation Samples")

plt.show()

Calibration.txt

Plain text
aom/aom (68).png
normal/normal (366).png
normal/normal (472).png
normal/normal (522).png
csom/csom (50).png
normal/normal (644).png
normal/normal (209).png
earwax/earwax (20).png
normal/normal (408).png
normal/normal (225).png
earwax/earwax (32).png
earwax/earwax (89).png
normal/normal (518).png
aom/aom (37).png
earwax/earwax (51).png
tympanoskleros/tympanoskleros (23).png
normal/normal (272).png
normal/normal (636).png
csom/csom (31).png
earwax/earwax (136).png
normal/normal (302).png
normal/normal (459).png
aom/aom (115).png
otitisexterna/otitisexterna (7).png
normal/normal (258).png
normal/normal (244).png
normal/normal (266).png
normal/normal (213).png
normal/normal (383).png
otitisexterna/otitisexterna (18).png
csom/csom (35).png
normal/normal (521).png
normal/normal (493).png
normal/normal (307).png
earwax/earwax (124).png
csom/csom (9).png
normal/normal (159).png
tympanoskleros/tympanoskleros (28).png
csom/csom (26).png
normal/normal (374).png
normal/normal (347).png
earwax/earwax (130).png
aom/aom (70).png
normal/normal (152).png
csom/csom (37).png
csom/csom (13).png
aom/aom (111).png
earwax/earwax (16).png
otitisexterna/otitisexterna (13).png
normal/normal (453).png

OtoNix - Freeze_Session.py

Python
''' Copyright 2019 Xilinx Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may 
not use this file except in compliance with the License. You may obtain
a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''

import tensorflow as tf

def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    """
    Freezes the state of a session into a computation graph.
    Creates a new computation graph where variable nodes are replaced by
    constants taking their current value in the session. 
    @param session The TensorFlow session to be frozen.
    @param keep_var_names A list of variable names that should not be frozen,
                          or None to freeze all the variables in the graph.
    @param output_names Names of the relevant graph outputs.
    @param clear_devices Remove the device directives from the graph for better portability.
    @return The frozen graph definition.
    Usage:
    from keras import backend as K
    frozen_graph = freeze_session(K.get_session(),output_names=[out.op.name for out in model.outputs])
    tf.train.write_graph(frozen_graph, ".", "model/model_incv3_2_6.pb", as_text=False)
    """
    graph = session.graph
    with graph.as_default():
        freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or []))
        output_names = output_names or []
        output_names += [v.op.name for v in tf.global_variables()]
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""
        frozen_graph = tf.graph_util.convert_variables_to_constants(
            session, input_graph_def, output_names, freeze_var_names)
        return frozen_graph
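
# A minimal usage sketch (an addition; the .h5 path is an assumption, mirroring the training notebook):
if __name__ == '__main__':
    import keras
    from keras import backend as K

    model = keras.models.load_model('model_incv3_3.h5')
    frozen_graph = freeze_session(K.get_session(),
                                  output_names=[out.op.name for out in model.outputs])
    tf.train.write_graph(frozen_graph, ".", "model_incv3_3.pb", as_text=False)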

DPU Runner

Python
runner.py
'''
Copyright 2019 Xilinx Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''

from ctypes import *
import numpy as np
import json
import os
import re

class Tensor(Structure):
  _fields_ = [
    ('name', c_char_p),
    ('dims', POINTER(c_int32)),
    ('ndims', c_int32),
    ('dtype', c_int32)
  ]

class Runner:
  # tensor format enum
  TensorFormat = type('', (), {})()
  TensorFormat.NCHW = 0
  TensorFormat.NHWC = 1

  def __init__(self, path):
    metaFile = os.path.join(path, "meta.json")
    if not os.path.isfile(metaFile):
      raise AssertionError("meta.json file %s not found" % metaFile)
      
    # select .so file based on path/meta.json
    with open(metaFile) as f:
      meta = json.load(f)
      libFile = self._parse_path(meta['lib'])

    if not libFile or not os.path.isfile(libFile):
      raise AssertionError("C++ library .so file %s not found" % libFile)

    self._libFile = os.path.abspath(libFile)
    self._lib = cdll.LoadLibrary(self._libFile)

    self._lib.DpuPyRunnerCreate.argtypes = [c_char_p]
    self._lib.DpuPyRunnerCreate.restype = c_void_p
    self._lib.DpuPyRunnerGetInputTensors.argtypes = [c_void_p,
      POINTER(c_void_p), POINTER(c_int)]
    self._lib.DpuPyRunnerGetOutputTensors.argtypes = [c_void_p, 
      POINTER(c_void_p), POINTER(c_int)]
    self._lib.DpuPyRunnerGetTensorFormat.argtypes = [c_void_p]
    self._lib.DpuPyRunnerGetTensorFormat.restype = c_int
    self._lib.DpuPyRunnerExecuteAsync.argtypes = [c_void_p, 
      POINTER(np.ctypeslib.ndpointer(c_float, flags="C_CONTIGUOUS")),
      POINTER(np.ctypeslib.ndpointer(c_float, flags="C_CONTIGUOUS")),
      c_int, POINTER(c_int)]
    self._lib.DpuPyRunnerExecuteAsync.restype = c_int
    self._lib.DpuPyRunnerWait.argtypes = [c_void_p, c_int]
    self._lib.DpuPyRunnerWait.restype = c_int
    self._lib.DpuPyRunnerDestroy.argtypes = [c_void_p]

    self._runner = self._lib.DpuPyRunnerCreate(path.encode('utf-8'))

  def get_input_tensors(self):
    ptr = c_void_p()
    n = c_int(0)
    self._lib.DpuPyRunnerGetInputTensors(self._runner, byref(ptr), byref(n))
    tensors = []
    for i in range(n.value):
      tensors.append(Tensor.from_address(ptr.value + (i*sizeof(Tensor))))
    return tensors

  def get_output_tensors(self):
    ptr = c_void_p()
    n = c_int(0)
    self._lib.DpuPyRunnerGetOutputTensors(self._runner, byref(ptr), byref(n))
    tensors = []
    for i in range(n.value):
      tensors.append(Tensor.from_address(ptr.value + (i*sizeof(Tensor))))
    return tensors
  
  def get_tensor_format(self):
    return(self._lib.DpuPyRunnerGetTensorFormat(self._runner))

  def execute_async(self, inputs, outputs):
    """
      Args:
        inputs: list of numpy arrays
        outputs: list of numpy arrays
        order of numpy arrays in inputs/outputs must match 
          the order in get_input_tensors() and get_output_tensors()
    """
    status = c_int(0)
    ret = self._lib.DpuPyRunnerExecuteAsync(self._runner,
      self._numpy_list_2_cptr_list(inputs),
      self._numpy_list_2_cptr_list(outputs),
      inputs[0].shape[0], byref(status))

    if status.value != 0:
      raise RuntimeError("Runner.execute_async could not enqueue new DPU job")

    return ret

  def _numpy_list_2_cptr_list(self, nplist):
    ptrList = (np.ctypeslib.ndpointer(c_float, flags="C_CONTIGUOUS") * len(nplist))()

    for i, tensor in enumerate(nplist):
      ptrList[i] = tensor.ctypes.data_as(np.ctypeslib.ndpointer(c_float, flags="C_CONTIGUOUS"))

    return ptrList

  def _parse_path(self, path):
    """
      Translate any {STRING} in 'path' to os.environ["STRING"]
      E.g., {XILINX_ROOT}/path/to/file to /opt/xilinx/path/to/file
    """
    retpath = path
    regex = r"\{(.*?)\}"
    matches = re.finditer(regex, path, re.MULTILINE | re.DOTALL)
    for matchNum, match in enumerate(matches):
      word = match.group(1)
      retpath = retpath.replace("{"+word+"}", os.environ[word])

    return retpath

  def wait(self, job_id):
    return self._lib.DpuPyRunnerWait(self._runner, job_id)

  def __del__(self):
    if hasattr(self, '_lib') and self._lib \
      and hasattr(self, '_runner') and self._runner:
      self._lib.DpuPyRunnerDestroy(self._runner)
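
# A minimal usage sketch (an addition; 'dpuv2_rundir/' is an assumed path whose
# meta.json must point at the compiled DPU shared library):
if __name__ == '__main__':
  dpu = Runner('dpuv2_rundir/')
  print([t.name for t in dpu.get_input_tensors()])
  print([t.name for t in dpu.get_output_tensors()])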

otonix_aux.py

Python
'''
Copyright 2019 Xilinx Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''

from ctypes import *
import cv2
import numpy as np
import runner
import os
import input_fn_sl as input_fn
import math
import threading
import time
import sys


def passedp(label_index, pred_softmax):
    # count samples whose arg-max prediction matches the ground-truth label
    if len(label_index) != len(pred_softmax):
        raise ValueError("Lists of different length.")
    return sum(i == np.argmax(j) for i, j in zip(label_index, pred_softmax))

def top_n_accuracy(label_index, pred_softmax, n):
    if len(label_index) != len(pred_softmax):
        raise ValueError("Lists of different length.")
    # take the top n predictions for each sample
    best_n = np.argsort(pred_softmax, axis=1)[:, -n:]
    return sum(i in j for i, j in zip(label_index, best_n))
'''
Calculate softmax
data: data to be calculated
size: data size
return: softmax result
'''
def CPUCalcSoftmax(data, size):
    total = 0.0
    result = [0.0 for i in range(size)]
    for i in range(size):
        try:
            result[i] = math.exp(data[i])
        except OverflowError:
            print('Math range error for data {}'.format(data[i]))
        total += result[i]
    for i in range(size):
        result[i] /= total
    return result
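
# A numerically stabler NumPy equivalent (an alternative sketch, not wired into the flow below):
def np_softmax(data):
    e = np.exp(np.asarray(data, dtype=np.float64) - np.max(data))
    return e / e.sum()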

def get_script_directory():
    path = os.getcwd()
    return path

'''
Get top-k results according to their probability
datain: softmax output
size: number of classes
filePath: path of the file that records the class names
'''
def TopK(datain, size, filePath):

    cnt = [i for i in range(size)]
    pair = zip(datain, cnt)
    pair = sorted(pair, reverse=True)
    softmax_new, cnt_new = zip(*pair)
    fp = open(filePath, "r")
    data1 = fp.readlines()
    fp.close()
    for i in range(5):
        flag = 0
        for line in data1:
            if flag == cnt_new[i]:
                print("Top[%d] %f %s" % (i, softmax_new[i], line.strip("\n")))
            flag = flag + 1

#l = threading.Lock()
#SCRIPT_DIR = get_script_directory()
#calib_image_dir  = SCRIPT_DIR + "/../images/"
#IMAGE_WIDTH = 224
#IMAGE_HEIGHT = 224
#batchSize = 2
#global threadnum
#threadnum = 0
#global runTotall
#runRotal = 0

'''
run the OtoNix classifier with batch
dpu: dpu runner
img: imagelist to be run
cnt: start offset for this thread
runTotall: total images to run
'''
def otonixv1(dpu, img, cnt, runTotall, batchSize, threadnum, l):
    print('cnt : {} and runTotall: {}'.format(cnt, runTotall))

    """get tensor"""
    inputTensors = dpu.get_input_tensors()
    outputTensors = dpu.get_output_tensors()
    tensorformat = dpu.get_tensor_format() 
    if tensorformat == dpu.TensorFormat.NCHW:
        outputHeight = outputTensors[0].dims[2]
        outputWidth = outputTensors[0].dims[3]
        outputChannel = outputTensors[0].dims[1]
    elif tensorformat == dpu.TensorFormat.NHWC:
        outputHeight = outputTensors[0].dims[1]
        outputWidth = outputTensors[0].dims[2]
        outputChannel = outputTensors[0].dims[3]
    else:
        exit("Format error")
    outputSize = outputHeight*outputWidth*outputChannel    
    softmax = np.empty(outputSize)
    
    #global runTotall
    count = cnt 
    while count < runTotall:
        l.acquire()
        if (runTotall < (count+batchSize)):
            runSize = runTotall - count
        else:
            runSize = batchSize
        l.release()
        shapeIn = (runSize,) + tuple([inputTensors[0].dims[i] for i in range(inputTensors[0].ndims)][1:])
        
        """prepare batch input/output """
        outputData = []
        inputData = []
        outputData.append(np.empty((runSize,outputSize), dtype = np.float32, order = 'C'))
        inputData.append(np.empty((shapeIn), dtype = np.float32, order = 'C'))
        
        """init input image to input buffer """
        for j in range(runSize):
            imageRun = inputData[0]
            imageRun[j,...] = img[count+j].reshape(inputTensors[0].dims[1],inputTensors[0].dims[2],inputTensors[0].dims[3])
        """run with batch """
        job_id = dpu.execute_async(inputData,outputData)
        dpu.wait(job_id)

        """softmax calculate with batch """
        for j in range(runSize):
            softmax = CPUCalcSoftmax(outputData[0][j], outputSize)
        l.acquire()
        count = count + threadnum*runSize
        l.release()
    
def main(argv):

    """create runner """
    dpu = runner.Runner(argv[2])

    # standalone defaults; Otonix_app.py passes its own values to otonixv1
    calib_image_dir = get_script_directory() + "/../images/"
    batchSize = 2
    l = threading.Lock()

    listimage = os.listdir(calib_image_dir)
    threadAll = []
    threadnum = int(argv[1])
    runTotall = len(listimage)

    """image list to be run """
    img = []
    for i in range(runTotall):
        path = os.path.join(calib_image_dir, listimage[i])
        image = cv2.imread(path)
        img.append(input_fn.preprocess_fn(image))

    """run with batch """
    time1 = time.time()
    for i in range(threadnum):
        t1 = threading.Thread(target=otonixv1,
                              args=(dpu, img, i * batchSize, runTotall, batchSize, threadnum, l))
        threadAll.append(t1)
    for x in threadAll:
        x.start()
    for x in threadAll:
        x.join()

    time2 = time.time()

    timetotal = time2 - time1
    fps = float(runTotall / timetotal)
    print("%.2f FPS" % fps)

    del dpu
    
if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("please input thread number and json file path.")
    else :
        main(sys.argv)

Otonix_app.py

Python
from ctypes import *
import cv2
import numpy as np
import runner
import os
import input_fn_sl as input_fn
import math
import threading
import time
import sys
from otonix_aux import *

# Transform the dpu_otonix_0.elf into the shared library libdpumodelotonix.so
!/usr/bin/aarch64-xilinx-linux-gcc -fPIC -shared \
../coutput/output_zcu102/dpu_otonix_0.elf -o libdpumodelotonix.so

SCRIPT_DIR = get_script_directory()
calib_image_dir  = SCRIPT_DIR + "/../data/calib/"
IMAGE_WIDTH = 224
IMAGE_HEIGHT = 224
threadnum = 0
runTotal = 0
path = os.getcwd()
batchSize =2



# load the labelled test/calibration images
labels = ['aom', 'csom', 'earwax', 'earVentilationTube', 'normal', 'otitisexterna', 'tympanoskleros']
print(labels.index('aom'))

org_label_index =[]
img = []
with open(path +'/../data/calibration.txt', 'r') as fp:
    for cnt, line in enumerate(fp):
        org_label_index.append(labels.index(line.split('/')[0] ))
        # append image
        img_path = os.path.join(calib_image_dir,line.rstrip())
        #print(img_path)
        image = cv2.imread(img_path)
        image = cv2.resize(image,(224,224))
        img.append(input_fn.preprocess_fn(image))
        #print("Line {}: {}".format(org_label_index[cnt], line.split('/')[0]))

runTotal = len(img)
runSteps = int(runTotal/batchSize)
print('{} images loaded.'.format(runTotal))

# DPU related
"""create runner """
dpu = runner.Runner(path+'/dpuv2_rundir/' ) 

"""get tensor"""
inputTensors = dpu.get_input_tensors()
outputTensors = dpu.get_output_tensors()
tensorformat = dpu.get_tensor_format() 
if tensorformat == dpu.TensorFormat.NCHW:
    outputHeight = outputTensors[0].dims[2]
    outputWidth = outputTensors[0].dims[3]
    outputChannel = outputTensors[0].dims[1]
elif tensorformat == dpu.TensorFormat.NHWC:
    outputHeight = outputTensors[0].dims[1]
    outputWidth = outputTensors[0].dims[2]
    outputChannel = outputTensors[0].dims[3]
else:
    exit("Format error")
    
outputSize = outputHeight*outputWidth*outputChannel

softmax = []
for steps in range(runSteps):
    shapeIn = (batchSize,) + tuple([inputTensors[0].dims[i] for i in range(inputTensors[0].ndims)][1:])
    #print(shapeIn)
    """prepare batch input/output """
    outputData = []
    inputData = []
    outputData.append(np.empty((batchSize,outputSize), dtype = np.float32, order = 'C'))
    inputData.append(np.empty((shapeIn), dtype = np.float32, order = 'C'))

    """init input image to input buffer """
    k = steps * batchSize
    for j in range(batchSize):
        imageRun = inputData[0]
        #print(j)
        imageRun[j,...] = img[j+k].reshape(inputTensors[0].dims[1],inputTensors[0].dims[2],inputTensors[0].dims[3])
    """run with batch """
    job_id = dpu.execute_async(inputData,outputData)
    dpu.wait(job_id)
    #print(inputData[0])

    """softmax calculate with batch """
    for j in range(batchSize):
        softmax.append( CPUCalcSoftmax(outputData[0][j], outputSize))

# delete the dpu resources
del dpu

# accuracy
passed = passedp(org_label_index, softmax)
print('Passed: {0}, Failed: {1}, Accuracy: {2:2.2f}%'.format(passed, len(softmax)-passed, passed/len(softmax)*100))
print('Top-3 accuracy: {0:2.2f}%'.format(top_n_accuracy(org_label_index,softmax, 3)/len(softmax)*100))

# FPS estimation

SCRIPT_DIR = get_script_directory()
calib_image_dir  = SCRIPT_DIR + "/../data/calib/nv"

IMAGE_WIDTH = 224
IMAGE_HEIGHT = 224
batchSize = 1
threadnum = 0
runTotal = 0
path = os.getcwd()

"""create runner """
dpu = runner.Runner(path+'/dpuv2_rundir' ) 

#multi threading
l = threading.Lock()
threadAll = []
threadnum = 4 #int(argv[1])

"""image list to be run """
listimage=os.listdir(calib_image_dir)
runTotall = len(listimage)
img = []
for i in range(runTotall):
    path = os.path.join(calib_image_dir,listimage[i])
    image = cv2.imread(path)
    image = cv2.resize(image,(224,224))
    img.append(input_fn.preprocess_fn(image))

    """run with batch """
time1 = time.time()
for i in range(threadnum):
    t1 = threading.Thread(target=otonixv1, args=(dpu, img, i*batchSize, runTotall, batchSize,threadnum, l))
    threadAll.append(t1)
for x in threadAll:
    x.start()
for x in threadAll:
    x.join()

time2 = time.time()

timetotal = time2 - time1
fps = float(runTotall / timetotal)
print("%.2f FPS" %fps)

# Free resources
del dpu

Google Drive Link

Plain text
https://drive.google.com/drive/folders/1KftsaByjCWLN0U1HQGBd0O9cNrXnaBjE?usp=sharing

OtoNix - Training

Python
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import sys
sys.path.append('/floyd/input/local_dataset')
sys.path.append('/floyd/home')  # directory that contains freeze_session.py


# In[2]:


from tensorflow.python.client import device_lib
device_lib.list_local_devices()


# In[3]:


get_ipython().system('cat /proc/meminfo')


# In[4]:


# The model for the tympanic membrane otitis media classifier.
# Vinod Dec, 2020
# Trained on the tympanic membrane dataset: 7 types of otitis media.

# - Acute Otitis Media(aom)
# - Chronic suppurative otitis media(csom)
# - Earwax(earwax)
# - Ear Ventilation Tube(earVentilationTube)
# - Normal(normal)
# - Otitis externa(otitisexterna)
# - Tympanoskleros(tympanoskleros)


# Import the libraries
import numpy as np
import tensorflow as tf
import keras
from keras import backend as K
from keras.layers.core import Dense, Dropout
from keras.layers import Flatten
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from freeze_session import freeze_session


# In[5]:


print(tf.__version__)


# In[6]:


# Check if GPU is available
#tf.config.list_physical_devices('GPU')
K.tensorflow_backend._get_available_gpus()


# In[17]:


#  The paths for the training and validation images
train_path = '/floyd/input/local_dataset/train_val/train_dir'
valid_path = '/floyd/input/local_dataset/train_val/val_dir'

# Declare a few hyperparameters 
num_train_samples = 1846
num_val_samples = 311
train_batch_size = 8
val_batch_size = 8
image_size = 224

# Steps needed per epoch, e.g. ceil(1846 / 8) = 231 training steps
train_steps = np.ceil(num_train_samples / train_batch_size)
val_steps = np.ceil(num_val_samples / val_batch_size)


# In[8]:


# generators
train_batches = ImageDataGenerator(
    preprocessing_function= \
        keras.applications.inception_v3.preprocess_input).flow_from_directory(
    train_path,
    target_size=(image_size, image_size),
    batch_size=train_batch_size,
    shuffle=True)

valid_batches = ImageDataGenerator(
    preprocessing_function= \
        keras.applications.inception_v3.preprocess_input).flow_from_directory(
    valid_path,
    target_size=(image_size, image_size),
    batch_size=val_batch_size)

test_batches = ImageDataGenerator(
    preprocessing_function= \
        keras.applications.inception_v3.preprocess_input).flow_from_directory(
    valid_path,
    target_size=(image_size, image_size),
    batch_size=val_batch_size,
    shuffle=False)


# In[9]:


tcl =train_batches.classes
print(tcl.shape)
vcl = valid_batches.classes
print(vcl)
tscl = test_batches.classes
print(tscl)
print(test_batches.class_indices.keys())
print(valid_batches.class_indices.values())
print(test_batches.labels)


# In[10]:


# Create an Inception V3 model along with its ImageNet weights
iv3_model = keras.applications.inception_v3.InceptionV3(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
# See a summary of the layers in the model
iv3_model.summary()


# In[11]:


# Take the output of inception_v3 just before the last layer
x = iv3_model.output
# flatten the outputs of the last conv layer
flatten = Flatten()(x)
# add two fully connected layers; to meet the DPU requirement, keep the output/input ratio at ~1/6
dense1 = Dense(2048, activation='relu')(flatten)
dense3= Dense(128, activation= 'relu')(dense1)
# adding the prediction layer with 'softmax'
predictions = Dense(7, activation='softmax')(dense3)

# Create a new model with the new outputs
model = Model(inputs=iv3_model.input, outputs=predictions)

# See a summary of the new layers in the model
model.summary()


# In[12]:


# Freeze the weights of the layers that aren't training
for layer in model.layers[:-3]:
    layer.trainable = False
for layer in model.layers:
    print(layer.name, layer.trainable)


# In[ ]:


# Train the model
# Define Top2 and Top3 Accuracy
from keras.metrics import categorical_accuracy, top_k_categorical_accuracy
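
# The commented-out compile line below references top_2_accuracy / top_3_accuracy,
# which this notebook never defines; a minimal sketch of the usual helpers:
def top_2_accuracy(y_true, y_pred):
    return top_k_categorical_accuracy(y_true, y_pred, k=2)

def top_3_accuracy(y_true, y_pred):
    return top_k_categorical_accuracy(y_true, y_pred, k=3)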

# Compile the model
#model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=[categorical_accuracy, top_2_accuracy, top_3_accuracy])
model.compile(Adam(lr=0.01), loss='categorical_crossentropy', metrics=[categorical_accuracy])

# Add weights to make the model more sensitive to acute otitis media(aom)
# 'aom', 'csom', 'earVentilationTube', 'earwax', 'normal', 'otitisexterna', 'tympanoskleros'
class_weights={
    0: 3.0,  # aom
    1: 1.0,  # csom
    2: 1.0,  # earVentilationTube
    3: 1.0,  # earwax
    4: 1.0,  # normal
    5: 1.0,  # otitisexterna
    6: 1.0,  # tympanoskleros
}

# Declare the filepath for the saved model
filepath = "/floyd/home/model_incv3_6.h5"

# Declare a checkpoint to save the best version of the model
checkpoint = ModelCheckpoint(filepath, monitor='val_categorical_accuracy', verbose=1,
                             save_best_only=True, mode='max')

# Reduce the learning rate as the learning stagnates
reduce_lr = ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.2, patience=2,
                              verbose=1, mode='max', min_lr=0.000001)
callbacks_list = [checkpoint, reduce_lr]

# Fit the model
history = model.fit_generator(train_batches,
                              steps_per_epoch=train_steps,
                              class_weight=class_weights,
                              validation_data=valid_batches,
                              validation_steps=val_steps,
                              epochs=5,
                              verbose=1,
                              callbacks=callbacks_list)


# In[13]:


# Evaluation of the best epoch
#model.load_weights('model/model_incv3_2.h5')
model=keras.models.load_model('/floyd/home/model_incv3_3.h5')

val_loss, val_cat_acc = model.evaluate_generator(test_batches, steps=val_steps)

print('val_loss:', val_loss)
print('val_cat_acc:', val_cat_acc)


# In[14]:


score = model.evaluate_generator(test_batches, steps=val_steps)
print(model.metrics_names, score)


# In[15]:


print('Output node:', [out.op.name for out in model.outputs])
print('Input node:',[inp.op.name for inp in model.inputs])


# In[16]:


# save the model as .pb file
from freeze_session import freeze_session
pb_file = "/floyd/home/model_incv3_3.pb"
frozen_graph = freeze_session(K.get_session(),output_names=[out.op.name for out in model.outputs])
tf.train.write_graph(frozen_graph, ".", pb_file , as_text=False)


# In[ ]:

Otonix_Input_fn.py

Python
We modified the default input-function file that ships with the Vitis AI examples.
#!/usr/bin/env python
# coding: utf-8
# 
# Copyright 2020 Xilinx Inc.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np
import cv2

INPUT_SHAPE = (224,224)
IMAGE_DIR = '/home/sumit/Desktop/OtoNix/dataset_local/train_val'

calib_image_list="/home/sumit/Desktop/OtoNix/dataset_local/train_val/calibration.txt"
calib_batch_size = 10

def calib_input(iter):
    images =[]
    line = open(calib_image_list).readlines()
    for index in range(0, calib_batch_size):
        cline = line[iter * calib_batch_size+index]
        calib_image_name = cline.strip()

        # open the image as BGR (OpenCV default)
        print('processing image: {}\n'.format(os.path.join(IMAGE_DIR, calib_image_name)))
        image = cv2.imread(os.path.join(IMAGE_DIR, calib_image_name))
        #convert to RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # resize to (224, 224); note cv2.resize takes (width, height)
        image = cv2.resize(image, INPUT_SHAPE)
        image = image/255.0
        #append in the list
        images.append(image)
    
    images = np.asarray(images)
    print('input shape of iter {0} is {1}\n'.format(iter, images.shape))
    return { "input_1": images}

Otonix - Vitis AI compilation

BatchFile
#!/bin/bash

# Copyright 2020 Xilinx Inc.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

ML_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && cd ../../.. && pwd )"
export ML_DIR
dcf_dir='../../xilinx_dnndk_v3.1/host_x86/dcf' 
net=otonix_ld
model_dir=quantize_results
output_dir=dnnc_output

home_dir=$ML_DIR

echo "Compiling network: ${net}"

dnnc --frozen_pb=${model_dir}/deploy_model.pb     \
     --parser=tensorflow \
     --output_dir=${output_dir} \
     --net_name=${net}                           \
     --dcf=${dcf_dir}/ZCU104.dcf               \
     --cpu_arch=arm64                            \
     --mode=debug                                \
     --save_kernel \
     --dump all


echo " copying dpu elf file into /../zcu104/baseline/model/arm64_2048 "
cp ${output_dir}/dpu_${net}_*.elf ../zcu104/baseline/model/arm64_2048
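
# On the target, the DPU kernel .elf is then wrapped into a shared library before
# the Python runner can load it (a sketch; the cross-compiler path may differ):
#   aarch64-xilinx-linux-gcc -fPIC -shared dpu_${net}_0.elf -o libdpumodel${net}.so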

Credits

Sumit Kumar

19 y/o. My daily routine involves dealing with electronics, code, distributed storage and cloud APIs.