Faster R-CNN

7 minute read

import tensorflow as tf

from skimage.io import imread

im = imread('zebra.jpg')

import matplotlib.pyplot as plt

plt.imshow(im)

<matplotlib.image.AxesImage at 0x17bbc7e20>

png

import numpy as np

bbs = np.array([[120,25,200,165], [300,50,480,320]])

import matplotlib.patches as pt

fig, ax = plt.subplots()
ax.imshow(im)
# Each row of bbs is (x1, y1, x2, y2); Rectangle wants the top-left corner
# plus a width and a height, so convert the corner pair accordingly.
for x1, y1, x2, y2 in bbs:
    ax.add_patch(pt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=None, color='red'))

png

# %pip install -U opencv-python

Collecting opencv-python
  Downloading opencv_python-4.6.0.66-cp37-abi3-macosx_11_0_arm64.whl (30.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 30.0/30.0 MB 49.2 MB/s eta 0:00:00
Requirement already satisfied: numpy>=1.17.3 in /Users/hdj/miniforge3/envs/tf/lib/python3.9/site-packages (from opencv-python) (1.22.4)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.6.0.66
Note: you may need to restart the kernel to use updated packages.

import cv2

# OpenCV returns the channels in BGR order; reversing the last axis gives RGB,
# which is what matplotlib expects when displaying the image.
im2 = cv2.imread('zebra.jpg')[..., ::-1]

# Draw on a copy so the clean image stays available for later steps.
im2_ = im2.copy()

for x1, y1, x2, y2 in bbs:
    cv2.rectangle(im2_, (x1, y1), (x2, y2), color=(255,0,0))

plt.imshow(im2_)

<matplotlib.image.AxesImage at 0x17ee52940>

png

im2.shape

(333, 500, 3)

im3 = cv2.resize(im2, (800,800))

800/333

2.4024024024024024

800/500

1.6

im3.shape

(800, 800, 3)

im2.shape[0]/im3.shape[0]

bbs2 = bbs.copy()

bbs2

array([[120,  25, 200, 165],
       [300,  50, 480, 320]])

# Map the boxes from the original image onto the resized one.
# Columns 0 and 2 hold x coordinates and scale with the width ratio (shape[1]);
# columns 1 and 3 hold y coordinates and scale with the height ratio (shape[0]).
# bbs2 is an integer array, so the fractional part is dropped on assignment.
bbs2[:, [0, 2]] = bbs2[:, [0, 2]] * im3.shape[1] / im2.shape[1]
bbs2[:, [1, 3]] = bbs2[:, [1, 3]] * im3.shape[0] / im2.shape[0]

bbs2

array([[192,  60, 320, 396],
       [480, 120, 768, 768]])

# Same rescaling with the sizes written out: the original image is 500 wide and
# 333 high (im2.shape == (333, 500, 3)) and the target is 800x800.
# Start again from the original boxes so the scaling is not applied twice.
bbs2 = bbs.copy()
bbs2[:,0] = bbs2[:,0]*800/500
bbs2[:,1] = bbs2[:,1]*800/333
bbs2[:,2] = bbs2[:,2]*800/500
bbs2[:,3] = bbs2[:,3]*800/333

bbs2

array([[192,  60, 320, 396],
       [480, 120, 768, 768]])

im3.shape

(800, 800, 3)

# Work on a copy so the resized image itself stays unmarked.
im3_ = im3.copy()

# Overlay the rescaled boxes with a thicker outline.
for x1, y1, x2, y2 in bbs2:
    cv2.rectangle(im3_, (x1, y1), (x2, y2), color=(255,0,0), thickness=3)

plt.imshow(im3_)

<matplotlib.image.AxesImage at 0x2c9e6be20>

png

# VGG

vgg = tf.keras.applications.VGG16(include_top=False)

Metal device set to: Apple M1


2022-08-11 20:05:16.587170: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-08-11 20:05:16.587590: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)

vgg.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, None, None, 3)]   0         
                                                                 
 block1_conv1 (Conv2D)       (None, None, None, 64)    1792      
                                                                 
 block1_conv2 (Conv2D)       (None, None, None, 64)    36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, None, None, 64)    0         
                                                                 
 block2_conv1 (Conv2D)       (None, None, None, 128)   73856     
                                                                 
 block2_conv2 (Conv2D)       (None, None, None, 128)   147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, None, None, 128)   0         
                                                                 
 block3_conv1 (Conv2D)       (None, None, None, 256)   295168    
                                                                 
 block3_conv2 (Conv2D)       (None, None, None, 256)   590080    
                                                                 
 block3_conv3 (Conv2D)       (None, None, None, 256)   590080    
                                                                 
 block3_pool (MaxPooling2D)  (None, None, None, 256)   0         
                                                                 
 block4_conv1 (Conv2D)       (None, None, None, 512)   1180160   
                                                                 
 block4_conv2 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
 block4_conv3 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
 block4_pool (MaxPooling2D)  (None, None, None, 512)   0         
                                                                 
 block5_conv1 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
 block5_conv2 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
 block5_conv3 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
 block5_pool (MaxPooling2D)  (None, None, None, 512)   0         
                                                                 
=================================================================
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________

len(vgg.layers) # excluding the 1 input layer and the 5 MaxPooling layers leaves 13 layers

im3.shape

(800, 800, 3)

vgg(im3[tf.newaxis])

<tf.Tensor: shape=(1, 25, 25, 512), dtype=float32, numpy=
array([[[[ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.14235538, ...,  0.        ,
           0.        ,  0.        ],
         ...,
         [ 0.        ,  0.        ,  0.72310966, ...,  0.        ,
           2.1115327 ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           3.5494702 ,  0.        ],
         [ 0.        ,  0.        ,  2.5827112 , ...,  0.        ,
           1.2787983 ,  0.        ]],

        [[ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         ...,
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.07996958,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ]],

        [[ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         ...,
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.77511567,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           3.3173277 ,  0.        ]],

        ...,

        [[ 0.        ,  0.        ,  0.        , ...,  0.        ,
           2.6076238 ,  0.2472178 ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  1.459591  ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.13755503],
         ...,
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           5.72634   ,  0.        ]],

        [[15.054876  ,  0.        ,  0.        , ...,  0.        ,
           2.1826699 ,  0.        ],
         [11.117959  ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         ...,
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ]],

        [[ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.2981035 ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           1.1234553 ,  0.        ],
         ...,
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ]]]], dtype=float32)>

from tensorflow.keras.backend import function

o = function([vgg.layers[0].input], [vgg.layers[0].output])

o(im3)[0].shape

(800, 800, 3)

# For every layer, build a backend function from the network input to that
# layer's output and print the output shape obtained for the batched image.
for l in vgg.layers:
    o = function([vgg.layers[0].input], [l.output])
    print(o(im3[tf.newaxis])[0].shape)

(1, 800, 800, 3)
(1, 800, 800, 64)
(1, 800, 800, 64)
(1, 400, 400, 64)
(1, 400, 400, 128)
(1, 400, 400, 128)
(1, 200, 200, 128)
(1, 200, 200, 256)
(1, 200, 200, 256)
(1, 200, 200, 256)
(1, 100, 100, 256)
(1, 100, 100, 512)
(1, 100, 100, 512)
(1, 100, 100, 512)
(1, 50, 50, 512)
(1, 50, 50, 512)
(1, 50, 50, 512)
(1, 50, 50, 512)
(1, 25, 25, 512)

model = tf.keras.Model(vgg.layers[0].input, vgg.layers[-2].output)

model.summary()

Model: "model_22"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, None, None, 3)]   0         
                                                                 
 block1_conv1 (Conv2D)       (None, None, None, 64)    1792      
                                                                 
 block1_conv2 (Conv2D)       (None, None, None, 64)    36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, None, None, 64)    0         
                                                                 
 block2_conv1 (Conv2D)       (None, None, None, 128)   73856     
                                                                 
 block2_conv2 (Conv2D)       (None, None, None, 128)   147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, None, None, 128)   0         
                                                                 
 block3_conv1 (Conv2D)       (None, None, None, 256)   295168    
                                                                 
 block3_conv2 (Conv2D)       (None, None, None, 256)   590080    
                                                                 
 block3_conv3 (Conv2D)       (None, None, None, 256)   590080    
                                                                 
 block3_pool (MaxPooling2D)  (None, None, None, 256)   0         
                                                                 
 block4_conv1 (Conv2D)       (None, None, None, 512)   1180160   
                                                                 
 block4_conv2 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
 block4_conv3 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
 block4_pool (MaxPooling2D)  (None, None, None, 512)   0         
                                                                 
 block5_conv1 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
 block5_conv2 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
 block5_conv3 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
=================================================================
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________

plt.imshow(im3_)

<matplotlib.image.AxesImage at 0x2c9da6fa0>

png

800/16

50.0

800/50 # 800 px split into 50 cells gives 16 px per cell, so each cell's center is at offset 8

16.0

x = np.arange(8, 800, 16) # cell centers: start at 8, one every 16 px
y = np.arange(8, 800, 16)

array([  8,  24,  40,  56,  72,  88, 104, 120, 136, 152, 168, 184, 200,
       216, 232, 248, 264, 280, 296, 312, 328, 344, 360, 376, 392, 408,
       424, 440, 456, 472, 488, 504, 520, 536, 552, 568, 584, 600, 616,
       632, 648, 664, 680, 696, 712, 728, 744, 760, 776, 792])

# Every (x, y) center pair as one row, with x varying slowest:
# [x0, y0], [x0, y1], ..., [x1, y0], ...
center = np.stack(np.meshgrid(x, y, indexing='ij'), axis=-1).reshape(-1, 2)

center

array([[  8,   8],
       [  8,  24],
       [  8,  40],
       ...,
       [792, 760],
       [792, 776],
       [792, 792]])

len(center)

im4 = im3.copy()

# Mark each anchor center on the copy with a small green dot.
for cx, cy in center:
    cv2.circle(im4, (cx, cy), 1, (0,255,0), thickness=5)

plt.figure(figsize=(8,8))
plt.imshow(im4)

<matplotlib.image.AxesImage at 0x17e3c6400>

png

anchor_ratio = [0.5, 1,  2]
anchor_scale = [8,  16, 32]

# One row per (center, ratio, scale) combination: 3 ratios x 3 scales = 9
# anchors for every center, stored as (x1, y1, x2, y2).
anchors_per_center = len(anchor_ratio) * len(anchor_scale)
anchor_list = np.zeros((len(center)*anchors_per_center, 4), dtype=np.int32)

# Running row index. Indexing by the center number alone would make the 9
# anchors of a center overwrite each other and leave most rows at zero.
k = 0
for c in center:
    c_x, c_y = c[0], c[1]
    for r in anchor_ratio:
        for s in anchor_scale:
            # Base box with area s**2 and width/height ratio r.
            h = pow(pow(s,2)/r, 0.5)
            w = h*r
            # Convert from feature-map cells to image pixels (16 px per cell).
            h *= 16
            w *= 16
            anchor_list[k] = [int(c_x-0.5*w), int(c_y-0.5*h), int(c_x+0.5*w), int(c_y+0.5*h)]
            k += 1

270*9

anchor_list[2430]

array([ 42, 237, 133, 418], dtype=int32)

im5 = im4.copy()

# Outline rows 2430..2438 of anchor_list in blue on top of the center dots.
for x1, y1, x2, y2 in anchor_list[2430:2439]:
    cv2.rectangle(im5, (x1, y1), (x2, y2), color=(0,0,255), thickness=3)

plt.figure(figsize=(8,8))
plt.imshow(im5)

<matplotlib.image.AxesImage at 0x2cc8d7310>

png

np.array(np.meshgrid(x,y)).reshape(-1,2)

array([[  8,  24],
       [ 40,  56],
       [ 72,  88],
       ...,
       [792, 792],
       [792, 792],
       [792, 792]])

# reference

a = np.arange(5)
b = np.arange(5)

# meshgrid returns the two coordinate grids; adding them gives the table of i + j.
np.add(*np.meshgrid(a, b))

array([[0, 1, 2, 3, 4],
       [1, 2, 3, 4, 5],
       [2, 3, 4, 5, 6],
       [3, 4, 5, 6, 7],
       [4, 5, 6, 7, 8]])

np.ogrid[0:5,0:5][0] + np.ogrid[0:5,0:5][1]

array([[0, 1, 2, 3, 4],
       [1, 2, 3, 4, 5],
       [2, 3, 4, 5, 6],
       [3, 4, 5, 6, 7],
       [4, 5, 6, 7, 8]])

np.mgrid[0:5,0:5]

array([[[0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2],
        [3, 3, 3, 3, 3],
        [4, 4, 4, 4, 4]],

       [[0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4]]])

Twitter Facebook LinkedIn

D.J Hwang

Faster R-CNN

You May Also Enjoy

Human Activity Recognition on STM32L4 IoTnode

Fast R-CNN

R-CNN/SPP-Net

BCD 변환 코드