前言
在之前的很多分享中,无论是最近看的360测试之美,还是更早的各种分享,大家在UI自动化中都对弹窗处理做了很多讲解,但是我一直都没有实战过。那天看了 https://testerhome.com/articles/27527 这篇文章半天,也没有写出代码来。后来在和大佬沟通的过程中,对方推荐了美团开源的一个框架--vision-ml
正文
满怀信心地下载了别人的开源代码。下载到本地后,安装依赖时各种报错,执行不下去。后来翻阅了大量资料,发现是python版本不一样,导致很多库的版本需要适配,那么最快速的方式呢,就是去找现成的版本,在testerhome恒总找到了版本。但是在我本地安装还是各种报错。所以我最后固定的版本如下:
Python3.7.9
Keras==2.2.4
numpy==1.21.4
tensorflow==1.15.2
scikit-learn==1.0.1
scipy==1.7.3
selectivesearch==0.4
opencv-python==3.4.2.17
h5py==2.10.0
那么我们先按照美团开源的去运行下,首先运行rcnn_train.py。去产生我们的模型,然后运行rcnn_predict.py
结果
可以正常返回坐标,然后结果可以正常选中。
针对里面的代码,我进行了部分删除,不再展示最后的图片。
在实际的执行中,我发现有些路径在不同的电脑上可能会不一样,那么我们应该怎么做呢,我对代码进行了改造。
all_config.py改成了
# -*- coding: UTF-8 -*-
"""Shared configuration for the training and prediction scripts."""
import os

# Directory that contains this config file. All data folders are resolved
# against it (instead of os.getcwd()) so the scripts find the same folders
# no matter which directory they are launched from.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# image shape
IMG_ROW, IMG_COL = 50, 50

# model train
batch_size = 30
num_classes = 2
epochs = 10

# model name
path = os.path.join(BASE_DIR, 'model')
model_name = os.path.join(path, "trained_model_1.h5")

# image path
IMAGE_PATH = os.path.join(BASE_DIR, 'image')
TRAIN_PATH = os.path.join(BASE_DIR, 'train')
PREDICT_PATH = os.path.join(BASE_DIR, 'predict')

# selective search
SCALE = 2.0
SIGMA = 0.8
MIN_SIZE = 80

# augmentation size for one class
augmentation_size = 350
在训练模块改造了
import keras
import numpy as np
from all_config import *
from image_utils.image_utils import *
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from keras import regularizers
# Input image dimensions (rows, cols), taken from the shared config.
img_rows = IMG_ROW
img_cols = IMG_COL
# File the trained model is written to.
trained_model = model_name
class Image(object):
    """
    Generate augmented training images from the user images in the image folder.

    Source images are read from ``IMAGE_PATH``. The class of an image is the
    part of its file name before the first underscore ("0" or "1").
    Augmented samples are written to ``TRAIN_PATH``.
    """

    def __init__(self):
        self.image_path = IMAGE_PATH
        self.train_path = TRAIN_PATH

    def get_augmentation(self):
        """
        Fill the train folder with augmented images for every class.

        The train folder is emptied first. For each class prefix the matching
        source images are binarized, resized and passed through a Keras
        ``ImageDataGenerator`` until ``augmentation_size`` samples were saved.

        :raises ValueError: if a class has no readable source image
        :return: None
        """
        cls_list = ["0", "1"]
        self._clear_train_path()
        for cls_prefix in cls_list:
            x = []
            for name in os.listdir(self.image_path):
                if name.split("_")[0] != cls_prefix:
                    continue
                img = cv2.imread(os.path.join(self.image_path, name))
                if img is None:
                    # cv2.imread returns None (it does not raise) for files
                    # it cannot decode; skip them instead of crashing later.
                    print("skip unreadable image: {0}".format(name))
                    continue
                _, img = get_binary_image(img)
                # cv2.resize takes dsize as (width, height) = (cols, rows).
                img = cv2.resize(img, (img_cols, img_rows))
                x.append(img)
            if not x:
                # Fail with a clear message instead of the rank error Keras
                # raises for an empty array.
                raise ValueError(
                    "no readable image for class {0} in {1}".format(cls_prefix, self.image_path))
            x = np.asarray(x, np.float32)
            data_gen = ImageDataGenerator(rotation_range=20, width_shift_range=0.1, height_shift_range=0.1,
                                          horizontal_flip=True, vertical_flip=True, fill_mode='nearest',
                                          data_format='channels_last')
            batches = data_gen.flow(x, batch_size=1, save_to_dir=self.train_path,
                                    save_prefix=cls_prefix, save_format="png")
            # Every batch drawn from the generator saves one augmented image;
            # the loop has to be stopped explicitly once enough were written.
            for i, _ in enumerate(batches, start=1):
                if i >= augmentation_size:
                    print("class_{0} augmentation for {1} samples".format(cls_prefix, i))
                    break

    def _clear_train_path(self):
        """
        Remove all files from the train folder, creating the folder if missing.

        Sub-directories are left untouched; missing parent directories are
        created as well.

        :return: None
        """
        if os.path.exists(self.train_path):
            for file in os.listdir(self.train_path):
                target = os.path.join(self.train_path, file)
                if os.path.isfile(target):
                    os.remove(target)
        else:
            os.makedirs(self.train_path)
def get_data():
    """
    Load the images of the train folder and split them into train and test sets.

    Each image is converted to gray scale and resized to the configured input
    size. Its label is the part of the file name before the first underscore.

    :return: x_train, x_test, y_train, y_test as float32 numpy arrays
    """
    x = []
    y = []
    # os.listdir returns names in arbitrary order; sorting keeps the
    # fixed-random_state split below reproducible across machines.
    for name in sorted(os.listdir(TRAIN_PATH)):
        img = cv2.imread(os.path.join(TRAIN_PATH, name))
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print("skip unreadable image: {0}".format(name))
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # cv2.resize takes dsize as (width, height) = (cols, rows).
        img = cv2.resize(img, (img_cols, img_rows))
        x.append(img)
        y.append(name.split("_")[0])
    x = np.asarray(x, dtype=np.float32)
    y = np.asarray(y, dtype=np.float32)
    return train_test_split(x, y, random_state=30, test_size=.28)
def _build_model(input_shape):
    """Build and compile the CNN classifier for inputs of ``input_shape``."""
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3),
                     activation='relu',
                     input_shape=input_shape))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dropout(0.25))
    model.add(Dense(num_classes, activation='softmax', kernel_regularizer=regularizers.l2(0.01)))
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])
    return model


def train_model():
    """
    Train the classifier with the config params and save it to ``trained_model``.

    The data comes from ``get_data()``; the test split is used both as
    validation data during fitting and for the final evaluation.

    :return: None
    """
    import os  # local import so the function does not rely on a star import

    # the data, split between train and test sets
    x_train, x_test, y_train, y_test = get_data()

    # add the single gray channel on the axis the backend expects
    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_rows, img_cols)
    else:
        input_shape = (img_rows, img_cols, 1)
    x_train = x_train.reshape((x_train.shape[0],) + input_shape).astype('float32')
    x_test = x_test.reshape((x_test.shape[0],) + input_shape).astype('float32')
    # scale pixel values to [0, 1]
    x_train /= 255
    x_test /= 255
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    model = _build_model(input_shape)
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test))
    score = model.evaluate(x_test, y_test, verbose=0)

    # Create the target folder first so a finished training run is not lost
    # because the model directory does not exist yet.
    model_dir = os.path.dirname(trained_model)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    model.save(trained_model)

    # summary() prints by itself and returns None; wrapping it in print()
    # would emit an extra "None" line.
    model.summary()
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
if __name__ == "__main__":
    # Rebuild the augmented training set first, then fit and save the model.
    augmenter = Image()
    augmenter.get_augmentation()
    train_model()
在针对实际图片识别的时候呢,改造了
import selectivesearch.selectivesearch
from all_config import *
from image_utils.image_utils import *
from keras.models import load_model
import os
# Input image dimensions (rows, cols), taken from the shared config.
img_rows = IMG_ROW
img_cols = IMG_COL
# The trained model is loaded once, when this module is imported.
model = load_model(model_name)
# NOTE(review): private Keras API; presumably builds the predict function
# up front so later predict() calls do not have to - confirm.
model._make_predict_function()
def get_prediction(img_binary, rects, threshold=0.8):
    """
    Score every candidate rectangle with the model and keep the confident ones.

    All regions are cropped, resized to the model input size and classified
    in a single batched ``model.predict`` call.

    :param img_binary: BGR image the rectangles refer to (converted to gray here)
    :param rects: iterable of candidate rectangles accepted by ``get_roi_image``
    :param threshold: a rectangle is kept when its class-1 score is above this value
    :return: (rectangles, score_list), the kept rectangles and their class-1
             scores in the same order
    """
    rects = list(rects)
    if not rects:
        return [], []

    gray = cv2.cvtColor(img_binary, cv2.COLOR_BGR2GRAY)
    # cv2.resize takes dsize as (width, height) = (cols, rows).
    rois = [cv2.resize(get_roi_image(gray, rect), (img_cols, img_rows)) for rect in rects]
    batch = numpy.asarray(rois, numpy.float32).reshape(len(rois), img_rows, img_cols, 1) / 255
    # One predict call for all proposals instead of one call per proposal.
    scores = model.predict(batch)

    rectangles = []
    score_list = []
    for rect, score in zip(rects, scores):
        if score[1] > threshold:
            rectangles.append(rect)
            score_list.append(score[1])
    return rectangles, score_list
def image_view(image):
    """
    Show an image in a window named "img" and wait for a key press.

    Images wider than 320 px are scaled down proportionally to that width
    before being displayed.

    :param image: image array to display
    :return: None
    """
    max_width = 320
    width = image.shape[1]
    if width > max_width:
        ratio = max_width / width
        shown = cv2.resize(image, (0, 0), fx=ratio, fy=ratio)
    else:
        shown = image
    cv2.imshow("img", shown)
    cv2.waitKey(0)
def get_proposals(img):
    """
    Display the selective-search proposals found in an image file.

    The image is scaled to a width of 800 px and binarized. The proposals
    returned by ``get_proposal`` are drawn onto the binary image, which is
    then shown with ``image_view``.

    :param img: path of the image file
    :return: None
    """
    image = cv2.imread(img)
    ratio = 800 / image.shape[1]
    image = cv2.resize(image, (0, 0), fx=ratio, fy=ratio)
    _, binary = get_binary_image(image)
    _, candidates = selectivesearch.selective_search(binary, SCALE, SIGMA, MIN_SIZE)
    candidates = get_proposal(candidates, image.shape)
    print("proposals:", len(candidates))
    cv2.drawContours(binary, candidates, -1, (255, 145, 30), 2)
    image_view(binary)
def model_predict(img_file, view):
    """
    Locate the best-scoring target in an image.

    :param img_file: path of the image file when ``view`` is true, otherwise
                     the encoded image content as bytes
    :param view: when true, ``img_file`` is a path; a match is additionally
                 drawn onto the image, saved under ``PREDICT_PATH`` and the
                 result is printed
    :return: dict with "score" (0 when nothing was found) and "position"
             ("" when nothing was found, otherwise the value of ``get_pos``)
    :raises ValueError: if the image cannot be read or decoded
    """
    res_obj = {
        "score": 0,
        "position": ""
    }
    if view:
        img = cv2.imread(img_file)
    else:
        # numpy.frombuffer replaces the deprecated binary mode of numpy.fromstring.
        img = cv2.imdecode(numpy.frombuffer(img_file, numpy.uint8), 1)
    if img is None:
        # cv2.imread / cv2.imdecode return None instead of raising.
        raise ValueError("cannot read image: {0!r}".format(img_file if view else "<bytes>"))

    # normalize the width to 500 px; get_pos receives the factor below
    scale = 500 / img.shape[1]
    img = cv2.resize(img, (0, 0), fx=scale, fy=scale)
    _, binary = get_binary_image(img)
    if get_gray_score(binary):
        _, regions = selectivesearch.selective_search(binary, SCALE, SIGMA, MIN_SIZE)
        regions = get_proposal(regions, img.shape)
        rectangles, score_list = get_prediction(binary, regions)
        if len(score_list) > 0:
            # index of the first maximum, same choice as list.index(max(...))
            best = max(range(len(score_list)), key=score_list.__getitem__)
            rect = rectangles[best]
            res_obj["score"] = float(round(score_list[best], 2))
            res_obj["position"] = get_pos(rect, scale)
            if view:
                cv2.drawContours(img, [rect], -1, (255, 145, 30), 2)
                os.makedirs(PREDICT_PATH, exist_ok=True)
                # Only the base name is used: joining an absolute path would
                # discard PREDICT_PATH and overwrite the source image, and
                # bytes input (view false) has no file name at all.
                filpath = os.path.join(PREDICT_PATH, os.path.basename(img_file))
                cv2.imwrite(filpath, img)
                print(res_obj)
    return res_obj
if __name__ == "__main__":
    # Demo run on a local image file.
    sample_image = "image.png"
    model_predict(sample_image, view=True)
其实我的改造很简单,就是把路径改成了动态生成的,在不同系统执行不会出现问题。一个小小的改造,可能让我们的实际应用变得简单。
上面的是一个简单的测试,和代码的稍微的改造。
我们在手机上,去测试一个弹窗。
我本地有一个小米设备,我通过adb截屏获取了这个图片,获取图片的步骤
adb shell screencap -p /sdcard/01.png
adb pull /sdcard/01.png .
我想要通过adb去点击这个关闭,如何做呢。
我们把截取的图标图片加入到image目录,要识别的目标图标命名为1_10,其他图标命名为0_10,然后我们训练下模型,把rcnn_predict.py里的图片名称改成要识别的截图,然后执行rcnn_predict.py获取坐标。最后图标坐标是
那么我们看下在图片的定位展示
我们在电脑上执行
adb shell input tap 358 1147
实际结果
可以正常关闭所有正在进行的程序,我们可以把这个模型拓展到其他的模块。
然后我们按照上面继续截图训练模型,最后根据模型坐标去点击
点击后可以正常关闭。那么我们后续可以继续训练我们的模型。在实际的工作中,我们要对弹窗进行处理:在执行用例前判断是否存在这样的弹窗,截图传给模型,由模型产生对应的坐标,我们拿着坐标去点击,没有坐标则不做处理。