From bfba0672856078e429ce6b7373b8267b9454fd89 Mon Sep 17 00:00:00 2001 From: FEIJINTI <83849113+FEIJINTI@users.noreply.github.com> Date: Tue, 19 Jul 2022 15:39:51 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86dt=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 02_classification.py | 20 ++++++++--- main_test.py | 22 +++++++++--- models.py | 83 ++++++++++++++++++++++++++++++++++---------- utils.py | 2 +- 4 files changed, 99 insertions(+), 28 deletions(-) diff --git a/02_classification.py b/02_classification.py index fc0aff8..7e75930 100644 --- a/02_classification.py +++ b/02_classification.py @@ -7,7 +7,8 @@ import numpy as np - +import scipy +from imblearn.under_sampling import RandomUnderSampler from models import AnonymousColorDetector from utils import read_labeled_img @@ -17,8 +18,15 @@ from utils import read_labeled_img data_dir = "data/dataset" -color_dict = {(0, 0, 255): "yangeng"} +color_dict = {(0, 0, 255): "yangeng", (255, 0, 0): 'beijing'} +label_index = {"yangeng": 1, "beijing": 0} dataset = read_labeled_img(data_dir, color_dict=color_dict, is_ps_color_space=False) +rus = RandomUnderSampler(random_state=0) +x_list, y_list = np.concatenate([v for k, v in dataset.items()], axis=0).tolist(), \ + np.concatenate([np.ones((v.shape[0],)) * label_index[k] for k, v in dataset.items()], axis=0).tolist() + +x_resampled, y_resampled = rus.fit_resample(x_list, y_list) +dataset = {"inside": np.array(x_resampled)} # ## 模型训练 @@ -32,10 +40,14 @@ negative_sample_num = None # None或者一个数字 world_boundary = np.array([0, 0, 0, 255, 255, 255]) # 对数据进行预处理 x = np.concatenate([v for k, v in dataset.items()], axis=0) -negative_sample_num = int(x.shape[0] * 0.7) if negative_sample_num is None else negative_sample_num +negative_sample_num = int(x.shape[0] * 1.2) if negative_sample_num is None else negative_sample_num model = AnonymousColorDetector() -model.fit(x, world_boundary, threshold, negative_sample_size=negative_sample_num, train_size=0.7) +model.fit(x, world_boundary, threshold, negative_sample_size=negative_sample_num, train_size=0.7, + is_save_dataset=True, model_selection='dt') +# data = scipy.io.loadmat('dataset_2022-07-19_15-07.mat') +# x, y = data['x'], data['y'].ravel() +# model.fit(x, y=y, is_generate_negative=False, model_selection='dt') model.save() diff --git a/main_test.py b/main_test.py index 6626e82..43a0883 100644 --- a/main_test.py +++ b/main_test.py @@ -10,15 +10,17 @@ import matplotlib.pyplot as plt import numpy as np from models import Detector, AnonymousColorDetector +from utils import read_labeled_img -def virtual_main(detector: Detector, test_img=None, test_img_dir=None): +def virtual_main(detector: Detector, test_img=None, test_img_dir=None, test_model=False): """ 虚拟读图测试程序 :param detector: 杂质探测器,需要继承Detector类 :param test_img: 测试图像,rgb格式的图片或者路径 :param test_img_dir: 测试图像文件夹 + :param test_model: 是否进行模型约束性测试 :return: """ if test_img is not None: @@ -29,8 +31,10 @@ def virtual_main(detector: Detector, test_img=None, test_img_dir=None): else: raise TypeError("test img should be np.ndarray or str") t1 = time.time() - result = detector.predict(img) + img = cv2.resize(img, (1024, 256)) t2 = time.time() + result = 1 - detector.predict(img) + t3 = time.time() fig, axs = plt.subplots(3, 1) axs[0].imshow(img) axs[1].imshow(result) @@ -38,10 +42,18 @@ def virtual_main(detector: Detector, test_img=None, test_img_dir=None): mask_color[result > 0] = (0, 0, 255) result_show = cv2.addWeighted(img, 1, mask_color, 0.5, 0) axs[2].imshow(result_show) - plt.title(f'{(t2 - t1) * 1000:.2f} ms') + axs[0].set_title( + f' resize {(t2 - t1) * 1000:.2f} ms, predict {(t3 - t2) * 1000:.2f} ms, total {(t3 - t1) * 1000:.2f} ms') plt.show() + if test_model: + data_dir = "data/dataset" + color_dict = {(0, 0, 255): "yangeng"} + dataset = read_labeled_img(data_dir, color_dict=color_dict, is_ps_color_space=False) + ground_truth = dataset['yangeng'] + world_boundary = np.array([0, 0, 0, 255, 255, 255]) + detector.visualize(world_boundary, sample_size=50000, class_max_num=5000, ground_truth=ground_truth) if __name__ == '__main__': - detector = AnonymousColorDetector(file_path='models/ELM_2022-07-18_17-22.mat') - virtual_main(detector, test_img='data/dataset/img/yangeng.bmp') + detector = AnonymousColorDetector(file_path='models/dt_2022-07-19_14-38.model') + virtual_main(detector, test_img=r'data/dataset/img/yangeng.bmp', test_model=True) diff --git a/models.py b/models.py index 99ed01a..85b84aa 100644 --- a/models.py +++ b/models.py @@ -4,12 +4,17 @@ # @File: models.py # @Software:PyCharm、 import datetime +import pickle import cv2 import numpy as np import scipy.io +import tqdm +from sklearn.tree import DecisionTreeClassifier from sklearn.metrics import classification_report from sklearn.model_selection import train_test_split +from utils import lab_scatter, read_labeled_img +from tqdm import tqdm from elm import ELM @@ -34,10 +39,12 @@ class Detector(object): class AnonymousColorDetector(Detector): def __init__(self, file_path: str = None): self.model = None + self.model_type = 'None' if file_path is not None: - self.model = ELM(model_path=file_path) + self.load(file_path) - def fit(self, x: np.ndarray, world_boundary: np.ndarray, threshold: float, + def fit(self, x: np.ndarray, world_boundary: np.ndarray = None, threshold: float = None, + is_generate_negative: bool = True, y: np.ndarray = None, model_selection='elm', negative_sample_size: int = 1000, train_size: float = 0.8, is_save_dataset=False, **kwargs): """ 拟合到指定的样本分布情况下,根据x进行分布的变化。 @@ -45,23 +52,36 @@ class AnonymousColorDetector(Detector): :param x: ndarray类型的正样本数据,给出的正样本形状为 n x feature_num :param world_boundary: 整个世界的边界,边界形状为 feature_num个下限, feature_num个上限 :param threshold: 与正样本之间的距离阈值大于多少则不认为是指定的样本类别 + :param is_generate_negative: 是否生成负样本 + :param y: 给出x对应的样本y + :param model_selection: 模型的选择, in ['elm', 'decision tree'] :param negative_sample_size: 负样本的数量 :param train_size: 训练集的比例, float :param is_save_dataset: 是否保存数据集 :param kwargs: 与模型相对应的参数 :return: """ - node_num = kwargs.get('node_num', 10) - self.model = ELM(input_size=x.shape[1], node_num=node_num, output_num=2, **kwargs) - negative_samples = self.generate_negative_samples(x, world_boundary, threshold, - sample_size=negative_sample_size) - data_x, data_y = np.concatenate([x, negative_samples], axis=0), \ - np.concatenate([np.ones(x.shape[0], dtype=int), - np.zeros(negative_samples.shape[0], dtype=int)], axis=0) + if model_selection == 'elm': + node_num = kwargs.get('node_num', 10) + self.model = ELM(input_size=x.shape[1], node_num=node_num, output_num=2, **kwargs) + elif model_selection == 'dt': + self.model = DecisionTreeClassifier(**kwargs) + else: + raise ValueError("你看看我要的是啥") + self.model_type = model_selection + if is_generate_negative: + negative_samples = self.generate_negative_samples(x, world_boundary, threshold, + sample_size=negative_sample_size) + data_x, data_y = np.concatenate([x, negative_samples], axis=0), \ + np.concatenate([np.ones(x.shape[0], dtype=int), + np.zeros(negative_samples.shape[0], dtype=int)], axis=0) + else: + data_x, data_y = x, y if is_save_dataset: path = datetime.datetime.now().strftime("dataset_%Y-%m-%d_%H-%M.mat") scipy.io.savemat(path, {'x': data_x, 'y': data_y}) - x_train, x_val, y_train, y_val = train_test_split(data_x, data_y, train_size=train_size, shuffle=True) + x_train, x_val, y_train, y_val = train_test_split(data_x, data_y, train_size=train_size, shuffle=True, + stratify=data_y) self.model.fit(x_train, y_train) y_predict = self.model.predict(x_val) print(classification_report(y_true=y_val, y_pred=y_predict)) @@ -88,9 +108,11 @@ class AnonymousColorDetector(Detector): :param threshold: 与正样本x之间的距离限制 :return: 负样本形状为:(sample_size, feature_num) """ + feature_num = x.shape[1] negative_samples = np.zeros((sample_size, feature_num), dtype=x.dtype) generated_sample_num = 0 + bar = tqdm(total=sample_size, ncols=100) while generated_sample_num <= sample_size: generated_data = np.random.uniform(world_boundary[:feature_num], world_boundary[feature_num:], size=(sample_size, feature_num)) @@ -100,20 +122,45 @@ class AnonymousColorDetector(Detector): if not in_threshold: negative_samples[sample_idx, :] = sample generated_sample_num += 1 + bar.update() if generated_sample_num >= sample_size: break + bar.close() return negative_samples - def save(self, file_path=None): - self.model.save(file_path) + def save(self): + path = datetime.datetime.now().strftime(f"{self.model_type}_%Y-%m-%d_%H-%M.model") + with open(path, 'wb') as f: + pickle.dump((self.model_type, self.model), f) def load(self, file_path): - self.model.load(file_path) + with open(file_path, 'rb') as model_file: + data = pickle.load(model_file) + self.model_type, self.model = data + + def visualize(self, world_boundary: np.ndarray, sample_size: int, ground_truth=None, + **kwargs): + feature_num = world_boundary.shape[0] // 2 + x = np.random.uniform(world_boundary[:feature_num], world_boundary[feature_num:], + size=(sample_size, feature_num)) + pred_y = self.model.predict(x) + draw_dataset = {'Inside': x[pred_y == 1, :], 'Outside': x[pred_y == 0, :]} + if ground_truth is not None: + draw_dataset.update({'Given': ground_truth}) + lab_scatter(draw_dataset, is_3d=True, is_ps_color_space=False, **kwargs) if __name__ == '__main__': - detector = AnonymousColorDetector() - x = np.array([[10, 30, 20], [10, 35, 25], [10, 35, 36]]) - world_boundary = np.array([0, -127, -127, 100, 127, 127]) - detector.fit(x, world_boundary, threshold=5, negative_sample_size=2000) - detector.load('ELM_2022-07-18_17-01.mat') + data_dir = "data/dataset" + color_dict = {(0, 0, 255): "yangeng"} + dataset = read_labeled_img(data_dir, color_dict=color_dict, is_ps_color_space=False) + ground_truth = dataset['yangeng'] + detector = AnonymousColorDetector(file_path='models/dt_2022-07-19_14-38.model') + # x = np.array([[10, 30, 20], [10, 35, 25], [10, 35, 36]]) + world_boundary = np.array([0, 0, 0, 255, 255, 255]) + # detector.fit(x, world_boundary, threshold=5, negative_sample_size=2000) + detector.visualize(world_boundary, sample_size=50000, class_max_num=5000, ground_truth=ground_truth) + data = scipy.io.loadmat('data/dataset_2022-07-19_11-35.mat') + x, y = data['x'], data['y'] + dataset = {'inside': x[y.ravel() == 1, :], "outside": x[y.ravel() == 0, :]} + lab_scatter(dataset, class_max_num=5000, is_3d=True, is_ps_color_space=False) diff --git a/utils.py b/utils.py index 2a14575..a8249f0 100644 --- a/utils.py +++ b/utils.py @@ -55,7 +55,7 @@ def read_labeled_img(dataset_dir: str, color_dict: dict, ext='.bmp', is_ps_color return total_dataset -def lab_scatter(dataset: dict, class_max_num=None, is_3d=False, is_ps_color_space=True): +def lab_scatter(dataset: dict, class_max_num=None, is_3d=False, is_ps_color_space=True, **kwargs): """ 在lab色彩空间内绘制3维数据分布情况