{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "dd2c8c55",
   "metadata": {},
   "source": [
    "# Preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "716880ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from scipy.io import savemat, loadmat\n",
    "import os"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4d7dc4a0",
   "metadata": {},
   "source": [
    "## Step 1:\n",
    "Convert the dataset to mat format for Matlab."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "711356a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the raw CSV of the mango dataset.\n",
    "dataset = pd.read_csv('preprocess/dataset/mango/NAnderson2020MendeleyMangoNIRData.csv')\n",
    "\n",
    "# Inputs: every column from label '684' through label '990' (label slices include both ends).\n",
    "x = dataset.loc[:, '684':'990']\n",
    "# Target: the DM column.\n",
    "y = dataset['DM']\n",
    "\n",
    "# Export both as plain arrays under the keys 'x' and 'y'.\n",
    "mat_payload = {'x': x.values, 'y': y.values}\n",
    "savemat('preprocess/dataset/mango/mango_origin.mat', mat_payload)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3e41e8e6",
   "metadata": {},
   "source": [
    "## Step 2:\n",
    "Preprocess `mango_origin.mat` outside this notebook (presumably in Matlab; the script is not part of this notebook).\n",
    "\n",
    "Step 3 expects the result at `preprocess/dataset/mango/mango_preprocessed.mat`, containing `x`, `y`, `min_x`, `max_x`, `min_y` and `max_y`."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ea5e54fd",
   "metadata": {},
   "source": [
    "## Step 3:\n",
    "Split the data into training and test sets with `train_test_split`."
] },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "6eac026e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the preprocessed arrays; this file is not produced by this notebook.\n",
    "data = loadmat('preprocess/dataset/mango/mango_preprocessed.mat')\n",
    "x, y = data['x'], data['y']\n",
    "\n",
    "# 70/30 split with a fixed seed so the partition is reproducible.\n",
    "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=24)\n",
    "\n",
    "# Make sure the directory we actually write to exists. The previous guard created an\n",
    "# unrelated ./mango folder in the working directory and never covered this path.\n",
    "split_path = 'preprocess/dataset/mango/mango_dm_split.mat'\n",
    "os.makedirs(os.path.dirname(split_path), exist_ok=True)\n",
    "\n",
    "savemat(split_path, {\n",
    "    'x_train': x_train, 'y_train': y_train,\n",
    "    'x_test': x_test, 'y_test': y_test,\n",
    "    # Pass the min/max entries of the preprocessed file through unchanged.\n",
    "    'max_y': data['max_y'], 'min_y': data['min_y'],\n",
    "    'min_x': data['min_x'], 'max_x': data['max_x'],\n",
    "})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b2977dae",
   "metadata": {},
   "source": [
    "## Step 4:\n",
    "Show the data as pictures.\n",
    "\n",
    "Use `draw_pics_origin` to draw the original spectra:\n",
    "\n",
    "![img](./preprocess/pics/raw.png)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a3f09d17",
   "source": [
    "Use `draw_pics_preprocessed.m` to draw the preprocessed spectra:\n",
    "\n",
    "![img](./preprocess/pics/preprocessed.png)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "7f619fc91ee8bdab81d49e7c14228037474662e3f2d607687ae505108922fa06"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 64-bit ('base': conda)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}