Commit 99599ac2 authored by Frank Rayo 🚀
Browse files

script for gradient boosted trees regression for fishing suitability

parent 2188acfb
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import xgboost as xgb\n",
"import sklearn\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"import pygraphviz # apt update -y; apt upgrade -y; apt-get install -y graphviz libgraphviz-dev pkg-config; pip install graphviz pygraphviz"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Download the CSV used for training (2017-2020) and the CSV used for testing (2021) into /tmp.\n",
"!wget --quiet https://pedro.asti.dost.gov.ph/gitlab/franco/fish-suitability-map/-/raw/master/2017-2020_filtered_filtered_with_cfa.csv --output-document=/tmp/training-2017-2020-monthly-mean.csv\n",
"!wget --quiet https://pedro.asti.dost.gov.ph/gitlab/franco/fish-suitability-map/-/raw/master/2021_converted.csv --output-document=/tmp/testing-2021.csv"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>chl</th>\n",
" <th>sst</th>\n",
" <th>bath</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.132899</td>\n",
" <td>25.934444</td>\n",
" <td>-3173</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.121123</td>\n",
" <td>24.907999</td>\n",
" <td>-3173</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.116107</td>\n",
" <td>25.329166</td>\n",
" <td>-3173</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.100442</td>\n",
" <td>25.823666</td>\n",
" <td>-3173</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.081607</td>\n",
" <td>27.529062</td>\n",
" <td>-3173</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" chl sst bath\n",
"0 0.132899 25.934444 -3173\n",
"1 0.121123 24.907999 -3173\n",
"2 0.116107 25.329166 -3173\n",
"3 0.100442 25.823666 -3173\n",
"4 0.081607 27.529062 -3173"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Parse the training CSV a single time, then split it into features and label\n",
"# (the original cell read and parsed the same file twice).\n",
"FEATURE_COLUMNS = ['bath', 'chl', 'sst']\n",
"LABEL_COLUMN = 'boat_present'\n",
"\n",
"train_frame = pd.read_csv('/tmp/training-2017-2020-monthly-mean.csv',\n",
"                          usecols=FEATURE_COLUMNS + [LABEL_COLUMN], engine='c', index_col=False)\n",
"\n",
"# usecols ignores list order and keeps the file's column order; dropping the label\n",
"# therefore leaves the features ordered exactly as a features-only read would.\n",
"train_data = train_frame.drop(columns=[LABEL_COLUMN])\n",
"# Double brackets keep the label as a single-column DataFrame rather than a Series.\n",
"train_labels = train_frame[[LABEL_COLUMN]]\n",
"\n",
"train_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAf/UlEQVR4nO3dfZwcVZ3v8c+XBORhIKjBWUiQoAY1EEEZeVBXZ1ZcA6vEXVGIQQ0CWUBcn+Al7rqYRXdfIkZBxMWgEJXAgKxCXsiD9wbGXBW4JJeHEBCNECWACRAIDISHwO/+UWe06XTP1PRMddNT3/fr1a9U1TlV9Ts1nf71qao+pYjAzMzKa4tWB2BmZq3lRGBmVnJOBGZmJedEYGZWck4EZmYl50RgZlZyTgQlIWmlpO5Wx9FKkv5R0n2S+iW9uaB9XC3p46Ndd6QkhaTXvZT2JWlKqju+gX00vK5tzolgDJC0WtJBVcvmSPrVwHxE7BkRfUNsZ6z/5/oGcGJEdETELdWFo/FhGREHR8QPR7tus5TgPWA1OBFY07wEPlx2A1Y2uvJLIH6zQjgRlERlr0HSfpKWSXpc0lpJ30zVlqZ/H0unTw6UtIWkL0n6o6R1kn4kaULFdj+Wyh6R9O9V+5kn6TJJF0p6HJiT9n2DpMckPSjpO5K2qtheSDpB0u8lPSHpK5JeK+k3Kd5LK+tXtbFmrJJeJqkfGAfcJukPNdYdaPttqe2HS+qWtEbSFyT9GbhA0sslXSnpIUmPpunJFdvpk3RMmp4j6VeSvpHq3ivp4Abr7i5paTom/1vSOZIuHOTvfXI6vg9I+kRV2T9IuiUdz/skzasorvUeeK2k69Lf+GFJiyTtWG/fw9jXgE+kOB+UdFLFultIOkXSH9K+L5X0ijr7mSPpnnR87pU0O098lkSEX23+AlYDB1UtmwP8qlYd4Abgo2m6AzggTU8BAhhfsd4ngFXAa1LdnwI/TmXTgH7gHcBWZKdenqvYz7w0/wGyLx3bAPsCBwDj0/7uAj5Tsb8ArgB2APYEngGWpP1PAO4EPl7nONSNtWLbrxvkOL6oHOgGNgGnAy9L8b8S+CCwLbA98BPg8op1+oBjKv4GzwHHkiWh44EHADVQ94Z0fLdKx/tx4MI67ZgBrAX2ArYDLqpsW2rX9PQ3eVOq+4FB3gOvA96TjsFOZMnizDzHMee+Lk5xTgce4q/vn08DNwKT076/B1xcHWda93Hg9alsZ2DPVv+/bKdXywNoKGg4H1gH3JGz/ofJPkBWAhe1Ov4Cjsdqsg/kxypeT1E/ESwF/gOYWLWdWh8CS4ATKuZfnz6wxgOnDvzHTGXbAs/y4kSwdIjYPwP8rGI+gLdXzC8HvlAxP7/eh9BgsVZse7iJ4Flg60HW2Qd4tGK+jxd/uK+qOj4B/M1w6gKvJktI21aUX0j9RHA+8LWK+T0GaztwJvCteu+BGvU/ANyS9zjm2NcbKsq/DvwgTd8FvLuibOeK995f4iRLBI+RJehtmv3/byy82vXU0EKybz1DkjQV+CLZh8ueZB88Y9EHImLHgRdwwiB1jyb7cPitpJslvW+QursAf6yY/yPZf77OVHbfQEFEPAU8UrX+fZUzkvZIp1P+nE4X/RcwsWqdtRXTG2vMdzQQa6MeioinB2YkbSvpe+n00+NkSXVHSePqrP/ngYl0fKB+/PXq7gKsr1gGVce1yi5V5ZXHBEn7S7o+nd7aABzH5n+Dyvqdknol3Z/afOFg9RvYV3Wsu6Tp3YCfpdOIj5Elhuep+ntGxJPA4WnbD0r6uaQ35InPMm2ZCCJiKbC+clk6j3mNpOWS/k/FG+FY4JyIeDStu67J4b7kRMTvI2IW8Cqy0x6XSdqO7BtWtQfI/kMOGPh2uhZ4kKzbDoCkgVMnL9pd1fx/A78FpkbEDsC/Amq8NbljbVR1/J8n62nsn+J/Z1o+Wm2o5UHgFZK2rVi26xD1K8tfXVV+EbAY2DUiJg
Dn8tf4a70H/istn57afCT52zvYvgZUx/pAmr4POLjyC05EbB0R91fvJCKujYj3kPUafguclzM+o00TQR0LgE9FxL7AScB30/I9gD0k/VrSjZJy9STGMklHStopIl4g61IDvEB2fvYFsnPsAy4GPpsuVnaQfShcEhGbgMuA90t6m7ILuPMY+gNie7Lzuf0pWR8/Ss0aKtY81vLitteyPVmv5LF04fLLDUebU0T8EVgGzJO0laQDgfcPssqlZBfmp6XkUR3j9mQ9jKcl7Qd8pKKs1ntge7JTjxskTQJOHkb4g+1rwL+nntaewFHAJWn5ucB/StoNQNJOkmZWr5x6LDPTl5lnUqwvDCPG0hsTiSD9p38b8BNJt5JdVNo5FY8HppKd750FnJf3jocxbAawUtmdNGcBR0TExnTq4T+BX6fu+AFk55t/THYK5F7gaeBTABGxMk33kn0L7Se7dvPMIPs+iezD4Amyb22XDFJ3uOrGmtM84Iep7R+uU+dMsovGD5NdyLym0WCHaTZwINmpt6+SHbeaxzkiriaL8zqyi+fXVVU5AThN0hNk13kurVi31nvgP4C3ABuAn5NdhM+r7r4q/DLFuQT4RkT8Ii0/i6w38Yu0/o3A/jXW3wL4HFlPYj3wLkb3C8aYN3BHQtuRNAW4MiL2krQDcHdE7Fyj3rnATRFxQZpfApwSETc3NeASSAn5MbLTPve2OJwxTdIlwG8jovAeiY19Y6JHEBGPA/dK+hCAMnun4svJegNImkh2quieFoQ5Jkl6f+rWb0d2e+MKsjuUbBRJemu6DrZFOr05k+y9bTZibZkIJF1Mdl/165X94Odosq7z0ZJuI7tNdOBc4rXAI5LuBK4HTo6I6jtbrHEzybrkD5Cdgjsi2rWb+dL2N2S3m/YD3waOjxrDZJg1om1PDZmZ2ehoyx6BmZmNnrYbRGvixIkxZcqUhtZ98skn2W677UY3oJc4t7kc3OZyGEmbly9f/nBE7FSrrO0SwZQpU1i2bFlD6/b19dHd3T26Ab3Euc3l4DaXw0jaLOmP9cp8asjMrOScCMzMSs6JwMys5JwIzMxKzonAzKzknAjMzEqusEQg6Xxlz429Y5A63ZJulbRS0i+LisXMzOorskewkEGeIpaGgv4ucGh6ctiHCozFzMzqKCwR1HqKWJWPAD+NiD+l+qV/cpiZWSsUOuhc5TMDapSdCWwJ7En2FKOzIuJHdbYzF5gL0NnZuW9vb29D8axbv4G1G7Pp6ZMmNLSNdtPf309HR71H5I5NbnM5uM3D09PTszwiumqVtXKIifHAvsC7yZ74dIOkGyPid9UVI2IB2aMo6erqikZ/Yn32oiuYvyJr8urZjW2j3fhn+OXgNpdDUW1uZSJYAzwSEU8CT0paCuwNbJYIzMysOK28ffQK4B2SxqcHbO8P3NXCeMzMSqmwHkF6ilg3MFHSGuDLZNcEiIhzI+IuSdcAtwMvAN+PiLq3mpqZWTEKSwQRMStHnTOAM4qKwczMhuZfFpuZlZwTgZlZyTkRmJmVnBOBmVnJORGYmZWcE4GZWck5EZiZlZwTgZlZyTkRmJmVnBOBmVnJORGYmZWcE4GZWck5EZiZlZwTgZlZyTkRmJmVnBOBmVnJORGYmZVcYYlA0vmS1kka9PGTkt4qaZOkw4qKxczM6iuyR7AQmDFYBUnjgNOBXxQYh5mZDaKwRBARS4H1Q1T7FPA/wLqi4jAzs8EpIorbuDQFuDIi9qpRNgm4COgBzk/1LquznbnAXIDOzs59e3t7G4pn3foNrN2YTU+fNKGhbbSb/v5+Ojo6Wh1GU7nN5eA2D09PT8/yiOiqVTZ+RFGNzJnAFyLiBUmDVoyIBcACgK6uruju7m5oh2cvuoL5K7Imr57d2DbaTV9fH40er3blNpeD2zx6WpkIuoDelAQmAodI2hQRl7cwJjOz0mlZIoiI3QemJS0kOzV0eaviMTMrq8ISgaSLgW5goqQ1wJeBLQEi4tyi9m
tmZsNTWCKIiFnDqDunqDjMzGxw/mWxmVnJORGYmZWcE4GZWck5EZiZlZwTgZlZyTkRmJmVnBOBmVnJORGYmZWcE4GZWck5EZiZlZwTgZlZyTkRmJmVnBOBmVnJORGYmZWcE4GZWck5EZiZlZwTgZlZyRWWCCSdL2mdpDvqlM+WdLukFZJ+I2nvomIxM7P6iuwRLARmDFJ+L/CuiJgOfAVYUGAsZmZWR5HPLF4qacog5b+pmL0RmFxULGZmVp8ioriNZ4ngyojYa4h6JwFviIhj6pTPBeYCdHZ27tvb29tQPOvWb2Dtxmx6+qQJDW2j3fT399PR0dHqMJrKbS4Ht3l4enp6lkdEV62ywnoEeUnqAY4G3lGvTkQsIJ066urqiu7u7ob2dfaiK5i/Imvy6tmNbaPd9PX10ejxalduczm4zaOnpYlA0puA7wMHR8QjrYzFzKysWnb7qKRXAz8FPhoRv2tVHGZmZVdYj0DSxUA3MFHSGuDLwJYAEXEucCrwSuC7kgA21Tt/ZWZmxSnyrqFZQ5QfA9S8OGxmZs3jXxabmZWcE4GZWck5EZiZlZwTgZlZyTkRmJmVnBOBmVnJORGYmZWcE4GZWck5EZiZlZwTgZlZyTkRmJmVnBOBmVnJORGYmZWcE4GZWck5EZiZlZwTgZlZyTkRmJmVXGGJQNL5ktZJuqNOuSR9W9IqSbdLektRsZiZWX1DJgJJyyV9UtLLh7nthcCMQcoPBqam11zgv4e5fTMzGwV5egSHA7sAN0vqlfRepafNDyYilgLrB6kyE/hRZG4EdpS0c66ozcxs1Cgi8lWUtgDeR/bN/XngAuCsiKj7YS9pCnBlROxVo+xK4GsR8as0vwT4QkQsq1F3Llmvgc7Ozn17e3tzxVxt3foNrN2YTU+fNKGhbbSb/v5+Ojo6Wh1GU7nN5eA2D09PT8/yiOiqVTY+zwYkvQk4CjgE+B9gEfAO4Dpgn4aiGoaIWAAsAOjq6oru7u6GtnP2oiuYvyJr8urZjW2j3fT19dHo8WpXbnM5uM2jZ8hEIGk58BjwA+CUiHgmFd0k6e0j2Pf9wK4V85PTMjMza6I8PYIPRcQ9tQoi4p9GsO/FwImSeoH9gQ0R8eAItmdmZg3Ic7H4GEk7DsxIermkrw61kqSLgRuA10taI+loScdJOi5VuQq4B1gFnAecMOzozcxsxPL0CA6OiH8dmImIRyUdAnxpsJUiYtYQ5QF8MleUZmZWmDw9gnGSXjYwI2kb4GWD1DczszaSp0ewCFgi6YI0fxTww+JCMjOzZhoyEUTE6ZJuB96dFn0lIq4tNiwzM2uWXL8jiIirgasLjsXMzFogz1hD/yTp95I2SHpc0hOSHm9GcGZmVrw8PYKvA++PiLuKDsbMzJovz11Da50EzMzGrjw9gmWSLgEuBwaGlyAiflpUUGZm1jx5EsEOwFPA31csC8CJwMxsDMhz++hRzQjEzMxaI89dQ3tIWjLwyElJb5I06PASZmbWPvJcLD4P+CLwHEBE3A4cUWRQZmbWPHkSwbYR8X+rlm0qIhgzM2u+PIngYUmvJbtAjKTDAD83wMxsjMhz19AnyR4T+QZJ9wP3AkcWGpWZmTVNnruG7gEOkrQdsEVEPFF8WGZm1ix5nll8atU8ABFxWo51ZwBnAeOA70fE16rKX002pPWOqc4pEXFVztjNzGwU5LlG8GTF63ngYGDKUCtJGgeck+pPA2ZJmlZV7UvApRHxZrI7kb6bO3IzMxsVeU4Nza+cl/QNIM/zCPYDVg08+D49pH4mcGfl5sl+uQwwAXggx3bNzGwU5XoeQZVtgck56k0C7quYXwPsX1VnHvALSZ8CtgMOaiAeMzMbgTzXCFaQbh0lO4+/EzDk9YGcZgELI2K+pAOBH0vaKyJeqIphLjAXoLOzk76+voZ21rkNfH569hOIRrfRbvr7+0vT1gFuczm4zaMnT4/gfRXTm8iGpc7zg7L7gV0r5ienZZWOBmYARMQNkr
YGJgLrKitFxAKyW1jp6uqK7u7uHLvf3NmLrmD+iqzJq2c3to1209fXR6PHq125zeXgNo+ePBeLn6h4bQR2kPSKgdcg690MTJW0u6StyC4GL66q8yfSs5AlvRHYGnhomG0wM7MRyNMj+H9k3+wfBUR2q+efUlkAr6m1UkRsknQi2YXlccD5EbFS0mnAsohYDHweOE/SZ9O25kRE1NqemZkVI08i+F/Azwbu75d0MPCBiPjnoVZM61xVtezUiuk7gbcPK2IzMxtVeU4NHVD5I6+IuBp4W3EhmZlZM+XpETyQnj9wYZqfje/3NzMbM/L0CGaR3TL6M7LHU+6UlpmZ2RiQ55fF64FPS9ouIp5sQkxmZtZEeR5V+TZJdwJ3pfm9JXlMIDOzMSLPqaFvAe8FHgGIiNuAdxYZlJmZNU+eREBE3Fe16PkCYjEzsxbIc9fQfZLeBoSkLYFPk04TmZlZ+8vTIziO7HGVk8jGCtonzZuZ2RgwaI8gPVzmrIiY3aR4zMysyQbtEUTE88BuadA4MzMbg/JcI7gH+LWkxWSPqwQgIr5ZWFRmZtY0dXsEkn6cJg8Frkx1t694mZnZGDBYj2BfSbuQDTl9dpPiMTOzJhssEZwLLAF2B5ZVLBeDPIfAzMzaS91TQxHx7Yh4I3BBRLym4rV7RDgJmJmNEUP+jiAijm9GIGZm1hq5hpholKQZku6WtErSKXXqfFjSnZJWSrqoyHjMzGxzeW4fbUj6Mdo5wHuANcDNkhanx1MO1JkKfBF4e0Q8KulVRcVjZma1Fdkj2A9YFRH3RMSzQC8ws6rOscA5EfEoQESsKzAeMzOrQRFRzIalw4AZEXFMmv8osH9EnFhR53Lgd2QPsB8HzIuIa2psay4wF6Czs3Pf3t7ehmJat34Dazdm09MnTWhoG+2mv7+fjo6OVofRVG5zObjNw9PT07M8IrpqlRV2aiin8cBUoBuYDCyVND0iHqusFBELgAUAXV1d0d3d3dDOzl50BfNXZE1ePbuxbbSbvr4+Gj1e7cptLge3efQUeWrofmDXivnJaVmlNcDiiHguIu4l6x1MLTAmMzOrUmQiuBmYKmn3NGjdEcDiqjqXk/UGkDQR2INsbCMzM2uSwhJBRGwCTgSuJXuQzaURsVLSaZIOTdWuBR5Jz0S+Hjg5Ih4pKiYzM9tcodcIIuIq4KqqZadWTAfwufQyM7MWKPQHZWZm9tLnRGBmVnJOBGZmJedEYGZWck4EZmYl50RgZlZyTgRmZiXnRGBmVnJOBGZmJedEYGZWck4EZmYl50RgZlZyTgRmZiXnRGBmVnJOBGZmJedEYGZWck4EZmYlV2gikDRD0t2SVkk6ZZB6H5QUkrqKjMfMzDZXWCKQNA44BzgYmAbMkjStRr3tgU8DNxUVi5mZ1Vdkj2A/YFVE3BMRzwK9wMwa9b4CnA48XWAsZmZWR5EPr58E3FcxvwbYv7KCpLcAu0bEzyWdXG9DkuYCcwE6Ozvp6+trKKDObeDz0zcBNLyNdtPf31+atg5wm8vBbR49RSaCQUnaAvgmMGeouhGxAFgA0NXVFd3d3Q3t8+xFVzB/Rdbk1bMb20a76evro9Hj1a7c5nJwm0dPkaeG7gd2rZifnJYN2B7YC+iTtBo4AFjsC8ZmZs1VZCK4GZgqaXdJWwFHAIsHCiNiQ0RMjIgpETEFuBE4NCKWFRiTmZlVKSwRRMQm4ETgWuAu4NKIWCnpNEmHFrVfMzMbnkKvEUTEVcBVVctOrVO3u8hYzMysNv+y2Mys5JwIzMxKzonAzKzknAjMzErOicDMrOScCMzMSs6JwMys5JwIzMxKzonAzKzknAjMzErOicDMrOScCMzMSs6JwMys5JwIzMxKzonAzKzknAjMzErOicDMrOQKTQSSZki6W9IqSafUKP+cpDsl3S5piaTdiozHzMw2V1gikDQOOAc4GJgGzJI0raraLUBXRLwJuAz4elHxmJlZbUX2CPYDVkXEPRHxLNALzK
ysEBHXR8RTafZGYHKB8ZiZWQ2KiGI2LB0GzIiIY9L8R4H9I+LEOvW/A/w5Ir5ao2wuMBegs7Nz397e3oZiWrd+A2s3ZtPTJ01oaBvtpr+/n46OjlaH0VRuczm4zcPT09OzPCK6apWNH1FUo0TSkUAX8K5a5RGxAFgA0NXVFd3d3Q3t5+xFVzB/Rdbk1bMb20a76evro9Hj1a7c5nJwm0dPkYngfmDXivnJadmLSDoI+DfgXRHxTIHxmJlZDUVeI7gZmCppd0lbAUcAiysrSHoz8D3g0IhYV2AsZmZWR2GJICI2AScC1wJ3AZdGxEpJp0k6NFU7A+gAfiLpVkmL62zOzMwKUug1goi4CriqatmpFdMHFbl/MzMbmn9ZbGZWck4EZmYl50RgZlZyTgRmZiXnRGBmVnJOBGZmJedEYGZWck4EZmYl50RgZlZyTgRmZiXnRGBmVnJOBGZmJedEYGZWck4EZmYl50RgZlZyTgRmZiXnRGBmVnKFJgJJMyTdLWmVpFNqlL9M0iWp/CZJU4qMx8zMNldYIpA0DjgHOBiYBsySNK2q2tHAoxHxOuBbwOlFxWNmZrUV+czi/YBVEXEPgKReYCZwZ0WdmcC8NH0Z8B1JiogoMC4zs7Yx5ZSf/2V64YztCtlHkYlgEnBfxfwaYP96dSJik6QNwCuBhysrSZoLzE2z/ZLubjCmiQPbVnn6Hn9pc4m4zeVQujb3nD6iNu9Wr6DIRDBqImIBsGCk25G0LCK6RiGktuE2l4PbXA5FtbnIi8X3A7tWzE9Oy2rWkTQemAA8UmBMZmZWpchEcDMwVdLukrYCjgAWV9VZDHw8TR8GXOfrA2ZmzVXYqaF0zv9E4FpgHHB+RKyUdBqwLCIWAz8AfixpFbCeLFkUacSnl9qQ21wObnM5FNJm+Qu4mVm5+ZfFZmYl50RgZlZyYzIRlHFoixxt/pykOyXdLmmJpLr3FLeLodpcUe+DkkJS299qmKfNkj6c/tYrJV3U7BhHW4739qslXS/plvT+PqQVcY4WSedLWifpjjrlkvTtdDxul/SWEe80IsbUi+zC9B+A1wBbAbcB06rqnACcm6aPAC5pddxNaHMPsG2aPr4MbU71tgeWAjcCXa2Ouwl/56nALcDL0/yrWh13E9q8ADg+TU8DVrc67hG2+Z3AW4A76pQfAlwNCDgAuGmk+xyLPYK/DG0REc8CA0NbVJoJ/DBNXwa8W5KaGONoG7LNEXF9RDyVZm8k+11HO8vzdwb4CtkYVk83M7iC5GnzscA5EfEoQESsa3KMoy1PmwPYIU1PAB5oYnyjLiKWkt1FWc9M4EeRuRHYUdLOI9nnWEwEtYa2mFSvTkRsAgaGtmhXedpc6WiybxTtbMg2py7zrhHxc8aGPH/nPYA9JP1a0o2SZjQtumLkafM84EhJa4CrgE81J7SWGe7/9yG1xRATNnokHQl0Ae9qdSxFkrQF8E1gTotDabbxZKeHusl6fUslTY+Ix1oZVMFmAQsjYr6kA8l+m7RXRLzQ6sDaxVjsEZRxaIs8bUbSQcC/AYdGxDNNiq0oQ7V5e2AvoE/SarJzqYvb/IJxnr/zGmBxRDwXEfcCvyNLDO0qT5uPBi4FiIgbgK3JBqQbq3L9fx+OsZgIyji0xZBtlvRm4HtkSaDdzxvDEG2OiA0RMTEipkTEFLLrIodGxLLWhDsq8ry3LyfrDSBpItmponuaGONoy9PmPwHvBpD0RrJE8FBTo2yuxcDH0t1DBwAbIuLBkWxwzJ0aipfm0BaFytnmM4AO4CfpuvifIuLQlgU9QjnbPKbkbPO1wN9LuhN4Hjg5Itq2t5uzzZ8HzpP0WbILx3Pa+YudpIvJkvnEdN3jy8CWABFxLtl1kEOAVcBTwFEj3mcbHy8zMxsFY/HUkJmZDYMTgZlZyTkRmJmVnBOBmVnJORGYmZWcE4G1BUn/IukuSYskHTrEaKNzJH2nTtlVknYsLNA6JB0n6WNpeo6kXXKs01frB3
CV7Zc0T9JJafq09KNBJH1G0raj2wobq8bc7whszDoBOCgi1qT5hn4nEBEtGaI43f89YA5wBw0Ojpbund+s/RFxasXsZ4ALye4zNxuUewT2kifpXLJhiK+W9NnKb/ySPiTpDkm3SVpasdoukq6R9HtJX6/Y1mpJEyVNST2M89K4/b+QtE2q89Y0zvutks6oNS68pJ0lLU117pD0t2l5f0WdwyQtTNPzJJ0k6TCysZ4WpXW3kXSqpJvTdhZUjYT70Yp97Je2VbPHI2lh2ue/ALsA1ysbp/8Tks6sqHespG8N9+9gY5cTgb3kRcRxZN+eeyKi+gPsVOC9EbE3UPlL6X2Aw4HpwOGSdmVzU8mGbN4TeAz4YFp+AfDPEbEP2a9za/kIcG2qszdwa862XAYsA2ZHxD4RsRH4TkS8NSL2ArYB3lexyrZpHycA5+fcx7f56/HqIRuH5/2StkxVjsq7LSsHJwJrd78GFko6lmwIggFL0nhDTwN3ArWeyHZvRNyappcDU9L1g+3T4GUA9Z7wdTNwlKR5wPSIeGIEbehR9qS8FcDfAXtWlF0MfxmjfodGrm9ERD9wHfA+SW8AtoyIFSOI18YYJwJra6m38CWy0RiXSxp4rkTl6KrPU/t6WJ469fa7lOxJUveTJaKPDRRVVNt6qO1I2hr4LnBYREwHzqtar3oMmEbHhPk+2bWJo8h6PGZ/4URgbU3SayPipnSh9CFePDzvsKVx+5+QtH9aVHNAQmXPfF4bEeeRfcgOPDd2raQ3Knsewj/W2c0TZMNkw18/9B+W1EE2Gm6lw9P+3kE2yuSGnE2p3AcRcRPZsfkIqZdhNsB3DVm7O0PSVLLnty4he6btPiPc5tFko1m+APyS7Al21bqBkyU9B/QDAz2CU4AryZLSMrIRX6stBM6VtBE4kKwXcAfwZ7JTTpWelnQL2eiTnxhGGxYA10h6IF0ngOxawT4Dj7E0G+DRR82qSOpI59VJ9+vvHBGfbnFYIybpSuBbEbGk1bHYS4tPDZlt7h8GbtkE/hb4aqsDGglJO0r6HbDRScBqcY/AzKzk3CMwMys5JwIzs5JzIjAzKzknAjOzknMiMDMruf8PMke24amYHRkAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Histogram of the training labels, split into 100 bins.\n",
"fig, ax = plt.subplots()\n",
"n, bins, patches = ax.hist(train_labels, 100)\n",
"ax.set_xlabel('fishing suitability')\n",
"ax.set_ylabel('frequency')\n",
"ax.set_title('Histogram of training data labels')\n",
"ax.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Booster hyper-parameters; the full list of accepted values is documented at\n",
"# https://xgboost.readthedocs.io/en/latest/parameter.html\n",
"param = {\n",
"    'max_depth': 5,  # depth of a decision tree\n",
"    'learning_rate': 0.1,\n",
"    # 'min_split_loss': 1,\n",
"    # 'min_child_weight': 1,\n",
"    'max_delta_step': 10,\n",
"    'tree_method': 'exact',\n",
"    'predictor': 'cpu_predictor',\n",
"    'objective': 'reg:logistic',\n",
"}\n",
"\n",
"# Wrap the feature and label frames in XGBoost's DMatrix container.\n",
"xgtrain = xgb.DMatrix(train_data, label=train_labels)\n",
"\n",
"# Fit the gradient boosted trees for 1000 boosting rounds.\n",
"bst = xgb.train(params=param, dtrain=xgtrain, num_boost_round=1000)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:title={'center':'Feature importance'}, xlabel='F score', ylabel='Features'>"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAEWCAYAAACaBstRAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAgz0lEQVR4nO3df5yVdZ338ddbMCBREREEFRUJUWBguRVlUwNd1ATFLdIsbwEx71JzNX+x6+KP7m2VWBW6tXX9UZKWZFjIqqEUQiVZIg6IGkHBhoQiGAQ04Ax+7j/OxTgzDMx3YM6cC3w/H495zHWu6zrnel/XgfOe68c5RxGBmZlZin1KHcDMzPYcLg0zM0vm0jAzs2QuDTMzS+bSMDOzZC4NMzNL5tIw202S/kXSQ6XOYdYc5PdpWClJWg50ArbWGN0jIv68m495WUT8bPfS7Xkk3QZ0j4iLS53F9k7e07A8ODci2tb42eXCaAqSWpZy+btqT81texaXhuWSpAMlPSxplaSVkv5NUots2jGSZklaK2mNpO9LapdNexToCvy3pI2SbpQ0SNJbdR5/uaR/yIZvkzRV0mOS/gqM2tny68l6m6THsuGjJIWk0ZJWSPqLpC9LOlHSQknrJN1b476jJL0o6V5J6yX9TtIZNaZ3kTRd0nuSlkr6Up3l1sz9ZeBfgAuzdV+QzTda0puSNkj6o6T/U+MxBkl6S9J1klZn6zu6xvQ2ku6S9D9Zvl9JapNNO1nS3GydFkgatAtPte1hXBqWV48AVUB34O+AM4HLsmkC7gC6AMcBRwC3AUTE/wb+xId7L99MXN5wYCrQDvh+A8tPcRLwCeBCYCJwM/APQC/gAkmfqjPvH4AOwK3AjyW1z6ZNAd7K1nUE8O+STt9B7oeBfwd+mK1732ye1cAw4ABgNHCPpP41HuNQ4EDgMGAMcJ+kg7Jp/wH8L+DvgfbAjcAHkg4DngH+LRt/PfCkpEMasY1sD+TSsDyYlv21uk7SNEmdgHOAayJiU0SsBu4BPg8QEUsjYmZEbImId4G7gU/t+OGT/DoipkXEBxReXHe4/ET/NyI2R8TzwCbg8YhYHRErgV9SKKJtVgMTI6IyIn4ILAaGSjoC+CRwU/ZY5cBDwCX15Y6IivqCRMQzEfGHKJgDPA+cWmOWSuDr2fKfBTYCx0raB7gU+KeIWBkRWyNibkRsAS4Gno2IZ7NlzwTmZdvN9mI+Bmp5cH7Nk9aSBgD7AqskbRu9D7Aim94JmEThhW//bNpfdjPDihrDR+5s+YneqTFcUc/ttjVur4zaV6T8D4U9iy7AexGxoc60E3aQu16SPk1hD6YHhfX4OPBajVnWRkRVjdt/y/J1AFpT2Auq60jgc5LOrTFuX+CFhvLYns2lYXm0AtgCdKjzYrbNvwMB9ImI9ySdD9xbY3rdSwI3UXihBCA7N1H3MErN+zS0/KZ2mCTVKI6uwHTgz0B7SfvXKI6uwMoa9627rrVuS2oFPElh7+SpiKiUNI3CIb6GrAE2A8cAC+pMWwE8GhFf2u5etlfz4SnLnYhYReEQyl2SDpC0T3bye9shqP0pHEJZnx1bv6HOQ7wDdKtx+/dAa0lDJe0L/CvQajeW39Q6AldL2lfS5yicp3k2IlYAc4E7JLWWVEbhnMNjO3msd4CjskNLAB+jsK7vAlXZXseZKaGyQ3XfAe7OTsi3kDQwK6LHgHMlnZWNb52dVD+88atvexKXhuXVJRRe8N6gcOhpKtA5m3Y70B9YT+Fk7I/r3PcO4F+zcyTXR8R64AoK5wNWUtjzeIud29nym9pvKJw0XwN8AxgREWuzaRcBR1HY6/gJcGsD7z/5UfZ7raT52R7K1cATFNbjCxT2YlJdT+FQ1svAe8B4YJ+s0IZTuFrrXQp7Hjfg15S9nt/cZ1ZCkkZReCPiKaXOYpbCfxWYmVkyl4aZmSXz4SkzM0vmPQ0zM0u2V71Po127dtG9e/dSx9jOpk2b2G
+//UodYzt5zQX5zeZcjZfXbM71oVdeeWVNRKR9BExE7DU/PXr0iDx64YUXSh2hXnnNFZHfbM7VeHnN5lwfAuZF4uusD0+ZmVkyl4aZmSVzaZiZWTKXhpmZJXNpmJlZMpeGmZklc2mYmVkyl4aZmSVzaZiZWTKXhpmZJXNpmJlZMpeGmZklc2mYmVkyl4aZmSVzaZiZWTKXhpmZJXNpmJlZMpeGmZklc2mYmVkyl4aZmSVzaZiZWTKXhpmZJXNpmJlZMpeGmZklc2mYmVkyl4aZmSVzaZiZWTKXhpmZJXNpmJlZMpeGmZklc2mYmVkyl4aZmSVzaZiZWTKXhpmZJXNpmJlZMpeGmZklc2mYmVkyl4aZmSVzaZiZWTKXhpmZJXNpmJlZMpeGmZklc2mYmVkyl4aZmSVzaZiZWTKXhpmZJXNpmJlZMkVEqTM0ma7dusc+F0wqdYztXNenirtea1nqGNvJay7Ibzbnary8ZstbruV3DgVg9uzZDBo0qFmXLemViDghZV7vaZiZWTKXhplZTmzevJmvfOUr9O3bl169enHrrbcCMGvWLPr370/v3r0ZOXIkVVVVAEyYMIF+/frRr18/evfuTYsWLXjvvfcAuPTSS+nYsSO9e/fe4fIigquvvhqgt6SFkvo3lLFopSHpKEmLGjH/KEldatxeLqlDcdKZmeVPq1atuPvuu1mwYAHl5eXMmDGDuXPnMnLkSKZMmcKiRYs48sgjmTx5MgA33HAD5eXllJeXc8cdd/CpT32K9u3bAzBq1ChmzJix0+X99Kc/ZcmSJQCLgMuB/2woY572NEYBXRqaycxsbyWJNm3aAFBZWUllZSUtWrTgYx/7GD169ABgyJAhPPnkk9vd9/HHH+eiiy6qvn3aaadVF8iOPPXUU1xyySUARMRLQDtJnXd2n2KXRktJ35f0pqSpkj4u6RZJL0taJOkBFYwATgC+L6lcUpvs/l+VNF/Sa5J6FjmrmVnJbd26lX79+tGxY0eGDBnCgAEDqKqqYt68eQBMnTqVFStW1LrP3/72N2bMmMFnP/vZRi1r5cqVHHHEETVHvQUctrP7FPvSgWOBMRHxoqTvAFcA90bE1wEkPQoMi4ipkq4Cro+Iedk0gDUR0V/SFcD1wGV1FyDpcgq7VXTocAi39Kkq8io1Xqc2hSs18iavuSC/2Zyr8fKaLW+5Zs+eDUBFRQUTJ05k48aNjBs3jp49e3LjjTdy6aWXUllZyQknnEBFRUX1/FA459GzZ08WLlxY6zHffvttNm3aVGvemtauXcurr77aqJzFLo0VEfFiNvwYcDWwTNKNwMeB9sDrwH/v4P4/zn6/Anymvhki4gHgAShccpunS+i2ydulfdvkNRfkN5tzNV5es+Ut1/IvDgJqX3I7f/581q5dy/XXX8+VV14JwPPPP8+WLVtqXZY7adIkrrrqqu0u1V2+fDn77bffDi/hLSsro0OHWqeODwdW7ixnsQ9P1X0TSADfBkZERB/gQaD1Tu6/Jfu9leIXnJlZSb377rts3LgRKOxxzJw5k549e7J69WoAtmzZwvjx4/nyl79cfZ/169czZ84chg8f3ujlnXfeeXzve98DQNLJwPqIWLWz+xS7NLpKGpgNfwH4VTa8RlJbYESNeTcA+xc5j5lZbq1atYprr72WsrIyTjzxRIYMGcKwYcOYMGECxx13HGVlZZx77rmcfvrp1ff5yU9+wplnnsl+++1X67EuuugiBg4cyOLFizn88MN5+OGHAbj//vu5//77ATjnnHPo1q0bQG8Kf8Rf0VDGYv/1vhi4Mjuf8QaFy7kOonB519vAyzXmfQS4X1IFMBAzs4+YsrIyHnzwwe0OJ02YMIEJEybUe59Ro0YxatSo7cY//vjj9c5fcy9FEvfddx/f/va3F6W+I5yI2Gt+evToEXn0wgsvlDpCvfKaKyK/2Zyr8fKazbk+BMyLxNfZPL1Pw8zMcs6lYWZmyVwaZmaWzKVhZmbJXBpmZp
bMpWFmZslcGmZmlsylYWZmyVwaZmaWzKVhZmbJXBpmZpbMpWFmZslcGmZmlsylYWZmyVwaZmaWzKVhZmbJXBpmZpbMpWFmZslcGmZmlsylYWZmyVwaZmaWzKVhZmbJXBpmZpbMpWFmZslcGmZmlsylYWZmyVwaZmaWzKVhZmbJXBpmZpYsqTQkHSOpVTY8SNLVktoVNZmZmeVO6p7Gk8BWSd2BB4AjgB8ULZWZmeVSaml8EBFVwD8C/y8ibgA6Fy+WmZnlUWppVEq6CBgJPJ2N27c4kczMLK9SS2M0MBD4RkQsk3Q08GjxYpmZWR61TJkpIt6QdBPQNbu9DBhfzGBmZpY/qVdPnQuUAzOy2/0kTS9iLjMzy6HUw1O3AQOAdQARUQ50K0oiMzPLreQT4RGxvs64D5o6jJmZ5VvSOQ3gdUlfAFpI+gRwNTC3eLHMzCyPUvc0vgr0ArZQeFPfeuCaImUyM7OcanBPQ1IL4JmIGAzcXPxIZmaWVw3uaUTEVuADSQc2Qx4zM8ux1HMaG4HXJM0ENm0bGRFXFyWVmZnlkiKi4ZmkkfWNj4jJTZ5oN3Tt1j32uWBSqWNs57o+Vdz1Wmo/N5+85oL8ZnOuxstrtmLlWn7n0N26/+zZsxk0aFDThEkk6ZWIOCFl3tR3hOeqHMzM8mzz5s2cdtppbNmyhaqqKkaMGMHtt9/OqaeeyoYNGwBYvXo1AwYMYNq0aaxfv56LL76YP/3pT6xfv55bb72V0aNHA3DjjTfyzDPP8MEHHzBkyBAmTZqEpFrLe++997jwwgtZvnw5Rx11FE888QQHHXRQUdYt9R3hyyT9se5PU4eR9IikEfWMP0rSoqZenplZMbRq1YpZs2axYMECysvLmTFjBi+99BK//OUvKS8vp7y8nIEDB/KZz3wGgPvuu4/jjz+eBQsWMHHiRK677jref/995s6dy4svvsjChQtZtGgRL7/8MnPmzNlueXfeeSdnnHEGS5Ys4YwzzuDOO+8s2rql7pvV3G1pDXwOaN/0cczM9nySaNu2LQCVlZVUVlbW2jv461//yqxZs/jud79bPf+GDRuICCoqKmjfvj0tW7ZEEps3b+b9998nIqisrKRTp07bLe+pp55i9uzZAIwcOZJBgwYxfnxxPh4waU8jItbW+FkZEROB3TtwB0i6RNJCSQskbfvU3NMkzc32Zrbb6zAz2xNs3bqVfv360bFjR4YMGcJJJ51UPW3atGmcccYZHHDAAQBcddVVvPnmm3Tp0oVLL72USZMmsc8++zBw4EAGDx5M586d6dy5M2eddRbHHXfcdst655136Ny58BVHhx56KO+8807R1itpT0NS/xo396Gw57FbZ5Ak9QL+Ffj7iFgjqT1wN4UvdzoF6AlMB6Y28DiXA5cDdOhwCLf0qdqdWEXRqU3hpFve5DUX5DebczVeXrMVK9e2v/gBJk6cyMaNGxk3bhw9e/bk6KOPBgqHo84555zqeefMmUOHDh34wQ9+wJIlS7jssst46KGHWLduHb/61a94/PHHAbj++uvp1KkTZWVltZZZVVVVa7lbt26tdbsppb7w31VjuApYBlywm8s+HfhRRKwBiIj3st23aRHxAfCGpO33w+qIiAcofAUtXbt1j4/SVRq7K6+5IL/ZnKvx8pqtaFdPfXHQduPmz5/P2rVrGT16NGvWrGHp0qXcdNNNtG7dGoAJEyYwduxYTj31VCRx3HHHccghh/DGG28wdOhQPv3pTwPw8ssvs3nz5u2urjrssMM49thj6dy5M6tWraJLly5FuwIr9WNExkTE4OxnSERcDrxflESFjyrZRjucy8wsp959913WrVsHQEVFBTNnzqRnz54ATJ06lWHDhlUXBkDXrl35+c9/DhSuhFq8eDHdunWja9euzJkzh6qqKiorK5kzZ069h6fOO+88Jk8uXOQ6efJkhg8fXrR1Sy2N+g4R7fSwUYJZwOckHQyQHZ4yM9vjrVq1isGDB1NWVsaJJ5
7IkCFDGDZsGABTpkzhoosuqjX/uHHjmDt3Ln369OG6665j/PjxdOjQgREjRnDMMcfQp08f+vbtS9++fTn33HMBuOyyy5g3bx4AY8eOZebMmXziE5/gZz/7GWPHji3auu1030xSTwofVHigpM/UmHQAhauodllEvC7pG8AcSVuBV3fn8czM8qKsrIxXX63/Ja2+cw1dunTh+eefr56+7dBSixYt+K//+q96H+ehhx6qHj744IOr91SKraEDescCw4B2wLk1xm8AvrS7C8/eNLjDNw5GRNvs93Kg9+4uz8zMds9OSyMingKekjQwIn7dTJl2WZt9W7B4N9/CXwyzZ8+u9+RYqeU1F+Q3m3M1Xl6z5TVX3qVeOvCqpCspHKqqPiwVEZcWJZWZmeVS6onwR4FDgbOAOcDhFA5RmZnZR0hqaXSPiHHApuw8xFDgpAbuY2Zme5nU0qjMfq+T1Bs4EOhYnEhmZpZXqec0HpB0EDCOwkd7tAVuKVoqMzPLpdTv09h2QfAcoFvx4piZWZ6lfp9GJ0kPS/ppdvt4SWOKG83MzPIm9ZzGI8BzQJfs9u+Ba4qQx8zMciy1NDpExBPABwARUQVsLVoqMzPLpdTS2JR9sGAASDoZWF+0VGZmlkupV099jcJVU8dIehE4BPC36pmZfcQ09Cm3XSPiTxExX9KnKHyAoYDFEVG5s/uamdnep6HDU9NqDP8wIl6PiEUuDDOzj6aGSqPmN+f5/RlmZh9xDZVG7GDYzMw+gho6Ed5X0l8p7HG0yYbJbkdEHFDUdGZmlisNfQlTi+YKYmZm+Zf6Pg0zMzOXhpmZpXNpmJlZMpeGmZklc2mYmVkyl4aZmSVzaZiZWTKXhpmZJXNpmJlZMpeGmZklc2mYmVkyl4aZmSVzaZiZWTKXhpmZJXNpmJlZMpeGmZklc2mYmVkyl4aZmSVzaZiZWTKXhpmZJXNpmJlZMpeGmZklc2mYmVkyl4aZmSVzaZiZWbKWpQ7QlCoqt3LU2GdKHWM71/WpYpRzNUpeszlX4zVXtuV3Di36Msx7Gma2F9m8eTMDBgygb9++9OrVi1tvvRWAMWPG0LdvX8rKyhgxYgQbN26sdb8nn3wSScybN6963B133EH37t059thjee655+pd3rJlyzjppJPo3r07F154Ie+//37xVi4ncl8aks6XdHypc5hZ/rVq1YpZs2axYMECysvLmTFjBi+99BL33HMPCxYsYOHChXTt2pV77723+j4bNmxg0qRJnHTSSdXj3njjDaZMmcLrr7/OjBkzuOKKK9i6det2y7vpppu49tprWbp0KQcddBAPP/xws6xnKeW+NIDzAZeGmTVIEm3btgWgsrKSyspKJHHAAQcAEBFUVFQgqfo+48aN46abbqJ169bV45566ik+//nP06pVK44++mi6d+/Ob3/721rLighmzZrFiBEjABg5ciTTpk0r8hqWXslKQ9J+kp6RtEDSIkkXSrpT0huSFkr6D0l/D5wHTJBULumYUuU1sz3D1q1b6devHx07dmTIkCHVexCjR4/m0EMP5Xe/+x1f/epXAZg/fz4rVqxg6NDa50NWrlzJEUccUX378MMPZ+XKlbXmWbt2Le3ataNly5Y7nGdvVMoT4WcDf46IoQCSjgS+DvSMiJDULiLWSZoOPB0RU+t7EEmXA5cDdOhwCLf0qWqm+Ok6tSmcDMybvOaC/GZzrsZrrmyzZ8+uHp44cSIbN25k3Lhx9OzZk6OPPpqRI0dy8cUX861vfYvbb7+dT37yk1xzzTWMHTuW2bNns27dOl555RU2btzIypUrefPNN6sfc9WqVbz++ut06NChehnr16+noqKiep7Vq1ezadOmWjl2xcaNG3f7MYqplKXxGnCXpPHA08Cvgc3Aw5KezsY1KCIeAB4A6Nqte9z1Wv4uCLuuTxXO1Th5zeZcjddc2ZZ/cdB24+bPn8/atWsZPXp09bh9992Xb37zm5x66qm89dZbjB07FoC333
6b22+/nenTpzNgwAAABg0qPOYdd9zBmWeeycCBA6sfJyIYM2YMp5xyCi1btuTXv/41PXr0qL7Prpo9e/ZuP0YxlezwVET8HuhPoTz+DfgXYAAwFRgGzChVNjPbM7377rusW7cOgIqKCmbOnMmxxx7L0qVLgcIL/fTp0+nZsydt27ZlzZo1LF++nOXLl3PyySczffp0TjjhBM477zymTJnCli1bWLZsGUuWLKkukm0kMXjwYKZOLRwEmTx5MsOHD2/W9S2FUp7T6AL8LSIeAyYApwEHRsSzwLVA32zWDcD+pUlpZnuSVatWMXjwYMrKyjjxxBMZMmQIQ4cOZeTIkfTp04c+ffqwatUqbrnllp0+Tq9evbjgggs4/vjjOfvss7nvvvto0aIFAOeccw5//vOfARg/fjx333033bt3Z+3atYwZM6bo61hqpdyf7UPhBPcHQCXwNeBpSa0BZbcBpgAPSroaGBERfyhJWjPLvbKyMl599dXtxr/44osN3rfueYSbb76Zm2++ebv5nn322erhbt26bXdV1d6uZKUREc8Bdd8xM6Ce+V7El9yameVCPs+c7aI2+7ZgcQ4/SmD27Nn1nqQrtbzmgvxmc67Gy3M2a7w94c19ZmaWEy4NMzNL5tIwM7NkLg0zM0vm0jAzs2QuDTMzS+bSMDOzZC4NMzNL5tIwM7NkLg0zM0vm0jAzs2QuDTMzS+bSMDOzZC4NMzNL5tIwM7NkLg0zM0vm0jAzs2QuDTMzS+bSMDOzZC4NMzNL5tIwM7NkLg0zM0vm0jAzs2QuDTMzS+bSMDOzZC4NMzNL5tIwM7NkLg0zM0vm0jAzs2QuDTMzS+bSMDOzZC4NMzNL5tIwM7NkLg0zM0vm0jAzs2QuDTMzS+bSMDOzZC4NMzNL5tIwM7NkLg0zM0vm0jAzs2QuDTMzS+bSMDOzZC4NMzNL5tIwM7NkLg0zM0vm0jAzs2QuDTMzS6aIKHWGJiNpA7C41Dnq0QFYU+oQ9chrLshvNudqvLxmc64PHRkRh6TM2LLYSZrZ4og4odQh6pI0z7kaJ6/ZnKvx8prNuXaND0+ZmVkyl4aZmSXb20rjgVIH2AHnary8ZnOuxstrNufaBXvViXAzMyuuvW1Pw8zMisilYWZmyfaK0pB0tqTFkpZKGluiDMslvSapXNK8bFx7STMlLcl+H5SNl6RvZXkXSurfhDm+I2m1pEU1xjU6h6SR2fxLJI0sUq7bJK3Mtlm5pHNqTPvnLNdiSWfVGN+kz7WkIyS9IOkNSa9L+qdsfB622Y6ylXS7SWot6beSFmS5bs/GHy3pN9kyfijpY9n4Vtntpdn0oxrK28S5HpG0rMb26peNb7bnMnvMFpJelfR0druk22uXRcQe/QO0AP4AdAM+BiwAji9BjuVAhzrjvgmMzYbHAuOz4XOAnwICTgZ+04Q5TgP6A4t2NQfQHvhj9vugbPigIuS6Dbi+nnmPz57HVsDR2fPbohjPNdAZ6J8N7w/8Plt+HrbZjrKVdLtl6942G94X+E22LZ4APp+Nvx/4SjZ8BXB/Nvx54Ic7y1uEXI8AI+qZv9mey+xxvwb8AHg6u13S7bWrP3vDnsYAYGlE/DEi3gemAMNLnGmb4cDkbHgycH6N8d+LgpeAdpI6N8UCI+IXwHu7meMsYGZEvBcRfwFmAmcXIdeODAemRMSWiFgGLKXwPDf5cx0RqyJifja8AXgTOIx8bLMdZduRZtlu2bpvzG7um/0EcDowNRtfd5tt25ZTgTMkaSd5mzrXjjTbcynpcGAo8FB2W5R4e+2qvaE0DgNW1Lj9Fjv/j1UsATwv6RVJl2fjOkXEqmz4baBTNtzcmRuboznzXZUdGvjOtkNApcqVHQb4Owp/oeZqm9XJBiXebtmhlnJgNYUX1T8A6yKiqp5lVC8/m74eOLg5ckXEtu31jWx73SOpVd1cdZZfjOdyInAj8EF2+2BysL12xd5QGnlxSkT0Bz4NXCnptJoTo7B/Wf
Lrm/OSI/OfwDFAP2AVcFepgkhqCzwJXBMRf605rdTbrJ5sJd9uEbE1IvoBh1P4a7dnc2eoT91cknoD/0wh34kUDjnd1JyZJA0DVkfEK8253GLZG0pjJXBEjduHZ+OaVUSszH6vBn5C4T/SO9sOO2W/V2ezN3fmxuZolnwR8U72n/wD4EE+3NVu1lyS9qXwovz9iPhxNjoX26y+bHnZblmWdcALwEAKh3e2fZ5dzWVULz+bfiCwtplynZ0d5ouI2AJ8l+bfXp8EzpO0nMKhwdOBSeRoezVKc59EaeofCh+6+EcKJ4a2neTr1cwZ9gP2rzE8l8Ix0AnUPpn6zWx4KLVPwP22ifMcRe0Tzo3KQeGvsWUUTgIelA23L0KuzjWGr6VwvBagF7VP+P2RwsncJn+us3X/HjCxzviSb7OdZCvpdgMOAdplw22AXwLDgB9R+8TuFdnwldQ+sfvEzvIWIVfnGttzInBnKf79Z489iA9PhJd0e+3yOjT3AouyEoWrIH5P4bjqzSVYfrfsyVwAvL4tA4XjkD8HlgA/2/YPL/tHel+W9zXghCbM8jiFQxaVFI55jtmVHMClFE60LQVGFynXo9lyFwLTqf1ieHOWazHw6WI918ApFA49LQTKs59zcrLNdpStpNsNKANezZa/CLilxv+D32br/yOgVTa+dXZ7aTa9W0N5mzjXrGx7LQIe48MrrJrtuazxuIP4sDRKur129ccfI2JmZsn2hnMaZmbWTFwaZmaWzKVhZmbJXBpmZpbMpWFmZslaNjyL2UebpK0ULsnc5vyIWF6iOGYl5UtuzRogaWNEtG3G5bWMDz+TyCxXfHjKbDdJ6izpF9l3NSySdGo2/mxJ87Pvd/h5Nq69pGnZh+e9JKksG3+bpEclvQg8KukQSU9Kejn7+WQJV9Gsmg9PmTWsTfbJqQDLIuIf60z/AvBcRHxDUgvg45IOofC5UKdFxDJJ7bN5bwdejYjzJZ1O4WNC+mXTjqfwwZcVkn4A3BMRv5LUFXgOOK5oa2iWyKVh1rCKKHxy6o68DHwn+3DBaRFRLmkQ8IsofO8BEbHtu0ROAT6bjZsl6WBJB2TTpkdERTb8D8Dxha9RAOAASW3jw++LMCsJl4bZboqIX2QfhT8UeETS3cBfduGhNtUY3gc4OSI2N0VGs6bicxpmu0nSkcA7EfEghW9m6w+8BJwm6ehsnm2Hp34JfDEbNwhYE3W+vyPzPPDVGsvoV6T4Zo3iPQ2z3TcIuEFSJbARuCQi3s2+wfHHkvah8H0cQyh8v/d3JC0E/gaM3MFjXg3cl83XEvgF8OWiroVZAl9ya2ZmyXx4yszMkrk0zMwsmUvDzMySuTTMzCyZS8PMzJK5NMzMLJlLw8zMkv1/Zhht7nNrD+sAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the importance score XGBoost assigns to each feature of the trained booster.\n",
"xgb.plot_importance(booster=bst)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 2.40.1 (20161225.0304)\n",
" -->\n",
"<!-- Title: %3 Pages: 1 -->\n",
"<svg width=\"1359pt\" height=\"392pt\"\n",
" viewBox=\"0.00 0.00 1359.18 392.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 388)\">\n",
"<title>%3</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"transparent\" points=\"-4,4 -4,-388 1355.1822,-388 1355.1822,4 -4,4\"/>\n",
"<!-- 0 -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>0</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"555.0911\" cy=\"-366\" rx=\"68.7879\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"555.0911\" y=\"-362.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">bath&lt;&#45;210.5</text>\n",
"</g>\n",
"<!-- 1 -->\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>1</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"449.0911\" cy=\"-279\" rx=\"68.7879\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"449.0911\" y=\"-275.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">bath&lt;&#45;333.5</text>\n",
"</g>\n",
"<!-- 0&#45;&gt;1 -->\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>0&#45;&gt;1</title>\n",
"<path fill=\"none\" stroke=\"#0000ff\" d=\"M534.1489,-348.8116C518.0338,-335.585 495.5847,-317.1599 477.7957,-302.5594\"/>\n",
"<polygon fill=\"#0000ff\" stroke=\"#0000ff\" points=\"479.9402,-299.7916 469.9898,-296.1527 475.4991,-305.2025 479.9402,-299.7916\"/>\n",
"<text text-anchor=\"middle\" x=\"553.5911\" y=\"-318.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">yes, missing</text>\n",
"</g>\n",
"<!-- 2 -->\n",
"<g id=\"node3\" class=\"node\">\n",
"<title>2</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"766.0911\" cy=\"-279\" rx=\"94.4839\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"766.0911\" y=\"-275.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">chl&lt;0.359459996</text>\n",
"</g>\n",
"<!-- 0&#45;&gt;2 -->\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>0&#45;&gt;2</title>\n",
"<path fill=\"none\" stroke=\"#ff0000\" d=\"M592.328,-350.6464C627.0108,-336.3459 678.943,-314.9331 717.0012,-299.2409\"/>\n",
"<polygon fill=\"#ff0000\" stroke=\"#ff0000\" points=\"718.4483,-302.4301 726.3591,-295.3824 715.7799,-295.9586 718.4483,-302.4301\"/>\n",
"<text text-anchor=\"middle\" x=\"683.0911\" y=\"-318.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">no</text>\n",
"</g>\n",
"<!-- 3 -->\n",
"<g id=\"node4\" class=\"node\">\n",
"<title>3</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"262.0911\" cy=\"-192\" rx=\"100.1823\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"262.0911\" y=\"-188.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">leaf=&#45;0.181823254</text>\n",
"</g>\n",
"<!-- 1&#45;&gt;3 -->\n",
"<g id=\"edge3\" class=\"edge\">\n",
"<title>1&#45;&gt;3</title>\n",
"<path fill=\"none\" stroke=\"#0000ff\" d=\"M407.8768,-264.3401C391.5553,-258.2004 372.7478,-250.7041 356.0911,-243 337.465,-234.385 317.3996,-223.7019 300.6101,-214.3425\"/>\n",
"<polygon fill=\"#0000ff\" stroke=\"#0000ff\" points=\"302.081,-211.1542 291.6495,-209.3006 298.6483,-217.2547 302.081,-211.1542\"/>\n",
"<text text-anchor=\"middle\" x=\"400.5911\" y=\"-231.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">yes, missing</text>\n",
"</g>\n",
"<!-- 4 -->\n",
"<g id=\"node5\" class=\"node\">\n",
"<title>4</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"449.0911\" cy=\"-192\" rx=\"68.7879\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"449.0911\" y=\"-188.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">bath&lt;&#45;332.5</text>\n",
"</g>\n",
"<!-- 1&#45;&gt;4 -->\n",
"<g id=\"edge4\" class=\"edge\">\n",
"<title>1&#45;&gt;4</title>\n",
"<path fill=\"none\" stroke=\"#ff0000\" d=\"M449.0911,-260.9735C449.0911,-249.1918 449.0911,-233.5607 449.0911,-220.1581\"/>\n",
"<polygon fill=\"#ff0000\" stroke=\"#ff0000\" points=\"452.5912,-220.0033 449.0911,-210.0034 445.5912,-220.0034 452.5912,-220.0033\"/>\n",
"<text text-anchor=\"middle\" x=\"458.0911\" y=\"-231.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">no</text>\n",
"</g>\n",
"<!-- 5 -->\n",
"<g id=\"node10\" class=\"node\">\n",
"<title>5</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"766.0911\" cy=\"-192\" rx=\"63.0888\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"766.0911\" y=\"-188.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">bath&lt;&#45;70.5</text>\n",
"</g>\n",
"<!-- 2&#45;&gt;5 -->\n",
"<g id=\"edge9\" class=\"edge\">\n",
"<title>2&#45;&gt;5</title>\n",
"<path fill=\"none\" stroke=\"#0000ff\" d=\"M766.0911,-260.9735C766.0911,-249.1918 766.0911,-233.5607 766.0911,-220.1581\"/>\n",
"<polygon fill=\"#0000ff\" stroke=\"#0000ff\" points=\"769.5912,-220.0033 766.0911,-210.0034 762.5912,-220.0034 769.5912,-220.0033\"/>\n",
"<text text-anchor=\"middle\" x=\"810.5911\" y=\"-231.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">yes, missing</text>\n",
"</g>\n",
"<!-- 6 -->\n",
"<g id=\"node11\" class=\"node\">\n",
"<title>6</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"1054.0911\" cy=\"-192\" rx=\"63.0888\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"1054.0911\" y=\"-188.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">bath&lt;&#45;39.5</text>\n",
"</g>\n",
"<!-- 2&#45;&gt;6 -->\n",
"<g id=\"edge10\" class=\"edge\">\n",
"<title>2&#45;&gt;6</title>\n",
"<path fill=\"none\" stroke=\"#ff0000\" d=\"M816.5865,-263.7462C868.2667,-248.1345 948.2556,-223.9711 1000.8961,-208.0693\"/>\n",
"<polygon fill=\"#ff0000\" stroke=\"#ff0000\" points=\"1002.1577,-211.3445 1010.7183,-205.1022 1000.1334,-204.6436 1002.1577,-211.3445\"/>\n",
"<text text-anchor=\"middle\" x=\"937.0911\" y=\"-231.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">no</text>\n",
"</g>\n",
"<!-- 7 -->\n",
"<g id=\"node6\" class=\"node\">\n",
"<title>7</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"237.0911\" cy=\"-105\" rx=\"94.4839\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"237.0911\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">chl&lt;0.194019914</text>\n",
"</g>\n",
"<!-- 4&#45;&gt;7 -->\n",
"<g id=\"edge5\" class=\"edge\">\n",
"<title>4&#45;&gt;7</title>\n",
"<path fill=\"none\" stroke=\"#0000ff\" d=\"M409.5997,-177.0688C393.0201,-170.7082 373.5662,-163.1243 356.0911,-156 332.2491,-146.28 305.9177,-135.0475 284.0984,-125.6021\"/>\n",
"<polygon fill=\"#0000ff\" stroke=\"#0000ff\" points=\"285.2977,-122.3072 274.7309,-121.537 282.511,-128.7287 285.2977,-122.3072\"/>\n",
"<text text-anchor=\"middle\" x=\"400.5911\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">yes, missing</text>\n",
"</g>\n",
"<!-- 8 -->\n",
"<g id=\"node7\" class=\"node\">\n",
"<title>8</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"449.0911\" cy=\"-105\" rx=\"100.1823\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"449.0911\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">leaf=&#45;0.180456504</text>\n",
"</g>\n",
"<!-- 4&#45;&gt;8 -->\n",
"<g id=\"edge6\" class=\"edge\">\n",
"<title>4&#45;&gt;8</title>\n",
"<path fill=\"none\" stroke=\"#ff0000\" d=\"M449.0911,-173.9735C449.0911,-162.1918 449.0911,-146.5607 449.0911,-133.1581\"/>\n",
"<polygon fill=\"#ff0000\" stroke=\"#ff0000\" points=\"452.5912,-133.0033 449.0911,-123.0034 445.5912,-133.0034 452.5912,-133.0033\"/>\n",
"<text text-anchor=\"middle\" x=\"458.0911\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">no</text>\n",
"</g>\n",
"<!-- 13 -->\n",
"<g id=\"node8\" class=\"node\">\n",
"<title>13</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"100.0911\" cy=\"-18\" rx=\"100.1823\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"100.0911\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">leaf=&#45;0.129272148</text>\n",
"</g>\n",
"<!-- 7&#45;&gt;13 -->\n",
"<g id=\"edge7\" class=\"edge\">\n",
"<title>7&#45;&gt;13</title>\n",
"<path fill=\"none\" stroke=\"#0000ff\" d=\"M209.5338,-87.6282C200.1824,-81.7244 189.6773,-75.0827 180.0911,-69 165.7164,-59.8787 149.9082,-49.8092 136.1673,-41.044\"/>\n",
"<polygon fill=\"#0000ff\" stroke=\"#0000ff\" points=\"137.7847,-37.9243 127.472,-35.4955 134.0192,-43.8252 137.7847,-37.9243\"/>\n",
"<text text-anchor=\"middle\" x=\"224.5911\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">yes, missing</text>\n",
"</g>\n",
"<!-- 14 -->\n",
"<g id=\"node9\" class=\"node\">\n",
"<title>14</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"318.0911\" cy=\"-18\" rx=\"100.1823\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"318.0911\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">leaf=&#45;0.170920715</text>\n",
"</g>\n",
"<!-- 7&#45;&gt;14 -->\n",
"<g id=\"edge8\" class=\"edge\">\n",
"<title>7&#45;&gt;14</title>\n",
"<path fill=\"none\" stroke=\"#ff0000\" d=\"M253.8744,-86.9735C265.6038,-74.3752 281.4312,-57.3755 294.4377,-43.4055\"/>\n",
"<polygon fill=\"#ff0000\" stroke=\"#ff0000\" points=\"297.0768,-45.7073 301.3294,-36.0034 291.9535,-40.9374 297.0768,-45.7073\"/>\n",
"<text text-anchor=\"middle\" x=\"292.0911\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">no</text>\n",
"</g>\n",
"<!-- 9 -->\n",
"<g id=\"node12\" class=\"node\">\n",
"<title>9</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"661.0911\" cy=\"-105\" rx=\"94.4839\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"661.0911\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">chl&lt;0.154711366</text>\n",
"</g>\n",
"<!-- 5&#45;&gt;9 -->\n",
"<g id=\"edge11\" class=\"edge\">\n",
"<title>5&#45;&gt;9</title>\n",
"<path fill=\"none\" stroke=\"#0000ff\" d=\"M745.3465,-174.8116C729.5907,-161.7568 707.7227,-143.6376 690.2138,-129.1302\"/>\n",
"<polygon fill=\"#0000ff\" stroke=\"#0000ff\" points=\"692.4468,-126.4351 682.5135,-122.75 687.9807,-131.8253 692.4468,-126.4351\"/>\n",
"<text text-anchor=\"middle\" x=\"764.5911\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">yes, missing</text>\n",
"</g>\n",
"<!-- 10 -->\n",
"<g id=\"node13\" class=\"node\">\n",
"<title>10</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"873.0911\" cy=\"-105\" rx=\"100.1823\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"873.0911\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">leaf=&#45;0.180226803</text>\n",
"</g>\n",
"<!-- 5&#45;&gt;10 -->\n",
"<g id=\"edge12\" class=\"edge\">\n",
"<title>5&#45;&gt;10</title>\n",
"<path fill=\"none\" stroke=\"#ff0000\" d=\"M788.7444,-175.0172C796.5282,-169.0847 805.2529,-162.3263 813.0911,-156 823.668,-147.4632 835.0792,-137.8649 845.0814,-129.3173\"/>\n",
"<polygon fill=\"#ff0000\" stroke=\"#ff0000\" points=\"847.4497,-131.897 852.758,-122.7279 842.8904,-126.5855 847.4497,-131.897\"/>\n",
"<text text-anchor=\"middle\" x=\"839.0911\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">no</text>\n",
"</g>\n",
"<!-- 11 -->\n",
"<g id=\"node16\" class=\"node\">\n",
"<title>11</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"1054.0911\" cy=\"-105\" rx=\"63.0888\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"1054.0911\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">bath&lt;&#45;84.5</text>\n",
"</g>\n",
"<!-- 6&#45;&gt;11 -->\n",
"<g id=\"edge15\" class=\"edge\">\n",
"<title>6&#45;&gt;11</title>\n",
"<path fill=\"none\" stroke=\"#0000ff\" d=\"M1054.0911,-173.9735C1054.0911,-162.1918 1054.0911,-146.5607 1054.0911,-133.1581\"/>\n",
"<polygon fill=\"#0000ff\" stroke=\"#0000ff\" points=\"1057.5912,-133.0033 1054.0911,-123.0034 1050.5912,-133.0034 1057.5912,-133.0033\"/>\n",
"<text text-anchor=\"middle\" x=\"1098.5911\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">yes, missing</text>\n",
"</g>\n",
"<!-- 12 -->\n",
"<g id=\"node17\" class=\"node\">\n",
"<title>12</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"1235.0911\" cy=\"-105\" rx=\"100.1823\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"1235.0911\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">leaf=&#45;0.178304657</text>\n",
"</g>\n",
"<!-- 6&#45;&gt;12 -->\n",
"<g id=\"edge16\" class=\"edge\">\n",
"<title>6&#45;&gt;12</title>\n",
"<path fill=\"none\" stroke=\"#ff0000\" d=\"M1094.4967,-177.9248C1111.046,-171.7584 1130.2322,-164.0875 1147.0911,-156 1164.5258,-147.6363 1183.147,-137.0959 1198.7495,-127.7714\"/>\n",
"<polygon fill=\"#ff0000\" stroke=\"#ff0000\" points=\"1200.8087,-130.6165 1207.5554,-122.4475 1197.187,-124.6262 1200.8087,-130.6165\"/>\n",
"<text text-anchor=\"middle\" x=\"1184.0911\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">no</text>\n",
"</g>\n",
"<!-- 15 -->\n",
"<g id=\"node14\" class=\"node\">\n",
"<title>15</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"577.0911\" cy=\"-18\" rx=\"100.1823\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"577.0911\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">leaf=&#45;0.180237263</text>\n",
"</g>\n",
"<!-- 9&#45;&gt;15 -->\n",
"<g id=\"edge13\" class=\"edge\">\n",
"<title>9&#45;&gt;15</title>\n",
"<path fill=\"none\" stroke=\"#0000ff\" d=\"M643.6863,-86.9735C631.5224,-74.3752 615.1088,-57.3755 601.6206,-43.4055\"/>\n",
"<polygon fill=\"#0000ff\" stroke=\"#0000ff\" points=\"603.9376,-40.7663 594.4737,-36.0034 598.9017,-45.6285 603.9376,-40.7663\"/>\n",
"<text text-anchor=\"middle\" x=\"669.5911\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">yes, missing</text>\n",
"</g>\n",
"<!-- 16 -->\n",
"<g id=\"node15\" class=\"node\">\n",
"<title>16</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"795.0911\" cy=\"-18\" rx=\"100.1823\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"795.0911\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">leaf=&#45;0.175607845</text>\n",
"</g>\n",
"<!-- 9&#45;&gt;16 -->\n",
"<g id=\"edge14\" class=\"edge\">\n",
"<title>9&#45;&gt;16</title>\n",
"<path fill=\"none\" stroke=\"#ff0000\" d=\"M688.7103,-87.7248C698.0663,-81.8281 708.5585,-75.1662 718.0911,-69 732.0067,-59.9987 747.2374,-49.9523 760.4529,-41.172\"/>\n",
"<polygon fill=\"#ff0000\" stroke=\"#ff0000\" points=\"762.4266,-44.0628 768.8123,-35.6087 758.5483,-38.2354 762.4266,-44.0628\"/>\n",
"<text text-anchor=\"middle\" x=\"749.0911\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">no</text>\n",
"</g>\n",
"<!-- 17 -->\n",
"<g id=\"node18\" class=\"node\">\n",
"<title>17</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"1033.0911\" cy=\"-18\" rx=\"100.1823\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"1033.0911\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">leaf=&#45;0.177121475</text>\n",
"</g>\n",
"<!-- 11&#45;&gt;17 -->\n",
"<g id=\"edge17\" class=\"edge\">\n",
"<title>11&#45;&gt;17</title>\n",
"<path fill=\"none\" stroke=\"#0000ff\" d=\"M1049.7399,-86.9735C1046.8679,-75.0751 1043.0482,-59.2508 1039.792,-45.7606\"/>\n",
"<polygon fill=\"#0000ff\" stroke=\"#0000ff\" points=\"1043.1855,-44.9029 1037.4368,-36.0034 1036.381,-46.5455 1043.1855,-44.9029\"/>\n",
"<text text-anchor=\"middle\" x=\"1089.5911\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">yes, missing</text>\n",
"</g>\n",
"<!-- 18 -->\n",
"<g id=\"node19\" class=\"node\">\n",
"<title>18</title>\n",
"<ellipse fill=\"none\" stroke=\"#000000\" cx=\"1251.0911\" cy=\"-18\" rx=\"100.1823\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"1251.0911\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">leaf=&#45;0.159216523</text>\n",
"</g>\n",
"<!-- 11&#45;&gt;18 -->\n",
"<g id=\"edge18\" class=\"edge\">\n",
"<title>11&#45;&gt;18</title>\n",
"<path fill=\"none\" stroke=\"#ff0000\" d=\"M1089.7151,-89.91C1104.6828,-83.5238 1122.2609,-75.9637 1138.0911,-69 1160.2161,-59.2672 1184.675,-48.2592 1205.1625,-38.9678\"/>\n",
"<polygon fill=\"#ff0000\" stroke=\"#ff0000\" points=\"1206.6156,-42.152 1214.2735,-34.8303 1203.7212,-35.7784 1206.6156,-42.152\"/>\n",
"<text text-anchor=\"middle\" x=\"1179.0911\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">no</text>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.files.Source at 0x7f1cfd3e9a90>"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# render one boosted tree of the ensemble as a graphviz diagram\n",
"# (num_trees is the ordinal index of the tree to draw, not a count of trees)\n",
"tree_to_draw = 1\n",
"xgb.to_graphviz(bst, num_trees=tree_to_draw)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>chl</th>\n",
" <th>sst</th>\n",
" <th>bath</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>6.339128</td>\n",
" <td>18.666000</td>\n",
" <td>-2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>NaN</td>\n",
" <td>21.735666</td>\n",
" <td>-2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NaN</td>\n",
" <td>22.866667</td>\n",
" <td>-2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" <td>25.441250</td>\n",
" <td>-2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NaN</td>\n",
" <td>29.320999</td>\n",
" <td>-2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" chl sst bath\n",
"0 6.339128 18.666000 -2\n",
"1 NaN 21.735666 -2\n",
"2 NaN 22.866667 -2\n",
"3 NaN 25.441250 -2\n",
"4 NaN 29.320999 -2"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# read test data\n",
"# Parse the 2021 CSV once and split it into features and label afterwards;\n",
"# reading the same file a second time just for one column is wasted work.\n",
"# usecols keeps the columns in file order, so the feature order is unchanged.\n",
"test_frame = pd.read_csv('/tmp/testing-2021.csv', usecols=['bath', 'chl', 'sst', 'boat_present'])\n",
"test_data = test_frame.drop(columns=['boat_present'])  # feature columns only\n",
"test_labels = test_frame[['boat_present']]  # kept as a single-column DataFrame\n",
"\n",
"test_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAgvUlEQVR4nO3dfZgcZZX+8e8NAYQMBJfo/CAgQUxEIIBmBFRcZ1ZcAyLoihKMaBDMKqL4ghe4KiK4uyIiKqAxKMaXwIjoQhZ50Y2MWdFgkhUIAdEYogQkEQKBgahEzu+PegYqbc1MTWdquntyf66rr1R1PfXUOd2TPl1PVVcpIjAzM6u1VaMDMDOz5uQCYWZmhVwgzMyskAuEmZkVcoEwM7NCLhBmZlbIBcI2IWm5pM5Gx9FIkt4o6V5JvZJe3IDth6QXpOnZkj4xAtucKeln/SybmGIaU6KfTkmr64yh7nWtGi4QWxBJqyQdXvPcJh8MEbFfRPQM0k/pD4wW9Tng1Ihoi4hfNTKQiHh3RJw7WDtJPZJOHomYbMvhAmFNpwkKz57A8uHoqAlyMaubC4RtIr+XIelgSUskPSppjaTPp2YL07+PpGGYl0naStLHJf1e0lpJ35I0Ltfv29OyhyR9omY7Z0u6StJ3JD0KzEzb/oWkRyT9UdLFkrbN9ReSTpH0W0mPSTpX0t6Sfp7ivTLfvibHwlglbSepF9gauE3S7/pZPyS9X9JKSQ9KOl/SVmnZTEk3S7pQ0kPA2anfz0n6Q3odZ0vaPtffR1KO90t6Z8225kr6dG7+GEm3phx/J2mapH8HXglcnN6Pi1PbfST9WNI6SXdLekuun10kzU/9/BLYe8A/jE1jOlHSXel1XynpXwva/Ft6bVZJmpF7fsDXoqaPMyTdl7Zzt6RXl43RhklE+LGFPIBVwOE1z80EflbUBvgFcEKabgMOTdMTgQDG5NZ7J7ACeH5q+wPg22nZvkAvcBiwLdkQzpO57Zyd5t9A9qVle2AqcCgwJm3vLuADue0FcA2wE7Af8BdgQdr+OOBO4B39vA79xprr+wUDvI4B3AT8A/A84DfAybnXcyPwvhT79sCFwPzUfkfgv4H/TO2nAWuA/YGxwOX57QNzgU+n6YOB9cBr0us0AdgnLevpiyHNjwXuBU5McbwYeBDYNy3vBq5M7fYH7sv/HdTku8n7DbyOrKAIeBXwBPCStKwz5f95YLu0/HHghWn5QK9FJ7A6Tb8wxb9bLoa9G/1/aEt7NDyAuoKGy4C1wB0l278lfWAsBy5vdPwNfN1WkX1QP5J7PEH/BWIh8ClgfE0/m3xgpOcWAKfk5l9I9qE/BjgLuCK3bAfgr2xaIBYOEvsHgP/KzQfwitz8UuCM3PwFwBf66avfWHN9D1YgpuXmTwEWpOmZwB9yy5Q+IPfOPfcy4J7c3/Jncssm03+B+CpwYT8x9bBpgTgO+N+aNl8FPkm2h/QkqbikZf9ByQJRsPxq4LQ03UlWIMbmll8JfKLEa9HJMwXiBWT/xw8Htmn0/50t9dGqQ0xzyb55DUrSJOCjZB8m+5F90GzJ3hARO/c9yD7c+nMS2QfWryUtlnTUAG13A36fm/89WXFoT8vu7VsQEU8AD9Wsf29+RtJkSddKeiANO/0HML5mnTW56Q0F8211xFpWPt7fpz6Llj2HrCAuTcNljwA3pOf7Yqntqz97AIXDXgX2BA7p22ba7gzg/6VtjxnCdjch6QhJi9LQ1SPAkWz63jwcEY/X9L0bg78WT4uIFWT/V88G1krqlrRbbTurVksWiIhYCKzLP5fGn2+QtFTS/0raJy16F3BJRDyc1l07wuG2rIj4bUQcDzwXOA+4StJYsm+Tte4n+1Dq8zyyb5JrgD8Cu/ctSGPOu9Rurmb+K8CvgUkRsRPwb2TfQIfDQLGWtUfN+vfn5vO5PEhWrPbLFeZxEdFXvP5Y0Fd/7qX/YwW1r9+9wE/zXwYiOyvrPcCfyP
Itu92nSdoO+D7ZMGF7+pJxHZu+N89Ofyf5vu9n8Ndi04QiLo+Iw8jeqyD7G7QR1JIFoh9zgPdFxFTgdODL6fnJwOR04HCRpFJ7HgaS3ibpORHxFNlwFMBTZB8wT5GN4fe5AvigpL0ktZF94/9uRGwErgJeL+nl6cDx2Qz+Yb8j8CjQm4r9e4YprcFiLesjkp4taQ/gNOC7RY3Sa3cpcKGk5wJImiDptanJlWQH5feVtAPZEFB/vg6cKOnV6UD7hNwXoTVs+n5cS/Z3f4KkbdLjpZJeFBF/IzvucrakHSTtC7yjZN7bkh1b+BOwUdIRwD8XtPuUpG0lvRI4CvheidfiaZJeKOmfUkH6M1lheapkjDZMRkWBSP/JXw58T9KtZGOtu6bFY4BJZOObxwOXStp55KNsSdOA5crO7PkiMD0iNqQhon8Hbk5DBYeSjaV/m+y4xT1k/6nfBxARy9N0N9k35l6y8eW/DLDt04G3Ao+RfagUfgDXqd9Yh+AasuMetwI/JPvw7s8ZZAfFF6Xhsv8hO+5BRFwPfAH4SWrzk/46iYhfkh10vpDsYPVPeWZP6IvAsZIelvSliHiM7IN7Otm39wfIvoFvl9qfSjYE9wDZkO03yiSd+n0/WWF7mOw9ml/T7IG07H5gHvDuiPj1YK9Fje2Az5DtdTxAthf70TIx2vBRRGveMEjSRODaiNhf0k7A3RGxa0G72cAtEfGNNL8AODMiFo9owPa0VNAfIRs+uqfB4QyZpCCLfUWjYzGr0qjYg4iIR4F7JL0ZQJkD0+KryfYekDSebMhpZQPC3KJJen0azhhLNn69jOyMKTNrUi1ZICRdQXaO/gslrZZ0EtkZGidJuo3sdNZjUvMbgYck3Ul27vpHIqL2DBqr3jFkQw73kw35TY9W3X0120K07BCTmZlVqyX3IMzMrHotdyGx8ePHx8SJE+ta9/HHH2fs2LGDN2whzqk1OKfWMJpzWrp06YMR8Xc/ShxIyxWIiRMnsmTJkrrW7enpobOzc3gDajDn1BqcU2sYzTlJKv1r+T4eYjIzs0KVFQhJlym7lPIdA7TpVHbp4uWSflpVLGZmNnRV7kHMZYAL6qVfM38ZODpdRO/NFcZiZmZDVFmBKLqgXo23Aj+IiD+k9r6InplZE6n0dxD5y2EULPsCsA3ZzV52BL4YEd/qp59ZwCyA9vb2qd3d3XXF09vbS1tbf1eAbk3OqTU4p9YwmnPq6upaGhEdQ1q5yptNkN1opPCmPsDFwCKyO1qNB34LTB6sz6lTp0a9brrpprrXbVbOqTU4p9YwmnMClsQQP8MbeZrrauChyG4s8rikhcCBZLdvNDOzBmvkaa7XAIdJGpOug38I2X2HzcysCVS2B5EuqNcJjJe0muxGKNsARMTsiLhL0g3A7WQ3AvlaRPR7SqyZmY2sygpEZLeqHKzN+cD5VcVQa9l965l55g8BWPWZ143UZs3MWpJ/SW1mZoVcIMzMrJALhJmZFXKBMDOzQi4QZmZWyAXCzMwKuUCYmVkhFwgzMyvkAmFmZoVcIMzMrJALhJmZFXKBMDOzQi4QZmZWyAXCzMwKuUCYmVkhFwgzMyvkAmFmZoVcIMzMrFBlBULSZZLWShrwPtOSXippo6Rjq4rFzMyGrso9iLnAtIEaSNoaOA/4UYVxmJlZHSorEBGxEFg3SLP3Ad8H1lYVh5mZ1UcRUV3n0kTg2ojYv2DZBOByoAu4LLW7qp9+ZgGzANrb26d2d3fXFc/adetZsyGbnjJhXF19NJve3l7a2toaHcawck6twTm1hr6curq6lkZEx1DWHVNVUCV8ATgjIp6SNGDDiJgDzAHo6OiIzs7OujZ40bxruGBZlvKqGfX10Wx6enqo9/VoVs6pNTin1rA5OTWyQHQA3ak4jAeOlLQxIq5uYExmZpY0rEBExF5905Lmkg0xXd2oeMzMbFOVFQhJVwCdwHhJq4FPAtsARMTsqrZrZmbDo7ICERHHD6HtzKriMD
Oz+viX1GZmVsgFwszMCrlAmJlZIRcIMzMr5AJhZmaFXCDMzKyQC4SZmRVygTAzs0IuEGZmVsgFwszMCrlAmJlZIRcIMzMr5AJhZmaFXCDMzKyQC4SZmRVygTAzs0IuEGZmVqiyAiHpMklrJd3Rz/IZkm6XtEzSzyUdWFUsZmY2dFXuQcwFpg2w/B7gVRExBTgXmFNhLGZmNkRV3pN6oaSJAyz/eW52EbB7VbGYmdnQKSKq6zwrENdGxP6DtDsd2CciTu5n+SxgFkB7e/vU7u7uuuJZu249azZk01MmjKurj2bT29tLW1tbo8MYVs6pNTin1tCXU1dX19KI6BjKupXtQZQlqQs4CTisvzYRMYc0BNXR0RGdnZ11beuieddwwbIs5VUz6uuj2fT09FDv69GsnFNrcE6tYXNyamiBkHQA8DXgiIh4qJGxmJnZphp2mquk5wE/AE6IiN80Kg4zMytW2R6EpCuATmC8pNXAJ4FtACJiNnAWsAvwZUkAG4c6PmZmZtWp8iym4wdZfjJQeFDazMwaz7+kNjOzQi4QZmZWyAXCzMwKuUCYmVkhFwgzMyvkAmFmZoVcIMzMrJALhJmZFXKBMDOzQi4QZmZWyAXCzMwKuUCYmVkhFwgzMyvkAmFmZoVcIMzMrJALhJmZFXKBMDOzQi4QZmZWqLICIekySWsl3dHPckn6kqQVkm6X9JKqYjEzs6Grcg9iLjBtgOVHAJPSYxbwlQpjMTOzIaqsQETEQmDdAE2OAb4VmUXAzpJ2rSoeMzMbGkVEdZ1LE4FrI2L/gmXXAp+JiJ+l+QXAGRGxpKDtLLK9DNrb26d2d3fXFc/adetZsyGbnjJhXF19NJve3l7a2toaHcawck6twTm1hr6curq6lkZEx1DWHTNYA0lLgcuAyyPi4XqD3BwRMQeYA9DR0RGdnZ119XPRvGu4YFmW8qoZ9fXRbHp6eqj39WhWzqk1OKfWsDk5lRliOg7YDVgsqVvSayWprq1t6j5gj9z87uk5MzNrAoMWiIhYEREfAyYDl5PtTfxe0qck/cNmbHs+8PZ0NtOhwPqI+ONm9GdmZsNo0CEmAEkHACcCRwLfB+YBhwE/AQ7qZ50rgE5gvKTVwCeBbQAiYjZwXepvBfBE6t/MzJpE2WMQjwBfB86MiL+kRbdIekV/60XE8QP1G9nR8feWD9XMzEZSmT2IN0fEyqIFEfEvwxyPmZk1iTIHqU+WtHPfjKRnS/p0dSGZmVkzKFMgjoiIR/pm0qmuR1YWkZmZNYUyBWJrSdv1zUjaHthugPZmZjYKlDkGMQ9YIOkbaf5E4JvVhWRmZs1g0AIREedJuh14dXrq3Ii4sdqwzMys0Ur9DiIirgeurzgWMzNrIoMeg5D0L5J+K2m9pEclPSbp0ZEIzszMGqfMHsRngddHxF1VB2NmZs2jzFlMa1wczMy2PGX2IJZI+i5wNdB3mQ0i4gdVBWVmZo1XpkDsRHYxvX/OPReAC4SZ2ShW5jRXX2XVzGwLVOYspsmSFki6I80fIOnj1YdmZmaNVOYg9aXAR4EnASLidmB6lUGZmVnjlSkQO0TEL2ue21hFMGZm1jzKFIgHJe1NdmAaSccCvjWomdkoV6ZAvBf4KrCPpPuADwDvKdO5pGmS7pa0QtKZBcufJ+kmSb+SdLskX0bczKxJlDmLaSVwuKSxwFYR8ViZjiVtDVwCvAZYDSyWND8i7sw1+zhwZUR8RdK+ZPepnjjEHMzMrAJl7kl9Vs08ABFxziCrHgys6LtdqaRu4BggXyCC7HcWAOOA+0tFbWZmlVNEDNxA+nBu9lnAUcBdEfHOQdY7FpgWESen+ROAQyLi1FybXYEfAc8GxgKHR8TSgr5mAbMA2tvbp3Z3d5dI7e+tXbeeNRuy6SkTxtXVR7Pp7e2lra2t0WEMK+fUGpxTa+jLqaura2lEdAxl3TJDTBfk5yV9Dhiu+0EcD8yNiAskvQz4tqT9I+KpmhjmAHMAOjo6orOzs6
6NXTTvGi5YlqW8akZ9fTSbnp4e6n09mpVzag3OqTVsTk5lDlLX2gHYvUS7+4A9cvO7p+fyTgKuBIiIX5DtoYyvIyYzMxtmZY5BLCOd4gpsDTwHGOz4A8BiYJKkvcgKw3TgrTVt/kB2p7q5kl5EViD+VC50MzOrUpmL9R2Vm95IdvnvQX8oFxEbJZ1KNhy1NXBZRCyXdA6wJCLmAx8GLpX0QbIiNDMGOyhiZmYjokyBqD2tdae+M5kAImJdfytGxHVkp67mnzsrN30n8IpSkZqZ2YgqUyD+j+xYwsOAgJ3JhoYg+9b//EoiMzOzhipzkPrHZLccHR8Ru5ANOf0oIvaKCBcHM7NRqkyBODQNFQEQEdcDL68uJDMzawZlhpjuT/d/+E6an4F/8WxmNuqV2YM4nuzU1v8iu83oc9JzZmY2ipX5JfU64DRJYyPi8RGIyczMmkCZW46+XNKdwF1p/kBJX648MjMza6gyQ0wXAq8FHgKIiNuAf6wyKDMza7xS12KKiHtrnvpbBbGYmVkTKXMW072SXg6EpG2A00jDTWZmNnqV2YN4N9ltRyeQXXTvoDRvZmaj2IB7EOm2oV+MiBkjFI+ZmTWJAfcgIuJvwJ6Sth2heMzMrEmUOQaxErhZ0nzg6d9BRMTnK4vKzMwart89CEnfTpNHA9emtjvmHmZmNooNtAcxVdJuZJf2vmiE4jEzsyYxUIGYDSwA9gKW5J4Xvg+Emdmo1+8QU0R8KSJeBHwjIp6fe5S+D4SkaZLulrRC0pn9tHmLpDslLZd0eZ15mJnZMCtzsb731NNxOkX2EuA1wGpgsaT56TajfW0mAR8FXhERD0t6bj3bMjOz4VfqUht1OhhYERErI+KvQDdwTE2bdwGXRMTDABGxtsJ4zMxsCKosEBOA/DWcVqfn8iYDkyXdLGmRpGkVxmNmZkOgiKimY+lYYFpEnJzmTwAOiYhTc22uBZ4E3gLsDiwEpkTEIzV9zQJmAbS3t0/t7u6uK6a169azZkM2PWXCuLr6aDa9vb20tbU1Ooxh5Zxag3NqDX05dXV1LY2IjqGsW+aHcvW6D9gjN797ei5vNXBLRDwJ3CPpN8AkYHG+UUTMAeYAdHR0RGdnZ10BXTTvGi5YlqW8akZ9fTSbnp4e6n09mpVzag3OqTVsTk5VDjEtBiZJ2itdqmM6ML+mzdVAJ4Ck8WRDTisrjMnMzEqqrEBExEbgVOBGssuDXxkRyyWdI+no1OxG4KF0x7qbgI9ExENVxWRmZuVVOcRERFwHXFfz3Fm56QA+lB5mZtZEqhxiMjOzFuYCYWZmhVwgzMyskAuEmZkVcoEwM7NCLhBmZlbIBcLMzAq5QJiZWSEXCDMzK+QCYWZmhVwgzMyskAuEmZkVcoEwM7NCLhBmZlbIBcLMzAq5QJiZWSEXCDMzK+QCYWZmhSotEJKmSbpb0gpJZw7Q7k2SQlJHlfGYmVl5lRUISVsDlwBHAPsCx0vat6DdjsBpwC1VxWJmZkNX5R7EwcCKiFgZEX8FuoFjCtqdC5wH/LnCWMzMbIgUEdV0LB0LTIuIk9P8CcAhEXFqrs1LgI9FxJsk9QCnR8SSgr5mAbMA2tvbp3Z3d9cV09p161mzIZueMmFcXX00m97eXtra2hodxrByTq3BObWGvpy6urqWRsSQhvHHVBXUYCRtBXwemDlY24iYA8wB6OjoiM7Ozrq2edG8a7hgWZbyqhn19dFsenp6qPf1aFbOqTU4p9awOTlVOcR0H7BHbn739FyfHYH9gR5Jq4BDgfk+UG1m1hyqLBCLgUmS9pK0LTAdmN+3MCLWR8T4iJgYEROBRcDRRUNMZmY28iorEBGxETgVuBG4C7gyIpZLOkfS0VVt18zMhkelxyAi4jrguprnzuqnbWeVsZiZ2dD4l9RmZlbIBcLMzAq5QJiZWSEXCDMzK+QCYWZmhVwgzMyskAuEmZkVcoEwM7NCLhBmZlbIBcLMzAq5QJiZWSEXCDMzK+QCYW
ZmhVwgzMyskAuEmZkVcoEwM7NCLhBmZlao0gIhaZqkuyWtkHRmwfIPSbpT0u2SFkjas8p4zMysvMoKhKStgUuAI4B9geMl7VvT7FdAR0QcAFwFfLaqeMzMbGiq3IM4GFgRESsj4q9AN3BMvkFE3BQRT6TZRcDuFcZjZmZDoIiopmPpWGBaRJyc5k8ADomIU/tpfzHwQER8umDZLGAWQHt7+9Tu7u66Ylq7bj1rNmTTUyaMq6uPZtPb20tbW1ujwxhWzqk1OKfW0JdTV1fX0ojoGMq6Y6oKaigkvQ3oAF5VtDwi5gBzADo6OqKzs7Ou7Vw07xouWJalvGpGfX00m56eHup9PZqVc2oNzqk1bE5OVRaI+4A9cvO7p+c2Ielw4GPAqyLiLxXGY2ZmQ1DlMYjFwCRJe0naFpgOzM83kPRi4KvA0RGxtsJYzMxsiCorEBGxETgVuBG4C7gyIpZLOkfS0anZ+UAb8D1Jt0qa3093ZmY2wio9BhER1wHX1Tx3Vm768Cq3b2Zm9fMvqc3MrJALhJmZFXKBMDOzQi4QZmZWyAXCzMwKuUCYmVkhFwgzMyvkAmFmZoVcIMzMrJALhJmZFXKBMDOzQi4QZmZWyAXCzMwKuUCYmVkhFwgzMyvkAmFmZoVcIMzMrFCld5RrZhPP/OEm86s+87oGRWJm1pwq3YOQNE3S3ZJWSDqzYPl2kr6blt8iaWKV8ZiZWXmV7UFI2hq4BHgNsBpYLGl+RNyZa3YS8HBEvEDSdOA84LiqYhpIfo/CexNmZtUOMR0MrIiIlQCSuoFjgHyBOAY4O01fBVwsSRERFcY1qNrhpyIuImY22lVZICYA9+bmVwOH9NcmIjZKWg/sAjyYbyRpFjArzfZKurvOmMbX9l0vnTccvQyLYcupiTin1uCcWkNfTnsOdcWWOEgdEXOAOZvbj6QlEdExDCE1DefUGpxTa3BOm6ryIPV9wB65+d3Tc4VtJI0BxgEPVRiTmZmVVGWBWAxMkrSXpG2B6cD8mjbzgXek6WOBnzT6+IOZmWUqG2JKxxROBW4EtgYui4jlks4BlkTEfODrwLclrQDWkRWRKm32MFUTck6twTm1BueUI39hNzOzIr7UhpmZFXKBMDOzQqOyQIzGS3yUyOkfJf2fpI2Sjm1EjENVIqcPSbpT0u2SFkga8nncI61ETu+WtEzSrZJ+JmnfRsQ5FIPllGv3JkkhqelPEy3xPs2U9Kf0Pt0q6eRGxDkUZd4nSW9J/6eWS7p80E4jYlQ9yA6I/w54PrAtcBuwb02bU4DZaXo68N1Gxz0MOU0EDgC+BRzb6JiHKacuYIc0/Z5R8j7tlJs+Grih0XFvbk6p3Y7AQmAR0NHouIfhfZoJXNzoWIc5p0nAr4Bnp/nnDtbvaNyDePoSHxHxV6DvEh95xwDfTNNXAa+WpBGMcagGzSkiVkXE7cBTjQiwDmVyuikinkizi8h+S9PMyuT0aG52LNDsZ4mU+f8EcC7ZtdT+PJLB1alsTq2kTE7vAi6JiIcBImLtYJ2OxgJRdImPCf21iYiNQN8lPppVmZxazVBzOgm4vtKINl+pnCS9V9LvgM8C7x+h2Oo1aE6SXgLsERGDX8SsOZT923tTGt68StIeBcubSZmcJgOTJd0saZGkaYN1OhoLhI0ykt4GdADnNzqW4RARl0TE3sAZwMcbHc/mkLQV8Hngw42OZZj9NzAxIg4AfswzIw6tbAzZMFMncDxwqaSdB1phNBaI0XiJjzI5tZpSOUk6HPgYcHRE/GWEYqvXUN+nbuANVQY0DAbLaUdgf6BH0irgUGB+kx+oHvR9ioiHcn9vXwOmjlBs9Srzt7camB8RT0bEPcBvyApG/xp9cKWCgzVjgJXAXjxzsGa/mjbvZdOD1Fc2Ou7NzSnXdi6tcZC6zPv0YrIDb5MaHe8w5jQpN/16sqsKNDz2zcmppn0PzX+Qusz7tGtu+o3AokbHPQw5TQO+mabHkw1J7TJgv41OrKIX68
hUHX8HfCw9dw7Zt1CAZwHfA1YAvwSe3+iYhyGnl5J9Q3icbG9oeaNjHoac/gdYA9yaHvMbHfMw5PRFYHnK56aBPmyb5TFYTjVtm75AlHyf/jO9T7el92mfRsc8DDmJbDjwTmAZMH2wPn2pDTMzKzQaj0GYmdkwcIEwM7NCLhBmZlbIBcLMzAq5QJiZWSEXCGsJkt4v6S5J8yQdPchVRWdKurifZdcN9uvRKqSruL49Tc+UtFuJdXqKfnCWz1/S2ZJOT9PnpB8WIukDknYY3ixsS1PZLUfNhtkpwOERsTrN197fvJSIOHL4QhrSdmfnZmcCdwD319nXfAryj4izcrMfAL4DPFHbzqws70FY05M0m+wyxtdL+mB+D0HSmyXdIek2SQtzq+0m6QZJv5X02VxfqySNlzQx7ZFcmq6N/yNJ26c2L00XabtV0vmS7iiIaVdJC1ObOyS9Mj3fm2tzrKS5afpsSaene3V0APPSuttLOkvS4tTPnJorC5+Q28bBqa/CPSRJc9M23w/sBtwk6SZJ75T0hVy7d0m6cKjvg215XCCs6UXEu8m+bXdFRO0H21nAayPiQLL7K/Q5CDgOmAIc18/VOCeRXf54P+AR4E3p+W8A/xoRBwF/6yestwI3pjYHkv0yukwuVwFLgBkRcVBEbCC778BLI2J/YHvgqNwqO6RtnAJcVnIbX+KZ16sLuBJ4vaRtUpMTy/ZlWzYXCGt1NwNzJb2L7KYpfRZExPqI+DPZpQWK7kZ3T0TcmqaXAhPT8YkdI+IX6fn+7rq1GDhR0tnAlIh4bDNy6FJ2Z8NlwD8B++WWXQEQEQuBneo5fhIRvcBPgKMk7QNsExHLNiNe20K4QFhLS3sXHye7kuVSSX339chf+fVvFB9vK9Omv+0uBP6R7IqZc/sOQLPpDYCeNVg/kp4FfJnsAotTgEtr1qu9Fk6918b5GtmxjxPJ9pDMBuUCYS1N0t4RcUs6QPsnNr3k8ZBFxCPAY5IOSU9N72e7ewJrIuJSsg/fl6RFayS9KN0n4Y39bOYxsstkwzPF4EFJbUDt/cSPS9s7DFgfEetLppLfBhFxC9lr81bSXonZYHwWk7W68yVNIrtS5QKyq28etJl9nkR2M5WngJ+S3XGwVifwEUlPAr1A3x7EmcC1ZMVqCdBWsO5cYLakDcDLyPYa7gAeIBu6yvuzpF8B2wDvHEIOc4AbJN2fjkNAdizioEi3nDQbjK/malZDUlsatyf93mDXiDitwWFtNknXAhdGxIJGx2KtwUNMZn/vdX2nlgKvBD7d6IA2h6SdJf0G2ODiYEPhPQgzMyvkPQgzMyvkAmFmZoVcIMzMrJALhJmZFXKBMDOzQv8fA8JUwxkG1tYAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# score the test set with the trained booster, then plot how the scores are distributed\n",
"xgtest = xgb.DMatrix(test_data, label=test_labels)\n",
"xgtest_labels = bst.predict(xgtest)  # reused by the accuracy-assessment cell\n",
"\n",
"fig, ax = plt.subplots()\n",
"n, bins, patches = ax.hist(xgtest_labels, bins=100)\n",
"ax.set_xlabel('fishing suitability')\n",
"ax.set_ylabel('frequency')\n",
"ax.set_title('Histogram of predicted labels')\n",
"ax.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train Log Loss / Binary Cross Entropy = 0.0140\n",
"Test Log Loss / Binary Cross Entropy = 1.1559\n",
"Confusion matrix : \n",
" [[ 1407 129657]\n",
" [ 4407 1503837]]\n",
"Classification report : \n",
" precision recall f1-score support\n",
"\n",
" 1 0.24 0.01 0.02 131064\n",
" 0 0.92 1.00 0.96 1508244\n",
"\n",
" accuracy 0.92 1639308\n",
" macro avg 0.58 0.50 0.49 1639308\n",
"weighted avg 0.87 0.92 0.88 1639308\n",
"\n"
]
}
],
"source": [
"# accuracy assessment\n",
"# binary cross entropy for regression, f1 and confusion matrix for classification\n",
"# NOTE(review): LogisticRegression is not used in this cell; drop the import if no later cell needs it\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import log_loss, confusion_matrix, classification_report\n",
"\n",
"# convert the pandas label objects to numpy arrays for the sklearn metrics\n",
"train_labels_np = train_labels.to_numpy()\n",
"test_labels_np = test_labels.to_numpy()\n",
"\n",
"# calculate train loss\n",
"train_loss = log_loss(train_labels_np, bst.predict(xgtrain)) # actual, predicted\n",
"print('Train Log Loss / Binary Cross Entropy = {:.4f}'.format(train_loss))\n",
"\n",
"# calculate test loss\n",
"# xgtest_labels holds bst.predict(xgtest) from the histogram cell; run that cell first\n",
"test_loss = log_loss(test_labels_np, xgtest_labels) # actual, predicted\n",
"print('Test Log Loss / Binary Cross Entropy = {:.4f}'.format(test_loss))\n",
"\n",
"# regression to classification\n",
"# assign 1 if score is at least pos_thres, 0 otherwise\n",
"pos_thres = 0.05\n",
"# vectorized comparison: same 0/1 integer array as a per-element loop, without iterating in Python\n",
"xgtest_labels_thres = (xgtest_labels >= pos_thres).astype(int)\n",
"\n",
"# confusion matrix (labels=[1, 0] puts the positive class in the first row/column)\n",
"matrix = confusion_matrix(test_labels_np, xgtest_labels_thres, labels=[1, 0])\n",
"print('Confusion matrix : \\n',matrix)\n",
"\n",
"# precision/recall/f1\n",
"report = classification_report(test_labels_np, xgtest_labels_thres, labels=[1, 0]) # actual then predicted\n",
"print('Classification report : \\n', report)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAenElEQVR4nO3deZhdVZ3u8e8rMwQICl2XIU0hIooEUUpE1L4VoVsEDHSLiKISRNMoCira4IwD9zIoNH0dMAiCigSkVVDB4UFKHm2hSRAMg2iEIIRJJAEKUAm+94+9a/ehqOHUqTNU1Xk/z1MPe1hnr986J5zfWWvvvbZsExERAfCMTgcQERFTR5JCRERUkhQiIqKSpBAREZUkhYiIqCQpREREJUkhnkbSTZL6Ox1HJ0n6Z0l3ShqU9KI21Xm5pMPG2H+mpI+1I5Z2k3SupM90Oo5IUug6klZI2nvYtgWSfj60bvsFtgfGOU6vJEtau0WhdtpngXfbnmX7V+2o0PZrbJ8HT/9Myv1H2v50O2KJ7pWkEFPSFEg22wI3dTiGiLZLUoinqe1NSNpd0hJJD0u6T9JpZbGryv+uLodYXibpGZI+KukOSfdL+pqkTWuO+9Zy358kfWxYPSdIuljSNyQ9DCwo6/6lpNWS7pH0eUnr1hzPkt4l6XeSHpH0aUnbS/qvMt6LassPa+OIsUpaT9IgsBZwg6Tfj/J6Szpa0m2SHpB0qqRnjHXsct/6ZRv/VLbrWkk95b4BSW+X9HzgTOBl5Xu7utxfDbFIukXS/jXxrC3pj5JeXK7vUb4PqyXdMNZwoKTjJK0s38NbJe1V89k35f2X1C/pLkkfLt+vFZIOHSOm/SVdX9b9X5J2GS/eaBLb+euiP2AFsPewbQuAn49UBvgl8JZyeRawR7ncCxhYu+Z1bwOWA88uy34b+Hq5bydgEHgFsC7F8MwTNfWcUK4fSPFjZQNgN2APYO2yvluA99bUZ+ASYBPgBcBfgCvK+jcFbgYOG+V9GDXWmmM/Z4z30cCVwDOBvwd+C7y9jvfhX4HvARtSJJ7dgE3KfQM1x3jKZ1JuOxf4TLn8ceD8mn37AbeUy1sDfwL2Ld/LfyzXtxihHTsCdwJb1Xyu25fLTXv/gX5gDXAasB7wv4FHgR1HaNuLgPuBl5bv0WEU/ybXGyve/DXnLz2F7vTd8hfY6vJX6BfHKPsE8BxJm9setH31GGUPBU6zfZvtQeBDwCEqhoIOAr5n++e2/0rxpTZ84q1f2v6u7b/Zftz2UttX215jewXwZYovk1qn2H7Y9k3AjcCPy/ofAi6n+IKZaKz1Otn2g7b/APw78MY6jv0E8CyKhPNk2caHJ1DnkG8C8yVtWK6/CbigXH4zcJnty8r38ifAEookMdyTFF+2O0lax/YK278HaNH7/zHbf7H9M+AHwMEjxLQQ+LLta8r36DyKhLPHWPFGcyQpdKcDbc8e+gPeNUbZI4DnAr8phzr2H6PsVsAdNet3UPzK7Cn33Tm0w/ZjFL9ea91ZuyLpuZK+L+neckjp/wCbD3vNfTXLj4+wPquBWOtVG+8d5THHO/bXgR8BiyXdLekUSetMoE4AbC+n+OX+2jIxzKdIFFCcD3n9sMT/CmDLUY7zXoqe2v2SFkvaClry/q+y/WjNeu17Vmtb4Nhh8c+h6B2MGm80R5JCjMn272y/Efg74GTgYkkb8fRf+QB3U/wPPeTvKYYM7gPuAbYZ2iFpA4pfzE+pbtj6l4DfADvY3gT4MKDGW1N3rPWaM+z1d493bNtP2P6k7Z2APYH9gbeOcOx6pi++gKJ3cgBwc/mFCUWy+npt4re9ke2TRjqI7W/afkUZsyk+Z2j++79Z+W9nSO17VutO4MRh8W9o+4Jx4o0mSFKIMUl6s6QtbP8NWF1u/hvwx/K/z64pfgHwPknbSZpF8cvyQttrgIspftXuWZ58PIHxv2A2Bh4GBiU9D3hnk5o1Xqz1+qCkzSTNAY
4BLhzv2JLmSZoraS2Ktj1B8T4Odx+wjUY5UV5aDPwTxfvyzZrt36B4r18taa3y5Ha/pG2GH0DSjpJeJWk94M8Uv+6H4mnF+/9JSetKeiVFQvzWCGXOAo6U9FIVNpK0n6SNx4k3miBJIcazD3CTiityzgAOKcf7HwNOBH5RdvH3AM6hGB65Crid4n/a9wCUY87vofgiu4fipPP9FGPFo/kAxVj5IxRfFBeOUXaiRo11Ai4BlgLXU4yPn13Hsf8XRYJ8mGL452dl2eF+SnFJ7L2SHhipctv3UFwIsCc1743tOyl6Dx+mSN53Ah9k5P/f1wNOAh4A7qXoEX6o3Nfs9/9eYBVF7+B84EjbvxmhXUuAdwCfL8svpzjxPl680QSy85CdaL/yF/RqiqGJ2zsczoRJMkXsy8ctHKi4JPYbtp/WW4mpJT2FaBtJr5W0YTmu/FlgGcWlhhExRSQpRDsdQDF0cDewA8VQVLqqEVNIho8iIqKSnkJERFQ6PenYpGy++ebu7e3tdBgT8uijj7LRRhuNX3AG6Ja2dks7IW2dKZYuXfqA7S1G2teypCDpHIrrkO+3vfOwfcdSnGjcwvYDkkRxueO+wGPAAtvXjVdHb28vS5YsaX7wLTQwMEB/f3+nw2iLbmlrt7QT0taZQtIdo+1r5fDRuRTXuA8PZg7FDTd/qNn8GooTjztQzHvypRbGFRERo2hZUrB9FfDgCLtOB/6Np97GfwDwNReuBmZLeto8LRER0VptPdEs6QBgpe0bhu3amqdOLnZXuS0iItqobSeay5kcP0wxdDSZ4yykGGKip6eHgYGByQfXRoODg9Mu5kZ1S1u7pZ2QtnaDdl59tD2wHcXTrKCYMfM6SbsDK3nqjJPblNuexvYiYBFAX1+fp9uJoJl88mq4bmlrt7QT0tZu0LbhI9vLbP+d7V7bvRRDRC+2fS9wKfDWckbEPYCHysm+IiKijVqWFCRdQDGD444qns16xBjFLwNuo5gN8SzGfuhLRES0SMuGj8oHs4y1v7dm2cBRrYolIiLqk2kuIiKiMq2nuZiM3uN/UC2vOGm/DkYSETF1pKcQERGVJIWIiKgkKURERCVJISIiKkkKERFRSVKIiIhKkkJERFSSFCIiopKkEBERlSSFiIioJClEREQlSSEiIipJChERUUlSiIiISpJCRERUkhQiIqKSpBAREZUkhYiIqCQpREREJUkhIiIqLUsKks6RdL+kG2u2nSrpN5J+Lek7kmbX7PuQpOWSbpX06lbFFRERo2tlT+FcYJ9h234C7Gx7F+C3wIcAJO0EHAK8oHzNFyWt1cLYIiJiBC1LCravAh4ctu3HtteUq1cD25TLBwCLbf/F9u3AcmD3VsUWEREjW7uDdb8NuLBc3poiSQy5q9z2NJIWAgsBenp6GBgYaKjyY+euqZYbPUYjBgcH21pfJ3VLW7ulnZC2doOOJAVJHwHWAOdP9LW2FwGLAPr6+tzf399QDAuO/0G1vOLQxo7RiIGBARqNebrplrZ2Szshbe0GbU8KkhYA+wN72Xa5eSUwp6bYNuW2iIhoo7ZekippH+DfgPm2H6vZdSlwiKT1JG0H7AD8dztji4iIFvYUJF0A9AObS7oL+ATF1UbrAT+RBHC17SNt3yTpIuBmimGlo2w/2arYIiJiZC1LCrbfOMLms8cofyJwYqviiYiI8eWO5oiIqCQpREREJUkhIiIqSQoREVFJUoiIiEqSQkREVJIUIiKikqQQERGVJIWIiKgkKURERCVJISIiKkkKERFRSVKIiIhKkkJERFSSFCIiopKkEBERlSSFiIioJClEREQlSSEiIipJChERUUlSiIiIytqtOrCkc4D9gftt71xueyZwIdALrAAOtr1KkoAzgH2Bx4AFtq9rVWzD9R7/g6esrzhpv3ZVHRExpbSyp3AusM+wbccDV9jeAbiiXAd4DbBD+bcQ+FIL44qIiFG0LCnYvgp4cNjmA4DzyuXzgANrtn/NhauB2Z
K2bFVsERExsnafU+ixfU+5fC/QUy5vDdxZU+6ucltERLRRy84pjMe2JXmir5O0kGKIiZ6eHgYGBhqq/9i5a0bd1+gx6zE4ONjS408l3dLWbmknpK3doN1J4T5JW9q+pxweur/cvhKYU1Num3Lb09heBCwC6Ovrc39/f0OBLBh2crnWikMbO2Y9BgYGaDTm6aZb2tot7YS0tRu0e/joUuCwcvkw4JKa7W9VYQ/goZphpoiIaJNWXpJ6AdAPbC7pLuATwEnARZKOAO4ADi6LX0ZxOepyiktSD29VXBERMbqWJQXbbxxl114jlDVwVKtiiYiI+uSO5oiIqCQpREREJUkhIiIqSQoREVFJUoiIiEqSQkREVJIUIiKikqQQERGVJIWIiKgkKURERCVJISIiKkkKERFRSVKIiIhKkkJERFSSFCIiopKkEBERlXGTgqSlko6StFk7AoqIiM6pp6fwBmAr4FpJiyW9WpJaHFdERHTAuEnB9nLbHwGeC3wTOAe4Q9InJT2z1QFGRET71HVOQdIuwOeAU4H/BF4PPAz8tHWhRUREu609XgFJS4HVwNnA8bb/Uu66RtLLWxhbRES02bhJAXi97dtG2mH7X5ocT0REdFA9w0dvlzR7aEXSZpI+M5lKJb1P0k2SbpR0gaT1JW0n6RpJyyVdKGndydQRERETV09SeI3t1UMrtlcB+zZaoaStgaOBPts7A2sBhwAnA6fbfg6wCjii0ToiIqIx9SSFtSStN7QiaQNgvTHK12NtYANJawMbAvcArwIuLvefBxw4yToiImKCZHvsAtJxwGuBr5abDgcutX1Kw5VKxwAnAo8DPwaOAa4uewlImgNcXvYkhr92IbAQoKenZ7fFixc3FMOylQ+Num/u1ps2dMx6DA4OMmvWrJYdfyrplrZ2SzshbZ0p5s2bt9R230j7xj3RbPtkSb8G9io3fdr2jxoNprwz+gBgO4qrmr4F7FPv620vAhYB9PX1ub+/v6E4Fhz/g1H3rTi0sWPWY2BggEZjnm66pa3d0k5IW7tBPVcfYfty4PIm1bk3cLvtPwJI+jbwcmC2pLVtrwG2AVY2qb6IiKhTPXMf/Yuk30l6SNLDkh6R9PAk6vwDsIekDcvpMvYCbgauBA4qyxwGXDKJOiIiogH1nGg+BZhve1Pbm9je2PYmjVZo+xqKE8rXAcvKGBYBxwHvl7QceBbFzXIREdFG9Qwf3Wf7lmZWavsTwCeGbb4N2L2Z9URExMTUkxSWSLoQ+C4wNMUFtr/dqqAiIqIz6kkKmwCPAf9Us81AkkJExAxTzyWph7cjkIiI6Lx6rj56rqQrJN1Yru8i6aOtDy0iItqtnquPzgI+BDwBYPvXFHMVRUTEDFNPUtjQ9n8P27amFcFERERn1XOi+QFJ21OcXEbSQRQT2M1YvaNMgbHipP3aHElERHvVkxSOori57HmSVgK3A29uaVQREdER9Vx9dBuwt6SNgGfYfqT1YUVERCfU84zmjw9bB8D2p1oUU0REdEg9w0eP1iyvD+wPNHXai4iImBrqGT76XO26pM8CDT9PISIipq56LkkdbkOK5x1ERMQMU885hWWUl6MCawFbADmfEBExA9VzTmH/muU1FFNp5+a1iIgZqJ6kMPwS1E2GrkACsP1gUyOawmpvasuNbBExE9WTFK4D5gCrAAGzKR6pCcWw0rNbEllERLRdPSeafwK81vbmtp9FMZz0Y9vb2U5CiIiYQepJCnvYvmxoxfblwJ6tCykiIjqlnuGju8vnJ3yjXD8UuLt1IUVERKfU01N4I8VlqN+heATnFuW2iIiYYeq5o/lB4BhJG9l+dLzy9ZA0G/gKsDPFyeq3AbcCFwK9wArgYNurmlFfRETUp57Hce4p6WbK+Y4kvVDSFydZ7xnAD20/D3hheezjgSts7wBcUa5HREQb1TN8dDrwauBPALZvAP6h0QolbVq+/uzyeH+1vRo4ADivLHYecGCjdURERGNke+wC0jW2Xy
rpV7ZfVG67wfYLG6pQ2pXioT03U/QSlgLHACttzy7LCFg1tD7s9QuBhQA9PT27LV68uJEwWLbyoYZeN2Tu1ps29LrBwUFmzZo1qbqni25pa7e0E9LWmWLevHlLbfeNtK+eq4/ulLQnYEnrUHyBT2bq7LWBFwPvsX2NpDMYNlRk25JGzFa2F1EkFfr6+tzf399QEAtGeeRmvVYc2li9AwMDNBrzdNMtbe2WdkLa2g3qGT46kuKRnFsDK4Fdy/VG3QXcZfuacv1iiiRxn6QtAcr/3j+JOiIiogFj9hQkrQWcYfvQZlVo+15Jd0ra0fatwF4UQ0k3A4cBJ5X/vaRZdbZC5kGKiJlozKRg+0lJ20pa1/Zfm1jve4DzJa0L3AYcTtFruUjSEcAdwMFNrC8iIupQzzmF24BfSLqUmkdz2j6t0UptXw+MdJJjr0aPGRERkzfqOQVJXy8X5wPfL8tuXPMXEREzzFg9hd0kbUUxTfb/a1M8ERHRQWMlhTMp7izeDlhSs13kOQoRETPSqMNHtv/D9vOBr9p+ds1fnqMQETFDjXufgu13tiOQiIjovHpuXouIiC6RpBAREZUkhYiIqCQpREREJUkhIiIq9UxzEePI5HgRMVOkpxAREZUkhYiIqGT4qMkylBQR01l6ChERUUlSiIiISpJCRERUkhQiIqKSpBAREZUkhYiIqCQpREREJUkhIiIqHUsKktaS9CtJ3y/Xt5N0jaTlki6UtG6nYouI6Fad7CkcA9xSs34ycLrt5wCrgCM6ElVERBfrSFKQtA2wH/CVcl3Aq4CLyyLnAQd2IraIiG4m2+2vVLoY+L/AxsAHgAXA1WUvAUlzgMtt7zzCaxcCCwF6enp2W7x4cUMxLFv5UEOvm4i5W2/6tG2Dg4PMmjWr5XVPBd3S1m5pJ6StM8W8efOW2u4baV/bJ8STtD9wv+2lkvon+nrbi4BFAH19fe7vn/AhAFhQM3Fdq6w4tP9p2wYGBmg05ummW9raLe2EtLUbdGKW1JcD8yXtC6wPbAKcAcyWtLbtNcA2wMoOxBYR0dXafk7B9odsb2O7FzgE+KntQ4ErgYPKYocBl7Q7toiIbjeV7lM4Dni/pOXAs4CzOxxPRETX6ehDdmwPAAPl8m3A7p2MJyKi2+XJa20y9ES2Y+euoX+E7ZAntUVE502l4aOIiOiw9BSmgfQmIqJd0lOIiIhKkkJERFSSFCIiopJzCtNMzi9ERCulpxAREZX0FFqotw2T7kVENFOSQgeMliwyNBQRnZbho4iIqCQpREREJUkhIiIqSQoREVFJUoiIiEquPupSudIpIkaSnkJERFTSU5jG8ms/IpotSWGKasXd0LnDOiLGk+GjiIiopKcQU0aGwyI6r+1JQdIc4GtAD2Bgke0zJD0TuBDoBVYAB9te1e74pqt8oUZEM3Ri+GgNcKztnYA9gKMk7QQcD1xhewfginI9IiLaqO09Bdv3APeUy49IugXYGjgA6C+LnQcMAMe1O76ZIL2GiGiUbHeucqkXuArYGfiD7dnldgGrhtaHvWYhsBCgp6dnt8WLFzdU97KVDzX0usnq2QDue7x99c3detNquZ4215afrMHBQWbNmlV3+dr4mhlHq020ndNZ2jozzJs3b6ntvpH2dSwpSJoF/Aw40fa3Ja2uTQKSVtnebKxj9PX1ecmSJQ3V36nLM4+du4bPLWtfB622p1BPm5vZsxgYGKC/v7/u8tO1hzPRdk5naevMIGnUpNCRq48krQP8J3C+7W+Xm++TtKXteyRtCdzfidi63XT9Yo6I5mj7ieZyaOhs4Bbbp9XsuhQ4rFw+DLik3bFFRHS7TvQUXg68BVgm6fpy24eBk4CLJB0B3AEc3IHYIiK6WieuPvo5oFF279XOWLpBO86dtHPIKcNbEa2VaS4iIqKSpBAREZXMfRQNmY4zrmboKWJ86SlEREQlSSEiIioZPoq6Tccho9FkKCliZOkpREREJT2FGFUzewajHaueX+
kzqYcSMdUlKUTLLFv5EAs6/IWehBIxMRk+ioiISnoK0VS1v8yPnTux8tNVbY9ooietc8I7ppokhZiSZkKyiJiOMnwUERGV9BRiRujUTK0w+jBZhoZiOkpSiGlrtCGmZg09tftLPUkkpoIMH0VERCU9heh69fxCnyonvkeLNb2MaJYkhYgak/3yr2dIa6p9aU/l2KL9MnwUERGV9BQi2qye3sh0/PU+vF3TJe54qiSFiGlqokNV0zHRRPtl+CgiIipTrqcgaR/gDGAt4Cu2T+pwSBEdNZmT35O5l2OkMsfOXUN/A3WP1mOplV7N1DClkoKktYAvAP8I3AVcK+lS2zd3NrKIGNLIF3azEls9lwxPteRS77mWybSzmaba8NHuwHLbt9n+K7AYOKDDMUVEdA3Z7nQMFUkHAfvYfnu5/hbgpbbfXVNmIbCwXN0RuLXtgU7O5sADnQ6iTbqlrd3STkhbZ4ptbW8x0o4pNXxUD9uLgEWdjqNRkpbY7ut0HO3QLW3tlnZC2toNptrw0UpgTs36NuW2iIhog6mWFK4FdpC0naR1gUOASzscU0RE15hSw0e210h6N/AjiktSz7F9U4fDarZpO/TVgG5pa7e0E9LWGW9KnWiOiIjOmmrDRxER0UFJChERUUlSaBFJ+0i6VdJyScePsP8fJF0naU15f8a0VEc73y/pZkm/lnSFpG07EWcz1NHWIyUtk3S9pJ9L2qkTcTbDeG2tKfc6SZY0bS/drONzXSDpj+Xner2kt3cizraxnb8m/1GcJP898GxgXeAGYKdhZXqBXYCvAQd1OuYWtnMesGG5/E7gwk7H3cK2blKzPB/4YafjblVby3IbA1cBVwN9nY67hZ/rAuDznY61XX/pKbTGuNN12F5h+9fA3zoRYJPU084rbT9Wrl5Nce/JdFRPWx+uWd0ImK5XcdQ73cyngZOBP7czuCbL1DrDJCm0xtbAnTXrd5XbZpqJtvMI4PKWRtQ6dbVV0lGSfg+cAhzdptiabdy2SnoxMMf21Hh4dePq/Tf8unII9GJJc0bYP2MkKURbSHoz0Aec2ulYWsn2F2xvDxwHfLTT8bSCpGcApwHHdjqWNvke0Gt7F+AnwHkdjqelkhRao1um66irnZL2Bj4CzLf9lzbF1mwT/UwXAwe2MqAWGq+tGwM7AwOSVgB7AJdO05PN436utv9U8+/2K8BubYqtI5IUWqNbpusYt52SXgR8mSIh3N+BGJulnrbuULO6H/C7NsbXTGO21fZDtje33Wu7l+Jc0XzbSzoT7qTU87luWbM6H7iljfG13ZSa5mKm8CjTdUj6FLDE9qWSXgJ8B9gMeK2kT9p+QQfDnrB62kkxXDQL+JYkgD/Ynt+xoBtUZ1vfXfaKngBWAYd1LuLG1dnWGaHOth4taT6wBniQ4mqkGSvTXERERCXDRxERUUlSiIiISpJCRERUkhQiIqKSpBAREZUkhZh2JB0t6RZJ50uaP84sngskfX6UfZdJmt2yQEdRzqb61nJ5gaSt6njNwEg3h9W2X9IJkj5QLn+qvDwWSe+VtGFzWxEzVe5TiOnoXcDetu8q1xu6bt72vs0LaUL1nlmzugC4Ebi7wWNdygjtt/3xmtX3At8AHhteLmK49BRiWpF0JsU0x5dLel9tT0DS6yXdKOkGSVfVvGwrST+U9DtJp9Qca4WkzSX1lj2PsyTdJOnHkjYoy7yknAjtekmnSrpxhJi2lHRVWeZGSa8stw/WlDlI0rnl8gmSPqDiORp9wPnlazeQ9HFJ15bHWaTyjr/SW2rq2L081og9IUnnlnUeDWwFXCnpSklvk/TvNeXeIen0iX4OMXMlKcS0YvtIil/V82wP/zL7OPBq2y+kmI5gyK7AG4C5wBtGmeVyB+AL5V3lq4HXldu/Cvyr7V2BJ0cJ603Aj8oyLwSur7MtFwNLgENt72r7cYp5+19ie2dgA2D/mpdsWNbxLuCcOuv4D/7n/ZoHXERxB/06ZZHD6z1WdI
ckhZhJfgGcK+kdFFMWDLminK/nz8DNwEhPf7vd9vXl8lKgtzzfsLHtX5bbvzlKvdcCh0s6AZhr+5FJtGGepGskLQNeBdROfXIBgO2rgE0aOR9iexD4KbC/pOcB69heNol4Y4ZJUogZo+xFfJRi1sulkp5V7qqdmfVJRj6XVk+Z0eq9CvgHitk1zx06icxTH7Kz/njHkbQ+8EWKJ/HNBc4a9rrhc9I0OkfNVyjOZRxO0ROKqCQpxIwhaXvb15QnWf/IU6dEnjDbq4FHJL203HTIKPVuC9xn+yyKL9wXl7vuk/R8Fc8f+OdRqnmEYipq+J8E8ICkWcDwZ3e/oazvFcBDth+qsym1dWD7Gor35k2UvY+IIbn6KGaSU8vpqwVcQfG83V0necwjgLMk/Q34GTDSF3E/8EFJTwCDwFBP4Xjg+xQJagnFbLHDnQucKelx4GUUvYMbgXsphqVq/VnSr4B1gLdNoA2LgB9Kurs8rwDFuYVdba+awHGiC2SW1IgxSJpVjsNT3g+wpe1jOhzWpEn6PnC67Ss6HUtMLRk+ihjbfkOXgQKvBD7T6YAmQ9JsSb8FHk9CiJGkpxAREZX0FCIiopKkEBERlSSFiIioJClEREQlSSEiIir/Hy6BM/xdfwsLAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# histogram of true positives\n",
"# xgtest_labels_thres_prod = np.multiply(xgtest_labels_thres, test_labels_np)\n",
"temp = np.zeros(np.size(xgtest_labels))\n",
"temp[:] = np.nan\n",
"\n",
"for i in range(np.size(xgtest_labels)):\n",
" prod = test_labels_np[i] * xgtest_labels[i]\n",
" if prod > 0.05:\n",
" temp[i] = prod\n",
"\n",
"n, bins, patches = plt.hist(temp, 100)\n",
"plt.xlabel('fishing suitability')\n",
"plt.ylabel('frequency')\n",
"plt.title('Histogram of positive samples')\n",
"plt.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"# save to csv file: test data + predictions\n",
"prod_data = pd.read_csv('/tmp/testing-2021.csv')\n",
"prod_labels = pd.DataFrame({'fishing_suitability': xgtest_labels})\n",
"\n",
"pd.concat([prod_data, prod_labels], axis=1).to_csv('/tmp/testing-2021-raw.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment