{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Facies classification using Machine Learning\n",
"\n",
"### aaML Submission\n",
"\n",
"### By:\n",
"\n",
"[Alexsandro G. Cerqueira](https://github.com/alexleogc), \n",
"[Alã de C. Damasceno](https://github.com/aladamasceno)\n",
"\n",
"There are tow main notebooks:\n",
"\n",
"- Data Analysis and edition\n",
"- Submission\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from libtools import *"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Loading the data training data without Shankle well"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"training = pd.read_csv('data-test.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
\n",
" \n",
" \n",
" | \n",
" Facies | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" RELPOS | \n",
" Label_Form_SH_LM | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 | \n",
" 77.45 | \n",
" 0.664 | \n",
" 9.9 | \n",
" 11.915 | \n",
" 4.6 | \n",
" 1.000 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" 3 | \n",
" 78.26 | \n",
" 0.661 | \n",
" 14.2 | \n",
" 12.565 | \n",
" 4.1 | \n",
" 0.979 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 79.05 | \n",
" 0.658 | \n",
" 14.8 | \n",
" 13.050 | \n",
" 3.6 | \n",
" 0.957 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 86.10 | \n",
" 0.655 | \n",
" 13.9 | \n",
" 13.115 | \n",
" 3.5 | \n",
" 0.936 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" 3 | \n",
" 74.58 | \n",
" 0.647 | \n",
" 13.5 | \n",
" 13.300 | \n",
" 3.4 | \n",
" 0.915 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS Label_Form_SH_LM\n",
"0 3 77.45 0.664 9.9 11.915 4.6 1.000 1\n",
"1 3 78.26 0.661 14.2 12.565 4.1 0.979 1\n",
"2 3 79.05 0.658 14.8 13.050 3.6 0.957 1\n",
"3 3 86.10 0.655 13.9 13.115 3.5 0.936 1\n",
"4 3 74.58 0.647 13.5 13.300 3.4 0.915 1"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"training.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Facies | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" RELPOS | \n",
" Label_Form_SH_LM | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 3700.000000 | \n",
" 3700.000000 | \n",
" 3700.000000 | \n",
" 3700.000000 | \n",
" 3700.000000 | \n",
" 2783.000000 | \n",
" 3700.000000 | \n",
" 3700.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 4.615676 | \n",
" 64.873649 | \n",
" 0.663053 | \n",
" 4.651677 | \n",
" 12.892826 | \n",
" 3.805693 | \n",
" 0.524125 | \n",
" 1.542973 | \n",
"
\n",
" \n",
" std | \n",
" 2.475808 | \n",
" 30.817166 | \n",
" 0.253863 | \n",
" 5.109006 | \n",
" 6.796219 | \n",
" 0.894118 | \n",
" 0.287147 | \n",
" 0.498217 | \n",
"
\n",
" \n",
" min | \n",
" 1.000000 | \n",
" 10.149000 | \n",
" -0.025949 | \n",
" -21.832000 | \n",
" 0.550000 | \n",
" 0.200000 | \n",
" 0.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 2.000000 | \n",
" 43.778250 | \n",
" 0.502000 | \n",
" 1.800000 | \n",
" 8.350000 | \n",
" 3.200000 | \n",
" 0.278000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 4.000000 | \n",
" 64.817000 | \n",
" 0.645613 | \n",
" 4.400000 | \n",
" 11.857500 | \n",
" 3.600000 | \n",
" 0.531000 | \n",
" 2.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 7.000000 | \n",
" 80.322500 | \n",
" 0.823000 | \n",
" 7.600000 | \n",
" 15.750000 | \n",
" 4.400000 | \n",
" 0.772000 | \n",
" 2.000000 | \n",
"
\n",
" \n",
" max | \n",
" 9.000000 | \n",
" 361.150000 | \n",
" 1.800000 | \n",
" 19.312000 | \n",
" 84.400000 | \n",
" 8.094000 | \n",
" 1.000000 | \n",
" 2.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Facies GR ILD_log10 DeltaPHI PHIND \\\n",
"count 3700.000000 3700.000000 3700.000000 3700.000000 3700.000000 \n",
"mean 4.615676 64.873649 0.663053 4.651677 12.892826 \n",
"std 2.475808 30.817166 0.253863 5.109006 6.796219 \n",
"min 1.000000 10.149000 -0.025949 -21.832000 0.550000 \n",
"25% 2.000000 43.778250 0.502000 1.800000 8.350000 \n",
"50% 4.000000 64.817000 0.645613 4.400000 11.857500 \n",
"75% 7.000000 80.322500 0.823000 7.600000 15.750000 \n",
"max 9.000000 361.150000 1.800000 19.312000 84.400000 \n",
"\n",
" PE RELPOS Label_Form_SH_LM \n",
"count 2783.000000 3700.000000 3700.000000 \n",
"mean 3.805693 0.524125 1.542973 \n",
"std 0.894118 0.287147 0.498217 \n",
"min 0.200000 0.000000 1.000000 \n",
"25% 3.200000 0.278000 1.000000 \n",
"50% 3.600000 0.531000 2.000000 \n",
"75% 4.400000 0.772000 2.000000 \n",
"max 8.094000 1.000000 2.000000 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"training.describe()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"training = training.fillna(-99999)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Loading the SHANKLE well"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"blind = pd.read_csv('blind.csv')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Facies | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" RELPOS | \n",
" Label_Form_SH_LM | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2 | \n",
" 98.36 | \n",
" 0.642 | \n",
" -0.1 | \n",
" 18.685 | \n",
" 2.9 | \n",
" 1.000 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 97.57 | \n",
" 0.631 | \n",
" 7.9 | \n",
" 16.745 | \n",
" 3.2 | \n",
" 0.984 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 98.41 | \n",
" 0.615 | \n",
" 12.8 | \n",
" 14.105 | \n",
" 3.2 | \n",
" 0.968 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" 2 | \n",
" 85.92 | \n",
" 0.597 | \n",
" 13.0 | \n",
" 13.385 | \n",
" 3.4 | \n",
" 0.952 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" 2 | \n",
" 83.16 | \n",
" 0.592 | \n",
" 12.3 | \n",
" 13.345 | \n",
" 3.4 | \n",
" 0.935 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS Label_Form_SH_LM\n",
"0 2 98.36 0.642 -0.1 18.685 2.9 1.000 1\n",
"1 2 97.57 0.631 7.9 16.745 3.2 0.984 1\n",
"2 2 98.41 0.615 12.8 14.105 3.2 0.968 1\n",
"3 2 85.92 0.597 13.0 13.385 3.4 0.952 1\n",
"4 2 83.16 0.592 12.3 13.345 3.4 0.935 1"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"blind.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Facies | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" RELPOS | \n",
" Label_Form_SH_LM | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 449.000000 | \n",
" 449.000000 | \n",
" 449.000000 | \n",
" 449.000000 | \n",
" 449.000000 | \n",
" 449.000000 | \n",
" 449.000000 | \n",
" 449.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 3.576837 | \n",
" 65.431180 | \n",
" 0.630831 | \n",
" 2.348998 | \n",
" 15.741125 | \n",
" 3.224944 | \n",
" 0.503118 | \n",
" 1.342984 | \n",
"
\n",
" \n",
" std | \n",
" 2.260688 | \n",
" 25.696418 | \n",
" 0.241293 | \n",
" 6.113543 | \n",
" 9.080467 | \n",
" 0.732414 | \n",
" 0.282082 | \n",
" 0.475236 | \n",
"
\n",
" \n",
" min | \n",
" 1.000000 | \n",
" 18.400000 | \n",
" 0.093000 | \n",
" -19.900000 | \n",
" 2.890000 | \n",
" 1.500000 | \n",
" 0.010000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 2.000000 | \n",
" 54.960000 | \n",
" 0.425000 | \n",
" 0.100000 | \n",
" 9.150000 | \n",
" 2.700000 | \n",
" 0.258000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 3.000000 | \n",
" 66.600000 | \n",
" 0.620000 | \n",
" 2.600000 | \n",
" 13.935000 | \n",
" 3.100000 | \n",
" 0.500000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 6.000000 | \n",
" 75.150000 | \n",
" 0.817000 | \n",
" 6.200000 | \n",
" 18.575000 | \n",
" 3.600000 | \n",
" 0.744000 | \n",
" 2.000000 | \n",
"
\n",
" \n",
" max | \n",
" 8.000000 | \n",
" 242.750000 | \n",
" 1.311000 | \n",
" 18.600000 | \n",
" 55.915000 | \n",
" 5.400000 | \n",
" 1.000000 | \n",
" 2.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Facies GR ILD_log10 DeltaPHI PHIND PE \\\n",
"count 449.000000 449.000000 449.000000 449.000000 449.000000 449.000000 \n",
"mean 3.576837 65.431180 0.630831 2.348998 15.741125 3.224944 \n",
"std 2.260688 25.696418 0.241293 6.113543 9.080467 0.732414 \n",
"min 1.000000 18.400000 0.093000 -19.900000 2.890000 1.500000 \n",
"25% 2.000000 54.960000 0.425000 0.100000 9.150000 2.700000 \n",
"50% 3.000000 66.600000 0.620000 2.600000 13.935000 3.100000 \n",
"75% 6.000000 75.150000 0.817000 6.200000 18.575000 3.600000 \n",
"max 8.000000 242.750000 1.311000 18.600000 55.915000 5.400000 \n",
"\n",
" RELPOS Label_Form_SH_LM \n",
"count 449.000000 449.000000 \n",
"mean 0.503118 1.342984 \n",
"std 0.282082 0.475236 \n",
"min 0.010000 1.000000 \n",
"25% 0.258000 1.000000 \n",
"50% 0.500000 1.000000 \n",
"75% 0.744000 2.000000 \n",
"max 1.000000 2.000000 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"blind.describe()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"training_SH = divisao_sh(training)\n",
"training_LM = divisao_lm(training)\n",
"\n",
"blind_SH = divisao_sh(blind)\n",
"blind_LM = divisao_lm(blind)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Facies | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" RELPOS | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 | \n",
" 77.45 | \n",
" 0.664 | \n",
" 9.9 | \n",
" 11.915 | \n",
" 4.6 | \n",
" 1.000 | \n",
"
\n",
" \n",
" 1 | \n",
" 3 | \n",
" 78.26 | \n",
" 0.661 | \n",
" 14.2 | \n",
" 12.565 | \n",
" 4.1 | \n",
" 0.979 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 79.05 | \n",
" 0.658 | \n",
" 14.8 | \n",
" 13.050 | \n",
" 3.6 | \n",
" 0.957 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 86.10 | \n",
" 0.655 | \n",
" 13.9 | \n",
" 13.115 | \n",
" 3.5 | \n",
" 0.936 | \n",
"
\n",
" \n",
" 4 | \n",
" 3 | \n",
" 74.58 | \n",
" 0.647 | \n",
" 13.5 | \n",
" 13.300 | \n",
" 3.4 | \n",
" 0.915 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS\n",
"0 3 77.45 0.664 9.9 11.915 4.6 1.000\n",
"1 3 78.26 0.661 14.2 12.565 4.1 0.979\n",
"2 3 79.05 0.658 14.8 13.050 3.6 0.957\n",
"3 3 86.10 0.655 13.9 13.115 3.5 0.936\n",
"4 3 74.58 0.647 13.5 13.300 3.4 0.915"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"training_SH.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Facies | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" RELPOS | \n",
"
\n",
" \n",
" \n",
" \n",
" 43 | \n",
" 8 | \n",
" 57.02 | \n",
" 0.628 | \n",
" 11.6 | \n",
" 12.090 | \n",
" 3.5 | \n",
" 1.000 | \n",
"
\n",
" \n",
" 44 | \n",
" 8 | \n",
" 43.29 | \n",
" 0.702 | \n",
" 7.3 | \n",
" 6.705 | \n",
" 4.0 | \n",
" 0.981 | \n",
"
\n",
" \n",
" 45 | \n",
" 8 | \n",
" 36.06 | \n",
" 0.762 | \n",
" 5.7 | \n",
" 4.595 | \n",
" 5.0 | \n",
" 0.963 | \n",
"
\n",
" \n",
" 46 | \n",
" 8 | \n",
" 46.00 | \n",
" 0.823 | \n",
" 5.1 | \n",
" 4.620 | \n",
" 5.5 | \n",
" 0.944 | \n",
"
\n",
" \n",
" 47 | \n",
" 6 | \n",
" 60.30 | \n",
" 0.915 | \n",
" 4.6 | \n",
" 5.010 | \n",
" 5.6 | \n",
" 0.926 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS\n",
"43 8 57.02 0.628 11.6 12.090 3.5 1.000\n",
"44 8 43.29 0.702 7.3 6.705 4.0 0.981\n",
"45 8 36.06 0.762 5.7 4.595 5.0 0.963\n",
"46 8 46.00 0.823 5.1 4.620 5.5 0.944\n",
"47 6 60.30 0.915 4.6 5.010 5.6 0.926"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"training_LM.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Facies | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" RELPOS | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2 | \n",
" 98.36 | \n",
" 0.642 | \n",
" -0.1 | \n",
" 18.685 | \n",
" 2.9 | \n",
" 1.000 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 97.57 | \n",
" 0.631 | \n",
" 7.9 | \n",
" 16.745 | \n",
" 3.2 | \n",
" 0.984 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 98.41 | \n",
" 0.615 | \n",
" 12.8 | \n",
" 14.105 | \n",
" 3.2 | \n",
" 0.968 | \n",
"
\n",
" \n",
" 3 | \n",
" 2 | \n",
" 85.92 | \n",
" 0.597 | \n",
" 13.0 | \n",
" 13.385 | \n",
" 3.4 | \n",
" 0.952 | \n",
"
\n",
" \n",
" 4 | \n",
" 2 | \n",
" 83.16 | \n",
" 0.592 | \n",
" 12.3 | \n",
" 13.345 | \n",
" 3.4 | \n",
" 0.935 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS\n",
"0 2 98.36 0.642 -0.1 18.685 2.9 1.000\n",
"1 2 97.57 0.631 7.9 16.745 3.2 0.984\n",
"2 2 98.41 0.615 12.8 14.105 3.2 0.968\n",
"3 2 85.92 0.597 13.0 13.385 3.4 0.952\n",
"4 2 83.16 0.592 12.3 13.345 3.4 0.935"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"blind_SH.head()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Facies | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" RELPOS | \n",
"
\n",
" \n",
" \n",
" \n",
" 63 | \n",
" 8 | \n",
" 46.04 | \n",
" 0.645 | \n",
" -17.8 | \n",
" 28.075 | \n",
" 2.3 | \n",
" 1.000 | \n",
"
\n",
" \n",
" 64 | \n",
" 8 | \n",
" 26.73 | \n",
" 0.748 | \n",
" -9.0 | \n",
" 9.105 | \n",
" 5.0 | \n",
" 0.953 | \n",
"
\n",
" \n",
" 65 | \n",
" 8 | \n",
" 36.50 | \n",
" 0.794 | \n",
" 0.1 | \n",
" 5.945 | \n",
" 4.5 | \n",
" 0.930 | \n",
"
\n",
" \n",
" 66 | \n",
" 8 | \n",
" 52.24 | \n",
" 0.867 | \n",
" 4.2 | \n",
" 7.395 | \n",
" 4.1 | \n",
" 0.907 | \n",
"
\n",
" \n",
" 67 | \n",
" 6 | \n",
" 61.32 | \n",
" 0.925 | \n",
" 9.4 | \n",
" 8.825 | \n",
" 3.6 | \n",
" 0.884 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS\n",
"63 8 46.04 0.645 -17.8 28.075 2.3 1.000\n",
"64 8 26.73 0.748 -9.0 9.105 5.0 0.953\n",
"65 8 36.50 0.794 0.1 5.945 4.5 0.930\n",
"66 8 52.24 0.867 4.2 7.395 4.1 0.907\n",
"67 6 61.32 0.925 9.4 8.825 3.6 0.884"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"blind_LM.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X_SH = training_SH.drop(['Facies'],axis=1)\n",
"y_SH = training_SH['Facies']\n",
"\n",
"X_LM = training_LM.drop(['Facies'],axis=1)\n",
"y_LM = training_LM['Facies']\n",
"\n",
"X_SH_blind = blind_SH.drop(['Facies'],axis=1)\n",
"y_SH_blind = blind_SH['Facies']\n",
"\n",
"X_LM_blind = blind_LM.drop(['Facies'],axis=1)\n",
"y_LM_blind = blind_LM['Facies']"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"X_train_SH, X_test_SH, y_train_SH, y_test_SH = train_test_split(X_SH, y_SH, test_size=0.1)\n",
"\n",
"X_train_LM, X_test_LM, y_train_LM, y_test_LM = train_test_split(X_LM, y_LM, test_size=0.1)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.ensemble import ExtraTreesClassifier\n",
"from sklearn.metrics import classification_report,confusion_matrix"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',\n",
" max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
" min_impurity_split=1e-07, min_samples_leaf=1,\n",
" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
" n_estimators=500, n_jobs=1, oob_score=False, random_state=None,\n",
" verbose=0, warm_start=False)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ETC_SH = ExtraTreesClassifier(n_estimators=500, bootstrap=True)\n",
"ETC_LM = ExtraTreesClassifier(n_estimators=500)\n",
"\n",
"ETC_SH.fit(X_train_SH, y_train_SH)\n",
"ETC_LM.fit(X_train_LM, y_train_LM)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 9 1 1 0]\n",
" [ 0 79 9 0]\n",
" [ 0 11 59 1]\n",
" [ 0 0 0 0]]\n",
" precision recall f1-score support\n",
"\n",
" 1 1.00 0.82 0.90 11\n",
" 2 0.87 0.90 0.88 88\n",
" 3 0.86 0.83 0.84 71\n",
" 5 0.00 0.00 0.00 0\n",
"\n",
"avg / total 0.87 0.86 0.87 170\n",
"\n"
]
}
],
"source": [
"pred_SH = ETC_SH.predict(X_test_SH)\n",
"print(confusion_matrix(y_test_SH,pred_SH))\n",
"print(classification_report(y_test_SH,pred_SH))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0 0 0 1 0 0 0]\n",
" [ 0 20 1 2 0 2 0]\n",
" [ 0 0 18 5 1 5 0]\n",
" [ 0 1 1 40 0 9 0]\n",
" [ 0 0 1 0 13 2 0]\n",
" [ 0 0 4 9 1 47 0]\n",
" [ 0 0 0 0 0 2 16]]\n",
" precision recall f1-score support\n",
"\n",
" 3 0.00 0.00 0.00 1\n",
" 4 0.95 0.80 0.87 25\n",
" 5 0.72 0.62 0.67 29\n",
" 6 0.70 0.78 0.74 51\n",
" 7 0.87 0.81 0.84 16\n",
" 8 0.70 0.77 0.73 61\n",
" 9 1.00 0.89 0.94 18\n",
"\n",
"avg / total 0.77 0.77 0.77 201\n",
"\n"
]
}
],
"source": [
"pred_LM = ETC_LM.predict(X_test_LM)\n",
"print(confusion_matrix(y_test_LM,pred_LM))\n",
"print(classification_report(y_test_LM,pred_LM))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 7 81 1]\n",
" [ 5 75 9]\n",
" [ 0 43 74]]\n",
" precision recall f1-score support\n",
"\n",
" 1 0.58 0.08 0.14 89\n",
" 2 0.38 0.84 0.52 89\n",
" 3 0.88 0.63 0.74 117\n",
"\n",
"avg / total 0.64 0.53 0.49 295\n",
"\n"
]
}
],
"source": [
"blind_pred_SH = ETC_SH.predict(X_SH_blind)\n",
"print(confusion_matrix(y_SH_blind, blind_pred_SH))\n",
"print(classification_report(y_SH_blind, blind_pred_SH))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0 0 0 0 0 0 0]\n",
" [ 0 1 0 5 0 1 0]\n",
" [ 0 5 3 6 0 5 0]\n",
" [ 1 1 1 52 0 16 0]\n",
" [ 0 0 0 1 2 13 1]\n",
" [ 0 0 0 15 1 23 1]\n",
" [ 0 0 0 0 0 0 0]]\n",
" precision recall f1-score support\n",
"\n",
" 3 0.00 0.00 0.00 0\n",
" 4 0.14 0.14 0.14 7\n",
" 5 0.75 0.16 0.26 19\n",
" 6 0.66 0.73 0.69 71\n",
" 7 0.67 0.12 0.20 17\n",
" 8 0.40 0.57 0.47 40\n",
" 9 0.00 0.00 0.00 0\n",
"\n",
"avg / total 0.58 0.53 0.50 154\n",
"\n"
]
}
],
"source": [
"blind_pred_LM = ETC_LM.predict(X_LM_blind)\n",
"print(confusion_matrix(y_LM_blind, blind_pred_LM))\n",
"print(classification_report(y_LM_blind, blind_pred_LM))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"blind_pred_SH = pd.DataFrame(blind_pred_SH, index=X_SH_blind.index)\n",
"blind_pred_LM = pd.DataFrame(blind_pred_LM, index=X_LM_blind.index)\n",
"pred_blind = pd.concat([blind_pred_SH,blind_pred_LM])\n",
"pred_blind = pred_blind.sort_index()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"y_blind = blind['Facies']"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 7 81 1 0 0 0 0 0 0]\n",
" [ 5 75 9 0 0 0 0 0 0]\n",
" [ 0 43 74 0 0 0 0 0 0]\n",
" [ 0 0 0 1 0 5 0 1 0]\n",
" [ 0 0 0 5 3 6 0 5 0]\n",
" [ 0 0 1 1 1 52 0 16 0]\n",
" [ 0 0 0 0 0 1 2 13 1]\n",
" [ 0 0 0 0 0 15 1 23 1]\n",
" [ 0 0 0 0 0 0 0 0 0]]\n",
" precision recall f1-score support\n",
"\n",
" 1 0.58 0.08 0.14 89\n",
" 2 0.38 0.84 0.52 89\n",
" 3 0.87 0.63 0.73 117\n",
" 4 0.14 0.14 0.14 7\n",
" 5 0.75 0.16 0.26 19\n",
" 6 0.66 0.73 0.69 71\n",
" 7 0.67 0.12 0.20 17\n",
" 8 0.40 0.57 0.47 40\n",
" 9 0.00 0.00 0.00 0\n",
"\n",
"avg / total 0.62 0.53 0.49 449\n",
"\n"
]
}
],
"source": [
"print(confusion_matrix(y_blind, pred_blind))\n",
"print(classification_report(y_blind, pred_blind))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Using the complete training data"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"training_data = pd.read_csv('training.csv')"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Facies | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" RELPOS | \n",
" Label_Form_SH_LM | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 | \n",
" 77.45 | \n",
" 0.664 | \n",
" 9.9 | \n",
" 11.915 | \n",
" 4.6 | \n",
" 1.000 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" 3 | \n",
" 78.26 | \n",
" 0.661 | \n",
" 14.2 | \n",
" 12.565 | \n",
" 4.1 | \n",
" 0.979 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 79.05 | \n",
" 0.658 | \n",
" 14.8 | \n",
" 13.050 | \n",
" 3.6 | \n",
" 0.957 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 86.10 | \n",
" 0.655 | \n",
" 13.9 | \n",
" 13.115 | \n",
" 3.5 | \n",
" 0.936 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" 3 | \n",
" 74.58 | \n",
" 0.647 | \n",
" 13.5 | \n",
" 13.300 | \n",
" 3.4 | \n",
" 0.915 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS Label_Form_SH_LM\n",
"0 3 77.45 0.664 9.9 11.915 4.6 1.000 1\n",
"1 3 78.26 0.661 14.2 12.565 4.1 0.979 1\n",
"2 3 79.05 0.658 14.8 13.050 3.6 0.957 1\n",
"3 3 86.10 0.655 13.9 13.115 3.5 0.936 1\n",
"4 3 74.58 0.647 13.5 13.300 3.4 0.915 1"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"training_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Facies | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" RELPOS | \n",
" Label_Form_SH_LM | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 4149.000000 | \n",
" 4149.000000 | \n",
" 4149.000000 | \n",
" 4149.000000 | \n",
" 4149.000000 | \n",
" 4149.000000 | \n",
" 4149.000000 | \n",
" 4149.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 4.503254 | \n",
" 64.933985 | \n",
" 0.659566 | \n",
" 4.402484 | \n",
" 13.201066 | \n",
" -22098.588517 | \n",
" 0.521852 | \n",
" 1.521330 | \n",
"
\n",
" \n",
" std | \n",
" 2.474324 | \n",
" 30.302530 | \n",
" 0.252703 | \n",
" 5.274947 | \n",
" 7.132846 | \n",
" 41499.330187 | \n",
" 0.286644 | \n",
" 0.499605 | \n",
"
\n",
" \n",
" min | \n",
" 1.000000 | \n",
" 10.149000 | \n",
" -0.025949 | \n",
" -21.832000 | \n",
" 0.550000 | \n",
" -99999.000000 | \n",
" 0.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 2.000000 | \n",
" 44.730000 | \n",
" 0.498000 | \n",
" 1.600000 | \n",
" 8.500000 | \n",
" 2.416000 | \n",
" 0.277000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 4.000000 | \n",
" 64.990000 | \n",
" 0.639000 | \n",
" 4.300000 | \n",
" 12.020000 | \n",
" 3.300000 | \n",
" 0.528000 | \n",
" 2.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 6.000000 | \n",
" 79.438000 | \n",
" 0.822000 | \n",
" 7.500000 | \n",
" 16.050000 | \n",
" 4.000000 | \n",
" 0.769000 | \n",
" 2.000000 | \n",
"
\n",
" \n",
" max | \n",
" 9.000000 | \n",
" 361.150000 | \n",
" 1.800000 | \n",
" 19.312000 | \n",
" 84.400000 | \n",
" 8.094000 | \n",
" 1.000000 | \n",
" 2.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Facies GR ILD_log10 DeltaPHI PHIND \\\n",
"count 4149.000000 4149.000000 4149.000000 4149.000000 4149.000000 \n",
"mean 4.503254 64.933985 0.659566 4.402484 13.201066 \n",
"std 2.474324 30.302530 0.252703 5.274947 7.132846 \n",
"min 1.000000 10.149000 -0.025949 -21.832000 0.550000 \n",
"25% 2.000000 44.730000 0.498000 1.600000 8.500000 \n",
"50% 4.000000 64.990000 0.639000 4.300000 12.020000 \n",
"75% 6.000000 79.438000 0.822000 7.500000 16.050000 \n",
"max 9.000000 361.150000 1.800000 19.312000 84.400000 \n",
"\n",
" PE RELPOS Label_Form_SH_LM \n",
"count 4149.000000 4149.000000 4149.000000 \n",
"mean -22098.588517 0.521852 1.521330 \n",
"std 41499.330187 0.286644 0.499605 \n",
"min -99999.000000 0.000000 1.000000 \n",
"25% 2.416000 0.277000 1.000000 \n",
"50% 3.300000 0.528000 2.000000 \n",
"75% 4.000000 0.769000 2.000000 \n",
"max 8.094000 1.000000 2.000000 "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"training_data.describe()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"training_data_SH = divisao_sh(training_data)\n",
"training_data_LM = divisao_lm(training_data)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Facies | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" RELPOS | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 1986.000000 | \n",
" 1986.000000 | \n",
" 1986.000000 | \n",
" 1986.000000 | \n",
" 1986.000000 | \n",
" 1986.000000 | \n",
" 1986.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 2.297583 | \n",
" 75.003921 | \n",
" 0.530210 | \n",
" 5.432646 | \n",
" 16.700658 | \n",
" -18980.149676 | \n",
" 0.511780 | \n",
"
\n",
" \n",
" std | \n",
" 0.819513 | \n",
" 15.637085 | \n",
" 0.160003 | \n",
" 6.092309 | \n",
" 7.712549 | \n",
" 39227.270999 | \n",
" 0.287772 | \n",
"
\n",
" \n",
" min | \n",
" 1.000000 | \n",
" 26.230000 | \n",
" -0.025949 | \n",
" -19.900000 | \n",
" 4.397000 | \n",
" -99999.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 2.000000 | \n",
" 64.378000 | \n",
" 0.441000 | \n",
" 2.792500 | \n",
" 11.920000 | \n",
" 2.459500 | \n",
" 0.261500 | \n",
"
\n",
" \n",
" 50% | \n",
" 2.000000 | \n",
" 73.680000 | \n",
" 0.541205 | \n",
" 5.800000 | \n",
" 14.793500 | \n",
" 3.057500 | \n",
" 0.504500 | \n",
"
\n",
" \n",
" 75% | \n",
" 3.000000 | \n",
" 83.797500 | \n",
" 0.633000 | \n",
" 9.368500 | \n",
" 19.143750 | \n",
" 3.365000 | \n",
" 0.761500 | \n",
"
\n",
" \n",
" max | \n",
" 9.000000 | \n",
" 221.125000 | \n",
" 0.966000 | \n",
" 19.257000 | \n",
" 84.400000 | \n",
" 5.100000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Facies GR ILD_log10 DeltaPHI PHIND \\\n",
"count 1986.000000 1986.000000 1986.000000 1986.000000 1986.000000 \n",
"mean 2.297583 75.003921 0.530210 5.432646 16.700658 \n",
"std 0.819513 15.637085 0.160003 6.092309 7.712549 \n",
"min 1.000000 26.230000 -0.025949 -19.900000 4.397000 \n",
"25% 2.000000 64.378000 0.441000 2.792500 11.920000 \n",
"50% 2.000000 73.680000 0.541205 5.800000 14.793500 \n",
"75% 3.000000 83.797500 0.633000 9.368500 19.143750 \n",
"max 9.000000 221.125000 0.966000 19.257000 84.400000 \n",
"\n",
" PE RELPOS \n",
"count 1986.000000 1986.000000 \n",
"mean -18980.149676 0.511780 \n",
"std 39227.270999 0.287772 \n",
"min -99999.000000 0.000000 \n",
"25% 2.459500 0.261500 \n",
"50% 3.057500 0.504500 \n",
"75% 3.365000 0.761500 \n",
"max 5.100000 1.000000 "
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"training_data_SH.describe()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Facies | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" RELPOS | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 2163.000000 | \n",
" 2163.000000 | \n",
" 2163.000000 | \n",
" 2163.000000 | \n",
" 2163.000000 | \n",
" 2163.000000 | \n",
" 2163.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 6.528433 | \n",
" 55.688079 | \n",
" 0.778336 | \n",
" 3.456620 | \n",
" 9.987847 | \n",
" -24961.843041 | \n",
" 0.531098 | \n",
"
\n",
" \n",
" std | \n",
" 1.599173 | \n",
" 36.858662 | \n",
" 0.263687 | \n",
" 4.175825 | \n",
" 4.628169 | \n",
" 43292.673480 | \n",
" 0.285358 | \n",
"
\n",
" \n",
" min | \n",
" 2.000000 | \n",
" 10.149000 | \n",
" -0.019000 | \n",
" -21.832000 | \n",
" 0.550000 | \n",
" -99999.000000 | \n",
" 0.009000 | \n",
"
\n",
" \n",
" 25% | \n",
" 5.000000 | \n",
" 32.657500 | \n",
" 0.628500 | \n",
" 1.100000 | \n",
" 6.650000 | \n",
" 1.952500 | \n",
" 0.290500 | \n",
"
\n",
" \n",
" 50% | \n",
" 6.000000 | \n",
" 46.923000 | \n",
" 0.799000 | \n",
" 3.100000 | \n",
" 8.992000 | \n",
" 3.900000 | \n",
" 0.544000 | \n",
"
\n",
" \n",
" 75% | \n",
" 8.000000 | \n",
" 68.357500 | \n",
" 0.941000 | \n",
" 5.800500 | \n",
" 12.400000 | \n",
" 4.600000 | \n",
" 0.776000 | \n",
"
\n",
" \n",
" max | \n",
" 9.000000 | \n",
" 361.150000 | \n",
" 1.800000 | \n",
" 19.312000 | \n",
" 47.721000 | \n",
" 8.094000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Facies GR ILD_log10 DeltaPHI PHIND \\\n",
"count 2163.000000 2163.000000 2163.000000 2163.000000 2163.000000 \n",
"mean 6.528433 55.688079 0.778336 3.456620 9.987847 \n",
"std 1.599173 36.858662 0.263687 4.175825 4.628169 \n",
"min 2.000000 10.149000 -0.019000 -21.832000 0.550000 \n",
"25% 5.000000 32.657500 0.628500 1.100000 6.650000 \n",
"50% 6.000000 46.923000 0.799000 3.100000 8.992000 \n",
"75% 8.000000 68.357500 0.941000 5.800500 12.400000 \n",
"max 9.000000 361.150000 1.800000 19.312000 47.721000 \n",
"\n",
" PE RELPOS \n",
"count 2163.000000 2163.000000 \n",
"mean -24961.843041 0.531098 \n",
"std 43292.673480 0.285358 \n",
"min -99999.000000 0.009000 \n",
"25% 1.952500 0.290500 \n",
"50% 3.900000 0.544000 \n",
"75% 4.600000 0.776000 \n",
"max 8.094000 1.000000 "
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"training_data_LM.describe()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X_SH = training_data_SH.drop(['Facies'],axis=1)\n",
"y_SH = training_data_SH['Facies']\n",
"\n",
"X_LM = training_data_LM.drop(['Facies'],axis=1)\n",
"y_LM = training_data_LM['Facies']"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" RELPOS | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 1986.000000 | \n",
" 1986.000000 | \n",
" 1986.000000 | \n",
" 1986.000000 | \n",
" 1986.000000 | \n",
" 1986.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 75.003921 | \n",
" 0.530210 | \n",
" 5.432646 | \n",
" 16.700658 | \n",
" -18980.149676 | \n",
" 0.511780 | \n",
"
\n",
" \n",
" std | \n",
" 15.637085 | \n",
" 0.160003 | \n",
" 6.092309 | \n",
" 7.712549 | \n",
" 39227.270999 | \n",
" 0.287772 | \n",
"
\n",
" \n",
" min | \n",
" 26.230000 | \n",
" -0.025949 | \n",
" -19.900000 | \n",
" 4.397000 | \n",
" -99999.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 64.378000 | \n",
" 0.441000 | \n",
" 2.792500 | \n",
" 11.920000 | \n",
" 2.459500 | \n",
" 0.261500 | \n",
"
\n",
" \n",
" 50% | \n",
" 73.680000 | \n",
" 0.541205 | \n",
" 5.800000 | \n",
" 14.793500 | \n",
" 3.057500 | \n",
" 0.504500 | \n",
"
\n",
" \n",
" 75% | \n",
" 83.797500 | \n",
" 0.633000 | \n",
" 9.368500 | \n",
" 19.143750 | \n",
" 3.365000 | \n",
" 0.761500 | \n",
"
\n",
" \n",
" max | \n",
" 221.125000 | \n",
" 0.966000 | \n",
" 19.257000 | \n",
" 84.400000 | \n",
" 5.100000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" GR ILD_log10 DeltaPHI PHIND PE \\\n",
"count 1986.000000 1986.000000 1986.000000 1986.000000 1986.000000 \n",
"mean 75.003921 0.530210 5.432646 16.700658 -18980.149676 \n",
"std 15.637085 0.160003 6.092309 7.712549 39227.270999 \n",
"min 26.230000 -0.025949 -19.900000 4.397000 -99999.000000 \n",
"25% 64.378000 0.441000 2.792500 11.920000 2.459500 \n",
"50% 73.680000 0.541205 5.800000 14.793500 3.057500 \n",
"75% 83.797500 0.633000 9.368500 19.143750 3.365000 \n",
"max 221.125000 0.966000 19.257000 84.400000 5.100000 \n",
"\n",
" RELPOS \n",
"count 1986.000000 \n",
"mean 0.511780 \n",
"std 0.287772 \n",
"min 0.000000 \n",
"25% 0.261500 \n",
"50% 0.504500 \n",
"75% 0.761500 \n",
"max 1.000000 "
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_SH.describe()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" RELPOS | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 2163.000000 | \n",
" 2163.000000 | \n",
" 2163.000000 | \n",
" 2163.000000 | \n",
" 2163.000000 | \n",
" 2163.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 55.688079 | \n",
" 0.778336 | \n",
" 3.456620 | \n",
" 9.987847 | \n",
" -24961.843041 | \n",
" 0.531098 | \n",
"
\n",
" \n",
" std | \n",
" 36.858662 | \n",
" 0.263687 | \n",
" 4.175825 | \n",
" 4.628169 | \n",
" 43292.673480 | \n",
" 0.285358 | \n",
"
\n",
" \n",
" min | \n",
" 10.149000 | \n",
" -0.019000 | \n",
" -21.832000 | \n",
" 0.550000 | \n",
" -99999.000000 | \n",
" 0.009000 | \n",
"
\n",
" \n",
" 25% | \n",
" 32.657500 | \n",
" 0.628500 | \n",
" 1.100000 | \n",
" 6.650000 | \n",
" 1.952500 | \n",
" 0.290500 | \n",
"
\n",
" \n",
" 50% | \n",
" 46.923000 | \n",
" 0.799000 | \n",
" 3.100000 | \n",
" 8.992000 | \n",
" 3.900000 | \n",
" 0.544000 | \n",
"
\n",
" \n",
" 75% | \n",
" 68.357500 | \n",
" 0.941000 | \n",
" 5.800500 | \n",
" 12.400000 | \n",
" 4.600000 | \n",
" 0.776000 | \n",
"
\n",
" \n",
" max | \n",
" 361.150000 | \n",
" 1.800000 | \n",
" 19.312000 | \n",
" 47.721000 | \n",
" 8.094000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" GR ILD_log10 DeltaPHI PHIND PE \\\n",
"count 2163.000000 2163.000000 2163.000000 2163.000000 2163.000000 \n",
"mean 55.688079 0.778336 3.456620 9.987847 -24961.843041 \n",
"std 36.858662 0.263687 4.175825 4.628169 43292.673480 \n",
"min 10.149000 -0.019000 -21.832000 0.550000 -99999.000000 \n",
"25% 32.657500 0.628500 1.100000 6.650000 1.952500 \n",
"50% 46.923000 0.799000 3.100000 8.992000 3.900000 \n",
"75% 68.357500 0.941000 5.800500 12.400000 4.600000 \n",
"max 361.150000 1.800000 19.312000 47.721000 8.094000 \n",
"\n",
" RELPOS \n",
"count 2163.000000 \n",
"mean 0.531098 \n",
"std 0.285358 \n",
"min 0.009000 \n",
"25% 0.290500 \n",
"50% 0.544000 \n",
"75% 0.776000 \n",
"max 1.000000 "
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_LM.describe()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"X_train_SH, X_test_SH, y_train_SH, y_test_SH = train_test_split(X_SH, y_SH, test_size=0.1)\n",
"\n",
"X_train_LM, X_test_LM, y_train_LM, y_test_LM = train_test_split(X_LM, y_LM, test_size=0.1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Applying ExtraTreeClassifier"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',\n",
" max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
" min_impurity_split=1e-07, min_samples_leaf=1,\n",
" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
" n_estimators=500, n_jobs=1, oob_score=False, random_state=None,\n",
" verbose=0, warm_start=False)"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ETC_SH = ExtraTreesClassifier(n_estimators=500, bootstrap=True)\n",
"ETC_LM = ExtraTreesClassifier(n_estimators=500)\n",
"\n",
"ETC_SH.fit(X_train_SH, y_train_SH)\n",
"ETC_LM.fit(X_train_LM, y_train_LM)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[15 8 1 0 0 0]\n",
" [ 3 77 8 0 0 0]\n",
" [ 0 18 65 0 0 0]\n",
" [ 0 0 1 0 0 0]\n",
" [ 0 2 0 0 0 0]\n",
" [ 0 1 0 0 0 0]]\n",
" precision recall f1-score support\n",
"\n",
" 1 0.83 0.62 0.71 24\n",
" 2 0.73 0.88 0.79 88\n",
" 3 0.87 0.78 0.82 83\n",
" 4 0.00 0.00 0.00 1\n",
" 5 0.00 0.00 0.00 2\n",
" 7 0.00 0.00 0.00 1\n",
"\n",
"avg / total 0.78 0.79 0.78 199\n",
"\n"
]
}
],
"source": [
"pred_SH = ETC_SH.predict(X_test_SH)\n",
"print(confusion_matrix(y_test_SH,pred_SH))\n",
"print(classification_report(y_test_SH,pred_SH))"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 2 0 1 0 0 0 0 0]\n",
" [ 1 0 0 0 0 0 0 0]\n",
" [ 0 0 30 1 3 0 1 0]\n",
" [ 0 0 1 17 6 0 2 0]\n",
" [ 0 0 3 2 43 0 10 0]\n",
" [ 0 0 1 0 0 9 4 1]\n",
" [ 0 0 1 2 6 0 51 0]\n",
" [ 0 0 0 0 0 1 2 16]]\n",
" precision recall f1-score support\n",
"\n",
" 2 0.67 0.67 0.67 3\n",
" 3 0.00 0.00 0.00 1\n",
" 4 0.81 0.86 0.83 35\n",
" 5 0.77 0.65 0.71 26\n",
" 6 0.74 0.74 0.74 58\n",
" 7 0.90 0.60 0.72 15\n",
" 8 0.73 0.85 0.78 60\n",
" 9 0.94 0.84 0.89 19\n",
"\n",
"avg / total 0.78 0.77 0.77 217\n",
"\n"
]
}
],
"source": [
"pred_LM = ETC_LM.predict(X_test_LM)\n",
"print(confusion_matrix(y_test_LM,pred_LM))\n",
"print(classification_report(y_test_LM,pred_LM))"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"validation = pd.read_csv('validation_data_nofacies.csv')"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Formation | \n",
" Well Name | \n",
" Depth | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" NM_M | \n",
" RELPOS | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" A1 SH | \n",
" STUART | \n",
" 2808.0 | \n",
" 66.276 | \n",
" 0.630 | \n",
" 3.3 | \n",
" 10.65 | \n",
" 3.591 | \n",
" 1 | \n",
" 1.000 | \n",
"
\n",
" \n",
" 1 | \n",
" A1 SH | \n",
" STUART | \n",
" 2808.5 | \n",
" 77.252 | \n",
" 0.585 | \n",
" 6.5 | \n",
" 11.95 | \n",
" 3.341 | \n",
" 1 | \n",
" 0.978 | \n",
"
\n",
" \n",
" 2 | \n",
" A1 SH | \n",
" STUART | \n",
" 2809.0 | \n",
" 82.899 | \n",
" 0.566 | \n",
" 9.4 | \n",
" 13.60 | \n",
" 3.064 | \n",
" 1 | \n",
" 0.956 | \n",
"
\n",
" \n",
" 3 | \n",
" A1 SH | \n",
" STUART | \n",
" 2809.5 | \n",
" 80.671 | \n",
" 0.593 | \n",
" 9.5 | \n",
" 13.25 | \n",
" 2.977 | \n",
" 1 | \n",
" 0.933 | \n",
"
\n",
" \n",
" 4 | \n",
" A1 SH | \n",
" STUART | \n",
" 2810.0 | \n",
" 75.971 | \n",
" 0.638 | \n",
" 8.7 | \n",
" 12.35 | \n",
" 3.020 | \n",
" 1 | \n",
" 0.911 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE \\\n",
"0 A1 SH STUART 2808.0 66.276 0.630 3.3 10.65 3.591 \n",
"1 A1 SH STUART 2808.5 77.252 0.585 6.5 11.95 3.341 \n",
"2 A1 SH STUART 2809.0 82.899 0.566 9.4 13.60 3.064 \n",
"3 A1 SH STUART 2809.5 80.671 0.593 9.5 13.25 2.977 \n",
"4 A1 SH STUART 2810.0 75.971 0.638 8.7 12.35 3.020 \n",
"\n",
" NM_M RELPOS \n",
"0 1 1.000 \n",
"1 1 0.978 \n",
"2 1 0.956 \n",
"3 1 0.933 \n",
"4 1 0.911 "
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"validation.head()"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Depth | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" NM_M | \n",
" RELPOS | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 830.000000 | \n",
" 830.00000 | \n",
" 830.000000 | \n",
" 830.000000 | \n",
" 830.000000 | \n",
" 830.000000 | \n",
" 830.000000 | \n",
" 830.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 2987.070482 | \n",
" 57.61173 | \n",
" 0.666312 | \n",
" 2.851964 | \n",
" 11.655277 | \n",
" 3.654178 | \n",
" 1.678313 | \n",
" 0.535807 | \n",
"
\n",
" \n",
" std | \n",
" 94.391925 | \n",
" 27.52774 | \n",
" 0.288367 | \n",
" 3.442074 | \n",
" 5.190236 | \n",
" 0.649793 | \n",
" 0.467405 | \n",
" 0.283062 | \n",
"
\n",
" \n",
" min | \n",
" 2808.000000 | \n",
" 12.03600 | \n",
" -0.468000 | \n",
" -8.900000 | \n",
" 1.855000 | \n",
" 2.113000 | \n",
" 1.000000 | \n",
" 0.013000 | \n",
"
\n",
" \n",
" 25% | \n",
" 2911.625000 | \n",
" 36.77325 | \n",
" 0.541000 | \n",
" 0.411250 | \n",
" 7.700000 | \n",
" 3.171500 | \n",
" 1.000000 | \n",
" 0.300000 | \n",
"
\n",
" \n",
" 50% | \n",
" 2993.750000 | \n",
" 58.34450 | \n",
" 0.675000 | \n",
" 2.397500 | \n",
" 10.950000 | \n",
" 3.515500 | \n",
" 2.000000 | \n",
" 0.547500 | \n",
"
\n",
" \n",
" 75% | \n",
" 3055.375000 | \n",
" 73.05150 | \n",
" 0.850750 | \n",
" 4.600000 | \n",
" 14.793750 | \n",
" 4.191500 | \n",
" 2.000000 | \n",
" 0.778000 | \n",
"
\n",
" \n",
" max | \n",
" 3160.500000 | \n",
" 220.41300 | \n",
" 1.507000 | \n",
" 16.500000 | \n",
" 31.335000 | \n",
" 6.321000 | \n",
" 2.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Depth GR ILD_log10 DeltaPHI PHIND PE \\\n",
"count 830.000000 830.00000 830.000000 830.000000 830.000000 830.000000 \n",
"mean 2987.070482 57.61173 0.666312 2.851964 11.655277 3.654178 \n",
"std 94.391925 27.52774 0.288367 3.442074 5.190236 0.649793 \n",
"min 2808.000000 12.03600 -0.468000 -8.900000 1.855000 2.113000 \n",
"25% 2911.625000 36.77325 0.541000 0.411250 7.700000 3.171500 \n",
"50% 2993.750000 58.34450 0.675000 2.397500 10.950000 3.515500 \n",
"75% 3055.375000 73.05150 0.850750 4.600000 14.793750 4.191500 \n",
"max 3160.500000 220.41300 1.507000 16.500000 31.335000 6.321000 \n",
"\n",
" NM_M RELPOS \n",
"count 830.000000 830.000000 \n",
"mean 1.678313 0.535807 \n",
"std 0.467405 0.283062 \n",
"min 1.000000 0.013000 \n",
"25% 1.000000 0.300000 \n",
"50% 2.000000 0.547500 \n",
"75% 2.000000 0.778000 \n",
"max 2.000000 1.000000 "
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"validation.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Making the division between SH and LM"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"validation['Label_Form_SH_LM'] = validation.Formation.apply((label_two_groups_formation))"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Formation | \n",
" Well Name | \n",
" Depth | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" NM_M | \n",
" RELPOS | \n",
" Label_Form_SH_LM | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" A1 SH | \n",
" STUART | \n",
" 2808.0 | \n",
" 66.276 | \n",
" 0.630 | \n",
" 3.3 | \n",
" 10.65 | \n",
" 3.591 | \n",
" 1 | \n",
" 1.000 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" A1 SH | \n",
" STUART | \n",
" 2808.5 | \n",
" 77.252 | \n",
" 0.585 | \n",
" 6.5 | \n",
" 11.95 | \n",
" 3.341 | \n",
" 1 | \n",
" 0.978 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" A1 SH | \n",
" STUART | \n",
" 2809.0 | \n",
" 82.899 | \n",
" 0.566 | \n",
" 9.4 | \n",
" 13.60 | \n",
" 3.064 | \n",
" 1 | \n",
" 0.956 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" A1 SH | \n",
" STUART | \n",
" 2809.5 | \n",
" 80.671 | \n",
" 0.593 | \n",
" 9.5 | \n",
" 13.25 | \n",
" 2.977 | \n",
" 1 | \n",
" 0.933 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" A1 SH | \n",
" STUART | \n",
" 2810.0 | \n",
" 75.971 | \n",
" 0.638 | \n",
" 8.7 | \n",
" 12.35 | \n",
" 3.020 | \n",
" 1 | \n",
" 0.911 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE \\\n",
"0 A1 SH STUART 2808.0 66.276 0.630 3.3 10.65 3.591 \n",
"1 A1 SH STUART 2808.5 77.252 0.585 6.5 11.95 3.341 \n",
"2 A1 SH STUART 2809.0 82.899 0.566 9.4 13.60 3.064 \n",
"3 A1 SH STUART 2809.5 80.671 0.593 9.5 13.25 2.977 \n",
"4 A1 SH STUART 2810.0 75.971 0.638 8.7 12.35 3.020 \n",
"\n",
" NM_M RELPOS Label_Form_SH_LM \n",
"0 1 1.000 1 \n",
"1 1 0.978 1 \n",
"2 1 0.956 1 \n",
"3 1 0.933 1 \n",
"4 1 0.911 1 "
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"validation.head()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"validation_SH = divisao_sh(validation)\n",
"validation_LM = divisao_lm(validation)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Formation | \n",
" Well Name | \n",
" Depth | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" NM_M | \n",
" RELPOS | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" A1 SH | \n",
" STUART | \n",
" 2808.0 | \n",
" 66.276 | \n",
" 0.630 | \n",
" 3.3 | \n",
" 10.65 | \n",
" 3.591 | \n",
" 1 | \n",
" 1.000 | \n",
"
\n",
" \n",
" 1 | \n",
" A1 SH | \n",
" STUART | \n",
" 2808.5 | \n",
" 77.252 | \n",
" 0.585 | \n",
" 6.5 | \n",
" 11.95 | \n",
" 3.341 | \n",
" 1 | \n",
" 0.978 | \n",
"
\n",
" \n",
" 2 | \n",
" A1 SH | \n",
" STUART | \n",
" 2809.0 | \n",
" 82.899 | \n",
" 0.566 | \n",
" 9.4 | \n",
" 13.60 | \n",
" 3.064 | \n",
" 1 | \n",
" 0.956 | \n",
"
\n",
" \n",
" 3 | \n",
" A1 SH | \n",
" STUART | \n",
" 2809.5 | \n",
" 80.671 | \n",
" 0.593 | \n",
" 9.5 | \n",
" 13.25 | \n",
" 2.977 | \n",
" 1 | \n",
" 0.933 | \n",
"
\n",
" \n",
" 4 | \n",
" A1 SH | \n",
" STUART | \n",
" 2810.0 | \n",
" 75.971 | \n",
" 0.638 | \n",
" 8.7 | \n",
" 12.35 | \n",
" 3.020 | \n",
" 1 | \n",
" 0.911 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE \\\n",
"0 A1 SH STUART 2808.0 66.276 0.630 3.3 10.65 3.591 \n",
"1 A1 SH STUART 2808.5 77.252 0.585 6.5 11.95 3.341 \n",
"2 A1 SH STUART 2809.0 82.899 0.566 9.4 13.60 3.064 \n",
"3 A1 SH STUART 2809.5 80.671 0.593 9.5 13.25 2.977 \n",
"4 A1 SH STUART 2810.0 75.971 0.638 8.7 12.35 3.020 \n",
"\n",
" NM_M RELPOS \n",
"0 1 1.000 \n",
"1 1 0.978 \n",
"2 1 0.956 \n",
"3 1 0.933 \n",
"4 1 0.911 "
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"validation_SH.head()"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Formation | \n",
" Well Name | \n",
" Depth | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" NM_M | \n",
" RELPOS | \n",
"
\n",
" \n",
" \n",
" \n",
" 43 | \n",
" A1 LM | \n",
" STUART | \n",
" 2829.5 | \n",
" 47.345 | \n",
" 0.584 | \n",
" 7.0 | \n",
" 16.30 | \n",
" 3.527 | \n",
" 2 | \n",
" 1.000 | \n",
"
\n",
" \n",
" 44 | \n",
" A1 LM | \n",
" STUART | \n",
" 2830.0 | \n",
" 35.733 | \n",
" 0.730 | \n",
" 6.4 | \n",
" 10.20 | \n",
" 3.928 | \n",
" 2 | \n",
" 0.987 | \n",
"
\n",
" \n",
" 45 | \n",
" A1 LM | \n",
" STUART | \n",
" 2830.5 | \n",
" 29.327 | \n",
" 0.873 | \n",
" 2.7 | \n",
" 7.85 | \n",
" 4.330 | \n",
" 2 | \n",
" 0.974 | \n",
"
\n",
" \n",
" 46 | \n",
" A1 LM | \n",
" STUART | \n",
" 2831.0 | \n",
" 28.242 | \n",
" 0.963 | \n",
" 1.4 | \n",
" 6.30 | \n",
" 4.413 | \n",
" 2 | \n",
" 0.961 | \n",
"
\n",
" \n",
" 47 | \n",
" A1 LM | \n",
" STUART | \n",
" 2831.5 | \n",
" 34.558 | \n",
" 1.018 | \n",
" 1.8 | \n",
" 5.60 | \n",
" 4.511 | \n",
" 2 | \n",
" 0.947 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE \\\n",
"43 A1 LM STUART 2829.5 47.345 0.584 7.0 16.30 3.527 \n",
"44 A1 LM STUART 2830.0 35.733 0.730 6.4 10.20 3.928 \n",
"45 A1 LM STUART 2830.5 29.327 0.873 2.7 7.85 4.330 \n",
"46 A1 LM STUART 2831.0 28.242 0.963 1.4 6.30 4.413 \n",
"47 A1 LM STUART 2831.5 34.558 1.018 1.8 5.60 4.511 \n",
"\n",
" NM_M RELPOS \n",
"43 2 1.000 \n",
"44 2 0.987 \n",
"45 2 0.974 \n",
"46 2 0.961 \n",
"47 2 0.947 "
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"validation_LM.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Removing the colums: Formation, Well Name, Depth"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X_val_SH = validation_SH.drop(['Formation','Well Name','Depth','NM_M'], axis=1)\n",
"X_val_LM = validation_LM.drop(['Formation','Well Name','Depth','NM_M'], axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" RELPOS | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 66.276 | \n",
" 0.630 | \n",
" 3.3 | \n",
" 10.65 | \n",
" 3.591 | \n",
" 1.000 | \n",
"
\n",
" \n",
" 1 | \n",
" 77.252 | \n",
" 0.585 | \n",
" 6.5 | \n",
" 11.95 | \n",
" 3.341 | \n",
" 0.978 | \n",
"
\n",
" \n",
" 2 | \n",
" 82.899 | \n",
" 0.566 | \n",
" 9.4 | \n",
" 13.60 | \n",
" 3.064 | \n",
" 0.956 | \n",
"
\n",
" \n",
" 3 | \n",
" 80.671 | \n",
" 0.593 | \n",
" 9.5 | \n",
" 13.25 | \n",
" 2.977 | \n",
" 0.933 | \n",
"
\n",
" \n",
" 4 | \n",
" 75.971 | \n",
" 0.638 | \n",
" 8.7 | \n",
" 12.35 | \n",
" 3.020 | \n",
" 0.911 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" GR ILD_log10 DeltaPHI PHIND PE RELPOS\n",
"0 66.276 0.630 3.3 10.65 3.591 1.000\n",
"1 77.252 0.585 6.5 11.95 3.341 0.978\n",
"2 82.899 0.566 9.4 13.60 3.064 0.956\n",
"3 80.671 0.593 9.5 13.25 2.977 0.933\n",
"4 75.971 0.638 8.7 12.35 3.020 0.911"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_val_SH.head()"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" RELPOS | \n",
"
\n",
" \n",
" \n",
" \n",
" 43 | \n",
" 47.345 | \n",
" 0.584 | \n",
" 7.0 | \n",
" 16.30 | \n",
" 3.527 | \n",
" 1.000 | \n",
"
\n",
" \n",
" 44 | \n",
" 35.733 | \n",
" 0.730 | \n",
" 6.4 | \n",
" 10.20 | \n",
" 3.928 | \n",
" 0.987 | \n",
"
\n",
" \n",
" 45 | \n",
" 29.327 | \n",
" 0.873 | \n",
" 2.7 | \n",
" 7.85 | \n",
" 4.330 | \n",
" 0.974 | \n",
"
\n",
" \n",
" 46 | \n",
" 28.242 | \n",
" 0.963 | \n",
" 1.4 | \n",
" 6.30 | \n",
" 4.413 | \n",
" 0.961 | \n",
"
\n",
" \n",
" 47 | \n",
" 34.558 | \n",
" 1.018 | \n",
" 1.8 | \n",
" 5.60 | \n",
" 4.511 | \n",
" 0.947 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" GR ILD_log10 DeltaPHI PHIND PE RELPOS\n",
"43 47.345 0.584 7.0 16.30 3.527 1.000\n",
"44 35.733 0.730 6.4 10.20 3.928 0.987\n",
"45 29.327 0.873 2.7 7.85 4.330 0.974\n",
"46 28.242 0.963 1.4 6.30 4.413 0.961\n",
"47 34.558 1.018 1.8 5.60 4.511 0.947"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_val_LM.head()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"pred_val_SH = ETC_SH.predict(X_val_SH)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pred_val_LM =ETC_LM.predict(X_val_LM)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"pred_val_SH = pd.DataFrame(pred_val_SH, index=X_val_SH.index)\n",
"pred_val_LM = pd.DataFrame(pred_val_LM, index=X_val_LM.index)\n",
"pred_val = pd.concat([pred_val_SH,pred_val_LM])\n",
"pred_val = pred_val.sort_index()"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 830.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 5.356627 | \n",
"
\n",
" \n",
" std | \n",
" 2.386239 | \n",
"
\n",
" \n",
" min | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 3.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 6.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 8.000000 | \n",
"
\n",
" \n",
" max | \n",
" 9.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0\n",
"count 830.000000\n",
"mean 5.356627\n",
"std 2.386239\n",
"min 1.000000\n",
"25% 3.000000\n",
"50% 6.000000\n",
"75% 8.000000\n",
"max 9.000000"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pred_val.describe()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"validation['Facies Pred'] = pred_val"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"validation=validation.drop(['Label_Form_SH_LM'],axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Formation | \n",
" Well Name | \n",
" Depth | \n",
" GR | \n",
" ILD_log10 | \n",
" DeltaPHI | \n",
" PHIND | \n",
" PE | \n",
" NM_M | \n",
" RELPOS | \n",
" Facies Pred | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" A1 SH | \n",
" STUART | \n",
" 2808.0 | \n",
" 66.276 | \n",
" 0.630 | \n",
" 3.3 | \n",
" 10.65 | \n",
" 3.591 | \n",
" 1 | \n",
" 1.000 | \n",
" 3 | \n",
"
\n",
" \n",
" 1 | \n",
" A1 SH | \n",
" STUART | \n",
" 2808.5 | \n",
" 77.252 | \n",
" 0.585 | \n",
" 6.5 | \n",
" 11.95 | \n",
" 3.341 | \n",
" 1 | \n",
" 0.978 | \n",
" 3 | \n",
"
\n",
" \n",
" 2 | \n",
" A1 SH | \n",
" STUART | \n",
" 2809.0 | \n",
" 82.899 | \n",
" 0.566 | \n",
" 9.4 | \n",
" 13.60 | \n",
" 3.064 | \n",
" 1 | \n",
" 0.956 | \n",
" 2 | \n",
"
\n",
" \n",
" 3 | \n",
" A1 SH | \n",
" STUART | \n",
" 2809.5 | \n",
" 80.671 | \n",
" 0.593 | \n",
" 9.5 | \n",
" 13.25 | \n",
" 2.977 | \n",
" 1 | \n",
" 0.933 | \n",
" 3 | \n",
"
\n",
" \n",
" 4 | \n",
" A1 SH | \n",
" STUART | \n",
" 2810.0 | \n",
" 75.971 | \n",
" 0.638 | \n",
" 8.7 | \n",
" 12.35 | \n",
" 3.020 | \n",
" 1 | \n",
" 0.911 | \n",
" 3 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE \\\n",
"0 A1 SH STUART 2808.0 66.276 0.630 3.3 10.65 3.591 \n",
"1 A1 SH STUART 2808.5 77.252 0.585 6.5 11.95 3.341 \n",
"2 A1 SH STUART 2809.0 82.899 0.566 9.4 13.60 3.064 \n",
"3 A1 SH STUART 2809.5 80.671 0.593 9.5 13.25 2.977 \n",
"4 A1 SH STUART 2810.0 75.971 0.638 8.7 12.35 3.020 \n",
"\n",
" NM_M RELPOS Facies Pred \n",
"0 1 1.000 3 \n",
"1 1 0.978 3 \n",
"2 1 0.956 2 \n",
"3 1 0.933 3 \n",
"4 1 0.911 3 "
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"validation.head()"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"validation.to_csv('Prediction.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}