{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Facies classification using Machine Learning\n", "\n", "### aaML Submission\n", "\n", "### By:\n", "\n", "[Alexsandro G. Cerqueira](https://github.com/alexleogc), \n", "[Alã de C. Damasceno](https://github.com/aladamasceno)\n", "\n", "There are two main notebooks:\n", "\n", "- Data analysis and editing\n", "- Submission\n", "\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from libtools import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Loading the training data without the Shankle well" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "training = pd.read_csv('data-test.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FaciesGRILD_log10DeltaPHIPHINDPERELPOSLabel_Form_SH_LM
0377.450.6649.911.9154.61.0001
1378.260.66114.212.5654.10.9791
2379.050.65814.813.0503.60.9571
3386.100.65513.913.1153.50.9361
4374.580.64713.513.3003.40.9151
\n", "
" ], "text/plain": [ " Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS Label_Form_SH_LM\n", "0 3 77.45 0.664 9.9 11.915 4.6 1.000 1\n", "1 3 78.26 0.661 14.2 12.565 4.1 0.979 1\n", "2 3 79.05 0.658 14.8 13.050 3.6 0.957 1\n", "3 3 86.10 0.655 13.9 13.115 3.5 0.936 1\n", "4 3 74.58 0.647 13.5 13.300 3.4 0.915 1" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "training.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FaciesGRILD_log10DeltaPHIPHINDPERELPOSLabel_Form_SH_LM
count3700.0000003700.0000003700.0000003700.0000003700.0000002783.0000003700.0000003700.000000
mean4.61567664.8736490.6630534.65167712.8928263.8056930.5241251.542973
std2.47580830.8171660.2538635.1090066.7962190.8941180.2871470.498217
min1.00000010.149000-0.025949-21.8320000.5500000.2000000.0000001.000000
25%2.00000043.7782500.5020001.8000008.3500003.2000000.2780001.000000
50%4.00000064.8170000.6456134.40000011.8575003.6000000.5310002.000000
75%7.00000080.3225000.8230007.60000015.7500004.4000000.7720002.000000
max9.000000361.1500001.80000019.31200084.4000008.0940001.0000002.000000
\n", "
" ], "text/plain": [ " Facies GR ILD_log10 DeltaPHI PHIND \\\n", "count 3700.000000 3700.000000 3700.000000 3700.000000 3700.000000 \n", "mean 4.615676 64.873649 0.663053 4.651677 12.892826 \n", "std 2.475808 30.817166 0.253863 5.109006 6.796219 \n", "min 1.000000 10.149000 -0.025949 -21.832000 0.550000 \n", "25% 2.000000 43.778250 0.502000 1.800000 8.350000 \n", "50% 4.000000 64.817000 0.645613 4.400000 11.857500 \n", "75% 7.000000 80.322500 0.823000 7.600000 15.750000 \n", "max 9.000000 361.150000 1.800000 19.312000 84.400000 \n", "\n", " PE RELPOS Label_Form_SH_LM \n", "count 2783.000000 3700.000000 3700.000000 \n", "mean 3.805693 0.524125 1.542973 \n", "std 0.894118 0.287147 0.498217 \n", "min 0.200000 0.000000 1.000000 \n", "25% 3.200000 0.278000 1.000000 \n", "50% 3.600000 0.531000 2.000000 \n", "75% 4.400000 0.772000 2.000000 \n", "max 8.094000 1.000000 2.000000 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "training.describe()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "training = training.fillna(-99999)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Loading the SHANKLE well" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "blind = pd.read_csv('blind.csv')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FaciesGRILD_log10DeltaPHIPHINDPERELPOSLabel_Form_SH_LM
0298.360.642-0.118.6852.91.0001
1297.570.6317.916.7453.20.9841
2298.410.61512.814.1053.20.9681
3285.920.59713.013.3853.40.9521
4283.160.59212.313.3453.40.9351
\n", "
" ], "text/plain": [ " Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS Label_Form_SH_LM\n", "0 2 98.36 0.642 -0.1 18.685 2.9 1.000 1\n", "1 2 97.57 0.631 7.9 16.745 3.2 0.984 1\n", "2 2 98.41 0.615 12.8 14.105 3.2 0.968 1\n", "3 2 85.92 0.597 13.0 13.385 3.4 0.952 1\n", "4 2 83.16 0.592 12.3 13.345 3.4 0.935 1" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "blind.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FaciesGRILD_log10DeltaPHIPHINDPERELPOSLabel_Form_SH_LM
count449.000000449.000000449.000000449.000000449.000000449.000000449.000000449.000000
mean3.57683765.4311800.6308312.34899815.7411253.2249440.5031181.342984
std2.26068825.6964180.2412936.1135439.0804670.7324140.2820820.475236
min1.00000018.4000000.093000-19.9000002.8900001.5000000.0100001.000000
25%2.00000054.9600000.4250000.1000009.1500002.7000000.2580001.000000
50%3.00000066.6000000.6200002.60000013.9350003.1000000.5000001.000000
75%6.00000075.1500000.8170006.20000018.5750003.6000000.7440002.000000
max8.000000242.7500001.31100018.60000055.9150005.4000001.0000002.000000
\n", "
" ], "text/plain": [ " Facies GR ILD_log10 DeltaPHI PHIND PE \\\n", "count 449.000000 449.000000 449.000000 449.000000 449.000000 449.000000 \n", "mean 3.576837 65.431180 0.630831 2.348998 15.741125 3.224944 \n", "std 2.260688 25.696418 0.241293 6.113543 9.080467 0.732414 \n", "min 1.000000 18.400000 0.093000 -19.900000 2.890000 1.500000 \n", "25% 2.000000 54.960000 0.425000 0.100000 9.150000 2.700000 \n", "50% 3.000000 66.600000 0.620000 2.600000 13.935000 3.100000 \n", "75% 6.000000 75.150000 0.817000 6.200000 18.575000 3.600000 \n", "max 8.000000 242.750000 1.311000 18.600000 55.915000 5.400000 \n", "\n", " RELPOS Label_Form_SH_LM \n", "count 449.000000 449.000000 \n", "mean 0.503118 1.342984 \n", "std 0.282082 0.475236 \n", "min 0.010000 1.000000 \n", "25% 0.258000 1.000000 \n", "50% 0.500000 1.000000 \n", "75% 0.744000 2.000000 \n", "max 1.000000 2.000000 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "blind.describe()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "training_SH = divisao_sh(training)\n", "training_LM = divisao_lm(training)\n", "\n", "blind_SH = divisao_sh(blind)\n", "blind_LM = divisao_lm(blind)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FaciesGRILD_log10DeltaPHIPHINDPERELPOS
0377.450.6649.911.9154.61.000
1378.260.66114.212.5654.10.979
2379.050.65814.813.0503.60.957
3386.100.65513.913.1153.50.936
4374.580.64713.513.3003.40.915
\n", "
" ], "text/plain": [ " Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS\n", "0 3 77.45 0.664 9.9 11.915 4.6 1.000\n", "1 3 78.26 0.661 14.2 12.565 4.1 0.979\n", "2 3 79.05 0.658 14.8 13.050 3.6 0.957\n", "3 3 86.10 0.655 13.9 13.115 3.5 0.936\n", "4 3 74.58 0.647 13.5 13.300 3.4 0.915" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "training_SH.head()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FaciesGRILD_log10DeltaPHIPHINDPERELPOS
43857.020.62811.612.0903.51.000
44843.290.7027.36.7054.00.981
45836.060.7625.74.5955.00.963
46846.000.8235.14.6205.50.944
47660.300.9154.65.0105.60.926
\n", "
" ], "text/plain": [ " Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS\n", "43 8 57.02 0.628 11.6 12.090 3.5 1.000\n", "44 8 43.29 0.702 7.3 6.705 4.0 0.981\n", "45 8 36.06 0.762 5.7 4.595 5.0 0.963\n", "46 8 46.00 0.823 5.1 4.620 5.5 0.944\n", "47 6 60.30 0.915 4.6 5.010 5.6 0.926" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "training_LM.head()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FaciesGRILD_log10DeltaPHIPHINDPERELPOS
0298.360.642-0.118.6852.91.000
1297.570.6317.916.7453.20.984
2298.410.61512.814.1053.20.968
3285.920.59713.013.3853.40.952
4283.160.59212.313.3453.40.935
\n", "
" ], "text/plain": [ " Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS\n", "0 2 98.36 0.642 -0.1 18.685 2.9 1.000\n", "1 2 97.57 0.631 7.9 16.745 3.2 0.984\n", "2 2 98.41 0.615 12.8 14.105 3.2 0.968\n", "3 2 85.92 0.597 13.0 13.385 3.4 0.952\n", "4 2 83.16 0.592 12.3 13.345 3.4 0.935" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "blind_SH.head()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FaciesGRILD_log10DeltaPHIPHINDPERELPOS
63846.040.645-17.828.0752.31.000
64826.730.748-9.09.1055.00.953
65836.500.7940.15.9454.50.930
66852.240.8674.27.3954.10.907
67661.320.9259.48.8253.60.884
\n", "
" ], "text/plain": [ " Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS\n", "63 8 46.04 0.645 -17.8 28.075 2.3 1.000\n", "64 8 26.73 0.748 -9.0 9.105 5.0 0.953\n", "65 8 36.50 0.794 0.1 5.945 4.5 0.930\n", "66 8 52.24 0.867 4.2 7.395 4.1 0.907\n", "67 6 61.32 0.925 9.4 8.825 3.6 0.884" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "blind_LM.head()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [], "source": [ "X_SH = training_SH.drop(['Facies'],axis=1)\n", "y_SH = training_SH['Facies']\n", "\n", "X_LM = training_LM.drop(['Facies'],axis=1)\n", "y_LM = training_LM['Facies']\n", "\n", "X_SH_blind = blind_SH.drop(['Facies'],axis=1)\n", "y_SH_blind = blind_SH['Facies']\n", "\n", "X_LM_blind = blind_LM.drop(['Facies'],axis=1)\n", "y_LM_blind = blind_LM['Facies']" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "X_train_SH, X_test_SH, y_train_SH, y_test_SH = train_test_split(X_SH, y_SH, test_size=0.1)\n", "\n", "X_train_LM, X_test_LM, y_train_LM, y_test_LM = train_test_split(X_LM, y_LM, test_size=0.1)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from sklearn.ensemble import ExtraTreesClassifier\n", "from sklearn.metrics import classification_report,confusion_matrix" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',\n", " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", " min_impurity_split=1e-07, min_samples_leaf=1,\n", " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", " n_estimators=500, n_jobs=1, oob_score=False, random_state=None,\n", " verbose=0, warm_start=False)" ] }, "execution_count": 17, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "ETC_SH = ExtraTreesClassifier(n_estimators=500, bootstrap=True)\n", "ETC_LM = ExtraTreesClassifier(n_estimators=500)\n", "\n", "ETC_SH.fit(X_train_SH, y_train_SH)\n", "ETC_LM.fit(X_train_LM, y_train_LM)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 9 1 1 0]\n", " [ 0 79 9 0]\n", " [ 0 11 59 1]\n", " [ 0 0 0 0]]\n", " precision recall f1-score support\n", "\n", " 1 1.00 0.82 0.90 11\n", " 2 0.87 0.90 0.88 88\n", " 3 0.86 0.83 0.84 71\n", " 5 0.00 0.00 0.00 0\n", "\n", "avg / total 0.87 0.86 0.87 170\n", "\n" ] } ], "source": [ "pred_SH = ETC_SH.predict(X_test_SH)\n", "print(confusion_matrix(y_test_SH,pred_SH))\n", "print(classification_report(y_test_SH,pred_SH))" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 0 0 0 1 0 0 0]\n", " [ 0 20 1 2 0 2 0]\n", " [ 0 0 18 5 1 5 0]\n", " [ 0 1 1 40 0 9 0]\n", " [ 0 0 1 0 13 2 0]\n", " [ 0 0 4 9 1 47 0]\n", " [ 0 0 0 0 0 2 16]]\n", " precision recall f1-score support\n", "\n", " 3 0.00 0.00 0.00 1\n", " 4 0.95 0.80 0.87 25\n", " 5 0.72 0.62 0.67 29\n", " 6 0.70 0.78 0.74 51\n", " 7 0.87 0.81 0.84 16\n", " 8 0.70 0.77 0.73 61\n", " 9 1.00 0.89 0.94 18\n", "\n", "avg / total 0.77 0.77 0.77 201\n", "\n" ] } ], "source": [ "pred_LM = ETC_LM.predict(X_test_LM)\n", "print(confusion_matrix(y_test_LM,pred_LM))\n", "print(classification_report(y_test_LM,pred_LM))" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 7 81 1]\n", " [ 5 75 9]\n", " [ 0 43 74]]\n", " precision recall f1-score support\n", "\n", " 1 0.58 0.08 0.14 89\n", " 2 0.38 0.84 0.52 89\n", " 3 0.88 0.63 0.74 117\n", "\n", "avg / total 0.64 0.53 0.49 295\n", "\n" ] } ], "source": [ 
"blind_pred_SH = ETC_SH.predict(X_SH_blind)\n", "print(confusion_matrix(y_SH_blind, blind_pred_SH))\n", "print(classification_report(y_SH_blind, blind_pred_SH))" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 0 0 0 0 0 0 0]\n", " [ 0 1 0 5 0 1 0]\n", " [ 0 5 3 6 0 5 0]\n", " [ 1 1 1 52 0 16 0]\n", " [ 0 0 0 1 2 13 1]\n", " [ 0 0 0 15 1 23 1]\n", " [ 0 0 0 0 0 0 0]]\n", " precision recall f1-score support\n", "\n", " 3 0.00 0.00 0.00 0\n", " 4 0.14 0.14 0.14 7\n", " 5 0.75 0.16 0.26 19\n", " 6 0.66 0.73 0.69 71\n", " 7 0.67 0.12 0.20 17\n", " 8 0.40 0.57 0.47 40\n", " 9 0.00 0.00 0.00 0\n", "\n", "avg / total 0.58 0.53 0.50 154\n", "\n" ] } ], "source": [ "blind_pred_LM = ETC_LM.predict(X_LM_blind)\n", "print(confusion_matrix(y_LM_blind, blind_pred_LM))\n", "print(classification_report(y_LM_blind, blind_pred_LM))" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": true }, "outputs": [], "source": [ "blind_pred_SH = pd.DataFrame(blind_pred_SH, index=X_SH_blind.index)\n", "blind_pred_LM = pd.DataFrame(blind_pred_LM, index=X_LM_blind.index)\n", "pred_blind = pd.concat([blind_pred_SH,blind_pred_LM])\n", "pred_blind = pred_blind.sort_index()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": true }, "outputs": [], "source": [ "y_blind = blind['Facies']" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 7 81 1 0 0 0 0 0 0]\n", " [ 5 75 9 0 0 0 0 0 0]\n", " [ 0 43 74 0 0 0 0 0 0]\n", " [ 0 0 0 1 0 5 0 1 0]\n", " [ 0 0 0 5 3 6 0 5 0]\n", " [ 0 0 1 1 1 52 0 16 0]\n", " [ 0 0 0 0 0 1 2 13 1]\n", " [ 0 0 0 0 0 15 1 23 1]\n", " [ 0 0 0 0 0 0 0 0 0]]\n", " precision recall f1-score support\n", "\n", " 1 0.58 0.08 0.14 89\n", " 2 0.38 0.84 0.52 89\n", " 3 0.87 0.63 0.73 117\n", " 4 0.14 0.14 0.14 7\n", " 5 
0.75 0.16 0.26 19\n", " 6 0.66 0.73 0.69 71\n", " 7 0.67 0.12 0.20 17\n", " 8 0.40 0.57 0.47 40\n", " 9 0.00 0.00 0.00 0\n", "\n", "avg / total 0.62 0.53 0.49 449\n", "\n" ] } ], "source": [ "print(confusion_matrix(y_blind, pred_blind))\n", "print(classification_report(y_blind, pred_blind))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using the complete training data" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": true }, "outputs": [], "source": [ "training_data = pd.read_csv('training.csv')" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FaciesGRILD_log10DeltaPHIPHINDPERELPOSLabel_Form_SH_LM
0377.450.6649.911.9154.61.0001
1378.260.66114.212.5654.10.9791
2379.050.65814.813.0503.60.9571
3386.100.65513.913.1153.50.9361
4374.580.64713.513.3003.40.9151
\n", "
" ], "text/plain": [ " Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS Label_Form_SH_LM\n", "0 3 77.45 0.664 9.9 11.915 4.6 1.000 1\n", "1 3 78.26 0.661 14.2 12.565 4.1 0.979 1\n", "2 3 79.05 0.658 14.8 13.050 3.6 0.957 1\n", "3 3 86.10 0.655 13.9 13.115 3.5 0.936 1\n", "4 3 74.58 0.647 13.5 13.300 3.4 0.915 1" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "training_data.head()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FaciesGRILD_log10DeltaPHIPHINDPERELPOSLabel_Form_SH_LM
count4149.0000004149.0000004149.0000004149.0000004149.0000004149.0000004149.0000004149.000000
mean4.50325464.9339850.6595664.40248413.201066-22098.5885170.5218521.521330
std2.47432430.3025300.2527035.2749477.13284641499.3301870.2866440.499605
min1.00000010.149000-0.025949-21.8320000.550000-99999.0000000.0000001.000000
25%2.00000044.7300000.4980001.6000008.5000002.4160000.2770001.000000
50%4.00000064.9900000.6390004.30000012.0200003.3000000.5280002.000000
75%6.00000079.4380000.8220007.50000016.0500004.0000000.7690002.000000
max9.000000361.1500001.80000019.31200084.4000008.0940001.0000002.000000
\n", "
" ], "text/plain": [ " Facies GR ILD_log10 DeltaPHI PHIND \\\n", "count 4149.000000 4149.000000 4149.000000 4149.000000 4149.000000 \n", "mean 4.503254 64.933985 0.659566 4.402484 13.201066 \n", "std 2.474324 30.302530 0.252703 5.274947 7.132846 \n", "min 1.000000 10.149000 -0.025949 -21.832000 0.550000 \n", "25% 2.000000 44.730000 0.498000 1.600000 8.500000 \n", "50% 4.000000 64.990000 0.639000 4.300000 12.020000 \n", "75% 6.000000 79.438000 0.822000 7.500000 16.050000 \n", "max 9.000000 361.150000 1.800000 19.312000 84.400000 \n", "\n", " PE RELPOS Label_Form_SH_LM \n", "count 4149.000000 4149.000000 4149.000000 \n", "mean -22098.588517 0.521852 1.521330 \n", "std 41499.330187 0.286644 0.499605 \n", "min -99999.000000 0.000000 1.000000 \n", "25% 2.416000 0.277000 1.000000 \n", "50% 3.300000 0.528000 2.000000 \n", "75% 4.000000 0.769000 2.000000 \n", "max 8.094000 1.000000 2.000000 " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "training_data.describe()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": true }, "outputs": [], "source": [ "training_data_SH = divisao_sh(training_data)\n", "training_data_LM = divisao_lm(training_data)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FaciesGRILD_log10DeltaPHIPHINDPERELPOS
count1986.0000001986.0000001986.0000001986.0000001986.0000001986.0000001986.000000
mean2.29758375.0039210.5302105.43264616.700658-18980.1496760.511780
std0.81951315.6370850.1600036.0923097.71254939227.2709990.287772
min1.00000026.230000-0.025949-19.9000004.397000-99999.0000000.000000
25%2.00000064.3780000.4410002.79250011.9200002.4595000.261500
50%2.00000073.6800000.5412055.80000014.7935003.0575000.504500
75%3.00000083.7975000.6330009.36850019.1437503.3650000.761500
max9.000000221.1250000.96600019.25700084.4000005.1000001.000000
\n", "
" ], "text/plain": [ " Facies GR ILD_log10 DeltaPHI PHIND \\\n", "count 1986.000000 1986.000000 1986.000000 1986.000000 1986.000000 \n", "mean 2.297583 75.003921 0.530210 5.432646 16.700658 \n", "std 0.819513 15.637085 0.160003 6.092309 7.712549 \n", "min 1.000000 26.230000 -0.025949 -19.900000 4.397000 \n", "25% 2.000000 64.378000 0.441000 2.792500 11.920000 \n", "50% 2.000000 73.680000 0.541205 5.800000 14.793500 \n", "75% 3.000000 83.797500 0.633000 9.368500 19.143750 \n", "max 9.000000 221.125000 0.966000 19.257000 84.400000 \n", "\n", " PE RELPOS \n", "count 1986.000000 1986.000000 \n", "mean -18980.149676 0.511780 \n", "std 39227.270999 0.287772 \n", "min -99999.000000 0.000000 \n", "25% 2.459500 0.261500 \n", "50% 3.057500 0.504500 \n", "75% 3.365000 0.761500 \n", "max 5.100000 1.000000 " ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "training_data_SH.describe()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FaciesGRILD_log10DeltaPHIPHINDPERELPOS
count2163.0000002163.0000002163.0000002163.0000002163.0000002163.0000002163.000000
mean6.52843355.6880790.7783363.4566209.987847-24961.8430410.531098
std1.59917336.8586620.2636874.1758254.62816943292.6734800.285358
min2.00000010.149000-0.019000-21.8320000.550000-99999.0000000.009000
25%5.00000032.6575000.6285001.1000006.6500001.9525000.290500
50%6.00000046.9230000.7990003.1000008.9920003.9000000.544000
75%8.00000068.3575000.9410005.80050012.4000004.6000000.776000
max9.000000361.1500001.80000019.31200047.7210008.0940001.000000
\n", "
" ], "text/plain": [ " Facies GR ILD_log10 DeltaPHI PHIND \\\n", "count 2163.000000 2163.000000 2163.000000 2163.000000 2163.000000 \n", "mean 6.528433 55.688079 0.778336 3.456620 9.987847 \n", "std 1.599173 36.858662 0.263687 4.175825 4.628169 \n", "min 2.000000 10.149000 -0.019000 -21.832000 0.550000 \n", "25% 5.000000 32.657500 0.628500 1.100000 6.650000 \n", "50% 6.000000 46.923000 0.799000 3.100000 8.992000 \n", "75% 8.000000 68.357500 0.941000 5.800500 12.400000 \n", "max 9.000000 361.150000 1.800000 19.312000 47.721000 \n", "\n", " PE RELPOS \n", "count 2163.000000 2163.000000 \n", "mean -24961.843041 0.531098 \n", "std 43292.673480 0.285358 \n", "min -99999.000000 0.009000 \n", "25% 1.952500 0.290500 \n", "50% 3.900000 0.544000 \n", "75% 4.600000 0.776000 \n", "max 8.094000 1.000000 " ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "training_data_LM.describe()" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [], "source": [ "X_SH = training_data_SH.drop(['Facies'],axis=1)\n", "y_SH = training_data_SH['Facies']\n", "\n", "X_LM = training_data_LM.drop(['Facies'],axis=1)\n", "y_LM = training_data_LM['Facies']" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GRILD_log10DeltaPHIPHINDPERELPOS
count1986.0000001986.0000001986.0000001986.0000001986.0000001986.000000
mean75.0039210.5302105.43264616.700658-18980.1496760.511780
std15.6370850.1600036.0923097.71254939227.2709990.287772
min26.230000-0.025949-19.9000004.397000-99999.0000000.000000
25%64.3780000.4410002.79250011.9200002.4595000.261500
50%73.6800000.5412055.80000014.7935003.0575000.504500
75%83.7975000.6330009.36850019.1437503.3650000.761500
max221.1250000.96600019.25700084.4000005.1000001.000000
\n", "
" ], "text/plain": [ " GR ILD_log10 DeltaPHI PHIND PE \\\n", "count 1986.000000 1986.000000 1986.000000 1986.000000 1986.000000 \n", "mean 75.003921 0.530210 5.432646 16.700658 -18980.149676 \n", "std 15.637085 0.160003 6.092309 7.712549 39227.270999 \n", "min 26.230000 -0.025949 -19.900000 4.397000 -99999.000000 \n", "25% 64.378000 0.441000 2.792500 11.920000 2.459500 \n", "50% 73.680000 0.541205 5.800000 14.793500 3.057500 \n", "75% 83.797500 0.633000 9.368500 19.143750 3.365000 \n", "max 221.125000 0.966000 19.257000 84.400000 5.100000 \n", "\n", " RELPOS \n", "count 1986.000000 \n", "mean 0.511780 \n", "std 0.287772 \n", "min 0.000000 \n", "25% 0.261500 \n", "50% 0.504500 \n", "75% 0.761500 \n", "max 1.000000 " ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_SH.describe()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GRILD_log10DeltaPHIPHINDPERELPOS
count2163.0000002163.0000002163.0000002163.0000002163.0000002163.000000
mean55.6880790.7783363.4566209.987847-24961.8430410.531098
std36.8586620.2636874.1758254.62816943292.6734800.285358
min10.149000-0.019000-21.8320000.550000-99999.0000000.009000
25%32.6575000.6285001.1000006.6500001.9525000.290500
50%46.9230000.7990003.1000008.9920003.9000000.544000
75%68.3575000.9410005.80050012.4000004.6000000.776000
max361.1500001.80000019.31200047.7210008.0940001.000000
\n", "
" ], "text/plain": [ " GR ILD_log10 DeltaPHI PHIND PE \\\n", "count 2163.000000 2163.000000 2163.000000 2163.000000 2163.000000 \n", "mean 55.688079 0.778336 3.456620 9.987847 -24961.843041 \n", "std 36.858662 0.263687 4.175825 4.628169 43292.673480 \n", "min 10.149000 -0.019000 -21.832000 0.550000 -99999.000000 \n", "25% 32.657500 0.628500 1.100000 6.650000 1.952500 \n", "50% 46.923000 0.799000 3.100000 8.992000 3.900000 \n", "75% 68.357500 0.941000 5.800500 12.400000 4.600000 \n", "max 361.150000 1.800000 19.312000 47.721000 8.094000 \n", "\n", " RELPOS \n", "count 2163.000000 \n", "mean 0.531098 \n", "std 0.285358 \n", "min 0.009000 \n", "25% 0.290500 \n", "50% 0.544000 \n", "75% 0.776000 \n", "max 1.000000 " ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_LM.describe()" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "X_train_SH, X_test_SH, y_train_SH, y_test_SH = train_test_split(X_SH, y_SH, test_size=0.1)\n", "\n", "X_train_LM, X_test_LM, y_train_LM, y_test_LM = train_test_split(X_LM, y_LM, test_size=0.1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Applying ExtraTreesClassifier" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',\n", " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", " min_impurity_split=1e-07, min_samples_leaf=1,\n", " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", " n_estimators=500, n_jobs=1, oob_score=False, random_state=None,\n", " verbose=0, warm_start=False)" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ETC_SH = ExtraTreesClassifier(n_estimators=500, bootstrap=True)\n", "ETC_LM = ExtraTreesClassifier(n_estimators=500)\n", 
"\n", "ETC_SH.fit(X_train_SH, y_train_SH)\n", "ETC_LM.fit(X_train_LM, y_train_LM)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[15 8 1 0 0 0]\n", " [ 3 77 8 0 0 0]\n", " [ 0 18 65 0 0 0]\n", " [ 0 0 1 0 0 0]\n", " [ 0 2 0 0 0 0]\n", " [ 0 1 0 0 0 0]]\n", " precision recall f1-score support\n", "\n", " 1 0.83 0.62 0.71 24\n", " 2 0.73 0.88 0.79 88\n", " 3 0.87 0.78 0.82 83\n", " 4 0.00 0.00 0.00 1\n", " 5 0.00 0.00 0.00 2\n", " 7 0.00 0.00 0.00 1\n", "\n", "avg / total 0.78 0.79 0.78 199\n", "\n" ] } ], "source": [ "pred_SH = ETC_SH.predict(X_test_SH)\n", "print(confusion_matrix(y_test_SH,pred_SH))\n", "print(classification_report(y_test_SH,pred_SH))" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 2 0 1 0 0 0 0 0]\n", " [ 1 0 0 0 0 0 0 0]\n", " [ 0 0 30 1 3 0 1 0]\n", " [ 0 0 1 17 6 0 2 0]\n", " [ 0 0 3 2 43 0 10 0]\n", " [ 0 0 1 0 0 9 4 1]\n", " [ 0 0 1 2 6 0 51 0]\n", " [ 0 0 0 0 0 1 2 16]]\n", " precision recall f1-score support\n", "\n", " 2 0.67 0.67 0.67 3\n", " 3 0.00 0.00 0.00 1\n", " 4 0.81 0.86 0.83 35\n", " 5 0.77 0.65 0.71 26\n", " 6 0.74 0.74 0.74 58\n", " 7 0.90 0.60 0.72 15\n", " 8 0.73 0.85 0.78 60\n", " 9 0.94 0.84 0.89 19\n", "\n", "avg / total 0.78 0.77 0.77 217\n", "\n" ] } ], "source": [ "pred_LM = ETC_LM.predict(X_test_LM)\n", "print(confusion_matrix(y_test_LM,pred_LM))\n", "print(classification_report(y_test_LM,pred_LM))" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": true }, "outputs": [], "source": [ "validation = pd.read_csv('validation_data_nofacies.csv')" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FormationWell NameDepthGRILD_log10DeltaPHIPHINDPENM_MRELPOS
0A1 SHSTUART2808.066.2760.6303.310.653.59111.000
1A1 SHSTUART2808.577.2520.5856.511.953.34110.978
2A1 SHSTUART2809.082.8990.5669.413.603.06410.956
3A1 SHSTUART2809.580.6710.5939.513.252.97710.933
4A1 SHSTUART2810.075.9710.6388.712.353.02010.911
\n", "
" ], "text/plain": [ " Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE \\\n", "0 A1 SH STUART 2808.0 66.276 0.630 3.3 10.65 3.591 \n", "1 A1 SH STUART 2808.5 77.252 0.585 6.5 11.95 3.341 \n", "2 A1 SH STUART 2809.0 82.899 0.566 9.4 13.60 3.064 \n", "3 A1 SH STUART 2809.5 80.671 0.593 9.5 13.25 2.977 \n", "4 A1 SH STUART 2810.0 75.971 0.638 8.7 12.35 3.020 \n", "\n", " NM_M RELPOS \n", "0 1 1.000 \n", "1 1 0.978 \n", "2 1 0.956 \n", "3 1 0.933 \n", "4 1 0.911 " ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "validation.head()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DepthGRILD_log10DeltaPHIPHINDPENM_MRELPOS
count830.000000830.00000830.000000830.000000830.000000830.000000830.000000830.000000
mean2987.07048257.611730.6663122.85196411.6552773.6541781.6783130.535807
std94.39192527.527740.2883673.4420745.1902360.6497930.4674050.283062
min2808.00000012.03600-0.468000-8.9000001.8550002.1130001.0000000.013000
25%2911.62500036.773250.5410000.4112507.7000003.1715001.0000000.300000
50%2993.75000058.344500.6750002.39750010.9500003.5155002.0000000.547500
75%3055.37500073.051500.8507504.60000014.7937504.1915002.0000000.778000
max3160.500000220.413001.50700016.50000031.3350006.3210002.0000001.000000
\n", "
" ], "text/plain": [ " Depth GR ILD_log10 DeltaPHI PHIND PE \\\n", "count 830.000000 830.00000 830.000000 830.000000 830.000000 830.000000 \n", "mean 2987.070482 57.61173 0.666312 2.851964 11.655277 3.654178 \n", "std 94.391925 27.52774 0.288367 3.442074 5.190236 0.649793 \n", "min 2808.000000 12.03600 -0.468000 -8.900000 1.855000 2.113000 \n", "25% 2911.625000 36.77325 0.541000 0.411250 7.700000 3.171500 \n", "50% 2993.750000 58.34450 0.675000 2.397500 10.950000 3.515500 \n", "75% 3055.375000 73.05150 0.850750 4.600000 14.793750 4.191500 \n", "max 3160.500000 220.41300 1.507000 16.500000 31.335000 6.321000 \n", "\n", " NM_M RELPOS \n", "count 830.000000 830.000000 \n", "mean 1.678313 0.535807 \n", "std 0.467405 0.283062 \n", "min 1.000000 0.013000 \n", "25% 1.000000 0.300000 \n", "50% 2.000000 0.547500 \n", "75% 2.000000 0.778000 \n", "max 2.000000 1.000000 " ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "validation.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Making the division between SH and LM" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": true }, "outputs": [], "source": [ "validation['Label_Form_SH_LM'] = validation.Formation.apply((label_two_groups_formation))" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FormationWell NameDepthGRILD_log10DeltaPHIPHINDPENM_MRELPOSLabel_Form_SH_LM
0A1 SHSTUART2808.066.2760.6303.310.653.59111.0001
1A1 SHSTUART2808.577.2520.5856.511.953.34110.9781
2A1 SHSTUART2809.082.8990.5669.413.603.06410.9561
3A1 SHSTUART2809.580.6710.5939.513.252.97710.9331
4A1 SHSTUART2810.075.9710.6388.712.353.02010.9111
\n", "
" ], "text/plain": [ " Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE \\\n", "0 A1 SH STUART 2808.0 66.276 0.630 3.3 10.65 3.591 \n", "1 A1 SH STUART 2808.5 77.252 0.585 6.5 11.95 3.341 \n", "2 A1 SH STUART 2809.0 82.899 0.566 9.4 13.60 3.064 \n", "3 A1 SH STUART 2809.5 80.671 0.593 9.5 13.25 2.977 \n", "4 A1 SH STUART 2810.0 75.971 0.638 8.7 12.35 3.020 \n", "\n", " NM_M RELPOS Label_Form_SH_LM \n", "0 1 1.000 1 \n", "1 1 0.978 1 \n", "2 1 0.956 1 \n", "3 1 0.933 1 \n", "4 1 0.911 1 " ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "validation.head()" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false }, "outputs": [], "source": [ "validation_SH = divisao_sh(validation)\n", "validation_LM = divisao_lm(validation)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FormationWell NameDepthGRILD_log10DeltaPHIPHINDPENM_MRELPOS
0A1 SHSTUART2808.066.2760.6303.310.653.59111.000
1A1 SHSTUART2808.577.2520.5856.511.953.34110.978
2A1 SHSTUART2809.082.8990.5669.413.603.06410.956
3A1 SHSTUART2809.580.6710.5939.513.252.97710.933
4A1 SHSTUART2810.075.9710.6388.712.353.02010.911
\n", "
" ], "text/plain": [ " Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE \\\n", "0 A1 SH STUART 2808.0 66.276 0.630 3.3 10.65 3.591 \n", "1 A1 SH STUART 2808.5 77.252 0.585 6.5 11.95 3.341 \n", "2 A1 SH STUART 2809.0 82.899 0.566 9.4 13.60 3.064 \n", "3 A1 SH STUART 2809.5 80.671 0.593 9.5 13.25 2.977 \n", "4 A1 SH STUART 2810.0 75.971 0.638 8.7 12.35 3.020 \n", "\n", " NM_M RELPOS \n", "0 1 1.000 \n", "1 1 0.978 \n", "2 1 0.956 \n", "3 1 0.933 \n", "4 1 0.911 " ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "validation_SH.head()" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FormationWell NameDepthGRILD_log10DeltaPHIPHINDPENM_MRELPOS
43A1 LMSTUART2829.547.3450.5847.016.303.52721.000
44A1 LMSTUART2830.035.7330.7306.410.203.92820.987
45A1 LMSTUART2830.529.3270.8732.77.854.33020.974
46A1 LMSTUART2831.028.2420.9631.46.304.41320.961
47A1 LMSTUART2831.534.5581.0181.85.604.51120.947
\n", "
" ], "text/plain": [ " Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE \\\n", "43 A1 LM STUART 2829.5 47.345 0.584 7.0 16.30 3.527 \n", "44 A1 LM STUART 2830.0 35.733 0.730 6.4 10.20 3.928 \n", "45 A1 LM STUART 2830.5 29.327 0.873 2.7 7.85 4.330 \n", "46 A1 LM STUART 2831.0 28.242 0.963 1.4 6.30 4.413 \n", "47 A1 LM STUART 2831.5 34.558 1.018 1.8 5.60 4.511 \n", "\n", " NM_M RELPOS \n", "43 2 1.000 \n", "44 2 0.987 \n", "45 2 0.974 \n", "46 2 0.961 \n", "47 2 0.947 " ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "validation_LM.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Removing the columns: Formation, Well Name, Depth, NM_M" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [], "source": [ "X_val_SH = validation_SH.drop(['Formation','Well Name','Depth','NM_M'], axis=1)\n", "X_val_LM = validation_LM.drop(['Formation','Well Name','Depth','NM_M'], axis=1)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GRILD_log10DeltaPHIPHINDPERELPOS
066.2760.6303.310.653.5911.000
177.2520.5856.511.953.3410.978
282.8990.5669.413.603.0640.956
380.6710.5939.513.252.9770.933
475.9710.6388.712.353.0200.911
\n", "
" ], "text/plain": [ " GR ILD_log10 DeltaPHI PHIND PE RELPOS\n", "0 66.276 0.630 3.3 10.65 3.591 1.000\n", "1 77.252 0.585 6.5 11.95 3.341 0.978\n", "2 82.899 0.566 9.4 13.60 3.064 0.956\n", "3 80.671 0.593 9.5 13.25 2.977 0.933\n", "4 75.971 0.638 8.7 12.35 3.020 0.911" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_val_SH.head()" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GRILD_log10DeltaPHIPHINDPERELPOS
4347.3450.5847.016.303.5271.000
4435.7330.7306.410.203.9280.987
4529.3270.8732.77.854.3300.974
4628.2420.9631.46.304.4130.961
4734.5581.0181.85.604.5110.947
\n", "
" ], "text/plain": [ " GR ILD_log10 DeltaPHI PHIND PE RELPOS\n", "43 47.345 0.584 7.0 16.30 3.527 1.000\n", "44 35.733 0.730 6.4 10.20 3.928 0.987\n", "45 29.327 0.873 2.7 7.85 4.330 0.974\n", "46 28.242 0.963 1.4 6.30 4.413 0.961\n", "47 34.558 1.018 1.8 5.60 4.511 0.947" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_val_LM.head()" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pred_val_SH = ETC_SH.predict(X_val_SH)" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": true }, "outputs": [], "source": [ "pred_val_LM =ETC_LM.predict(X_val_LM)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pred_val_SH = pd.DataFrame(pred_val_SH, index=X_val_SH.index)\n", "pred_val_LM = pd.DataFrame(pred_val_LM, index=X_val_LM.index)\n", "pred_val = pd.concat([pred_val_SH,pred_val_LM])\n", "pred_val = pred_val.sort_index()" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
count830.000000
mean5.356627
std2.386239
min1.000000
25%3.000000
50%6.000000
75%8.000000
max9.000000
\n", "
" ], "text/plain": [ " 0\n", "count 830.000000\n", "mean 5.356627\n", "std 2.386239\n", "min 1.000000\n", "25% 3.000000\n", "50% 6.000000\n", "75% 8.000000\n", "max 9.000000" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pred_val.describe()" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": true }, "outputs": [], "source": [ "validation['Facies Pred'] = pred_val" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": false }, "outputs": [], "source": [ "validation=validation.drop(['Label_Form_SH_LM'],axis=1)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FormationWell NameDepthGRILD_log10DeltaPHIPHINDPENM_MRELPOSFacies Pred
0A1 SHSTUART2808.066.2760.6303.310.653.59111.0003
1A1 SHSTUART2808.577.2520.5856.511.953.34110.9783
2A1 SHSTUART2809.082.8990.5669.413.603.06410.9562
3A1 SHSTUART2809.580.6710.5939.513.252.97710.9333
4A1 SHSTUART2810.075.9710.6388.712.353.02010.9113
\n", "
" ], "text/plain": [ " Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE \\\n", "0 A1 SH STUART 2808.0 66.276 0.630 3.3 10.65 3.591 \n", "1 A1 SH STUART 2808.5 77.252 0.585 6.5 11.95 3.341 \n", "2 A1 SH STUART 2809.0 82.899 0.566 9.4 13.60 3.064 \n", "3 A1 SH STUART 2809.5 80.671 0.593 9.5 13.25 2.977 \n", "4 A1 SH STUART 2810.0 75.971 0.638 8.7 12.35 3.020 \n", "\n", " NM_M RELPOS Facies Pred \n", "0 1 1.000 3 \n", "1 1 0.978 3 \n", "2 1 0.956 2 \n", "3 1 0.933 3 \n", "4 1 0.911 3 " ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "validation.head()" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": true }, "outputs": [], "source": [ "validation.to_csv('Prediction.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [conda root]", "language": "python", "name": "conda-root-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 1 }