gautamdudeja90

Gautam Dudeja gautamdudeja90

Online graduate student at @poloclub.

Achievements

outlier-detection.ipynb

 "cells": [

   "cell_type": "code",

   "execution_count": null,

anova_machine.py

def anova_machine(Cat_col, target_col, df):

    """ANOVA function.  Provide the target variable column y, the main data set and a categorical column.

    A pivot table will be produced. Then an ANOVA performed to see if the columns are significantly different from each other.

    Currently set for 95% confidence, will update later for higher significance setting."""

outlier_isolation.py

# Fit a 100-estimator isolation forest on the single 'Sales' column.
# The values are reshaped to one column (n_rows x 1) before fitting.
_sales = df['Sales']
isolation_forest = IsolationForest(n_estimators=100)
isolation_forest.fit(_sales.values.reshape(-1, 1))

# Evenly spaced grid spanning the observed Sales range, with as many points
# as df has rows, shaped as a single column like the training data.
xx = np.linspace(_sales.min(), _sales.max(), len(df)).reshape(-1, 1)

# Evaluate the fitted model on every grid point: a continuous score from
# decision_function and a label from predict.
anomaly_score = isolation_forest.decision_function(xx)
outlier = isolation_forest.predict(xx)

deterministic-identity-resolution deterministic-identity-resolution Public

Scala
movie-recommendation movie-recommendation Public

Scala

vif_multicollinearity.py

# ------------------------------------------------------------------------------

# Importing required libraries

# ------------------------------------------------------------------------------

from pyspark.sql.types import Row