Merge pull request #8 from mark-bradshaw/update-language

Update language
skale-me · Sep 27, 2016 · bd28a02 · bd28a02
2 parents 6b2f2d4 + fa35396
commit bd28a02
Show file tree

Hide file tree

Showing 7 changed files with 31 additions and 27 deletions.
diff --git a/README.md b/README.md
@@ -1,13 +1,13 @@
 # skale-examples
 ### Description
-A place to share skale sample applications 
+A place to share skale sample applications
 
 ### Requirements
-We assume you already installed [Node.js](https://nodejs.org/en/) and [skale](https://github.com/skale-me/skale-cli) to run those examples.
+We assume you have already installed [Node.js](https://nodejs.org/en/) and [skale](https://github.com/skale-me/skale-cli) to run these examples.
 
 ### Install
 
-First clone locally this repository
+First clone this repository to your local computer:
 
 	git clone https://github.com/skale-me/skale-examples.git
 
@@ -16,26 +16,27 @@ Then navigate to one of the example folders and install dependencies
 	cd skale-examples/wordcount
 	npm install
 
-*NB: For some examples you may need additional tools in your environment, like gnuplot for the adult application.*
+*Note: For some examples you may need additional tools in your environment, like gnuplot for the adult application.*
 
 Finally, run the sample app
-
-	skale run
+
+	skale run # To run it on the skale cloud
+	skale test # If you have skale-engine running locally
 
 ### Examples
 #### wordcount
-Compute the number of occurence of each word of james joyce's Ulysse.
+Compute the number of occurrences of each word in James Joyce's Ulysses.
 
 ##### pi
-Compute pi by throwing darts on the unit circle.
+Compute pi by throwing darts on the unit circle.  For more information on the algorithm see https://en.wikipedia.org/wiki/Approximations_of_%CF%80#Summing_a_circle.27s_area
 
 ##### adult
 This example shows machine learning capabilities of skale-ml and skale-engine.
 
-We load and featurize a training dataset and a validation dataset from files. 
+We load and featurize a training dataset and a validation dataset from files.
 
-We standardize features to zero mean unit variance variables. 
+We standardize features to zero mean unit variance variables.
 
-We train a binary logistic regression model using a Stochastic Gradient Descent. 
+We train a binary logistic regression model using Stochastic Gradient Descent.
 
 We generate the Receiver Operating Characteristic curve as a png image.
diff --git a/adult/adult.ipynb b/adult/adult.ipynb
@@ -11,14 +11,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "This notebook demonstrates how use Skale to process the data set from Adult Data Set MLR which \n",
+    "This notebook demonstrates how to use Skale to process the data set from Adult Data Set MLR which \n",
     "contains income data for about 32000 people. The ultimate goal is to see how well we can predict \n",
     "if a person will make more than $50K a year, given data like education, gender and marital status.\n",
     "\n",
     "To achieve this goal we will:\n",
-    "- Run an ETL operation on raw data to obtain label and features from it\n",
+    "- Run an ETL operation on the raw data to obtain label and features from it\n",
     "- Build a machine learning model using the refined data (a logistic regression)\n",
-    "- Evaluate the performance of our model using cross-validation technique"
+    "- Evaluate the performance of our model using a cross-validation technique"
    ]
   },
   {
@@ -146,7 +146,7 @@
    },
    "outputs": [],
    "source": [
-    "// Wrap asynchronous code in co to limit callback imbrications (ES7 async/await will fix this !)\n",
+    "// Wrap asynchronous code in co to limit callback hell (ES7 async/await will fix this!)\n",
     "co(function* () {\n",
     "\t// Standardize features to zero mean and unit variance\n",
     "\tvar scaler = new StandardScaler();\n",

diff --git a/adult/adult.js b/adult/adult.js
@@ -37,10 +37,10 @@ function featurize(data, metadata) {
 		// metadata.education.indexOf(data[3]),			// education (redundant with education-num)
 		Number(data[4]),								// 4 education-num
 		metadata.maritalstatus.indexOf(data[5]),		// 5 marital-status
-		metadata.occupation.indexOf(data[6]),			// 6 occupation	
-		metadata.relationship.indexOf(data[7]),			// 7 relationship	
+		metadata.occupation.indexOf(data[6]),			// 6 occupation
+		metadata.relationship.indexOf(data[7]),			// 7 relationship
 		metadata.race.indexOf(data[8]),					// 8 race
-		metadata.sex.indexOf(data[9]),					// 9 sex	
+		metadata.sex.indexOf(data[9]),					// 9 sex
 		Number(data[10]),								// 10 capital-gain
 		Number(data[11]),								// 11 capital-loss
 		Number(data[12]),								// 12 hours-per-week
@@ -60,7 +60,7 @@ var test_set = sc.textFile('adult.test')
 	.filter(data => data.indexOf('?') == -1)					// remove incomplete data
 	.map(featurize, metadata);									// transform string data to number
 
-// Wrap asynchronous code in co to limit callback imbrications (ES7 async/await will fix this !)
+// Wrap asynchronous code in co to limit callback hell (ES7 async/await will fix this!)
 co(function* () {
 	// Standardize features to zero mean and unit variance
 	var scaler = new StandardScaler();
@@ -98,8 +98,8 @@ co(function* () {
 	data['regParam: ' + parameters.regParam + ', stepSize: ' + parameters.stepSize] = xy;
 	data['Random'] = {0 :0, 1 : 1};
 	plot({
-		title: 'Logistic Regression ROC Curve', 
-		data: data, 
+		title: 'Logistic Regression ROC Curve',
+		data: data,
 		filename: 'roc.png',
 		finish: function() {sc.end();}
 	});

diff --git a/dataframe/dataframe.ipynb b/dataframe/dataframe.ipynb
@@ -11,9 +11,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "This note book show how we can use Skale to process the Adult Income Dataset. \n",
+    "This notebook will show how we can use Skale to process the Adult Income Dataset. \n",
     "\n",
-    "First, we will see how to load and exploree the raw data. Finally we will build and evaluate the performance of a logistic regression classifier to predict if an adult earns more or less than 50K a year.\n",
+    "First, we will see how to load and explore the raw data. Finally we will build and evaluate the performance of a logistic regression classifier to predict if an adult earns more or less than 50K a year.\n",
     "\n",
     "Let's require some libraries:"
    ]
@@ -58,7 +58,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Our data is stored in a csv file named 'adult.data', let's load it and show the first five rows."
+    "Our data is stored in a csv file named 'adult.data'.  Let's load it and show the first five rows."
    ]
   },
   {

diff --git a/pi/pi.js b/pi/pi.js
@@ -1,5 +1,8 @@
 #!/usr/bin/env node
 
+// For more information on the algorithm see
+// https://en.wikipedia.org/wiki/Approximations_of_%CF%80#Summing_a_circle.27s_area
+
 var sc = require('skale-engine').context();
 
 var NUM_SAMPLES = 1000000;
@@ -10,6 +13,6 @@ function sample() {
 }
 
 sc.range(0, NUM_SAMPLES).map(sample).reduce((a, b) => a + b, 0, function(err, count) {
-	console.log('Pi is roughly ', (4.0 * count / NUM_SAMPLES));
+	console.log('Pi is roughly', (4.0 * count / NUM_SAMPLES));
 	sc.end();
-})
+})
diff --git a/wordcount/james_joyce_ulysse.txt → wordcount/james_joyce_ulysses.txt b/wordcount/james_joyce_ulysse.txt → wordcount/james_joyce_ulysses.txt
diff --git a/wordcount/wordcount.js b/wordcount/wordcount.js
@@ -2,7 +2,7 @@
 
 var sc = require('skale-engine').context();
 
-sc.textFile(__dirname + '/james_joyce_ulysse.txt')
+sc.textFile(__dirname + '/james_joyce_ulysses.txt')
 	.flatMap(line => line.split(' '))
 	.map(word => [word, 1])
 	.reduceByKey((a, b) => a + b, 0)