Commit 0485ada

General spelling fixes
seales committed Dec 16, 2015
1 parent 9790bc6 commit 0485ada
Showing 103 changed files with 147 additions and 147 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -1,4 +1,4 @@
- # simple makefile to simplify repetetive build env management tasks under posix
+ # simple makefile to simplify repetitive build env management tasks under posix

# caution: testing won't work on windows, see README

2 changes: 1 addition & 1 deletion appveyor.yml
@@ -5,7 +5,7 @@
environment:
global:
# SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the
- # /E:ON and /V:ON options are not enabled in the batch script intepreter
+ # /E:ON and /V:ON options are not enabled in the batch script interpreter
# See: http://stackoverflow.com/a/13751649/163740
CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\continuous_integration\\appveyor\\run_with_env.cmd"
WHEELHOUSE_UPLOADER_USERNAME: sklearn-appveyor
2 changes: 1 addition & 1 deletion benchmarks/bench_isotonic.py
@@ -7,7 +7,7 @@
The timings are then output to stdout, or visualized on a log-log scale
with matplotlib.
- This alows the scaling of the algorithm with the problem size to be
+ This allows the scaling of the algorithm with the problem size to be
visualized and understood.
"""
from __future__ import print_function
2 changes: 1 addition & 1 deletion continuous_integration/test_script.sh
@@ -19,7 +19,7 @@ python --version
python -c "import numpy; print('numpy %s' % numpy.__version__)"
python -c "import scipy; print('scipy %s' % scipy.__version__)"

- # Skip tests that require large downloads over the network to save bandwith
+ # Skip tests that require large downloads over the network to save bandwidth
# usage as travis workers are stateless and therefore traditional local
# disk caching does not work.
export SKLEARN_SKIP_NETWORK_TESTS=1
2 changes: 1 addition & 1 deletion doc/datasets/twenty_newsgroups_fixture.py
@@ -1,6 +1,6 @@
"""Fixture module to skip the datasets loading when offline
- Doctests are skipped if the datasets have not already been dowloaded
+ Doctests are skipped if the datasets have not already been downloaded
and cached in the past.
"""
from os.path import exists
2 changes: 1 addition & 1 deletion doc/themes/scikit-learn/static/css/bootstrap.css
@@ -87,7 +87,7 @@ img {
/* Responsive images (ensure images don't scale beyond their parents) */

max-width: 100%;
- /* Part 1: Set a maxium relative to the parent */
+ /* Part 1: Set a maximum relative to the parent */

width: auto\9;
/* IE7-8 need help adjusting responsive images */
2 changes: 1 addition & 1 deletion doc/tutorial/machine_learning_map/parse_path.py
@@ -83,7 +83,7 @@ def convertToFloat(s, loc, toks):

coordinate = number

- #comma or whitespace can seperate values all over the place in SVG
+ #comma or whitespace can separate values all over the place in SVG
maybeComma = Optional(Literal(',')).suppress()

coordinateSequence = Sequence(coordinate)
2 changes: 1 addition & 1 deletion doc/tutorial/machine_learning_map/pyparsing.py
@@ -965,7 +965,7 @@ def parseString( self, instring ):
(see L{I{parseWithTabs}<parseWithTabs>})
- define your parse action using the full (s,loc,toks) signature, and
reference the input string using the parse action's s argument
- - explictly expand the tabs in your input string before calling
+ - explicitly expand the tabs in your input string before calling
parseString
"""
ParserElement.resetCache()
2 changes: 1 addition & 1 deletion examples/applications/plot_out_of_core_classification.py
@@ -284,7 +284,7 @@ def progress(cls_name, stats):
minibatch_iterators = iter_minibatches(data_stream, minibatch_size)
total_vect_time = 0.0

- # Main loop : iterate on mini-batchs of examples
+ # Main loop : iterate on mini-batches of examples
for i, (X_train_text, y_train) in enumerate(minibatch_iterators):

tick = time.time()
2 changes: 1 addition & 1 deletion examples/applications/plot_stock_market.py
@@ -79,7 +79,7 @@
###############################################################################
# Retrieve the data from Internet

- # Choose a time period reasonnably calm (not too long ago so that we get
+ # Choose a time period reasonably calm (not too long ago so that we get
# high-tech firms, and before the 2008 crash)
d1 = datetime.datetime(2003, 1, 1)
d2 = datetime.datetime(2008, 1, 1)
2 changes: 1 addition & 1 deletion examples/applications/wikipedia_principal_eigenvector.py
@@ -179,7 +179,7 @@ def get_adjacency_matrix(redirects_filename, page_links_filename, limit=None):
U, s, V = randomized_svd(X, 5, n_iter=3)
print("done in %0.3fs" % (time() - t0))

- # print the names of the wikipedia related strongest compenents of the the
+ # print the names of the wikipedia related strongest components of the the
# principal singular vector which should be similar to the highest eigenvector
print("Top wikipedia pages according to principal singular vectors")
pprint([names[i] for i in np.abs(U.T[0]).argsort()[-10:]])
4 changes: 2 additions & 2 deletions examples/calibration/plot_calibration_curve.py
@@ -125,10 +125,10 @@ def plot_calibration_curve(est, name, fig_index):

plt.tight_layout()

- # Plot calibration cuve for Gaussian Naive Bayes
+ # Plot calibration curve for Gaussian Naive Bayes
plot_calibration_curve(GaussianNB(), "Naive Bayes", 1)

- # Plot calibration cuve for Linear SVC
+ # Plot calibration curve for Linear SVC
plot_calibration_curve(LinearSVC(), "SVC", 2)

plt.show()
6 changes: 3 additions & 3 deletions examples/calibration/plot_compare_calibration.py
@@ -10,10 +10,10 @@
0.8, approx. 80% actually belong to the positive class.
LogisticRegression returns well calibrated predictions as it directly
- optimizes log-loss. In contrast, the other methods return biased probilities,
+ optimizes log-loss. In contrast, the other methods return biased probabilities,
with different biases per method:
- * GaussianNaiveBayes tends to push probabilties to 0 or 1 (note the counts in
+ * GaussianNaiveBayes tends to push probabilities to 0 or 1 (note the counts in
the histograms). This is mainly because it makes the assumption that features
are conditionally independent given the class, which is not the case in this
dataset which contains 2 redundant features.
@@ -35,7 +35,7 @@
trained with random forests have relatively high variance due to feature
subseting." As a result, the calibration curve shows a characteristic sigmoid
shape, indicating that the classifier could trust its "intuition" more and
- return probabilties closer to 0 or 1 typically.
+ return probabilities closer to 0 or 1 typically.
* Support Vector Classification (SVC) shows an even more sigmoid curve as
the RandomForestClassifier, which is typical for maximum-margin methods
6 changes: 3 additions & 3 deletions examples/cluster/plot_kmeans_silhouette_analysis.py
@@ -9,7 +9,7 @@
a way to assess parameters like number of clusters visually. This measure has a
range of [-1, 1].
- Silhoette coefficients (as these values are referred to as) near +1 indicate
+ Silhouette coefficients (as these values are referred to as) near +1 indicate
that the sample is far away from the neighboring clusters. A value of 0
indicates that the sample is on or very close to the decision boundary between
two neighboring clusters and negative values indicate that those samples might
@@ -43,7 +43,7 @@
print(__doc__)

# Generating the sample data from make_blobs
- # This particular setting has one distict cluster and 3 clusters placed close
+ # This particular setting has one distinct cluster and 3 clusters placed close
# together.
X, y = make_blobs(n_samples=500,
n_features=2,
@@ -110,7 +110,7 @@
ax1.set_xlabel("The silhouette coefficient values")
ax1.set_ylabel("Cluster label")

- # The vertical line for average silhoutte score of all the values
+ # The vertical line for average silhouette score of all the values
ax1.axvline(x=silhouette_avg, color="red", linestyle="--")

ax1.set_yticks([]) # Clear the yaxis labels / ticks
@@ -135,7 +135,7 @@
y = X[:, 0] + 2 * X[:, 1] + np.random.normal(size=n * 1) + 5
pls1 = PLSRegression(n_components=3)
pls1.fit(X, y)
- # note that the number of compements exceeds 1 (the dimension of y)
+ # note that the number of components exceeds 1 (the dimension of y)
print("Estimated betas")
print(np.round(pls1.coef_, 1))

2 changes: 1 addition & 1 deletion examples/ensemble/plot_voting_decision_regions.py
@@ -10,7 +10,7 @@
predicted by three different classifiers and averaged by the
`VotingClassifier`.
- First, three examplary classifiers are initialized (`DecisionTreeClassifier`,
+ First, three exemplary classifiers are initialized (`DecisionTreeClassifier`,
`KNeighborsClassifier`, and `SVC`) and used to initialize a
soft-voting `VotingClassifier` with weights `[2, 1, 2]`, which means that
the predicted probabilities of the `DecisionTreeClassifier` and `SVC`
2 changes: 1 addition & 1 deletion examples/linear_model/plot_ard.py
@@ -32,7 +32,7 @@
n_samples, n_features = 100, 100
# Create Gaussian data
X = np.random.randn(n_samples, n_features)
- # Create weigts with a precision lambda_ of 4.
+ # Create weights with a precision lambda_ of 4.
lambda_ = 4.
w = np.zeros(n_features)
# Only keep 10 weights of interest
4 changes: 2 additions & 2 deletions examples/linear_model/plot_bayesian_ridge.py
@@ -25,11 +25,11 @@
from sklearn.linear_model import BayesianRidge, LinearRegression

###############################################################################
- # Generating simulated data with Gaussian weigthts
+ # Generating simulated data with Gaussian weights
np.random.seed(0)
n_samples, n_features = 100, 100
X = np.random.randn(n_samples, n_features) # Create Gaussian data
- # Create weigts with a precision lambda_ of 4.
+ # Create weights with a precision lambda_ of 4.
lambda_ = 4.
w = np.zeros(n_features)
# Only keep 10 weights of interest
2 changes: 1 addition & 1 deletion examples/model_selection/plot_learning_curve.py
@@ -27,7 +27,7 @@
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5)):
"""
- Generate a simple plot of the test and traning learning curve.
+ Generate a simple plot of the test and training learning curve.
Parameters
----------
2 changes: 1 addition & 1 deletion examples/neural_networks/plot_mlp_alpha.py
@@ -13,7 +13,7 @@
in a decision boundary plot that appears with lesser curvatures.
Similarly, decreasing alpha may fix high bias (a sign of underfitting) by
encouraging larger weights, potentially resulting in a more complicated
- decision boundery.
+ decision boundary.
"""
print(__doc__)

2 changes: 1 addition & 1 deletion examples/plot_kernel_approximation.py
@@ -169,7 +169,7 @@

X = pca.transform(data_train)

- # Gemerate grid along first two principal components
+ # Generate grid along first two principal components
multiples = np.arange(-2, 2, 0.1)
# steps along first component
first = multiples[:, np.newaxis] * pca.components_[0, :]
2 changes: 1 addition & 1 deletion examples/text/document_clustering.py
@@ -27,7 +27,7 @@
Two algorithms are demoed: ordinary k-means and its more scalable cousin
minibatch k-means.
- Additionally, latent sematic analysis can also be used to reduce dimensionality
+ Additionally, latent semantic analysis can also be used to reduce dimensionality
and discover latent patterns in the data.
It can be noted that k-means (and minibatch k-means) are very sensitive to
2 changes: 1 addition & 1 deletion setup.cfg
@@ -26,7 +26,7 @@ artifact_indexes=
# OSX wheels built by travis (only for specific tags):
# https://github.com/MacPython/scikit-learn-wheels
http://wheels.scipy.org
- # Windows wheels buit by:
+ # Windows wheels built by:
# https://ci.appveyor.com/project/sklearn-ci/scikit-learn/
http://windows-wheels.scikit-learn.org/

6 changes: 3 additions & 3 deletions sklearn/calibration.py
@@ -49,7 +49,7 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
method : 'sigmoid' or 'isotonic'
The method to use for calibration. Can be 'sigmoid' which
corresponds to Platt's method or 'isotonic' which is a
- non-parameteric approach. It is not advised to use isotonic calibration
+ non-parametric approach. It is not advised to use isotonic calibration
with too few calibration samples ``(<<1000)`` since it tends to overfit.
Use sigmoids (Platt's calibration) in this case.
@@ -204,7 +204,7 @@ def predict_proba(self, X):
X = check_array(X, accept_sparse=['csc', 'csr', 'coo'],
force_all_finite=False)
# Compute the arithmetic mean of the predictions of the calibrated
- # classfiers
+ # classifiers
mean_proba = np.zeros((X.shape[0], len(self.classes_)))
for calibrated_classifier in self.calibrated_classifiers_:
proba = calibrated_classifier.predict_proba(X)
@@ -250,7 +250,7 @@ class _CalibratedClassifier(object):
method : 'sigmoid' | 'isotonic'
The method to use for calibration. Can be 'sigmoid' which
corresponds to Platt's method or 'isotonic' which is a
- non-parameteric approach based on isotonic regression.
+ non-parametric approach based on isotonic regression.
References
----------
4 changes: 2 additions & 2 deletions sklearn/cluster/birch.py
@@ -21,7 +21,7 @@

def _iterate_sparse_X(X):
"""This little hack returns a densified row when iterating over a sparse
- matrix, insted of constructing a sparse matrix for every row that is
+ matrix, instead of constructing a sparse matrix for every row that is
expensive.
"""
n_samples = X.shape[0]
@@ -205,7 +205,7 @@ def insert_cf_subcluster(self, subcluster):

# things not too good. we need to redistribute the subclusters in
# our child node, and add a new subcluster in the parent
- # subcluster to accomodate the new child.
+ # subcluster to accommodate the new child.
else:
new_subcluster1, new_subcluster2 = _split_node(
closest_subcluster.child_, threshold, branching_factor)
2 changes: 1 addition & 1 deletion sklearn/cluster/hierarchical.py
@@ -322,7 +322,7 @@ def linkage_tree(X, connectivity=None, n_components=None,
This option is valid only when specifying a connectivity matrix.
linkage : {"average", "complete"}, optional, default: "complete"
- Which linkage critera to use. The linkage criterion determines which
+ Which linkage criteria to use. The linkage criterion determines which
distance to use between sets of observation.
- average uses the average of the distances of each observation of
the two sets
2 changes: 1 addition & 1 deletion sklearn/cluster/k_means_.py
@@ -1047,7 +1047,7 @@ def _mini_batch_step(X, x_squared_norms, centers, counts,
def _mini_batch_convergence(model, iteration_idx, n_iter, tol,
n_samples, centers_squared_diff, batch_inertia,
context, verbose=0):
- """Helper function to encapsulte the early stopping logic"""
+ """Helper function to encapsulate the early stopping logic"""
# Normalize inertia to be able to compare values when
# batch_size changes
batch_inertia /= model.batch_size
4 changes: 2 additions & 2 deletions sklearn/cluster/tests/test_hierarchical.py
@@ -47,13 +47,13 @@ def test_linkage_misc():
# Smoke test FeatureAgglomeration
FeatureAgglomeration().fit(X)

- # test hiearchical clustering on a precomputed distances matrix
+ # test hierarchical clustering on a precomputed distances matrix
dis = cosine_distances(X)

res = linkage_tree(dis, affinity="precomputed")
assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])

- # test hiearchical clustering on a precomputed distances matrix
+ # test hierarchical clustering on a precomputed distances matrix
res = linkage_tree(X, affinity=manhattan_distances)
assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])

2 changes: 1 addition & 1 deletion sklearn/covariance/empirical_covariance_.py
@@ -251,7 +251,7 @@ def error_norm(self, comp_cov, norm='frobenius', scaling=True,
else:
raise NotImplementedError(
"Only spectral and frobenius norms are implemented")
- # optionaly scale the error norm
+ # optionally scale the error norm
if scaling:
squared_norm = squared_norm / error.shape[0]
# finally get either the squared norm or the norm
2 changes: 1 addition & 1 deletion sklearn/covariance/shrunk_covariance_.py
@@ -194,7 +194,7 @@ def ledoit_wolf_shrinkage(X, assume_centered=False, block_size=1000):
"You may want to reshape your data array")
n_samples, n_features = X.shape

- # optionaly center data
+ # optionally center data
if not assume_centered:
X = X - X.mean(0)

4 changes: 2 additions & 2 deletions sklearn/cross_decomposition/tests/test_pls.py
@@ -109,7 +109,7 @@ def check_ortho(M, err_msg):
# x_weights = X.dot(x_rotation)
# Hence R/python sign flip should be the same in x_weight and x_rotation
assert_array_almost_equal(x_rotations_sign_flip, x_weights_sign_flip)
- # This test that R / python give the same result up to colunm
+ # This test that R / python give the same result up to column
# sign indeterminacy
assert_array_almost_equal(np.abs(x_rotations_sign_flip), 1, 4)
assert_array_almost_equal(np.abs(x_weights_sign_flip), 1, 4)
@@ -259,7 +259,7 @@ def check_ortho(M, err_msg):

def test_PLSSVD():
# Let's check the PLSSVD doesn't return all possible component but just
- # the specificied number
+ # the specified number
d = load_linnerud()
X = d.data
Y = d.target
2 changes: 1 addition & 1 deletion sklearn/cross_validation.py
@@ -417,7 +417,7 @@ def __init__(self, labels, n_folds=3):
" than the number of labels: {1}.").format(n_folds,
n_labels))

- # Weight labels by their number of occurences
+ # Weight labels by their number of occurrences
n_samples_per_label = np.bincount(labels)

# Distribute the most frequent labels first
2 changes: 1 addition & 1 deletion sklearn/datasets/california_housing.py
@@ -115,7 +115,7 @@ def fetch_california_housing(data_home=None, download_if_missing=True):
# avg bed rooms = total bed rooms / households
data[:, 3] /= data[:, 5]

- # avg occupancy = population / housholds
+ # avg occupancy = population / households
data[:, 5] = data[:, 4] / data[:, 5]

# target in units of 100,000
4 changes: 2 additions & 2 deletions sklearn/datasets/kddcup99.py
@@ -50,14 +50,14 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
different types of attack with normal activity in the background.
As the initial goal was to produce a large training set for supervised
learning algorithms, there is a large proportion (80.1%) of abnormal
- data which is unrealistic in real world, and inapropriate for unsupervised
+ data which is unrealistic in real world, and inappropriate for unsupervised
anomaly detection which aims at detecting 'abnormal' data, ie
1) qualitatively different from normal data.
2) in large minority among the observations.
- We thus transform the KDD Data set into two differents data set: SA and SF.
+ We thus transform the KDD Data set into two different data sets: SA and SF.
- SA is obtained by simply selecting all the normal data, and a small
proportion of abnormal data to gives an anomaly proportion of 1%.