Commit 0485ada

General spelling fixes
seales committed Dec 16, 2015
1 parent 9790bc6 commit 0485ada
Showing 103 changed files with 147 additions and 147 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -1,4 +1,4 @@
- # simple makefile to simplify repetetive build env management tasks under posix
+ # simple makefile to simplify repetitive build env management tasks under posix

# caution: testing won't work on windows, see README

2 changes: 1 addition & 1 deletion appveyor.yml
@@ -5,7 +5,7 @@
environment:
global:
# SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the
- # /E:ON and /V:ON options are not enabled in the batch script intepreter
+ # /E:ON and /V:ON options are not enabled in the batch script interpreter
# See: http://stackoverflow.com/a/13751649/163740
CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\continuous_integration\\appveyor\\run_with_env.cmd"
WHEELHOUSE_UPLOADER_USERNAME: sklearn-appveyor
2 changes: 1 addition & 1 deletion benchmarks/bench_isotonic.py
@@ -7,7 +7,7 @@
The timings are then output to stdout, or visualized on a log-log scale
with matplotlib.
- This alows the scaling of the algorithm with the problem size to be
+ This allows the scaling of the algorithm with the problem size to be
visualized and understood.
"""
from __future__ import print_function
2 changes: 1 addition & 1 deletion continuous_integration/test_script.sh
@@ -19,7 +19,7 @@ python --version
python -c "import numpy; print('numpy %s' % numpy.__version__)"
python -c "import scipy; print('scipy %s' % scipy.__version__)"

- # Skip tests that require large downloads over the network to save bandwith
+ # Skip tests that require large downloads over the network to save bandwidth
# usage as travis workers are stateless and therefore traditional local
# disk caching does not work.
export SKLEARN_SKIP_NETWORK_TESTS=1
2 changes: 1 addition & 1 deletion doc/datasets/twenty_newsgroups_fixture.py
@@ -1,6 +1,6 @@
"""Fixture module to skip the datasets loading when offline
- Doctests are skipped if the datasets have not already been dowloaded
+ Doctests are skipped if the datasets have not already been downloaded
and cached in the past.
"""
from os.path import exists
2 changes: 1 addition & 1 deletion doc/themes/scikit-learn/static/css/bootstrap.css
@@ -87,7 +87,7 @@ img {
/* Responsive images (ensure images don't scale beyond their parents) */

max-width: 100%;
- /* Part 1: Set a maxium relative to the parent */
+ /* Part 1: Set a maximum relative to the parent */

width: auto\9;
/* IE7-8 need help adjusting responsive images */
2 changes: 1 addition & 1 deletion doc/tutorial/machine_learning_map/parse_path.py
@@ -83,7 +83,7 @@ def convertToFloat(s, loc, toks):

coordinate = number

- #comma or whitespace can seperate values all over the place in SVG
+ #comma or whitespace can separate values all over the place in SVG
maybeComma = Optional(Literal(',')).suppress()

coordinateSequence = Sequence(coordinate)
2 changes: 1 addition & 1 deletion doc/tutorial/machine_learning_map/pyparsing.py
@@ -965,7 +965,7 @@ def parseString( self, instring ):
(see L{I{parseWithTabs}<parseWithTabs>})
- define your parse action using the full (s,loc,toks) signature, and
reference the input string using the parse action's s argument
- - explictly expand the tabs in your input string before calling
+ - explicitly expand the tabs in your input string before calling
parseString
"""
ParserElement.resetCache()
2 changes: 1 addition & 1 deletion examples/applications/plot_out_of_core_classification.py
@@ -284,7 +284,7 @@ def progress(cls_name, stats):
minibatch_iterators = iter_minibatches(data_stream, minibatch_size)
total_vect_time = 0.0

- # Main loop : iterate on mini-batchs of examples
+ # Main loop : iterate on mini-batches of examples
for i, (X_train_text, y_train) in enumerate(minibatch_iterators):

tick = time.time()
2 changes: 1 addition & 1 deletion examples/applications/plot_stock_market.py
@@ -79,7 +79,7 @@
###############################################################################
# Retrieve the data from Internet

- # Choose a time period reasonnably calm (not too long ago so that we get
+ # Choose a time period reasonably calm (not too long ago so that we get
# high-tech firms, and before the 2008 crash)
d1 = datetime.datetime(2003, 1, 1)
d2 = datetime.datetime(2008, 1, 1)
2 changes: 1 addition & 1 deletion examples/applications/wikipedia_principal_eigenvector.py
@@ -179,7 +179,7 @@ def get_adjacency_matrix(redirects_filename, page_links_filename, limit=None):
U, s, V = randomized_svd(X, 5, n_iter=3)
print("done in %0.3fs" % (time() - t0))

- # print the names of the wikipedia related strongest compenents of the the
+ # print the names of the wikipedia related strongest components of the the
# principal singular vector which should be similar to the highest eigenvector
print("Top wikipedia pages according to principal singular vectors")
pprint([names[i] for i in np.abs(U.T[0]).argsort()[-10:]])
4 changes: 2 additions & 2 deletions examples/calibration/plot_calibration_curve.py
@@ -125,10 +125,10 @@ def plot_calibration_curve(est, name, fig_index):

plt.tight_layout()

- # Plot calibration cuve for Gaussian Naive Bayes
+ # Plot calibration curve for Gaussian Naive Bayes
plot_calibration_curve(GaussianNB(), "Naive Bayes", 1)

- # Plot calibration cuve for Linear SVC
+ # Plot calibration curve for Linear SVC
plot_calibration_curve(LinearSVC(), "SVC", 2)

plt.show()
6 changes: 3 additions & 3 deletions examples/calibration/plot_compare_calibration.py
@@ -10,10 +10,10 @@
0.8, approx. 80% actually belong to the positive class.
LogisticRegression returns well calibrated predictions as it directly
- optimizes log-loss. In contrast, the other methods return biased probilities,
+ optimizes log-loss. In contrast, the other methods return biased probabilities,
with different biases per method:
- * GaussianNaiveBayes tends to push probabilties to 0 or 1 (note the counts in
+ * GaussianNaiveBayes tends to push probabilities to 0 or 1 (note the counts in
the histograms). This is mainly because it makes the assumption that features
are conditionally independent given the class, which is not the case in this
dataset which contains 2 redundant features.
@@ -35,7 +35,7 @@
trained with random forests have relatively high variance due to feature
subseting." As a result, the calibration curve shows a characteristic sigmoid
shape, indicating that the classifier could trust its "intuition" more and
- return probabilties closer to 0 or 1 typically.
+ return probabilities closer to 0 or 1 typically.
* Support Vector Classification (SVC) shows an even more sigmoid curve as
the RandomForestClassifier, which is typical for maximum-margin methods
6 changes: 3 additions & 3 deletions examples/cluster/plot_kmeans_silhouette_analysis.py
@@ -9,7 +9,7 @@
a way to assess parameters like number of clusters visually. This measure has a
range of [-1, 1].
- Silhoette coefficients (as these values are referred to as) near +1 indicate
+ Silhouette coefficients (as these values are referred to as) near +1 indicate
that the sample is far away from the neighboring clusters. A value of 0
indicates that the sample is on or very close to the decision boundary between
two neighboring clusters and negative values indicate that those samples might
@@ -43,7 +43,7 @@
print(__doc__)

# Generating the sample data from make_blobs
- # This particular setting has one distict cluster and 3 clusters placed close
+ # This particular setting has one distinct cluster and 3 clusters placed close
# together.
X, y = make_blobs(n_samples=500,
n_features=2,
@@ -110,7 +110,7 @@
ax1.set_xlabel("The silhouette coefficient values")
ax1.set_ylabel("Cluster label")

- # The vertical line for average silhoutte score of all the values
+ # The vertical line for average silhouette score of all the values
ax1.axvline(x=silhouette_avg, color="red", linestyle="--")

ax1.set_yticks([]) # Clear the yaxis labels / ticks
@@ -135,7 +135,7 @@
y = X[:, 0] + 2 * X[:, 1] + np.random.normal(size=n * 1) + 5
pls1 = PLSRegression(n_components=3)
pls1.fit(X, y)
- # note that the number of compements exceeds 1 (the dimension of y)
+ # note that the number of components exceeds 1 (the dimension of y)
print("Estimated betas")
print(np.round(pls1.coef_, 1))

2 changes: 1 addition & 1 deletion examples/ensemble/plot_voting_decision_regions.py
@@ -10,7 +10,7 @@
predicted by three different classifiers and averaged by the
`VotingClassifier`.
- First, three examplary classifiers are initialized (`DecisionTreeClassifier`,
+ First, three exemplary classifiers are initialized (`DecisionTreeClassifier`,
`KNeighborsClassifier`, and `SVC`) and used to initialize a
soft-voting `VotingClassifier` with weights `[2, 1, 2]`, which means that
the predicted probabilities of the `DecisionTreeClassifier` and `SVC`
2 changes: 1 addition & 1 deletion examples/linear_model/plot_ard.py
@@ -32,7 +32,7 @@
n_samples, n_features = 100, 100
# Create Gaussian data
X = np.random.randn(n_samples, n_features)
- # Create weigts with a precision lambda_ of 4.
+ # Create weights with a precision lambda_ of 4.
lambda_ = 4.
w = np.zeros(n_features)
# Only keep 10 weights of interest
4 changes: 2 additions & 2 deletions examples/linear_model/plot_bayesian_ridge.py
@@ -25,11 +25,11 @@
from sklearn.linear_model import BayesianRidge, LinearRegression

###############################################################################
- # Generating simulated data with Gaussian weigthts
+ # Generating simulated data with Gaussian weights
np.random.seed(0)
n_samples, n_features = 100, 100
X = np.random.randn(n_samples, n_features) # Create Gaussian data
- # Create weigts with a precision lambda_ of 4.
+ # Create weights with a precision lambda_ of 4.
lambda_ = 4.
w = np.zeros(n_features)
# Only keep 10 weights of interest
2 changes: 1 addition & 1 deletion examples/model_selection/plot_learning_curve.py
@@ -27,7 +27,7 @@
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5)):
"""
- Generate a simple plot of the test and traning learning curve.
+ Generate a simple plot of the test and training learning curve.
Parameters
----------
2 changes: 1 addition & 1 deletion examples/neural_networks/plot_mlp_alpha.py
@@ -13,7 +13,7 @@
in a decision boundary plot that appears with lesser curvatures.
Similarly, decreasing alpha may fix high bias (a sign of underfitting) by
encouraging larger weights, potentially resulting in a more complicated
- decision boundery.
+ decision boundary.
"""
print(__doc__)

2 changes: 1 addition & 1 deletion examples/plot_kernel_approximation.py
@@ -169,7 +169,7 @@

X = pca.transform(data_train)

- # Gemerate grid along first two principal components
+ # Generate grid along first two principal components
multiples = np.arange(-2, 2, 0.1)
# steps along first component
first = multiples[:, np.newaxis] * pca.components_[0, :]
2 changes: 1 addition & 1 deletion examples/text/document_clustering.py
@@ -27,7 +27,7 @@
Two algorithms are demoed: ordinary k-means and its more scalable cousin
minibatch k-means.
- Additionally, latent sematic analysis can also be used to reduce dimensionality
+ Additionally, latent semantic analysis can also be used to reduce dimensionality
and discover latent patterns in the data.
It can be noted that k-means (and minibatch k-means) are very sensitive to
2 changes: 1 addition & 1 deletion setup.cfg
@@ -26,7 +26,7 @@ artifact_indexes=
# OSX wheels built by travis (only for specific tags):
# https://github.com/MacPython/scikit-learn-wheels
http://wheels.scipy.org
- # Windows wheels buit by:
+ # Windows wheels built by:
# https://ci.appveyor.com/project/sklearn-ci/scikit-learn/
http://windows-wheels.scikit-learn.org/

6 changes: 3 additions & 3 deletions sklearn/calibration.py
@@ -49,7 +49,7 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
method : 'sigmoid' or 'isotonic'
The method to use for calibration. Can be 'sigmoid' which
corresponds to Platt's method or 'isotonic' which is a
- non-parameteric approach. It is not advised to use isotonic calibration
+ non-parametric approach. It is not advised to use isotonic calibration
with too few calibration samples ``(<<1000)`` since it tends to overfit.
Use sigmoids (Platt's calibration) in this case.
@@ -204,7 +204,7 @@ def predict_proba(self, X):
X = check_array(X, accept_sparse=['csc', 'csr', 'coo'],
force_all_finite=False)
# Compute the arithmetic mean of the predictions of the calibrated
- # classfiers
+ # classifiers
mean_proba = np.zeros((X.shape[0], len(self.classes_)))
for calibrated_classifier in self.calibrated_classifiers_:
proba = calibrated_classifier.predict_proba(X)
@@ -250,7 +250,7 @@ class _CalibratedClassifier(object):
method : 'sigmoid' | 'isotonic'
The method to use for calibration. Can be 'sigmoid' which
corresponds to Platt's method or 'isotonic' which is a
- non-parameteric approach based on isotonic regression.
+ non-parametric approach based on isotonic regression.
References
----------
4 changes: 2 additions & 2 deletions sklearn/cluster/birch.py
@@ -21,7 +21,7 @@

def _iterate_sparse_X(X):
"""This little hack returns a densified row when iterating over a sparse
- matrix, insted of constructing a sparse matrix for every row that is
+ matrix, instead of constructing a sparse matrix for every row that is
expensive.
"""
n_samples = X.shape[0]
@@ -205,7 +205,7 @@ def insert_cf_subcluster(self, subcluster):

# things not too good. we need to redistribute the subclusters in
# our child node, and add a new subcluster in the parent
- # subcluster to accomodate the new child.
+ # subcluster to accommodate the new child.
else:
new_subcluster1, new_subcluster2 = _split_node(
closest_subcluster.child_, threshold, branching_factor)
2 changes: 1 addition & 1 deletion sklearn/cluster/hierarchical.py
@@ -322,7 +322,7 @@ def linkage_tree(X, connectivity=None, n_components=None,
This option is valid only when specifying a connectivity matrix.
linkage : {"average", "complete"}, optional, default: "complete"
- Which linkage critera to use. The linkage criterion determines which
+ Which linkage criteria to use. The linkage criterion determines which
distance to use between sets of observation.
- average uses the average of the distances of each observation of
the two sets
2 changes: 1 addition & 1 deletion sklearn/cluster/k_means_.py
@@ -1047,7 +1047,7 @@ def _mini_batch_step(X, x_squared_norms, centers, counts,
def _mini_batch_convergence(model, iteration_idx, n_iter, tol,
n_samples, centers_squared_diff, batch_inertia,
context, verbose=0):
- """Helper function to encapsulte the early stopping logic"""
+ """Helper function to encapsulate the early stopping logic"""
# Normalize inertia to be able to compare values when
# batch_size changes
batch_inertia /= model.batch_size
4 changes: 2 additions & 2 deletions sklearn/cluster/tests/test_hierarchical.py
@@ -47,13 +47,13 @@ def test_linkage_misc():
# Smoke test FeatureAgglomeration
FeatureAgglomeration().fit(X)

- # test hiearchical clustering on a precomputed distances matrix
+ # test hierarchical clustering on a precomputed distances matrix
dis = cosine_distances(X)

res = linkage_tree(dis, affinity="precomputed")
assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])

- # test hiearchical clustering on a precomputed distances matrix
+ # test hierarchical clustering on a precomputed distances matrix
res = linkage_tree(X, affinity=manhattan_distances)
assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])

2 changes: 1 addition & 1 deletion sklearn/covariance/empirical_covariance_.py
@@ -251,7 +251,7 @@ def error_norm(self, comp_cov, norm='frobenius', scaling=True,
else:
raise NotImplementedError(
"Only spectral and frobenius norms are implemented")
- # optionaly scale the error norm
+ # optionally scale the error norm
if scaling:
squared_norm = squared_norm / error.shape[0]
# finally get either the squared norm or the norm
2 changes: 1 addition & 1 deletion sklearn/covariance/shrunk_covariance_.py
@@ -194,7 +194,7 @@ def ledoit_wolf_shrinkage(X, assume_centered=False, block_size=1000):
"You may want to reshape your data array")
n_samples, n_features = X.shape

- # optionaly center data
+ # optionally center data
if not assume_centered:
X = X - X.mean(0)

4 changes: 2 additions & 2 deletions sklearn/cross_decomposition/tests/test_pls.py
@@ -109,7 +109,7 @@ def check_ortho(M, err_msg):
# x_weights = X.dot(x_rotation)
# Hence R/python sign flip should be the same in x_weight and x_rotation
assert_array_almost_equal(x_rotations_sign_flip, x_weights_sign_flip)
- # This test that R / python give the same result up to colunm
+ # This test that R / python give the same result up to column
# sign indeterminacy
assert_array_almost_equal(np.abs(x_rotations_sign_flip), 1, 4)
assert_array_almost_equal(np.abs(x_weights_sign_flip), 1, 4)
@@ -259,7 +259,7 @@ def check_ortho(M, err_msg):

def test_PLSSVD():
# Let's check the PLSSVD doesn't return all possible component but just
- # the specificied number
+ # the specified number
d = load_linnerud()
X = d.data
Y = d.target
2 changes: 1 addition & 1 deletion sklearn/cross_validation.py
@@ -417,7 +417,7 @@ def __init__(self, labels, n_folds=3):
" than the number of labels: {1}.").format(n_folds,
n_labels))

- # Weight labels by their number of occurences
+ # Weight labels by their number of occurrences
n_samples_per_label = np.bincount(labels)

# Distribute the most frequent labels first
2 changes: 1 addition & 1 deletion sklearn/datasets/california_housing.py
@@ -115,7 +115,7 @@ def fetch_california_housing(data_home=None, download_if_missing=True):
# avg bed rooms = total bed rooms / households
data[:, 3] /= data[:, 5]

- # avg occupancy = population / housholds
+ # avg occupancy = population / households
data[:, 5] = data[:, 4] / data[:, 5]

# target in units of 100,000
4 changes: 2 additions & 2 deletions sklearn/datasets/kddcup99.py
@@ -50,14 +50,14 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
different types of attack with normal activity in the background.
As the initial goal was to produce a large training set for supervised
learning algorithms, there is a large proportion (80.1%) of abnormal
- data which is unrealistic in real world, and inapropriate for unsupervised
+ data which is unrealistic in real world, and inappropriate for unsupervised
anomaly detection which aims at detecting 'abnormal' data, ie
1) qualitatively different from normal data.
2) in large minority among the observations.
- We thus transform the KDD Data set into two differents data set: SA and SF.
+ We thus transform the KDD Data set into two different data sets: SA and SF.
- SA is obtained by simply selecting all the normal data, and a small
proportion of abnormal data to gives an anomaly proportion of 1%.