DOC Explicitly document feature order in make_classification docstr…

…ing (scikit-learn#10731)
ccatalfo · Mar 1, 2018 · b8a1bcf · b8a1bcf
1 parent ec691e9
commit b8a1bcf
Showing 1 changed file with 19 additions and 14 deletions.
diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py
@@ -43,14 +43,18 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
     """Generate a random n-class classification problem.
 
     This initially creates clusters of points normally distributed (std=1)
-    about vertices of an `n_informative`-dimensional hypercube with sides of
-    length `2*class_sep` and assigns an equal number of clusters to each
+    about vertices of an ``n_informative``-dimensional hypercube with sides of
+    length ``2*class_sep`` and assigns an equal number of clusters to each
     class. It introduces interdependence between these features and adds
     various types of further noise to the data.
 
-    Prior to shuffling, `X` stacks a number of these primary "informative"
-    features, "redundant" linear combinations of these, "repeated" duplicates
-    of sampled features, and arbitrary noise for and remaining features.
+    Without shuffling, ``X`` horizontally stacks features in the following
+    order: the primary ``n_informative`` features, followed by ``n_redundant``
+    linear combinations of the informative features, followed by ``n_repeated``
+    duplicates, drawn randomly with replacement from the informative and
+    redundant features. The remaining features are filled with random noise.
+    Thus, without shuffling, all useful features are contained in the columns
+    ``X[:, :n_informative + n_redundant + n_repeated]``.
 
     Read more in the :ref:`User Guide <sample_generators>`.
 
@@ -60,15 +64,16 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
         The number of samples.
 
     n_features : int, optional (default=20)
-        The total number of features. These comprise `n_informative`
-        informative features, `n_redundant` redundant features, `n_repeated`
-        duplicated features and `n_features-n_informative-n_redundant-
-        n_repeated` useless features drawn at random.
+        The total number of features. These comprise ``n_informative``
+        informative features, ``n_redundant`` redundant features,
+        ``n_repeated`` duplicated features and
+        ``n_features-n_informative-n_redundant-n_repeated`` useless features
+        drawn at random.
 
     n_informative : int, optional (default=2)
         The number of informative features. Each class is composed of a number
         of gaussian clusters each located around the vertices of a hypercube
-        in a subspace of dimension `n_informative`. For each cluster,
+        in a subspace of dimension ``n_informative``. For each cluster,
         informative features are drawn independently from  N(0, 1) and then
         randomly linearly combined within each cluster in order to add
         covariance. The clusters are then placed on the vertices of the
@@ -90,10 +95,10 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
 
     weights : list of floats or None (default=None)
         The proportions of samples assigned to each class. If None, then
-        classes are balanced. Note that if `len(weights) == n_classes - 1`,
+        classes are balanced. Note that if ``len(weights) == n_classes - 1``,
         then the last class weight is automatically inferred.
-        More than `n_samples` samples may be returned if the sum of `weights`
-        exceeds 1.
+        More than ``n_samples`` samples may be returned if the sum of
+        ``weights`` exceeds 1.
 
     flip_y : float, optional (default=0.01)
         The fraction of samples whose class are randomly exchanged. Larger
@@ -124,7 +129,7 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
         If int, random_state is the seed used by the random number generator;
         If RandomState instance, random_state is the random number generator;
         If None, the random number generator is the RandomState instance used
-        by `np.random`.
+        by ``np.random``.
 
     Returns
     -------