Skip to content

Commit

Permalink
[data] Enable isort for data directory (ray-project#35836)
Browse files Browse the repository at this point in the history
  • Loading branch information
raulchen authored Jun 1, 2023
1 parent f620ad2 commit 16fca16
Show file tree
Hide file tree
Showing 176 changed files with 742 additions and 879 deletions.
2 changes: 1 addition & 1 deletion .isort.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ filter_files=True
# python/ray/setup-dev.py
# For the rest we will gradually remove them from the blacklist as we
# reformat the code to follow the style guide.
skip_glob=doc/*,python/ray/__init__.py,python/ray/setup-dev.py,python/build/*,python/ray/cloudpickle/*,python/ray/thirdparty_files/*,python/ray/_private/thirdparty/*,python/ray/dag/*.py,ci/*,python/ray/_private/*,python/ray/air/*,dashboard/*,python/ray/data/*,python/ray/includes/*,python/ray/internal/*,python/ray/ray_operator/*,python/ray/scripts/*,python/ray/serve/*,python/ray/serve/__init__.py,python/ray/sgd/*,python/ray/streaming/*,python/ray/tests/*,python/ray/tests/*,python/ray/train/*,python/ray/tune/*,python/ray/util/*,python/ray/workers/*,python/ray/workflow/*,rllib/*,release/*,
skip_glob=doc/*,python/ray/__init__.py,python/ray/setup-dev.py,python/build/*,python/ray/cloudpickle/*,python/ray/thirdparty_files/*,python/ray/_private/thirdparty/*,python/ray/dag/*.py,ci/*,python/ray/_private/*,python/ray/air/*,dashboard/*,python/ray/includes/*,python/ray/internal/*,python/ray/ray_operator/*,python/ray/scripts/*,python/ray/serve/*,python/ray/serve/__init__.py,python/ray/sgd/*,python/ray/streaming/*,python/ray/tests/*,python/ray/tests/*,python/ray/train/*,python/ray/tune/*,python/ray/util/*,python/ray/workers/*,python/ray/workflow/*,rllib/*,release/*,

known_local_folder=ray
known_afterray=psutil,setproctitle
Expand Down
9 changes: 4 additions & 5 deletions python/ray/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@
import pandas # noqa

from ray.data._internal.compute import ActorPoolStrategy
from ray.data._internal.progress_bar import set_progress_bars
from ray.data._internal.execution.interfaces import (
ExecutionOptions,
ExecutionResources,
NodeIdStr,
)
from ray.data._internal.progress_bar import set_progress_bars
from ray.data.context import DataContext, DatasetContext
from ray.data.dataset import Dataset, Schema
from ray.data.context import DatasetContext, DataContext
from ray.data.iterator import DatasetIterator, DataIterator
from ray.data.dataset_pipeline import DatasetPipeline
from ray.data.datasource import Datasource, ReadTask
from ray.data.iterator import DataIterator, DatasetIterator
from ray.data.preprocessor import Preprocessor
from ray.data.read_api import ( # noqa: F401
from_arrow,
Expand All @@ -38,17 +38,16 @@
read_datasource,
read_images,
read_json,
read_mongo,
read_numpy,
read_parquet,
read_parquet_bulk,
read_sql,
read_text,
read_mongo,
read_tfrecords,
read_webdataset,
)


# Module-level cached global functions for callable classes. It needs to be defined here
# since it has to be process-global across cloudpickled funcs.
_cached_fn = None
Expand Down
7 changes: 2 additions & 5 deletions python/ray/data/_internal/arrow_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,14 @@

import numpy as np

from ray.air.constants import TENSOR_COLUMN_NAME
from ray._private.utils import _get_pyarrow_version
from ray.air.constants import TENSOR_COLUMN_NAME
from ray.data._internal.arrow_ops import transform_polars, transform_pyarrow
from ray.data._internal.numpy_support import (
convert_udf_returns_to_numpy,
is_valid_udf_return,
)
from ray.data._internal.table_block import (
TableBlockAccessor,
TableBlockBuilder,
)
from ray.data._internal.table_block import TableBlockAccessor, TableBlockBuilder
from ray.data._internal.util import _truncated_repr
from ray.data.aggregate import AggregateFn
from ray.data.block import (
Expand Down
13 changes: 6 additions & 7 deletions python/ray/data/_internal/arrow_ops/transform_pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ def take_table(
intermediate tables, not underlying an ArrowBlockAccessor.
"""
from ray.air.util.transform_pyarrow import (
_is_column_extension_type,
_concatenate_extension_column,
_is_column_extension_type,
)

if any(_is_column_extension_type(col) for col in table.columns):
Expand All @@ -50,11 +50,12 @@ def unify_schemas(
) -> "pyarrow.Schema":
"""Version of `pyarrow.unify_schemas()` which also handles checks for
variable-shaped tensors in the given schemas."""
import pyarrow as pa

from ray.air.util.tensor_extensions.arrow import (
ArrowTensorType,
ArrowVariableShapedTensorType,
)
import pyarrow as pa

schemas_to_unify = []
schema_field_overrides = {}
Expand Down Expand Up @@ -124,10 +125,7 @@ def _concatenate_chunked_arrays(arrs: "pyarrow.ChunkedArray") -> "pyarrow.Chunke
"""
Concatenate provided chunked arrays into a single chunked array.
"""
from ray.data.extensions import (
ArrowTensorType,
ArrowVariableShapedTensorType,
)
from ray.data.extensions import ArrowTensorType, ArrowVariableShapedTensorType

# Single flat list of chunks across all chunked arrays.
chunks = []
Expand All @@ -152,12 +150,13 @@ def concat(blocks: List["pyarrow.Table"]) -> "pyarrow.Table":
"""Concatenate provided Arrow Tables into a single Arrow Table. This has special
handling for extension types that pyarrow.concat_tables does not yet support.
"""
import pyarrow as pa

from ray.data.extensions import (
ArrowTensorArray,
ArrowTensorType,
ArrowVariableShapedTensorType,
)
import pyarrow as pa

if not blocks:
# Short-circuit on empty list of blocks.
Expand Down
6 changes: 3 additions & 3 deletions python/ray/data/_internal/batcher.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import random
from typing import Optional, List
from typing import List, Optional

from ray.data.block import Block, BlockAccessor
from ray.data._internal.delegating_block_builder import DelegatingBlockBuilder
from ray.data._internal.arrow_block import ArrowBlockAccessor
from ray.data._internal.arrow_ops import transform_pyarrow
from ray.data._internal.delegating_block_builder import DelegatingBlockBuilder
from ray.data.block import Block, BlockAccessor

# pyarrow.Table.slice is slow when the table has many chunks
# so we combine chunks into a single one to make slice faster
Expand Down
2 changes: 1 addition & 1 deletion python/ray/data/_internal/block_batching/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from ray.data._internal.block_batching.block_batching import (
batch_blocks,
batch_block_refs,
batch_blocks,
)

__all__ = ["batch_blocks", "batch_block_refs"]
10 changes: 5 additions & 5 deletions python/ray/data/_internal/block_batching/block_batching.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
import collections
import itertools
from typing import Any, Callable, Iterator, Optional, TypeVar, Union
from contextlib import nullcontext
from typing import Any, Callable, Iterator, Optional, TypeVar, Union

import ray
from ray.data._internal.block_batching.interfaces import BlockPrefetcher
from ray.data._internal.block_batching.util import (
resolve_block_refs,
ActorBlockPrefetcher,
WaitBlockPrefetcher,
blocks_to_batches,
format_batches,
collate,
extract_data_from_batch,
WaitBlockPrefetcher,
ActorBlockPrefetcher,
format_batches,
resolve_block_refs,
)
from ray.data._internal.memory_tracing import trace_deallocation
from ray.data._internal.stats import DatasetPipelineStats, DatasetStats
Expand Down
2 changes: 1 addition & 1 deletion python/ray/data/_internal/block_batching/interfaces.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from dataclasses import dataclass
from typing import Any

from ray.types import ObjectRef
from ray.data.block import Block, DataBatch
from ray.types import ObjectRef


@dataclass
Expand Down
15 changes: 6 additions & 9 deletions python/ray/data/_internal/block_batching/iter_batches.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,24 @@
import collections
from contextlib import nullcontext
from typing import Any, Callable, Dict, Iterator, Optional, Tuple

import ray
from ray.types import ObjectRef
from ray.data.block import Block, BlockMetadata, DataBatch
from ray.data._internal.block_batching.interfaces import (
Batch,
BlockPrefetcher,
)
from ray.data._internal.block_batching.interfaces import Batch, BlockPrefetcher
from ray.data._internal.block_batching.util import (
ActorBlockPrefetcher,
WaitBlockPrefetcher,
resolve_block_refs,
blocks_to_batches,
format_batches,
collate,
extract_data_from_batch,
format_batches,
make_async_gen,
resolve_block_refs,
)
from ray.data._internal.memory_tracing import trace_deallocation
from ray.data._internal.stats import DatasetStats
from ray.data.block import Block, BlockMetadata, DataBatch
from ray.data.context import DataContext
from contextlib import nullcontext
from ray.types import ObjectRef


def iter_batches(
Expand Down
8 changes: 4 additions & 4 deletions python/ray/data/_internal/block_batching/util.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
import logging
import threading
from typing import Any, Callable, Iterator, List, Optional, Tuple, TypeVar, Union
from collections import deque
from contextlib import nullcontext
from typing import Any, Callable, Iterator, List, Optional, Tuple, TypeVar, Union

import ray
from ray.types import ObjectRef
from ray.actor import ActorHandle
from ray.data.block import Block, BlockAccessor, DataBatch
from ray.data._internal.batcher import Batcher, ShufflingBatcher
from ray.data._internal.block_batching.interfaces import (
Batch,
CollatedBatch,
BlockPrefetcher,
CollatedBatch,
)
from ray.data._internal.stats import DatasetPipelineStats, DatasetStats
from ray.data.block import Block, BlockAccessor, DataBatch
from ray.types import ObjectRef
from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy

T = TypeVar("T")
Expand Down
2 changes: 1 addition & 1 deletion python/ray/data/_internal/block_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

import numpy as np

from ray.data.block import Block, BlockMetadata
from ray.data._internal.memory_tracing import trace_allocation
from ray.data.block import Block, BlockMetadata
from ray.types import ObjectRef


Expand Down
2 changes: 1 addition & 1 deletion python/ray/data/_internal/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
from ray.data._internal.progress_bar import ProgressBar
from ray.data._internal.remote_fn import cached_remote_fn
from ray.data.block import (
UserDefinedFunction,
Block,
BlockAccessor,
BlockExecStats,
BlockMetadata,
BlockPartition,
CallableClass,
StrictModeError,
UserDefinedFunction,
)
from ray.data.context import DEFAULT_SCHEDULING_STRATEGY, DataContext
from ray.types import ObjectRef
Expand Down
6 changes: 3 additions & 3 deletions python/ray/data/_internal/delegating_block_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
import numpy as np

import ray
from ray.data.block import Block, DataBatch, BlockAccessor
from ray.data._internal.arrow_block import ArrowBlockBuilder, ArrowRow
from ray.data._internal.block_builder import BlockBuilder
from ray.data._internal.pandas_block import PandasBlockBuilder, PandasRow
from ray.data._internal.simple_block import SimpleBlockBuilder
from ray.data._internal.arrow_block import ArrowRow, ArrowBlockBuilder
from ray.data._internal.pandas_block import PandasRow, PandasBlockBuilder
from ray.data.block import Block, BlockAccessor, DataBatch


class DelegatingBlockBuilder(BlockBuilder):
Expand Down
12 changes: 4 additions & 8 deletions python/ray/data/_internal/equalize.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
from typing import Tuple, List
from ray.data._internal.block_list import BlockList
from typing import List, Tuple

from ray.data._internal.split import _split_at_indices, _calculate_blocks_rows
from ray.data.block import (
Block,
BlockPartition,
BlockMetadata,
)
from ray.data._internal.block_list import BlockList
from ray.data._internal.split import _calculate_blocks_rows, _split_at_indices
from ray.data.block import Block, BlockMetadata, BlockPartition
from ray.types import ObjectRef


Expand Down
10 changes: 5 additions & 5 deletions python/ray/data/_internal/execution/bulk_executor.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
from typing import Dict, List, Iterator, Optional
from typing import Dict, Iterator, List, Optional

import ray
from ray.data.context import DataContext
from ray.data._internal.dataset_logger import DatasetLogger
from ray.data._internal.execution.interfaces import (
Executor,
ExecutionOptions,
Executor,
OutputIterator,
RefBundle,
PhysicalOperator,
RefBundle,
)
from ray.data._internal.dataset_logger import DatasetLogger
from ray.data._internal.execution.operators.input_data_buffer import InputDataBuffer
from ray.data._internal.progress_bar import ProgressBar
from ray.data._internal.stats import DatasetStats
from ray.data.context import DataContext

logger = DatasetLogger(__name__)

Expand Down
6 changes: 3 additions & 3 deletions python/ray/data/_internal/execution/interfaces.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from dataclasses import dataclass, field
import os
from typing import Any, Dict, List, Optional, Iterable, Iterator, Tuple, Callable, Union
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union

import ray
from ray.util.annotations import DeveloperAPI
from ray.data._internal.execution.util import memory_string
from ray.data._internal.logical.interfaces import Operator
from ray.data._internal.memory_tracing import trace_deallocation
Expand All @@ -12,6 +11,7 @@
from ray.data.block import Block, BlockMetadata
from ray.data.context import DataContext
from ray.types import ObjectRef
from ray.util.annotations import DeveloperAPI

# Node id string returned by `ray.get_runtime_context().get_node_id()`.
NodeIdStr = str
Expand Down
42 changes: 18 additions & 24 deletions python/ray/data/_internal/execution/legacy_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,34 @@
It should be deleted once we fully move to the new executor backend.
"""

from typing import Iterator, Tuple, Any
from typing import Any, Iterator, Tuple

import ray
from ray.data._internal.logical.optimizers import get_execution_plan
from ray.data._internal.logical.util import record_operators_usage
from ray.data.context import DataContext
from ray.types import ObjectRef
from ray.data.block import Block, BlockMetadata, CallableClass, List
from ray.data.datasource import ReadTask
from ray.data._internal.stats import StatsDict, DatasetStats
from ray.data._internal.stage_impl import (
RandomizeBlocksStage,
LimitStage,
)
from ray.data._internal.block_list import BlockList
from ray.data._internal.lazy_block_list import LazyBlockList
from ray.data._internal.compute import (
get_compute,
ActorPoolStrategy,
)
from ray.data._internal.memory_tracing import trace_allocation
from ray.data._internal.plan import ExecutionPlan, OneToOneStage, AllToAllStage, Stage
from ray.data._internal.execution.operators.map_operator import MapOperator
from ray.data._internal.execution.operators.limit_operator import LimitOperator
from ray.data._internal.execution.operators.all_to_all_operator import AllToAllOperator
from ray.data._internal.execution.operators.input_data_buffer import InputDataBuffer
from ray.data._internal.compute import ActorPoolStrategy, get_compute
from ray.data._internal.execution.interfaces import (
Executor,
PhysicalOperator,
RefBundle,
TaskContext,
)
from ray.data._internal.util import validate_compute
from ray.data._internal.execution.operators.all_to_all_operator import AllToAllOperator
from ray.data._internal.execution.operators.input_data_buffer import InputDataBuffer
from ray.data._internal.execution.operators.limit_operator import LimitOperator
from ray.data._internal.execution.operators.map_operator import MapOperator
from ray.data._internal.execution.util import make_callable_class_concurrent
from ray.data._internal.lazy_block_list import LazyBlockList
from ray.data._internal.logical.optimizers import get_execution_plan
from ray.data._internal.logical.util import record_operators_usage
from ray.data._internal.memory_tracing import trace_allocation
from ray.data._internal.plan import AllToAllStage, ExecutionPlan, OneToOneStage, Stage
from ray.data._internal.stage_impl import LimitStage, RandomizeBlocksStage
from ray.data._internal.stats import DatasetStats, StatsDict
from ray.data._internal.util import validate_compute
from ray.data.block import Block, BlockMetadata, CallableClass, List
from ray.data.context import DataContext
from ray.data.datasource import ReadTask
from ray.types import ObjectRef

# Warn about tasks larger than this.
TASK_SIZE_WARN_THRESHOLD_BYTES = 100000
Expand Down
Loading

0 comments on commit 16fca16

Please sign in to comment.