fix code after intensive refactor
ajz34 committed Jan 16, 2025
1 parent cf3a9d0 commit 44bd49f
Showing 8 changed files with 53 additions and 45 deletions.
@@ -191,7 +191,7 @@ fn example_memory_aspects() {
// view clone is cheap, given tensor is large
let a_view = a.view();
let b_view = b.view();
-let e = a_view.clone() * b_view.clone();
+let e = a_view * b_view;
// ANCHOR_END: memory_aspects_01
println!("{:}", c);
println!("{:}", d);
@@ -200,10 +200,10 @@ fn example_memory_aspects() {
// ANCHOR: memory_aspects_02
let a = rt::arange(5.0);
let b = rt::arange(5.0) + 1.0;
-let ptr_a = a.rawvec().as_ptr();
+let ptr_a = a.raw().as_ptr();
// if sure that `a` is not used anymore, pass `a` by value instead of reference
let c = a + &b;
-let ptr_c = c.rawvec().as_ptr();
+let ptr_c = c.raw().as_ptr();
// raw data of `a` is reused in `c`
// similar to `a += &b; let c = a;`
assert_eq!(ptr_a, ptr_c);
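
A complementary hedged sketch (reusing the `rt` prelude and `raw()` accessor from the listing above; operator overloading on references is assumed): if `a` is passed by reference instead, its buffer cannot be reused, and the result owns freshly allocated memory.

```rust
let a = rt::arange(5.0);
let b = rt::arange(5.0) + 1.0;
let ptr_a = a.raw().as_ptr();
// `&a + &b` cannot steal the buffer of `a`, so `c` allocates new memory
let c = &a + &b;
let ptr_c = c.raw().as_ptr();
assert_ne!(ptr_a, ptr_c);
```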
2 changes: 1 addition & 1 deletion listings/features-default/tests/indexing.rs
@@ -191,7 +191,7 @@ fn example_ellipsis() {
#[test]
fn example_mixed_indexing() {
// ANCHOR: example_mixed_indexing
-let a: Tensor<f64, _> = rt::zeros([6, 7, 5, 9, 8]);
+let a: Tensor<f64> = rt::zeros([6, 7, 5, 9, 8]);

// mixed indexing
let b = a.slice((slice!(-2, 1, -1), None, None, Ellipsis, 1, ..-2));
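
For orientation, a hedged reading of the slice above (assuming NumPy-like semantics for `slice!`, `None`, and `Ellipsis`): `slice!(-2, 1, -1)` walks axis 0 backward from index 4 down to, but excluding, index 1; each `None` inserts a new axis; `Ellipsis` spans the untouched middle axes; the integer `1` removes an axis; and `..-2` trims the last axis.

```rust
// hypothetical shape check under those assumptions:
// [6, 7, 5, 9, 8] -> [3, 1, 1, 7, 5, 6]
println!("{:?}", b.shape());
```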
10 changes: 5 additions & 5 deletions listings/features-default/tests/structure_and_ownership.rs
@@ -5,13 +5,13 @@ fn example_tensor_ownership() {
// ANCHOR: tensor_ownership
// generate 1-D owned tensor
let tensor = rt::arange(12);
-let ptr_1 = tensor.rawvec().as_ptr();
+let ptr_1 = tensor.raw().as_ptr();

// this will give owned tensor with 2-D shape
// since previous tensor is contiguous, this will not copy memory
let mut tensor = tensor.into_shape([3, 4]);
tensor += 1; // inplace operation
-let ptr_2 = tensor.rawvec().as_ptr();
+let ptr_2 = tensor.raw().as_ptr();

// until now, memory has not been copied
assert_eq!(ptr_1, ptr_2);
@@ -21,7 +21,7 @@ fn example_tensor_ownership() {

// from view to owned tensor
let tensor = tensor_view.into_owned();
-let ptr_3 = tensor.rawvec().as_ptr();
+let ptr_3 = tensor.raw().as_ptr();

// now memory has been copied
assert_ne!(ptr_2, ptr_3);
@@ -38,7 +38,7 @@ fn example_to_vec() {
// matrix multiplication (gemv 2-D x 1-D case)
let c = a % b;
println!("{:?}", c);
-let ptr_1 = c.rawvec().as_ptr();
+let ptr_1 = c.raw().as_ptr();

// convert to Vec<f64>
let c = c.into_vec();
@@ -82,7 +82,7 @@ fn example_dim_conversion() {
// fixed dimension
let a = rt::arange(12).into_shape([3, 4]);
println!("{:?}", a);
-// output: 2-Dim, contiguous: Cc
+// output: 2-Dim (dyn), contiguous: Cc

// convert to dynamic dimension
let a = a.into_dim::<IxD>(); // or a.into_dyn();
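
A hedged follow-up sketch (the fixed-dimension type `Ix2` is assumed from the `Tensor<f64, Ix2, ...>` annotations elsewhere in this commit; the failure mode on a rank mismatch is not shown in the diff):

```rust
// hypothetical: convert back from dynamic to fixed 2-D dimension
let a = a.into_dim::<Ix2>();
println!("{:?}", a.layout());
```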
22 changes: 11 additions & 11 deletions listings/features-default/tests/tensor_creation.rs
@@ -27,7 +27,7 @@ fn example_02() {
// === Debug Tensor Print ===
// [ 1 2 3 4 5]
// DeviceFaer { base: DeviceCpuRayon { num_threads: 4 } }
-// 1-Dim, contiguous: CcFf
+// 1-Dim (dyn), contiguous: CcFf
// shape: [5], stride: [1], offset: 0
// Type: rstsr_core::tensorbase::TensorBase<rstsr_core::tensor::data::DataOwned<rstsr_core::storage::device::Storage<i32, rstsr_core::device_faer::device::DeviceFaer>>, [usize; 1]>
// ANCHOR_END: example_02
@@ -87,13 +87,13 @@ fn example_05() {
println!("{:?}", tensor);

// check if pointer of vec and tensor's storage are the same
-assert_eq!(vec.as_ptr(), tensor.storage().rawvec().as_ptr());
+assert_eq!(vec.as_ptr(), tensor.storage().raw().as_ptr());

// output:
// === Debug Tensor Print ===
// [ 1 2 3 4 5 6]
// DeviceFaer { base: DeviceCpuRayon { num_threads: 0 } }
-// 1-Dim, contiguous: CcFf
+// 1-Dim (dyn), contiguous: CcFf
// shape: [6], stride: [1], offset: 0
// Type: rstsr_core::tensorbase::TensorBase<rstsr_core::tensor::data::DataRef<rstsr_core::storage::device::Storage<i32, rstsr_core::device_faer::device::DeviceFaer>>, [usize; 1]>
// ANCHOR_END: example_05
@@ -156,14 +156,14 @@ fn example_linspace() {
fn example_eye() {
// ANCHOR: example_eye
let device = DeviceFaer::new(4);
-let tensor: Tensor<f64, _> = rt::eye((3, &device));
+let tensor: Tensor<f64> = rt::eye((3, &device));
println!("{:}", tensor);
// output:
// [[ 1 0 0]
// [ 0 1 0]
// [ 0 0 1]]

-let tensor: Tensor<f64, _> = rt::eye((3, 4, -1));
+let tensor: Tensor<f64> = rt::eye((3, 4, -1));
println!("{:}", tensor);
// output:
// [[ 0 0 0 0]
@@ -194,7 +194,7 @@ fn example_diag() {
fn example_zeros_01() {
// ANCHOR: example_zeros_01
// generate tensor with default device
-let tensor: Tensor<f64, _> = rt::zeros([2, 2, 3]); // Tensor<f64, Ix3>
+let tensor: Tensor<f64> = rt::zeros([2, 2, 3]); // Tensor<f64, DeviceFaer, IxD>
println!("{:}", tensor);
// output:
// [[[ 0 0 0]
@@ -206,7 +206,7 @@ fn example_zeros_01() {
// generate tensor with custom device
// note: the second type annotation refers to the device type, hence it is required if not using the default device
// Tensor<f64, DeviceCpuSerial, IxD>
-let tensor: Tensor<f64, _, _> = rt::zeros(([3, 4], &DeviceCpuSerial));
+let tensor: Tensor<f64, _> = rt::zeros(([3, 4], &DeviceCpuSerial));
println!("{:}", tensor);
// output:
// [[ 0 0 0 0]
@@ -219,12 +219,12 @@ fn example_zeros_01() {
fn example_zeros_02() {
// ANCHOR: example_zeros_02
// generate tensor with c-contiguous
-let tensor: Tensor<f64, _> = rt::zeros([2, 2, 3].c());
+let tensor: Tensor<f64> = rt::zeros([2, 2, 3].c());
println!("shape: {:?}, stride: {:?}", tensor.shape(), tensor.stride());
// output: shape: [2, 2, 3], stride: [6, 3, 1]

// generate tensor with f-contiguous
-let tensor: Tensor<f64, _> = rt::zeros([2, 2, 3].f());
+let tensor: Tensor<f64> = rt::zeros([2, 2, 3].f());
println!("shape: {:?}, stride: {:?}", tensor.shape(), tensor.stride());
// output: shape: [2, 2, 3], stride: [1, 2, 4]
// ANCHOR_END: example_zeros_02
@@ -234,7 +234,7 @@ fn example_zeros_02() {
fn example_zeros_03() {
// ANCHOR: example_zeros_03
// generate 0-D tensor
-let mut a: Tensor<f64, _> = rt::zeros([]);
+let mut a: Tensor<f64> = rt::zeros([]);
println!("{:}", a);
// output: 0

@@ -254,7 +254,7 @@ fn example_empty() {
fn example_empty() {
// ANCHOR: example_empty
// generate empty tensor with default device
-let tensor: Tensor<i32, _> = unsafe { rt::empty([10, 10]) };
+let tensor: Tensor<i32> = unsafe { rt::empty([10, 10]) };
println!("{:?}", tensor);
// ANCHOR_END: example_empty
}
2 changes: 1 addition & 1 deletion listings/features-default/tests/welcome.rs
@@ -14,7 +14,7 @@ fn welcome() {
// print layout of the result
println!("{:?}", c.layout());
// output:
-// 3-Dim, contiguous: Cc
+// 3-Dim (dyn), contiguous: Cc
// shape: [2, 4, 2], stride: [8, 2, 1], offset: 0

// print the result
Binary file modified src/assets/rstsr-basic-structure.png
Binary file modified src/assets/rstsr-basic-structure.pptx
56 changes: 32 additions & 24 deletions src/fundamentals/02-structure_and_ownership.md
@@ -11,34 +11,42 @@ In this section, we will try to show how RSTSR constructs tensor struct and diff
## 1. Tensor Structure

RSTSR's tensor learns a lot from the Rust crate `ndarray`.
-Structure and usage of RSTSR's `TensorBase` is similar to `ndarray`'s `ArrayBase`; however, they are different in some key points.
+The structure and usage of RSTSR's `TensorBase` are similar to `ndarray`'s `ArrayBase`; however, they differ in many key points.

![rstsr-basic-structure](../assets/rstsr-basic-structure.png)

-- Tensor is composed by data (how data is stored in memory bulk) and layout (how tensor is represented).
-- Layout is composed by shape (shape of tensor), stride (how each value is accessed from memory bulk), and offset (where the tensor starts)[^1].
+- Tensor is composed of `storage` (how data is stored in the memory bulk) and `layout` (how the tensor is represented).
+- Layout is composed of `shape` (the shape of the tensor), `stride` (how each value is accessed from the memory bulk), and `offset` (where the tensor starts)[^1].
+- Storage is composed of `data` (data with lifetime) and `device` (computation and storage backend)[^2].
- Data is the combination of the actual memory storage with its lifetime annotation.
-Currently, 5 ownership types are supported. The first two (owned and referenced) are the most important[^2].
-- Owned (`Tensor<T, D, B>`)
-- Referenced (`TensorView<'l, T, D, B>` or `TensorRef<'l, T, D, B>`)
-- Mutablly referenced (`TensorViewMut<'l, T, D, B>` or `TensorMut<'l, T, D, B>`)
-- Clone on write (not mutable enum of owned and referenced, `TensorCow<'l, T, D, B>`)
-- Atomic reference counted (safe in threading, `TensorArc<T, D, B>`)
-- Storage is composed by raw memory (type `RawVec`) and backend (device)[^3]
-- The actual memory bulk will be stored as `Vec<T>` in CPU, and this can be configured by trait type `RawVec`[^4].

+Currently, 5 ownership types are supported. The first two (owned and referenced) are the most important[^3].
+- Owned (`Tensor<T, B, D>`)
+- Referenced (`TensorView<'l, T, B, D>` or `TensorRef<'l, T, B, D>`)
+- Mutably referenced (`TensorViewMut<'l, T, B, D>` or `TensorMut<'l, T, B, D>`)
+- Clone-on-write (an enum over owned and referenced data, not mutable: `TensorCow<'l, T, B, D>`)
+- Atomic reference counted (safe in threading, `TensorArc<T, B, D>`)
+- The actual memory bulk will be stored as `Vec<T>` on CPU, and this can be configured by the trait type `DeviceRawAPI<T>::Raw`[^4].
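
A minimal hedged sketch of these ownership types (constructors follow the test listings above; in-place arithmetic through a mutable view is an assumption):

```rust
let mut a: Tensor<f64> = rt::zeros([3, 4]); // owned tensor
{
    let v = a.view();            // TensorView: shared borrow of `a`
    println!("{:?}", v.shape());
}
{
    let mut m = a.view_mut();    // TensorMut: exclusive borrow of `a`
    m += 1.0;                    // assumed: in-place arithmetic on the mutable view
}
let s = a.into_shared();         // TensorArc: atomic reference counted
```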

+Default generic types are applied to some structs and aliases. For example, if `DeviceFaer` is the default device (enabled by the crate feature `faer_as_default`), then
+```rust
+let a: Tensor<f64> = rt::zeros([3, 4]);
+// this refers to Tensor<f64, DeviceFaer, IxD>
+// default device: DeviceFaer (rayon parallel with Faer matmul)
+// default dimensionality: IxD (dynamic dimension)
+```
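
A hedged counterpart for a non-default device (assuming `DeviceCpuSerial` from the test listings above, and that the dimension generic still defaults to `IxD`):

```rust
// hypothetical: the device generic is spelled out, the dimension defaults to IxD
let b: Tensor<f64, DeviceCpuSerial> = rt::zeros(([3, 4], &DeviceCpuSerial));
```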

[^1]: RSTSR differs from `ndarray` in struct construction.
While `ndarray` stores shape and stride directly in `ArrayBase`, RSTSR stores shape, stride and offset in `Layout<D>`.
The layout is the tensor's metadata, and it can be detached from the tensor's data.

-[^2]: In RSTSR, data is stored as variable or its reference, in safe rust.
-This is different to `ndarray`, which stores pointer (with offset) and memory data (if owned) or phantom lifetime annotation (if referenced).

-[^3]: This distinguishes RSTSR and `ndarray`.
+[^2]: This distinguishes RSTSR from `ndarray`.
We hope that RSTSR will become a multi-backend framework in the future.
Currently, we have implemented a serial CPU device (`DeviceCpuSerial`) and a parallel CPU device with Faer matmul (`DeviceFaer`),
showing the possibility of more demanding heterogeneous programming within the RSTSR framework.

+[^3]: In RSTSR, data is stored as a variable or its reference, in safe Rust.
+This is different from `ndarray`, which stores a pointer (with offset) plus memory data (if owned) or a phantom lifetime annotation (if referenced).

[^4]: This distinguishes RSTSR from `candle`.
RSTSR allows external implementations of backends, hopefully allowing easy extension to other kinds of devices, similar to `burn`.
RSTSR also allows virtually all kinds of element types (you can take `rug` or even `Vec<T>` as the tensor element, as long as they implement `Clone`), similar to `ndarray`.
@@ -50,25 +58,25 @@ Structure and usage of RSTSR's `TensorBase` is similar to `ndarray`'s `ArrayBase

Different ownerships can be converted to each other. However, some conversion functions may come at a cost (an explicit memory copy).

-- `view` gives `TensorView<'l, T, D, B>`.
+- `view` gives `TensorView<'l, T, B, D>`.
- This function never copies the tensor data, so in that respect it is virtually zero-cost.
- It still clones the tensor layout, so some overhead occurs; for a large tensor this is cheap.
-- `view_mut` gives `TensorMut<'l, T, D, B>`.
+- `view_mut` gives `TensorMut<'l, T, B, D>`.
- In Rust, either many shared references or exactly one mutable reference is allowed.
This is also true for `TensorView` as a shared reference and `TensorMut` as a mutable reference.
-- `into_owned_keep_layout` gives `Tensor<T, D, B>`.
+- `into_owned_keep_layout` gives `Tensor<T, B, D>`.
- For `Tensor`, this is free of memory copy;
- For `TensorView` and `TensorMut`, this requires an explicit memory copy. Note that it is usually more appropriate to use `into_owned` in this case.
- For `TensorArc`, this is free of memory copy, but note that it may panic when the strong reference count is not exactly one.
You may use `tensor.data().strong_count()` to check the strong reference count.
- For `TensorCow`, if it is owned (`DataCow::Owned`), then it is free of memory copy; if it is a reference (`DataCow::Ref`), then it requires an explicit memory copy.
-- `into_owned` also gives `Tensor<T, D, B>`.
+- `into_owned` also gives `Tensor<T, B, D>`.
- This function differs from `into_owned_keep_layout` in that `into_owned` avoids the memory copy only when the tensor layout covers the whole memory bulk (the size of the memory bulk equals the size of the tensor layout). Calling `into_owned` on any non-trivial slice of a tensor will incur a memory copy.
- Also note that, if you just want to shrink the memory to a slice of the tensor, `into_owned` is the more appropriate choice.
- For `TensorView` and `TensorMut`, `into_owned` will copy less memory than `into_owned_keep_layout`. So `into_owned` is preferable for tensor views.
-- `into_cow` gives `TensorCow<'l, T, D, B>`.
+- `into_cow` gives `TensorCow<'l, T, B, D>`.
- This function does not have any cost.
-- `into_shared_keep_layout` and `into_shared` gives `TensorArc<'l, T, D, B>`. This is similar to `into_owned_keep_layout` and `into_owned`.
+- `into_shared_keep_layout` and `into_shared` give `TensorArc<'l, T, B, D>`. These are similar to `into_owned_keep_layout` and `into_owned`.

An example for tensor ownership conversion follows:
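
Since the full listing is collapsed in this view, here is a minimal hedged sketch of the conversions described above (assuming the `rt` prelude from the test listings):

```rust
let a: Tensor<f64> = rt::zeros([3, 4]);
let v = a.view();                    // zero-copy view of `a`
let b = v.into_owned();              // view -> owned: copies memory
let c = b.into_cow();                // owned -> clone-on-write: zero cost
let d = c.into_owned_keep_layout();  // owned cow -> owned: no copy
let _s = d.into_shared();            // owned -> TensorArc
```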

@@ -88,8 +96,7 @@ Converting between Tensor and `Vec<T>` or `&[T]` can be useful if

There are some useful functions for converting a Tensor to `Vec<T>`:
- `to_vec()`: copy the 1-D tensor to a vector; always requires a memory copy;
-- `into_vec()`: move 1-D tensor to vector if memory is contiguous; otherwise copy 1-D tensor to vector;
-- `into_rawvec()`: move raw tensor to vector; this does not assume output memory bulk layout is the same to tensor layout.
+- `into_vec()`: move the 1-D tensor into a vector if its memory is contiguous; otherwise copy the 1-D tensor to a vector.

We do not provide functions that give `&[T]` or `&mut [T]`.
However, we provide the functions `as_ptr()` and `as_mut_ptr()`, which give a pointer to the first element.
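
A hedged sketch of these conversions (assuming the `rt` prelude; the copy/move behavior is as described above):

```rust
let a = rt::arange(6.0);   // contiguous 1-D tensor of f64
let v1 = a.to_vec();       // explicit copy into a new Vec<f64>
let p = a.as_ptr();        // pointer to the first element, no copy
assert_eq!(unsafe { *p }, 0.0);
let v2 = a.into_vec();     // contiguous case: moves the buffer out, no copy
assert_eq!(v1, v2);
```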
Expand Down Expand Up @@ -142,6 +149,7 @@ That is to say, not only dimension $n$ is fixed, but also shape and strides are
For small vectors or matrices, fixing shape and strides can usually be compiled to much more efficient assembly code.
For large vectors or matrices, that will depend on the type of arithmetic computation;
a compiler with `-O3` is not omniscient, and in most cases fixing shape and strides will not bring more benefit than manual cache, pipeline, and multi-threading optimization.
+Writing a more efficient function is preferable to telling the compiler the dimensionality of the tensor.

RSTSR, by design and motivation, is intended for scientific computation on medium or large tensors.
Weighing the benefits against the difficulties, we choose not to introduce fixed shapes and strides.
