rebase dev #96

Merged
merged 58 commits into from
Jan 4, 2025
Changes from 1 commit
58 commits
56813a7
Merge pull request #79 from roaldarbol/dev
roaldarbol Dec 7, 2024
57d6b36
Lots of new functions! Still rough
Dec 12, 2024
72cd715
Mostly docstrings and a few modifications.
Dec 12, 2024
db721a8
A few name changes
Dec 12, 2024
33f4790
Log y-axis on check_poses
Dec 12, 2024
b95b92a
Fix. Also silence ggplot
Dec 12, 2024
e39fc92
Add reference_keypoint to check_pose
Dec 12, 2024
622b533
Add doc and export to replace_na
Dec 12, 2024
143d137
Allow na_interpolation to return an unfiltered data frame with a warning
Dec 12, 2024
d817a39
Patch to last commit
Dec 12, 2024
d692715
Another small patch
Dec 12, 2024
efcbe5b
Allow plotting of all NAs in check_na_timing
Dec 12, 2024
0118c58
Add min_obs parameter to smooth_movement
Dec 12, 2024
4d752ea
Hopefully improve speed of translate_coords_keypoint
Dec 13, 2024
9e47f16
lots of new functions and test data moved
Dec 14, 2024
d40e5e0
Just filename changes
Dec 14, 2024
906c0f1
Just docs and patches to ensure successful building
Dec 14, 2024
530e868
Add better read_trex docstring
Dec 14, 2024
eb9cb2b
Merge pull request #87 from roaldarbol/main
roaldarbol Dec 14, 2024
4b835ab
Expose and add documentation to set_individual and set_framerate
Dec 15, 2024
6a304fd
Add imports
Dec 15, 2024
91afc51
Export set_ functions and update get_example_data
Dec 15, 2024
0177652
Add to NAMESPACE
Dec 15, 2024
c58f8ca
Fix time series plots when all values are NA
Dec 15, 2024
4904fdd
Tiny patch
Dec 15, 2024
be0c861
Add peak/trough detection
Dec 16, 2024
7111681
Great improvements to the extrema detection functions. Also lots of test…
Dec 16, 2024
004aa31
Add movement classification
Dec 16, 2024
5a12cce
Export classification
Dec 16, 2024
2d5e44d
And add it to NAMESPACE
Dec 16, 2024
add149a
Merge branch 'everything_everywhere_all_at_once' of https://github.co…
Dec 16, 2024
6b71fbc
Fix bug in filter_by_speed
Dec 17, 2024
4f1e22e
Add NA tests
Dec 17, 2024
b547bfd
Bug fix for calculate_kinematics - added group_by keypoint and indivi…
Dec 17, 2024
679d30c
Fix set_framerate so it detects whether a frame rate has previously b…
Dec 17, 2024
a497269
Add bandwidth filters
Dec 19, 2024
d5b0ec6
Updates to the classification functions
Dec 19, 2024
414e90b
Changed method names in smooth_movement function
Dec 19, 2024
eb39902
Added NA testing for filter_by_speed
Dec 19, 2024
fe972ff
Add return_type parameter
Dec 19, 2024
1d35cfc
Add return_type parameter
Dec 19, 2024
4685c92
Merge branch 'everything_everywhere_all_at_once' of https://github.co…
Dec 19, 2024
d746502
Clean-up
Dec 19, 2024
ca26260
Patch
Dec 19, 2024
b05dca9
Another patch
Dec 19, 2024
6a1a08c
Patch again
Dec 19, 2024
65c696d
Improved bandwidth filters
Dec 20, 2024
18e7661
Adds Kalman filters
Dec 20, 2024
d9d1ca5
Add rotation of coordinates and egocentric transformation
Dec 21, 2024
ff8ebdc
Alignment of timeseries and classification w peak+trough
Dec 25, 2024
383aa8d
Improve detection of active periods
Dec 25, 2024
e0589f2
Adds replace_na functions and classify_low_periods
Dec 26, 2024
f8e440e
Filtering functions
Jan 3, 2025
d4b790d
Calculations
Jan 3, 2025
0492954
Filter NA functions
Jan 3, 2025
e64e9fa
The rest
Jan 3, 2025
0c2bba8
Update version
Jan 3, 2025
635e699
Merge pull request #94 from roaldarbol/everything_everywhere_all_at_once
roaldarbol Jan 3, 2025
The rest
Mikkel Roald-Arbøl committed Jan 3, 2025
commit e64e9fa368da32339dcdc3c231a2064fa02d8984
25 changes: 17 additions & 8 deletions NAMESPACE
@@ -14,28 +14,32 @@ export(classify_by_stability)
export(classify_by_threshold)
export(classify_high_periods)
export(classify_low_periods)
export(clean_kinematics)
export(does_file_have_expected_headers)
export(ensure_file_has_expected_headers)
export(ensure_file_has_headers)
export(filter_by_confidence)
export(filter_by_speed)
export(filter_highpass)
export(filter_highpass_fft)
export(filter_kalman)
export(filter_kalman_irregular)
export(filter_lowpass)
export(filter_lowpass_fft)
export(filter_movement)
export(filter_na_confidence)
export(filter_na_roi)
export(filter_na_speed)
export(filter_rollmean)
export(filter_rollmedian)
export(filter_sgolay)
export(find_lag)
export(find_peaks)
export(find_time_lag)
export(find_troughs)
export(get_example_data)
export(get_metadata)
export(ggplot_na_gapsize)
export(group_every)
export(init_metadata)
export(map_to_cartesian)
export(map_to_polar)
export(na_interpolation)
export(plot_position_timeseries)
export(plot_speed_timeseries)
export(read_animalta)
@@ -48,7 +52,6 @@ export(read_sleap)
export(read_trackball)
export(read_treadmill)
export(read_trex)
export(replace_missing)
export(replace_na)
export(replace_na_linear)
export(replace_na_locf)
@@ -60,8 +63,6 @@ export(set_framerate)
export(set_individual)
export(set_start_datetime)
export(set_uuid)
export(smooth_by_savgol)
export(smooth_movement)
export(transform_to_egocentric)
export(translate_coords)
export(validate_animalta)
@@ -132,7 +133,15 @@ importFrom(signal,butter)
importFrom(signal,filtfilt)
importFrom(signal,sgolayfilt)
importFrom(stats,approx)
importFrom(stats,ccf)
importFrom(stats,complete.cases)
importFrom(stats,fft)
importFrom(stats,mad)
importFrom(stats,median)
importFrom(stats,qnorm)
importFrom(stats,quantile)
importFrom(stats,spline)
importFrom(stinepack,stinterp)
importFrom(stringi,stri_rand_strings)
importFrom(utils,download.file)
importFrom(vroom,vroom)
86 changes: 86 additions & 0 deletions R/add_centroid.R
@@ -0,0 +1,86 @@
#' Add Centroid to Movement Data
#'
#' @description
#' Calculates and adds a centroid point to movement tracking data. The centroid
#' represents the mean position of selected keypoints at each time point.
#'
#' @param data A data frame containing movement tracking data with the following
#' required columns:
#' - `individual`: Identifier for each tracked subject
#' - `keypoint`: Factor specifying tracked points
#' - `time`: Time values
#' - `x`: x-coordinates
#' - `y`: y-coordinates
#' - `confidence`: Confidence values for tracked points
#' @param include_keypoints Optional character vector specifying which keypoints
#' to use for centroid calculation. If NULL (default), all keypoints are used
#' unless `exclude_keypoints` is specified.
#' @param exclude_keypoints Optional character vector specifying which keypoints
#' to exclude from centroid calculation. If NULL (default), no keypoints are
#' excluded unless `include_keypoints` is specified.
#' @param centroid_name Character string specifying the name for the centroid
#' keypoint (default: "centroid")
#'
#' @return A data frame with the same structure as the input, but with an
#' additional keypoint representing the centroid. The centroid's confidence
#' values are set to NA.
#'
#' @details
#' The function calculates the centroid as the mean x and y position of the
#' selected keypoints at each time point for each individual. Keypoints can be
#' selected either by specifying which ones to include (`include_keypoints`) or
#' which ones to exclude (`exclude_keypoints`). The resulting centroid is added
#' as a new keypoint to the data frame.
#'
#' @examples
#' \dontrun{
#' # Add centroid using all keypoints
#' add_centroid(movement_data)
#'
#' # Calculate centroid using only specific keypoints
#' add_centroid(movement_data,
#' include_keypoints = c("head", "thorax", "abdomen"))
#'
#' # Calculate centroid excluding certain keypoints
#' add_centroid(movement_data,
#' exclude_keypoints = c("antenna_left", "antenna_right"),
#' centroid_name = "body_centroid")
#' }
#'
#' @seealso
#' `convert_nan_to_na()` for NaN handling in the centroid calculation
#'
#' @importFrom dplyr filter group_by summarise mutate arrange bind_rows
#'
#' @export
add_centroid <- function(data,
include_keypoints=NULL,
exclude_keypoints=NULL,
centroid_name="centroid"){
# Check that centroid isn't there
# Check that it's a movement data frame
# To be optimised with collapse later on
if (!is.null(include_keypoints)){
df_centroid <- data |>
dplyr::filter(.data$keypoint %in% include_keypoints)
} else if (!is.null(exclude_keypoints)){
df_centroid <- data |>
dplyr::filter(!.data$keypoint %in% exclude_keypoints)
} else {
df_centroid <- data
}

df_centroid <- df_centroid |>
dplyr::group_by(.data$individual, .data$time) |>
dplyr::summarise(x = mean(.data$x, na.rm=TRUE),
y = mean(.data$y, na.rm=TRUE),
confidence = NA,
.groups = "keep") |>
dplyr::mutate(keypoint = factor(as.character(centroid_name))) |>
convert_nan_to_na()

data <- bind_rows(data, df_centroid) |>
dplyr::arrange(.data$time, .data$individual, .data$keypoint)

return(data)
}
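
A minimal usage sketch, not part of the diff, based on the signature and required columns documented above; the toy tibble and keypoint names are made up for illustration:

library(tibble)

# Two keypoints ("head", "abdomen") tracked for one individual over three time points
movement_data <- tibble(
  individual = factor("ind1"),
  keypoint   = factor(rep(c("head", "abdomen"), each = 3)),
  time       = rep(1:3, times = 2),
  x          = c(1, 2, 3, 3, 4, 5),
  y          = c(1, 1, 1, 3, 3, 3),
  confidence = 1
)

# Adds a "centroid" keypoint whose x/y are the per-time means of the selected keypoints
with_centroid <- add_centroid(movement_data)

# The same, restricted to an explicit subset of keypoints and a custom name
add_centroid(movement_data, include_keypoints = c("head", "abdomen"), centroid_name = "body_centroid")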
18 changes: 10 additions & 8 deletions R/align_timeseries.R
@@ -21,14 +21,16 @@
#' t <- seq(0, 10, 0.1)
#' reference <- sin(t)
#' signal <- sin(t - 0.5) # Signal delayed by 0.5 units
#' lag <- find_time_lag(signal, reference)
#' lag <- find_lag(signal, reference)
#' print(lag) # Should be approximately 5 samples (0.5 units)
#'
#' @seealso \code{\link{align_time_series}} for applying the computed lag
#' @seealso \code{\link{align_timeseries}} for applying the computed lag
#'
#' @importFrom stats complete.cases ccf
#'
#' @export
find_time_lag <- function(signal, reference, max_lag = 5000, normalize = TRUE) {
complete_cases <- complete.cases(signal, reference)
find_lag <- function(signal, reference, max_lag = 5000, normalize = TRUE) {
complete_cases <- stats::complete.cases(signal, reference)
signal <- signal[complete_cases]
reference <- reference[complete_cases]

@@ -42,7 +44,7 @@ find_time_lag <- function(signal, reference, max_lag = 5000, normalize = TRUE) {
max_lag = length(signal) - 1
}

ccf_result <- ccf(signal, reference, plot = FALSE, lag.max = max_lag)
ccf_result <- stats::ccf(signal, reference, plot = FALSE, lag.max = max_lag)
best_lag <- ccf_result$lag[which.max(abs(ccf_result$acf))]

# Subtract one observation, which seems to be needed in tests
@@ -55,10 +57,10 @@ find_time_lag <- function(signal, reference, max_lag = 5000, normalize = TRUE) {
#'
#' This function aligns two time series by shifting one series relative to the
#' reference based on their cross-correlation. It first finds the optimal lag
#' using \code{find_time_lag}, then applies the shift by padding with NA values
#' using \code{find_lag}, then applies the shift by padding with NA values
#' as needed.
#'
#' @inheritParams find_time_lag
#' @inheritParams find_lag
#' @param signal Time series to align (numeric vector)
#' @param reference Reference time series to align against (numeric vector)
#'
@@ -81,7 +83,7 @@ find_time_lag <- function(signal, reference, max_lag = 5000, normalize = TRUE) {
#'
#' @export
align_timeseries <- function(signal, reference, max_lag = 5000, normalize = TRUE) {
lag <- find_time_lag(signal, reference, max_lag, normalize)
lag <- find_lag(signal, reference, max_lag, normalize)

if (lag > 0) {
aligned <- c(rep(NA, lag), signal[1:(length(signal)-lag)])
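
As a quick, hedged end-to-end sketch of the renamed pair, reusing the synthetic signals from the roxygen example above:

t <- seq(0, 10, 0.1)
reference <- sin(t)
signal <- sin(t - 0.5)                    # reference delayed by 0.5 time units

lag <- find_lag(signal, reference)        # roughly 5 samples at this sampling step
aligned <- align_timeseries(signal, reference)

# align_timeseries() pads the shifted series with NAs, so its length is unchanged
length(aligned) == length(signal)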
78 changes: 78 additions & 0 deletions R/classify_by_high_periods.R
@@ -0,0 +1,78 @@
#' Classifies Periods of High Activity in Time Series Using Peaks and Troughs
#'
#' @description
#' Identifies periods of high activity in a time series by analyzing peaks and troughs,
#' returning a logical vector marking these periods. The function handles special cases
#' like adjacent peaks and the initial/final sequences.
#'
#' @param x numeric vector; the time series values
#' @param peaks logical vector; same length as x, TRUE indicates peak positions
#' @param troughs logical vector; same length as x, TRUE indicates trough positions
#'
#' @return logical vector; TRUE indicates periods of high activity
#'
#' @details
#' The function performs the following steps:
#' 1. Resolves adjacent peaks by keeping only the highest
#' 2. Handles the initial sequence before the first trough
#' 3. Handles the final sequence after the last event
#' 4. Identifies regions between troughs containing exactly one peak
#'
#' @examples
#' \dontrun{
#' x <- c(1, 3, 2, 1, 4, 2, 1)
#' peaks <- c(FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE)
#' troughs <- c(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE)
#' classify_high_periods(x, peaks, troughs)
#' }
#'
#' @export
classify_high_periods <- function(x, peaks, troughs) {
cli::cli_abort("Doesn't currently work")
# # Input validation
# if (length(peaks) != length(troughs) || length(x) != length(peaks)) {
# cli::cli_abort("Lengths of x, peaks, and troughs must match")
# }
#
# n <- length(x)
# result <- logical(n)
#
# # First handle adjacent peaks - keep only highest
# peak_indices <- which(peaks)
# for(i in 1:(length(peak_indices)-1)) {
# # Look at all peaks until we find a trough
# for(j in (i+1):length(peak_indices)) {
# if(any(troughs[peak_indices[i]:peak_indices[j]])) break
# # Keep highest peak, remove others
# if(x[peak_indices[i]] <= x[peak_indices[j]]) {
# peaks[peak_indices[i]] <- FALSE
# break
# } else {
# peaks[peak_indices[j]] <- FALSE
# }
# }
# }
#
# # Handle start sequence
# first_event <- min(c(peak_indices[1], trough_indices[1]))
# result[1:first_event] <- ifelse(first_event == peak_indices[1], TRUE, FALSE)
#
# # End sequence
# last_event <- max(c(peak_indices[length(peak_indices)],
# trough_indices[length(trough_indices)]))
# result[last_event:n] <- ifelse(last_event == peak_indices[length(peak_indices)],
# TRUE, FALSE)
#
# # Find regions between troughs that have exactly one peak
# for(i in 1:(length(trough_indices)-1)) {
# current_trough <- trough_indices[i]
# next_trough <- trough_indices[i+1]
# peaks_between <- which(peaks[current_trough:next_trough])
#
# if(length(peaks_between) == 1) {
# result[(current_trough+1):(next_trough-1)] <- TRUE
# }
# }
#
# return(result)
}
54 changes: 54 additions & 0 deletions R/classify_by_low_periods.R
@@ -0,0 +1,54 @@
#' Classifies Periods of Low Activity in Time Series Using Peaks and Troughs
#'
#' @description
#' Identifies periods of low activity in a time series by analyzing peaks and troughs,
#' returning a logical vector marking these periods. Low activity periods are defined
#' as regions between consecutive troughs that contain no peaks.
#'
#' @param peaks logical vector; TRUE indicates peak positions
#' @param troughs logical vector; same length as peaks, TRUE indicates trough positions
#'
#' @return logical vector; TRUE indicates periods of low activity
#'
#' @details
#' The function performs the following steps:
#' 1. Validates input lengths
#' 2. Initializes all periods as potentially low activity (TRUE)
#' 3. For each pair of consecutive troughs:
#' - If no peaks exist between them, maintains TRUE for that period
#' - If any peaks exist, marks that period as FALSE (not low activity)
#'
#' @examples
#' peaks <- c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE)
#' troughs <- c(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE)
#' classify_low_periods(peaks, troughs)
#'
#' @export
classify_low_periods <- function(peaks, troughs) {
# Input validation
if (length(peaks) != length(troughs)) {
cli::cli_abort("Lengths of peaks and troughs must match")
}

# Initialize output vector
result <- rep(TRUE, length(peaks))

# Find indices of troughs
trough_indices <- which(troughs)

# For each consecutive pair of troughs
for (i in seq_len(length(trough_indices) - 1)) {
start_idx <- trough_indices[i]
end_idx <- trough_indices[i + 1]

# Check if there are any peaks between these troughs
between_slice <- peaks[(start_idx + 1):(end_idx - 1)]

if (length(between_slice) > 0 && !any(between_slice)) {
# If no peaks between troughs, set those positions to FALSE
result[(start_idx + 1):(end_idx - 1)] <- FALSE
}
}

return(result)
}
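
A hedged sketch of how this could pair with the peak/trough detection added earlier in this PR; it assumes find_peaks() and find_troughs() (exported in the NAMESPACE above) return logical vectors of the same length as their input, which this diff does not show:

speed <- c(1, 3, 2, 1, 4, 2, 1)

peaks   <- find_peaks(speed)     # assumed: logical vector marking local maxima
troughs <- find_troughs(speed)   # assumed: logical vector marking local minima

low <- classify_low_periods(peaks, troughs)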
11 changes: 7 additions & 4 deletions R/classify_by_stability.R
@@ -42,6 +42,9 @@
#' - 1: High activity state
#' - 0: Low activity state
#' - NA: Unable to classify (usually due to missing data)
#'
#' @importFrom stats quantile qnorm median mad
#'
#' @export
classify_by_stability <- function(speed,
window_size = 30,
@@ -78,7 +81,7 @@ classify_by_stability <- function(speed,
))

# Find baseline statistics using stable periods
var_threshold <- quantile(roll_var, 0.75, na.rm = TRUE)
var_threshold <- stats::quantile(roll_var, 0.75, na.rm = TRUE)
stable_periods <- !is.na(roll_var) & roll_var < var_threshold

rle_obj <- rle(stable_periods)
@@ -109,7 +112,7 @@ classify_by_stability <- function(speed,
baseline_sd <- sd(speed[baseline_start:baseline_end], na.rm = TRUE)

# Convert tolerance to threshold using inverse normal CDF
threshold_multiplier <- qnorm(1 - tolerance)
threshold_multiplier <- stats::qnorm(1 - tolerance)
threshold <- baseline_mean + threshold_multiplier * baseline_sd

# Initial classification
@@ -154,8 +157,8 @@

stable_means <- roll_mean[stable_mask]
list(
level = median(stable_means, na.rm = TRUE),
spread = mad(stable_means, na.rm = TRUE)
level = stats::median(stable_means, na.rm = TRUE),
spread = stats::mad(stable_means, na.rm = TRUE)
)
}
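
For illustration only (the baseline numbers below are hypothetical), the tolerance-to-threshold conversion shown earlier in this file works out to:

tolerance     <- 0.05
baseline_mean <- 0.20   # hypothetical mean speed over the stable baseline window
baseline_sd   <- 0.05   # hypothetical SD over the same window

threshold_multiplier <- stats::qnorm(1 - tolerance)   # about 1.64 for tolerance = 0.05
threshold <- baseline_mean + threshold_multiplier * baseline_sd
# speeds above `threshold` are assigned the high-activity state (1)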
