Skip to content

Commit

Permalink
Tkakar/cat 946 map ensemble ids (#1970)
Browse files Browse the repository at this point in the history
Co-authored-by: Mark Keller <7525285+keller-mark@users.noreply.github.com>
  • Loading branch information
tkakar and keller-mark authored Dec 19, 2024
1 parent 5503589 commit ffa8d5f
Show file tree
Hide file tree
Showing 29 changed files with 336 additions and 31 deletions.
8 changes: 8 additions & 0 deletions .changeset/kind-eagles-live.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
"@vitessce/feature-list": patch
"@vitessce/heatmap": patch
"@vitessce/tooltip": patch
"@vitessce/vit-s": patch
---

Added support for mapping Ensembl gene IDs to gene symbols.
1 change: 0 additions & 1 deletion examples/configs/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ import { alignedVisiumXeniumSpatialdata } from './view-configs/spatial-beta/spat
import { exemplarSmall2024, exemplarSmallPartialInit } from './view-configs/spatial-beta/exemplar-small.js';
import { lake2023, lake2023component } from './view-configs/lake-2023.js';
import { salcher2022 } from './view-configs/salcher_2022.js';

// TODO(spatialBeta):
import { kpmpOop2023 } from './view-configs/spatial-beta/kpmp-oop.js';
import { kpmpAutoInit2023 } from './view-configs/spatial-beta/kpmp-auto-init.js';
Expand Down
2 changes: 2 additions & 0 deletions packages/constants-internal/src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ export const DataType = {
export const AsyncFunctionType = {
// String input (rather than Node input)
AUTOCOMPLETE_FEATURE: 'autocompleteFeature', // (partial: string, targetModality: null | 'gene' | 'protein' | 'genomic-region' | 'cell-type') -> list of feature nodes
GET_ALTERNATIVE_TERMS: 'getAlternativeTerms', // (curieString) -> list of alternative curie strings
GET_TERM_MAPPING: 'getTermMapping', // (keyCuriePrefix, valueCuriePrefix) -> Record<curieString, curieString> for key to value

TRANSFORM_FEATURE: 'transformFeature', // (featureNode, targetModality) -> list of feature nodes from target modality
RELATED_FEATURES: 'relatedFeatures', // (featureNode) -> list of related feature nodes
Expand Down
8 changes: 6 additions & 2 deletions packages/legend/src/Legend.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import React, { useRef, useEffect, useMemo } from 'react';
import clsx from 'clsx';
import { makeStyles } from '@material-ui/core';
import { capitalize, getDefaultColor } from '@vitessce/utils';
import { capitalize, getDefaultColor, cleanFeatureId } from '@vitessce/utils';
import { select } from 'd3-selection';
import { scaleLinear } from 'd3-scale';
import { axisBottom } from 'd3-axis';
Expand Down Expand Up @@ -320,7 +320,11 @@ export default function Legend(props) {
&& featureSelection.length >= 1
&& !isStaticColor
)
? (featureLabelsMap?.get(featureSelection[0]) || featureSelection[0])
? (
featureLabelsMap?.get(featureSelection[0])
|| featureLabelsMap?.get(cleanFeatureId(featureSelection[0]))
|| featureSelection[0]
)
: null;
// if there are missing values, mention them in the label
const featureSelectionLabel = missing ? `${featureSelectionLabelRaw} (${Math.round(missing * 100)}% NaN)` : featureSelectionLabelRaw;
Expand Down
4 changes: 4 additions & 0 deletions packages/main/all/src/base-plugins.ts
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ import {
BiomarkerSelectSubscriber,
autocompleteFeature,
transformFeature,
getAlternativeTerms,
getTermMapping,
} from '@vitessce/biomarker-select';
import {
expandAnndataZarr,
Expand Down Expand Up @@ -533,4 +535,6 @@ export const baseCoordinationTypes = [
export const baseAsyncFunctions = [
new PluginAsyncFunction(AsyncFunctionType.AUTOCOMPLETE_FEATURE, autocompleteFeature),
new PluginAsyncFunction(AsyncFunctionType.TRANSFORM_FEATURE, transformFeature),
new PluginAsyncFunction(AsyncFunctionType.GET_ALTERNATIVE_TERMS, getAlternativeTerms),
new PluginAsyncFunction(AsyncFunctionType.GET_TERM_MAPPING, getTermMapping),
];
3 changes: 2 additions & 1 deletion packages/tooltip/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
},
"dependencies": {
"@material-ui/core": "catalog:",
"@vitessce/vit-s": "workspace:*"
"@vitessce/vit-s": "workspace:*",
"@vitessce/utils": "workspace:*"
},
"devDependencies": {
"react": "catalog:",
Expand Down
14 changes: 12 additions & 2 deletions packages/tooltip/src/TooltipContent.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
import React from 'react';
import React, { useMemo } from 'react';
import { transformInfoValues } from './utils.js';

export default function TooltipContent(props) {
const {
info,
featureType,
featureLabelsMap,
} = props;

const mappedInfo = useMemo(() => {
if (!featureType || !featureLabelsMap) {
return info;
}
return transformInfoValues(info, featureType, featureLabelsMap);
}, [info, featureType, featureLabelsMap]);

return (
<table>
<tbody>
{Object.entries(info).map(([key, value]) => (
{Object.entries(mappedInfo).map(([key, value]) => (
<tr key={key}>
<th>{key}</th>
<td>{value}</td>
Expand Down
31 changes: 31 additions & 0 deletions packages/tooltip/src/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { cleanFeatureId } from '@vitessce/utils';

/**
 * Use the featureLabelsMap to transform the values in the info object.
 * For example, there are cases where the info object looks like
 * { "Marker Gene ID": "ENSG00000123456.1" }
 * And we want to transform the values into Gene Symbols instead of Ensembl IDs.
 *
 * Only entries whose key contains `featureType` (case-insensitive) are
 * considered. For those, the value is looked up in `featureLabelsMap` first
 * as-is and then (for string values only) after passing it through
 * `cleanFeatureId`. Values without a label are kept unchanged.
 *
 * @param {object} info Tooltip key/value pairs. Returned as-is when falsy.
 * @param {string} featureType The feature type used to select the keys to map.
 * When this is not a string, no mapping is attempted and `info` is returned.
 * @param {Map<string,string>} featureLabelsMap Mapping from feature ID to label.
 * @returns {object} A new object with the mapped values (or `info` itself
 * when no mapping could be attempted).
 */
export function transformInfoValues(info, featureType, featureLabelsMap) {
  if (!info || typeof featureType !== 'string') {
    return info;
  }
  // Loop-invariant: lower-case the feature type once rather than per entry.
  const featureTypeLower = featureType.toLowerCase();
  return Object.fromEntries(
    Object.entries(info).map(([key, value]) => {
      if (!key.toLowerCase().includes(featureTypeLower)) {
        return [key, value];
      }
      const newValue = (
        featureLabelsMap?.get(value)
        // cleanFeatureId performs string operations, so only use it
        // for string values (tooltip values may also be numbers).
        || (typeof value === 'string'
          ? featureLabelsMap?.get(cleanFeatureId(value))
          : undefined)
        || value
      );
      return [key, newValue];
    }),
  );
}
4 changes: 4 additions & 0 deletions packages/types/src/biomarkers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,7 @@ export type FeatureToIntervalFunc = (node: KgNode, assembly: string) => Promise<
// TODO: should the node types be more precise?
export type ObsSetToFeaturesFunc = (node: KgNode) => Promise<KgNode[]>;
export type FeaturesToObsSetFunc = (nodes: KgNode[]) => Promise<KgNode>;

// Other async function types.
export type GetAlternativeTermsFunc = (curie: string) => Promise<string[]>;
export type GetTermMappingFunc = (keyCuriePrefix: string, valCuriePrefix: string) => Promise<Map<string, string>>;
2 changes: 2 additions & 0 deletions packages/types/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ export type {
FeatureToIntervalFunc,
ObsSetToFeaturesFunc,
FeaturesToObsSetFunc,
GetAlternativeTermsFunc,
GetTermMappingFunc,
} from './biomarkers.js';

// This is a types-only package.
Expand Down
1 change: 1 addition & 0 deletions packages/utils/other-utils/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ export {
commaNumber,
capitalize,
pluralize,
cleanFeatureId,
getLongestString,
getNextScope,
getNextScopeNumeric,
Expand Down
14 changes: 14 additions & 0 deletions packages/utils/other-utils/src/root.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,20 @@ export function getLongestString(strings: string[]) {
);
}

/**
 * Normalize a gene identifier for lookups.
 * Identifiers beginning with "ENSG" have everything from the first "."
 * onwards (the version suffix) removed; any other identifier is returned
 * unchanged.
 * @param {string} featureName A gene ID, e.g. "ENSG00000123456.1".
 * @returns {string} The cleaned gene ID, e.g. "ENSG00000123456".
 */
export function cleanFeatureId(featureName: string) {
  const isEnsemblGeneId = featureName.startsWith('ENSG');
  if (!isEnsemblGeneId) {
    return featureName;
  }
  // The part before the first '.' is the unversioned ID.
  const [unversionedId] = featureName.split('.');
  return unversionedId;
}

/**
* Generate a new scope name which does not
* conflict / overlap with a previous scope name.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@ import { csvParse } from 'd3-dsv';
// eslint-disable-next-line import/no-unresolved
import Fuse from 'fuse.js/basic';

/** @import { KgNode, KgEdge, TargetModalityType, AutocompleteFeatureFunc, TransformFeatureFunc } from '@vitessce/types' */
/** @import { KgNode, KgEdge, TargetModalityType, AutocompleteFeatureFunc, TransformFeatureFunc, GetAlternativeTermsFunc, GetTermMappingFunc } from '@vitessce/types' */
/** @import { QueryClient, QueryFunctionContext } from '@tanstack/react-query' */

const KG_BASE_URL = 'https://storage.googleapis.com/vitessce-demo-data/enrichr-kg-september-2023';
const ENSG_TO_GENE_SYMBOL_URL = 'https://vitessce-resources.s3.us-east-2.amazonaws.com/genes_filtered.json';

/**
* @returns {Promise<KgNode[]>}
Expand Down Expand Up @@ -61,6 +62,14 @@ function loadPathwayNodes() {
});
}

/**
 * Fetch the JSON file located at ENSG_TO_GENE_SYMBOL_URL and parse it.
 * The parsed object is used by callers as a lookup from ENSG ID to gene symbol.
 *
 * fetch() only rejects on network failures, so the HTTP status is checked
 * explicitly; otherwise an error page would be passed to the JSON parser.
 *
 * @returns {Promise<Record<string,string>>}
 * @throws {Error} (as a rejection) when the response status is not OK.
 */
function loadEnsgToGeneSymbolMapping() {
  return fetch(ENSG_TO_GENE_SYMBOL_URL)
    .then((res) => {
      if (!res.ok) {
        throw new Error(`Failed to load the ENSG to gene symbol mapping (HTTP status ${res.status}).`);
      }
      return res.json();
    });
}

// Parent app can pass in queryClient
// (to every async function) so that functions can call .fetchQuery
// Reference: https://tanstack.com/query/latest/docs/reference/QueryClient#queryclientfetchquery
Expand Down Expand Up @@ -185,3 +194,65 @@ export async function transformFeature({ queryClient }, node, targetModality) {
// TODO: handle other target modalities
return [];
}

/**
 * Look up alternative identifiers for a curie string.
 *
 * Only Ensembl gene IDs (curies starting with "ENSEMBL:ENSG",
 * compared case-insensitively) are currently supported; they are
 * translated to a gene symbol using our own JSON mapping file.
 * Reference: https://registry.identifiers.org/registry/ensembl
 *
 * @satisfies {GetAlternativeTermsFunc}
 * @param {object} ctx
 * @param {QueryClient} ctx.queryClient
 * @param {string} curie
 * @returns {Promise<string[]>} A list of curie strings. Empty when the
 * input is not an Ensembl gene ID or has no entry in the mapping.
 */
export async function getAlternativeTerms({ queryClient }, curie) {
  // Anything other than an Ensembl gene ID is unsupported for now.
  if (!curie.toUpperCase().startsWith('ENSEMBL:ENSG')) {
    return [];
  }

  const ensgToSymbol = await queryClient.fetchQuery({
    queryKey: ['ensgToGeneSymbolMapping'],
    staleTime: Infinity,
    queryFn: loadEnsgToGeneSymbolMapping,
  });

  // The keys of the JSON file do not carry the 'ENSEMBL:' prefix.
  const [, ensemblId] = curie.split(':');
  const symbol = ensgToSymbol?.[ensemblId];
  if (!symbol) {
    return [];
  }

  // The values of the JSON file do not carry the 'HGNC:' prefix either,
  // so add it unless it is already there.
  const alreadyPrefixed = symbol.toUpperCase().startsWith('HGNC:');
  return [alreadyPrefixed ? symbol : `HGNC:${symbol}`];
}

/**
 * Build a mapping between curie strings of two identifier namespaces.
 *
 * Currently only ENSEMBL <-> HGNC is supported (prefixes are compared
 * case-insensitively), based on our own ENSG-to-gene-symbol JSON file.
 * The curie prefix is only added to a term when it is not already present,
 * which matches how getAlternativeTerms builds its curies.
 *
 * @satisfies {GetTermMappingFunc}
 * @param {object} ctx
 * @param {QueryClient} ctx.queryClient
 * @param {string} keyCuriePrefix Namespace of the keys ('ENSEMBL' or 'HGNC').
 * @param {string} valCuriePrefix Namespace of the values ('HGNC' or 'ENSEMBL').
 * @returns {Promise<Map<string, string>>} A mapping between curie strings.
 * @throws {Error} (as a rejection) when the requested pair of prefixes
 * is not supported.
 */
export async function getTermMapping({ queryClient }, keyCuriePrefix, valCuriePrefix) {
  const keyPrefix = keyCuriePrefix.toUpperCase();
  const valPrefix = valCuriePrefix.toUpperCase();
  const isForward = (keyPrefix === 'ENSEMBL' && valPrefix === 'HGNC');
  const isReversed = (keyPrefix === 'HGNC' && valPrefix === 'ENSEMBL');
  if (!isForward && !isReversed) {
    throw new Error(`Mapping between ${keyCuriePrefix} and ${valCuriePrefix} is not yet implemented.`);
  }

  const idMapping = await queryClient.fetchQuery({
    queryKey: ['ensgToGeneSymbolMapping'],
    staleTime: Infinity,
    queryFn: loadEnsgToGeneSymbolMapping,
  });

  // Prepend `prefix:` unless the term already is a curie of that namespace,
  // to avoid producing strings such as 'HGNC:HGNC:TP53'.
  const toCurie = (prefix, term) => {
    const text = String(term);
    return text.toUpperCase().startsWith(`${prefix}:`) ? text : `${prefix}:${text}`;
  };

  // In our current JSON file, keys are ENSG IDs and values are gene symbols.
  return new Map(Object.entries(idMapping).map(([ensgId, geneSymbol]) => {
    const ensemblCurie = toCurie('ENSEMBL', ensgId);
    const hgncCurie = toCurie('HGNC', geneSymbol);
    return isReversed
      ? [hgncCurie, ensemblCurie]
      : [ensemblCurie, hgncCurie];
  }));
}
2 changes: 2 additions & 0 deletions packages/view-types/biomarker-select/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@ export { BiomarkerSelectSubscriber } from './BiomarkerSelectSubscriber.js';
export {
autocompleteFeature,
transformFeature,
getAlternativeTerms,
getTermMapping,
} from './default-async-functions.js';
13 changes: 10 additions & 3 deletions packages/view-types/feature-list/src/FeatureList.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import React, { useEffect, useState, useMemo } from 'react';
import { every } from 'lodash-es';
import { makeStyles } from '@material-ui/core';
import { cleanFeatureId } from '@vitessce/utils';
import { SelectableTable } from './selectable-table/index.js';
import { ALT_COLNAME } from './constants.js';

Expand Down Expand Up @@ -39,12 +40,14 @@ export default function FeatureList(props) {
// passing to the SelectableTable component.
const selectableTableSortKey = (featureListSortKey === 'featureIndex' ? 'key' : 'name');


useEffect(() => {
const results = geneList
.filter(gene => (
gene.toLowerCase().includes(searchTerm.toLowerCase())
|| featureLabelsMap?.get(gene)?.toLowerCase().includes(searchTerm.toLowerCase())
|| featureLabelsMap?.get(gene)
?.toLowerCase().includes(searchTerm.toLowerCase())
|| featureLabelsMap?.get(cleanFeatureId(gene))
?.toLowerCase().includes(searchTerm.toLowerCase())
));
setSearchResults(results);
}, [searchTerm, geneList, featureLabelsMap]);
Expand All @@ -69,7 +72,11 @@ export default function FeatureList(props) {
.map(
gene => ({
key: gene,
name: featureLabelsMap?.get(gene) || gene,
name: (
featureLabelsMap?.get(gene)
|| featureLabelsMap?.get(cleanFeatureId(gene))
|| gene
),
value: (geneSelection ? geneSelection.includes(gene) : false),
}),
);
Expand Down
14 changes: 10 additions & 4 deletions packages/view-types/feature-list/src/FeatureListSubscriber.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
useReady, useUrls,
useFeatureLabelsData, useObsFeatureMatrixIndices,
useCoordination, useLoaders,
useExpandedFeatureLabelsMap,
} from '@vitessce/vit-s';
import { ViewType, COMPONENT_COORDINATION_TYPES, ViewHelpMapping } from '@vitessce/constants-internal';
import FeatureList from './FeatureList.js';
Expand Down Expand Up @@ -75,21 +76,26 @@ export function FeatureListSubscriber(props) {
loaders, dataset, false, {}, {},
{ featureType },
);
const [expandedFeatureLabelsMap, expandedFeatureLabelsStatus] = useExpandedFeatureLabelsMap(
featureType, featureLabelsMap, { stripCuriePrefixes: true },
);
const [{ featureIndex }, matrixIndicesStatus, obsFeatureMatrixUrls] = useObsFeatureMatrixIndices(
loaders, dataset, true,
{ obsType, featureType },
);
const isReady = useReady([
featureLabelsStatus,
expandedFeatureLabelsStatus,
matrixIndicesStatus,
]);
const urls = useUrls([
featureLabelsUrls,
obsFeatureMatrixUrls,
]);

const geneList = featureIndex || [];
const numGenes = geneList.length;
const hasFeatureLabels = Boolean(featureLabelsMap);
const hasFeatureLabels = Boolean(expandedFeatureLabelsMap);

function setGeneSelectionAndColorEncoding(newSelection) {
setGeneSelection(newSelection);
Expand Down Expand Up @@ -125,7 +131,7 @@ export function FeatureListSubscriber(props) {
setFeatureListSortKey={setFeatureListSortKey}
showFeatureTable={showFeatureTable}
setShowFeatureTable={setShowFeatureTable}
hasFeatureLabels={Boolean(featureLabelsMap)}
hasFeatureLabels={hasFeatureLabels}
primaryColumnName={primaryColumnName}
/>
)}
Expand All @@ -136,15 +142,15 @@ export function FeatureListSubscriber(props) {
geneList={geneList}
featureListSort={featureListSort}
featureListSortKey={featureListSortKey || initialSortKey}
featureLabelsMap={featureLabelsMap}
featureLabelsMap={expandedFeatureLabelsMap}
featureType={featureType}
geneSelection={geneSelection}
geneFilter={geneFilter}
setGeneSelection={setGeneSelectionAndColorEncoding}
setGeneFilter={setGeneFilter}
setGeneHighlight={setGeneHighlight}
enableMultiSelect={enableMultiSelect}
hasFeatureLabels={Boolean(featureLabelsMap)}
hasFeatureLabels={hasFeatureLabels}
primaryColumnName={primaryColumnName}
/>
</TitleInfo>
Expand Down
Loading

0 comments on commit ffa8d5f

Please sign in to comment.