Skip to content

Commit

Permalink
Webapp get doc by ID (#2539)
Browse files Browse the repository at this point in the history
* get document by ID dropdown added
* UI styling update
* updated rest-api.md
  • Loading branch information
16BitNarwhal authored Jun 26, 2024
1 parent c62f65c commit 964ffef
Show file tree
Hide file tree
Showing 10 changed files with 1,378 additions and 289 deletions.
87 changes: 55 additions & 32 deletions docs/rest-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,48 @@ java -cp $ANSERINI_JAR io.anserini.server.Application --server.port=8081

And then navigate to [`http://localhost:8081/`](http://localhost:8081/) in your browser.

## List Indexes

To list all the index information, the endpoint is `api/v1.0/indexes/`

Run

```bash
curl -X GET "http://localhost:8081/api/v1.0/indexes"
```

Output is a mapping from index name to `IndexInfo` enum

```json
{
"cacm": {
"urls": [
"https://github.com/castorini/anserini-data/raw/master/CACM/lucene-index.cacm.20221005.252b5e.tar.gz"
],
"cached": false,
"md5": "cfe14d543c6a27f4d742fb2d0099b8e0",
"indexName": "cacm",
"description": "Lucene index of the CACM corpus.",
"model": "BM25",
"corpus": "CACM",
"filename": "lucene-index.cacm.20221005.252b5e.tar.gz"
},
"msmarco-v1-passage": {
"urls": [
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.msmarco-v1-passage.20221004.252b5e.tar.gz"
],
"cached": true,
"md5": "678876e8c99a89933d553609a0fd8793",
"indexName": "msmarco-v1-passage",
"description": "Lucene index of the MS MARCO V1 passage corpus.",
"model": "BM25",
"corpus": "MS MARCO V1 Passage",
"filename": "lucene-inverted.msmarco-v1-passage.20221004.252b5e.tar.gz"
},
...
}
```

## Search Queries

The search query endpoint is `api/v1.0/indexes/{index_name}/search?query={query}&hits={hits}&qid={qid}`
Expand Down Expand Up @@ -66,45 +108,21 @@ The json results are the same as the output of the `-outputRerankerRequests` opt
}
```

## List Indexes
## Get Document Content by DocId

To list all the index information, the endpoint is `api/v1.0/indexes/`
To access the content of a document in an index, the endpoint is `api/v1.0/indexes/{index_name}/documents/{docid}`

Run
Here's an example of getting the document of the top candidate from the above example:

```bash
curl -X GET "http://localhost:8081/api/v1.0/indexes"
curl -X GET "http://localhost:8081/api/v1.0/indexes/msmarco-v2.1-doc/documents/msmarco_v2.1_doc_15_390497775"
```

Output is a mapping from index name to `IndexInfo` enum
Output is an object containing the 'cached' property

```json
{
"cacm": {
"urls": [
"https://github.com/castorini/anserini-data/raw/master/CACM/lucene-index.cacm.20221005.252b5e.tar.gz"
],
"cached": false,
"md5": "cfe14d543c6a27f4d742fb2d0099b8e0",
"indexName": "cacm",
"description": "Lucene index of the CACM corpus.",
"model": "BM25",
"corpus": "CACM",
"filename": "lucene-index.cacm.20221005.252b5e.tar.gz"
},
"msmarco-v1-passage": {
"urls": [
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.msmarco-v1-passage.20221004.252b5e.tar.gz"
],
"cached": true,
"md5": "678876e8c99a89933d553609a0fd8793",
"indexName": "msmarco-v1-passage",
"description": "Lucene index of the MS MARCO V1 passage corpus.",
"model": "BM25",
"corpus": "MS MARCO V1 Passage",
"filename": "lucene-inverted.msmarco-v1-passage.20221004.252b5e.tar.gz"
},
...
"cached": true
}
```

Expand All @@ -122,6 +140,11 @@ Output is an object containing the 'cached' property

```json
{
"cached": true
"doc": {
"url": "https://diabetestalk.net/blood-sugar/conversion-of-carbohydrates-to-glucose",
"title": "Conversion Of Carbohydrates To Glucose | DiabetesTalk.Net",
"headings": "...",
"body": "..."
}
}
```
```
8 changes: 5 additions & 3 deletions src/main/frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@
"export": "next build && next export"
},
"dependencies": {
"@emotion/react": "^11.10.4",
"@emotion/styled": "^11.10.4",
"@chakra-ui/react": "^2.8.2",
"@emotion/react": "^11.11.4",
"@emotion/styled": "^11.11.5",
"@mui/material": "^5.10.10",
"framer-motion": "^11.2.11",
"next": "^14.1.1",
"postcss": "^8.4.31",
"react": "18.2.0",
"react-dom": "18.2.0",
"postcss": "^8.4.31",
"zod": "^3.22.3"
},
"devDependencies": {
Expand Down
20 changes: 9 additions & 11 deletions src/main/frontend/pages/_app.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import '../styles/globals.css';
import type { AppProps } from 'next/app';
import { ChakraProvider } from '@chakra-ui/react';

import '../styles/globals.css'
import SearchBar from './components/SearchBar'

export default function Home() {
function MyApp({ Component, pageProps } : AppProps) {
return (
<div className="container">
<div className="header">
<h1>Anserini Search Interface</h1>
<p>A Lucene toolkit for reproducible information retrieval research</p>
</div>
<SearchBar />
</div>
<ChakraProvider>
<Component {...pageProps} />
</ChakraProvider>
);
}

export default MyApp;
171 changes: 79 additions & 92 deletions src/main/frontend/pages/components/Dropdown.tsx
Original file line number Diff line number Diff line change
@@ -1,20 +1,5 @@
/*
* Anserini: A Lucene toolkit for reproducible information retrieval research
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import React, { useState, useRef, useEffect } from 'react';
import React, { useState, useEffect } from 'react';
import { Select, HStack, Box } from '@chakra-ui/react';

interface Props {
onSelect: (selectedValue: string) => void;
Expand All @@ -31,97 +16,99 @@ interface IndexInfo {
cached: boolean;
}

/**
 * Metadata describing a single index. `Dropdown` casts each value of the
 * JSON object fetched from `/api/v1.0/indexes` to this shape.
 */
interface IndexInfo {
indexName: string; // identifier collected into the dropdown lists and used as the <option> value
description: string;
filename: string;
corpus: string; // used to group indexes (matched against 'MS MARCO' / 'BEIR') and as the sub-group key
model: string; // shown as the label of each index option
urls: string[];
md5: string;
cached: boolean; // when true the option is highlighted; presumably means the index is already available locally — confirm against the server
}

const Dropdown: React.FC<Props> = ({ onSelect }) => {
const [selectedCollection, setSelectedCollection] = useState<string | null>(null);
const [selectedCorpus, setSelectedCorpus] = useState<string | null>(null);
const [selectedIndex, setSelectedIndex] = useState<string | null>(null);

const [indexInfoList, setIndexInfoList] = useState<{ [key: string]: IndexInfo }>({});
const [collections, setCollections] = useState<{ [key: string]: string[] | { [key: string]: string[] } }>({});

// Fetch indexes from api
useEffect(
() => {
const fetchIndexes = async () => {
const response = await fetch('/api/v1.0/indexes');
const indexList = await response.json();
setIndexInfoList(indexList);
useEffect(() => {
const fetchIndexes = async () => {
const response = await fetch('/api/v1.0/indexes');
const indexList = await response.json();
setIndexInfoList(indexList);

const dropdownList : { [key: string]: string[] | { [key: string]: string[] } } = {};
for (const value of Object.values(indexList)) {
const index = value as IndexInfo;
const dropdownList: { [key: string]: string[] | { [key: string]: string[] } } = {};
for (const value of Object.values(indexList)) {
const index = value as IndexInfo;

if (index['corpus'].includes('MS MARCO V1')) {
if (!dropdownList['MS MARCO V1']) dropdownList['MS MARCO V1'] = [];
(dropdownList['MS MARCO V1'] as string[]).push(index.indexName);
} else if (index['corpus'].includes('MS MARCO V2')) {
if (!dropdownList['MS MARCO V2']) dropdownList['MS MARCO V2'] = [];
(dropdownList['MS MARCO V2'] as string[]).push(index.indexName);
} else if (index['corpus'].includes('BEIR')) {
if (!dropdownList['BEIR']) dropdownList['BEIR'] = {};
const beir = dropdownList['BEIR'] as { [key: string]: string[] };
const corpus = (index['corpus'] as string);
if (beir[corpus]) {
(beir[corpus] as string[]).push(index.indexName);
} else {
beir[corpus] = [index.indexName];
}
if (index.corpus.includes('MS MARCO')) {
if (!dropdownList['MS MARCO']) dropdownList['MS MARCO'] = {};
const msmarco = dropdownList['MS MARCO'] as { [key: string]: string[] };
const corpus = index.corpus as string;
if (msmarco[corpus]) {
(msmarco[corpus] as string[]).push(index.indexName);
} else {
msmarco[corpus] = [index.indexName];
}
} else if (index.corpus.includes('BEIR')) {
if (!dropdownList['BEIR']) dropdownList['BEIR'] = {};
const beir = dropdownList['BEIR'] as { [key: string]: string[] };
const corpus = index.corpus as string;
if (beir[corpus]) {
(beir[corpus] as string[]).push(index.indexName);
} else {
beir[corpus] = [index.indexName];
}
}
setCollections(dropdownList);
}
setCollections(dropdownList);
};

fetchIndexes();
}
, []);
fetchIndexes();
}, []);

return (
<div className="dropdowns">
<select className="dropdown-button" onChange={(e) => {
setSelectedCollection(e.target.value);
setSelectedCorpus(null);
setSelectedIndex(null);
}}>
<option value="" className="dropdown-item">Select</option>
{Object.keys(collections).map((collection) => (
<option className="dropdown-item" key={collection} value={collection}>{collection}</option>
))}
</select>

{selectedCollection && selectedCollection.includes("MS MARCO") && <>
<select className="dropdown-button" onChange={(e) => {
setSelectedIndex(e.target.value);
onSelect(e.target.value)}}
>
<option value="" className="dropdown-item">Select</option>
{Array.isArray(collections[selectedCollection])
&& (collections[selectedCollection] as string[]).map((index) => (
<option className={`dropdown-item ${indexInfoList[index].cached ? 'cached' : ''}`} key={index} value={index}>{indexInfoList[index].corpus} | {indexInfoList[index].model}</option>
<Box>
<HStack spacing={4}>
<Select placeholder="Select" onChange={(e) => {
setSelectedCollection(e.target.value);
setSelectedCorpus(null);
setSelectedIndex(null);
}}>
{Object.keys(collections).map((collection) => (
<option key={collection} value={collection}>{collection}</option>
))}
</select>
</>}

{selectedCollection=='BEIR' && <>
<select className="dropdown-button" onChange={(e) => {
setSelectedCorpus(e.target.value);
setSelectedIndex(null);
}}>
<option value="" className="dropdown-item">Select</option>
{selectedCollection && collections[selectedCollection] && Object.keys(collections[selectedCollection]).map((corpus) => (
<option className="dropdown-item" key={corpus} value={corpus}>{corpus}</option>
))}
</select>
<select className="dropdown-button" onChange={(e) => {
setSelectedIndex(e.target.value);
onSelect(e.target.value)}}
>
<option value="" className="dropdown-item">Select</option>
{selectedCorpus && !Array.isArray(collections[selectedCollection])
&& (collections[selectedCollection] as { [key: string]: string[] })[selectedCorpus].map((index) => (
<option className={`dropdown-item ${indexInfoList[index].cached ? 'cached' : ''}`} key={index} value={index}>{indexInfoList[index].corpus} | {indexInfoList[index].model}</option>
))}
</select></>}
</div>
</Select>
{selectedCollection !== null && (
<>
<Select placeholder="Select" onChange={(e) => {
setSelectedCorpus(e.target.value);
setSelectedIndex(null);
}}>
{selectedCollection && collections[selectedCollection] && Object.keys(collections[selectedCollection]).map((corpus) => (
<option key={corpus} value={corpus}>{corpus.replace('MS MARCO', '').replace('BEIR: ', '')}</option>
))}
</Select>
{selectedCorpus && (
<Select placeholder="Select" onChange={(e) => {
setSelectedIndex(e.target.value);
onSelect(e.target.value);
}}>
{selectedCorpus && !Array.isArray(collections[selectedCollection]) &&
(collections[selectedCollection] as { [key: string]: string[] })[selectedCorpus].map((index) => (
<option key={index} value={index} style={{ backgroundColor: indexInfoList[index].cached ? '#c8e6c9' : undefined }}>
{indexInfoList[index].model}
</option>
))}
</Select>
)}
</>
)}
</HStack>
</Box>
);
};

Expand Down
Loading

0 comments on commit 964ffef

Please sign in to comment.