Skip to content

Commit

Permalink
Update for Dec'21
Browse files Browse the repository at this point in the history
  • Loading branch information
sdht0 committed Dec 2, 2021
1 parent 1950d6b commit 81b3e3f
Show file tree
Hide file tree
Showing 67 changed files with 405 additions and 120,979 deletions.
121 changes: 65 additions & 56 deletions Automate.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "2f4c1ab9-5cd0-4524-850a-3a133d3c6c95",
"metadata": {},
"outputs": [],
Expand All @@ -34,13 +34,13 @@
"# Necessary: A correct map from the DBLP IDs to the DSG website Node IDs. This is maintained in the `dblp_to_web_mappings.txt` file.\n",
"# When needed: Selenium can also be used to mass update existing entries. Adapt from the code for adding new entries.\n",
"\n",
"working_date = \"2021.11.01\" # Set to the date the entries are being updated on.\n",
"previous_date = \"2021.10.01\" # Set to the date the entries were last updated. This is usually the last `working_date` from above, or the most recent date in the `archive` folder."
"working_date = \"2021.12.01\" # Set to the date the entries are being updated on.\n",
"previous_date = \"2021.11.01\" # Set to the date the entries were last updated. This is usually the last `working_date` from above, or the most recent date in the `archive` folder."
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"id": "2893840d-cdff-4f26-9bcb-cbbf1e799efd",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -150,10 +150,32 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "02fe688c-6051-41b1-84a0-e4bdd2d8c930",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading for ('CharlieClarke', '96/3666', ['Charles L. A. Clarke'], 'Charles Clarke')\n",
"Downloading for ('GordonVCormack', 'c/GVCormack', ['Gordon V. Cormack'], 'Gordon Cormack')\n",
"Downloading for ('KhuzaimaDaudjee', 'd/KhuzaimaDaudjee', ['Khuzaima Daudjee'], 'Khuzaima Daudjee')\n",
"Downloading for ('LukaszGolab', '42/3296', ['Lukasz Golab'], 'Lukasz Golab')\n",
"Downloading for ('MauraGrossman', '122/5875', ['Maura R. Grossman'], 'Maura Grossman')\n",
"Downloading for ('XiHe', '28/949-1', ['Xi He'], 'Xi He')\n",
"Downloading for ('IhabFIlyas', 'i/IhabFIlyas', ['Ihab F. Ilyas', 'Ihab Francis Ilyas'], 'Ihab Ilyas')\n",
"Downloading for ('JimmyLin', '00/7739', ['Jimmy J. Lin', 'Jimmy Lin'], 'Jimmy Lin')\n",
"Downloading for ('TamerOzsu', 'o/MTamerOzsu', ['M. Tamer Özsu'], 'Tamer Özsu')\n",
"Downloading for ('KennethSalem', 's/KennethSalem', ['Kenneth Salem', 'Ken Salem'], 'Ken Salem')\n",
"Downloading for ('SemihSalihoglu', '55/6560', ['Semih Salihoglu'], 'Semih Salihoglu')\n",
"Downloading for ('MarkDSmucker', '07/801', ['Mark D. Smucker'], 'Mark Smucker')\n",
"Downloading for ('DavidToman', 't/DavidToman', ['David Toman'], 'David Toman')\n",
"Downloading for ('FrankTompa', 't/FrankWmTompa', ['Frank Wm. Tompa'], 'Frank Tompa')\n",
"Downloading for ('GrantWeddell', '81/5447', ['Grant E. Weddell'], 'Grant Weddell')\n"
]
}
],
"source": [
"# Download bib files for individual authors and save to disk.\n",
"authors_path = get_authors_path(working_date)\n",
Expand All @@ -170,61 +192,61 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"id": "a61fdb26-cabf-458c-b3ba-09c7707d473e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Working date = 2021.11.01\n",
"len(CharlieClarke) = 218\n",
"Working date = 2021.12.01\n",
"len(CharlieClarke) = 219\n",
"len(GordonVCormack) = 130\n",
"len(KhuzaimaDaudjee) = 57\n",
"len(LukaszGolab) = 131\n",
"len(LukaszGolab) = 132\n",
"len(MauraGrossman) = 34\n",
"len(XiHe) = 31\n",
"len(XiHe) = 34\n",
"len(IhabFIlyas) = 142\n",
"len(JimmyLin) = 450\n",
"len(TamerOzsu) = 287\n",
"len(JimmyLin) = 456\n",
"len(TamerOzsu) = 288\n",
"len(KennethSalem) = 91\n",
"len(SemihSalihoglu) = 50\n",
"len(MarkDSmucker) = 74\n",
"len(DavidToman) = 126\n",
"len(DavidToman) = 127\n",
"len(FrankTompa) = 89\n",
"len(GrantWeddell) = 90\n",
"total entries = 2000, deduplicated and filtered entries = 1691\n",
"article 548\n",
"inproceedings 1077\n",
"total entries = 2013, deduplicated and filtered entries = 1704\n",
"article 552\n",
"inproceedings 1086\n",
"incollection 49\n",
"book 14\n",
"phdthesis 3\n",
"total = 1691\n",
"total = 1704\n",
"\n",
"Working date = 2021.10.01\n",
"Working date = 2021.11.01\n",
"len(CharlieClarke) = 218\n",
"len(GordonVCormack) = 129\n",
"len(GordonVCormack) = 130\n",
"len(KhuzaimaDaudjee) = 57\n",
"len(LukaszGolab) = 131\n",
"len(MauraGrossman) = 33\n",
"len(MauraGrossman) = 34\n",
"len(XiHe) = 31\n",
"len(IhabFIlyas) = 142\n",
"len(JimmyLin) = 447\n",
"len(JimmyLin) = 450\n",
"len(TamerOzsu) = 287\n",
"len(KennethSalem) = 91\n",
"len(SemihSalihoglu) = 50\n",
"len(MarkDSmucker) = 74\n",
"len(DavidToman) = 126\n",
"len(FrankTompa) = 89\n",
"len(GrantWeddell) = 90\n",
"total entries = 1995, deduplicated and filtered entries = 1687\n",
"article 544\n",
"total entries = 2000, deduplicated and filtered entries = 1691\n",
"article 548\n",
"inproceedings 1077\n",
"incollection 49\n",
"book 14\n",
"phdthesis 3\n",
"total = 1687\n"
"total = 1691\n"
]
}
],
Expand All @@ -237,15 +259,16 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"id": "154225ed-ba2b-4aa3-90d9-c8fc91cd28a4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"New = 4 Deleted = 0\n",
"New = 13 Deleted = 0\n",
"inproceedings 9\n",
"article 4\n"
]
}
Expand All @@ -272,7 +295,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"id": "2661a3bc-20b9-41a6-8e00-75c3e6b6b184",
"metadata": {
"scrolled": true,
Expand Down Expand Up @@ -302,7 +325,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 8,
"id": "35cd6385-c401-475f-a2bb-db2d9816a726",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -809,15 +832,15 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 9,
"id": "710d51ff-b272-4f0b-8c63-00b2919b3d81",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4\n"
"13\n"
]
}
],
Expand Down Expand Up @@ -885,7 +908,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 10,
"id": "4413e38c-ee55-4e88-ac66-3f5eb147dd80",
"metadata": {
"tags": []
Expand All @@ -911,7 +934,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 11,
"id": "8d117232-66ab-4bbb-90f8-3adbdd56fbb6",
"metadata": {},
"outputs": [],
Expand All @@ -929,14 +952,14 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 18,
"id": "ca73dd03-119d-4be7-b5a7-7a80424ea806",
"metadata": {},
"outputs": [],
"source": [
"# Set the correct values:\n",
"driver_path = \"/home/sdh/Downloads/installations/chromedriver\" # Chromium driver path, downloaded and extracted from https://chromedriver.chromium.org/downloads\n",
"cookie_value = \"RyLKUE5ZgrerBYUQK4WULIcnz5l3u793aR2Hgxu5tNc\" # Login to 'My Workbench' and copy the cookie value of key 'SSESSc2214f6938283aa908e28c4bfa176f5b'\n",
"cookie_value = \"5xWe9H33SnYhAxWEzacc7V_VUIh8qSBr1MILdhoTdSI\" # Login to 'My Workbench' and copy the cookie value of key 'SSESSc2214f6938283aa908e28c4bfa176f5b'\n",
"\n",
"# Now start a browser instance and set cookies required to access 'My Workbench'. If you get an \"Access denied\" error, the cookie value was incorrect.\n",
"browser = webdriver.Chrome(driver_path)\n",
Expand All @@ -950,7 +973,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 19,
"id": "2e856f06-3453-4971-84a4-fbd4bbb4dcb5",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -1085,15 +1108,15 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 20,
"id": "acebe66b-e422-47b5-8a29-d8f5db3612f6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"9\n"
"13\n"
]
}
],
Expand All @@ -1105,7 +1128,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 21,
"id": "8b511efe-9dee-4f45-b958-2fa74aa4eb7a",
"metadata": {},
"outputs": [],
Expand All @@ -1116,26 +1139,12 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": null,
"id": "3105d2dc-4675-4b3d-9cd0-3dad2fc7ce12",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Posted: 2 The Future Is Big Graphs: A Community View on Graph Processing Systems DBLP:journals/cacm/SakrBVIAAAABBDV21 5160\n",
"Posted: 3 Serverless BM25 Search and BERT Reranking DBLP:conf/desires/AnandZDXL21 5161\n",
"Posted: 4 On the Separation of Logical and Physical Ranking Models for Text Retrieval Applications DBLP:conf/desires/LinMMM21 5162\n",
"Posted: 5 The Simplest Thing That Can Possibly Work: (Pseudo-)Relevance Feedback Via Text Classification DBLP:conf/ictir/HanLL21 5163\n",
"Posted: 6 Cross-Lingual Training With Dense Retrieval for Document Retrieval DBLP:journals/corr/abs-2109-01628 5164\n",
"Posted: 7 FO Rewritability for OMQ Using Beth Definability and Interpolation DBLP:conf/dlog/TomanW21 5165\n",
"Posted: 8 Dowsing for Math Answers DBLP:conf/clef/NgFKT20 5166\n"
]
}
],
"outputs": [],
"source": [
"# Publish new entries to the website using Selenium. \n",
"pp = pprint.PrettyPrinter()\n",
Expand Down Expand Up @@ -1179,7 +1188,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 23,
"id": "dc1e1250-25ec-4f06-9dc0-6fad139e6ba5",
"metadata": {},
"outputs": [],
Expand Down
Loading

0 comments on commit 81b3e3f

Please sign in to comment.