diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..a62d90d --- /dev/null +++ b/README.md @@ -0,0 +1,21 @@ +# DeTexD: A Benchmark Dataset for Delicate Text Detection + +This is the official repository for the [DeTexD paper](TODO). Here you can find the scripts used in the paper to evaluate models. + +See also: [DeTexD dataset](https://huggingface.co/datasets/grammarly/detexd-benchmark), [detexd-roberta-base model](https://huggingface.co/grammarly/detexd-roberta-base). + +## Install + +```sh +pip install -r requirements.txt +``` + +## Usage + +Run `evaluate_detexd_roberta.py` to evaluate the published model (grammarly/detexd-roberta-base) on the published dataset (grammarly/detexd-benchmark). + +Run `founta_basile_comparison.ipynb` to reproduce the model comparison results from the paper. Note that you need to acquire those datasets yourself because they are distributed under separate licenses. + +Run `country_bias.ipynb` to reproduce the country bias analysis. + +Run `compare_hatebert.ipynb` to reproduce the HateBERT model comparison. diff --git a/compare_hatebert.ipynb b/compare_hatebert.ipynb new file mode 100644 index 0000000..89defa2 --- /dev/null +++ b/compare_hatebert.ipynb @@ -0,0 +1,256 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "c1e3646c-fe09-45ad-96a3-28b12be8abb5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset({\n", + " features: ['text', 'annotator_1', 'annotator_2', 'annotator_3', 'label'],\n", + " num_rows: 1023\n", + "})\n", + "label\n", + "0 687\n", + "1 336\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "from datasets import load_dataset\n", + "\n", + "dataset = load_dataset(\"grammarly/detexd-benchmark\", split='test')\n", + "print(dataset)\n", + "print(dataset.to_pandas().label.value_counts())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "444670f0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Download the HateBERT models\n", + "# https://arxiv.org/pdf/2010.12472.pdf\n", + "# https://osf.io/tbd58/\n", + "!wget https://files.de-1.osf.io/v1/resources/tbd58/providers/osfstorage/?zip= -O hatebert.zip\n", + "!mkdir hatebert\n", + "!unzip hatebert.zip -d hatebert\n", + "!rm hatebert.zip\n", + "\n", + "!unzip hatebert/HateBERT_fine_tuned_models/HateBERT_abuseval.zip -d hatebert/HateBERT_fine_tuned_models\n", + "!unzip hatebert/HateBERT_fine_tuned_models/HateBERT_hateval.zip -d hatebert/HateBERT_fine_tuned_models\n", + "!unzip hatebert/HateBERT_fine_tuned_models/HateBERT_offenseval.zip -d hatebert/HateBERT_fine_tuned_models\n", + "!rm hatebert/HateBERT_fine_tuned_models/HateBERT_abuseval.zip\n", + "!rm hatebert/HateBERT_fine_tuned_models/HateBERT_hateval.zip\n", + "!rm hatebert/HateBERT_fine_tuned_models/HateBERT_offenseval.zip" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11fbe846", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import pipeline\n", + "from sklearn.metrics import precision_recall_fscore_support\n", + "from tqdm.auto import tqdm\n", + "from sklearn.metrics import
precision_recall_curve, f1_score\n", + "import numpy as np\n", + "from transformers.pipelines.pt_utils import KeyDataset\n", + "import pandas as pd\n", + "\n", + "metrics = []\n", + "for name in tqdm(['hatebert/HateBERT_fine_tuned_models/HateBERT_abuseval',\n", + " 'hatebert/HateBERT_fine_tuned_models/HateBERT_hateval',\n", + " 'hatebert/HateBERT_fine_tuned_models/HateBERT_offenseval']):\n", + " pipe = pipeline(\"text-classification\", model=name, device=0, batch_size=8)\n", + " pipe.model.config.id2label = [0, 1]\n", + " preds = tqdm(pipe(KeyDataset(dataset, 'text'), truncation=True, top_k=None), total=len(dataset))\n", + " scores = np.array([next(p['score']\n", + " for p in pr if p['label'] == 1)\n", + " for pr in preds])\n", + "\n", + " precision, recall, thresholds = precision_recall_curve(dataset['label'], scores)\n", + " f_scores = 2*(precision*recall)/(precision+recall)\n", + " optimal_threshold_index = np.argmax(f_scores)\n", + " optimal_threshold = thresholds[optimal_threshold_index]\n", + " for tag, threshold in [('', 0.5), ('_opt', optimal_threshold)]:\n", + " preds = scores > threshold\n", + " metrics.append((name + tag,) + precision_recall_fscore_support(dataset['label'], preds, average='binary')[:-1])\n", + " \n", + "metrics = pd.DataFrame(metrics, columns=['model', 'precision', 'recall', 'f1'])\n", + "metrics.model = metrics.model.str.split('/').str[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4a738076", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
<tr><th></th><th>model</th><th>precision</th><th>recall</th><th>f1</th></tr>\n",
+        "<tr><th>0</th><td>HateBERT_abuseval</td><td>86.7%</td><td>11.6%</td><td>20.5%</td></tr>\n",
+        "<tr><th>1</th><td>HateBERT_abuseval_opt</td><td>57.0%</td><td>70.2%</td><td>62.9%</td></tr>\n",
+        "<tr><th>2</th><td>HateBERT_hateval</td><td>95.2%</td><td>6.0%</td><td>11.2%</td></tr>\n",
+        "<tr><th>3</th><td>HateBERT_hateval_opt</td><td>41.1%</td><td>86.0%</td><td>55.6%</td></tr>\n",
+        "<tr><th>4</th><td>HateBERT_offenseval</td><td>75.4%</td><td>31.0%</td><td>43.9%</td></tr>\n",
+        "<tr><th>5</th><td>HateBERT_offenseval_opt</td><td>60.1%</td><td>72.6%</td><td>65.8%</td></tr>
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metrics.style.format('{:.1%}', subset=['precision', 'recall', 'f1'])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e9e46544-c16a-4963-8a53-23187dca753a", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "570e34a01db24d98851e86dfc70534b0", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/2805 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import plotly.express as px\n", + "from IPython.display import Image, display\n", + "\n", + "px.choropleth(countries_df,\n", + " locations=\"Country\",\n", + " # locationmode='country names',\n", + " featureidkey=\"properties.ADMIN\",\n", + " geojson=countries_json,\n", + " color=\"Score\").write_image(\"tmp.png\")\n", + "display(Image(filename=\"tmp.png\"))\n", + "!rm tmp.png" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/evaluate_detexd_roberta.py b/evaluate_detexd_roberta.py new file mode 100644 index 0000000..2bba059 --- /dev/null +++ b/evaluate_detexd_roberta.py @@ -0,0 +1,32 @@ +from transformers import pipeline +from datasets import load_dataset +from sklearn.metrics import precision_recall_fscore_support +from tqdm.auto import tqdm +from transformers.pipelines.pt_utils import KeyDataset + + +def predict_binary_scores(classifier, texts): + # get multiclass probability scores + all_scores = tqdm(classifier(texts, top_k=None, truncation=True), total=len(texts)) + + # convert to a single score by summing the probability scores + # for the higher-index classes + return [sum(score['score'] + for score in scores + if score['label'] in ('LABEL_3', 'LABEL_4', 'LABEL_5')) + for scores in all_scores] + + +def predict_delicate(classifier, texts, threshold=0.72496545): + return [result > threshold for result in predict_binary_scores(classifier, texts)] + + +if __name__ == '__main__': + dataset = load_dataset("grammarly/detexd-benchmark", split='test') + classifier = pipeline("text-classification", model="grammarly/detexd-roberta-base", device=0) + predictions = predict_delicate(classifier, KeyDataset(dataset, 'text')) + + precision, recall, f_score, _ = precision_recall_fscore_support(y_true=dataset['label'], y_pred=predictions, average='binary') + print(f'precision = {precision:.1%}') # 81.4% + print(f'recall = {recall:.1%}') # 78.3% + print(f'f_score = {f_score:.1%}') # 79.8% diff --git a/founta_basile_comparison.ipynb b/founta_basile_comparison.ipynb new file mode 100644 index 0000000..9e20ef9 --- /dev/null +++ b/founta_basile_comparison.ipynb @@ -0,0 +1,615 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "fb8de06f-6ad3-4732-b42c-92bce13e437e", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Xformers is not installed correctly. 
If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers\n",
+    "pip install xformers.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "dd27add27bee4f4a83923912788b1b88",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/2 [00:00 100k\n",
+    "# there's no information on what changed\n",
+    "# the rows are shuffled and ids removed, so we cannot restore the original dataset\n",
+    "df.label.value_counts().sort_index()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "90314b13-b41e-40ac-8491-5814b2a60c08",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# remove spam, since it contains unfiltered delicate texts such as porn\n",
+    "df = df[df.label != 'spam']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "401d46ce-fbfa-4755-ae52-5418a884451e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "label\n",
+       "abusive    27150\n",
+       "hateful     4965\n",
+       "normal     53851\n",
+       "Name: count, dtype: int64"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.label.value_counts().sort_index()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "201931ae-5125-4d3c-b98e-2145603f28ac",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a59551635ef44e6f88201a54e4603af9",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/85966 [00:00\n",
+        "\n",
+        " \n",
+        " \n",
+        " \n",
+        " \n",
+        "
<tr><th></th><th>precision</th><th>recall</th><th>f-score</th></tr>\n",
+        "<tr><th>Founta</th><td>76.3%</td><td>66.6%</td><td>71.1%</td></tr>\n",
+        "<tr><th>Basile</th><td>47.5%</td><td>89.0%</td><td>62.0%</td></tr>
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame([founta_results, basile_results],\n", + " columns=['precision', 'recall', 'f-score', '_'],\n", + " index=['Founta', 'Basile']\n", + " ).iloc[:, :3].style.format('{:.1%}'.format)" + ] + }, + { + "cell_type": "markdown", + "id": "3143de6c-8a48-4bfc-b5c7-4f4205c5a556", + "metadata": {}, + "source": [ + "### Restoring precision and recall" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d8f4f226-b7bb-43f1-b649-b1f978db1fbb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "precision: 56.1%\n", + "recall: 77.3%\n", + "f1: 65.0%\n" + ] + } + ], + "source": [ + "def positive_class_metrics(positive_class_percent, macro_f1, accuracy):\n", + " def f1_macro_fn(tp):\n", + " tn = accuracy - tp\n", + " fp = negative_class_percent - tn\n", + " fn = positive_class_percent - tp\n", + " p1 = tp / (tp + fp + 1e-100)\n", + " r1 = tp / (tp + fn + 1e-100)\n", + " p0 = tn / (tn + fn + 1e-100)\n", + " r0 = tn / (tn + fp + 1e-100)\n", + " f1_fn = p0 * r0 / (p0 + r0 + 1e-100) + p1 * r1 / (p1 + r1 + 1e-100)\n", + " return f1_fn, p1, r1\n", + "\n", + " negative_class_percent = 1 - positive_class_percent\n", + " n = 1000\n", + " diff, tp_opt = min((abs(f1_macro_fn(tp / n)[0] - macro_f1), tp / n) for tp in range(n + 1))\n", + " _, p1, r1 = f1_macro_fn(tp_opt)\n", + " f1 = 2 / (1 / p1 + 1 / r1)\n", + " print(f'precision: {p1:.1%}')\n", + " print(f'recall: {r1:.1%}')\n", + " print(f'f1: {f1:.1%}')\n", + "\n", + "\n", + "positive_class_metrics(\n", + " positive_class_percent=0.4206773618538324,\n", + " accuracy=0.65,\n", + " macro_f1=0.651)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..311ce9b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +torch +transformers +datasets +scikit-learn +tqdm +pandas +plotly +kaleido \ No newline at end of file
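## Quick usage sketch

For spot-checking the published classifier outside of the evaluation script, the following is a minimal sketch that reuses the `predict_binary_scores` and `predict_delicate` helpers from `evaluate_detexd_roberta.py` above, including its tuned threshold of 0.72496545. The example texts and the CPU device setting are illustrative assumptions, not values from the paper.

```python
# Minimal sketch: score a few texts with the published DeTexD model.
# Assumes evaluate_detexd_roberta.py from this patch is on the import path.
from transformers import pipeline

from evaluate_detexd_roberta import predict_binary_scores, predict_delicate

# device=-1 runs on CPU; the scripts in this repo use device=0 (first GPU).
classifier = pipeline("text-classification",
                      model="grammarly/detexd-roberta-base",
                      device=-1)

texts = [
    "Have a great day!",                   # illustrative inputs,
    "This topic may upset some readers.",  # not from the benchmark
]

print(predict_binary_scores(classifier, texts))  # per-text scores in [0, 1]
print(predict_delicate(classifier, texts))       # booleans at the tuned threshold
```

As the script's comments note, `predict_binary_scores` sums the probabilities of the higher-index classes (LABEL_3, LABEL_4, LABEL_5) into a single delicateness score, so the threshold in `predict_delicate` is applied to that sum rather than to any single class probability.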