diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..e69de29 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..3941dc8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2020, CleverInsight +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..e69de29 diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..6dbbd68 --- /dev/null +++ b/README.rst @@ -0,0 +1,70 @@ +.. 
image:: https://cognito.readthedocs.io/en/latest/_images/logo.png + :target: http://cognito.readthedocs.org + :width: 200pt + +predicteasy : powerful autoML toolkit +========================================== + + ++----------------------+------------------------+ +| Deployment | |pypi| |conda| | ++----------------------+------------------------+ +| Build Status | |travis| | ++----------------------+------------------------+ +| Metrics | |coverall| | ++----------------------+------------------------+ +| GitHub | |contributors| |stars| | ++----------------------+------------------------+ +| License | |BSD| | ++----------------------+------------------------+ +| Community | |gitter| | ++----------------------+------------------------+ + + +PredictEasy is an exclusive python autoML library and command line utility that helps any developer to transform raw data into a machine-learning format. + + +Installation +------------ + +**Prerequisite** + +- Python3. + +Install the extension by using pip. + +.. code:: bash + + $ pip install predicteasy + + + +Contributors +============== + +.. image:: https://avatars3.githubusercontent.com/u/3523655?s=60&v=4 + :target: https://github.com/BastinRobin +.. image:: https://avatars2.githubusercontent.com/u/59742431?s=60&v=4 + :target: https://github.com/nibir-paul +.. image:: https://avatars0.githubusercontent.com/u/32188887?s=60&v=4 + :target: https://github.com/tanvirath +.. 
image:: https://avatars1.githubusercontent.com/u/29769264?s=60&v=4 + :target: https://github.com/vandana-11 + + + +Core Lead +---------- +* `Bastin Robins J <https://github.com/BastinRobin>`__ + +Development Leads +-------------------- + +* `Vandana Bhagat <https://github.com/vandana-11>`__ + + +Data Science Team +----------------- + +* `Nibir Paul <https://github.com/nibir-paul>`__ +* `Tanvi Rath <https://github.com/tanvirath>`__ diff --git a/predicteasy/__init__.py b/predicteasy/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/predicteasy/core/__init__.py b/predicteasy/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/predicteasy/core/__pycache__/__init__.cpython-37.pyc b/predicteasy/core/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..59be56c Binary files /dev/null and b/predicteasy/core/__pycache__/__init__.cpython-37.pyc differ diff --git a/predicteasy/core/nlp/__init__.py b/predicteasy/core/nlp/__init__.py new file mode 100644 index 0000000..48240a6 --- /dev/null +++ b/predicteasy/core/nlp/__init__.py @@ -0,0 +1,5 @@ +from predicteasy.core.nlp.sentiment import * +from predicteasy.core.nlp.summarize import * +from predicteasy.core.nlp.spelling import * + + diff --git a/predicteasy/core/nlp/__pycache__/__init__.cpython-37.pyc b/predicteasy/core/nlp/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..4cbce34 Binary files /dev/null and b/predicteasy/core/nlp/__pycache__/__init__.cpython-37.pyc differ diff --git a/predicteasy/core/nlp/__pycache__/sentiment.cpython-37.pyc b/predicteasy/core/nlp/__pycache__/sentiment.cpython-37.pyc new file mode 100644 index 0000000..35da070 Binary files /dev/null and b/predicteasy/core/nlp/__pycache__/sentiment.cpython-37.pyc differ diff --git a/predicteasy/core/nlp/__pycache__/spelling.cpython-37.pyc b/predicteasy/core/nlp/__pycache__/spelling.cpython-37.pyc new file mode 100644 index 0000000..68e99d6 Binary files /dev/null and b/predicteasy/core/nlp/__pycache__/spelling.cpython-37.pyc differ diff --git a/predicteasy/core/nlp/__pycache__/summarize.cpython-37.pyc
class SpellCheck:
    """
    Spell-correct either a single string or one column of CSV text.

    :param text: The string to correct, or (when ``multiple`` is true)
                 the full contents of a CSV document as a string
    :type text: str
    :param multiple: When true, ``text`` is parsed as CSV and only
                     ``column`` is corrected
    :type multiple: bool
    :param column: Name of the CSV column to correct; surrounding
                   whitespace is ignored. Only read when ``multiple``
                   is true
    :type column: str
    """

    def __init__(self, text, multiple=False, column=""):
        self.data = text
        self.multiple = multiple
        self.column = column

    def spell_apply(self, data):
        """
        Return ``data`` with its spelling corrected by
        ``TextBlob(data).correct()``.

        :param data: The text to correct
        :type data: str

        :returns: The corrected text
        :rtype: str
        """
        return str(TextBlob(data).correct())

    def correct(self):
        """
        Correct the spelling of the text given to the constructor.

        In single-string mode the corrected string is returned directly.
        In ``multiple`` mode the text is parsed as CSV, the header names
        are stripped of surrounding whitespace, and every non-missing
        cell of the requested column is replaced by its corrected text.

        :returns: The corrected string, or in ``multiple`` mode a dict
                  with ``data`` (the whole table as a JSON
                  ``records`` string) and ``summary`` (an empty list)
        :rtype: str or dict

        :raises KeyError: in ``multiple`` mode, when the stripped column
                          name is not one of the CSV headers
        """
        if not self.multiple:
            return self.spell_apply(self.data)

        frame = pd.read_csv(io.StringIO(self.data), lineterminator='\n')
        # Headers such as "name , note" would otherwise keep their padding
        # and never match the requested column name.
        frame.rename(columns=lambda name: name.strip(), inplace=True)

        target = self.column.strip()
        # Write the corrected values back into the frame: previously the
        # corrected column was computed and then discarded, so the JSON
        # returned below still contained the uncorrected text.
        # na_action='ignore' leaves empty cells (NaN) as they are instead
        # of handing a float to spell_apply.
        frame[target] = frame[target].map(self.spell_apply,
                                          na_action='ignore')
        return dict(data=frame.to_json(orient='records'), summary=[])
:: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python', + 'Topic :: Internet :: WWW/HTTP', + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Topic :: Utilities', + 'Topic :: Scientific/Engineering' + +] + + +DOWNLOAD_URL = "" +PROJECT_URLS = { + "Bug Tracker": "https://github.com/CleverInsight/predicteasy-core", + "Documentation": "https://predicteasy.readthedocs.io/en/latest/", + "Source Code": "https://github.com/CleverInsight/predicteasy-core", +} + + +setup(name='predicteasy', + version='0.0.1', + description=u"Auto ML simplified", + long_description=LONG_DESCRIPTION, + classifiers=CLASSIFIERS, + keywords=['AutoML', 'Automated Data Storyteller', 'Data Wrangler', 'Data Preprocessing',\ + 'Machine Learning', 'Hot Encoder', 'Outlier Detection'], + author=u"Bastin Robins .J", + author_email='robin@cleverinsight.co', + url='https://github.com/cleverinsight', + download_url='https://github.com/CleverInsight/predicteasy-core/releases', + project_urls=PROJECT_URLS, + license='BSD', + packages=[pkg for pkg in find_packages() if not pkg.startswith('test')], + include_package_data=True, + zip_safe=False, + install_requires=REQUIRES, + + extras_require={ + 'test': ['pytest'], + }, + + entry_points=""" + [console_scripts] + cognito=cognito.scripts.cli:cli + """) diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..6d7839e --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,9 @@ +from click.testing import CliRunner + +# from cognito.scripts.cli import cli + + +# def test_cli_count(): +# runner = CliRunner() +# result = runner.invoke(cli, ['3']) +# assert result.exit_code == 0 \ No newline at end of file diff --git a/tests/test_modules.py b/tests/test_modules.py new file mode 100644 index 0000000..e69de29