Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[sk] Improved PostgreSQL Data Loader #651

Merged
merged 17 commits into from
Jul 14, 2022
Previous commit
Next commit
[sk] Added test cases
  • Loading branch information
skunichetty committed Jul 14, 2022
commit 3e750eeda8faddf058dc89b4837cdb175a5ebc42
Empty file added mage_ai/tests/io/__init__.py
Empty file.
73 changes: 73 additions & 0 deletions mage_ai/tests/io/test_type_conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from mage_ai.io.type_conversion import infer_dtypes, map_to_postgres
from mage_ai.tests.base_test import TestCase
import datetime
import faker
import numpy as np
import pandas as pd


class TypeConversionTests(TestCase):
    """Check the PostgreSQL type chosen for each column of a sample frame.

    The fixture holds one column per kind of value (names, dates, free text,
    floats, booleans, small and large integers, times, timestamps and time
    deltas). ``infer_dtypes`` is run once over the whole frame in ``setUp`` and
    each test then feeds the per-column result to ``map_to_postgres``.
    """

    def setUp(self):
        """Build the six-row sample frame and infer its column types."""
        # Seed both generators. Seeding only NumPy leaves every Faker-backed
        # column different on each run, so a failure could not be replayed.
        np.random.seed(42)
        fake = faker.Faker()
        fake.seed_instance(42)

        def make_row():
            # Value order must match the `columns` list below.
            return [
                fake.name(),
                fake.date_of_birth(),
                fake.text(max_nb_chars=100),
                np.random.randint(4, 10) / 2,
                np.random.randn() * 100 + 20,
                bool(np.random.uniform() > 0.3),
                np.random.choice(['Mage', 'Data Cleaning', 'Magic', 'Awesome']),
                np.random.randint(0, 100),
                np.random.randint(0, 3),
                fake.date_this_century(),
                fake.time_object(),
                fake.date_time_this_century(),
                fake.time_delta(datetime.timedelta(hours=3)),
                np.random.randint(10000000000, 1000000000000),
            ]

        self.data = pd.DataFrame(
            [make_row() for _ in range(6)],
            columns=[
                'Name',
                'Date of Birth',
                'Review',
                'Stars',
                'Profit',
                'Verified Purchase',
                'Tags',
                'Percentage Used',
                'Number of Snacks',
                'Watch Date',
                'Watch Time',
                'Watch Date And Time',
                'Elapsed Time',
                'Ticket ID',
            ],
        )
        self.dtypes = infer_dtypes(self.data)
        return super().setUp()

    def test_postgres_detection(self):
        """Each column must map to its expected PostgreSQL type name."""
        # Listed in the same order as the frame's columns.
        expected_dtypes = [
            'text',
            'date',
            'text',
            'double precision',
            'double precision',
            'boolean',
            'text',
            'smallint',
            'smallint',
            'date',
            'time',
            'timestamp',
            'bigint',
            'bigint',
        ]
        # zip() stops at the shorter input; without this check a column added
        # to the fixture but not to the list above would go untested.
        self.assertEqual(len(expected_dtypes), len(self.data.columns))
        for column, expected_dtype in zip(self.data.columns, expected_dtypes):
            # subTest names the failing column and lets the loop keep going so
            # every mismatch is reported in a single run.
            with self.subTest(column=column):
                dtype = self.dtypes[column]
                psql_type = map_to_postgres(self.data[column], dtype)
                self.assertEqual(psql_type, expected_dtype)