diff --git a/dev-support/csd/scripts/generate_data.sh b/dev-support/csd/scripts/generate_data.sh
index 7d75999..59afccf 100755
--- a/dev-support/csd/scripts/generate_data.sh
+++ b/dev-support/csd/scripts/generate_data.sh
@@ -35,10 +35,10 @@ then
   exit 1
 fi
 
-DATAGEN_URL="http://localhost:${SERVER_PORT}"
+DATAGEN_URL="http://localhost:${SERVER_PORT}/api/v1"
 if [ "${TLS_ENABLED}" = "true" ]
 then
-  DATAGEN_URL="https://localhost:${SERVER_PORT}"
+  DATAGEN_URL="https://localhost:${SERVER_PORT}/api/v1"
 fi
 
 # Generic Function to generate Data
diff --git a/pom.xml b/pom.xml
index 24225dd..c9654d2 100755
--- a/pom.xml
+++ b/pom.xml
@@ -87,7 +87,11 @@
 
     <dependency>
       <groupId>io.springboot.ai</groupId>
-      <artifactId>spring-ai-ollama-spring-boot-starter</artifactId>
+      <artifactId>spring-ai-ollama</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>io.springboot.ai</groupId>
+      <artifactId>spring-ai-openai</artifactId>
     </dependency>
 
     <dependency>
@@ -112,6 +116,21 @@
       <version>2.7.0</version>
     </dependency>
 
+
+    <dependency>
+      <groupId>org.graalvm.polyglot</groupId>
+      <artifactId>polyglot</artifactId>
+      <version>24.0.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.graalvm.polyglot</groupId>
+      <artifactId>js-community</artifactId>
+      <version>24.0.1</version>
+      <type>pom</type>
+      <scope>runtime</scope>
+    </dependency>
+
+    <dependency>
       <groupId>org.apache.hadoop</groupId>
@@ -218,6 +237,10 @@
       <groupId>software.amazon.awssdk</groupId>
       <artifactId>regions</artifactId>
     </dependency>
+    <dependency>
+      <groupId>software.amazon.awssdk</groupId>
+      <artifactId>bedrockruntime</artifactId>
+    </dependency>
     <dependency>
       <groupId>software.amazon.awssdk.crt</groupId>
diff --git a/src/main/java/com/datagen/config/ApplicationConfigMapper.java b/src/main/java/com/datagen/config/ApplicationConfigMapper.java
index 63ca33d..84d8a44 100755
--- a/src/main/java/com/datagen/config/ApplicationConfigMapper.java
+++ b/src/main/java/com/datagen/config/ApplicationConfigMapper.java
@@ -21,7 +21,10 @@
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.stereotype.Component;
 
+import java.util.Collections;
 import java.util.Locale;
+import java.util.Map;
+import java.util.stream.Collectors;
 
 @Slf4j
@@ -30,211 +33,36 @@ public class ApplicationConfigMapper {
 
   public static ApplicationConfigs getApplicationConfigFromProperty(
       String propertyName) {
-    switch (propertyName.toLowerCase(Locale.ROOT)) {
-      case "app.name":
-        return ApplicationConfigs.APP_NAME;
-      case "app.port":
-        return ApplicationConfigs.APP_PORT;
-      case "hadoop.user":
-        return ApplicationConfigs.HADOOP_USER;
-      case "hadoop.home":
-        return ApplicationConfigs.HADOOP_HOME;
-      case "generation.threads.default":
-        return ApplicationConfigs.THREADS;
-      case "generation.batches.default":
-        return ApplicationConfigs.NUMBER_OF_BATCHES_DEFAULT;
-      case "generation.rows.default":
-        return ApplicationConfigs.NUMBER_OF_ROWS_DEFAULT;
-      case "datagen.home.directory":
-        return ApplicationConfigs.DATA_HOME_DIRECTORY;
-      case "datagen.model.path":
-        return ApplicationConfigs.DATA_MODEL_PATH_DEFAULT;
-      case "datagen.model.received.path":
-        return ApplicationConfigs.DATA_MODEL_RECEIVED_PATH;
-      case "datagen.model.generated.path":
-        return ApplicationConfigs.DATA_MODEL_GENERATED_PATH;
-      case "datagen.model.default":
-        return ApplicationConfigs.DATA_MODEL_DEFAULT;
-      case "datagen.custom.model":
-        return ApplicationConfigs.CUSTOM_DATA_MODEL_DEFAULT;
-      case "datagen.scheduler.file.path":
-        return ApplicationConfigs.SCHEDULER_FILE_PATH;
-      case "kerberos.enabled":
-        return ApplicationConfigs.KERBEROS_ENABLED;
-      case "kerberos.user":
-        return ApplicationConfigs.KERBEROS_USER;
-      case "kerberos.keytab":
-        return ApplicationConfigs.KERBEROS_KEYTAB;
-      case "tls.enabled":
-        return ApplicationConfigs.TLS_ENABLED;
-      case "truststore.location":
-        return ApplicationConfigs.TRUSTSTORE_LOCATION;
-      case "truststore.password":
-        return ApplicationConfigs.TRUSTSTORE_PASSWORD;
-      case "keystore.location":
-        return ApplicationConfigs.KEYSTORE_LOCATION;
-      case "keystore.password":
-        return ApplicationConfigs.KEYSTORE_PASSWORD;
-      case "keystore.keypassword":
-        return ApplicationConfigs.KEYSTORE_KEYPASSWORD;
-      case "admin.user":
-        return ApplicationConfigs.ADMIN_USER;
-      case "admin.password":
-        return ApplicationConfigs.ADMIN_PASSWORD;
-      case "hadoop.core.site.path":
-        return ApplicationConfigs.HADOOP_CORE_SITE_PATH;
-      case "hadoop.hdfs.site.path":
-        return ApplicationConfigs.HADOOP_HDFS_SITE_PATH;
-      case "hadoop.ozone.site.path":
-        return ApplicationConfigs.HADOOP_OZONE_SITE_PATH;
-      case "hadoop.hbase.site.path":
-        return ApplicationConfigs.HADOOP_HBASE_SITE_PATH;
-      case "hadoop.hive.site.path":
-        return ApplicationConfigs.HADOOP_HIVE_SITE_PATH;
-      case "cm.autodiscovery":
-        return ApplicationConfigs.CM_AUTO_DISCOVERY;
-      case "cm.url":
-        return ApplicationConfigs.CM_URL;
-      case "cm.user":
-        return ApplicationConfigs.CM_USER;
-      case "cm.password":
-        return ApplicationConfigs.CM_PASSWORD;
-      case "cm.cluster.name":
-        return ApplicationConfigs.CM_CLUSTER_NAME;
-      case "solr.env.path":
-        return ApplicationConfigs.SOLR_ENV_PATH;
-      case "kafka.conf.client.path":
-        return ApplicationConfigs.KAFKA_CONF_CLIENT_PATH;
-      case "kafka.conf.cluster.path":
-        return ApplicationConfigs.KAFKA_CONF_CLUSTER_PATH;
-      case "schema.registry.conf.path":
-        return ApplicationConfigs.SCHEMA_REGISTRY_CONF_PATH;
-      case "kudu.conf.path":
-        return ApplicationConfigs.KUDU_CONF_PATH;
-      case "hdfs.uri":
-        return ApplicationConfigs.HDFS_URI;
-      case "hdfs.auth.kerberos":
-        return ApplicationConfigs.HDFS_AUTH_KERBEROS;
-      case "hdfs.auth.kerberos.user":
-        return ApplicationConfigs.HDFS_AUTH_KERBEROS_USER;
-      case "hdfs.auth.kerberos.keytab":
-        return ApplicationConfigs.HDFS_AUTH_KERBEROS_KEYTAB;
-      case "hbase.zookeeper.quorum":
-        return ApplicationConfigs.HBASE_ZK_QUORUM;
-      case "hbase.zookeeper.port":
-        return ApplicationConfigs.HBASE_ZK_QUORUM_PORT;
-      case "hbase.zookeeper.znode":
-        return ApplicationConfigs.HBASE_ZK_ZNODE;
-      case "hbase.auth.kerberos":
-        return ApplicationConfigs.HBASE_AUTH_KERBEROS;
-      case "hbase.security.user":
-        return ApplicationConfigs.HBASE_AUTH_KERBEROS_USER;
-      case "hbase.security.keytab":
-        return ApplicationConfigs.HBASE_AUTH_KERBEROS_KEYTAB;
-      case "ozone.service.id":
-        return ApplicationConfigs.OZONE_SERVICE_ID;
-      case "ozone.auth.kerberos":
-        return ApplicationConfigs.OZONE_AUTH_KERBEROS;
-      case "ozone.auth.kerberos.user":
-        return ApplicationConfigs.OZONE_AUTH_KERBEROS_USER;
-      case "ozone.auth.kerberos.keytab":
-        return ApplicationConfigs.OZONE_AUTH_KERBEROS_KEYTAB;
-      case "hive.zookeeper.quorum":
-        return ApplicationConfigs.HIVE_ZK_QUORUM;
-      case "hive.zookeeper.znode":
-        return ApplicationConfigs.HIVE_ZK_ZNODE;
-      case "hive.auth.kerberos":
-        return ApplicationConfigs.HIVE_AUTH_KERBEROS;
-      case "hive.security.user":
-        return ApplicationConfigs.HIVE_AUTH_KERBEROS_USER;
-      case "hive.security.keytab":
-        return ApplicationConfigs.HIVE_AUTH_KERBEROS_KEYTAB;
-      case "hive.truststore.location":
-        return ApplicationConfigs.HIVE_TRUSTSTORE_LOCATION;
-      case "hive.truststore.password":
-        return ApplicationConfigs.HIVE_TRUSTSTORE_PASSWORD;
-      case "solr.zookeeper.quorum":
-        return ApplicationConfigs.SOLR_ZK_QUORUM;
-      case "solr.zookeeper.znode":
-        return ApplicationConfigs.SOLR_ZK_NODE;
-      case "solr.tls.enabled":
-        return ApplicationConfigs.SOLR_TLS_ENABLED;
-      case "solr.auth.kerberos":
-        return ApplicationConfigs.SOLR_AUTH_KERBEROS;
-      case "solr.auth.kerberos.keytab":
-        return ApplicationConfigs.SOLR_AUTH_KERBEROS_KEYTAB;
-      case "solr.auth.kerberos.user":
-        return ApplicationConfigs.SOLR_AUTH_KERBEROS_USER;
-      case "solr.truststore.location":
-        return ApplicationConfigs.SOLR_TRUSTSTORE_LOCATION;
"solr.truststore.password": - return ApplicationConfigs.SOLR_TRUSTSTORE_PASSWORD; - case "solr.keystore.location": - return ApplicationConfigs.SOLR_KEYSTORE_LOCATION; - case "solr.keystore.password": - return ApplicationConfigs.SOLR_KEYSTORE_PASSWORD; - case "kafka.brokers": - return ApplicationConfigs.KAFKA_BROKERS; - case "kafka.security.protocol": - return ApplicationConfigs.KAFKA_SECURITY_PROTOCOL; - case "schema.registry.url": - return ApplicationConfigs.SCHEMA_REGISTRY_URL; - case "schema.registry.tls.enabled": - return ApplicationConfigs.SCHEMA_REGISTRY_TLS_ENABLED; - case "kafka.keystore.location": - return ApplicationConfigs.KAFKA_KEYSTORE_LOCATION; - case "kafka.truststore.location": - return ApplicationConfigs.KAFKA_TRUSTSTORE_LOCATION; - case "kafka.keystore.password": - return ApplicationConfigs.KAFKA_KEYSTORE_PASSWORD; - case "kafka.keystore.key.password": - return ApplicationConfigs.KAFKA_KEYSTORE_KEYPASSWORD; - case "kafka.truststore.password": - return ApplicationConfigs.KAFKA_TRUSTSTORE_PASSWORD; - case "kafka.sasl.mechanism": - return ApplicationConfigs.KAFKA_SASL_MECHANISM; - case "kafka.sasl.kerberos.service.name": - return ApplicationConfigs.KAFKA_SASL_KERBEROS_SERVICE_NAME; - case "kafka.auth.kerberos.keytab": - return ApplicationConfigs.KAFKA_AUTH_KERBEROS_KEYTAB; - case "kafka.auth.kerberos.user": - return ApplicationConfigs.KAFKA_AUTH_KERBEROS_USER; - case "kudu.master.server": - return ApplicationConfigs.KUDU_URL; - case "kudu.auth.kerberos": - return ApplicationConfigs.KUDU_AUTH_KERBEROS; - case "kudu.security.user": - return ApplicationConfigs.KUDU_AUTH_KERBEROS_USER; - case "kudu.security.keytab": - return ApplicationConfigs.KUDU_AUTH_KERBEROS_KEYTAB; - case "kudu.truststore.location": - return ApplicationConfigs.KUDU_TRUSTSTORE_LOCATION; - case "kudu.truststore.password": - return ApplicationConfigs.KUDU_TRUSTSTORE_PASSWORD; - case "s3.access_key.id": - return ApplicationConfigs.S3_ACCESS_KEY_ID; - case "s3.access_key.secret": - return ApplicationConfigs.S3_ACCESS_KEY_SECRET; - case "s3.region": - return ApplicationConfigs.S3_REGION; - case "adls.account.name": - return ApplicationConfigs.ADLS_ACCOUNT_NAME; - case "adls.account.type": - return ApplicationConfigs.ADLS_ACCOUNT_TYPE; - case "adls.sas.token": - return ApplicationConfigs.ADLS_SAS_TOKEN; - case "gcs.project.id": - return ApplicationConfigs.GCS_PROJECT_ID; - case "gcs.accountkey.path": - return ApplicationConfigs.GCS_ACCOUNT_KEY_PATH; - case "gcs.region": - return ApplicationConfigs.GCS_REGION; - - default: - log.warn("Could not guess property: {} , check it is well written", - propertyName); - return null; - + ApplicationConfigs propValue=null; + var propParsed=propertyName.toUpperCase(Locale.ROOT) + .replaceAll("\\.", "_") + .replaceAll("-", "_"); + try { + propValue = ApplicationConfigs.valueOf(propParsed); + } catch (Exception e) { + log.warn("Cannot find property: {}", propParsed); } + return propValue; + } + + /** + * Parse a map of properties into a map of application configs to values + * @param extraProperties + * @return + */ + public static Map parsePropertiesMap(Map extraProperties){ + return extraProperties!=null? 
+ extraProperties.entrySet().stream().collect( + Collectors.toMap( + e -> ApplicationConfigs.valueOf(e.getKey()), + Map.Entry::getValue + )) : Collections.emptyMap(); } } diff --git a/src/main/java/com/datagen/config/ApplicationConfigs.java b/src/main/java/com/datagen/config/ApplicationConfigs.java index 7010792..efb87f8 100755 --- a/src/main/java/com/datagen/config/ApplicationConfigs.java +++ b/src/main/java/com/datagen/config/ApplicationConfigs.java @@ -28,13 +28,13 @@ public enum ApplicationConfigs { THREADS, NUMBER_OF_BATCHES_DEFAULT, NUMBER_OF_ROWS_DEFAULT, - DATA_HOME_DIRECTORY, - DATA_MODEL_PATH_DEFAULT, - DATA_MODEL_RECEIVED_PATH, - DATA_MODEL_GENERATED_PATH, - DATA_MODEL_DEFAULT, - CUSTOM_DATA_MODEL_DEFAULT, - SCHEDULER_FILE_PATH, + DATAGEN_HOME_DIRECTORY, + DATAGEN_MODEL_PATH, + DATAGEN_MODEL_RECEIVED_PATH, + DATAGEN_MODEL_GENERATED_PATH, + DATAGEN_MODEL_DEFAULT, + DATAGEN_CUSTOM_MODEL, + DATAGEN_SCHEDULER_FILE_PATH, ADMIN_USER, ADMIN_PASSWORD, @@ -61,7 +61,7 @@ public enum ApplicationConfigs { SCHEMA_REGISTRY_CONF_PATH, KUDU_CONF_PATH, - CM_AUTO_DISCOVERY, + CM_AUTODISCOVERY, CM_URL, CM_USER, CM_PASSWORD, @@ -72,9 +72,9 @@ public enum ApplicationConfigs { HDFS_AUTH_KERBEROS_USER, HDFS_AUTH_KERBEROS_KEYTAB, - HBASE_ZK_QUORUM, - HBASE_ZK_QUORUM_PORT, - HBASE_ZK_ZNODE, + HBASE_ZOOKEEPER_QUORUM, + HBASE_ZOOKEEPER_PORT, + HBASE_ZOOKEEPER_ZNODE, HBASE_AUTH_KERBEROS, HBASE_AUTH_KERBEROS_USER, HBASE_AUTH_KERBEROS_KEYTAB, @@ -84,17 +84,17 @@ public enum ApplicationConfigs { OZONE_AUTH_KERBEROS_USER, OZONE_AUTH_KERBEROS_KEYTAB, - HIVE_ZK_QUORUM, - HIVE_ZK_ZNODE, + HIVE_ZOOKEEPER_QUORUM, + HIVE_ZOOKEEPER_ZNODE, HIVE_AUTH_KERBEROS, - HIVE_AUTH_KERBEROS_USER, - HIVE_AUTH_KERBEROS_KEYTAB, + HIVE_SECURITY_USER, + HIVE_SECURITY_KEYTAB, HIVE_TRUSTSTORE_LOCATION, HIVE_TRUSTSTORE_PASSWORD, HDFS_FOR_HIVE, - SOLR_ZK_QUORUM, - SOLR_ZK_NODE, + SOLR_ZOOKEEPER_QUORUM, + SOLR_ZOOKEEPER_NODE, SOLR_TLS_ENABLED, SOLR_AUTH_KERBEROS, SOLR_AUTH_KERBEROS_USER, @@ -114,14 +114,14 @@ public enum ApplicationConfigs { KAFKA_TRUSTSTORE_PASSWORD, KAFKA_KEYSTORE_LOCATION, KAFKA_KEYSTORE_PASSWORD, - KAFKA_KEYSTORE_KEYPASSWORD, + KAFKA_KEYSTORE_KEY_PASSWORD, KAFKA_SASL_MECHANISM, KAFKA_SASL_KERBEROS_SERVICE_NAME, KUDU_URL, KUDU_AUTH_KERBEROS, - KUDU_AUTH_KERBEROS_USER, - KUDU_AUTH_KERBEROS_KEYTAB, + KUDU_SECURITY_USER, + KUDU_SECURITY_KEYTAB, KUDU_TRUSTSTORE_LOCATION, KUDU_TRUSTSTORE_PASSWORD, @@ -135,7 +135,29 @@ public enum ApplicationConfigs { GCS_PROJECT_ID, GCS_ACCOUNT_KEY_PATH, - GCS_REGION + GCS_REGION, + + OLLAMA_URL_DEFAULT, + OLLAMA_MODEL_DEFAULT, + OLLAMA_TEMPERATURE_DEFAULT, + OLLAMA_FREQUENCY_PENALTY_DEFAULT, + OLLAMA_PRESENCE_PENALTY_DEFAULT, + OLLAMA_TOP_P_DEFAULT, + + BEDROCK_REGION, + BEDROCK_MODEL_DEFAULT, + BEDROCK_TEMPERATURE_DEFAULT, + BEDROCK_MAX_TOKENS_DEFAULT, + BEDROCK_ACCESS_KEY_ID, + BEDROCK_ACCESS_KEY_SECRET, + + OPENAI_TEMPERATURE_DEFAULT, + OPENAI_MODEL_DEFAULT, + OPENAI_API_KEY, + OPENAI_FREQUENCY_PENALTY_DEFAULT, + OPENAI_PRESENCE_PENALTY_DEFAULT, + OPENAI_MAX_TOKENS_DEFAULT, + OPENAI_TOP_P_DEFAULT } diff --git a/src/main/java/com/datagen/config/ConnectorParser.java b/src/main/java/com/datagen/config/ConnectorParser.java index ee97e0d..d8bdf92 100755 --- a/src/main/java/com/datagen/config/ConnectorParser.java +++ b/src/main/java/com/datagen/config/ConnectorParser.java @@ -28,80 +28,7 @@ private ConnectorParser() { } public static Connector stringToConnector(String connector) { - switch (connector.toUpperCase()) { - case "HDFS-CSV": - return Connector.HDFS_CSV; - case "HDFS-JSON": - return 
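The enum renames above (e.g. DATA_HOME_DIRECTORY becoming DATAGEN_HOME_DIRECTORY, HBASE_ZK_QUORUM becoming HBASE_ZOOKEEPER_QUORUM) make every configuration key resolve to its enum constant purely by normalization: upper-case the property name and turn dots and dashes into underscores. A minimal sketch of the resulting lookup path, with hypothetical keys and values (the unresolvable key is dropped by the filter in parsePropertiesMap above):

    import java.util.Map;
    import com.datagen.config.ApplicationConfigMapper;
    import com.datagen.config.ApplicationConfigs;

    public class PropertyMappingSketch {
      public static void main(String[] args) {
        // "hbase.zookeeper.quorum" normalizes to HBASE_ZOOKEEPER_QUORUM, 1:1 with the enum
        System.out.println(ApplicationConfigMapper
            .getApplicationConfigFromProperty("hbase.zookeeper.quorum"));
        // Resolvable keys are kept, unknown keys are logged and dropped:
        Map<ApplicationConfigs, String> resolved =
            ApplicationConfigMapper.parsePropertiesMap(
                Map.of("ollama.url.default", "http://localhost:11434", // illustrative value
                       "no.such.property", "ignored"));
        System.out.println(resolved); // {OLLAMA_URL_DEFAULT=http://localhost:11434}
      }
    }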
diff --git a/src/main/java/com/datagen/config/ConnectorParser.java b/src/main/java/com/datagen/config/ConnectorParser.java
index ee97e0d..d8bdf92 100755
--- a/src/main/java/com/datagen/config/ConnectorParser.java
+++ b/src/main/java/com/datagen/config/ConnectorParser.java
@@ -28,80 +28,13 @@ private ConnectorParser() {
   }
 
   public static Connector stringToConnector(String connector) {
-    switch (connector.toUpperCase()) {
-      case "HDFS-CSV":
-        return Connector.HDFS_CSV;
-      case "HDFS-JSON":
-        return Connector.HDFS_JSON;
-      case "HDFS-PARQUET":
-        return Connector.HDFS_PARQUET;
-      case "HDFS-ORC":
-        return Connector.HDFS_ORC;
-      case "HDFS-AVRO":
-        return Connector.HDFS_AVRO;
-      case "HBASE":
-        return Connector.HBASE;
-      case "HIVE":
-        return Connector.HIVE;
-      case "KAFKA":
-        return Connector.KAFKA;
-      case "OZONE-PARQUET":
-        return Connector.OZONE_PARQUET;
-      case "OZONE-CSV":
-        return Connector.OZONE_CSV;
-      case "OZONE-JSON":
-        return Connector.OZONE_JSON;
-      case "OZONE-ORC":
-        return Connector.OZONE_ORC;
-      case "OZONE-AVRO":
-        return Connector.OZONE_AVRO;
-      case "SOLR":
-        return Connector.SOLR;
-      case "KUDU":
-        return Connector.KUDU;
-      case "CSV":
-        return Connector.CSV;
-      case "JSON":
-        return Connector.JSON;
-      case "AVRO":
-        return Connector.AVRO;
-      case "PARQUET":
-        return Connector.PARQUET;
-      case "ORC":
-        return Connector.ORC;
-      case "S3-PARQUET":
-        return Connector.S3_PARQUET;
-      case "S3-CSV":
-        return Connector.S3_CSV;
-      case "S3-JSON":
-        return Connector.S3_JSON;
-      case "S3-ORC":
-        return Connector.S3_ORC;
-      case "S3-AVRO":
-        return Connector.S3_AVRO;
-      case "ADLS-PARQUET":
-        return Connector.ADLS_PARQUET;
-      case "ADLS-CSV":
-        return Connector.ADLS_CSV;
-      case "ADLS-JSON":
-        return Connector.ADLS_JSON;
-      case "ADLS-ORC":
-        return Connector.ADLS_ORC;
-      case "ADLS-AVRO":
-        return Connector.ADLS_AVRO;
-      case "GCS-PARQUET":
-        return Connector.GCS_PARQUET;
-      case "GCS-CSV":
-        return Connector.GCS_CSV;
-      case "GCS-JSON":
-        return Connector.GCS_JSON;
-      case "GCS-ORC":
-        return Connector.GCS_ORC;
-      case "GCS-AVRO":
-        return Connector.GCS_AVRO;
-      default:
-        return null;
-    }
+    // Normalize the name to the enum constant scheme (e.g. "hdfs-csv" -> HDFS_CSV)
+    // and keep the previous contract of returning null for unknown connectors.
+    try {
+      return Connector.valueOf(connector.toUpperCase().replaceAll("-", "_"));
+    } catch (IllegalArgumentException e) {
+      return null;
+    }
   }
 
   public enum Connector {
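With the switch gone, stringToConnector relies on the enum constants themselves: the connector name is upper-cased, hyphens become underscores, and Connector.valueOf does the lookup, with unknown names still yielding null. A quick sketch of that contract (hypothetical calls, assuming the class above):

    ConnectorParser.Connector c = ConnectorParser.stringToConnector("hdfs-csv");
    // c == ConnectorParser.Connector.HDFS_CSV
    // Unknown names keep the old behavior and resolve to null:
    assert ConnectorParser.stringToConnector("not-a-connector") == null;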
diff --git a/src/main/java/com/datagen/config/PropertiesLoader.java b/src/main/java/com/datagen/config/PropertiesLoader.java
index 256a488..a16378e 100755
--- a/src/main/java/com/datagen/config/PropertiesLoader.java
+++ b/src/main/java/com/datagen/config/PropertiesLoader.java
@@ -146,7 +146,7 @@ private void autoDiscover() {
         "Starting auto-discover of properties after load of properties file");
 
     if (Boolean.parseBoolean(
-        properties.get(ApplicationConfigs.CM_AUTO_DISCOVERY))) {
+        properties.get(ApplicationConfigs.CM_AUTODISCOVERY))) {
       autoDiscoverWithCMApi(properties.get(ApplicationConfigs.CM_URL),
           properties.get(ApplicationConfigs.CM_USER),
           properties.get(ApplicationConfigs.CM_PASSWORD),
@@ -165,34 +165,34 @@ private void autoDiscover() {
       properties.put(ApplicationConfigs.HDFS_URI, hdfsUri);
     }
 
-    if (properties.get(ApplicationConfigs.HBASE_ZK_QUORUM) == null
+    if (properties.get(ApplicationConfigs.HBASE_ZOOKEEPER_QUORUM) == null
         && properties.get(ApplicationConfigs.HADOOP_HBASE_SITE_PATH) != null) {
       log.info("Going to auto-discover hbase.zookeeper.quorum");
-      properties.put(ApplicationConfigs.HBASE_ZK_QUORUM,
+      properties.put(ApplicationConfigs.HBASE_ZOOKEEPER_QUORUM,
          Utils.getPropertyFromXMLFile(
              properties.get(ApplicationConfigs.HADOOP_HBASE_SITE_PATH),
              "hbase.zookeeper.quorum"));
     }
 
-    if (properties.get(ApplicationConfigs.HBASE_ZK_QUORUM_PORT) == null
+    if (properties.get(ApplicationConfigs.HBASE_ZOOKEEPER_PORT) == null
        && properties.get(ApplicationConfigs.HADOOP_HBASE_SITE_PATH) != null) {
       log.info("Going to auto-discover hbase.zookeeper.port");
-      properties.put(ApplicationConfigs.HBASE_ZK_QUORUM_PORT,
+      properties.put(ApplicationConfigs.HBASE_ZOOKEEPER_PORT,
          Utils.getPropertyFromXMLFile(
              properties.get(ApplicationConfigs.HADOOP_HBASE_SITE_PATH),
              "hbase.zookeeper.property.clientPort"));
     }
 
-    if (properties.get(ApplicationConfigs.HBASE_ZK_ZNODE) == null
+    if (properties.get(ApplicationConfigs.HBASE_ZOOKEEPER_ZNODE) == null
        && properties.get(ApplicationConfigs.HADOOP_HBASE_SITE_PATH) != null) {
       log.info("Going to auto-discover hbase.zookeeper.znode");
-      properties.put(ApplicationConfigs.HBASE_ZK_ZNODE,
+      properties.put(ApplicationConfigs.HBASE_ZOOKEEPER_ZNODE,
          Utils.getPropertyFromXMLFile(
              properties.get(ApplicationConfigs.HADOOP_HBASE_SITE_PATH),
              "zookeeper.znode.parent"));
@@ -209,7 +209,7 @@ private void autoDiscover() {
              "ozone.service.id"));
     }
 
-    if (properties.get(ApplicationConfigs.HIVE_ZK_QUORUM) == null
+    if (properties.get(ApplicationConfigs.HIVE_ZOOKEEPER_QUORUM) == null
        && properties.get(ApplicationConfigs.HADOOP_HIVE_SITE_PATH) != null) {
       log.info("Going to auto-discover hive.zookeeper.quorum");
 
@@ -223,33 +223,33 @@ private void autoDiscover() {
              "hive.zookeeper.quorum").replaceAll(",", zookeeperPortSuffix) + ":" + zookeeperPort;
 
-      properties.put(ApplicationConfigs.HIVE_ZK_QUORUM,
+      properties.put(ApplicationConfigs.HIVE_ZOOKEEPER_QUORUM,
          zookeeperUriWithPort);
     }
 
-    if (properties.get(ApplicationConfigs.HIVE_ZK_ZNODE) == null
+    if (properties.get(ApplicationConfigs.HIVE_ZOOKEEPER_ZNODE) == null
        && properties.get(ApplicationConfigs.HADOOP_HIVE_SITE_PATH) != null) {
       log.info("Going to auto-discover hive.zookeeper.znode");
-      properties.put(ApplicationConfigs.HIVE_ZK_ZNODE,
+      properties.put(ApplicationConfigs.HIVE_ZOOKEEPER_ZNODE,
          Utils.getPropertyFromXMLFile(
              properties.get(ApplicationConfigs.HADOOP_HIVE_SITE_PATH),
              "hive.server2.zookeeper.namespace"));
     }
 
-    if (properties.get(ApplicationConfigs.SOLR_ZK_QUORUM) == null
+    if (properties.get(ApplicationConfigs.SOLR_ZOOKEEPER_QUORUM) == null
        && properties.get(ApplicationConfigs.SOLR_ENV_PATH) != null) {
       log.info("Going to auto-discover solr.zookeeper.quorum");
-      properties.put(ApplicationConfigs.SOLR_ZK_QUORUM,
+      properties.put(ApplicationConfigs.SOLR_ZOOKEEPER_QUORUM,
          Utils.getSolrZKQuorumFromEnvsh(
              properties.get(ApplicationConfigs.SOLR_ENV_PATH)));
     }
 
-    if (properties.get(ApplicationConfigs.SOLR_ZK_NODE) == null
+    if (properties.get(ApplicationConfigs.SOLR_ZOOKEEPER_NODE) == null
        && properties.get(ApplicationConfigs.SOLR_ENV_PATH) != null) {
       log.info("Going to auto-discover solr.zookeeper.znode");
-      properties.put(ApplicationConfigs.SOLR_ZK_NODE,
+      properties.put(ApplicationConfigs.SOLR_ZOOKEEPER_NODE,
          Utils.getSolrZKznodeFromEnvsh(
              properties.get(ApplicationConfigs.SOLR_ENV_PATH)));
     }
@@ -395,20 +395,20 @@ private void autoDiscoverWithCMApi(String cmURL, String cmUser,
       }
     }
 
-    if (properties.get(ApplicationConfigs.HBASE_ZK_QUORUM) == null) {
+    if (properties.get(ApplicationConfigs.HBASE_ZOOKEEPER_QUORUM) == null) {
       log.info(
          "Going to auto-discover hbase.zookeeper.quorum with CM API");
       String zkQuorum =
          cmApiService.getZkQuorum(servicesExisting.get("ZOOKEEPER"));
       if (!zkQuorum.isEmpty()) {
-        properties.put(ApplicationConfigs.HBASE_ZK_QUORUM,
+        properties.put(ApplicationConfigs.HBASE_ZOOKEEPER_QUORUM,
            zkQuorum);
       }
     }
 
-    if (properties.get(ApplicationConfigs.HBASE_ZK_QUORUM_PORT) ==
+    if (properties.get(ApplicationConfigs.HBASE_ZOOKEEPER_PORT) ==
        null) {
       log.info(
          "Going to auto-discover hbase.zookeeper.port with CM API");
@@ -416,19 +416,19 @@ private void autoDiscoverWithCMApi(String cmURL, String cmUser,
       String zkPort =
          cmApiService.getZkPort(servicesExisting.get("ZOOKEEPER"));
       if (!zkPort.isEmpty()) {
-        properties.put(ApplicationConfigs.HBASE_ZK_QUORUM_PORT,
+        properties.put(ApplicationConfigs.HBASE_ZOOKEEPER_PORT,
            zkPort);
       }
     }
 
-    if (properties.get(ApplicationConfigs.HBASE_ZK_ZNODE) == null) {
+    if (properties.get(ApplicationConfigs.HBASE_ZOOKEEPER_ZNODE) == null) {
       log.info(
          "Going to auto-discover hbase.zookeeper.znode with CM API");
       String zknode =
          cmApiService.getHbaseZkZnode(servicesExisting.get("HBASE"));
       if (!zknode.isEmpty()) {
-        properties.put(ApplicationConfigs.HBASE_ZK_ZNODE, zknode);
+        properties.put(ApplicationConfigs.HBASE_ZOOKEEPER_ZNODE, zknode);
       }
     }
 
@@ -445,51 +445,51 @@ private void autoDiscoverWithCMApi(String cmURL, String cmUser,
     }
 
-    if (properties.get(ApplicationConfigs.HIVE_ZK_QUORUM) == null) {
+    if (properties.get(ApplicationConfigs.HIVE_ZOOKEEPER_QUORUM) == null) {
       log.info(
          "Going to auto-discover hive.zookeeper.quorum with CM API");
       String zkQuorum = cmApiService.getZkQuorumWithPort(
          servicesExisting.get("ZOOKEEPER"));
       if (!zkQuorum.isEmpty()) {
-        properties.put(ApplicationConfigs.HIVE_ZK_QUORUM,
+        properties.put(ApplicationConfigs.HIVE_ZOOKEEPER_QUORUM,
            zkQuorum);
       }
     }
 
-    if (properties.get(ApplicationConfigs.HIVE_ZK_ZNODE) == null) {
+    if (properties.get(ApplicationConfigs.HIVE_ZOOKEEPER_ZNODE) == null) {
       log.info(
          "Going to auto-discover hive.zookeeper.znode with CM API");
       String hiveZnode =
          cmApiService.getHiveZnode(servicesExisting.get("HIVE_ON_TEZ"));
       if (!hiveZnode.isEmpty()) {
-        properties.put(ApplicationConfigs.HIVE_ZK_ZNODE, hiveZnode);
+        properties.put(ApplicationConfigs.HIVE_ZOOKEEPER_ZNODE, hiveZnode);
       }
     }
 
-    if (properties.get(ApplicationConfigs.SOLR_ZK_QUORUM) == null) {
+    if (properties.get(ApplicationConfigs.SOLR_ZOOKEEPER_QUORUM) == null) {
       log.info(
          "Going to auto-discover solr.zookeeper.quorum with CM API");
       String zkQuorum = cmApiService.getZkQuorumWithPort(
          servicesExisting.get("ZOOKEEPER"));
       if (!zkQuorum.isEmpty()) {
-        properties.put(ApplicationConfigs.SOLR_ZK_QUORUM,
+        properties.put(ApplicationConfigs.SOLR_ZOOKEEPER_QUORUM,
            zkQuorum);
       }
     }
 
-    if (properties.get(ApplicationConfigs.SOLR_ZK_NODE) == null) {
+    if (properties.get(ApplicationConfigs.SOLR_ZOOKEEPER_NODE) == null) {
       log.info(
          "Going to auto-discover solr.zookeeper.znode with CM API");
       String solrZnode =
          cmApiService.getSolRZnode(servicesExisting.get("SOLR"));
       if (!solrZnode.isEmpty()) {
-        properties.put(ApplicationConfigs.SOLR_ZK_NODE, solrZnode);
+        properties.put(ApplicationConfigs.SOLR_ZOOKEEPER_NODE, solrZnode);
       }
     }
diff --git a/src/main/java/com/datagen/connector/db/hbase/HbaseConnector.java b/src/main/java/com/datagen/connector/db/hbase/HbaseConnector.java
index 33bbc62..a002ee5 100755
--- a/src/main/java/com/datagen/connector/db/hbase/HbaseConnector.java
+++ b/src/main/java/com/datagen/connector/db/hbase/HbaseConnector.java
@@ -69,11 +69,11 @@ public HbaseConnector(Model model, Map properties) {
     Configuration config = HBaseConfiguration.create();
     config.set("hbase.zookeeper.quorum",
-        properties.get(ApplicationConfigs.HBASE_ZK_QUORUM));
+        properties.get(ApplicationConfigs.HBASE_ZOOKEEPER_QUORUM));
     config.set("hbase.zookeeper.property.clientPort",
-        properties.get(ApplicationConfigs.HBASE_ZK_QUORUM_PORT));
+        properties.get(ApplicationConfigs.HBASE_ZOOKEEPER_PORT));
     config.set("zookeeper.znode.parent",
-        properties.get(ApplicationConfigs.HBASE_ZK_ZNODE));
+        properties.get(ApplicationConfigs.HBASE_ZOOKEEPER_ZNODE));
     Utils.setupHadoopEnv(config, properties);
 
     // Setup Kerberos auth if needed
@@ -162,7 +162,7 @@ public Model generateModel(Boolean deepAnalysis) {
     Map tableNames = new HashMap<>();
     Map options = new HashMap<>();
     // TODO : Implement logic to create a model with at least names, pk, options and column names/types
-    return new Model(fields, primaryKeys,
tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } private void createNamespaceIfNotExists(String namespace) { diff --git a/src/main/java/com/datagen/connector/db/hive/HiveConnector.java b/src/main/java/com/datagen/connector/db/hive/HiveConnector.java index feedd70..9173114 100755 --- a/src/main/java/com/datagen/connector/db/hive/HiveConnector.java +++ b/src/main/java/com/datagen/connector/db/hive/HiveConnector.java @@ -91,10 +91,10 @@ public HiveConnector(Model model, this.locationTemporaryTable = (String) model.getTableNames() .get(OptionsConverter.TableNames.HIVE_HDFS_FILE_PATH); this.hiveUri = - "jdbc:hive2://" + properties.get(ApplicationConfigs.HIVE_ZK_QUORUM) + + "jdbc:hive2://" + properties.get(ApplicationConfigs.HIVE_ZOOKEEPER_QUORUM) + "/" + database + ";serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=" + - properties.get(ApplicationConfigs.HIVE_ZK_ZNODE) + + properties.get(ApplicationConfigs.HIVE_ZOOKEEPER_ZNODE) + "?tez.queue.name=" + queue; this.useKerberos = Boolean.parseBoolean( properties.get(ApplicationConfigs.HIVE_AUTH_KERBEROS)); @@ -132,8 +132,8 @@ public HiveConnector(Model model, try { if (useKerberos) { KerberosUtils.loginUserWithKerberos( - properties.get(ApplicationConfigs.HIVE_AUTH_KERBEROS_USER), - properties.get(ApplicationConfigs.HIVE_AUTH_KERBEROS_KEYTAB), + properties.get(ApplicationConfigs.HIVE_SECURITY_USER), + properties.get(ApplicationConfigs.HIVE_SECURITY_KEYTAB), new Configuration()); } @@ -163,9 +163,9 @@ public HiveConnector(Model model, } String hiveUriWithNoDatabase = - "jdbc:hive2://" + properties.get(ApplicationConfigs.HIVE_ZK_QUORUM) + + "jdbc:hive2://" + properties.get(ApplicationConfigs.HIVE_ZOOKEEPER_QUORUM) + "/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=" + - properties.get(ApplicationConfigs.HIVE_ZK_ZNODE) + + properties.get(ApplicationConfigs.HIVE_ZOOKEEPER_ZNODE) + "?tez.queue.name=" + queue; this.properties = properties; @@ -360,7 +360,7 @@ public Model generateModel(Boolean deepAnalysis) { log.warn("Unable to close Hive connection"); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/index/SolRConnector.java b/src/main/java/com/datagen/connector/index/SolRConnector.java index 11c0faa..d45ca02 100755 --- a/src/main/java/com/datagen/connector/index/SolRConnector.java +++ b/src/main/java/com/datagen/connector/index/SolRConnector.java @@ -62,9 +62,9 @@ public SolRConnector(Model model, properties.get(ApplicationConfigs.SOLR_AUTH_KERBEROS)); List zkHosts = Arrays.stream( - properties.get(ApplicationConfigs.SOLR_ZK_QUORUM).split(",")) + properties.get(ApplicationConfigs.SOLR_ZOOKEEPER_QUORUM).split(",")) .collect(Collectors.toList()); - String znode = properties.get(ApplicationConfigs.SOLR_ZK_NODE); + String znode = properties.get(ApplicationConfigs.SOLR_ZOOKEEPER_NODE); if (Boolean.parseBoolean( properties.get(ApplicationConfigs.SOLR_TLS_ENABLED))) { @@ -172,7 +172,7 @@ public Model generateModel(Boolean deepAnalysis) { Map tableNames = new HashMap<>(); Map options = new HashMap<>(); // TODO : Implement logic to create a model with at least names, pk, options and column names/types - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } private void createSolRCollectionIfNotExists() { diff --git a/src/main/java/com/datagen/connector/queues/KafkaConnector.java 
b/src/main/java/com/datagen/connector/queues/KafkaConnector.java index 2bd6723..ce951b4 100755 --- a/src/main/java/com/datagen/connector/queues/KafkaConnector.java +++ b/src/main/java/com/datagen/connector/queues/KafkaConnector.java @@ -167,7 +167,7 @@ public KafkaConnector(Model model, props.put(SslConfigs.SSL_KEY_PASSWORD_CONFIG, properties.get(ApplicationConfigs.KAFKA_KEYSTORE_PASSWORD)); props.put(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG, - properties.get(ApplicationConfigs.KAFKA_KEYSTORE_KEYPASSWORD)); + properties.get(ApplicationConfigs.KAFKA_KEYSTORE_KEY_PASSWORD)); props.put(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, properties.get(ApplicationConfigs.KAFKA_TRUSTSTORE_PASSWORD)); } @@ -258,7 +258,7 @@ public Model generateModel(Boolean deepAnalysis) { Map tableNames = new HashMap<>(); Map options = new HashMap<>(); // TODO : Implement logic to create a model with at least names, pk, options and column names/types - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } /** diff --git a/src/main/java/com/datagen/connector/storage/adls/AdlsAvroConnector.java b/src/main/java/com/datagen/connector/storage/adls/AdlsAvroConnector.java index 22cb648..abbc342 100755 --- a/src/main/java/com/datagen/connector/storage/adls/AdlsAvroConnector.java +++ b/src/main/java/com/datagen/connector/storage/adls/AdlsAvroConnector.java @@ -171,7 +171,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/adls/AdlsCSVConnector.java b/src/main/java/com/datagen/connector/storage/adls/AdlsCSVConnector.java index 7262a32..e500903 100755 --- a/src/main/java/com/datagen/connector/storage/adls/AdlsCSVConnector.java +++ b/src/main/java/com/datagen/connector/storage/adls/AdlsCSVConnector.java @@ -32,7 +32,7 @@ import java.io.*; import java.util.*; -import static com.datagen.config.ApplicationConfigs.DATA_HOME_DIRECTORY; +import static com.datagen.config.ApplicationConfigs.DATAGEN_HOME_DIRECTORY; /** * This is a CSV connector to write to one or multiple CSV files to ADLS @@ -60,7 +60,8 @@ public AdlsCSVConnector(Model model, this.lineSeparator = System.getProperty("line.separator"); this.oneFilePerIteration = (Boolean) model.getOptionsOrDefault( OptionsConverter.Options.ONE_FILE_PER_ITERATION); - this.localFilePathForModelGeneration = properties.get(DATA_HOME_DIRECTORY) + "/model-gen/azure/"; + this.localFilePathForModelGeneration = properties.get( + DATAGEN_HOME_DIRECTORY) + "/model-gen/azure/"; } @Override @@ -163,7 +164,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/adls/AdlsJsonConnector.java b/src/main/java/com/datagen/connector/storage/adls/AdlsJsonConnector.java index d348dcd..b917d45 100755 --- a/src/main/java/com/datagen/connector/storage/adls/AdlsJsonConnector.java +++ b/src/main/java/com/datagen/connector/storage/adls/AdlsJsonConnector.java @@ -157,7 +157,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/adls/AdlsOrcConnector.java 
b/src/main/java/com/datagen/connector/storage/adls/AdlsOrcConnector.java index 34f7642..75ecece 100755 --- a/src/main/java/com/datagen/connector/storage/adls/AdlsOrcConnector.java +++ b/src/main/java/com/datagen/connector/storage/adls/AdlsOrcConnector.java @@ -185,7 +185,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/adls/AdlsParquetConnector.java b/src/main/java/com/datagen/connector/storage/adls/AdlsParquetConnector.java index ff6fd35..321631e 100755 --- a/src/main/java/com/datagen/connector/storage/adls/AdlsParquetConnector.java +++ b/src/main/java/com/datagen/connector/storage/adls/AdlsParquetConnector.java @@ -171,7 +171,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/adls/AdlsUtils.java b/src/main/java/com/datagen/connector/storage/adls/AdlsUtils.java index 0b095db..8fab572 100644 --- a/src/main/java/com/datagen/connector/storage/adls/AdlsUtils.java +++ b/src/main/java/com/datagen/connector/storage/adls/AdlsUtils.java @@ -17,7 +17,7 @@ import java.util.Map; -import static com.datagen.config.ApplicationConfigs.DATA_HOME_DIRECTORY; +import static com.datagen.config.ApplicationConfigs.DATAGEN_HOME_DIRECTORY; /** * Everything that is only related to ADLS is set here and ADLS connectors extends this class @@ -66,7 +66,8 @@ public abstract class AdlsUtils { this.directoryName = directoryNotFormatted; this.localDirectory = (String) model.getTableNames() .get(OptionsConverter.TableNames.ADLS_LOCAL_FILE_PATH); - this.localFilePathForModelGeneration = properties.get(DATA_HOME_DIRECTORY) + "/model-gen/azure/"; + this.localFilePathForModelGeneration = properties.get( + DATAGEN_HOME_DIRECTORY) + "/model-gen/azure/"; this.sasToken = properties.get(ApplicationConfigs.ADLS_SAS_TOKEN); this.accountName = diff --git a/src/main/java/com/datagen/connector/storage/files/AvroConnector.java b/src/main/java/com/datagen/connector/storage/files/AvroConnector.java index 5dd725c..1b90a09 100755 --- a/src/main/java/com/datagen/connector/storage/files/AvroConnector.java +++ b/src/main/java/com/datagen/connector/storage/files/AvroConnector.java @@ -166,7 +166,7 @@ public Model generateModel(Boolean deepAnalysis) { this.directoryName, e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } } diff --git a/src/main/java/com/datagen/connector/storage/files/CSVConnector.java b/src/main/java/com/datagen/connector/storage/files/CSVConnector.java index c7d99ac..5886e8e 100755 --- a/src/main/java/com/datagen/connector/storage/files/CSVConnector.java +++ b/src/main/java/com/datagen/connector/storage/files/CSVConnector.java @@ -142,7 +142,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } } diff --git a/src/main/java/com/datagen/connector/storage/files/JsonConnector.java b/src/main/java/com/datagen/connector/storage/files/JsonConnector.java index b4b2657..2145214 100755 --- a/src/main/java/com/datagen/connector/storage/files/JsonConnector.java +++ 
b/src/main/java/com/datagen/connector/storage/files/JsonConnector.java @@ -128,7 +128,7 @@ public Model generateModel(Boolean deepAnalysis) { Map tableNames = new HashMap<>(); Map options = new HashMap<>(); // TODO : Implement logic to create a model with at least names, pk, options and column names/types - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/files/ORCConnector.java b/src/main/java/com/datagen/connector/storage/files/ORCConnector.java index 354e882..4fef15a 100755 --- a/src/main/java/com/datagen/connector/storage/files/ORCConnector.java +++ b/src/main/java/com/datagen/connector/storage/files/ORCConnector.java @@ -175,7 +175,7 @@ public Model generateModel(Boolean deepAnalysis) { log.warn("Could not create reader to ORC local file due to error:", e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/files/ParquetConnector.java b/src/main/java/com/datagen/connector/storage/files/ParquetConnector.java index 30c43b6..c0f3457 100755 --- a/src/main/java/com/datagen/connector/storage/files/ParquetConnector.java +++ b/src/main/java/com/datagen/connector/storage/files/ParquetConnector.java @@ -159,7 +159,7 @@ public Model generateModel(Boolean deepAnalysis) { this.directoryName, e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } } \ No newline at end of file diff --git a/src/main/java/com/datagen/connector/storage/gcs/GcsAvroConnector.java b/src/main/java/com/datagen/connector/storage/gcs/GcsAvroConnector.java index cb947bf..c41b0fa 100755 --- a/src/main/java/com/datagen/connector/storage/gcs/GcsAvroConnector.java +++ b/src/main/java/com/datagen/connector/storage/gcs/GcsAvroConnector.java @@ -172,7 +172,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/gcs/GcsCSVConnector.java b/src/main/java/com/datagen/connector/storage/gcs/GcsCSVConnector.java index 7095459..e807d44 100755 --- a/src/main/java/com/datagen/connector/storage/gcs/GcsCSVConnector.java +++ b/src/main/java/com/datagen/connector/storage/gcs/GcsCSVConnector.java @@ -159,7 +159,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/gcs/GcsJsonConnector.java b/src/main/java/com/datagen/connector/storage/gcs/GcsJsonConnector.java index e25b3f4..2186826 100755 --- a/src/main/java/com/datagen/connector/storage/gcs/GcsJsonConnector.java +++ b/src/main/java/com/datagen/connector/storage/gcs/GcsJsonConnector.java @@ -157,7 +157,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/gcs/GcsOrcConnector.java b/src/main/java/com/datagen/connector/storage/gcs/GcsOrcConnector.java index 26c859d..9039229 100755 --- a/src/main/java/com/datagen/connector/storage/gcs/GcsOrcConnector.java +++ 
b/src/main/java/com/datagen/connector/storage/gcs/GcsOrcConnector.java @@ -186,7 +186,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/gcs/GcsParquetConnector.java b/src/main/java/com/datagen/connector/storage/gcs/GcsParquetConnector.java index b5bc896..b0c83a6 100755 --- a/src/main/java/com/datagen/connector/storage/gcs/GcsParquetConnector.java +++ b/src/main/java/com/datagen/connector/storage/gcs/GcsParquetConnector.java @@ -172,7 +172,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/gcs/GcsUtils.java b/src/main/java/com/datagen/connector/storage/gcs/GcsUtils.java index 0cde9ec..67b4aee 100644 --- a/src/main/java/com/datagen/connector/storage/gcs/GcsUtils.java +++ b/src/main/java/com/datagen/connector/storage/gcs/GcsUtils.java @@ -15,7 +15,7 @@ import java.util.Locale; import java.util.Map; -import static com.datagen.config.ApplicationConfigs.DATA_HOME_DIRECTORY; +import static com.datagen.config.ApplicationConfigs.DATAGEN_HOME_DIRECTORY; /** * Everything that is only related to GCS is set here and GCS connectors extends this class @@ -61,7 +61,7 @@ public abstract class GcsUtils { this.localDirectory = (String) model.getTableNames() .get(OptionsConverter.TableNames.GCS_LOCAL_FILE_PATH); this.localFilePathForModelGeneration = - properties.get(DATA_HOME_DIRECTORY) + "/model-gen/GCS/"; + properties.get(DATAGEN_HOME_DIRECTORY) + "/model-gen/GCS/"; if (serviceAccountKeyPath != null && !serviceAccountKeyPath.isBlank()) { System.setProperty("GOOGLE_APPLICATION_CREDENTIALS", diff --git a/src/main/java/com/datagen/connector/storage/hdfs/HdfsAvroConnector.java b/src/main/java/com/datagen/connector/storage/hdfs/HdfsAvroConnector.java index 9975cdb..8bdb04d 100755 --- a/src/main/java/com/datagen/connector/storage/hdfs/HdfsAvroConnector.java +++ b/src/main/java/com/datagen/connector/storage/hdfs/HdfsAvroConnector.java @@ -173,7 +173,7 @@ public Model generateModel(Boolean deepAnalysis) { this.directoryName, e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } } diff --git a/src/main/java/com/datagen/connector/storage/hdfs/HdfsCsvConnector.java b/src/main/java/com/datagen/connector/storage/hdfs/HdfsCsvConnector.java index 12256b2..6def49f 100755 --- a/src/main/java/com/datagen/connector/storage/hdfs/HdfsCsvConnector.java +++ b/src/main/java/com/datagen/connector/storage/hdfs/HdfsCsvConnector.java @@ -148,7 +148,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/hdfs/HdfsJsonConnector.java b/src/main/java/com/datagen/connector/storage/hdfs/HdfsJsonConnector.java index 9143e17..20b42ec 100755 --- a/src/main/java/com/datagen/connector/storage/hdfs/HdfsJsonConnector.java +++ b/src/main/java/com/datagen/connector/storage/hdfs/HdfsJsonConnector.java @@ -125,7 +125,7 @@ public Model generateModel(Boolean deepAnalysis) { Map tableNames = new HashMap<>(); Map options = new HashMap<>(); // TODO : Implement logic to create a model with 
at least names, pk, options and column names/types - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/hdfs/HdfsOrcConnector.java b/src/main/java/com/datagen/connector/storage/hdfs/HdfsOrcConnector.java index 6326f28..887b0d1 100755 --- a/src/main/java/com/datagen/connector/storage/hdfs/HdfsOrcConnector.java +++ b/src/main/java/com/datagen/connector/storage/hdfs/HdfsOrcConnector.java @@ -176,7 +176,7 @@ public Model generateModel(Boolean deepAnalysis) { log.warn("Could not create reader to ORC local file due to error:", e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } } diff --git a/src/main/java/com/datagen/connector/storage/hdfs/HdfsParquetConnector.java b/src/main/java/com/datagen/connector/storage/hdfs/HdfsParquetConnector.java index 58b6548..22c3bf8 100755 --- a/src/main/java/com/datagen/connector/storage/hdfs/HdfsParquetConnector.java +++ b/src/main/java/com/datagen/connector/storage/hdfs/HdfsParquetConnector.java @@ -158,7 +158,7 @@ public Model generateModel(Boolean deepAnalysis) { this.directoryName, e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } } diff --git a/src/main/java/com/datagen/connector/storage/kudu/KuduConnector.java b/src/main/java/com/datagen/connector/storage/kudu/KuduConnector.java index e912bd3..cca90e2 100755 --- a/src/main/java/com/datagen/connector/storage/kudu/KuduConnector.java +++ b/src/main/java/com/datagen/connector/storage/kudu/KuduConnector.java @@ -67,9 +67,9 @@ public KuduConnector(Model model, if (useKerberos) { KerberosUtils.loginUserWithKerberos( - properties.get(ApplicationConfigs.KUDU_AUTH_KERBEROS_USER), + properties.get(ApplicationConfigs.KUDU_SECURITY_USER), properties.get( - ApplicationConfigs.KUDU_AUTH_KERBEROS_KEYTAB), + ApplicationConfigs.KUDU_SECURITY_KEYTAB), new Configuration()); UserGroupInformation.getLoginUser().doAs( @@ -187,7 +187,7 @@ public Model generateModel(Boolean deepAnalysis) { Map tableNames = new HashMap<>(); Map options = new HashMap<>(); // TODO : Implement logic to create a model with at least names, pk, options and column names/types - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } private void createTableIfNotExists() { diff --git a/src/main/java/com/datagen/connector/storage/ozone/OzoneAvroConnector.java b/src/main/java/com/datagen/connector/storage/ozone/OzoneAvroConnector.java index 1191fbd..679b91b 100755 --- a/src/main/java/com/datagen/connector/storage/ozone/OzoneAvroConnector.java +++ b/src/main/java/com/datagen/connector/storage/ozone/OzoneAvroConnector.java @@ -197,7 +197,7 @@ public Model generateModel(Boolean deepAnalysis) { } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/ozone/OzoneCSVConnector.java b/src/main/java/com/datagen/connector/storage/ozone/OzoneCSVConnector.java index ffc21cd..eb9f5c4 100755 --- a/src/main/java/com/datagen/connector/storage/ozone/OzoneCSVConnector.java +++ b/src/main/java/com/datagen/connector/storage/ozone/OzoneCSVConnector.java @@ -168,7 +168,7 @@ public Model generateModel(Boolean deepAnalysis) { keyNamePrefix, e); } - return new Model(fields, 
primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } } diff --git a/src/main/java/com/datagen/connector/storage/ozone/OzoneJsonConnector.java b/src/main/java/com/datagen/connector/storage/ozone/OzoneJsonConnector.java index 476afa1..81a5899 100755 --- a/src/main/java/com/datagen/connector/storage/ozone/OzoneJsonConnector.java +++ b/src/main/java/com/datagen/connector/storage/ozone/OzoneJsonConnector.java @@ -147,7 +147,7 @@ public Model generateModel(Boolean deepAnalysis) { Map tableNames = new HashMap<>(); Map options = new HashMap<>(); // TODO : Implement logic to create a model with at least names, pk, options and column names/types - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } private void createLocalFileWithOverwrite(String path) { diff --git a/src/main/java/com/datagen/connector/storage/ozone/OzoneOrcConnector.java b/src/main/java/com/datagen/connector/storage/ozone/OzoneOrcConnector.java index 15794aa..455c376 100755 --- a/src/main/java/com/datagen/connector/storage/ozone/OzoneOrcConnector.java +++ b/src/main/java/com/datagen/connector/storage/ozone/OzoneOrcConnector.java @@ -220,7 +220,7 @@ public Model generateModel(Boolean deepAnalysis) { "Could not connect and read key: {} into Ozone, due to error: ", keyNamePrefix, e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } } diff --git a/src/main/java/com/datagen/connector/storage/ozone/OzoneParquetConnector.java b/src/main/java/com/datagen/connector/storage/ozone/OzoneParquetConnector.java index 85fc728..7675674 100755 --- a/src/main/java/com/datagen/connector/storage/ozone/OzoneParquetConnector.java +++ b/src/main/java/com/datagen/connector/storage/ozone/OzoneParquetConnector.java @@ -204,7 +204,7 @@ public Model generateModel(Boolean deepAnalysis) { "Could not connect and read key: {} into Ozone, due to error: ", keyNamePrefix, e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/s3/S3AvroConnector.java b/src/main/java/com/datagen/connector/storage/s3/S3AvroConnector.java index 5b73835..fb6333d 100755 --- a/src/main/java/com/datagen/connector/storage/s3/S3AvroConnector.java +++ b/src/main/java/com/datagen/connector/storage/s3/S3AvroConnector.java @@ -169,7 +169,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/s3/S3CSVConnector.java b/src/main/java/com/datagen/connector/storage/s3/S3CSVConnector.java index a8bc13a..79a1d56 100755 --- a/src/main/java/com/datagen/connector/storage/s3/S3CSVConnector.java +++ b/src/main/java/com/datagen/connector/storage/s3/S3CSVConnector.java @@ -157,7 +157,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/s3/S3JsonConnector.java b/src/main/java/com/datagen/connector/storage/s3/S3JsonConnector.java index 368b959..1575da0 100755 --- a/src/main/java/com/datagen/connector/storage/s3/S3JsonConnector.java +++ 
b/src/main/java/com/datagen/connector/storage/s3/S3JsonConnector.java @@ -156,7 +156,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/s3/S3OrcConnector.java b/src/main/java/com/datagen/connector/storage/s3/S3OrcConnector.java index faea13d..c270402 100755 --- a/src/main/java/com/datagen/connector/storage/s3/S3OrcConnector.java +++ b/src/main/java/com/datagen/connector/storage/s3/S3OrcConnector.java @@ -184,7 +184,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/s3/S3ParquetConnector.java b/src/main/java/com/datagen/connector/storage/s3/S3ParquetConnector.java index 2be10bb..ed14b0b 100755 --- a/src/main/java/com/datagen/connector/storage/s3/S3ParquetConnector.java +++ b/src/main/java/com/datagen/connector/storage/s3/S3ParquetConnector.java @@ -178,7 +178,7 @@ public Model generateModel(Boolean deepAnalysis) { e); } - return new Model(fields, primaryKeys, tableNames, options); + return new Model(fields, primaryKeys, tableNames, options, null); } diff --git a/src/main/java/com/datagen/connector/storage/s3/S3Utils.java b/src/main/java/com/datagen/connector/storage/s3/S3Utils.java index fa27af9..f98229a 100644 --- a/src/main/java/com/datagen/connector/storage/s3/S3Utils.java +++ b/src/main/java/com/datagen/connector/storage/s3/S3Utils.java @@ -30,7 +30,7 @@ import java.security.MessageDigest; import java.util.Map; -import static com.datagen.config.ApplicationConfigs.DATA_HOME_DIRECTORY; +import static com.datagen.config.ApplicationConfigs.DATAGEN_HOME_DIRECTORY; import static software.amazon.awssdk.transfer.s3.SizeConstant.MB; /** @@ -78,7 +78,8 @@ public abstract class S3Utils { properties.get(ApplicationConfigs.S3_ACCESS_KEY_SECRET); this.region = properties.get(ApplicationConfigs.S3_REGION); - this.localFilePathForModelGeneration = properties.get(DATA_HOME_DIRECTORY) + "/model-gen/s3/"; + this.localFilePathForModelGeneration = properties.get( + DATAGEN_HOME_DIRECTORY) + "/model-gen/s3/"; AwsCredentialsProvider awsCredentialsProvider = StaticCredentialsProvider.create( diff --git a/src/main/java/com/datagen/controller/CommandRunnerController.java b/src/main/java/com/datagen/controller/CommandRunnerController.java index 3ce54bb..c2175e9 100755 --- a/src/main/java/com/datagen/controller/CommandRunnerController.java +++ b/src/main/java/com/datagen/controller/CommandRunnerController.java @@ -34,7 +34,7 @@ @Slf4j @RestController -@RequestMapping("/command") +@RequestMapping("/api/v1/command") public class CommandRunnerController { @Autowired diff --git a/src/main/java/com/datagen/controller/DataGenerationController.java b/src/main/java/com/datagen/controller/DataGenerationController.java index d541f7f..c170c5e 100755 --- a/src/main/java/com/datagen/controller/DataGenerationController.java +++ b/src/main/java/com/datagen/controller/DataGenerationController.java @@ -18,6 +18,7 @@ package com.datagen.controller; +import com.datagen.config.ApplicationConfigMapper; import com.datagen.config.PropertiesLoader; import com.datagen.service.APISevice; import com.datagen.service.CommandRunnerService; @@ -29,12 +30,13 @@ import java.util.Collections; import java.util.List; +import java.util.Map; import 
java.util.UUID; @Slf4j @RestController -@RequestMapping("/datagen") +@RequestMapping("/api/v1/datagen") public class DataGenerationController { @Autowired @@ -51,13 +53,15 @@ public class DataGenerationController { @ResponseBody public String generateIntoMultipleConnectors( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions, + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties, @RequestParam(name = "connectors") List connectors ) { StringBuffer connectorList = new StringBuffer(); @@ -70,21 +74,24 @@ public String generateIntoMultipleConnectors( "Received request with model: {} , threads: {} , batches: {}, rows: {}, to connectors: {}", modelFilePath, threads, numberOfBatches, rowsPerBatch, connectorList); return commandRunnerService.generateData(modelFile, modelFilePath, threads, - numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, connectors, - null); + numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, + connectors, + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/csv", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoCsv( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for CSV with model: {} , threads: {} , batches: {}, rows: {}", @@ -92,20 +99,23 @@ public String generateIntoCsv( Boolean scheduled = delayBetweenExecutions != null; return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("CSV"), null); + Collections.singletonList("CSV"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/json", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoJson( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for JSON with model: {} , threads: {} , batches: {}, rows: {}", @@ 
-113,20 +123,23 @@ public String generateIntoJson( Boolean scheduled = delayBetweenExecutions != null; return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("JSON"), null); + Collections.singletonList("JSON"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/avro", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoAvro( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for Avro with model: {} , threads: {} , batches: {}, rows: {}", @@ -134,20 +147,23 @@ public String generateIntoAvro( Boolean scheduled = delayBetweenExecutions != null; return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("AVRO"), null); + Collections.singletonList("AVRO"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/parquet", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoParquet( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for Parquet with model: {} , threads: {} , batches: {}, rows: {}", @@ -155,20 +171,23 @@ public String generateIntoParquet( Boolean scheduled = delayBetweenExecutions != null; return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("PARQUET"), null); + Collections.singletonList("PARQUET"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/orc", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoOrc( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map 
extraProperties ) { log.debug( "Received request for ORC with model: {} , threads: {} , batches: {}, rows: {}", @@ -176,20 +195,23 @@ public String generateIntoOrc( Boolean scheduled = delayBetweenExecutions != null; return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("ORC"), null); + Collections.singletonList("ORC"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/hdfs-csv", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoHdfsCsv( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for HDFS-CSV with model: {} , threads: {} , batches: {}, rows: {}", @@ -198,20 +220,23 @@ public String generateIntoHdfsCsv( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("HDFS-CSV"), null); + Collections.singletonList("HDFS-CSV"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/hdfs-avro", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoHdfsAvro( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for HDFS-AVRO with model: {} , threads: {} , batches: {}, rows: {}", @@ -221,20 +246,23 @@ public String generateIntoHdfsAvro( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("HDFS-AVRO"), null); + Collections.singletonList("HDFS-AVRO"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/hdfs-json", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoHdfsJson( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required 
= false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for HDFS-JSON with model: {} , threads: {} , batches: {}, rows: {}", @@ -244,20 +272,23 @@ public String generateIntoHdfsJson( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("HDFS-JSON"), null); + Collections.singletonList("HDFS-JSON"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/hdfs-parquet", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoHdfsParquet( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for HDFS-PARQUET with model: {} , threads: {} , batches: {}, rows: {}", @@ -267,20 +298,23 @@ public String generateIntoHdfsParquet( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("HDFS-PARQUET"), null); + Collections.singletonList("HDFS-PARQUET"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/hdfs-orc", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoHdfsOrc( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for HDFS-ORC with model: {} , threads: {} , batches: {}, rows: {}", @@ -290,20 +324,23 @@ public String generateIntoHdfsOrc( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("HDFS-ORC"), null); + Collections.singletonList("HDFS-ORC"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/hbase", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoHbase( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + 
@RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for HBASE with model: {} , threads: {} , batches: {}, rows: {}", @@ -313,20 +350,23 @@ public String generateIntoHbase( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("HBASE"), null); + Collections.singletonList("HBASE"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/hive", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoHive( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for HIVE with model: {} , threads: {} , batches: {}, rows: {}", @@ -336,20 +376,23 @@ public String generateIntoHive( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("HIVE"), null); + Collections.singletonList("HIVE"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/ozone", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoOzone( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for OZONE with model: {} , threads: {} , batches: {}, rows: {}", @@ -359,20 +402,23 @@ public String generateIntoOzone( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("OZONE"), null); + Collections.singletonList("OZONE"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/ozone-csv", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoOzoneCsv( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = 
"extraProperties") + Map extraProperties ) { log.debug( "Received request for OZONE-CSV with model: {} , threads: {} , batches: {}, rows: {}", @@ -382,20 +428,23 @@ public String generateIntoOzoneCsv( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("OZONE-CSV"), null); + Collections.singletonList("OZONE-CSV"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/ozone-json", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoOzoneJson( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for OZONE-JSON with model: {} , threads: {} , batches: {}, rows: {}", @@ -405,20 +454,23 @@ public String generateIntoOzoneJson( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("OZONE-JSON"), null); + Collections.singletonList("OZONE-JSON"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/ozone-avro", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoOzoneAvro( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for OZONE-AVRO with model: {} , threads: {} , batches: {}, rows: {}", @@ -428,20 +480,23 @@ public String generateIntoOzoneAvro( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("OZONE-AVRO"), null); + Collections.singletonList("OZONE-AVRO"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/ozone-parquet", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoOzoneParquet( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + 
@RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for OZONE-PARQUET with model: {} , threads: {} , batches: {}, rows: {}", @@ -451,20 +506,23 @@ public String generateIntoOzoneParquet( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("OZONE-PARQUET"), null); + Collections.singletonList("OZONE-PARQUET"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/ozone-orc", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoOzoneOrc( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for OZONE-ORC with model: {} , threads: {} , batches: {}, rows: {}", @@ -474,20 +532,23 @@ public String generateIntoOzoneOrc( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("OZONE-ORC"), null); + Collections.singletonList("OZONE-ORC"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/kafka", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoKafka( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for KAFKA with model: {} , threads: {} , batches: {}, rows: {}", @@ -497,20 +558,23 @@ public String generateIntoKafka( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("KAFKA"), null); + Collections.singletonList("KAFKA"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/solr", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoSolR( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long 
delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for SOLR with model: {} , threads: {} , batches: {}, rows: {}", @@ -520,20 +584,23 @@ public String generateIntoSolR( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("SOLR"), null); + Collections.singletonList("SOLR"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/kudu", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoKudu( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for KUDU with model: {} , threads: {} , batches: {}, rows: {}", @@ -543,14 +610,15 @@ public String generateIntoKudu( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("KUDU"), null); + Collections.singletonList("KUDU"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/api", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateAPI( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath ) { log.debug("Received request for API with model: {}", modelFilePath); @@ -572,13 +640,15 @@ public String getFromAPI(@RequestParam(name = "modelId") UUID modelId) { @ResponseBody public String generateIntoS3CSV( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for S3-CSV with model: {} , threads: {} , batches: {}, rows: {}", @@ -588,20 +658,23 @@ public String generateIntoS3CSV( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("S3-CSV"), null); + Collections.singletonList("S3-CSV"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/s3-json", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoS3Json( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String 
modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for S3-JSON with model: {} , threads: {} , batches: {}, rows: {}", @@ -611,20 +684,23 @@ public String generateIntoS3Json( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("S3-JSON"), null); + Collections.singletonList("S3-JSON"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/s3-avro", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoS3Avro( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for S3-AVRO with model: {} , threads: {} , batches: {}, rows: {}", @@ -634,20 +710,23 @@ public String generateIntoS3Avro( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("S3-AVRO"), null); + Collections.singletonList("S3-AVRO"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/s3-parquet", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoS3Parquet( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for S3-PARQUET with model: {} , threads: {} , batches: {}, rows: {}", @@ -657,20 +736,23 @@ public String generateIntoS3Parquet( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("S3-PARQUET"), null); + Collections.singletonList("S3-PARQUET"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/s3-orc", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoS3Orc( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String 
modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for S3-ORC with model: {} , threads: {} , batches: {}, rows: {}", @@ -680,20 +762,23 @@ public String generateIntoS3Orc( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("S3-ORC"), null); + Collections.singletonList("S3-ORC"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/adls-csv", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoAdlsCSV( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for ADLS-CSV with model: {} , threads: {} , batches: {}, rows: {}", @@ -703,20 +788,23 @@ public String generateIntoAdlsCSV( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("ADLS-CSV"), null); + Collections.singletonList("ADLS-CSV"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/adls-json", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoAdlsJson( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for ADLS-JSON with model: {} , threads: {} , batches: {}, rows: {}", @@ -726,20 +814,23 @@ public String generateIntoAdlsJson( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("ADLS-JSON"), null); + Collections.singletonList("ADLS-JSON"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/adls-avro", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoAdlsAvro( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String 
modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for ADLS-AVRO with model: {} , threads: {} , batches: {}, rows: {}", @@ -749,20 +840,23 @@ public String generateIntoAdlsAvro( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("ADLS-AVRO"), null); + Collections.singletonList("ADLS-AVRO"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/adls-parquet", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoAdlsParquet( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for ADLS-PARQUET with model: {} , threads: {} , batches: {}, rows: {}", @@ -772,20 +866,23 @@ public String generateIntoAdlsParquet( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("ADLS-PARQUET"), null); + Collections.singletonList("ADLS-PARQUET"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/adls-orc", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoAdlsOrc( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for ADLS-ORC with model: {} , threads: {} , batches: {}, rows: {}", @@ -795,7 +892,8 @@ public String generateIntoAdlsOrc( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("ADLS-ORC"), null); + Collections.singletonList("ADLS-ORC"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @@ -803,13 +901,15 @@ public String generateIntoAdlsOrc( @ResponseBody public String generateIntoGcsCSV( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String 
modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for GCS-CSV with model: {} , threads: {} , batches: {}, rows: {}", @@ -819,20 +919,23 @@ public String generateIntoGcsCSV( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("GCS-CSV"), null); + Collections.singletonList("GCS-CSV"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/gcs-json", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoGcsJson( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for GCS-JSON with model: {} , threads: {} , batches: {}, rows: {}", @@ -842,20 +945,23 @@ public String generateIntoGcsJson( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("GCS-JSON"), null); + Collections.singletonList("GCS-JSON"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/gcs-avro", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoGcsAvro( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for GCS-AVRO with model: {} , threads: {} , batches: {}, rows: {}", @@ -865,20 +971,23 @@ public String generateIntoGcsAvro( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("GCS-AVRO"), null); + Collections.singletonList("GCS-AVRO"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/gcs-parquet", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoGcsParquet( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") 
String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for GCS-PARQUET with model: {} , threads: {} , batches: {}, rows: {}", @@ -888,20 +997,23 @@ public String generateIntoGcsParquet( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("GCS-PARQUET"), null); + Collections.singletonList("GCS-PARQUET"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } @PostMapping(value = "/gcs-orc", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @ResponseBody public String generateIntoGcsOrc( @RequestPart(required = false, name = "model_file") - MultipartFile modelFile, + MultipartFile modelFile, @RequestParam(required = false, name = "model") String modelFilePath, @RequestParam(required = false, name = "threads") Integer threads, @RequestParam(required = false, name = "batches") Long numberOfBatches, @RequestParam(required = false, name = "rows") Long rowsPerBatch, @RequestParam(required = false, name = "delay_between_executions_seconds") - Long delayBetweenExecutions + Long delayBetweenExecutions, + @RequestParam(required = false, name = "extraProperties") + Map extraProperties ) { log.debug( "Received request for GCS-ORC with model: {} , threads: {} , batches: {}, rows: {}", @@ -911,7 +1023,8 @@ public String generateIntoGcsOrc( return commandRunnerService.generateData(modelFile, modelFilePath, threads, numberOfBatches, rowsPerBatch, scheduled, delayBetweenExecutions, - Collections.singletonList("GCS-ORC"), null); + Collections.singletonList("GCS-ORC"), + ApplicationConfigMapper.parsePropertiesMap(extraProperties)); } } diff --git a/src/main/java/com/datagen/controller/HealthController.java b/src/main/java/com/datagen/controller/HealthController.java index fc704b6..b42f430 100755 --- a/src/main/java/com/datagen/controller/HealthController.java +++ b/src/main/java/com/datagen/controller/HealthController.java @@ -24,7 +24,7 @@ @Slf4j @RestController -@RequestMapping("/health") +@RequestMapping("/api/v1/health") public class HealthController { @GetMapping(value = "/status") diff --git a/src/main/java/com/datagen/controller/MetricController.java b/src/main/java/com/datagen/controller/MetricController.java index d359f19..759ca81 100755 --- a/src/main/java/com/datagen/controller/MetricController.java +++ b/src/main/java/com/datagen/controller/MetricController.java @@ -29,7 +29,7 @@ @Slf4j @RestController -@RequestMapping("/metrics") +@RequestMapping("/api/v1/metrics") public class MetricController { @Autowired diff --git a/src/main/java/com/datagen/controller/ModelGenerationController.java b/src/main/java/com/datagen/controller/ModelGenerationController.java index 4ad691d..cc2de11 100755 --- a/src/main/java/com/datagen/controller/ModelGenerationController.java +++ b/src/main/java/com/datagen/controller/ModelGenerationController.java @@ -26,7 +26,7 @@ @Slf4j @RestController -@RequestMapping("/model_generation") +@RequestMapping("/api/v1/model_generation") public class ModelGenerationController { @Autowired diff --git 
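With every controller now rooted under /api/v1, existing clients must adjust their URLs. A hedged client-side sketch against one rebased endpoint follows; the port, model path, and property name are placeholders, and the exact wire encoding of the extraProperties map is an assumption that depends on how Spring binds it server-side:

import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.util.LinkedMultiValueMap;
import org.springframework.util.MultiValueMap;
import org.springframework.web.client.RestTemplate;

public class DatagenApiV1Example {
  public static void main(String[] args) {
    HttpHeaders headers = new HttpHeaders();
    headers.setContentType(MediaType.MULTIPART_FORM_DATA);

    MultiValueMap<String, Object> form = new LinkedMultiValueMap<>();
    form.add("model", "/path/to/model.json"); // placeholder model path
    form.add("threads", "4");
    form.add("batches", "10");
    form.add("rows", "1000");
    // Assumed encoding for the optional extraProperties map
    form.add("extraProperties[kafka.conf.client.path]", "/tmp/client.properties");

    String response = new RestTemplate().postForObject(
        "http://localhost:8080/api/v1/datagen/json", // note the new /api/v1 prefix
        new HttpEntity<>(form, headers), String.class);
    System.out.println(response);
  }
}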
a/src/main/java/com/datagen/controller/ModelTesterController.java b/src/main/java/com/datagen/controller/ModelTesterController.java index 2fb3c44..aeadf53 100755 --- a/src/main/java/com/datagen/controller/ModelTesterController.java +++ b/src/main/java/com/datagen/controller/ModelTesterController.java @@ -28,7 +28,7 @@ @Slf4j @RestController -@RequestMapping("/model") +@RequestMapping("/api/v1/model") public class ModelTesterController { @Autowired diff --git a/src/main/java/com/datagen/model/Model.java b/src/main/java/com/datagen/model/Model.java index 9e6b5df..56f346d 100755 --- a/src/main/java/com/datagen/model/Model.java +++ b/src/main/java/com/datagen/model/Model.java @@ -18,6 +18,7 @@ package com.datagen.model; +import com.datagen.config.ApplicationConfigs; import com.datagen.model.conditions.ConditionalEvaluator; import com.datagen.model.type.Field; import com.datagen.parsers.JsonUnparser; @@ -42,7 +43,7 @@ * - Tables names * - Other options if needed * This class describes also how to generate random data - * It also describe how to initialize certain systems for that model (i.e. table creation) + * It also describes how to initialize certain systems for that model (i.e. table creation) */ @Slf4j @Getter @@ -75,6 +76,11 @@ public class Model { @Setter private Map options; + // Properties of the application for this model (they contain specifications for access to some services) + @Getter + @JsonIgnore + private Map properties; + /** * Constructor that initializes the model and populates it completely @@ -87,7 +93,8 @@ public class Model { */ public Model(LinkedHashMap fields, Map> primaryKeys, - Map tableNames, Map options) { + Map tableNames, Map options, + Map properties) { this.fields = fields; this.fieldsRandomName = fields.entrySet().stream().filter(f -> !f.getValue().computed) @@ -105,6 +112,7 @@ public Model(LinkedHashMap fields, this.tableNames = convertTableNames(tableNames); this.options = convertOptions(options); + this.properties = properties==null?new HashMap<>(): properties; // For all conditions passed, we need to check types used to prepare future comparisons diff --git a/src/main/java/com/datagen/model/conditions/ConditionsLine.java b/src/main/java/com/datagen/model/conditions/ConditionsLine.java index e5004a9..12f060a 100755 --- a/src/main/java/com/datagen/model/conditions/ConditionsLine.java +++ b/src/main/java/com/datagen/model/conditions/ConditionsLine.java @@ -19,6 +19,7 @@ import com.datagen.model.Row; +import com.datagen.utils.ParsingUtils; import lombok.Getter; import lombok.Setter; import lombok.extern.slf4j.Slf4j; @@ -42,42 +43,21 @@ public class ConditionsLine { // To indicate if there are multiple conditions on this line or only one - @Getter - @Setter private boolean combinedCondition = false; - @Getter - @Setter - private boolean formula = false; - - @Getter - @Setter - private Formula formulaToEvaluate; - - @Getter - @Setter private boolean defaultValue = false; - @Getter - @Setter private String valueToReturn; @Getter - @Setter - private boolean link = false; - - @Getter - @Setter private Link linkToEvaluate; - @Getter - @Setter + private boolean link = false; + private boolean formula = false; private boolean injection = false; - @Getter - @Setter - private Injection injectionToEvaluate; - + private LinkedList stringFragments; + private JsEvaluator jsEvaluator; public ConditionsLine(String conditionLine, String valueToReturn) { this.valueToReturn = valueToReturn; @@ -93,7 +73,8 @@ public ConditionsLine(String conditionLine, String valueToReturn) { } 
else if (conditionSplitted[0].equalsIgnoreCase("formula")) { log.debug("Found a formula, that will need to be evaluated"); this.formula = true; - this.formulaToEvaluate = new Formula(valueToReturn); + this.jsEvaluator = new JsEvaluator(); + this.stringFragments = ParsingUtils.parseStringWithVars(valueToReturn); return; } else if (conditionSplitted[0].equalsIgnoreCase("link")) { log.debug("Found a link, that will need to be evaluated"); @@ -103,7 +84,7 @@ public ConditionsLine(String conditionLine, String valueToReturn) { } else if (conditionSplitted[0].equalsIgnoreCase("injection")) { log.debug("Found an injection, that will need to be evaluated"); this.injection = true; - this.injectionToEvaluate = new Injection(valueToReturn); + this.stringFragments = ParsingUtils.parseStringWithVars(valueToReturn); return; } else if (conditionSplitted[0].equalsIgnoreCase("default")) { log.debug("Found a default, No evaluation needed"); @@ -171,7 +152,9 @@ public boolean isLineSatisfied(Row row) { return listOfConditions.get(0).evaluateCondition(row); } else if (this.formula) { // Formula case - this.valueToReturn = formulaToEvaluate.evaluateFormula(row); + this.valueToReturn = jsEvaluator.evaluateJsExpression( + ParsingUtils.injectRowValuesToAString(row, this.stringFragments) + ); return true; } else if (this.link) { // Formula case @@ -179,7 +162,7 @@ public boolean isLineSatisfied(Row row) { return true; } else if (this.injection) { // Formula case - this.valueToReturn = injectionToEvaluate.evaluateInjection(row); + this.valueToReturn = ParsingUtils.injectRowValuesToAString(row, this.stringFragments); return true; } else { // Default case diff --git a/src/main/java/com/datagen/model/conditions/Formula.java b/src/main/java/com/datagen/model/conditions/Formula.java deleted file mode 100755 index 7ea70eb..0000000 --- a/src/main/java/com/datagen/model/conditions/Formula.java +++ /dev/null @@ -1,85 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
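The Formula and Injection classes deleted below are folded into one shared pattern: ParsingUtils.parseStringWithVars(...) splits the template once at construction time into stringFragments, ParsingUtils.injectRowValuesToAString(...) substitutes the current row's values at evaluation time, and formulas are additionally run through the new JsEvaluator. The helpers themselves sit outside this diff; a rough sketch of the assumed flow, with the ${...} variable syntax taken from the old Injection parser:

// Assumed end-to-end flow for a formula condition (helper internals are not shown in this diff)
var fragments = ParsingUtils.parseStringWithVars("${price} * ${quantity}"); // parsed once, kept as stringFragments
String expression = ParsingUtils.injectRowValuesToAString(row, fragments);  // e.g. "19.99 * 3" for the current row
String value = new JsEvaluator().evaluateJsExpression(expression);          // e.g. "59.97"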
- * http://www.apache.org/licenses/LICENSE-2.0 - *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.datagen.model.conditions; - -import com.datagen.model.Row; -import lombok.Getter; -import lombok.Setter; -import lombok.extern.slf4j.Slf4j; - -import javax.script.ScriptEngine; -import javax.script.ScriptEngineManager; -import javax.script.ScriptException; -import java.util.LinkedList; - -@Slf4j -public class Formula { - - - // for all cols name existing in model, try to find which one are involved in the formula and put them in a list - @Getter - @Setter - private LinkedList listOfColsToEvaluate; - - @Getter - @Setter - private String formulaToEvaluate; - - private final ScriptEngineManager scriptEngineManager; - private final ScriptEngine scriptEngine; - - Formula(String formula) { - // fill in the listOfColsToEvaluate + Create formula string with no $ - listOfColsToEvaluate = new LinkedList<>(); - for (String field : formula.substring(formula.indexOf("$") + 1) - .split("[$]")) { - listOfColsToEvaluate.add(field.split("\\s+")[0]); - log.debug( - "Add Field : " + field.split("\\s+")[0] + " to be in the formula"); - } - formulaToEvaluate = formula.replaceAll("[$]", ""); - scriptEngineManager = new ScriptEngineManager(); - scriptEngine = scriptEngineManager.getEngineByName("JavaScript"); - } - - public String evaluateFormula(Row row) { - // Evaluate formula using an evaluator (or built this evaluator) - String formulaReplaced = formulaToEvaluate; - for (String colName : listOfColsToEvaluate) { - log.debug(formulaReplaced); - formulaReplaced = formulaReplaced.replaceAll("(^| )" + colName + "($| )", - row.getValues().get(colName).toString()); - } - log.debug(formulaReplaced); - return computeFormula(formulaReplaced); - } - - private String computeFormula(String formula) { - Object value = 0f; - try { - value = scriptEngine.eval(formula); - log.debug("Evaluating formula: " + formula + " to: " + value); - } catch (ScriptException e) { - log.warn("Could not evaluate expression: " + formula + " due to error: ", - e); - } - return value.toString(); - } - - -} diff --git a/src/main/java/com/datagen/model/conditions/Injection.java b/src/main/java/com/datagen/model/conditions/Injection.java deleted file mode 100755 index 6362b24..0000000 --- a/src/main/java/com/datagen/model/conditions/Injection.java +++ /dev/null @@ -1,85 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
- * http://www.apache.org/licenses/LICENSE-2.0 - *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.datagen.model.conditions; - - -import com.datagen.model.Row; -import lombok.AllArgsConstructor; -import lombok.extern.slf4j.Slf4j; - - -import java.util.LinkedList; -import java.util.Map; - -@Slf4j -public class Injection { - - @AllArgsConstructor - private class InjectedField { - String stringToPrint; - Boolean toReplace; - } - - private final LinkedList injectedFieldNames = - new LinkedList<>(); - - Injection(String injection) { - for (String s : injection.split("[$]")) { - if (s.length() != 0) { - if (s.charAt(0) != '{') { - log.debug(s + " is not a variable name"); - injectedFieldNames.add(new InjectedField(s, false)); - } else { - String fieldToAdd = s.substring(1, s.indexOf('}')); - log.debug(fieldToAdd + " is found as a variable name"); - injectedFieldNames.add(new InjectedField(fieldToAdd, true)); - if (s.length() > s.indexOf('}')) { - log.debug( - s.substring(s.indexOf('}') + 1) + " is not a variable name"); - injectedFieldNames.add( - new InjectedField(s.substring(s.indexOf('}') + 1), false)); - } - } - } - } - } - - public String evaluateInjection(Row row) { - Map rowValues = row.getValues(); - StringBuilder sb = new StringBuilder(); - try { - for (InjectedField fieldNameToReplace : injectedFieldNames) { - if (fieldNameToReplace.toReplace) { - sb.append( - row.getModel().getFieldFromName(fieldNameToReplace.stringToPrint) - .toStringValue( - rowValues.get(fieldNameToReplace.stringToPrint))); - } else { - sb.append(fieldNameToReplace.stringToPrint); - } - } - } catch (Exception e) { - log.error("Can not evaluate injection so returning empty value, see: ", - e); - } - - return sb.toString(); - } - - -} diff --git a/src/main/java/com/datagen/model/conditions/JsEvaluator.java b/src/main/java/com/datagen/model/conditions/JsEvaluator.java new file mode 100755 index 0000000..8564853 --- /dev/null +++ b/src/main/java/com/datagen/model/conditions/JsEvaluator.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.datagen.model.conditions; + +import lombok.extern.slf4j.Slf4j; +import org.graalvm.polyglot.Context; +import org.graalvm.polyglot.PolyglotException; + +@Slf4j +public class JsEvaluator { + + private final Context context; + + JsEvaluator() { + this.context = Context.newBuilder() + .allowAllAccess(true) + .build(); + context.initialize("js"); + } + + String evaluateJsExpression(String expression) { + Object value = 0f; + try { + value = context.eval("js", expression); + log.debug("Evaluating formula: " + expression + " to: " + value); + } catch (PolyglotException e) { + log.warn("Could not evaluate expression: " + expression + " due to error: ", + e); + } + return value.toString(); + } + + +} diff --git a/src/main/java/com/datagen/model/type/BedrockField.java b/src/main/java/com/datagen/model/type/BedrockField.java new file mode 100755 index 0000000..ed5a77d --- /dev/null +++ b/src/main/java/com/datagen/model/type/BedrockField.java @@ -0,0 +1,240 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
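The JsEvaluator just added replaces the Nashorn ScriptEngine that the deleted Formula class relied on with a GraalVM polyglot Context, matching the org.graalvm.polyglot artifacts introduced in the pom. A self-contained sketch of the same pattern; unlike the long-lived context field above, this demo closes its context with try-with-resources:

import org.graalvm.polyglot.Context;

public class JsEvalDemo {
  public static void main(String[] args) {
    // One context up front, then evaluate JS expressions against it
    try (Context context = Context.newBuilder().allowAllAccess(true).build()) {
      context.initialize("js");
      System.out.println(context.eval("js", "2 * 21").asInt()); // prints 42
    }
  }
}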
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
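This new file introduces a BedrockField whose constructor (below) wires AWS credentials, a region, a model id, and a templated prompt into a BedrockRuntimeClient; generateComputedValue(row) later renders the prompt from the row's values and sends it through InvokeModel. For context, a hypothetical instantiation in which the credential sources, the prompt, and the ${...} variable syntax are all assumptions:

// Hypothetical example only; every value below is a placeholder
BedrockField summary = new BedrockField(
    "review_summary",                             // field name
    null,                                         // url: unused, the SDK resolves the Bedrock endpoint
    System.getenv("AWS_ACCESS_KEY_ID"),           // user, treated as the AWS access key
    System.getenv("AWS_SECRET_ACCESS_KEY"),       // password, treated as the AWS secret key
    "Write a one-line review of ${product_name}", // request template with an injected row variable
    "meta.llama3-8b-instruct-v1:0",               // modelType, one of the tested model ids
    0.7f,                                         // temperature
    "us-east-1",                                  // region
    128);                                         // maxTokens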
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.datagen.model.type; + +import com.datagen.model.Row; +import com.datagen.utils.ParsingUtils; +import lombok.extern.slf4j.Slf4j; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hive.jdbc.HivePreparedStatement; +import org.apache.kudu.Type; +import org.apache.kudu.client.PartialRow; +import org.apache.orc.TypeDescription; +import org.json.JSONException; +import org.json.JSONObject; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.core.SdkBytes; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.bedrockruntime.BedrockRuntimeClient; + +import java.sql.SQLException; +import java.util.LinkedList; +import java.util.List; + +@Slf4j +public class BedrockField extends Field { + + private final String url; + private final String user; + private final String password; + private final Double temperature; + private final Integer maxTokens; + private final Region region; + private final LinkedList requestToInject; + private final BedrockRuntimeClient bedrockRuntimeClient; + private final String modelId; + private final BedrockModelType bedrockmodeltype; + private JSONObject preparedRequest = null; + + public BedrockField(String name, String url, String user, String password, + String request, String modelType, Float temperature, String region, Integer maxTokens) { + this.name = name; + this.url = url; + this.user = user; + this.password = password; + this.temperature = temperature == null ? 0.5 : temperature; + this.maxTokens = maxTokens == null ? 256 : maxTokens; + this.requestToInject = ParsingUtils.parseStringWithVars(request); + this.region = region!=null?Region.of(region):Region.US_EAST_1; + + AwsCredentialsProvider awsCredentialsProvider = + StaticCredentialsProvider.create( + AwsBasicCredentials.create(this.user, this.password)); + + this.bedrockRuntimeClient = BedrockRuntimeClient.builder() + .credentialsProvider(awsCredentialsProvider) + .region(this.region) + .build(); + + // See model Ids available at: https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html + this.modelId = modelType == null ? 
"amazon.titan-text-lite-v1" : modelType; + /* + Tested with + MISTRAL: mistral.mistral-small-2402-v1:0 + TITAN: amazon.titan-text-lite-v1 + LLAMA: meta.llama3-8b-instruct-v1:0 + */ + + this.bedrockmodeltype = switch (modelId.split("\\.")[0]) { + case "anthropic": + yield BedrockModelType.ANTHROPIC; + case "mistral": + yield BedrockModelType.MISTRAL; + case "amazon": + yield BedrockModelType.TITAN; + case "meta": + yield BedrockModelType.LLAMA; + default: + yield BedrockModelType.TITAN; + }; + + // JSON prepared request for model + try { + this.preparedRequest = switch (bedrockmodeltype) { + case TITAN: + yield new JSONObject(); + case ANTHROPIC: + yield new JSONObject() + .put("temperature", this.temperature) + .put("stop_sequences", List.of("\n\nHuman:")) + .put("max_tokens_to_sample", this.maxTokens); + case MISTRAL: + yield new JSONObject() + .put("temperature", this.temperature) + .put("max_tokens", this.maxTokens); + case LLAMA: + yield new JSONObject() + .put("temperature", this.temperature); + }; + } catch (JSONException e) { + log.warn("Could not prepare request to Bedrock due to error: ", e); + } + + } + + @Override + public String generateComputedValue(Row row) { + String stringToEvaluate = + ParsingUtils.injectRowValuesToAString(row, requestToInject); + log.debug("Asking to Bedrock: {}", stringToEvaluate); + var responseText = ""; + + try { + switch (this.bedrockmodeltype) { + case ANTHROPIC -> preparedRequest.put("prompt", + "Human: " + stringToEvaluate + "\\n\\nAssistant:"); + case MISTRAL -> preparedRequest.put("prompt", + "[INST] " + stringToEvaluate + "[/INST]"); + case TITAN -> preparedRequest.put("inputText", stringToEvaluate); + default -> preparedRequest.put("prompt", stringToEvaluate); + } + + // Encode and send the request. + var response = bedrockRuntimeClient.invokeModel(req -> req + .accept("application/json") + .contentType("application/json") + .body(SdkBytes.fromUtf8String(preparedRequest.toString())) + .modelId(modelId)); + + // Extract response + var responseBody = new JSONObject(response.body().asUtf8String()); + + log.debug("Response body from Bedrock: {}", responseBody); + + responseText = switch (this.bedrockmodeltype) { + case TITAN: + yield responseBody.getJSONArray("results").getJSONObject(0) + .getString("outputText"); + case LLAMA: + yield responseBody.getString("generation"); + case MISTRAL: + yield responseBody.getJSONArray("outputs").getJSONObject(0) + .getString("text"); + case ANTHROPIC: + yield responseBody.getString("completion"); + }; + + } catch (JSONException e) { + log.warn("Cannot insert or decode JSON from/to Bedrock due to error: ", + e); + } + + return responseText; + } + + @Override + public String generateRandomValue() { + return ""; + } + + @Override + public Put toHbasePut(String value, Put hbasePut) { + hbasePut.addColumn(Bytes.toBytes(hbaseColumnQualifier), Bytes.toBytes(name), + Bytes.toBytes(value)); + return hbasePut; + } + + /* + Override if needed Field function to insert into special connectors + */ + + @Override + public PartialRow toKudu(String value, PartialRow partialRow) { + partialRow.addString(name, value); + return partialRow; + } + + @Override + public Type getKuduType() { + return Type.STRING; + } + + @Override + public HivePreparedStatement toHive(String value, int index, + HivePreparedStatement hivePreparedStatement) { + try { + hivePreparedStatement.setString(index, value); + } catch (SQLException e) { + log.warn("Could not set value : " + value.toString() + + " into hive statement due to error :", e); + } + 
return hivePreparedStatement; + } + + @Override + public String getHiveType() { + return "STRING"; + } + + @Override + public String getGenericRecordType() { + return "string"; + } + + @Override + public ColumnVector getOrcColumnVector(VectorizedRowBatch batch, int cols) { + return batch.cols[cols]; + } + + @Override + public TypeDescription getTypeDescriptionOrc() { + return TypeDescription.createString(); + } + + private enum BedrockModelType { + ANTHROPIC, + TITAN, + MISTRAL, + LLAMA + } +} diff --git a/src/main/java/com/datagen/model/type/Field.java b/src/main/java/com/datagen/model/type/Field.java index 810851a..eb5d819 100755 --- a/src/main/java/com/datagen/model/type/Field.java +++ b/src/main/java/com/datagen/model/type/Field.java @@ -18,6 +18,7 @@ package com.datagen.model.type; +import com.datagen.config.ApplicationConfigs; import com.datagen.model.Row; import com.datagen.model.conditions.ConditionalEvaluator; import com.fasterxml.jackson.databind.JsonNode; @@ -46,82 +47,48 @@ @Slf4j public abstract class Field { - Random random = new Random(); - @Getter @Setter public String name; - @Getter @Setter public Boolean computed = false; - @Getter @Setter public List possibleValues; - @Getter @Setter public Integer possibleValueSize; - @Getter @Setter public List filters; - @Getter @Setter public String file; - // This is a conditional evaluator holding all complexity (parsing, preparing comparison, evaluating it) @Getter @Setter public ConditionalEvaluator conditional; - // Default length is -1, if user does not provide a strict superior to 0 length, // each Extended field class should by default override it to a number strictly superior to 0 @Getter @Setter public int length = -1; - // Minimum possible value for Int/Long @Getter @Setter public Long min; - // Maximum possible value Int/Long @Getter @Setter public Long max; - @Getter @Setter public String hbaseColumnQualifier = "cq"; - @Getter @Setter public boolean ghost; - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("Class Type is " + this.getClass().getSimpleName() + " ; "); - sb.append("name : " + name + " ; "); - sb.append("hbase Column Qualifier : " + hbaseColumnQualifier + " ; "); - sb.append("Length : " + length + " ; "); - if (min != null) { - sb.append("Min : " + min + " ; "); - } - if (max != null) { - sb.append("Max : " + max + " ; "); - } - return sb.toString(); - } - - public abstract T generateRandomValue(); - - public T generateComputedValue(Row row) { - return toCastValue(conditional.evaluateConditions(row)); - } + Random random = new Random(); public static String toString(List fieldList) { StringBuilder sb = new StringBuilder(); @@ -138,7 +105,6 @@ public static String toString(List fieldList) { return sb.toString(); } - /** * Create the right instance of a field (i.e. String, password etc..) 
according to its type * @@ -148,29 +114,39 @@ public static String toString(List fieldList) { * @param columnQualifier Hbase column qualifier if there is one * @return Field instantiated or null if type has not been recognized */ - public static Field instantiateField(String name, - String type, - Integer length, - String columnQualifier, - List possibleValues, - LinkedHashMap possible_values_weighted, - LinkedHashMap conditionals, - String min, - String max, - List filters, - String file, - String separator, - String pattern, - Boolean useNow, - String regex, - String request, - Boolean ghost, - String mainField, - String formula, - String injection, - String url, - String user, - String password) { + public static Field instantiateField( + Map properties, + String name, + String type, + Integer length, + String min, + String max, + String columnQualifier, + List possibleValues, + LinkedHashMap possible_values_weighted, + List filters, + LinkedHashMap conditionals, + String file, + String separator, + String pattern, + Boolean useNow, + String regex, + String request, + Boolean ghost, + String mainField, + String formula, + String injection, + String link, + String url, + String user, + String password, + String modelType, + Float temperature, + Float frequencyPenalty, + Float presencePenalty, + Integer maxTokens, + Float topP + ) { if (name == null || name.isEmpty()) { throw new IllegalStateException( "Name can not be null or empty for field: " + name); @@ -185,128 +161,189 @@ public static Field instantiateField(String name, length = -1; } - Field field; - - switch (type.toUpperCase()) { - case "STRING": - field = new StringField(name, length, - possibleValues.stream().map(JsonNode::asText) - .collect(Collectors.toList()), possible_values_weighted); - break; - case "STRINGAZ": - field = new StringAZField(name, length, - possibleValues.stream().map(JsonNode::asText) - .collect(Collectors.toList())); - break; - case "INTEGER": - field = new IntegerField(name, - possibleValues.stream().map(JsonNode::asInt) - .collect(Collectors.toList()), possible_values_weighted, min, - max); - break; - case "INCREMENT_INTEGER": - field = new IncrementIntegerField(name, min); - break; - case "BOOLEAN": - field = new BooleanField(name, - possibleValues.stream().map(JsonNode::asBoolean) - .collect(Collectors.toList()), possible_values_weighted); - break; - case "FLOAT": - field = new FloatField(name, - possibleValues.stream().map(j -> (float) j.asDouble()) - .collect(Collectors.toList()), possible_values_weighted, min, - max); - break; - case "LONG": - field = new LongField(name, - possibleValues.stream().map(JsonNode::asLong) - .collect(Collectors.toList()), possible_values_weighted, min, - max); - break; - case "INCREMENT_LONG": - field = new IncrementLongField(name, min); - break; - case "TIMESTAMP": - field = new TimestampField(name, - possibleValues.stream().map(JsonNode::asLong) - .collect(Collectors.toList())); - break; - case "BYTES": - field = new BytesField(name, length, - possibleValues.stream().map(j -> j.asText().getBytes()) - .collect(Collectors.toList())); - break; - case "HASHMD5": - field = new HashMd5Field(name, length, - possibleValues.stream().map(j -> j.asText().getBytes()) - .collect(Collectors.toList())); - break; - case "BIRTHDATE": - field = new BirthdateField(name, length, - possibleValues.stream().map(JsonNode::asText) - .collect(Collectors.toList()), min, max); - break; - case "NAME": - field = new NameField(name, length, - 
filters.stream().map(JsonNode::asText).collect(Collectors.toList())); - break; - case "COUNTRY": - field = new CountryField(name, length, - possibleValues.stream().map(JsonNode::asText) - .collect(Collectors.toList())); - break; - case "CITY": - field = new CityField(name, - filters.stream().map(JsonNode::asText).collect(Collectors.toList())); - break; - case "BLOB": - field = new BlobField(name, length, - possibleValues.stream().map(j -> j.asText().getBytes()) - .collect(Collectors.toList())); - break; - case "EMAIL": - field = new EmailField(name, - possibleValues.stream().map(JsonNode::asText) - .collect(Collectors.toList()), - filters.stream().map(JsonNode::asText).collect(Collectors.toList())); - break; - case "IP": - field = new IpField(name); - break; - case "LINK": - field = new LinkField(name, length, - possibleValues.stream().map(JsonNode::asText) - .collect(Collectors.toList())); - break; - case "CSV": - field = new CsvField(name, length, - filters.stream().map(JsonNode::asText).collect(Collectors.toList()), - file, separator, mainField); - break; - case "PHONE": - field = new PhoneField(name, length, - filters.stream().map(JsonNode::asText).collect(Collectors.toList())); - break; - case "UUID": - field = new UuidField(name); - break; - case "DATE": - field = new DateField(name, possibleValues.stream().map(JsonNode::asText) - .collect(Collectors.toList()), min, max, useNow); - break; - case "DATE_AS_STRING": - field = new DateAsStringField(name, possibleValues.stream().map(JsonNode::asText) - .collect(Collectors.toList()), min, max, useNow, pattern); - break; - case "STRING_REGEX": - field = new StringRegexField(name, regex); - break; - default: - log.warn("Type : " + type + - " has not been recognized and hence will be ignored"); - return null; - } + Field field = switch (type.toUpperCase()) { + case "STRING": + yield new StringField(name, length, + possibleValues.stream().map(JsonNode::asText) + .collect(Collectors.toList()), possible_values_weighted); + + case "STRINGAZ": + yield new StringAZField(name, length, + possibleValues.stream().map(JsonNode::asText) + .collect(Collectors.toList())); + + case "INTEGER": + yield new IntegerField(name, + possibleValues.stream().map(JsonNode::asInt) + .collect(Collectors.toList()), possible_values_weighted, min, + max); + + case "INCREMENT_INTEGER": + yield new IncrementIntegerField(name, min); + + case "BOOLEAN": + yield new BooleanField(name, + possibleValues.stream().map(JsonNode::asBoolean) + .collect(Collectors.toList()), possible_values_weighted); + + case "FLOAT": + yield new FloatField(name, + possibleValues.stream().map(j -> (float) j.asDouble()) + .collect(Collectors.toList()), possible_values_weighted, min, + max); + + case "LONG": + yield new LongField(name, + possibleValues.stream().map(JsonNode::asLong) + .collect(Collectors.toList()), possible_values_weighted, min, + max); + + case "INCREMENT_LONG": + yield new IncrementLongField(name, min); + + case "TIMESTAMP": + yield new TimestampField(name, + possibleValues.stream().map(JsonNode::asLong) + .collect(Collectors.toList())); + + case "BYTES": + yield new BytesField(name, length, + possibleValues.stream().map(j -> j.asText().getBytes()) + .collect(Collectors.toList())); + + case "HASHMD5": + yield new HashMd5Field(name, length, + possibleValues.stream().map(j -> j.asText().getBytes()) + .collect(Collectors.toList())); + + case "BIRTHDATE": + yield new BirthdateField(name, length, + possibleValues.stream().map(JsonNode::asText) + .collect(Collectors.toList()), min, max); + 
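+      // Illustrative (hypothetical) example of how a model field routes here:
+      //   { "name": "country", "type": "COUNTRY", "possible_values": ["France", "Spain"] }
+      // arrives with type="COUNTRY", so the COUNTRY case below builds a
+      // CountryField from the possible_values JsonNodes via JsonNode::asText.
+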
+ case "NAME": + yield new NameField(name, length, + filters.stream().map(JsonNode::asText) + .collect(Collectors.toList())); + + case "COUNTRY": + yield new CountryField(name, length, + possibleValues.stream().map(JsonNode::asText) + .collect(Collectors.toList())); + + case "CITY": + yield new CityField(name, + filters.stream().map(JsonNode::asText) + .collect(Collectors.toList())); + + case "BLOB": + yield new BlobField(name, length, + possibleValues.stream().map(j -> j.asText().getBytes()) + .collect(Collectors.toList())); + + case "EMAIL": + yield new EmailField(name, + possibleValues.stream().map(JsonNode::asText) + .collect(Collectors.toList()), + filters.stream().map(JsonNode::asText) + .collect(Collectors.toList())); + + case "IP": + yield new IpField(name); + + case "LINK": + yield new LinkField(name, length, + possibleValues.stream().map(JsonNode::asText) + .collect(Collectors.toList())); + + case "CSV": + yield new CsvField(name, length, + filters.stream().map(JsonNode::asText).collect(Collectors.toList()), + file, separator, mainField); + + case "PHONE": + yield new PhoneField(name, length, + filters.stream().map(JsonNode::asText) + .collect(Collectors.toList())); + + case "UUID": + yield new UuidField(name); + + case "DATE": + yield new DateField(name, possibleValues.stream().map(JsonNode::asText) + .collect(Collectors.toList()), min, max, useNow); + + case "DATE_AS_STRING": + yield new DateAsStringField(name, + possibleValues.stream().map(JsonNode::asText) + .collect(Collectors.toList()), min, max, useNow, pattern); + + case "STRING_REGEX": + yield new StringRegexField(name, regex); + + case "OLLAMA": + yield new OllamaField(name, + url, + user, password, request, + modelType == null ? + properties.get(ApplicationConfigs.OLLAMA_MODEL_DEFAULT) : + modelType, + temperature == null ? Float.valueOf( + properties.get(ApplicationConfigs.OLLAMA_TEMPERATURE_DEFAULT)) : + temperature, + frequencyPenalty == null ? Float.valueOf(properties.get( + ApplicationConfigs.OLLAMA_FREQUENCY_PENALTY_DEFAULT)) : frequencyPenalty, + presencePenalty == null ? Float.valueOf(properties.get( + ApplicationConfigs.OLLAMA_PRESENCE_PENALTY_DEFAULT)) : presencePenalty, + topP == null ? Float.valueOf(properties.get( + ApplicationConfigs.OLLAMA_TOP_P_DEFAULT)) : topP + ); + + case "BEDROCK": + yield new BedrockField(name, url, + user == null ? + properties.get(ApplicationConfigs.BEDROCK_ACCESS_KEY_ID) : user, + password == null ? + properties.get(ApplicationConfigs.BEDROCK_ACCESS_KEY_SECRET) : + password, + request, + modelType == null ? + properties.get(ApplicationConfigs.BEDROCK_MODEL_DEFAULT) : + modelType, + temperature == null ? Float.valueOf(properties.get( + ApplicationConfigs.BEDROCK_TEMPERATURE_DEFAULT)) : temperature, + properties.get(ApplicationConfigs.BEDROCK_REGION), + maxTokens == null ? Integer.valueOf(properties.get( + ApplicationConfigs.BEDROCK_MAX_TOKENS_DEFAULT)) : maxTokens + ); + + case "OPENAI": + yield new OpenAIField(name, url, + user, + password == null ? + properties.get(ApplicationConfigs.OPENAI_API_KEY) : + password, + request, + modelType == null ? + properties.get(ApplicationConfigs.OPENAI_MODEL_DEFAULT) : + modelType, + temperature == null ? Float.valueOf(properties.get( + ApplicationConfigs.OPENAI_TEMPERATURE_DEFAULT)) : temperature, + frequencyPenalty == null ? Float.valueOf(properties.get( + ApplicationConfigs.OPENAI_FREQUENCY_PENALTY_DEFAULT)) : frequencyPenalty, + presencePenalty == null ? 
Float.valueOf(properties.get( + ApplicationConfigs.OPENAI_PRESENCE_PENALTY_DEFAULT)) : presencePenalty, + maxTokens == null ? Integer.valueOf(properties.get( + ApplicationConfigs.OPENAI_MAX_TOKENS_DEFAULT)) : maxTokens, + topP == null ? Float.valueOf(properties.get( + ApplicationConfigs.OPENAI_TOP_P_DEFAULT)) : topP + ); + + default: + log.warn("Type : " + type + + " has not been recognized and hence will be ignored"); + yield null; + }; // If hbase column qualifier is not accurate, it should be let as is (default is "cq") if (columnQualifier != null && !columnQualifier.isEmpty()) { @@ -318,12 +355,29 @@ public static Field instantiateField(String name, // If there are some conditions, we consider this field as computed (meaning it requires other fields' values to get its value) // and same thing for request if it contains a '$' if ((conditionals != null && !conditionals.isEmpty()) - || (request!=null && request.contains("$")) - || (formula!=null) - || (injection!=null)) { - log.debug("Field {} has been marked as conditional: ", field); + || (request != null && request.contains("$")) + || (formula != null) + || (injection != null) + || (link != null)) { + log.debug("Field {} has been marked as computed: ", field); field.setComputed(true); + } + + // Set conditionals or formula or injections for the field if there are + if ((conditionals != null && !conditionals.isEmpty())) { field.setConditional(new ConditionalEvaluator(conditionals)); + } else if (formula != null) { + LinkedHashMap lm = new LinkedHashMap<>(); + lm.put("formula", formula); + field.setConditional(new ConditionalEvaluator(lm)); + } else if (injection != null) { + LinkedHashMap lm = new LinkedHashMap<>(); + lm.put("injection", injection); + field.setConditional(new ConditionalEvaluator(lm)); + } else if (link != null) { + LinkedHashMap lm = new LinkedHashMap<>(); + lm.put("link", link); + field.setConditional(new ConditionalEvaluator(lm)); } if (log.isDebugEnabled()) { @@ -333,6 +387,28 @@ public static Field instantiateField(String name, return field; } + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Class Type is " + this.getClass().getSimpleName() + " ; "); + sb.append("name : " + name + " ; "); + sb.append("hbase Column Qualifier : " + hbaseColumnQualifier + " ; "); + sb.append("Length : " + length + " ; "); + if (min != null) { + sb.append("Min : " + min + " ; "); + } + if (max != null) { + sb.append("Max : " + max + " ; "); + } + return sb.toString(); + } + + public abstract T generateRandomValue(); + + public T generateComputedValue(Row row) { + return toCastValue(conditional.evaluateConditions(row)); + } + public String getTypeForModel() { switch (this.getClass().getSimpleName().toLowerCase(Locale.ROOT)) { case "birthdatefield": @@ -381,8 +457,12 @@ public String getTypeForModel() { return "STRING_REGEX"; case "timestampfield": return "TIMESTAMP"; - case "uuidfiel": + case "uuidfield": return "UUID"; + case "ollamafield": + return "OLLAMA"; + case "bedrockfield": + return "BEDROCK"; default: return "STRING"; } diff --git a/src/main/java/com/datagen/model/type/OllamaField.java b/src/main/java/com/datagen/model/type/OllamaField.java new file mode 100755 index 0000000..918c8f3 --- /dev/null +++ b/src/main/java/com/datagen/model/type/OllamaField.java @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.datagen.model.type; + +import com.datagen.model.Row; +import com.datagen.utils.ParsingUtils; +import lombok.extern.slf4j.Slf4j; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hive.jdbc.HivePreparedStatement; +import org.apache.kudu.Type; +import org.apache.kudu.client.PartialRow; +import org.apache.orc.TypeDescription; +import org.springframework.ai.chat.prompt.Prompt; +import org.springframework.ai.ollama.OllamaChatClient; +import org.springframework.ai.ollama.api.OllamaApi; +import org.springframework.ai.ollama.api.OllamaOptions; + +import java.sql.SQLException; +import java.util.LinkedList; + +@Slf4j +public class OllamaField extends Field { + + private final String url; + private final String user; + private final String password; + private final LinkedList requestToInject; + private final OllamaApi ollamaApi; + private final OllamaChatClient ollamaChatClient; + private final OllamaOptions ollamaOptions; + + + public OllamaField(String name, String url, String user, String password, String request, + String modelType, Float temperature, Float frequencyPenalty, + Float presencePenalty, Float topP) { + this.name = name; + this.url = url; + this.user = user; + this.password = password; + this.requestToInject = ParsingUtils.parseStringWithVars(request); + this.ollamaApi = url == null ? new OllamaApi() : new OllamaApi(url); + this.ollamaChatClient = new OllamaChatClient(this.ollamaApi); + this.ollamaOptions = OllamaOptions.create() + .withModel(modelType==null?"llama3":modelType) + .withTemperature(temperature == null ? 1.0f : temperature) + .withFrequencyPenalty(frequencyPenalty == null ? 1.0f : frequencyPenalty) + .withPresencePenalty(presencePenalty == null ? 1.0f : presencePenalty) + .withTopP(topP == null ? 
1.0f : topP);
+  }
+
+  @Override
+  public String generateComputedValue(Row row) {
+    String stringToEvaluate = ParsingUtils.injectRowValuesToAString(row, requestToInject);
+    log.debug("Asking Ollama: {}", stringToEvaluate);
+
+    return this.ollamaChatClient.call(
+        new Prompt(
+            stringToEvaluate,
+            this.ollamaOptions
+        )).getResult().getOutput().getContent();
+  }
+
+  @Override
+  public String generateRandomValue() {
+    return "";
+  }
+
+  /*
+   Override if needed Field functions to insert into special connectors
+   */
+
+  @Override
+  public Put toHbasePut(String value, Put hbasePut) {
+    hbasePut.addColumn(Bytes.toBytes(hbaseColumnQualifier), Bytes.toBytes(name),
+        Bytes.toBytes(value));
+    return hbasePut;
+  }
+
+  @Override
+  public PartialRow toKudu(String value, PartialRow partialRow) {
+    partialRow.addString(name, value);
+    return partialRow;
+  }
+
+  @Override
+  public Type getKuduType() {
+    return Type.STRING;
+  }
+
+  @Override
+  public HivePreparedStatement toHive(String value, int index,
+                                      HivePreparedStatement hivePreparedStatement) {
+    try {
+      hivePreparedStatement.setString(index, value);
+    } catch (SQLException e) {
+      log.warn("Could not set value : " + value +
+          " into hive statement due to error :", e);
+    }
+    return hivePreparedStatement;
+  }
+
+  @Override
+  public String getHiveType() {
+    return "STRING";
+  }
+
+  @Override
+  public String getGenericRecordType() {
+    return "string";
+  }
+
+  @Override
+  public ColumnVector getOrcColumnVector(VectorizedRowBatch batch, int cols) {
+    return batch.cols[cols];
+  }
+
+  @Override
+  public TypeDescription getTypeDescriptionOrc() {
+    return TypeDescription.createString();
+  }
+}
diff --git a/src/main/java/com/datagen/model/type/OpenAIField.java b/src/main/java/com/datagen/model/type/OpenAIField.java
new file mode 100755
index 0000000..6cff529
--- /dev/null
+++ b/src/main/java/com/datagen/model/type/OpenAIField.java
@@ -0,0 +1,142 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.datagen.model.type; + +import com.datagen.model.Row; +import com.datagen.utils.ParsingUtils; +import lombok.extern.slf4j.Slf4j; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hive.jdbc.HivePreparedStatement; +import org.apache.kudu.Type; +import org.apache.kudu.client.PartialRow; +import org.apache.orc.TypeDescription; +import org.springframework.ai.openai.OpenAiChatClient; +import org.springframework.ai.openai.OpenAiChatOptions; +import org.springframework.ai.openai.api.OpenAiApi; + +import java.sql.SQLException; +import java.util.LinkedList; + +@Slf4j +public class OpenAIField extends Field { + + private final String url; + private final String user; + private final String password; + + private final LinkedList requestToInject; + private final OpenAiApi openAiApi; + private final OpenAiChatClient openAiChatClient; + private final OpenAiChatOptions openAiChatOptions; + private final String modelId; + + public OpenAIField(String name, String url, String user, String password, + String request, String modelType, Float temperature, Float frequencyPenalty, + Float presencePenalty, Integer maxTokens, Float topP) { + this.name = name; + this.url = url; + this.user = user; + this.password = password; + this.requestToInject = ParsingUtils.parseStringWithVars(request); + + // See model Ids available at: + this.modelId = modelType == null ? "gpt-4-32k" : modelType; + + this.openAiApi = new OpenAiApi(this.password); + this.openAiChatOptions = OpenAiChatOptions.builder() + .withModel(this.modelId) + .withTemperature(temperature == null ? 1.0f : temperature) + .withFrequencyPenalty(frequencyPenalty == null ? 1.0f : frequencyPenalty) + .withPresencePenalty(presencePenalty == null ? 1.0f : presencePenalty) + .withMaxTokens(maxTokens == null ? 256 : maxTokens) + .withTopP(topP == null ? 
1.0f : topP)
+        .build();
+    this.openAiChatClient = new OpenAiChatClient(openAiApi, openAiChatOptions);
+
+  }
+
+  @Override
+  public String generateComputedValue(Row row) {
+    String stringToEvaluate =
+        ParsingUtils.injectRowValuesToAString(row, requestToInject);
+    log.debug("Asking OpenAI: {}", stringToEvaluate);
+    return openAiChatClient.call(stringToEvaluate);
+  }
+
+  @Override
+  public String generateRandomValue() {
+    return "";
+  }
+
+  @Override
+  public Put toHbasePut(String value, Put hbasePut) {
+    hbasePut.addColumn(Bytes.toBytes(hbaseColumnQualifier), Bytes.toBytes(name),
+        Bytes.toBytes(value));
+    return hbasePut;
+  }
+
+  /*
+   Override if needed Field functions to insert into special connectors
+   */
+
+  @Override
+  public PartialRow toKudu(String value, PartialRow partialRow) {
+    partialRow.addString(name, value);
+    return partialRow;
+  }
+
+  @Override
+  public Type getKuduType() {
+    return Type.STRING;
+  }
+
+  @Override
+  public HivePreparedStatement toHive(String value, int index,
+                                      HivePreparedStatement hivePreparedStatement) {
+    try {
+      hivePreparedStatement.setString(index, value);
+    } catch (SQLException e) {
+      log.warn("Could not set value : " + value +
+          " into hive statement due to error :", e);
+    }
+    return hivePreparedStatement;
+  }
+
+  @Override
+  public String getHiveType() {
+    return "STRING";
+  }
+
+  @Override
+  public String getGenericRecordType() {
+    return "string";
+  }
+
+  @Override
+  public ColumnVector getOrcColumnVector(VectorizedRowBatch batch, int cols) {
+    return batch.cols[cols];
+  }
+
+  @Override
+  public TypeDescription getTypeDescriptionOrc() {
+    return TypeDescription.createString();
+  }
+}
diff --git a/src/main/java/com/datagen/parsers/JsonParser.java b/src/main/java/com/datagen/parsers/JsonParser.java
index 6b5c0b1..eafe566 100755
--- a/src/main/java/com/datagen/parsers/JsonParser.java
+++ b/src/main/java/com/datagen/parsers/JsonParser.java
@@ -18,6 +18,7 @@
 package com.datagen.parsers;

+import com.datagen.config.ApplicationConfigs;
 import com.datagen.model.Model;
 import com.datagen.model.type.Field;
 import com.fasterxml.jackson.databind.JsonNode;
@@ -61,7 +62,7 @@ public JsonParser(String jsonFilePath) {
    *
    * @return Model instantiated and populated
    */
-  public Model renderModelFromFile() {
+  public Model renderModelFromFile(Map<ApplicationConfigs, String> properties) {
     // Release 0.4.15 introduced an easier format with PK, TB & Options being just one JSON node instead of an array
     // But we need to keep working with old format for retro-compatibility.
(Fields is untouched) @@ -118,13 +119,13 @@ public Model renderModelFromFile() { while (fieldsIterator.hasNext()) { JsonNode fieldNode = fieldsIterator.next(); - T field = getOneField(fieldNode, hbaseFamilyColsMap); + T field = getOneField(fieldNode, properties, hbaseFamilyColsMap); if (field != null) { fields.put(fieldNode.get("name").asText(), field); } } - return new Model(fields, pks, tbs, opsMap); + return new Model(fields, pks, tbs, opsMap, properties); } @@ -135,7 +136,21 @@ public Model renderModelFromFile() { * @param jsonField * @return */ - private T getOneField(JsonNode jsonField, Map opsMap) { + private T getOneField(JsonNode jsonField, Map properties, Map opsMap) { + String name; + try { + name = jsonField.get("name").asText(); + } catch (NullPointerException e) { + name = "UNDEFINED_COL_NAME"; + } + + String type; + try { + type = jsonField.get("type").asText(); + } catch (NullPointerException e) { + type = "UNDEFINED_TYPE"; + } + Integer length; try { length = jsonField.get("length").asInt(); @@ -227,6 +242,13 @@ private T getOneField(JsonNode jsonField, Map opsMap) { request = null; } + String link; + try { + link = jsonField.get("link").asText(); + } catch (NullPointerException e) { + link = null; + } + String url; try { url = jsonField.get("url").asText(); @@ -248,6 +270,48 @@ private T getOneField(JsonNode jsonField, Map opsMap) { password = null; } + String modelType; + try { + modelType = jsonField.get("model_type").asText(); + } catch (NullPointerException e) { + modelType = null; + } + + Float temperature; + try { + temperature = Float.valueOf(jsonField.get("temperature").asText()); + } catch (NullPointerException e) { + temperature = null; + } + + Float frequencyPenalty; + try { + frequencyPenalty = Float.valueOf(jsonField.get("frequency_penalty").asText()); + } catch (NullPointerException e) { + frequencyPenalty = null; + } + + Float presencePenalty; + try { + presencePenalty = Float.valueOf(jsonField.get("presence_penalty").asText()); + } catch (NullPointerException e) { + presencePenalty = null; + } + + Integer maxTokens; + try { + maxTokens = Integer.valueOf(jsonField.get("max_tokens").asText()); + } catch (NullPointerException e) { + maxTokens = null; + } + + Float topP; + try { + topP = Float.valueOf(jsonField.get("top_p").asText()); + } catch (NullPointerException e) { + topP = null; + } + JsonNode filtersArray = jsonField.get("filters"); List filters = new ArrayList<>(); try { @@ -297,16 +361,17 @@ private T getOneField(JsonNode jsonField, Map opsMap) { } return (T) Field.instantiateField( - jsonField.get("name").asText(), - jsonField.get("type").asText(), + properties, + name, + type, length, - opsMap.get(jsonField.get("name").asText()), - possibleValues, - possible_values_weighted, - conditionals, min, max, + opsMap.get(name), + possibleValues, + possible_values_weighted, filters, + conditionals, file, separator, pattern, @@ -317,9 +382,16 @@ private T getOneField(JsonNode jsonField, Map opsMap) { field, formula, injection, + link, url, user, - password); + password, + modelType, + temperature, + frequencyPenalty, + presencePenalty, + maxTokens, + topP); } private Map mapColNameToColQual(String mapping) { diff --git a/src/main/java/com/datagen/parsers/Parser.java b/src/main/java/com/datagen/parsers/Parser.java index d7e34b8..ab01053 100755 --- a/src/main/java/com/datagen/parsers/Parser.java +++ b/src/main/java/com/datagen/parsers/Parser.java @@ -18,8 +18,11 @@ package com.datagen.parsers; +import com.datagen.config.ApplicationConfigs; import 
com.datagen.model.Model; +import java.util.Map; + /** * A parser is an entity able to read one type of file and render a {@see #com.cloudera.frisch.randomdatagen.model.Model} * based on that file @@ -28,5 +31,5 @@ public interface Parser { // TODO: Implement a yaml parser - Model renderModelFromFile(); + Model renderModelFromFile(Map properties); } diff --git a/src/main/java/com/datagen/service/APISevice.java b/src/main/java/com/datagen/service/APISevice.java index dc7c2d4..8e2feab 100755 --- a/src/main/java/com/datagen/service/APISevice.java +++ b/src/main/java/com/datagen/service/APISevice.java @@ -57,25 +57,25 @@ public String saveModel(@Nullable MultipartFile modelFileAsFile, if (modelFilePath == null) { log.info( "No model file passed, will default to custom data model or default defined one in configuration"); - if (properties.get(ApplicationConfigs.CUSTOM_DATA_MODEL_DEFAULT) != + if (properties.get(ApplicationConfigs.DATAGEN_CUSTOM_MODEL) != null) { modelFile = - properties.get(ApplicationConfigs.CUSTOM_DATA_MODEL_DEFAULT); + properties.get(ApplicationConfigs.DATAGEN_CUSTOM_MODEL); } else { - modelFile = properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT) + - properties.get(ApplicationConfigs.DATA_MODEL_DEFAULT); + modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH) + + properties.get(ApplicationConfigs.DATAGEN_MODEL_DEFAULT); } } if (modelFilePath != null && !modelFilePath.contains("/")) { log.info( "Model file passed is identified as one of the one provided, so will look for it in data model path: {} ", - properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT)); - modelFile = properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT) + + properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH)); + modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH) + modelFilePath; } if (modelFileAsFile != null && !modelFileAsFile.isEmpty()) { log.info("Model passed is an uploaded file"); - modelFile = properties.get(ApplicationConfigs.DATA_MODEL_RECEIVED_PATH) + + modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_RECEIVED_PATH) + "/model-test-" + new Random().nextInt() + ".json"; try { modelFileAsFile.transferTo(new File(modelFile)); @@ -96,7 +96,7 @@ public String saveModel(@Nullable MultipartFile modelFileAsFile, } // Save Model - Model model = parser.renderModelFromFile(); + Model model = parser.renderModelFromFile(properties); UUID modelId = UUID.randomUUID(); modelMap.put(modelId, model); diff --git a/src/main/java/com/datagen/service/CommandRunnerService.java b/src/main/java/com/datagen/service/CommandRunnerService.java index 3ac61e7..a7b0a2e 100755 --- a/src/main/java/com/datagen/service/CommandRunnerService.java +++ b/src/main/java/com/datagen/service/CommandRunnerService.java @@ -58,19 +58,19 @@ public class CommandRunnerService { public CommandRunnerService(PropertiesLoader propertiesLoader) { this.propertiesLoader = propertiesLoader; this.scheduledCommandsFilePath = propertiesLoader.getPropertiesCopy() - .get(ApplicationConfigs.SCHEDULER_FILE_PATH); + .get(ApplicationConfigs.DATAGEN_SCHEDULER_FILE_PATH); this.commandsToProcess = new ConcurrentLinkedQueue<>(); this.scheduledCommands = new HashMap<>(); this.commands = new HashMap<>(); - FileUtils.createLocalDirectory(propertiesLoader.getPropertiesCopy().get(ApplicationConfigs.DATA_HOME_DIRECTORY)); + FileUtils.createLocalDirectory(propertiesLoader.getPropertiesCopy().get(ApplicationConfigs.DATAGEN_HOME_DIRECTORY)); readScheduledCommands(); // After reading scheduled values, file should be 
re-written writeScheduledCommands(); FileUtils.createLocalDirectory(propertiesLoader.getPropertiesCopy() - .get(ApplicationConfigs.DATA_MODEL_RECEIVED_PATH)); + .get(ApplicationConfigs.DATAGEN_MODEL_RECEIVED_PATH)); } public CommandSoft getCommandStatusShort(UUID uuid) { @@ -176,7 +176,7 @@ public void readScheduledCommands() { "Model has not been found or is incorrect, correct it. This command has been removed from scheduler"); wrongScheduledCommandsRead.add(c.getCommandUuid()); } - c.setModel(parser.renderModelFromFile()); + c.setModel(parser.renderModelFromFile(propertiesLoader.getPropertiesCopy())); // Previous Failed commands should not be taken if (c.getStatus() == Command.CommandStatus.FAILED) { @@ -273,25 +273,25 @@ public String generateData( (modelFileAsFile == null || modelFileAsFile.isEmpty())) { log.info( "No model file passed, will default to custom data model or default defined one in configuration"); - if (properties.get(ApplicationConfigs.CUSTOM_DATA_MODEL_DEFAULT) != + if (properties.get(ApplicationConfigs.DATAGEN_CUSTOM_MODEL) != null) { modelFile = - properties.get(ApplicationConfigs.CUSTOM_DATA_MODEL_DEFAULT); + properties.get(ApplicationConfigs.DATAGEN_CUSTOM_MODEL); } else { - modelFile = properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT) + - properties.get(ApplicationConfigs.DATA_MODEL_DEFAULT); + modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH) + + properties.get(ApplicationConfigs.DATAGEN_MODEL_DEFAULT); } } if (modelFilePath != null && !modelFilePath.contains("/")) { log.info( "Model file passed is identified as one of the one provided, so will look for it in data model path: {} ", - properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT)); - modelFile = properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT) + + properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH)); + modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH) + modelFilePath; } if (modelFileAsFile != null && !modelFileAsFile.isEmpty()) { log.info("Model passed is an uploaded file"); - modelFile = properties.get(ApplicationConfigs.DATA_MODEL_RECEIVED_PATH) + + modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_RECEIVED_PATH) + "/model-" + System.currentTimeMillis() + "-" + String.format("%06d",new Random().nextInt(100000)) + ".json"; try { modelFileAsFile.transferTo(new File(modelFile)); @@ -321,7 +321,7 @@ public String generateData( log.warn("Error when parsing model file"); return "{ \"commandUuid\": \"\" , \"error\": \"Error with Model File - Verify its path and structure\" }"; } - Model model = parser.renderModelFromFile(); + Model model = parser.renderModelFromFile(properties); // Creation of connectors List connectorsList = new ArrayList<>(); @@ -347,7 +347,7 @@ public String generateData( if (isModelUploaded) { // If model has been uploaded, it must be renamed to use its UUID for user and admin convenience String newModelFilePath = - properties.get(ApplicationConfigs.DATA_MODEL_RECEIVED_PATH) + + properties.get(ApplicationConfigs.DATAGEN_MODEL_RECEIVED_PATH) + "/model-" + command.getCommandUuid().toString() + ".json"; FileUtils.moveLocalFile(modelFile, newModelFilePath); command.setModelFilePath(newModelFilePath); diff --git a/src/main/java/com/datagen/service/ModelGeneraterSevice.java b/src/main/java/com/datagen/service/ModelGeneraterSevice.java index b596984..f133e47 100755 --- a/src/main/java/com/datagen/service/ModelGeneraterSevice.java +++ b/src/main/java/com/datagen/service/ModelGeneraterSevice.java @@ -47,9 +47,9 @@ public 
ModelGeneraterSevice(PropertiesLoader propertiesLoader) { this.propertiesLoader = propertiesLoader; FileUtils.createLocalDirectory(propertiesLoader.getPropertiesCopy() - .get(ApplicationConfigs.DATA_MODEL_GENERATED_PATH)); + .get(ApplicationConfigs.DATAGEN_MODEL_GENERATED_PATH)); FileUtils.createLocalDirectory(propertiesLoader.getPropertiesCopy() - .get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT)); + .get(ApplicationConfigs.DATAGEN_MODEL_PATH)); } public String generateModel( @@ -90,7 +90,7 @@ public String generateModel( tableNames.put(OptionsConverter.TableNames.LOCAL_FILE_PATH, filepath); } - String outputPath = properties.get(ApplicationConfigs.DATA_MODEL_GENERATED_PATH) + + String outputPath = properties.get(ApplicationConfigs.DATAGEN_MODEL_GENERATED_PATH) + "/model-generated-" + new Random().nextInt() + ".json"; ConnectorInterface connector = ConnectorsUtils diff --git a/src/main/java/com/datagen/service/ModelTesterSevice.java b/src/main/java/com/datagen/service/ModelTesterSevice.java index 398e359..4e5735a 100755 --- a/src/main/java/com/datagen/service/ModelTesterSevice.java +++ b/src/main/java/com/datagen/service/ModelTesterSevice.java @@ -57,25 +57,25 @@ public String generateData(@Nullable MultipartFile modelFileAsFile, if (modelFilePath == null) { log.info( "No model file passed, will default to custom data model or default defined one in configuration"); - if (properties.get(ApplicationConfigs.CUSTOM_DATA_MODEL_DEFAULT) != + if (properties.get(ApplicationConfigs.DATAGEN_CUSTOM_MODEL) != null) { modelFile = - properties.get(ApplicationConfigs.CUSTOM_DATA_MODEL_DEFAULT); + properties.get(ApplicationConfigs.DATAGEN_CUSTOM_MODEL); } else { - modelFile = properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT) + - properties.get(ApplicationConfigs.DATA_MODEL_DEFAULT); + modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH) + + properties.get(ApplicationConfigs.DATAGEN_MODEL_DEFAULT); } } if (modelFilePath != null && !modelFilePath.contains("/")) { log.info( "Model file passed is identified as one of the one provided, so will look for it in data model path: {} ", - properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT)); - modelFile = properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT) + + properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH)); + modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH) + modelFilePath; } if (modelFileAsFile != null && !modelFileAsFile.isEmpty()) { log.info("Model passed is an uploaded file"); - modelFile = properties.get(ApplicationConfigs.DATA_MODEL_RECEIVED_PATH) + + modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_RECEIVED_PATH) + "/model-test-" + new Random().nextInt() + ".json"; try { modelFileAsFile.transferTo(new File(modelFile)); @@ -94,7 +94,7 @@ public String generateData(@Nullable MultipartFile modelFileAsFile, log.warn("Error when parsing model file"); return "{ \"commandUuid\": \"\" , \"error\": \"Error with Model File - Verify its path and structure\" }"; } - Model model = parser.renderModelFromFile(); + Model model = parser.renderModelFromFile(properties); List randomDataList = model.generateRandomRows(1, 1); diff --git a/src/main/java/com/datagen/utils/ParsingUtils.java b/src/main/java/com/datagen/utils/ParsingUtils.java new file mode 100644 index 0000000..301f163 --- /dev/null +++ b/src/main/java/com/datagen/utils/ParsingUtils.java @@ -0,0 +1,77 @@ +package com.datagen.utils; + +import com.datagen.model.Model; +import com.datagen.model.Row; +import lombok.AllArgsConstructor; +import 
lombok.extern.slf4j.Slf4j;
+
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+
+@Slf4j
+public class ParsingUtils {
+
+  @AllArgsConstructor
+  public static class StringFragment {
+    String stringToPrint;
+    String variableName;
+    Boolean isAVariableToReplace;
+  }
+
+  // Matches ${columnName} references inside a request string
+  private static final Pattern patternToIdentifyInjections = Pattern.compile("(\\$\\{)([a-zA-Z]*)(\\})");
+
+  /**
+   * Parse a String containing column references to other fields
+   * and prepare it for future evaluation during generation
+   * @param stringToParse
+   * @return a linked list of string fragments to either print as-is or compute (get its value from other columns)
+   */
+  public static LinkedList<StringFragment> parseStringWithVars(String stringToParse) {
+
+    LinkedList<StringFragment> stringParsed = new LinkedList<>();
+
+    Matcher matcher = patternToIdentifyInjections.matcher(stringToParse);
+
+    // Find all places in the string where there are column names to replace
+    int cursorPosition = 0;
+    while (matcher.find()) {
+      if (matcher.start() > cursorPosition) {
+        // Add string before match
+        log.debug("Found string to leave as is: {}", stringToParse.substring(cursorPosition, matcher.start()));
+        stringParsed.add(new StringFragment(stringToParse.substring(cursorPosition, matcher.start()), null, false));
+      }
+      // Add match itself
+      log.debug("Found column to substitute: {}", matcher.group(2));
+      stringParsed.add(new StringFragment(null, matcher.group(2), true));
+      cursorPosition = matcher.end();
+    }
+
+    // If there are still characters left after last match, add them
+    if (cursorPosition < stringToParse.length()) {
+      stringParsed.add(new StringFragment(stringToParse.substring(cursorPosition), null, false));
+    }
+
+    return stringParsed;
+  }
+
+  public static String injectRowValuesToAString(Row row, LinkedList<StringFragment> fragments) {
+    Map<String, Object> rowValues = row.getValues();
+    Model model = row.getModel();
+
+    return fragments.stream().map(f -> {
+      if (f.isAVariableToReplace) {
+        return model.getFieldFromName(f.variableName).toStringValue(rowValues.get(f.variableName));
+      } else {
+        return f.stringToPrint;
+      }
+    }).reduce("", String::concat);
+  }
+
+
+}
diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties
index 5223009..cc0bbef 100755
--- a/src/main/resources/application.properties
+++ b/src/main/resources/application.properties
@@ -182,4 +182,9 @@ adls.sas.token=
 gcs.project.id=
 # Only if using a service account key, otherwise use any other ADC login
 gcs.accountkey.path=
-gcs.region=
\ No newline at end of file
+gcs.region=
+
+# OLLAMA
+spring.ai.ollama.base-url=localhost:52764
+spring.ai.ollama.chat.enabled=true
+spring.ai.ollama.chat.options.format=json
\ No newline at end of file
diff --git a/src/main/resources/logback-spring.xml b/src/main/resources/logback-spring.xml
index 1feed91..38586c1 100755
--- a/src/main/resources/logback-spring.xml
+++ b/src/main/resources/logback-spring.xml
@@ -12,7 +12,7 @@
-
+
@@ -36,7 +36,7 @@
-
+
diff --git a/src/main/resources/models/example-model-ai.json b/src/main/resources/models/example-model-ai.json
new file mode 100755
index 0000000..d4b759b
--- /dev/null
+++ b/src/main/resources/models/example-model-ai.json
@@ -0,0 +1,125 @@
+{
+  "Fields": [
+    {
+      "name": "name",
+      "type": "NAME"
+    },
+    {
+      "name": "age",
+      "type": "LONG",
+      "min": 18,
+      "max": 99
+    },
+    {
+      "name": "birthday_wish_bedrock",
+      "type": "BEDROCK",
+      "request": "generate a one line birthday wish to ${name} who is ${age} years old today",
+      "model_type": "meta.llama3-8b-instruct-v1:0",
+      "user": "",
+      "password": "",
+      "temperature": 1.0,
+      "max_tokens": 256
+    },
+    {
+      "name": "birthday_wish_ollama",
+      "type": "OLLAMA",
+      "request": "generate a one line birthday wish to ${name} who is ${age} years
old today", + "model_type": "mistral", + "temperature": 1.0, + "frequency_penalty": 1.5, + "presence_penalty": 1.3, + "top_p": 1.0 + }, + { + "name": "birthday_wish_openai", + "type": "OPENAI", + "request": "generate a one line birthday wish to ${name} who is ${age} years old today", + "model_type": "gpt-4o", + "password": "", + "temperature": 1.0, + "frequency_penalty": 1.5, + "presence_penalty": 1.3, + "max_tokens": 256, + "top_p": 1.0 + } + ], + "Table_Names": { + "HDFS_FILE_PATH": "/user/datagen/hdfs/MODEL_NAME/", + "HDFS_FILE_NAME": "MODEL_NAME", + + "HBASE_TABLE_NAME": "MODEL_NAME", + "HBASE_NAMESPACE": "datagen", + + "KAFKA_TOPIC": "datagen_MODEL_NAME", + + "OZONE_VOLUME": "datagen", + "OZONE_BUCKET": "MODEL_NAME", + "OZONE_KEY_NAME": "MODEL_NAME", + "OZONE_LOCAL_FILE_PATH": "/home/datagen/temp/MODEL_NAME/", + + "SOLR_COLLECTION": "datagen_MODEL_NAME", + + "HIVE_DATABASE": "datagen", + "HIVE_TABLE_NAME": "MODEL_NAME", + "HIVE_HDFS_FILE_PATH": "/user/datagen/hive/MODEL_NAME/", + "HIVE_TEMPORARY_TABLE_NAME": "MODEL_NAME_tmp", + + "KUDU_TABLE_NAME": "datagen.MODEL_NAME", + + "LOCAL_FILE_PATH": "/tmp/datagen/MODEL_NAME/", + "LOCAL_FILE_NAME": "datagen-MODEL_NAME", + + "S3_BUCKET": "datagen-test-fri", + "S3_DIRECTORY": "datagen/MODEL_NAME", + "S3_KEY_NAME": "MODEL_NAME", + "S3_LOCAL_FILE_PATH": "/tmp/datagen/temp/MODEL_NAME/", + + "ADLS_CONTAINER": "dgtest", + "ADLS_DIRECTORY": "datagen/MODEL_NAME", + "ADLS_FILE_NAME": "MODEL_NAME", + "ADLS_LOCAL_FILE_PATH": "/tmp/datagen/temp/MODEL_NAME/", + + "GCS_BUCKET": "datagenfri", + "GCS_DIRECTORY": "datagen/MODEL_NAME", + "GCS_OBJECT_NAME": "MODEL_NAME", + "GCS_LOCAL_FILE_PATH": "/tmp/datagen/temp/MODEL_NAME/", + + "AVRO_NAME": "datagenMODEL_NAME" + }, + "Options": { + "KAFKA_MSG_KEY": "name", + "HBASE_PRIMARY_KEY": "name", + "KUDU_PRIMARY_KEYS": "name,age", + "KUDU_RANGE_KEYS": "age", + "DELETE_PREVIOUS": false, + "ONE_FILE_PER_ITERATION": true, + "CSV_HEADER": true, + "HBASE_COLUMN_FAMILIES_MAPPING": "c:name", + "SOLR_SHARDS": 1, + "SOLR_REPLICAS": 1, + "SOLR_JAAS_FILE_PATH": "/tmp/solr.jaas", + "HIVE_THREAD_NUMBER": 1, + "HIVE_ON_HDFS": true, + "HIVE_TEZ_QUEUE_NAME": "root.default", + "HIVE_TABLE_PARTITIONS_COLS": "name", + "HIVE_TABLE_BUCKETS_COLS": "age", + "HIVE_TABLE_BUCKETS_NUMBER": 32, + "PARQUET_PAGE_SIZE": 1048576, + "PARQUET_ROW_GROUP_SIZE": 134217728, + "PARQUET_DICTIONARY_PAGE_SIZE": 1048576, + "PARQUET_DICTIONARY_ENCODING": true, + "KAFKA_MESSAGE_TYPE": "json", + "KAFKA_JAAS_FILE_PATH": "/tmp/kafka.jaas", + "KAFKA_ACKS_CONFIG": "all", + "KAFKA_RETRIES_CONFIG": 3, + "KUDU_REPLICAS": 1, + "KUDU_BUCKETS": 32, + "KUDU_BUFFER": 100001, + "KUDU_FLUSH": "MANUAL_FLUSH", + "OZONE_REPLICATION_FACTOR": 3, + "HDFS_REPLICATION_FACTOR": 3, + "ADLS_MAX_CONCURRENCY": 4, + "ADLS_MAX_UPLOAD_SIZE": 16777216, + "ADLS_BLOCK_SIZE": 8388608 + } +} \ No newline at end of file diff --git a/src/main/resources/models/example-model.json b/src/main/resources/models/example-model.json index f41a5c6..6f9df67 100755 --- a/src/main/resources/models/example-model.json +++ b/src/main/resources/models/example-model.json @@ -11,9 +11,16 @@ "max": 99 }, { - "name": "age", - "type": "OLLAMA", - "request": "generate a birthday wish to $name who is $age years old today" + "name": "birthday_wish_openai", + "type": "OPENAI", + "request": "generate a one line birthday wish to ${name} who is ${age} years old today", + "model_type": "gpt-4o", + "password": "", + "temperature": 1.0, + "frequency_penalty": 1.5, + "presence_penalty": 1.3, + "max_tokens": 256, + "top_p": 1.0 } ], 
"Table_Names": { diff --git a/src/main/resources/models/full-model.json b/src/main/resources/models/full-model.json index db3f7c4..e988d92 100755 --- a/src/main/resources/models/full-model.json +++ b/src/main/resources/models/full-model.json @@ -133,7 +133,7 @@ { "name": "formula_1", "type": "STRING", - "formula": "2 * $longPercent + 42 - $onePlusOne" + "formula": "2 * ${longPercent} + 42 - ${onePlusOne}" }, { "name": "condition_2", @@ -202,6 +202,13 @@ "name": "email_from_name", "type": "STRING", "injection": "toto_${name}@our_company_name.${country}" + }, + { + "name": "birthday_wish", + "type": "OLLAMA", + "request": "generate a birthday wish to ${name} born ${birthdate} and who is living in ${city} , ${city_country}", + "temperature": 1.5, + "model_type": "mistral" } ], "Table_Names": {