stringFragments;
+ private JsEvaluator jsEvaluator;
public ConditionsLine(String conditionLine, String valueToReturn) {
this.valueToReturn = valueToReturn;
@@ -93,7 +73,8 @@ public ConditionsLine(String conditionLine, String valueToReturn) {
} else if (conditionSplitted[0].equalsIgnoreCase("formula")) {
log.debug("Found a formula, that will need to be evaluated");
this.formula = true;
- this.formulaToEvaluate = new Formula(valueToReturn);
+ this.jsEvaluator = new JsEvaluator();
+ this.stringFragments = ParsingUtils.parseStringWithVars(valueToReturn);
return;
} else if (conditionSplitted[0].equalsIgnoreCase("link")) {
log.debug("Found a link, that will need to be evaluated");
@@ -103,7 +84,7 @@ public ConditionsLine(String conditionLine, String valueToReturn) {
} else if (conditionSplitted[0].equalsIgnoreCase("injection")) {
log.debug("Found an injection, that will need to be evaluated");
this.injection = true;
- this.injectionToEvaluate = new Injection(valueToReturn);
+ this.stringFragments = ParsingUtils.parseStringWithVars(valueToReturn);
return;
} else if (conditionSplitted[0].equalsIgnoreCase("default")) {
log.debug("Found a default, No evaluation needed");
@@ -171,7 +152,9 @@ public boolean isLineSatisfied(Row row) {
return listOfConditions.get(0).evaluateCondition(row);
} else if (this.formula) {
// Formula case
- this.valueToReturn = formulaToEvaluate.evaluateFormula(row);
+ this.valueToReturn = jsEvaluator.evaluateJsExpression(
+ ParsingUtils.injectRowValuesToAString(row, this.stringFragments)
+ );
return true;
} else if (this.link) {
// Formula case
@@ -179,7 +162,7 @@ public boolean isLineSatisfied(Row row) {
return true;
} else if (this.injection) {
// Formula case
- this.valueToReturn = injectionToEvaluate.evaluateInjection(row);
+ this.valueToReturn = ParsingUtils.injectRowValuesToAString(row, this.stringFragments);
return true;
} else {
// Default case
diff --git a/src/main/java/com/datagen/model/conditions/Formula.java b/src/main/java/com/datagen/model/conditions/Formula.java
deleted file mode 100755
index 7ea70eb..0000000
--- a/src/main/java/com/datagen/model/conditions/Formula.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.datagen.model.conditions;
-
-import com.datagen.model.Row;
-import lombok.Getter;
-import lombok.Setter;
-import lombok.extern.slf4j.Slf4j;
-
-import javax.script.ScriptEngine;
-import javax.script.ScriptEngineManager;
-import javax.script.ScriptException;
-import java.util.LinkedList;
-
-@Slf4j
-public class Formula {
-
-
- // for all cols name existing in model, try to find which one are involved in the formula and put them in a list
- @Getter
- @Setter
- private LinkedList listOfColsToEvaluate;
-
- @Getter
- @Setter
- private String formulaToEvaluate;
-
- private final ScriptEngineManager scriptEngineManager;
- private final ScriptEngine scriptEngine;
-
- Formula(String formula) {
- // fill in the listOfColsToEvaluate + Create formula string with no $
- listOfColsToEvaluate = new LinkedList<>();
- for (String field : formula.substring(formula.indexOf("$") + 1)
- .split("[$]")) {
- listOfColsToEvaluate.add(field.split("\\s+")[0]);
- log.debug(
- "Add Field : " + field.split("\\s+")[0] + " to be in the formula");
- }
- formulaToEvaluate = formula.replaceAll("[$]", "");
- scriptEngineManager = new ScriptEngineManager();
- scriptEngine = scriptEngineManager.getEngineByName("JavaScript");
- }
-
- public String evaluateFormula(Row row) {
- // Evaluate formula using an evaluator (or built this evaluator)
- String formulaReplaced = formulaToEvaluate;
- for (String colName : listOfColsToEvaluate) {
- log.debug(formulaReplaced);
- formulaReplaced = formulaReplaced.replaceAll("(^| )" + colName + "($| )",
- row.getValues().get(colName).toString());
- }
- log.debug(formulaReplaced);
- return computeFormula(formulaReplaced);
- }
-
- private String computeFormula(String formula) {
- Object value = 0f;
- try {
- value = scriptEngine.eval(formula);
- log.debug("Evaluating formula: " + formula + " to: " + value);
- } catch (ScriptException e) {
- log.warn("Could not evaluate expression: " + formula + " due to error: ",
- e);
- }
- return value.toString();
- }
-
-
-}
diff --git a/src/main/java/com/datagen/model/conditions/Injection.java b/src/main/java/com/datagen/model/conditions/Injection.java
deleted file mode 100755
index 6362b24..0000000
--- a/src/main/java/com/datagen/model/conditions/Injection.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.datagen.model.conditions;
-
-
-import com.datagen.model.Row;
-import lombok.AllArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-
-
-import java.util.LinkedList;
-import java.util.Map;
-
-@Slf4j
-public class Injection {
-
- @AllArgsConstructor
- private class InjectedField {
- String stringToPrint;
- Boolean toReplace;
- }
-
- private final LinkedList injectedFieldNames =
- new LinkedList<>();
-
- Injection(String injection) {
- for (String s : injection.split("[$]")) {
- if (s.length() != 0) {
- if (s.charAt(0) != '{') {
- log.debug(s + " is not a variable name");
- injectedFieldNames.add(new InjectedField(s, false));
- } else {
- String fieldToAdd = s.substring(1, s.indexOf('}'));
- log.debug(fieldToAdd + " is found as a variable name");
- injectedFieldNames.add(new InjectedField(fieldToAdd, true));
- if (s.length() > s.indexOf('}')) {
- log.debug(
- s.substring(s.indexOf('}') + 1) + " is not a variable name");
- injectedFieldNames.add(
- new InjectedField(s.substring(s.indexOf('}') + 1), false));
- }
- }
- }
- }
- }
-
- public String evaluateInjection(Row row) {
- Map rowValues = row.getValues();
- StringBuilder sb = new StringBuilder();
- try {
- for (InjectedField fieldNameToReplace : injectedFieldNames) {
- if (fieldNameToReplace.toReplace) {
- sb.append(
- row.getModel().getFieldFromName(fieldNameToReplace.stringToPrint)
- .toStringValue(
- rowValues.get(fieldNameToReplace.stringToPrint)));
- } else {
- sb.append(fieldNameToReplace.stringToPrint);
- }
- }
- } catch (Exception e) {
- log.error("Can not evaluate injection so returning empty value, see: ",
- e);
- }
-
- return sb.toString();
- }
-
-
-}
diff --git a/src/main/java/com/datagen/model/conditions/JsEvaluator.java b/src/main/java/com/datagen/model/conditions/JsEvaluator.java
new file mode 100755
index 0000000..8564853
--- /dev/null
+++ b/src/main/java/com/datagen/model/conditions/JsEvaluator.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datagen.model.conditions;
+
+import lombok.extern.slf4j.Slf4j;
+import org.graalvm.polyglot.Context;
+import org.graalvm.polyglot.PolyglotException;
+
+@Slf4j
+public class JsEvaluator {
+
+ private final Context context;
+
+ JsEvaluator() {
+ this.context = Context.newBuilder()
+ .allowAllAccess(true)
+ .build();
+ context.initialize("js");
+ }
+
+ String evaluateJsExpression(String expression) {
+ Object value = 0f;
+ try {
+ value = context.eval("js", expression);
+      log.debug("Evaluating JS expression: " + expression + " to: " + value);
+ } catch (PolyglotException e) {
+ log.warn("Could not evaluate expression: " + expression + " due to error: ",
+ e);
+ }
+ return value.toString();
+ }
+
+
+}
diff --git a/src/main/java/com/datagen/model/type/BedrockField.java b/src/main/java/com/datagen/model/type/BedrockField.java
new file mode 100755
index 0000000..ed5a77d
--- /dev/null
+++ b/src/main/java/com/datagen/model/type/BedrockField.java
@@ -0,0 +1,240 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datagen.model.type;
+
+import com.datagen.model.Row;
+import com.datagen.utils.ParsingUtils;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hive.jdbc.HivePreparedStatement;
+import org.apache.kudu.Type;
+import org.apache.kudu.client.PartialRow;
+import org.apache.orc.TypeDescription;
+import org.json.JSONException;
+import org.json.JSONObject;
+import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
+import software.amazon.awssdk.core.SdkBytes;
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.services.bedrockruntime.BedrockRuntimeClient;
+
+import java.sql.SQLException;
+import java.util.LinkedList;
+import java.util.List;
+
+@Slf4j
+public class BedrockField extends Field {
+
+ private final String url;
+ private final String user;
+ private final String password;
+ private final Double temperature;
+ private final Integer maxTokens;
+ private final Region region;
+ private final LinkedList requestToInject;
+ private final BedrockRuntimeClient bedrockRuntimeClient;
+ private final String modelId;
+ private final BedrockModelType bedrockmodeltype;
+ private JSONObject preparedRequest = null;
+
+ public BedrockField(String name, String url, String user, String password,
+ String request, String modelType, Float temperature, String region, Integer maxTokens) {
+ this.name = name;
+ this.url = url;
+ this.user = user;
+ this.password = password;
+ this.temperature = temperature == null ? 0.5 : temperature;
+ this.maxTokens = maxTokens == null ? 256 : maxTokens;
+ this.requestToInject = ParsingUtils.parseStringWithVars(request);
+ this.region = region!=null?Region.of(region):Region.US_EAST_1;
+
+ AwsCredentialsProvider awsCredentialsProvider =
+ StaticCredentialsProvider.create(
+ AwsBasicCredentials.create(this.user, this.password));
+
+ this.bedrockRuntimeClient = BedrockRuntimeClient.builder()
+ .credentialsProvider(awsCredentialsProvider)
+ .region(this.region)
+ .build();
+
+ // See model Ids available at: https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html
+ this.modelId = modelType == null ? "amazon.titan-text-lite-v1" : modelType;
+ /*
+ Tested with
+ MISTRAL: mistral.mistral-small-2402-v1:0
+ TITAN: amazon.titan-text-lite-v1
+ LLAMA: meta.llama3-8b-instruct-v1:0
+ */
+
+ this.bedrockmodeltype = switch (modelId.split("\\.")[0]) {
+ case "anthropic":
+ yield BedrockModelType.ANTHROPIC;
+ case "mistral":
+ yield BedrockModelType.MISTRAL;
+ case "amazon":
+ yield BedrockModelType.TITAN;
+ case "meta":
+ yield BedrockModelType.LLAMA;
+ default:
+ yield BedrockModelType.TITAN;
+ };
+
+ // JSON prepared request for model
+ try {
+ this.preparedRequest = switch (bedrockmodeltype) {
+ case TITAN:
+ yield new JSONObject();
+ case ANTHROPIC:
+ yield new JSONObject()
+ .put("temperature", this.temperature)
+ .put("stop_sequences", List.of("\n\nHuman:"))
+ .put("max_tokens_to_sample", this.maxTokens);
+ case MISTRAL:
+ yield new JSONObject()
+ .put("temperature", this.temperature)
+ .put("max_tokens", this.maxTokens);
+ case LLAMA:
+ yield new JSONObject()
+ .put("temperature", this.temperature);
+ };
+ } catch (JSONException e) {
+ log.warn("Could not prepare request to Bedrock due to error: ", e);
+ }
+
+ }
+
+ @Override
+ public String generateComputedValue(Row row) {
+ String stringToEvaluate =
+ ParsingUtils.injectRowValuesToAString(row, requestToInject);
+ log.debug("Asking to Bedrock: {}", stringToEvaluate);
+ var responseText = "";
+
+ try {
+ switch (this.bedrockmodeltype) {
+ case ANTHROPIC -> preparedRequest.put("prompt",
+            "Human: " + stringToEvaluate + "\n\nAssistant:");
+ case MISTRAL -> preparedRequest.put("prompt",
+ "[INST] " + stringToEvaluate + "[/INST]");
+ case TITAN -> preparedRequest.put("inputText", stringToEvaluate);
+ default -> preparedRequest.put("prompt", stringToEvaluate);
+ }
+
+ // Encode and send the request.
+ var response = bedrockRuntimeClient.invokeModel(req -> req
+ .accept("application/json")
+ .contentType("application/json")
+ .body(SdkBytes.fromUtf8String(preparedRequest.toString()))
+ .modelId(modelId));
+
+ // Extract response
+ var responseBody = new JSONObject(response.body().asUtf8String());
+
+ log.debug("Response body from Bedrock: {}", responseBody);
+
+ responseText = switch (this.bedrockmodeltype) {
+ case TITAN:
+ yield responseBody.getJSONArray("results").getJSONObject(0)
+ .getString("outputText");
+ case LLAMA:
+ yield responseBody.getString("generation");
+ case MISTRAL:
+ yield responseBody.getJSONArray("outputs").getJSONObject(0)
+ .getString("text");
+ case ANTHROPIC:
+ yield responseBody.getString("completion");
+ };
+
+ } catch (JSONException e) {
+ log.warn("Cannot insert or decode JSON from/to Bedrock due to error: ",
+ e);
+ }
+
+ return responseText;
+ }
+
+ @Override
+ public String generateRandomValue() {
+ return "";
+ }
+
+ @Override
+ public Put toHbasePut(String value, Put hbasePut) {
+ hbasePut.addColumn(Bytes.toBytes(hbaseColumnQualifier), Bytes.toBytes(name),
+ Bytes.toBytes(value));
+ return hbasePut;
+ }
+
+ /*
+ Override if needed Field function to insert into special connectors
+ */
+
+ @Override
+ public PartialRow toKudu(String value, PartialRow partialRow) {
+ partialRow.addString(name, value);
+ return partialRow;
+ }
+
+ @Override
+ public Type getKuduType() {
+ return Type.STRING;
+ }
+
+ @Override
+ public HivePreparedStatement toHive(String value, int index,
+ HivePreparedStatement hivePreparedStatement) {
+ try {
+ hivePreparedStatement.setString(index, value);
+ } catch (SQLException e) {
+ log.warn("Could not set value : " + value.toString() +
+ " into hive statement due to error :", e);
+ }
+ return hivePreparedStatement;
+ }
+
+ @Override
+ public String getHiveType() {
+ return "STRING";
+ }
+
+ @Override
+ public String getGenericRecordType() {
+ return "string";
+ }
+
+ @Override
+ public ColumnVector getOrcColumnVector(VectorizedRowBatch batch, int cols) {
+ return batch.cols[cols];
+ }
+
+ @Override
+ public TypeDescription getTypeDescriptionOrc() {
+ return TypeDescription.createString();
+ }
+
+ private enum BedrockModelType {
+ ANTHROPIC,
+ TITAN,
+ MISTRAL,
+ LLAMA
+ }
+}
diff --git a/src/main/java/com/datagen/model/type/Field.java b/src/main/java/com/datagen/model/type/Field.java
index 810851a..eb5d819 100755
--- a/src/main/java/com/datagen/model/type/Field.java
+++ b/src/main/java/com/datagen/model/type/Field.java
@@ -18,6 +18,7 @@
package com.datagen.model.type;
+import com.datagen.config.ApplicationConfigs;
import com.datagen.model.Row;
import com.datagen.model.conditions.ConditionalEvaluator;
import com.fasterxml.jackson.databind.JsonNode;
@@ -46,82 +47,48 @@
@Slf4j
public abstract class Field {
- Random random = new Random();
-
@Getter
@Setter
public String name;
-
@Getter
@Setter
public Boolean computed = false;
-
@Getter
@Setter
public List possibleValues;
-
@Getter
@Setter
public Integer possibleValueSize;
-
@Getter
@Setter
public List filters;
-
@Getter
@Setter
public String file;
-
// This is a conditional evaluator holding all complexity (parsing, preparing comparison, evaluating it)
@Getter
@Setter
public ConditionalEvaluator conditional;
-
// Default length is -1, if user does not provide a strict superior to 0 length,
// each Extended field class should by default override it to a number strictly superior to 0
@Getter
@Setter
public int length = -1;
-
// Minimum possible value for Int/Long
@Getter
@Setter
public Long min;
-
// Maximum possible value Int/Long
@Getter
@Setter
public Long max;
-
@Getter
@Setter
public String hbaseColumnQualifier = "cq";
-
@Getter
@Setter
public boolean ghost;
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append("Class Type is " + this.getClass().getSimpleName() + " ; ");
- sb.append("name : " + name + " ; ");
- sb.append("hbase Column Qualifier : " + hbaseColumnQualifier + " ; ");
- sb.append("Length : " + length + " ; ");
- if (min != null) {
- sb.append("Min : " + min + " ; ");
- }
- if (max != null) {
- sb.append("Max : " + max + " ; ");
- }
- return sb.toString();
- }
-
- public abstract T generateRandomValue();
-
- public T generateComputedValue(Row row) {
- return toCastValue(conditional.evaluateConditions(row));
- }
+ Random random = new Random();
public static String toString(List fieldList) {
StringBuilder sb = new StringBuilder();
@@ -138,7 +105,6 @@ public static String toString(List fieldList) {
return sb.toString();
}
-
/**
* Create the right instance of a field (i.e. String, password etc..) according to its type
*
@@ -148,29 +114,39 @@ public static String toString(List fieldList) {
* @param columnQualifier Hbase column qualifier if there is one
* @return Field instantiated or null if type has not been recognized
*/
- public static Field instantiateField(String name,
- String type,
- Integer length,
- String columnQualifier,
- List possibleValues,
- LinkedHashMap possible_values_weighted,
- LinkedHashMap conditionals,
- String min,
- String max,
- List filters,
- String file,
- String separator,
- String pattern,
- Boolean useNow,
- String regex,
- String request,
- Boolean ghost,
- String mainField,
- String formula,
- String injection,
- String url,
- String user,
- String password) {
+ public static Field instantiateField(
+ Map properties,
+ String name,
+ String type,
+ Integer length,
+ String min,
+ String max,
+ String columnQualifier,
+ List possibleValues,
+ LinkedHashMap possible_values_weighted,
+ List filters,
+ LinkedHashMap conditionals,
+ String file,
+ String separator,
+ String pattern,
+ Boolean useNow,
+ String regex,
+ String request,
+ Boolean ghost,
+ String mainField,
+ String formula,
+ String injection,
+ String link,
+ String url,
+ String user,
+ String password,
+ String modelType,
+ Float temperature,
+ Float frequencyPenalty,
+ Float presencePenalty,
+ Integer maxTokens,
+ Float topP
+ ) {
if (name == null || name.isEmpty()) {
throw new IllegalStateException(
"Name can not be null or empty for field: " + name);
@@ -185,128 +161,189 @@ public static Field instantiateField(String name,
length = -1;
}
- Field field;
-
- switch (type.toUpperCase()) {
- case "STRING":
- field = new StringField(name, length,
- possibleValues.stream().map(JsonNode::asText)
- .collect(Collectors.toList()), possible_values_weighted);
- break;
- case "STRINGAZ":
- field = new StringAZField(name, length,
- possibleValues.stream().map(JsonNode::asText)
- .collect(Collectors.toList()));
- break;
- case "INTEGER":
- field = new IntegerField(name,
- possibleValues.stream().map(JsonNode::asInt)
- .collect(Collectors.toList()), possible_values_weighted, min,
- max);
- break;
- case "INCREMENT_INTEGER":
- field = new IncrementIntegerField(name, min);
- break;
- case "BOOLEAN":
- field = new BooleanField(name,
- possibleValues.stream().map(JsonNode::asBoolean)
- .collect(Collectors.toList()), possible_values_weighted);
- break;
- case "FLOAT":
- field = new FloatField(name,
- possibleValues.stream().map(j -> (float) j.asDouble())
- .collect(Collectors.toList()), possible_values_weighted, min,
- max);
- break;
- case "LONG":
- field = new LongField(name,
- possibleValues.stream().map(JsonNode::asLong)
- .collect(Collectors.toList()), possible_values_weighted, min,
- max);
- break;
- case "INCREMENT_LONG":
- field = new IncrementLongField(name, min);
- break;
- case "TIMESTAMP":
- field = new TimestampField(name,
- possibleValues.stream().map(JsonNode::asLong)
- .collect(Collectors.toList()));
- break;
- case "BYTES":
- field = new BytesField(name, length,
- possibleValues.stream().map(j -> j.asText().getBytes())
- .collect(Collectors.toList()));
- break;
- case "HASHMD5":
- field = new HashMd5Field(name, length,
- possibleValues.stream().map(j -> j.asText().getBytes())
- .collect(Collectors.toList()));
- break;
- case "BIRTHDATE":
- field = new BirthdateField(name, length,
- possibleValues.stream().map(JsonNode::asText)
- .collect(Collectors.toList()), min, max);
- break;
- case "NAME":
- field = new NameField(name, length,
- filters.stream().map(JsonNode::asText).collect(Collectors.toList()));
- break;
- case "COUNTRY":
- field = new CountryField(name, length,
- possibleValues.stream().map(JsonNode::asText)
- .collect(Collectors.toList()));
- break;
- case "CITY":
- field = new CityField(name,
- filters.stream().map(JsonNode::asText).collect(Collectors.toList()));
- break;
- case "BLOB":
- field = new BlobField(name, length,
- possibleValues.stream().map(j -> j.asText().getBytes())
- .collect(Collectors.toList()));
- break;
- case "EMAIL":
- field = new EmailField(name,
- possibleValues.stream().map(JsonNode::asText)
- .collect(Collectors.toList()),
- filters.stream().map(JsonNode::asText).collect(Collectors.toList()));
- break;
- case "IP":
- field = new IpField(name);
- break;
- case "LINK":
- field = new LinkField(name, length,
- possibleValues.stream().map(JsonNode::asText)
- .collect(Collectors.toList()));
- break;
- case "CSV":
- field = new CsvField(name, length,
- filters.stream().map(JsonNode::asText).collect(Collectors.toList()),
- file, separator, mainField);
- break;
- case "PHONE":
- field = new PhoneField(name, length,
- filters.stream().map(JsonNode::asText).collect(Collectors.toList()));
- break;
- case "UUID":
- field = new UuidField(name);
- break;
- case "DATE":
- field = new DateField(name, possibleValues.stream().map(JsonNode::asText)
- .collect(Collectors.toList()), min, max, useNow);
- break;
- case "DATE_AS_STRING":
- field = new DateAsStringField(name, possibleValues.stream().map(JsonNode::asText)
- .collect(Collectors.toList()), min, max, useNow, pattern);
- break;
- case "STRING_REGEX":
- field = new StringRegexField(name, regex);
- break;
- default:
- log.warn("Type : " + type +
- " has not been recognized and hence will be ignored");
- return null;
- }
+ Field field = switch (type.toUpperCase()) {
+ case "STRING":
+ yield new StringField(name, length,
+ possibleValues.stream().map(JsonNode::asText)
+ .collect(Collectors.toList()), possible_values_weighted);
+
+ case "STRINGAZ":
+ yield new StringAZField(name, length,
+ possibleValues.stream().map(JsonNode::asText)
+ .collect(Collectors.toList()));
+
+ case "INTEGER":
+ yield new IntegerField(name,
+ possibleValues.stream().map(JsonNode::asInt)
+ .collect(Collectors.toList()), possible_values_weighted, min,
+ max);
+
+ case "INCREMENT_INTEGER":
+ yield new IncrementIntegerField(name, min);
+
+ case "BOOLEAN":
+ yield new BooleanField(name,
+ possibleValues.stream().map(JsonNode::asBoolean)
+ .collect(Collectors.toList()), possible_values_weighted);
+
+ case "FLOAT":
+ yield new FloatField(name,
+ possibleValues.stream().map(j -> (float) j.asDouble())
+ .collect(Collectors.toList()), possible_values_weighted, min,
+ max);
+
+ case "LONG":
+ yield new LongField(name,
+ possibleValues.stream().map(JsonNode::asLong)
+ .collect(Collectors.toList()), possible_values_weighted, min,
+ max);
+
+ case "INCREMENT_LONG":
+ yield new IncrementLongField(name, min);
+
+ case "TIMESTAMP":
+ yield new TimestampField(name,
+ possibleValues.stream().map(JsonNode::asLong)
+ .collect(Collectors.toList()));
+
+ case "BYTES":
+ yield new BytesField(name, length,
+ possibleValues.stream().map(j -> j.asText().getBytes())
+ .collect(Collectors.toList()));
+
+ case "HASHMD5":
+ yield new HashMd5Field(name, length,
+ possibleValues.stream().map(j -> j.asText().getBytes())
+ .collect(Collectors.toList()));
+
+ case "BIRTHDATE":
+ yield new BirthdateField(name, length,
+ possibleValues.stream().map(JsonNode::asText)
+ .collect(Collectors.toList()), min, max);
+
+ case "NAME":
+ yield new NameField(name, length,
+ filters.stream().map(JsonNode::asText)
+ .collect(Collectors.toList()));
+
+ case "COUNTRY":
+ yield new CountryField(name, length,
+ possibleValues.stream().map(JsonNode::asText)
+ .collect(Collectors.toList()));
+
+ case "CITY":
+ yield new CityField(name,
+ filters.stream().map(JsonNode::asText)
+ .collect(Collectors.toList()));
+
+ case "BLOB":
+ yield new BlobField(name, length,
+ possibleValues.stream().map(j -> j.asText().getBytes())
+ .collect(Collectors.toList()));
+
+ case "EMAIL":
+ yield new EmailField(name,
+ possibleValues.stream().map(JsonNode::asText)
+ .collect(Collectors.toList()),
+ filters.stream().map(JsonNode::asText)
+ .collect(Collectors.toList()));
+
+ case "IP":
+ yield new IpField(name);
+
+ case "LINK":
+ yield new LinkField(name, length,
+ possibleValues.stream().map(JsonNode::asText)
+ .collect(Collectors.toList()));
+
+ case "CSV":
+ yield new CsvField(name, length,
+ filters.stream().map(JsonNode::asText).collect(Collectors.toList()),
+ file, separator, mainField);
+
+ case "PHONE":
+ yield new PhoneField(name, length,
+ filters.stream().map(JsonNode::asText)
+ .collect(Collectors.toList()));
+
+ case "UUID":
+ yield new UuidField(name);
+
+ case "DATE":
+ yield new DateField(name, possibleValues.stream().map(JsonNode::asText)
+ .collect(Collectors.toList()), min, max, useNow);
+
+ case "DATE_AS_STRING":
+ yield new DateAsStringField(name,
+ possibleValues.stream().map(JsonNode::asText)
+ .collect(Collectors.toList()), min, max, useNow, pattern);
+
+ case "STRING_REGEX":
+ yield new StringRegexField(name, regex);
+
+ case "OLLAMA":
+ yield new OllamaField(name,
+ url,
+ user, password, request,
+ modelType == null ?
+ properties.get(ApplicationConfigs.OLLAMA_MODEL_DEFAULT) :
+ modelType,
+ temperature == null ? Float.valueOf(
+ properties.get(ApplicationConfigs.OLLAMA_TEMPERATURE_DEFAULT)) :
+ temperature,
+ frequencyPenalty == null ? Float.valueOf(properties.get(
+ ApplicationConfigs.OLLAMA_FREQUENCY_PENALTY_DEFAULT)) : frequencyPenalty,
+ presencePenalty == null ? Float.valueOf(properties.get(
+ ApplicationConfigs.OLLAMA_PRESENCE_PENALTY_DEFAULT)) : presencePenalty,
+ topP == null ? Float.valueOf(properties.get(
+ ApplicationConfigs.OLLAMA_TOP_P_DEFAULT)) : topP
+ );
+
+ case "BEDROCK":
+ yield new BedrockField(name, url,
+ user == null ?
+ properties.get(ApplicationConfigs.BEDROCK_ACCESS_KEY_ID) : user,
+ password == null ?
+ properties.get(ApplicationConfigs.BEDROCK_ACCESS_KEY_SECRET) :
+ password,
+ request,
+ modelType == null ?
+ properties.get(ApplicationConfigs.BEDROCK_MODEL_DEFAULT) :
+ modelType,
+ temperature == null ? Float.valueOf(properties.get(
+ ApplicationConfigs.BEDROCK_TEMPERATURE_DEFAULT)) : temperature,
+ properties.get(ApplicationConfigs.BEDROCK_REGION),
+ maxTokens == null ? Integer.valueOf(properties.get(
+ ApplicationConfigs.BEDROCK_MAX_TOKENS_DEFAULT)) : maxTokens
+ );
+
+ case "OPENAI":
+ yield new OpenAIField(name, url,
+ user,
+ password == null ?
+ properties.get(ApplicationConfigs.OPENAI_API_KEY) :
+ password,
+ request,
+ modelType == null ?
+ properties.get(ApplicationConfigs.OPENAI_MODEL_DEFAULT) :
+ modelType,
+ temperature == null ? Float.valueOf(properties.get(
+ ApplicationConfigs.OPENAI_TEMPERATURE_DEFAULT)) : temperature,
+ frequencyPenalty == null ? Float.valueOf(properties.get(
+ ApplicationConfigs.OPENAI_FREQUENCY_PENALTY_DEFAULT)) : frequencyPenalty,
+ presencePenalty == null ? Float.valueOf(properties.get(
+ ApplicationConfigs.OPENAI_PRESENCE_PENALTY_DEFAULT)) : presencePenalty,
+ maxTokens == null ? Integer.valueOf(properties.get(
+ ApplicationConfigs.OPENAI_MAX_TOKENS_DEFAULT)) : maxTokens,
+ topP == null ? Float.valueOf(properties.get(
+ ApplicationConfigs.OPENAI_TOP_P_DEFAULT)) : topP
+ );
+
+ default:
+ log.warn("Type : " + type +
+ " has not been recognized and hence will be ignored");
+ yield null;
+ };
// If hbase column qualifier is not accurate, it should be let as is (default is "cq")
if (columnQualifier != null && !columnQualifier.isEmpty()) {
@@ -318,12 +355,29 @@ public static Field instantiateField(String name,
// If there are some conditions, we consider this field as computed (meaning it requires other fields' values to get its value)
// and same thing for request if it contains a '$'
if ((conditionals != null && !conditionals.isEmpty())
- || (request!=null && request.contains("$"))
- || (formula!=null)
- || (injection!=null)) {
- log.debug("Field {} has been marked as conditional: ", field);
+ || (request != null && request.contains("$"))
+ || (formula != null)
+ || (injection != null)
+ || (link != null)) {
+ log.debug("Field {} has been marked as computed: ", field);
field.setComputed(true);
+ }
+
+ // Set conditionals or formula or injections for the field if there are
+ if ((conditionals != null && !conditionals.isEmpty())) {
field.setConditional(new ConditionalEvaluator(conditionals));
+ } else if (formula != null) {
+ LinkedHashMap lm = new LinkedHashMap<>();
+ lm.put("formula", formula);
+ field.setConditional(new ConditionalEvaluator(lm));
+ } else if (injection != null) {
+ LinkedHashMap lm = new LinkedHashMap<>();
+ lm.put("injection", injection);
+ field.setConditional(new ConditionalEvaluator(lm));
+ } else if (link != null) {
+ LinkedHashMap lm = new LinkedHashMap<>();
+ lm.put("link", link);
+ field.setConditional(new ConditionalEvaluator(lm));
}
if (log.isDebugEnabled()) {
@@ -333,6 +387,28 @@ public static Field instantiateField(String name,
return field;
}
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("Class Type is " + this.getClass().getSimpleName() + " ; ");
+ sb.append("name : " + name + " ; ");
+ sb.append("hbase Column Qualifier : " + hbaseColumnQualifier + " ; ");
+ sb.append("Length : " + length + " ; ");
+ if (min != null) {
+ sb.append("Min : " + min + " ; ");
+ }
+ if (max != null) {
+ sb.append("Max : " + max + " ; ");
+ }
+ return sb.toString();
+ }
+
+ public abstract T generateRandomValue();
+
+ public T generateComputedValue(Row row) {
+ return toCastValue(conditional.evaluateConditions(row));
+ }
+
public String getTypeForModel() {
switch (this.getClass().getSimpleName().toLowerCase(Locale.ROOT)) {
case "birthdatefield":
@@ -381,8 +457,12 @@ public String getTypeForModel() {
return "STRING_REGEX";
case "timestampfield":
return "TIMESTAMP";
- case "uuidfiel":
+ case "uuidfield":
return "UUID";
+ case "ollamafield":
+ return "OLLAMA";
+ case "bedrockfield":
+ return "BEDROCK";
default:
return "STRING";
}
diff --git a/src/main/java/com/datagen/model/type/OllamaField.java b/src/main/java/com/datagen/model/type/OllamaField.java
new file mode 100755
index 0000000..918c8f3
--- /dev/null
+++ b/src/main/java/com/datagen/model/type/OllamaField.java
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datagen.model.type;
+
+import com.datagen.model.Row;
+import com.datagen.utils.ParsingUtils;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hive.jdbc.HivePreparedStatement;
+import org.apache.kudu.Type;
+import org.apache.kudu.client.PartialRow;
+import org.apache.orc.TypeDescription;
+import org.springframework.ai.chat.prompt.Prompt;
+import org.springframework.ai.ollama.OllamaChatClient;
+import org.springframework.ai.ollama.api.OllamaApi;
+import org.springframework.ai.ollama.api.OllamaOptions;
+
+import java.sql.SQLException;
+import java.util.LinkedList;
+
+@Slf4j
+/**
+ * Field whose value is generated by an Ollama-served LLM: the configured
+ * request template is filled with other columns' values for the current row
+ * and sent to the Ollama chat endpoint.
+ */
+public class OllamaField extends Field {
+
+  // Ollama server connection settings; user/password are stored but not used
+  // by the Ollama API calls below (kept for signature parity with other AI fields)
+  private final String url;
+  private final String user;
+  private final String password;
+  // Request template pre-parsed into literal/variable fragments, re-resolved per row
+  private final LinkedList requestToInject;
+  private final OllamaApi ollamaApi;
+  private final OllamaChatClient ollamaChatClient;
+  private final OllamaOptions ollamaOptions;
+
+
+  /**
+   * Builds the Ollama client and chat options once at model-parsing time.
+   * Null tuning parameters fall back to defaults (model "llama3",
+   * temperature/penalties/topP 1.0f). A null url uses the client's default endpoint.
+   */
+  public OllamaField(String name, String url, String user, String password, String request,
+      String modelType, Float temperature, Float frequencyPenalty,
+      Float presencePenalty, Float topP) {
+    this.name = name;
+    this.url = url;
+    this.user = user;
+    this.password = password;
+    // Parse ${col} references now so per-row generation only does substitution
+    this.requestToInject = ParsingUtils.parseStringWithVars(request);
+    this.ollamaApi = url == null ? new OllamaApi() : new OllamaApi(url);
+    this.ollamaChatClient = new OllamaChatClient(this.ollamaApi);
+    this.ollamaOptions = OllamaOptions.create()
+        .withModel(modelType==null?"llama3":modelType)
+        .withTemperature(temperature == null ? 1.0f : temperature)
+        .withFrequencyPenalty(frequencyPenalty == null ? 1.0f : frequencyPenalty)
+        .withPresencePenalty(presencePenalty == null ? 1.0f : presencePenalty)
+        .withTopP(topP == null ? 1.0f : topP);
+  }
+
+  /**
+   * Substitutes the row's values into the request template and returns the
+   * LLM's answer as the field value. Performs one network call per row.
+   */
+  @Override
+  public String generateComputedValue(Row row) {
+    String stringToEvaluate = ParsingUtils.injectRowValuesToAString(row, requestToInject);
+    log.debug("Asking to Ollama: {}", stringToEvaluate);
+
+    return this.ollamaChatClient.call(
+        new Prompt(
+            stringToEvaluate,
+            this.ollamaOptions
+        )).getResult().getOutput().getContent();
+  }
+
+  // This field is always computed from other columns; a bare random value is empty
+  @Override
+  public String generateRandomValue() {
+    return "";
+  }
+
+  /*
+  Override if needed Field function to insert into special connectors
+  */
+
+  @Override
+  public Put toHbasePut(String value, Put hbasePut) {
+    hbasePut.addColumn(Bytes.toBytes(hbaseColumnQualifier), Bytes.toBytes(name),
+        Bytes.toBytes(value));
+    return hbasePut;
+  }
+
+  @Override
+  public PartialRow toKudu(String value, PartialRow partialRow) {
+    partialRow.addString(name, value);
+    return partialRow;
+  }
+
+  @Override
+  public Type getKuduType() {
+    return Type.STRING;
+  }
+
+  @Override
+  public HivePreparedStatement toHive(String value, int index,
+                                      HivePreparedStatement hivePreparedStatement) {
+    try {
+      hivePreparedStatement.setString(index, value);
+    } catch (SQLException e) {
+      // Best-effort: a failed bind is logged and the statement returned unchanged
+      log.warn("Could not set value : " + value.toString() +
+          " into hive statement due to error :", e);
+    }
+    return hivePreparedStatement;
+  }
+
+  @Override
+  public String getHiveType() {
+    return "STRING";
+  }
+
+  @Override
+  public String getGenericRecordType() {
+    return "string";
+  }
+
+  @Override
+  public ColumnVector getOrcColumnVector(VectorizedRowBatch batch, int cols) {
+    return batch.cols[cols];
+  }
+
+  @Override
+  public TypeDescription getTypeDescriptionOrc() {
+    return TypeDescription.createString();
+  }
+}
diff --git a/src/main/java/com/datagen/model/type/OpenAIField.java b/src/main/java/com/datagen/model/type/OpenAIField.java
new file mode 100755
index 0000000..6cff529
--- /dev/null
+++ b/src/main/java/com/datagen/model/type/OpenAIField.java
@@ -0,0 +1,149 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datagen.model.type;
+
+import com.datagen.model.Row;
+import com.datagen.utils.ParsingUtils;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hive.jdbc.HivePreparedStatement;
+import org.apache.kudu.Type;
+import org.apache.kudu.client.PartialRow;
+import org.apache.orc.TypeDescription;
+import org.springframework.ai.openai.OpenAiChatClient;
+import org.springframework.ai.openai.OpenAiChatOptions;
+import org.springframework.ai.openai.api.OpenAiApi;
+
+import java.sql.SQLException;
+import java.util.LinkedList;
+
+@Slf4j
+/**
+ * Field whose value is generated by an OpenAI chat model: the configured
+ * request template is filled with other columns' values for the current row
+ * and sent through Spring AI's OpenAI chat client.
+ */
+public class OpenAIField extends Field {
+
+  // url and user are stored but unused by the OpenAI client below;
+  // NOTE(review): the API key is carried in the "password" model attribute — confirm intended
+  private final String url;
+  private final String user;
+  private final String password;
+
+  // Request template pre-parsed into literal/variable fragments, re-resolved per row
+  private final LinkedList requestToInject;
+  private final OpenAiApi openAiApi;
+  private final OpenAiChatClient openAiChatClient;
+  private final OpenAiChatOptions openAiChatOptions;
+  private final String modelId;
+
+  /**
+   * Builds the OpenAI client and chat options once at model-parsing time.
+   * Null tuning parameters fall back to defaults (model "gpt-4-32k",
+   * temperature/penalties/topP 1.0f, maxTokens 256).
+   */
+  public OpenAIField(String name, String url, String user, String password,
+      String request, String modelType, Float temperature, Float frequencyPenalty,
+      Float presencePenalty, Integer maxTokens, Float topP) {
+    this.name = name;
+    this.url = url;
+    this.user = user;
+    this.password = password;
+    this.requestToInject = ParsingUtils.parseStringWithVars(request);
+
+    // See model Ids available at: https://platform.openai.com/docs/models
+    this.modelId = modelType == null ? "gpt-4-32k" : modelType;
+
+    // The "password" attribute is used as the OpenAI API key
+    this.openAiApi = new OpenAiApi(this.password);
+    this.openAiChatOptions = OpenAiChatOptions.builder()
+        .withModel(this.modelId)
+        .withTemperature(temperature == null ? 1.0f : temperature)
+        .withFrequencyPenalty(frequencyPenalty == null ? 1.0f : frequencyPenalty)
+        .withPresencePenalty(presencePenalty == null ? 1.0f : presencePenalty)
+        .withMaxTokens(maxTokens == null ? 256 : maxTokens)
+        .withTopP(topP == null ? 1.0f : topP)
+        .build();
+    this.openAiChatClient = new OpenAiChatClient(openAiApi, openAiChatOptions);
+
+  }
+
+  /**
+   * Substitutes the row's values into the request template and returns the
+   * LLM's answer as the field value. Performs one network call per row.
+   */
+  @Override
+  public String generateComputedValue(Row row) {
+    String stringToEvaluate =
+        ParsingUtils.injectRowValuesToAString(row, requestToInject);
+    log.debug("Asking to OpenAI: {}", stringToEvaluate);
+    return openAiChatClient.call(stringToEvaluate);
+  }
+
+  // This field is always computed from other columns; a bare random value is empty
+  @Override
+  public String generateRandomValue() {
+    return "";
+  }
+
+  @Override
+  public Put toHbasePut(String value, Put hbasePut) {
+    hbasePut.addColumn(Bytes.toBytes(hbaseColumnQualifier), Bytes.toBytes(name),
+        Bytes.toBytes(value));
+    return hbasePut;
+  }
+
+  /*
+  Override if needed Field function to insert into special connectors
+  */
+
+  @Override
+  public PartialRow toKudu(String value, PartialRow partialRow) {
+    partialRow.addString(name, value);
+    return partialRow;
+  }
+
+  @Override
+  public Type getKuduType() {
+    return Type.STRING;
+  }
+
+  @Override
+  public HivePreparedStatement toHive(String value, int index,
+                                      HivePreparedStatement hivePreparedStatement) {
+    try {
+      hivePreparedStatement.setString(index, value);
+    } catch (SQLException e) {
+      // Best-effort: a failed bind is logged and the statement returned unchanged
+      log.warn("Could not set value : " + value.toString() +
+          " into hive statement due to error :", e);
+    }
+    return hivePreparedStatement;
+  }
+
+  @Override
+  public String getHiveType() {
+    return "STRING";
+  }
+
+  @Override
+  public String getGenericRecordType() {
+    return "string";
+  }
+
+  @Override
+  public ColumnVector getOrcColumnVector(VectorizedRowBatch batch, int cols) {
+    return batch.cols[cols];
+  }
+
+  @Override
+  public TypeDescription getTypeDescriptionOrc() {
+    return TypeDescription.createString();
+  }
+
+  // NOTE(review): Bedrock-specific enum appears misplaced in OpenAIField and is
+  // unused here — confirm whether it belongs in a BedrockField class instead
+  private enum BedrockModelType {
+    ANTHROPIC,
+    TITAN,
+    MISTRAL,
+    LLAMA
+  }
+}
diff --git a/src/main/java/com/datagen/parsers/JsonParser.java b/src/main/java/com/datagen/parsers/JsonParser.java
index 6b5c0b1..eafe566 100755
--- a/src/main/java/com/datagen/parsers/JsonParser.java
+++ b/src/main/java/com/datagen/parsers/JsonParser.java
@@ -18,6 +18,7 @@
package com.datagen.parsers;
+import com.datagen.config.ApplicationConfigs;
import com.datagen.model.Model;
import com.datagen.model.type.Field;
import com.fasterxml.jackson.databind.JsonNode;
@@ -61,7 +62,7 @@ public JsonParser(String jsonFilePath) {
*
* @return Model instantiated and populated
*/
- public Model renderModelFromFile() {
+ public Model renderModelFromFile(Map properties) {
// Release 0.4.15 introduced an easier format with PK, TB & Options being just one JSON node instead of an array
// But we need to keep working wih old format for retro-compatbility. (Fields is untouched)
@@ -118,13 +119,13 @@ public Model renderModelFromFile() {
while (fieldsIterator.hasNext()) {
JsonNode fieldNode = fieldsIterator.next();
- T field = getOneField(fieldNode, hbaseFamilyColsMap);
+ T field = getOneField(fieldNode, properties, hbaseFamilyColsMap);
if (field != null) {
fields.put(fieldNode.get("name").asText(), field);
}
}
- return new Model(fields, pks, tbs, opsMap);
+ return new Model(fields, pks, tbs, opsMap, properties);
}
@@ -135,7 +136,21 @@ public Model renderModelFromFile() {
* @param jsonField
* @return
*/
- private T getOneField(JsonNode jsonField, Map opsMap) {
+ private T getOneField(JsonNode jsonField, Map properties, Map opsMap) {
+ String name;
+ try {
+ name = jsonField.get("name").asText();
+ } catch (NullPointerException e) {
+ name = "UNDEFINED_COL_NAME";
+ }
+
+ String type;
+ try {
+ type = jsonField.get("type").asText();
+ } catch (NullPointerException e) {
+ type = "UNDEFINED_TYPE";
+ }
+
Integer length;
try {
length = jsonField.get("length").asInt();
@@ -227,6 +242,13 @@ private T getOneField(JsonNode jsonField, Map opsMap) {
request = null;
}
+ String link;
+ try {
+ link = jsonField.get("link").asText();
+ } catch (NullPointerException e) {
+ link = null;
+ }
+
String url;
try {
url = jsonField.get("url").asText();
@@ -248,6 +270,48 @@ private T getOneField(JsonNode jsonField, Map opsMap) {
password = null;
}
+ String modelType;
+ try {
+ modelType = jsonField.get("model_type").asText();
+ } catch (NullPointerException e) {
+ modelType = null;
+ }
+
+ Float temperature;
+ try {
+ temperature = Float.valueOf(jsonField.get("temperature").asText());
+ } catch (NullPointerException e) {
+ temperature = null;
+ }
+
+ Float frequencyPenalty;
+ try {
+ frequencyPenalty = Float.valueOf(jsonField.get("frequency_penalty").asText());
+ } catch (NullPointerException e) {
+ frequencyPenalty = null;
+ }
+
+ Float presencePenalty;
+ try {
+ presencePenalty = Float.valueOf(jsonField.get("presence_penalty").asText());
+ } catch (NullPointerException e) {
+ presencePenalty = null;
+ }
+
+ Integer maxTokens;
+ try {
+ maxTokens = Integer.valueOf(jsonField.get("max_tokens").asText());
+ } catch (NullPointerException e) {
+ maxTokens = null;
+ }
+
+ Float topP;
+ try {
+ topP = Float.valueOf(jsonField.get("top_p").asText());
+ } catch (NullPointerException e) {
+ topP = null;
+ }
+
JsonNode filtersArray = jsonField.get("filters");
List filters = new ArrayList<>();
try {
@@ -297,16 +361,17 @@ private T getOneField(JsonNode jsonField, Map opsMap) {
}
return (T) Field.instantiateField(
- jsonField.get("name").asText(),
- jsonField.get("type").asText(),
+ properties,
+ name,
+ type,
length,
- opsMap.get(jsonField.get("name").asText()),
- possibleValues,
- possible_values_weighted,
- conditionals,
min,
max,
+ opsMap.get(name),
+ possibleValues,
+ possible_values_weighted,
filters,
+ conditionals,
file,
separator,
pattern,
@@ -317,9 +382,16 @@ private T getOneField(JsonNode jsonField, Map opsMap) {
field,
formula,
injection,
+ link,
url,
user,
- password);
+ password,
+ modelType,
+ temperature,
+ frequencyPenalty,
+ presencePenalty,
+ maxTokens,
+ topP);
}
private Map mapColNameToColQual(String mapping) {
diff --git a/src/main/java/com/datagen/parsers/Parser.java b/src/main/java/com/datagen/parsers/Parser.java
index d7e34b8..ab01053 100755
--- a/src/main/java/com/datagen/parsers/Parser.java
+++ b/src/main/java/com/datagen/parsers/Parser.java
@@ -18,8 +18,11 @@
package com.datagen.parsers;
+import com.datagen.config.ApplicationConfigs;
import com.datagen.model.Model;
+import java.util.Map;
+
/**
* A parser is an entity able to read one type of file and render a {@see #com.cloudera.frisch.randomdatagen.model.Model}
* based on that file
@@ -28,5 +31,5 @@ public interface Parser {
// TODO: Implement a yaml parser
- Model renderModelFromFile();
+ Model renderModelFromFile(Map properties);
}
diff --git a/src/main/java/com/datagen/service/APISevice.java b/src/main/java/com/datagen/service/APISevice.java
index dc7c2d4..8e2feab 100755
--- a/src/main/java/com/datagen/service/APISevice.java
+++ b/src/main/java/com/datagen/service/APISevice.java
@@ -57,25 +57,25 @@ public String saveModel(@Nullable MultipartFile modelFileAsFile,
if (modelFilePath == null) {
log.info(
"No model file passed, will default to custom data model or default defined one in configuration");
- if (properties.get(ApplicationConfigs.CUSTOM_DATA_MODEL_DEFAULT) !=
+ if (properties.get(ApplicationConfigs.DATAGEN_CUSTOM_MODEL) !=
null) {
modelFile =
- properties.get(ApplicationConfigs.CUSTOM_DATA_MODEL_DEFAULT);
+ properties.get(ApplicationConfigs.DATAGEN_CUSTOM_MODEL);
} else {
- modelFile = properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT) +
- properties.get(ApplicationConfigs.DATA_MODEL_DEFAULT);
+ modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH) +
+ properties.get(ApplicationConfigs.DATAGEN_MODEL_DEFAULT);
}
}
if (modelFilePath != null && !modelFilePath.contains("/")) {
log.info(
"Model file passed is identified as one of the one provided, so will look for it in data model path: {} ",
- properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT));
- modelFile = properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT) +
+ properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH));
+ modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH) +
modelFilePath;
}
if (modelFileAsFile != null && !modelFileAsFile.isEmpty()) {
log.info("Model passed is an uploaded file");
- modelFile = properties.get(ApplicationConfigs.DATA_MODEL_RECEIVED_PATH) +
+ modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_RECEIVED_PATH) +
"/model-test-" + new Random().nextInt() + ".json";
try {
modelFileAsFile.transferTo(new File(modelFile));
@@ -96,7 +96,7 @@ public String saveModel(@Nullable MultipartFile modelFileAsFile,
}
// Save Model
- Model model = parser.renderModelFromFile();
+ Model model = parser.renderModelFromFile(properties);
UUID modelId = UUID.randomUUID();
modelMap.put(modelId, model);
diff --git a/src/main/java/com/datagen/service/CommandRunnerService.java b/src/main/java/com/datagen/service/CommandRunnerService.java
index 3ac61e7..a7b0a2e 100755
--- a/src/main/java/com/datagen/service/CommandRunnerService.java
+++ b/src/main/java/com/datagen/service/CommandRunnerService.java
@@ -58,19 +58,19 @@ public class CommandRunnerService {
public CommandRunnerService(PropertiesLoader propertiesLoader) {
this.propertiesLoader = propertiesLoader;
this.scheduledCommandsFilePath = propertiesLoader.getPropertiesCopy()
- .get(ApplicationConfigs.SCHEDULER_FILE_PATH);
+ .get(ApplicationConfigs.DATAGEN_SCHEDULER_FILE_PATH);
this.commandsToProcess = new ConcurrentLinkedQueue<>();
this.scheduledCommands = new HashMap<>();
this.commands = new HashMap<>();
- FileUtils.createLocalDirectory(propertiesLoader.getPropertiesCopy().get(ApplicationConfigs.DATA_HOME_DIRECTORY));
+ FileUtils.createLocalDirectory(propertiesLoader.getPropertiesCopy().get(ApplicationConfigs.DATAGEN_HOME_DIRECTORY));
readScheduledCommands();
// After reading scheduled values, file should be re-written
writeScheduledCommands();
FileUtils.createLocalDirectory(propertiesLoader.getPropertiesCopy()
- .get(ApplicationConfigs.DATA_MODEL_RECEIVED_PATH));
+ .get(ApplicationConfigs.DATAGEN_MODEL_RECEIVED_PATH));
}
public CommandSoft getCommandStatusShort(UUID uuid) {
@@ -176,7 +176,7 @@ public void readScheduledCommands() {
"Model has not been found or is incorrect, correct it. This command has been removed from scheduler");
wrongScheduledCommandsRead.add(c.getCommandUuid());
}
- c.setModel(parser.renderModelFromFile());
+ c.setModel(parser.renderModelFromFile(propertiesLoader.getPropertiesCopy()));
// Previous Failed commands should not be taken
if (c.getStatus() == Command.CommandStatus.FAILED) {
@@ -273,25 +273,25 @@ public String generateData(
(modelFileAsFile == null || modelFileAsFile.isEmpty())) {
log.info(
"No model file passed, will default to custom data model or default defined one in configuration");
- if (properties.get(ApplicationConfigs.CUSTOM_DATA_MODEL_DEFAULT) !=
+ if (properties.get(ApplicationConfigs.DATAGEN_CUSTOM_MODEL) !=
null) {
modelFile =
- properties.get(ApplicationConfigs.CUSTOM_DATA_MODEL_DEFAULT);
+ properties.get(ApplicationConfigs.DATAGEN_CUSTOM_MODEL);
} else {
- modelFile = properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT) +
- properties.get(ApplicationConfigs.DATA_MODEL_DEFAULT);
+ modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH) +
+ properties.get(ApplicationConfigs.DATAGEN_MODEL_DEFAULT);
}
}
if (modelFilePath != null && !modelFilePath.contains("/")) {
log.info(
"Model file passed is identified as one of the one provided, so will look for it in data model path: {} ",
- properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT));
- modelFile = properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT) +
+ properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH));
+ modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH) +
modelFilePath;
}
if (modelFileAsFile != null && !modelFileAsFile.isEmpty()) {
log.info("Model passed is an uploaded file");
- modelFile = properties.get(ApplicationConfigs.DATA_MODEL_RECEIVED_PATH) +
+ modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_RECEIVED_PATH) +
"/model-" + System.currentTimeMillis() + "-" + String.format("%06d",new Random().nextInt(100000)) + ".json";
try {
modelFileAsFile.transferTo(new File(modelFile));
@@ -321,7 +321,7 @@ public String generateData(
log.warn("Error when parsing model file");
return "{ \"commandUuid\": \"\" , \"error\": \"Error with Model File - Verify its path and structure\" }";
}
- Model model = parser.renderModelFromFile();
+ Model model = parser.renderModelFromFile(properties);
// Creation of connectors
List connectorsList = new ArrayList<>();
@@ -347,7 +347,7 @@ public String generateData(
if (isModelUploaded) {
// If model has been uploaded, it must be renamed to use its UUID for user and admin convenience
String newModelFilePath =
- properties.get(ApplicationConfigs.DATA_MODEL_RECEIVED_PATH) +
+ properties.get(ApplicationConfigs.DATAGEN_MODEL_RECEIVED_PATH) +
"/model-" + command.getCommandUuid().toString() + ".json";
FileUtils.moveLocalFile(modelFile, newModelFilePath);
command.setModelFilePath(newModelFilePath);
diff --git a/src/main/java/com/datagen/service/ModelGeneraterSevice.java b/src/main/java/com/datagen/service/ModelGeneraterSevice.java
index b596984..f133e47 100755
--- a/src/main/java/com/datagen/service/ModelGeneraterSevice.java
+++ b/src/main/java/com/datagen/service/ModelGeneraterSevice.java
@@ -47,9 +47,9 @@ public ModelGeneraterSevice(PropertiesLoader propertiesLoader) {
this.propertiesLoader = propertiesLoader;
FileUtils.createLocalDirectory(propertiesLoader.getPropertiesCopy()
- .get(ApplicationConfigs.DATA_MODEL_GENERATED_PATH));
+ .get(ApplicationConfigs.DATAGEN_MODEL_GENERATED_PATH));
FileUtils.createLocalDirectory(propertiesLoader.getPropertiesCopy()
- .get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT));
+ .get(ApplicationConfigs.DATAGEN_MODEL_PATH));
}
public String generateModel(
@@ -90,7 +90,7 @@ public String generateModel(
tableNames.put(OptionsConverter.TableNames.LOCAL_FILE_PATH, filepath);
}
- String outputPath = properties.get(ApplicationConfigs.DATA_MODEL_GENERATED_PATH) +
+ String outputPath = properties.get(ApplicationConfigs.DATAGEN_MODEL_GENERATED_PATH) +
"/model-generated-" + new Random().nextInt() + ".json";
ConnectorInterface connector = ConnectorsUtils
diff --git a/src/main/java/com/datagen/service/ModelTesterSevice.java b/src/main/java/com/datagen/service/ModelTesterSevice.java
index 398e359..4e5735a 100755
--- a/src/main/java/com/datagen/service/ModelTesterSevice.java
+++ b/src/main/java/com/datagen/service/ModelTesterSevice.java
@@ -57,25 +57,25 @@ public String generateData(@Nullable MultipartFile modelFileAsFile,
if (modelFilePath == null) {
log.info(
"No model file passed, will default to custom data model or default defined one in configuration");
- if (properties.get(ApplicationConfigs.CUSTOM_DATA_MODEL_DEFAULT) !=
+ if (properties.get(ApplicationConfigs.DATAGEN_CUSTOM_MODEL) !=
null) {
modelFile =
- properties.get(ApplicationConfigs.CUSTOM_DATA_MODEL_DEFAULT);
+ properties.get(ApplicationConfigs.DATAGEN_CUSTOM_MODEL);
} else {
- modelFile = properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT) +
- properties.get(ApplicationConfigs.DATA_MODEL_DEFAULT);
+ modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH) +
+ properties.get(ApplicationConfigs.DATAGEN_MODEL_DEFAULT);
}
}
if (modelFilePath != null && !modelFilePath.contains("/")) {
log.info(
"Model file passed is identified as one of the one provided, so will look for it in data model path: {} ",
- properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT));
- modelFile = properties.get(ApplicationConfigs.DATA_MODEL_PATH_DEFAULT) +
+ properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH));
+ modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_PATH) +
modelFilePath;
}
if (modelFileAsFile != null && !modelFileAsFile.isEmpty()) {
log.info("Model passed is an uploaded file");
- modelFile = properties.get(ApplicationConfigs.DATA_MODEL_RECEIVED_PATH) +
+ modelFile = properties.get(ApplicationConfigs.DATAGEN_MODEL_RECEIVED_PATH) +
"/model-test-" + new Random().nextInt() + ".json";
try {
modelFileAsFile.transferTo(new File(modelFile));
@@ -94,7 +94,7 @@ public String generateData(@Nullable MultipartFile modelFileAsFile,
log.warn("Error when parsing model file");
return "{ \"commandUuid\": \"\" , \"error\": \"Error with Model File - Verify its path and structure\" }";
}
- Model model = parser.renderModelFromFile();
+ Model model = parser.renderModelFromFile(properties);
List randomDataList = model.generateRandomRows(1, 1);
diff --git a/src/main/java/com/datagen/utils/ParsingUtils.java b/src/main/java/com/datagen/utils/ParsingUtils.java
new file mode 100644
index 0000000..301f163
--- /dev/null
+++ b/src/main/java/com/datagen/utils/ParsingUtils.java
@@ -0,0 +1,85 @@
+package com.datagen.utils;
+
+import com.datagen.model.Model;
+import com.datagen.model.Row;
+import lombok.AllArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+
+@Slf4j
+public class ParsingUtils {
+
+  /**
+   * One piece of a parsed string: either a literal to print as-is,
+   * or a variable reference to be substituted with a row value.
+   */
+  @AllArgsConstructor
+  public static class StringFragment {
+    String stringToPrint;
+    String variableName;
+    Boolean isAVariableToReplace;
+  }
+
+  // Matches ${varName}; \w allows digits and underscores (e.g. ${city_country})
+  private static final Pattern patternToIdentifyInjections = Pattern.compile("(\\$\\{)(\\w+)(\\})");
+
+  /**
+   * Parse a String containing column references to other fields
+   * And prepare it for future evaluation during generation
+   * @param stringToParse
+   * @return a linked list of string to either print or compute (get its value from other columns)
+   */
+  public static LinkedList<StringFragment> parseStringWithVars(String stringToParse) {
+
+    LinkedList<StringFragment> stringParsed = new LinkedList<>();
+
+    Matcher matcher = patternToIdentifyInjections.matcher(stringToParse);
+
+    // Find all places in the string where there are column names to replace
+    int cursorPosition = 0;
+    while (matcher.find()) {
+      if(matcher.start()>cursorPosition) {
+        // Add literal text found before the match
+        log.debug("Found string to let as is: {}", stringToParse.substring(cursorPosition,matcher.start()));
+        stringParsed.add(new StringFragment(stringToParse.substring(cursorPosition,matcher.start()),null,false));
+      }
+      // Add the variable reference itself (group 2 is the bare column name)
+      log.debug("Found column to substitute: {}", matcher.group(2));
+      stringParsed.add(new StringFragment(null,matcher.group(2),true));
+      cursorPosition = matcher.end();
+    }
+
+    // If there are still characters left after the last match, add them as-is
+    if(cursorPosition<stringToParse.length()) {
+      stringParsed.add(new StringFragment(stringToParse.substring(cursorPosition),null,false));
+    }
+
+    return stringParsed;
+  }
+
+  /**
+   * Substitute each variable fragment with its value taken from the row and
+   * concatenate all fragments back into a single string
+   * @param row row whose generated values are injected
+   * @param fragments fragments previously produced by parseStringWithVars
+   * @return the fully substituted string
+   */
+  public static String injectRowValuesToAString(Row row, LinkedList<StringFragment> fragments) {
+    Map rowValues = row.getValues();
+    Model model = row.getModel();
+
+    return fragments.stream().map(f -> {
+      if(f.isAVariableToReplace) {
+        // Render the raw row value through its field's own string conversion
+        return model.getFieldFromName(f.variableName).toStringValue(rowValues.get(f.variableName));
+      } else {
+        return f.stringToPrint;
+      }
+    }).reduce("", String::concat);
+  }
+
+}
diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties
index 5223009..cc0bbef 100755
--- a/src/main/resources/application.properties
+++ b/src/main/resources/application.properties
@@ -182,4 +182,9 @@ adls.sas.token=
gcs.project.id=
# Only if using a service account key, otherwise use any other ADC login
gcs.accountkey.path=
-gcs.region=
\ No newline at end of file
+gcs.region=
+
+# OLLAMA
+spring.ai.ollama.base-url=http://localhost:52764
+spring.ai.ollama.chat.enabled=true
+spring.ai.ollama.chat.options.format=json
\ No newline at end of file
diff --git a/src/main/resources/logback-spring.xml b/src/main/resources/logback-spring.xml
index 1feed91..38586c1 100755
--- a/src/main/resources/logback-spring.xml
+++ b/src/main/resources/logback-spring.xml
@@ -12,7 +12,7 @@
-
+
@@ -36,7 +36,7 @@
-
+
diff --git a/src/main/resources/models/example-model-ai.json b/src/main/resources/models/example-model-ai.json
new file mode 100755
index 0000000..d4b759b
--- /dev/null
+++ b/src/main/resources/models/example-model-ai.json
@@ -0,0 +1,125 @@
+{
+ "Fields": [
+ {
+ "name": "name",
+ "type": "NAME"
+ },
+ {
+ "name": "age",
+ "type": "LONG",
+ "min": 18,
+ "max": 99
+ },
+ {
+ "name": "birthday_wish_bedrock",
+ "type": "BEDROCK",
+ "request": "generate a one line birthday wish to ${name} who is ${age} years old today",
+ "model_type": "meta.llama3-8b-instruct-v1:0",
+ "user": "",
+ "password": "",
+ "temperature": 1.0,
+ "max_tokens": 256
+ },
+ {
+ "name": "birthday_wish_ollama",
+ "type": "OLLAMA",
+ "request": "generate a one line birthday wish to ${name} who is ${age} years old today",
+ "model_type": "mistral",
+ "temperature": 1.0,
+ "frequency_penalty": 1.5,
+ "presence_penalty": 1.3,
+ "top_p": 1.0
+ },
+ {
+ "name": "birthday_wish_openai",
+ "type": "OPENAI",
+ "request": "generate a one line birthday wish to ${name} who is ${age} years old today",
+ "model_type": "gpt-4o",
+ "password": "",
+ "temperature": 1.0,
+ "frequency_penalty": 1.5,
+ "presence_penalty": 1.3,
+ "max_tokens": 256,
+ "top_p": 1.0
+ }
+ ],
+ "Table_Names": {
+ "HDFS_FILE_PATH": "/user/datagen/hdfs/MODEL_NAME/",
+ "HDFS_FILE_NAME": "MODEL_NAME",
+
+ "HBASE_TABLE_NAME": "MODEL_NAME",
+ "HBASE_NAMESPACE": "datagen",
+
+ "KAFKA_TOPIC": "datagen_MODEL_NAME",
+
+ "OZONE_VOLUME": "datagen",
+ "OZONE_BUCKET": "MODEL_NAME",
+ "OZONE_KEY_NAME": "MODEL_NAME",
+ "OZONE_LOCAL_FILE_PATH": "/home/datagen/temp/MODEL_NAME/",
+
+ "SOLR_COLLECTION": "datagen_MODEL_NAME",
+
+ "HIVE_DATABASE": "datagen",
+ "HIVE_TABLE_NAME": "MODEL_NAME",
+ "HIVE_HDFS_FILE_PATH": "/user/datagen/hive/MODEL_NAME/",
+ "HIVE_TEMPORARY_TABLE_NAME": "MODEL_NAME_tmp",
+
+ "KUDU_TABLE_NAME": "datagen.MODEL_NAME",
+
+ "LOCAL_FILE_PATH": "/tmp/datagen/MODEL_NAME/",
+ "LOCAL_FILE_NAME": "datagen-MODEL_NAME",
+
+ "S3_BUCKET": "datagen-test-fri",
+ "S3_DIRECTORY": "datagen/MODEL_NAME",
+ "S3_KEY_NAME": "MODEL_NAME",
+ "S3_LOCAL_FILE_PATH": "/tmp/datagen/temp/MODEL_NAME/",
+
+ "ADLS_CONTAINER": "dgtest",
+ "ADLS_DIRECTORY": "datagen/MODEL_NAME",
+ "ADLS_FILE_NAME": "MODEL_NAME",
+ "ADLS_LOCAL_FILE_PATH": "/tmp/datagen/temp/MODEL_NAME/",
+
+ "GCS_BUCKET": "datagenfri",
+ "GCS_DIRECTORY": "datagen/MODEL_NAME",
+ "GCS_OBJECT_NAME": "MODEL_NAME",
+ "GCS_LOCAL_FILE_PATH": "/tmp/datagen/temp/MODEL_NAME/",
+
+ "AVRO_NAME": "datagenMODEL_NAME"
+ },
+ "Options": {
+ "KAFKA_MSG_KEY": "name",
+ "HBASE_PRIMARY_KEY": "name",
+ "KUDU_PRIMARY_KEYS": "name,age",
+ "KUDU_RANGE_KEYS": "age",
+ "DELETE_PREVIOUS": false,
+ "ONE_FILE_PER_ITERATION": true,
+ "CSV_HEADER": true,
+ "HBASE_COLUMN_FAMILIES_MAPPING": "c:name",
+ "SOLR_SHARDS": 1,
+ "SOLR_REPLICAS": 1,
+ "SOLR_JAAS_FILE_PATH": "/tmp/solr.jaas",
+ "HIVE_THREAD_NUMBER": 1,
+ "HIVE_ON_HDFS": true,
+ "HIVE_TEZ_QUEUE_NAME": "root.default",
+ "HIVE_TABLE_PARTITIONS_COLS": "name",
+ "HIVE_TABLE_BUCKETS_COLS": "age",
+ "HIVE_TABLE_BUCKETS_NUMBER": 32,
+ "PARQUET_PAGE_SIZE": 1048576,
+ "PARQUET_ROW_GROUP_SIZE": 134217728,
+ "PARQUET_DICTIONARY_PAGE_SIZE": 1048576,
+ "PARQUET_DICTIONARY_ENCODING": true,
+ "KAFKA_MESSAGE_TYPE": "json",
+ "KAFKA_JAAS_FILE_PATH": "/tmp/kafka.jaas",
+ "KAFKA_ACKS_CONFIG": "all",
+ "KAFKA_RETRIES_CONFIG": 3,
+ "KUDU_REPLICAS": 1,
+ "KUDU_BUCKETS": 32,
+ "KUDU_BUFFER": 100001,
+ "KUDU_FLUSH": "MANUAL_FLUSH",
+ "OZONE_REPLICATION_FACTOR": 3,
+ "HDFS_REPLICATION_FACTOR": 3,
+ "ADLS_MAX_CONCURRENCY": 4,
+ "ADLS_MAX_UPLOAD_SIZE": 16777216,
+ "ADLS_BLOCK_SIZE": 8388608
+ }
+}
\ No newline at end of file
diff --git a/src/main/resources/models/example-model.json b/src/main/resources/models/example-model.json
index f41a5c6..6f9df67 100755
--- a/src/main/resources/models/example-model.json
+++ b/src/main/resources/models/example-model.json
@@ -11,9 +11,16 @@
"max": 99
},
{
- "name": "age",
- "type": "OLLAMA",
- "request": "generate a birthday wish to $name who is $age years old today"
+ "name": "birthday_wish_openai",
+ "type": "OPENAI",
+ "request": "generate a one line birthday wish to ${name} who is ${age} years old today",
+ "model_type": "gpt-4o",
+ "password": "",
+ "temperature": 1.0,
+ "frequency_penalty": 1.5,
+ "presence_penalty": 1.3,
+ "max_tokens": 256,
+ "top_p": 1.0
}
],
"Table_Names": {
diff --git a/src/main/resources/models/full-model.json b/src/main/resources/models/full-model.json
index db3f7c4..e988d92 100755
--- a/src/main/resources/models/full-model.json
+++ b/src/main/resources/models/full-model.json
@@ -133,7 +133,7 @@
{
"name": "formula_1",
"type": "STRING",
- "formula": "2 * $longPercent + 42 - $onePlusOne"
+ "formula": "2 * ${longPercent} + 42 - ${onePlusOne}"
},
{
"name": "condition_2",
@@ -202,6 +202,13 @@
"name": "email_from_name",
"type": "STRING",
"injection": "toto_${name}@our_company_name.${country}"
+ },
+ {
+ "name": "birthday_wish",
+ "type": "OLLAMA",
+ "request": "generate a birthday wish to ${name} born ${birthdate} and who is living in ${city} , ${city_country}",
+ "temperature": 1.5,
+ "model_type": "mistral"
}
],
"Table_Names": {