Skip to content

Commit

Permalink
Support for data template schema field string literals in PDL
Browse files Browse the repository at this point in the history
Encode the SCHEMA field in generated data templates according to the
file format of the source schema file. Postponing the use of a new
interface for files generated from PDSC to avoid build failures,
with the expectation that this will be addressed relatively soon.

RB=1864201
G=sf-reviewers
R=ybi,kbalasub,mnchen
A=ybi,kbalasub
  • Loading branch information
evanw555 committed Nov 21, 2019
1 parent 75325c3 commit adf963b
Show file tree
Hide file tree
Showing 17 changed files with 476 additions and 66 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
28.0.6
------
(RB=1864201)
Support for data template schema field string literals in PDL

28.0.5
------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import com.linkedin.data.message.MessageUtil;

import com.linkedin.data.schema.grammar.PdlSchemaParser;
import com.linkedin.data.schema.resolver.DefaultDataSchemaResolver;
import com.linkedin.data.schema.validation.CoercionMode;
import com.linkedin.data.schema.validation.RequiredMode;
import com.linkedin.data.schema.validation.ValidateDataAgainstSchema;
Expand Down Expand Up @@ -55,17 +56,17 @@ abstract public class AbstractSchemaParser implements PegasusSchemaParser
/**
* Constructor with resolver.
*
* @param resolver to be used to find {@link DataSchema}'s.
* @param resolver to be used to find {@link DataSchema}s.
*/
protected AbstractSchemaParser(DataSchemaResolver resolver)
{
_resolver = resolver;
_resolver = resolver == null ? new DefaultDataSchemaResolver() : resolver;
}

/**
* Get the {@link DataSchemaResolver}.
*
* @return the resolver to used to find {@link DataSchema}'s, may be null
* @return the resolver to be used to find {@link DataSchema}s, may be null
* if no resolver has been provided to parser.
*/
public DataSchemaResolver getResolver()
Expand All @@ -74,12 +75,12 @@ public DataSchemaResolver getResolver()
}

/**
* Return the top level {@link DataSchema}'s.
* Return the top level {@link DataSchema}s.
*
* The top level DataSchemas represent the types
* that are not defined within other types.
*
* @return the list of top level {@link DataSchema}'s in the
* @return the list of top level {@link DataSchema}s in the
* order that are defined.
*/
public List<DataSchema> topLevelDataSchemas()
Expand Down Expand Up @@ -796,7 +797,7 @@ protected void validateDefaults(RecordDataSchema recordSchema)
/**
* Set the current location for the source of input to the parser.
*
* This current location is will be used to annotate {@link NamedDataSchema}'s
* This current location will be used to annotate {@link NamedDataSchema}s
* generated from parsing.
*
* @param location of the input source.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,15 @@ private CompactPdlBuilder(Writer writer)
@Override
PdlBuilder write(String text) throws IOException
{
final boolean writeWhitespaceBuffer = isIdentifierCharacter(text.charAt(0));
processWhitespaceBuffer(writeWhitespaceBuffer);
if (text != null && !text.isEmpty())
{
final boolean writeWhitespaceBuffer = isIdentifierCharacter(text.charAt(0));
processWhitespaceBuffer(writeWhitespaceBuffer);

super.write(text);
super.write(text);

_needsWhitespacePadding = isIdentifierCharacter(text.charAt(text.length() - 1));
_needsWhitespacePadding = isIdentifierCharacter(text.charAt(text.length() - 1));
}

return this;
}
Expand Down
84 changes: 84 additions & 0 deletions data/src/main/java/com/linkedin/data/schema/SchemaFormatType.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
Copyright (c) 2019 LinkedIn Corp.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package com.linkedin.data.schema;

import com.linkedin.data.schema.grammar.PdlSchemaParserFactory;


/**
 * Enumerates the supported schema source formats, pairing each one with the
 * {@link DataSchemaParserFactory} that creates parsers for it.
 *
 * @author Evan Williams
 */
public enum SchemaFormatType
{
  PDSC(SchemaParserFactory.instance()),
  PDL(PdlSchemaParserFactory.instance());

  private final DataSchemaParserFactory _schemaParserFactory;

  SchemaFormatType(DataSchemaParserFactory schemaParserFactory)
  {
    _schemaParserFactory = schemaParserFactory;
  }

  /**
   * Get the parser factory associated with this format.
   *
   * @return the {@link DataSchemaParserFactory} supplied when this constant was constructed.
   */
  public DataSchemaParserFactory getSchemaParserFactory()
  {
    return _schemaParserFactory;
  }

  /**
   * Looks up the schema format type implied by the extension of a filename.
   *
   * The extension is the text following the last dot. A filename without any dot is
   * matched as a whole against the known extensions.
   *
   * @param filename filename, may be null
   * @return the matching schema format type, or null if the filename is null, has nothing
   *         after its last dot, or has an unrecognized extension
   */
  public static SchemaFormatType fromFilename(String filename)
  {
    if (filename == null)
    {
      return null;
    }

    // lastIndexOf yields -1 when there is no dot, so the "+ 1" makes the substring the whole name.
    final String extension = filename.substring(filename.lastIndexOf('.') + 1);

    return extension.isEmpty() ? null : fromFileExtension(extension);
  }

  /**
   * Looks up the schema format type whose parser factory reports the given language extension.
   * The comparison ignores case.
   *
   * @param fileExtension file extension string, without a leading dot
   * @return the matching schema format type, or null if no format uses this extension
   */
  public static SchemaFormatType fromFileExtension(String fileExtension)
  {
    for (SchemaFormatType candidate : values())
    {
      final String languageExtension = candidate._schemaParserFactory.getLanguageExtension();
      if (languageExtension.equalsIgnoreCase(fileExtension))
      {
        return candidate;
      }
    }

    return null;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,10 @@

package com.linkedin.data.schema;


import com.linkedin.data.DataComplex;
import com.linkedin.data.DataList;
import com.linkedin.data.DataMap;
import com.linkedin.data.codec.DataLocation;
import com.linkedin.data.schema.resolver.DefaultDataSchemaResolver;
import com.linkedin.data.schema.UnionDataSchema.Member;
import java.io.InputStream;
import java.io.Reader;
Expand Down Expand Up @@ -71,7 +69,7 @@ public SchemaParser()
*/
public SchemaParser(DataSchemaResolver resolver)
{
super(resolver == null ? new DefaultDataSchemaResolver() : resolver);
super(resolver);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.linkedin.data.DataList;
import com.linkedin.data.DataMap;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Collections;
import java.util.HashSet;
Expand All @@ -40,6 +41,32 @@ public class SchemaToPdlEncoder extends AbstractSchemaEncoder
// Unions with at least this many members will be written onto multiple lines to improve readability
private static final int UNION_MULTILINE_THRESHOLD = 5;

/**
 * Render a {@link DataSchema} as PDL source text.
 *
 * The schema is encoded into an in-memory {@link StringWriter} by a {@link SchemaToPdlEncoder}
 * configured with the requested encoding style.
 *
 * @param schema is the {@link DataSchema} to build a PDL encoded output for.
 * @param encodingStyle is the encoding style.
 * @return the PDL encoded string representing the {@link DataSchema}.
 * @throws IllegalStateException wrapping any {@link IOException} raised while encoding.
 */
public static String schemaToPdl(DataSchema schema, EncodingStyle encodingStyle)
{
  final StringWriter pdlOutput = new StringWriter();
  final SchemaToPdlEncoder pdlEncoder = new SchemaToPdlEncoder(pdlOutput);
  pdlEncoder.setEncodingStyle(encodingStyle);

  try
  {
    pdlEncoder.encode(schema);
    return pdlOutput.toString();
  }
  catch (IOException e)
  {
    throw new IllegalStateException(e);
  }
}

/**
* Encoding style for PDL.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@
import com.linkedin.data.schema.DataSchemaParserFactory;
import com.linkedin.data.schema.DataSchemaResolver;


/**
* DataSchemaParserFactory for the Pegasus data language (.pdl).
* {@link DataSchemaParserFactory} for the Pegasus data language (.pdl).
*
* @author Joe Betz
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import com.linkedin.data.schema.DataSchemaResolver;
import com.linkedin.data.schema.DataSchemaUtil;
import com.linkedin.data.schema.NamedDataSchema;
import com.linkedin.data.schema.SchemaParserFactory;
import com.linkedin.data.schema.SchemaFormatType;
import com.linkedin.data.schema.PegasusSchemaParser;
import com.linkedin.data.schema.validation.CoercionMode;
import com.linkedin.data.schema.validation.RequiredMode;
Expand Down Expand Up @@ -281,14 +281,16 @@ public static <T extends DataTemplate<?>> T wrap(Object object, Constructor<T> c
/**
* Parse data schema in JSON format to obtain a {@link DataSchema}.
*
* TODO: deprecate this later, since current use cases still use this in generated data templates.
*
* @param schemaText provides the data schema in JSON format.
* @return the {@link DataSchema} parsed from the data schema in JSON format.
* @throws IllegalArgumentException if the data schema in JSON format is invalid or
* there is more than one top level schema.
*/
public static DataSchema parseSchema(String schemaText) throws IllegalArgumentException
{
return parseSchema(schemaText, null);
return parseSchema(schemaText, null, SchemaFormatType.PDSC);
}

/**
Expand All @@ -299,10 +301,42 @@ public static DataSchema parseSchema(String schemaText) throws IllegalArgumentEx
* @return the {@link DataSchema} parsed from the data schema in JSON format.
* @throws IllegalArgumentException if the data schema in JSON format is invalid or
* there is more than one top level schema.
* @deprecated This method assumes the data schema is encoded in {@link SchemaFormatType#PDSC},
* use {@link #parseSchema(String, DataSchemaResolver, SchemaFormatType)} instead.
*/
@Deprecated
public static DataSchema parseSchema(String schemaText, DataSchemaResolver schemaResolver) throws IllegalArgumentException
{
PegasusSchemaParser parser = SchemaParserFactory.instance().create(schemaResolver);
return parseSchema(schemaText, schemaResolver, SchemaFormatType.PDSC);
}

/**
 * Parse an encoded data schema into a {@link DataSchema} without supplying a schema resolver.
 *
 * Delegates to {@link #parseSchema(String, DataSchemaResolver, SchemaFormatType)} with a
 * {@code null} resolver.
 *
 * @param schemaText the encoded data schema.
 * @param schemaFormatType the format in which the schema is encoded.
 * @return the {@link DataSchema} parsed from the encoded data schema.
 * @throws IllegalArgumentException if the encoded data schema is invalid or there is more than one top-level schema.
 */
public static DataSchema parseSchema(String schemaText, SchemaFormatType schemaFormatType) throws IllegalArgumentException
{
  final DataSchemaResolver noResolver = null;
  return parseSchema(schemaText, noResolver, schemaFormatType);
}

/**
* Parse data schema encoded in any format to obtain a {@link DataSchema}.
*
* @param schemaText the encoded data schema.
* @param schemaFormatType the format in which the schema is encoded.
* @param schemaResolver resolver for resolving referenced schemas.
* @return the {@link DataSchema} parsed from the encoded data schema.
* @throws IllegalArgumentException if the encoded data schema is invalid or there is more than one top-level schema.
*/
public static DataSchema parseSchema(String schemaText, DataSchemaResolver schemaResolver,
SchemaFormatType schemaFormatType) throws IllegalArgumentException
{
final PegasusSchemaParser parser = schemaFormatType.getSchemaParserFactory().create(schemaResolver);

parser.parse(schemaText);
if (parser.hasError())
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
Copyright (c) 2019 LinkedIn Corp.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package com.linkedin.data.schema;

import com.linkedin.data.schema.grammar.PdlSchemaParserFactory;
import org.testng.Assert;
import org.testng.annotations.Test;


/**
 * Unit tests for {@link SchemaFormatType}, covering the factory accessor and the
 * filename / file-extension lookups.
 *
 * @author Evan Williams
 */
public class TestSchemaFormatType
{
  @Test
  public void testGetSchemaParserFactory()
  {
    final DataSchemaParserFactory pdscFactory = SchemaFormatType.PDSC.getSchemaParserFactory();
    final DataSchemaParserFactory pdlFactory = SchemaFormatType.PDL.getSchemaParserFactory();

    Assert.assertSame(pdscFactory, SchemaParserFactory.instance());
    Assert.assertSame(pdlFactory, PdlSchemaParserFactory.instance());
  }

  @Test
  public void testFromFilename()
  {
    // Recognized extensions, including names with several dots, a path, and a bare extension.
    expectFilename("Foo.pdsc", SchemaFormatType.PDSC);
    expectFilename("Bar.pdl", SchemaFormatType.PDL);
    expectFilename("Two.dots.pdsc", SchemaFormatType.PDSC);
    expectFilename("/some/path/with/Two.dots.pdl", SchemaFormatType.PDL);
    expectFilename(".pdl", SchemaFormatType.PDL);

    // Unknown, missing, or empty extensions, and null input, all yield null.
    expectFilename("Baz.json", null);
    expectFilename("Biz", null);
    expectFilename("Bop.", null);
    expectFilename(".", null);
    expectFilename("", null);
    expectFilename(null, null);
  }

  @Test
  public void testFromFileExtension()
  {
    // Matching ignores case.
    expectExtension("pdsc", SchemaFormatType.PDSC);
    expectExtension("pdl", SchemaFormatType.PDL);
    expectExtension("PdsC", SchemaFormatType.PDSC);
    expectExtension("PDL", SchemaFormatType.PDL);

    expectExtension("json", null);
    expectExtension("", null);
    expectExtension(null, null);
  }

  // Asserts the result of fromFilename, using assertNull when no format is expected.
  private static void expectFilename(String filename, SchemaFormatType expected)
  {
    final SchemaFormatType actual = SchemaFormatType.fromFilename(filename);
    if (expected == null)
    {
      Assert.assertNull(actual);
    }
    else
    {
      Assert.assertEquals(actual, expected);
    }
  }

  // Asserts the result of fromFileExtension, using assertNull when no format is expected.
  private static void expectExtension(String fileExtension, SchemaFormatType expected)
  {
    final SchemaFormatType actual = SchemaFormatType.fromFileExtension(fileExtension);
    if (expected == null)
    {
      Assert.assertNull(actual);
    }
    else
    {
      Assert.assertEquals(actual, expected);
    }
  }
}
Loading

0 comments on commit adf963b

Please sign in to comment.