Skip to content

Commit

Permalink
..
Browse files Browse the repository at this point in the history
  • Loading branch information
xitongsys committed Nov 30, 2020
1 parent fcce0a3 commit d2ea22c
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 35 deletions.
45 changes: 25 additions & 20 deletions common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,11 @@ type Tag struct {
}

// NewTag returns a new Tag whose LogicalTypeFields, KeyLogicalTypeFields and
// ValueLogicalTypeFields maps are already allocated.
//
// The maps are created here because assigning into a nil map panics in Go;
// returning a bare &Tag{} would leave all three maps nil.
func NewTag() *Tag {
	return &Tag{
		LogicalTypeFields:      make(map[string]string),
		KeyLogicalTypeFields:   make(map[string]string),
		ValueLogicalTypeFields: make(map[string]string),
	}
}

func StringToTag(tag string) *Tag {
Expand Down Expand Up @@ -308,68 +312,69 @@ func NewLogicalTypeFromFieldsMap(mp map[string]string) *parquet.LogicalType {
} else {
logicalType := parquet.NewLogicalType()
switch val {
case "string":
case "STRING":
logicalType.STRING = parquet.NewStringType()
case "map":
case "MAP":
logicalType.MAP = parquet.NewMapType()
case "list":
case "LIST":
logicalType.LIST = parquet.NewListType()
case "enum":
case "ENUM":
logicalType.ENUM = parquet.NewEnumType()
case "decimal":

case "DECIMAL":
logicalType.DECIMAL = parquet.NewDecimalType()
logicalType.DECIMAL.Precision = Str2Int32(mp["logicaltype.precision"])
logicalType.DECIMAL.Scale = Str2Int32(mp["logicaltype.scale"])

case "date":
case "DATE":
logicalType.DATE = parquet.NewDateType()

case "time":
case "TIME":
logicalType.TIME = parquet.NewTimeType()
logicalType.TIME.IsAdjustedToUTC = Str2Bool(mp["logicaltype.isadjustedtoutc"])
switch mp["logicaltype.unit"] {
case "millis":
case "MILLIS":
logicalType.TIME.Unit = parquet.NewTimeUnit()
logicalType.TIME.Unit.MILLIS = parquet.NewMilliSeconds()
case "micros":
case "MICROS":
logicalType.TIME.Unit = parquet.NewTimeUnit()
logicalType.TIME.Unit.MICROS = parquet.NewMicroSeconds()
case "nanos":
case "NANOS":
logicalType.TIME.Unit = parquet.NewTimeUnit()
logicalType.TIME.Unit.NANOS = parquet.NewNanoSeconds()
default:
panic("logicaltype time error")
}

case "timestamp":
case "TIMESTAMP":
logicalType.TIMESTAMP = parquet.NewTimestampType()
logicalType.TIMESTAMP.IsAdjustedToUTC = Str2Bool(mp["logicaltype.isadjustedtoutc"])
switch mp["logicaltype.unit"] {
case "millis":
case "MILLIS":
logicalType.TIMESTAMP.Unit = parquet.NewTimeUnit()
logicalType.TIMESTAMP.Unit.MILLIS = parquet.NewMilliSeconds()
case "micros":
case "MICROS":
logicalType.TIMESTAMP.Unit = parquet.NewTimeUnit()
logicalType.TIMESTAMP.Unit.MICROS = parquet.NewMicroSeconds()
case "nanos":
case "NANOS":
logicalType.TIMESTAMP.Unit = parquet.NewTimeUnit()
logicalType.TIMESTAMP.Unit.NANOS = parquet.NewNanoSeconds()
default:
panic("logicaltype time error")
}

case "integer":
case "INTEGER":
logicalType.INTEGER = parquet.NewIntType()
logicalType.INTEGER.BitWidth = int8(Str2Int32(mp["logicaltype.bitwidth"]))
logicalType.INTEGER.IsSigned = Str2Bool(mp["logicaltype.issigned"])

case "json":
case "JSON":
logicalType.JSON = parquet.NewJsonType()

case "bson":
case "BSON":
logicalType.BSON = parquet.NewBsonType()

case "uuid":
case "UUID":
logicalType.UUID = parquet.NewUUIDType()

default:
Expand All @@ -386,7 +391,7 @@ func NewLogicalTypeFromConvertedType(schemaElement *parquet.SchemaElement, info
if ct == nil {
return nil
}

logicalType := parquet.NewLogicalType()
switch *ct {
case parquet.ConvertedType_INT_8:
Expand Down
37 changes: 22 additions & 15 deletions example/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,34 @@ type TypeList struct {
ByteArray string `parquet:"name=bytearray, type=BYTE_ARRAY"`
FixedLenByteArray string `parquet:"name=FixedLenByteArray, type=FIXED_LEN_BYTE_ARRAY, length=10"`

Utf8 string `parquet:"name=utf8, type=UTF8, encoding=PLAIN_DICTIONARY"`
Int_8 int8 `parquet:"name=int_8, type=INT_8"`
Int_16 int16 `parquet:"name=int_16, type=INT_16"`
Int_32 int32 `parquet:"name=int_32, type=INT_32"`
Int_64 int64 `parquet:"name=int_64, type=INT_64"`
Uint_8 uint8 `parquet:"name=uint_8, type=UINT_8"`
Uint_16 uint16 `parquet:"name=uint_16, type=UINT_16"`
Uint_32 uint32 `parquet:"name=uint_32, type=UINT_32"`
Uint_64 uint64 `parquet:"name=uint_64, type=UINT_64"`
Date int32 `parquet:"name=date, type=DATE"`
TimeMillis int32 `parquet:"name=timemillis, type=TIME_MILLIS"`
TimeMicros int64 `parquet:"name=timemicros, type=TIME_MICROS"`
TimestampMillis int64 `parquet:"name=timestampmillis, type=TIMESTAMP_MILLIS"`
TimestampMicros int64 `parquet:"name=timestampmicros, type=TIMESTAMP_MICROS"`
Interval string `parquet:"name=interval, type=INTERVAL"`
Utf8 string `parquet:"name=utf8, type=UTF8, encoding=PLAIN_DICTIONARY"`
Int_8 int8 `parquet:"name=int_8, type=INT_8"`
Int_16 int16 `parquet:"name=int_16, type=INT_16"`
Int_32 int32 `parquet:"name=int_32, type=INT_32"`
Int_64 int64 `parquet:"name=int_64, type=INT_64"`
Uint_8 uint8 `parquet:"name=uint_8, type=UINT_8"`
Uint_16 uint16 `parquet:"name=uint_16, type=UINT_16"`
Uint_32 uint32 `parquet:"name=uint_32, type=UINT_32"`
Uint_64 uint64 `parquet:"name=uint_64, type=UINT_64"`
Date int32 `parquet:"name=date, type=DATE"`
Date2 int32 `parquet:"name=date2, type=DATE, logicaltype=DATE"`
TimeMillis int32 `parquet:"name=timemillis, type=TIME_MILLIS"`
TimeMillis2 int32 `parquet:"name=timemillis2, type=TIME_MILLIS, logicaltype=TIME, logicaltype.isadjustedtoutc=true, logicaltype.unit=MILLIS"`
TimeMicros int64 `parquet:"name=timemicros, type=TIME_MICROS"`
TimeMicros2 int64 `parquet:"name=timemicros2, type=TIME_MICROS, logicaltype=TIME, logicaltype.isadjustedtoutc=false, logicaltype.unit=MICROS"`
TimestampMillis int64 `parquet:"name=timestampmillis, type=TIMESTAMP_MILLIS"`
TimestampMillis2 int32 `parquet:"name=timestampmillis2, type=TIMESTAMP_MILLIS, logicaltype=TIMESTAMP, logicaltype.isadjustedtoutc=true, logicaltype.unit=MILLIS"`
TimestampMicros int64 `parquet:"name=timestampmicros, type=TIMESTAMP_MICROS"`
TimestampMicros2 int64 `parquet:"name=timestampmicros2, type=TIMESTAMP_MICROS, logicaltype=TIMESTAMP, logicaltype.isadjustedtoutc=false, logicaltype.unit=MICROS"`
Interval string `parquet:"name=interval, type=INTERVAL"`

Decimal1 int32 `parquet:"name=decimal1, type=DECIMAL, scale=2, precision=9, basetype=INT32"`
Decimal2 int64 `parquet:"name=decimal2, type=DECIMAL, scale=2, precision=18, basetype=INT64"`
Decimal3 string `parquet:"name=decimal3, type=DECIMAL, scale=2, precision=10, basetype=FIXED_LEN_BYTE_ARRAY, length=12"`
Decimal4 string `parquet:"name=decimal4, type=DECIMAL, scale=2, precision=20, basetype=BYTE_ARRAY"`

Decimal5 int32 `parquet:"name=decimalt, type=DECIMAL, basetype=INT32, logicaltype=DECIMAL, logicaltype.precision=10, logicaltype.scale=2"`

Map map[string]int32 `parquet:"name=map, type=MAP, keytype=UTF8, valuetype=INT32"`
List []string `parquet:"name=list, type=LIST, valuetype=UTF8"`
Repeated []int32 `parquet:"name=repeated, type=INT32, repetitiontype=REPEATED"`
Expand Down

0 comments on commit d2ea22c

Please sign in to comment.