Skip to content

Commit

Permalink
Modify pjson format (FerretDB#1620)
Browse files Browse the repository at this point in the history
  • Loading branch information
Elena Grahovac authored Dec 20, 2022
1 parent b4f603a commit a41a947
Show file tree
Hide file tree
Showing 33 changed files with 1,083 additions and 382 deletions.
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
# Changelog

## v0.8.0 (to be released)

### What's Changed

In this release, we made a big change in the way FerretDB stores data in PostgreSQL.

Previously, we stored information about data types in the fields themselves.
Starting with this release, we store information about data types (the document's schema) in a special field.

This will allow us to implement more query pushdowns in the future.

There are no changes in the API, but because the data is now stored in a different way, this change is not backward compatible with data stored by previous versions.
Before upgrading, please make a dump of your databases; then delete the databases, upgrade FerretDB, and restore the dump.


## [v0.7.1](https://github.com/FerretDB/FerretDB/releases/tag/v0.7.1) (2022-12-19)

### New Features 🎉
Expand Down
2 changes: 1 addition & 1 deletion internal/handlers/pg/pgdb/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func DeleteDocumentsByID(ctx context.Context, tx pgx.Tx, sp *SQLParam, ids []any

for i, id := range ids {
placeholders[i] = p.Next()
idsMarshalled[i] = must.NotFail(pjson.Marshal(id))
idsMarshalled[i] = must.NotFail(pjson.MarshalSingleValue(id))
}

sql := `DELETE `
Expand Down
2 changes: 1 addition & 1 deletion internal/handlers/pg/pgdb/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ func prepareWhereClause(sqlFilters *types.Document) (string, []any) {
case types.ObjectID:
filters = append(filters, fmt.Sprintf(`((_jsonb->'_id')::jsonb = %s)`, p.Next()))

args = append(args, string(must.NotFail(pjson.Marshal(v))))
args = append(args, string(must.NotFail(pjson.MarshalSingleValue(v))))
}
default:
continue
Expand Down
2 changes: 1 addition & 1 deletion internal/handlers/pg/pgdb/query_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func (it *queryIterator) Next() (uint32, *types.Document, error) {

n := it.currentIter.Add(1)

return n - 1, doc.(*types.Document), nil
return n - 1, doc, nil
}

// Close implements iterator.Interface.
Expand Down
7 changes: 1 addition & 6 deletions internal/handlers/pg/pgdb/settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,12 +167,7 @@ func getSettingsTable(ctx context.Context, tx pgx.Tx, db string, lock bool) (*ty
return nil, lazyerrors.Error(err)
}

settings, ok := doc.(*types.Document)
if !ok {
return nil, lazyerrors.Errorf("invalid settings document: %v", doc)
}

return settings, nil
return doc, nil
}

// setTableInSettings sets the table name for given collection in settings table.
Expand Down
2 changes: 1 addition & 1 deletion internal/handlers/pg/pgdb/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func SetDocumentByID(ctx context.Context, tx pgx.Tx, sp *SQLParam, id any, doc *

sql += pgx.Identifier{sp.DB, table}.Sanitize() + " SET _jsonb = $1 WHERE _jsonb->'_id' = $2"

tag, err := tx.Exec(ctx, sql, must.NotFail(pjson.Marshal(doc)), must.NotFail(pjson.Marshal(id)))
tag, err := tx.Exec(ctx, sql, must.NotFail(pjson.Marshal(doc)), must.NotFail(pjson.MarshalSingleValue(id)))
if err != nil {
return 0, err
}
Expand Down
20 changes: 15 additions & 5 deletions internal/handlers/pg/pjson/array.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ type arrayType types.Array
// pjsontype implements pjsontype interface.
//
// The method body is empty: it takes no arguments, returns nothing and has no effect.
// NOTE(review): presumably it exists only so that *arrayType satisfies the
// pjsontype interface; the interface declaration is not visible here, confirm there.
func (a *arrayType) pjsontype() {}

// UnmarshalJSON implements pjsontype interface.
func (a *arrayType) UnmarshalJSON(data []byte) error {
// UnmarshalJSONWithSchema unmarshals the JSON data with the given schema.
func (a *arrayType) UnmarshalJSONWithSchema(data []byte, schemas []*elem) error {
if bytes.Equal(data, []byte("null")) {
panic("null data")
}
Expand All @@ -46,10 +46,20 @@ func (a *arrayType) UnmarshalJSON(data []byte) error {
return lazyerrors.Error(err)
}

if len(rawMessages) > 0 && schemas == nil {
return lazyerrors.Errorf("pjson.arrayType.UnmarshalJSON: array schema is nil for non-empty array")
}

if len(schemas) != len(rawMessages) {
return lazyerrors.Errorf("pjson.arrayType.UnmarshalJSON: %d elements in schema, %d in total",
len(schemas), len(rawMessages),
)
}

ta := types.MakeArray(len(rawMessages))

for _, el := range rawMessages {
v, err := Unmarshal(el)
for i, el := range rawMessages {
v, err := unmarshalSingleValue(el, schemas[i])
if err != nil {
return lazyerrors.Error(err)
}
Expand Down Expand Up @@ -82,7 +92,7 @@ func (a *arrayType) MarshalJSON() ([]byte, error) {
return nil, lazyerrors.Error(err)
}

b, err := Marshal(el)
b, err := MarshalSingleValue(el)
if err != nil {
return nil, lazyerrors.Error(err)
}
Expand Down
73 changes: 53 additions & 20 deletions internal/handlers/pg/pjson/array_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,26 +27,59 @@ func convertArray(a *types.Array) *arrayType {
return &res
}

var arrayTestCases = []testCase{{
name: "array_all",
v: convertArray(must.NotFail(types.NewArray(
must.NotFail(types.NewArray()),
types.Binary{Subtype: types.BinaryUser, B: []byte{0x42}},
true,
time.Date(2021, 7, 27, 9, 35, 42, 123000000, time.UTC).Local(),
must.NotFail(types.NewDocument()),
42.13,
int32(42),
int64(42),
"foo",
types.Null,
))),
j: `[[],{"$b":"Qg==","s":128},true,{"$d":1627378542123},{"$k":[]},{"$f":42.13},42,{"$l":"42"},"foo",null]`,
}, {
name: "EOF",
j: `[`,
jErr: `unexpected EOF`,
}}
// arrayTestCases is the table of test cases for arrayType.
//
// Each case sets some of: name, the value v, the schema sch describing that value,
// the JSON text j, and, for inputs expected to be rejected, the error text jErr.
// NOTE(review): how the fields are consumed is defined by testCase and its runner,
// which are not visible here.
var arrayTestCases = []testCase{
{
// An array mixing element kinds: nested array, binary, bool, time, document,
// float64, int32, int64, string and null. The schema lists one item per
// element, in the same order as the elements.
name: "array_all",
v: convertArray(must.NotFail(types.NewArray(
must.NotFail(types.NewArray()),
types.Binary{Subtype: types.BinaryUser, B: []byte{0x42}},
true,
time.Date(2021, 7, 27, 9, 35, 42, 123000000, time.UTC).Local(),
must.NotFail(types.NewDocument()),
42.13,
int32(42),
int64(42),
"foo",
types.Null,
))),
sch: &elem{
Type: elemTypeArray,
Items: []*elem{
{Type: elemTypeArray, Items: []*elem{}},
binDataSchema(types.BinaryUser),
boolSchema,
dateSchema,
{Type: elemTypeObject, Schema: &schema{Properties: map[string]*elem{}, Keys: []string{}}},
doubleSchema,
intSchema,
longSchema,
stringSchema,
nullSchema,
},
},
// The JSON carries plain values only; there are no per-value type wrappers in it.
j: `[[],"Qg==",true,1627378542123,{},42.13,42,42,"foo",null]`,
}, {
// Truncated JSON input.
name: "EOF",
sch: &elem{Type: elemTypeArray, Items: []*elem{}},
j: `[`,
jErr: `unexpected EOF`,
}, {
// Zero-value schema (Items left unset) combined with a non-empty array.
name: "SchemaIsNil",
sch: new(elem),
j: `["foo"]`,
jErr: `pjson.arrayType.UnmarshalJSON: array schema is nil for non-empty array`,
}, {
// Two schema items but only one array element.
name: "ExtraElemsInSchema",
sch: &elem{Type: elemTypeArray, Items: []*elem{stringSchema, stringSchema}},
j: `["foo"]`,
jErr: `pjson.arrayType.UnmarshalJSON: 2 elements in schema, 1 in total`,
}, {
// One schema item but two array elements.
name: "ExtraElemsInArray",
sch: &elem{Type: elemTypeArray, Items: []*elem{stringSchema}},
j: `["foo", "bar"]`,
jErr: `pjson.arrayType.UnmarshalJSON: 1 elements in schema, 2 in total`,
},
}

func TestArray(t *testing.T) {
t.Parallel()
Expand Down
26 changes: 11 additions & 15 deletions internal/handlers/pg/pjson/binary.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,8 @@ type binaryType types.Binary
// pjsontype implements pjsontype interface.
//
// The method body is empty: it takes no arguments, returns nothing and has no effect.
// NOTE(review): presumably it exists only so that *binaryType satisfies the
// pjsontype interface; the interface declaration is not visible here, confirm there.
func (bin *binaryType) pjsontype() {}

// binaryJSON is a JSON object representation of the binaryType.
type binaryJSON struct {
B []byte `json:"$b"`
S byte `json:"s"`
}

// UnmarshalJSON implements pjsontype interface.
func (bin *binaryType) UnmarshalJSON(data []byte) error {
// UnmarshalJSONWithSchema unmarshals the JSON data with the given schema.
func (bin *binaryType) UnmarshalJSONWithSchema(data []byte, sch *elem) error {
if bytes.Equal(data, []byte("null")) {
panic("null data")
}
Expand All @@ -44,7 +38,7 @@ func (bin *binaryType) UnmarshalJSON(data []byte) error {
dec := json.NewDecoder(r)
dec.DisallowUnknownFields()

var o binaryJSON
var o []byte

err := dec.Decode(&o)
if err != nil {
Expand All @@ -55,18 +49,20 @@ func (bin *binaryType) UnmarshalJSON(data []byte) error {
return lazyerrors.Error(err)
}

bin.B = o.B
bin.Subtype = types.BinarySubtype(o.S)
bin.B = o

if sch.Subtype == nil {
return lazyerrors.Errorf("binary subtype in the schema is nil")
}

bin.Subtype = *sch.Subtype

return nil
}

// MarshalJSON implements pjsontype interface.
func (bin *binaryType) MarshalJSON() ([]byte, error) {
res, err := json.Marshal(binaryJSON{
B: bin.B,
S: byte(bin.Subtype),
})
res, err := json.Marshal(bin.B)
if err != nil {
return nil, lazyerrors.Error(err)
}
Expand Down
27 changes: 14 additions & 13 deletions internal/handlers/pg/pjson/binary_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,35 +26,36 @@ var binaryTestCases = []testCase{{
Subtype: types.BinaryUser,
B: []byte("foo"),
},
j: `{"$b":"Zm9v","s":128}`,
sch: binDataSchema(types.BinaryUser),
j: `"Zm9v"`,
}, {
name: "empty",
v: &binaryType{
Subtype: types.BinaryGeneric,
B: []byte{},
},
j: `{"$b":""}`,
canonJ: `{"$b":"","s":0}`,
sch: binDataSchema(types.BinaryGeneric),
j: `""`,
}, {
name: "invalid subtype",
v: &binaryType{
Subtype: 0xff,
B: []byte{},
},
j: `{"$b":"","s":255}`,
}, {
name: "extra JSON fields",
v: &binaryType{
Subtype: types.BinaryUser,
B: []byte("foo"),
},
j: `{"$b":"Zm9v","s":128,"foo":"bar"}`,
canonJ: `{"$b":"Zm9v","s":128}`,
jErr: `json: unknown field "foo"`,
sch: binDataSchema(0xff),
j: `""`,
}, {
name: "EOF",
j: `{`,
jErr: `unexpected EOF`,
}, {
name: "NilSubtype",
sch: &elem{
Type: elemTypeBinData,
Subtype: nil,
},
j: `"Zm9v"`,
jErr: `binary subtype in the schema is nil`,
}}

func TestBinary(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion internal/handlers/pg/pjson/bool.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ type boolType bool
// pjsontype implements pjsontype interface.
//
// The method body is empty: it takes no arguments, returns nothing and has no effect.
// NOTE(review): presumably it exists only so that *boolType satisfies the
// pjsontype interface; the interface declaration is not visible here, confirm there.
func (b *boolType) pjsontype() {}

// UnmarshalJSON implements pjsontype interface.
// UnmarshalJSON implements json.Unmarshaler interface.
func (b *boolType) UnmarshalJSON(data []byte) error {
if bytes.Equal(data, []byte("null")) {
panic("null data")
Expand Down
15 changes: 4 additions & 11 deletions internal/handlers/pg/pjson/datetime.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,7 @@ func (dt *dateTimeType) String() string {
return time.Time(*dt).Format(time.RFC3339Nano)
}

// dateTimeJSON is a JSON object representation of the dateTimeType.
type dateTimeJSON struct {
D int64 `json:"$d"`
}

// UnmarshalJSON implements pjsontype interface.
// UnmarshalJSON implements json.Unmarshaler interface.
func (dt *dateTimeType) UnmarshalJSON(data []byte) error {
if bytes.Equal(data, []byte("null")) {
panic("null data")
Expand All @@ -48,7 +43,7 @@ func (dt *dateTimeType) UnmarshalJSON(data []byte) error {
dec := json.NewDecoder(r)
dec.DisallowUnknownFields()

var o dateTimeJSON
var o int64
if err := dec.Decode(&o); err != nil {
return lazyerrors.Error(err)
}
Expand All @@ -58,16 +53,14 @@ func (dt *dateTimeType) UnmarshalJSON(data []byte) error {
}

// TODO Use .UTC(): https://github.com/FerretDB/FerretDB/issues/43
*dt = dateTimeType(time.UnixMilli(o.D))
*dt = dateTimeType(time.UnixMilli(o))

return nil
}

// MarshalJSON implements pjsontype interface.
func (dt *dateTimeType) MarshalJSON() ([]byte, error) {
res, err := json.Marshal(dateTimeJSON{
D: time.Time(*dt).UnixMilli(),
})
res, err := json.Marshal(time.Time(*dt).UnixMilli())
if err != nil {
return nil, lazyerrors.Error(err)
}
Expand Down
8 changes: 4 additions & 4 deletions internal/handlers/pg/pjson/datetime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,19 @@ import (
var dateTimeTestCases = []testCase{{
name: "2021",
v: pointer.To(dateTimeType(time.Date(2021, 11, 1, 10, 18, 42, 123000000, time.UTC).Local())),
j: `{"$d":1635761922123}`,
j: `1635761922123`,
}, {
name: "unix_zero",
v: pointer.To(dateTimeType(time.Unix(0, 0))),
j: `{"$d":0}`,
j: `0`,
}, {
name: "0",
v: pointer.To(dateTimeType(time.Date(0, 1, 1, 0, 0, 0, 0, time.UTC).Local())),
j: `{"$d":-62167219200000}`,
j: `-62167219200000`,
}, {
name: "9999",
v: pointer.To(dateTimeType(time.Date(9999, 12, 31, 23, 59, 59, 999000000, time.UTC).Local())),
j: `{"$d":253402300799999}`,
j: `253402300799999`,
}, {
name: "EOF",
j: `{`,
Expand Down
Loading

0 comments on commit a41a947

Please sign in to comment.