diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..0053864 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,18 @@ +on: pull_request +name: Test +jobs: + test: + strategy: + matrix: + go-version: ['1.13', '1.14'] + os: [ubuntu-latest, macos-latest, windows-latest] + runs-on: ${{ matrix.os }} + steps: + - name: Install Go + uses: actions/setup-go@v2 + with: + go-version: ${{ matrix.go-version }} + - name: Checkout code + uses: actions/checkout@master + - name: Test + run: go test ./... diff --git a/Makefile b/Makefile deleted file mode 100644 index db83771..0000000 --- a/Makefile +++ /dev/null @@ -1,191 +0,0 @@ -# MAKEFILE -# -# @author Nicola Asuni -# @link https://github.com/willf/bitset -# ------------------------------------------------------------------------------ - -# List special make targets that are not associated with files -.PHONY: help all test format fmtcheck vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple astscan qa deps clean nuke - -# Use bash as shell (Note: Ubuntu now uses dash which doesn't support PIPESTATUS). -SHELL=/bin/bash - -# CVS path (path to the parent dir containing the project) -CVSPATH=github.com/willf - -# Project owner -OWNER=willf - -# Project vendor -VENDOR=willf - -# Project name -PROJECT=bitset - -# Project version -VERSION=$(shell cat VERSION) - -# Name of RPM or DEB package -PKGNAME=${VENDOR}-${PROJECT} - -# Current directory -CURRENTDIR=$(shell pwd) - -# GO lang path -ifneq ($(GOPATH),) - ifeq ($(findstring $(GOPATH),$(CURRENTDIR)),) - # the defined GOPATH is not valid - GOPATH= - endif -endif -ifeq ($(GOPATH),) - # extract the GOPATH - GOPATH=$(firstword $(subst /src/, ,$(CURRENTDIR))) -endif - -# --- MAKE TARGETS --- - -# Display general help about this command -help: - @echo "" - @echo "$(PROJECT) Makefile." - @echo "GOPATH=$(GOPATH)" - @echo "The following commands are available:" - @echo "" - @echo " make qa : Run all the tests" - @echo " make test : Run the unit tests" - @echo "" - @echo " make format : Format the source code" - @echo " make fmtcheck : Check if the source code has been formatted" - @echo " make vet : Check for suspicious constructs" - @echo " make lint : Check for style errors" - @echo " make coverage : Generate the coverage report" - @echo " make cyclo : Generate the cyclomatic complexity report" - @echo " make ineffassign : Detect ineffectual assignments" - @echo " make misspell : Detect commonly misspelled words in source files" - @echo " make structcheck : Find unused struct fields" - @echo " make varcheck : Find unused global variables and constants" - @echo " make errcheck : Check that error return values are used" - @echo " make gosimple : Suggest code simplifications" - @echo " make astscan : GO AST scanner" - @echo "" - @echo " make docs : Generate source code documentation" - @echo "" - @echo " make deps : Get the dependencies" - @echo " make clean : Remove any build artifact" - @echo " make nuke : Deletes any intermediate file" - @echo "" - -# Alias for help target -all: help - -# Run the unit tests -test: - @mkdir -p target/test - @mkdir -p target/report - GOPATH=$(GOPATH) \ - go test \ - -covermode=atomic \ - -bench=. \ - -race \ - -cpuprofile=target/report/cpu.out \ - -memprofile=target/report/mem.out \ - -mutexprofile=target/report/mutex.out \ - -coverprofile=target/report/coverage.out \ - -v ./... | \ - tee >(PATH=$(GOPATH)/bin:$(PATH) go-junit-report > target/test/report.xml); \ - test $${PIPESTATUS[0]} -eq 0 - -# Format the source code -format: - @find . -type f -name "*.go" -exec gofmt -s -w {} \; - -# Check if the source code has been formatted -fmtcheck: - @mkdir -p target - @find . -type f -name "*.go" -exec gofmt -s -d {} \; | tee target/format.diff - @test ! -s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; } - -# Check for syntax errors -vet: - GOPATH=$(GOPATH) go vet . - -# Check for style errors -lint: - GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint . - -# Generate the coverage report -coverage: - @mkdir -p target/report - GOPATH=$(GOPATH) \ - go tool cover -html=target/report/coverage.out -o target/report/coverage.html - -# Report cyclomatic complexity -cyclo: - @mkdir -p target/report - GOPATH=$(GOPATH) gocyclo -avg ./ | tee target/report/cyclo.txt ; test $${PIPESTATUS[0]} -eq 0 - -# Detect ineffectual assignments -ineffassign: - @mkdir -p target/report - GOPATH=$(GOPATH) ineffassign ./ | tee target/report/ineffassign.txt ; test $${PIPESTATUS[0]} -eq 0 - -# Detect commonly misspelled words in source files -misspell: - @mkdir -p target/report - GOPATH=$(GOPATH) misspell -error ./ | tee target/report/misspell.txt ; test $${PIPESTATUS[0]} -eq 0 - -# Find unused struct fields -structcheck: - @mkdir -p target/report - GOPATH=$(GOPATH) structcheck -a ./ | tee target/report/structcheck.txt - -# Find unused global variables and constants -varcheck: - @mkdir -p target/report - GOPATH=$(GOPATH) varcheck -e ./ | tee target/report/varcheck.txt - -# Check that error return values are used -errcheck: - @mkdir -p target/report - GOPATH=$(GOPATH) errcheck ./ | tee target/report/errcheck.txt - -# AST scanner -astscan: - @mkdir -p target/report - GOPATH=$(GOPATH) gosec . | tee target/report/astscan.txt ; test $${PIPESTATUS[0]} -eq 0 || true - -# Generate source docs -docs: - @mkdir -p target/docs - nohup sh -c 'GOPATH=$(GOPATH) godoc -http=127.0.0.1:6060' > target/godoc_server.log 2>&1 & - wget --directory-prefix=target/docs/ --execute robots=off --retry-connrefused --recursive --no-parent --adjust-extension --page-requisites --convert-links http://127.0.0.1:6060/pkg/github.com/${VENDOR}/${PROJECT}/ ; kill -9 `lsof -ti :6060` - @echo ''${PKGNAME}' Documentation ...' > target/docs/index.html - -# Alias to run all quality-assurance checks -qa: fmtcheck test vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple astscan - -# --- INSTALL --- - -# Get the dependencies -deps: - GOPATH=$(GOPATH) go get ./... - GOPATH=$(GOPATH) go get golang.org/x/lint/golint - GOPATH=$(GOPATH) go get github.com/jstemmer/go-junit-report - GOPATH=$(GOPATH) go get github.com/axw/gocov/gocov - GOPATH=$(GOPATH) go get github.com/fzipp/gocyclo - GOPATH=$(GOPATH) go get github.com/gordonklaus/ineffassign - GOPATH=$(GOPATH) go get github.com/client9/misspell/cmd/misspell - GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/structcheck - GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/varcheck - GOPATH=$(GOPATH) go get github.com/kisielk/errcheck - GOPATH=$(GOPATH) go get github.com/securego/gosec/cmd/gosec/... - -# Remove any build artifact -clean: - GOPATH=$(GOPATH) go clean ./... - -# Deletes any intermediate file -nuke: - rm -rf ./target - GOPATH=$(GOPATH) go clean -i ./... diff --git a/README.md b/README.md index 6c62b20..50338e7 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,10 @@ *Go language library to map between non-negative integers and boolean values* -[![Master Build Status](https://secure.travis-ci.org/willf/bitset.png?branch=master)](https://travis-ci.org/willf/bitset?branch=master) +[![Test](https://github.com/willf/bitset/workflows/Test/badge.svg)](https://github.com/willf/bitset/actions?query=workflow%3ATest) [![Master Coverage Status](https://coveralls.io/repos/willf/bitset/badge.svg?branch=master&service=github)](https://coveralls.io/github/willf/bitset?branch=master) [![Go Report Card](https://goreportcard.com/badge/github.com/willf/bitset)](https://goreportcard.com/report/github.com/willf/bitset) -[![GoDoc](https://godoc.org/github.com/willf/bitset?status.svg)](http://godoc.org/github.com/willf/bitset) +[![PkgGoDev](https://pkg.go.dev/badge/github.com/willf/bitset?tab=doc)](https://pkg.go.dev/github.com/willf/bitset?tab=doc) ## Description @@ -63,8 +63,11 @@ func main() { As an alternative to BitSets, one should check out the 'big' package, which provides a (less set-theoretical) view of bitsets. -Godoc documentation is at: https://godoc.org/github.com/willf/bitset +Package documentation is at: https://pkg.go.dev/github.com/willf/bitset?tab=doc +## Memory Usage + +The memory usage of a bitset using N bits is at least N/8 bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](http://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring). ## Implementation Note @@ -82,15 +85,10 @@ go get github.com/willf/bitset If you wish to contribute to this project, please branch and issue a pull request against master ("[GitHub Flow](https://guides.github.com/introduction/flow/)") -This project include a Makefile that allows you to test and build the project with simple commands. -To see all available options: -```bash -make help -``` - ## Running all tests -Before committing the code, please check if it passes all tests using (note: this will install some dependencies): +Before committing the code, please check if it passes tests, has adequate coverage, etc. ```bash -make qa +go test +go test -cover ``` diff --git a/bitset.go b/bitset.go index 22e5d42..21e889d 100644 --- a/bitset.go +++ b/bitset.go @@ -138,6 +138,9 @@ func (b *BitSet) Len() uint { // extendSetMaybe adds additional words to incorporate new bits if needed func (b *BitSet) extendSetMaybe(i uint) { if i >= b.length { // if we need more bits, make 'em + if i >= Cap() { + panic("You are exceeding the capacity") + } nsize := wordsNeeded(i + 1) if b.set == nil { b.set = make([]uint64, nsize) @@ -160,7 +163,12 @@ func (b *BitSet) Test(i uint) bool { return b.set[i>>log2WordSize]&(1<<(i&(wordSize-1))) != 0 } -// Set bit i to 1 +// Set bit i to 1, the capacity of the bitset is automatically +// increased accordingly. +// If i>= Cap(), this function will panic. +// Warning: using a very large value for 'i' +// may lead to a memory shortage and a panic: the caller is responsible +// for providing sensible parameters in line with their memory capacity. func (b *BitSet) Set(i uint) *BitSet { b.extendSetMaybe(i) b.set[i>>log2WordSize] |= 1 << (i & (wordSize - 1)) @@ -176,7 +184,11 @@ func (b *BitSet) Clear(i uint) *BitSet { return b } -// SetTo sets bit i to value +// SetTo sets bit i to value. +// If i>= Cap(), this function will panic. +// Warning: using a very large value for 'i' +// may lead to a memory shortage and a panic: the caller is responsible +// for providing sensible parameters in line with their memory capacity. func (b *BitSet) SetTo(i uint, value bool) *BitSet { if value { return b.Set(i) @@ -184,7 +196,11 @@ func (b *BitSet) SetTo(i uint, value bool) *BitSet { return b.Clear(i) } -// Flip bit at i +// Flip bit at i. +// If i>= Cap(), this function will panic. +// Warning: using a very large value for 'i' +// may lead to a memory shortage and a panic: the caller is responsible +// for providing sensible parameters in line with their memory capacity. func (b *BitSet) Flip(i uint) *BitSet { if i >= b.length { return b.Set(i) @@ -193,26 +209,51 @@ func (b *BitSet) Flip(i uint) *BitSet { return b } -// Shrink shrinks BitSet to desired length in bits. It clears all bits > length -// and reduces the size and length of the set. +// Shrink shrinks BitSet so that the provided value is the last possible +// set value. It clears all bits > the provided index and reduces the size +// and length of the set. +// +// Note that the parameter value is not the new length in bits: it is the +// maximal value that can be stored in the bitset after the function call. +// The new length in bits is the parameter value + 1. Thus it is not possible +// to use this function to set the length to 0, the minimal value of the length +// after this function call is 1. // // A new slice is allocated to store the new bits, so you may see an increase in // memory usage until the GC runs. Normally this should not be a problem, but if you // have an extremely large BitSet its important to understand that the old BitSet will // remain in memory until the GC frees it. -func (b *BitSet) Shrink(length uint) *BitSet { - idx := wordsNeeded(length + 1) +func (b *BitSet) Shrink(lastbitindex uint) *BitSet { + length := lastbitindex + 1 + idx := wordsNeeded(length) if idx > len(b.set) { return b } shrunk := make([]uint64, idx) copy(shrunk, b.set[:idx]) b.set = shrunk - b.length = length + 1 - b.set[idx-1] &= (allBits >> (uint64(64) - uint64(length&(wordSize-1)) - 1)) + b.length = length + b.set[idx-1] &= (allBits >> (uint64(64) - uint64(length&(wordSize-1)))) return b } +// Compact shrinks BitSet to so that we preserve all set bits, while minimizing +// memory usage. Compact calls Shrink. +func (b *BitSet) Compact() *BitSet { + idx := len(b.set) - 1 + for ; idx >= 0 && b.set[idx] == 0; idx-- { + } + newlength := uint((idx + 1) << log2WordSize) + if newlength >= b.length { + return b // nothing to do + } + if newlength > 0 { + return b.Shrink(newlength - 1) + } + // We preserve one word + return b.Shrink(63) +} + // InsertAt takes an index which indicates where a bit should be // inserted. Then it shifts all the bits in the set to the left by 1, starting // from the given index position, and sets the index position to 0. @@ -323,6 +364,9 @@ func (b *BitSet) DeleteAt(i uint) *BitSet { // including possibly the current index // along with an error code (true = valid, false = no set bit found) // for i,e := v.NextSet(0); e; i,e = v.NextSet(i + 1) {...} +// +// Users concerned with performance may want to use NextSetMany to +// retrieve several values at once. func (b *BitSet) NextSet(i uint) (uint, bool) { x := int(i >> log2WordSize) if x >= len(b.set) { @@ -358,6 +402,14 @@ func (b *BitSet) NextSet(i uint) (uint, bool) { // j += 1 // } // +// +// It is possible to retrieve all set bits as follow: +// +// indices := make([]uint, bitmap.Count()) +// bitmap.NextSetMany(0, indices) +// +// However if bitmap.Count() is large, it might be preferable to +// use several calls to NextSetMany, for performance reasons. func (b *BitSet) NextSetMany(i uint, buffer []uint) (uint, []uint) { myanswer := buffer capacity := cap(buffer) @@ -809,7 +861,7 @@ func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { newset := New(uint(length)) if uint64(newset.length) != length { - return 0, errors.New("Unmarshalling error: type mismatch") + return 0, errors.New("unmarshalling error: type mismatch") } // Read remaining bytes as set diff --git a/bitset_test.go b/bitset_test.go index 0e2e841..8769a62 100644 --- a/bitset_test.go +++ b/bitset_test.go @@ -132,6 +132,20 @@ func TestExendOnBoundary(t *testing.T) { v.Set(32) } +func TestExceedCap(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Error("Set to capacity should have caused a panic") + } + }() + NumHosts := uint(32768) + bmp := New(NumHosts) + bmp.ClearAll() + d := Cap() + bmp.Set(d) + +} + func TestExpand(t *testing.T) { v := New(0) defer func() { @@ -557,6 +571,28 @@ func TestShrink(t *testing.T) { b.Set(2) b.Set(3) b.Set(64) + b.Compact() + if !b.Test(0) { + t.Error("0 should be set") + return + } + if !b.Test(1) { + t.Error("1 should be set") + return + } + if !b.Test(2) { + t.Error("2 should be set") + return + } + if !b.Test(3) { + t.Error("3 should be set") + return + } + if !b.Test(64) { + t.Error("64 should be set") + return + } + b.Shrink(2) if !b.Test(0) { t.Error("0 should be set") @@ -589,6 +625,20 @@ func TestShrink(t *testing.T) { b.Set(127) b.Set(128) b.Set(129) + b.Compact() + if !b.Test(127) { + t.Error("127 should be set") + return + } + if !b.Test(128) { + t.Error("128 should be set") + return + } + if !b.Test(129) { + t.Error("129 be set") + return + } + b.Shrink(128) if !b.Test(127) { t.Error("127 should be set") diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..583ecab --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module github.com/willf/bitset + +go 1.14 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..e69de29