diff --git a/AutoComplete/Builders/IndexBuilder.cs b/AutoComplete/Builders/IndexBuilder.cs index 692de2f..098d94d 100644 --- a/AutoComplete/Builders/IndexBuilder.cs +++ b/AutoComplete/Builders/IndexBuilder.cs @@ -8,13 +8,13 @@ namespace AutoComplete.Builders { - public class IndexBuilder : IIndexBuilder + public class IndexBuilder : IIndexBuilder, IDisposable { private static readonly byte[] NewLine = Encoding.UTF8.GetBytes(Environment.NewLine); private readonly Stream _headerStream; private readonly Stream _indexStream; private readonly Dictionary _keywordDictionary; - private readonly Dictionary _keyDictionary; + private readonly Dictionary _keyDictionary; private readonly Stream _tailStream; private readonly Trie _trie; private TrieIndexHeader _header; @@ -30,14 +30,16 @@ public IndexBuilder(Stream headerStream, Stream indexStream, Stream tailStream = _trie = new Trie(); _keywords = new HashSet(); _keywordDictionary = new Dictionary(); - _keyDictionary = new Dictionary(); + _keyDictionary = new Dictionary(); } public IndexBuilder Add(string keyword) { - _trie.Add(keyword); if (keyword != null && !_keywords.Contains(keyword)) + { + _trie.Add(keyword); _keywords.Add(keyword); + } return this; } @@ -62,9 +64,7 @@ private void PrepareForBuild() ReorderTrieAndLoadHeader(_trie.Root); if (_tailStream != null) - { CreateTailAndModifyNodes(_trie.Root); - } } private void ReorderTrieAndLoadHeader(TrieNode rootNode) @@ -210,16 +210,19 @@ private void SerializeKeywords(Stream stream) foreach (var item in keywords) { _keywordDictionary.Add(item, (uint) stream.Position); - uint count = 0; - if (_keyDictionary.TryGetValue(item, out var _count)) - count = _count; - + var count = _keyDictionary.GetValueOrDefault(item, 0); var buffer = Encoding.UTF8.GetBytes($"{count,10},{item}"); stream.Write(buffer, 0, buffer.Length); stream.Write(NewLine, 0, NewLine.Length); } - _keywords.Clear(); + } + + public void Dispose() + { + _headerStream?.Dispose(); + _indexStream?.Dispose(); + _tailStream?.Dispose(); _keywords = null; } } diff --git a/AutoComplete/Builders/TrieIndexHeaderBuilder.cs b/AutoComplete/Builders/TrieIndexHeaderBuilder.cs index f1c4edf..e95c1db 100644 --- a/AutoComplete/Builders/TrieIndexHeaderBuilder.cs +++ b/AutoComplete/Builders/TrieIndexHeaderBuilder.cs @@ -16,9 +16,7 @@ public TrieIndexHeaderBuilder() internal TrieIndexHeaderBuilder AddChar(char character) { if (!_characterList.Contains(character)) - { _characterList.Add(character); - } return this; } @@ -30,9 +28,7 @@ internal TrieIndexHeaderBuilder AddString(string value) throw new ArgumentException(nameof(value)); foreach (var t in value) - { AddChar(t); - } return this; } @@ -42,21 +38,18 @@ internal TrieIndexHeader Build() var header = new TrieIndexHeader(); header.CharacterList = _characterList; - SortCharacterList(); + // SortCharacterList + _characterList.Sort(new TrieCharacterComparer()); + CalculateMetrics(ref header); return header; } - private TrieIndexHeaderBuilder SortCharacterList() - { - _characterList.Sort(new TrieCharacterComparer()); - return this; - } private void CalculateMetrics(ref TrieIndexHeader header) { - // Set structural based properties + // Set structural properties header.COUNT_OF_CHARSET = _characterList.Count; header.COUNT_OF_CHILDREN_FLAGS = header.COUNT_OF_CHARSET / 8 + (header.COUNT_OF_CHARSET % 8 == 0 ? 0 : 1); diff --git a/AutoComplete/Clients/IndexSearchers/FileSystemIndexSearcher.cs b/AutoComplete/Clients/IndexSearchers/FileSystemIndexSearcher.cs index 78bdbe6..4695f14 100644 --- a/AutoComplete/Clients/IndexSearchers/FileSystemIndexSearcher.cs +++ b/AutoComplete/Clients/IndexSearchers/FileSystemIndexSearcher.cs @@ -18,11 +18,24 @@ public FileSystemIndexSearcher(string headerFileName, string indexFileName, stri protected override IndexData InitializeIndexData() { - var indexData = new IndexData(); - indexData.Header = TrieNodeHelperFileSystemExtensions.ReadHeaderFile(_headerFileName); - indexData.Index = GetStream(indexData.Header.LENGTH_OF_STRUCT, FileOptions.RandomAccess); - if (_tailFileName != null) - indexData.Tail = GetStream(8, FileOptions.SequentialScan); + IndexData indexData; + var header = TrieNodeHelperFileSystemExtensions.ReadHeaderFile(_headerFileName); + if (_tailFileName == null) + { + indexData = new IndexData( + header, + GetStream(header.LENGTH_OF_STRUCT, FileOptions.RandomAccess) + ); + } + else + { + indexData = new IndexData( + header, + GetStream(header.LENGTH_OF_STRUCT, FileOptions.RandomAccess), + GetStream(8, FileOptions.SequentialScan) + ); + } + return indexData; } diff --git a/AutoComplete/Clients/IndexSearchers/InMemoryIndexSearcher.cs b/AutoComplete/Clients/IndexSearchers/InMemoryIndexSearcher.cs index 855c283..78c86eb 100644 --- a/AutoComplete/Clients/IndexSearchers/InMemoryIndexSearcher.cs +++ b/AutoComplete/Clients/IndexSearchers/InMemoryIndexSearcher.cs @@ -25,15 +25,27 @@ public InMemoryIndexSearcher( protected override IndexData InitializeIndexData() { - var indexData = new IndexData(); - indexData.Index = new ManagedInMemoryStream(GetBytesFromFile(_indexFileName)); - indexData.Header = TrieNodeHelperFileSystemExtensions.ReadHeaderFile(_headerFileName); - if (_tailFileName != null) - indexData.Tail = new ManagedInMemoryStream(GetBytesFromFile(_tailFileName)); + IndexData indexData; + if (_tailFileName == null) + { + indexData = new IndexData( + TrieNodeHelperFileSystemExtensions.ReadHeaderFile(_headerFileName), + new ManagedInMemoryStream(GetBytesFromFile(_indexFileName)) + ); + } + else + { + indexData = new IndexData( + TrieNodeHelperFileSystemExtensions.ReadHeaderFile(_headerFileName), + new ManagedInMemoryStream(GetBytesFromFile(_indexFileName)), + new ManagedInMemoryStream(GetBytesFromFile(_tailFileName)) + ); + } + return indexData; } - private byte[] GetBytesFromFile(string path) + private static byte[] GetBytesFromFile(string path) { using Stream stream = new FileStream( path, diff --git a/AutoComplete/DataStructure/Trie.cs b/AutoComplete/DataStructure/Trie.cs index 8eb27c8..cea408b 100644 --- a/AutoComplete/DataStructure/Trie.cs +++ b/AutoComplete/DataStructure/Trie.cs @@ -52,7 +52,7 @@ public bool Add(string keyword) if (string.IsNullOrWhiteSpace(keyword)) throw new ArgumentNullException(nameof(keyword)); - // Get last node from given input. Next lines we merge keywords when result status is FoundStartWith + // get last node from given input. Next lines we merge keywords when result status is FoundStartWith var result = SearchLastNodeFrom(keyword); if (result.Status == TrieNodeSearchResultType.NotFound) @@ -60,7 +60,7 @@ public bool Add(string keyword) if (result.Status == TrieNodeSearchResultType.FoundStartsWith) { - //result found + // result found var prefix = keyword; // if last found node is start with? get 'word' from key|(word) @@ -78,7 +78,7 @@ public bool Add(string keyword) result.Node.Add(newTrie); return true; - } //result found + } // result found if (result.Status == TrieNodeSearchResultType.FoundEquals && !result.Node.IsTerminal) { diff --git a/AutoComplete/Domain/ManagedInMemoryStream.cs b/AutoComplete/Domain/ManagedInMemoryStream.cs index e53fcac..623396b 100644 --- a/AutoComplete/Domain/ManagedInMemoryStream.cs +++ b/AutoComplete/Domain/ManagedInMemoryStream.cs @@ -6,8 +6,7 @@ public class ManagedInMemoryStream : MemoryStream { public ManagedInMemoryStream(byte[] buffer) : base(buffer) - { - } + { } public override bool CanWrite => false; } diff --git a/AutoComplete/Readers/TrieBinaryReader.cs b/AutoComplete/Readers/TrieBinaryReader.cs index d7c9698..ea89c48 100644 --- a/AutoComplete/Readers/TrieBinaryReader.cs +++ b/AutoComplete/Readers/TrieBinaryReader.cs @@ -39,11 +39,11 @@ Stream tail return new List(GetAutoCompleteNodesWithTail(position, tail, maxItems)); } - private List GetAutoCompleteNodesInternal(long position, object prefix, int maxItems, List results) + private List GetAutoCompleteNodesInternal(long position, string prefix, int maxItems, List results) { var character = ReadCharacter(position); var isTerminal = ReadIsTerminal(position); - + var newPrefix = string.Concat(prefix, character); if (isTerminal) results.Add(newPrefix); @@ -193,9 +193,8 @@ private long[] GetChildrenPositionsFromNode(TrieIndexHeader header, long parentP internal TrieNodeStructSearchResult SearchLastNode(long parentPosition, string keyword) { var result = TrieNodeStructSearchResult.CreateNotFound(); - var currentPosition = parentPosition; - + for (var i = 0; i < keyword.Length; i++) { var childPosition = GetChildPositionFromNode(currentPosition, keyword[i]); diff --git a/AutoComplete/Searchers/IndexData.cs b/AutoComplete/Searchers/IndexData.cs index cce3207..6e8f437 100644 --- a/AutoComplete/Searchers/IndexData.cs +++ b/AutoComplete/Searchers/IndexData.cs @@ -5,8 +5,15 @@ namespace AutoComplete.Searchers { public class IndexData { - public TrieIndexHeader Header { get; set; } - public Stream Index { get; set; } - public Stream Tail { get; set; } + public readonly TrieIndexHeader Header; + public readonly Stream Index; + public readonly Stream Tail; + + public IndexData(TrieIndexHeader header, Stream index, Stream tail = null) + { + Index = index; + Tail = tail; + Header = header; + } } } \ No newline at end of file diff --git a/Samples.ConsoleApp/Program.cs b/Samples.ConsoleApp/Program.cs index 78643a3..b139dab 100644 --- a/Samples.ConsoleApp/Program.cs +++ b/Samples.ConsoleApp/Program.cs @@ -1,5 +1,4 @@ -using System.Diagnostics; -using AutoComplete.Builders; +using AutoComplete.Builders; using AutoComplete.Clients.IndexSearchers; using AutoComplete.Domain; @@ -7,15 +6,10 @@ const string indexFileName = "index.bin"; const string tailFileName = "tail.txt"; -var stopWatch = new Stopwatch(); -stopWatch.Start(); await BuildIndex(); -stopWatch.Stop(); -Console.WriteLine($"Build time(ms) {stopWatch.Elapsed.TotalMilliseconds}"); +Search(); -Search(10); - -void Search(int count) +void Search() { var searcher = new InMemoryIndexSearcher(headerFileName, indexFileName, tailFileName); searcher.Init(); @@ -23,24 +17,14 @@ void Search(int count) { Console.WriteLine("Type a word"); var word = Console.ReadLine(); - var timings = new List(); - for (int i = 0; i < count; i++) + var results = searcher.Search(new SearchOptions() { Term = word, MaxItemCount = 5, SuggestWhenFoundStartsWith = false}); + if (results.Items == null) + continue; + + foreach (var item in results.Items) { - stopWatch.Restart(); - var results = searcher.Search(new SearchOptions() { Term = word, MaxItemCount = 5, SuggestWhenFoundStartsWith = false}); - stopWatch.Stop(); - timings.Add(stopWatch.Elapsed.TotalMilliseconds); - Console.WriteLine($"Search time(ms): {stopWatch.Elapsed.TotalMilliseconds}"); - if (i == count-1 && results.Items != null) - { - foreach (var item in results.Items) - { - Console.WriteLine(item); - } - } + Console.WriteLine(item); } - - Console.WriteLine($"Average Search Time(ms): {timings.Average()}"); } } @@ -57,7 +41,7 @@ async Task BuildIndex() await using var indexStream = File.OpenWrite(indexFileName); await using var tailStream = File.OpenWrite(tailFileName); - var builder = new IndexBuilder(headerStream, indexStream, tailStream); + using var builder = new IndexBuilder(headerStream, indexStream, tailStream); foreach (var line in await File.ReadAllLinesAsync("words350k.txt")) { if(!string.IsNullOrWhiteSpace(line))