Skip to content

Commit

Permalink
Q & A
Browse files Browse the repository at this point in the history
  • Loading branch information
omerfarukz committed Nov 12, 2022
1 parent 017e2ec commit 9265704
Show file tree
Hide file tree
Showing 9 changed files with 81 additions and 71 deletions.
25 changes: 14 additions & 11 deletions AutoComplete/Builders/IndexBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@

namespace AutoComplete.Builders
{
public class IndexBuilder : IIndexBuilder
public class IndexBuilder : IIndexBuilder, IDisposable
{
private static readonly byte[] NewLine = Encoding.UTF8.GetBytes(Environment.NewLine);
private readonly Stream _headerStream;
private readonly Stream _indexStream;
private readonly Dictionary<string, uint> _keywordDictionary;
private readonly Dictionary<string, uint> _keyDictionary;
private readonly Dictionary<string, int> _keyDictionary;
private readonly Stream _tailStream;
private readonly Trie _trie;
private TrieIndexHeader _header;
Expand All @@ -30,14 +30,16 @@ public IndexBuilder(Stream headerStream, Stream indexStream, Stream tailStream =
_trie = new Trie();
_keywords = new HashSet<string>();
_keywordDictionary = new Dictionary<string, uint>();
_keyDictionary = new Dictionary<string, uint>();
_keyDictionary = new Dictionary<string, int>();
}

public IndexBuilder Add(string keyword)
{
_trie.Add(keyword);
if (keyword != null && !_keywords.Contains(keyword))
{
_trie.Add(keyword);
_keywords.Add(keyword);
}

return this;
}
Expand All @@ -62,9 +64,7 @@ private void PrepareForBuild()
ReorderTrieAndLoadHeader(_trie.Root);

if (_tailStream != null)
{
CreateTailAndModifyNodes(_trie.Root);
}
}

private void ReorderTrieAndLoadHeader(TrieNode rootNode)
Expand Down Expand Up @@ -210,16 +210,19 @@ private void SerializeKeywords(Stream stream)
foreach (var item in keywords)
{
_keywordDictionary.Add(item, (uint) stream.Position);
uint count = 0;
if (_keyDictionary.TryGetValue(item, out var _count))
count = _count;

var count = _keyDictionary.GetValueOrDefault(item, 0);
var buffer = Encoding.UTF8.GetBytes($"{count,10},{item}");
stream.Write(buffer, 0, buffer.Length);
stream.Write(NewLine, 0, NewLine.Length);
}

_keywords.Clear();
}

public void Dispose()
{
_headerStream?.Dispose();
_indexStream?.Dispose();
_tailStream?.Dispose();
_keywords = null;
}
}
Expand Down
15 changes: 4 additions & 11 deletions AutoComplete/Builders/TrieIndexHeaderBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@ public TrieIndexHeaderBuilder()
internal TrieIndexHeaderBuilder AddChar(char character)
{
if (!_characterList.Contains(character))
{
_characterList.Add(character);
}

return this;
}
Expand All @@ -30,9 +28,7 @@ internal TrieIndexHeaderBuilder AddString(string value)
throw new ArgumentException(nameof(value));

foreach (var t in value)
{
AddChar(t);
}

return this;
}
Expand All @@ -42,21 +38,18 @@ internal TrieIndexHeader Build()
var header = new TrieIndexHeader();
header.CharacterList = _characterList;

SortCharacterList();
// SortCharacterList
_characterList.Sort(new TrieCharacterComparer());

CalculateMetrics(ref header);

return header;
}

private TrieIndexHeaderBuilder SortCharacterList()
{
_characterList.Sort(new TrieCharacterComparer());
return this;
}

private void CalculateMetrics(ref TrieIndexHeader header)
{
// Set structural based properties
// Set structural properties
header.COUNT_OF_CHARSET = _characterList.Count;

header.COUNT_OF_CHILDREN_FLAGS = header.COUNT_OF_CHARSET / 8 + (header.COUNT_OF_CHARSET % 8 == 0 ? 0 : 1);
Expand Down
23 changes: 18 additions & 5 deletions AutoComplete/Clients/IndexSearchers/FileSystemIndexSearcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,24 @@ public FileSystemIndexSearcher(string headerFileName, string indexFileName, stri

protected override IndexData InitializeIndexData()
{
var indexData = new IndexData();
indexData.Header = TrieNodeHelperFileSystemExtensions.ReadHeaderFile(_headerFileName);
indexData.Index = GetStream(indexData.Header.LENGTH_OF_STRUCT, FileOptions.RandomAccess);
if (_tailFileName != null)
indexData.Tail = GetStream(8, FileOptions.SequentialScan);
IndexData indexData;
var header = TrieNodeHelperFileSystemExtensions.ReadHeaderFile(_headerFileName);
if (_tailFileName == null)
{
indexData = new IndexData(
header,
GetStream(header.LENGTH_OF_STRUCT, FileOptions.RandomAccess)
);
}
else
{
indexData = new IndexData(
header,
GetStream(header.LENGTH_OF_STRUCT, FileOptions.RandomAccess),
GetStream(8, FileOptions.SequentialScan)
);
}

return indexData;
}

Expand Down
24 changes: 18 additions & 6 deletions AutoComplete/Clients/IndexSearchers/InMemoryIndexSearcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,27 @@ public InMemoryIndexSearcher(

protected override IndexData InitializeIndexData()
{
var indexData = new IndexData();
indexData.Index = new ManagedInMemoryStream(GetBytesFromFile(_indexFileName));
indexData.Header = TrieNodeHelperFileSystemExtensions.ReadHeaderFile(_headerFileName);
if (_tailFileName != null)
indexData.Tail = new ManagedInMemoryStream(GetBytesFromFile(_tailFileName));
IndexData indexData;
if (_tailFileName == null)
{
indexData = new IndexData(
TrieNodeHelperFileSystemExtensions.ReadHeaderFile(_headerFileName),
new ManagedInMemoryStream(GetBytesFromFile(_indexFileName))
);
}
else
{
indexData = new IndexData(
TrieNodeHelperFileSystemExtensions.ReadHeaderFile(_headerFileName),
new ManagedInMemoryStream(GetBytesFromFile(_indexFileName)),
new ManagedInMemoryStream(GetBytesFromFile(_tailFileName))
);
}

return indexData;
}

private byte[] GetBytesFromFile(string path)
private static byte[] GetBytesFromFile(string path)
{
using Stream stream = new FileStream(
path,
Expand Down
6 changes: 3 additions & 3 deletions AutoComplete/DataStructure/Trie.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,15 @@ public bool Add(string keyword)
if (string.IsNullOrWhiteSpace(keyword))
throw new ArgumentNullException(nameof(keyword));

// Get last node from given input. Next lines we merge keywords when result status is FoundStartWith
// get last node from given input. Next lines we merge keywords when result status is FoundStartWith
var result = SearchLastNodeFrom(keyword);

if (result.Status == TrieNodeSearchResultType.NotFound)
return false;

if (result.Status == TrieNodeSearchResultType.FoundStartsWith)
{
//result found
// result found
var prefix = keyword;

// if last found node is start with? get 'word' from key|(word)
Expand All @@ -78,7 +78,7 @@ public bool Add(string keyword)
result.Node.Add(newTrie);

return true;
} //result found
} // result found

if (result.Status == TrieNodeSearchResultType.FoundEquals && !result.Node.IsTerminal)
{
Expand Down
3 changes: 1 addition & 2 deletions AutoComplete/Domain/ManagedInMemoryStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ public class ManagedInMemoryStream : MemoryStream
{
public ManagedInMemoryStream(byte[] buffer)
: base(buffer)
{
}
{ }

public override bool CanWrite => false;
}
Expand Down
7 changes: 3 additions & 4 deletions AutoComplete/Readers/TrieBinaryReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ Stream tail
return new List<string>(GetAutoCompleteNodesWithTail(position, tail, maxItems));
}

private List<string> GetAutoCompleteNodesInternal(long position, object prefix, int maxItems, List<string> results)
private List<string> GetAutoCompleteNodesInternal(long position, string prefix, int maxItems, List<string> results)
{
var character = ReadCharacter(position);
var isTerminal = ReadIsTerminal(position);

var newPrefix = string.Concat(prefix, character);
if (isTerminal)
results.Add(newPrefix);
Expand Down Expand Up @@ -193,9 +193,8 @@ private long[] GetChildrenPositionsFromNode(TrieIndexHeader header, long parentP
internal TrieNodeStructSearchResult SearchLastNode(long parentPosition, string keyword)
{
var result = TrieNodeStructSearchResult.CreateNotFound();

var currentPosition = parentPosition;

for (var i = 0; i < keyword.Length; i++)
{
var childPosition = GetChildPositionFromNode(currentPosition, keyword[i]);
Expand Down
13 changes: 10 additions & 3 deletions AutoComplete/Searchers/IndexData.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,15 @@ namespace AutoComplete.Searchers
{
public class IndexData
{
public TrieIndexHeader Header { get; set; }
public Stream Index { get; set; }
public Stream Tail { get; set; }
public readonly TrieIndexHeader Header;
public readonly Stream Index;
public readonly Stream Tail;

public IndexData(TrieIndexHeader header, Stream index, Stream tail = null)
{
Index = index;
Tail = tail;
Header = header;
}
}
}
36 changes: 10 additions & 26 deletions Samples.ConsoleApp/Program.cs
Original file line number Diff line number Diff line change
@@ -1,46 +1,30 @@
using System.Diagnostics;
using AutoComplete.Builders;
using AutoComplete.Builders;
using AutoComplete.Clients.IndexSearchers;
using AutoComplete.Domain;

const string headerFileName = "header.bin";
const string indexFileName = "index.bin";
const string tailFileName = "tail.txt";

var stopWatch = new Stopwatch();
stopWatch.Start();
await BuildIndex();
stopWatch.Stop();
Console.WriteLine($"Build time(ms) {stopWatch.Elapsed.TotalMilliseconds}");
Search();

Search(10);

void Search(int count)
void Search()
{
var searcher = new InMemoryIndexSearcher(headerFileName, indexFileName, tailFileName);
searcher.Init();
while (true)
{
Console.WriteLine("Type a word");
var word = Console.ReadLine();
var timings = new List<double>();
for (int i = 0; i < count; i++)
var results = searcher.Search(new SearchOptions() { Term = word, MaxItemCount = 5, SuggestWhenFoundStartsWith = false});
if (results.Items == null)
continue;

foreach (var item in results.Items)
{
stopWatch.Restart();
var results = searcher.Search(new SearchOptions() { Term = word, MaxItemCount = 5, SuggestWhenFoundStartsWith = false});
stopWatch.Stop();
timings.Add(stopWatch.Elapsed.TotalMilliseconds);
Console.WriteLine($"Search time(ms): {stopWatch.Elapsed.TotalMilliseconds}");
if (i == count-1 && results.Items != null)
{
foreach (var item in results.Items)
{
Console.WriteLine(item);
}
}
Console.WriteLine(item);
}

Console.WriteLine($"Average Search Time(ms): {timings.Average()}");
}
}

Expand All @@ -57,7 +41,7 @@ async Task BuildIndex()
await using var indexStream = File.OpenWrite(indexFileName);
await using var tailStream = File.OpenWrite(tailFileName);

var builder = new IndexBuilder(headerStream, indexStream, tailStream);
using var builder = new IndexBuilder(headerStream, indexStream, tailStream);
foreach (var line in await File.ReadAllLinesAsync("words350k.txt"))
{
if(!string.IsNullOrWhiteSpace(line))
Expand Down

0 comments on commit 9265704

Please sign in to comment.