Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Phoneme cleanup #3356

Merged
merged 11 commits into from
Oct 13, 2021
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* **Bug Fix**
* **Enhancement**
* CHANGED: Pronunciation for names and destinations [#3132](https://github.com/valhalla/valhalla/pull/3132)
* CHANGED: Requested code clean up for phonemes PR [#3356](https://github.com/valhalla/valhalla/pull/3356)

## Release Date: 2021-10-07 Valhalla 3.1.4
* **Removed**
Expand Down
40 changes: 20 additions & 20 deletions src/baldr/graphtile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ std::string GraphTile::GetName(const uint32_t textlist_offset) const {

// Convenience method to process the signs for an edge given the
// directed edge or node index.
std::vector<SignInfo> GraphTile::ProcessSigns(const uint32_t idx, bool signs_on_node) const {
std::vector<SignInfo> GraphTile::GetSigns(const uint32_t idx, bool signs_on_node) const {
uint32_t count = header_->signcount();
std::vector<SignInfo> signs;
if (count == 0) {
Expand Down Expand Up @@ -729,15 +729,19 @@ std::vector<SignInfo> GraphTile::ProcessSigns(const uint32_t idx, bool signs_on_

// only add named signs when asking for signs at the node and
// only add edge signs when asking for signs at the edges.
// is_route_num_type indicates if this phoneme is for a node or not; therefore,
// we only return a node phoneme when is_route_num_type and signs_on_node are both true and
// we only return an edge phoneme when is_route_num_type and signs_on_node are both false
if (((signs_[found].type() == Sign::Type::kJunctionName ||
(signs_[found].type() == Sign::Type::kPronunciation && signs_[found].route_num_type())) &&
(signs_[found].type() == Sign::Type::kPronunciation &&
signs_[found].is_route_num_type())) &&
signs_on_node) ||
(((signs_[found].type() != Sign::Type::kJunctionName &&
signs_[found].type() != Sign::Type::kPronunciation) ||
(signs_[found].type() == Sign::Type::kPronunciation &&
!signs_[found].route_num_type())) &&
!signs_[found].is_route_num_type())) &&
!signs_on_node))
signs.emplace_back(signs_[found].type(), signs_[found].route_num_type(),
signs.emplace_back(signs_[found].type(), signs_[found].is_route_num_type(),
signs_[found].tagged(), false, 0, 0, text);
} else {
throw std::runtime_error("GetSigns: offset exceeds size of text list");
Expand Down Expand Up @@ -791,26 +795,22 @@ std::vector<SignInfo> GraphTile::GetSigns(
const auto* text = (textlist_ + signs_[found].text_offset());
if (signs_[found].tagged() && signs_[found].type() == Sign::Type::kPronunciation) {

// route_num_type indicates if this phonome is for a node or not
if ((signs_[found].route_num_type() && signs_on_node) ||
(!signs_[found].route_num_type() && !signs_on_node)) {
// is_route_num_type indicates if this phoneme is for a node or not
if ((signs_[found].is_route_num_type() && signs_on_node) ||
(!signs_[found].is_route_num_type() && !signs_on_node)) {
size_t pos = 0;
while (pos < strlen(text)) {
const auto& header = *reinterpret_cast<const linguistic_text_header_t*>(text + pos);
pos += 3;

std::unordered_map<uint32_t, std::pair<uint8_t, std::string>>::iterator iter =
index_pronunciation_map.find(header.name_index_);

if (iter == index_pronunciation_map.end())
index_pronunciation_map.emplace(
std::make_pair(header.name_index_,
std::make_pair(header.phonetic_alphabet_,
std::string((text + pos), header.length_))));
else {
if (header.phonetic_alphabet_ > (iter->second).first) {
iter->second = std::make_pair(header.phonetic_alphabet_,
std::string((text + pos), header.length_));
auto iter = index_pronunciation_map.insert(
std::make_pair(header.name_index_,
std::make_pair(header.phonetic_alphabet_,
std::string((text + pos), header.length_))));
if (!iter.second) {
if (header.phonetic_alphabet_ > iter.first->second.first) {
iter.first->second = std::make_pair(header.phonetic_alphabet_,
std::string((text + pos), header.length_));
}
}

Expand All @@ -824,7 +824,7 @@ std::vector<SignInfo> GraphTile::GetSigns(
// only add edge signs when asking for signs at the edges.
if ((signs_[found].type() == Sign::Type::kJunctionName && signs_on_node) ||
(signs_[found].type() != Sign::Type::kJunctionName && !signs_on_node))
signs.emplace_back(signs_[found].type(), signs_[found].route_num_type(),
signs.emplace_back(signs_[found].type(), signs_[found].is_route_num_type(),
signs_[found].tagged(), false, 0, 0, text);
} else {
throw std::runtime_error("GetSigns: offset exceeds size of text list");
Expand Down
157 changes: 72 additions & 85 deletions src/mjolnir/graphbuilder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1315,6 +1315,11 @@ void GraphBuilder::GetPronunciationTokens(const OSMData& osmdata,
std::vector<std::string>& jeita_tokens,
bool is_node_pronunciation) {

ipa_tokens.clear();
nt_sampa_tokens.clear();
katakana_tokens.clear();
jeita_tokens.clear();

if (is_node_pronunciation) {
if (ipa_index != 0)
ipa_tokens = GetTagTokens(osmdata.node_names.name(ipa_index));
Expand Down Expand Up @@ -1395,6 +1400,25 @@ bool GraphBuilder::CreateSignInfoList(const OSMNode& node,
bool tc) {

bool has_guide = false;
std::vector<std::string> ipa_tokens, nt_sampa_tokens, katakana_tokens, jeita_tokens;
bool add_ipa, add_nt_sampa, add_katakana, add_jeita;

auto get_pronunciations =
[](const OSMData& osmdata, const size_t signs_size, const uint32_t ipa_index,
const uint32_t nt_sampa_index, const uint32_t katakana_index, const uint32_t jeita_index,
std::vector<std::string>& ipa_tokens, std::vector<std::string>& nt_sampa_tokens,
std::vector<std::string>& katakana_tokens, std::vector<std::string>& jeita_tokens,
bool& add_ipa, bool& add_nt_sampa, bool& add_katakana, bool& add_jeita,
bool is_node_pronunciation = false) {
GetPronunciationTokens(osmdata, ipa_index, nt_sampa_index, katakana_index, jeita_index,
ipa_tokens, nt_sampa_tokens, katakana_tokens, jeita_tokens,
is_node_pronunciation);

add_ipa = (ipa_tokens.size() && signs_size == ipa_tokens.size());
add_nt_sampa = (nt_sampa_tokens.size() && signs_size == nt_sampa_tokens.size());
add_katakana = (katakana_tokens.size() && signs_size == katakana_tokens.size());
add_jeita = (jeita_tokens.size() && signs_size == jeita_tokens.size());
};

////////////////////////////////////////////////////////////////////////////
// NUMBER
Expand All @@ -1403,17 +1427,12 @@ bool GraphBuilder::CreateSignInfoList(const OSMNode& node,
std::vector<std::string> j_refs =
GetTagTokens(osmdata.name_offset_map.name(way.junction_ref_index()));

std::vector<std::string> ipa_tokens, nt_sampa_tokens, katakana_tokens, jeita_tokens;
GetPronunciationTokens(osmdata, pronunciation.junction_ref_pronunciation_ipa_index(),
pronunciation.junction_ref_pronunciation_nt_sampa_index(),
pronunciation.junction_ref_pronunciation_katakana_index(),
pronunciation.junction_ref_pronunciation_jeita_index(), ipa_tokens,
nt_sampa_tokens, katakana_tokens, jeita_tokens);

bool add_ipa = (ipa_tokens.size() && j_refs.size() == ipa_tokens.size());
bool add_nt_sampa = (nt_sampa_tokens.size() && j_refs.size() == nt_sampa_tokens.size());
bool add_katakana = (katakana_tokens.size() && j_refs.size() == katakana_tokens.size());
bool add_jeita = (jeita_tokens.size() && j_refs.size() == jeita_tokens.size());
get_pronunciations(osmdata, j_refs.size(), pronunciation.junction_ref_pronunciation_ipa_index(),
pronunciation.junction_ref_pronunciation_nt_sampa_index(),
pronunciation.junction_ref_pronunciation_katakana_index(),
pronunciation.junction_ref_pronunciation_jeita_index(), ipa_tokens,
nt_sampa_tokens, katakana_tokens, jeita_tokens, add_ipa, add_nt_sampa,
add_katakana, add_jeita);

for (size_t i = 0; i < j_refs.size(); ++i) {
if (add_ipa || add_nt_sampa || add_katakana || add_jeita) {
Expand All @@ -1430,17 +1449,11 @@ bool GraphBuilder::CreateSignInfoList(const OSMNode& node,

std::vector<std::string> n_refs = GetTagTokens(osmdata.node_names.name(node.ref_index()));

std::vector<std::string> ipa_tokens, nt_sampa_tokens, katakana_tokens, jeita_tokens;
GetPronunciationTokens(osmdata, node.ref_pronunciation_ipa_index(),
node.ref_pronunciation_nt_sampa_index(),
node.ref_pronunciation_katakana_index(),
node.ref_pronunciation_jeita_index(), ipa_tokens, nt_sampa_tokens,
katakana_tokens, jeita_tokens, true);

bool add_ipa = (ipa_tokens.size() && n_refs.size() == ipa_tokens.size());
bool add_nt_sampa = (nt_sampa_tokens.size() && n_refs.size() == nt_sampa_tokens.size());
bool add_katakana = (katakana_tokens.size() && n_refs.size() == katakana_tokens.size());
bool add_jeita = (jeita_tokens.size() && n_refs.size() == jeita_tokens.size());
get_pronunciations(osmdata, n_refs.size(), node.ref_pronunciation_ipa_index(),
node.ref_pronunciation_nt_sampa_index(),
node.ref_pronunciation_katakana_index(), node.ref_pronunciation_jeita_index(),
ipa_tokens, nt_sampa_tokens, katakana_tokens, jeita_tokens, add_ipa,
add_nt_sampa, add_katakana, add_jeita, true);

for (size_t i = 0; i < n_refs.size(); ++i) {
if (add_ipa || add_nt_sampa || add_katakana || add_jeita) {
Expand All @@ -1466,17 +1479,13 @@ bool GraphBuilder::CreateSignInfoList(const OSMNode& node,
std::vector<std::string> branch_refs =
GetTagTokens(osmdata.name_offset_map.name(way.destination_ref_index()));

std::vector<std::string> ipa_tokens, nt_sampa_tokens, katakana_tokens, jeita_tokens;
GetPronunciationTokens(osmdata, pronunciation.destination_ref_pronunciation_ipa_index(),
pronunciation.destination_ref_pronunciation_nt_sampa_index(),
pronunciation.destination_ref_pronunciation_katakana_index(),
pronunciation.destination_ref_pronunciation_jeita_index(), ipa_tokens,
nt_sampa_tokens, katakana_tokens, jeita_tokens);

bool add_ipa = (ipa_tokens.size() && branch_refs.size() == ipa_tokens.size());
bool add_nt_sampa = (nt_sampa_tokens.size() && branch_refs.size() == nt_sampa_tokens.size());
bool add_katakana = (katakana_tokens.size() && branch_refs.size() == katakana_tokens.size());
bool add_jeita = (jeita_tokens.size() && branch_refs.size() == jeita_tokens.size());
get_pronunciations(osmdata, branch_refs.size(),
pronunciation.destination_ref_pronunciation_ipa_index(),
pronunciation.destination_ref_pronunciation_nt_sampa_index(),
pronunciation.destination_ref_pronunciation_katakana_index(),
pronunciation.destination_ref_pronunciation_jeita_index(), ipa_tokens,
nt_sampa_tokens, katakana_tokens, jeita_tokens, add_ipa, add_nt_sampa,
add_katakana, add_jeita);

for (size_t i = 0; i < branch_refs.size(); ++i) {
if (tc || (!ramp && !fork)) {
Expand Down Expand Up @@ -1508,17 +1517,13 @@ bool GraphBuilder::CreateSignInfoList(const OSMNode& node,
std::vector<std::string> branch_streets =
GetTagTokens(osmdata.name_offset_map.name(way.destination_street_index()));

std::vector<std::string> ipa_tokens, nt_sampa_tokens, katakana_tokens, jeita_tokens;
GetPronunciationTokens(osmdata, pronunciation.destination_street_pronunciation_ipa_index(),
pronunciation.destination_street_pronunciation_nt_sampa_index(),
pronunciation.destination_street_pronunciation_katakana_index(),
pronunciation.destination_street_pronunciation_jeita_index(), ipa_tokens,
nt_sampa_tokens, katakana_tokens, jeita_tokens);

bool add_ipa = (ipa_tokens.size() && branch_streets.size() == ipa_tokens.size());
bool add_nt_sampa = (nt_sampa_tokens.size() && branch_streets.size() == nt_sampa_tokens.size());
bool add_katakana = (katakana_tokens.size() && branch_streets.size() == katakana_tokens.size());
bool add_jeita = (jeita_tokens.size() && branch_streets.size() == jeita_tokens.size());
get_pronunciations(osmdata, branch_streets.size(),
pronunciation.destination_street_pronunciation_ipa_index(),
pronunciation.destination_street_pronunciation_nt_sampa_index(),
pronunciation.destination_street_pronunciation_katakana_index(),
pronunciation.destination_street_pronunciation_jeita_index(), ipa_tokens,
nt_sampa_tokens, katakana_tokens, jeita_tokens, add_ipa, add_nt_sampa,
add_katakana, add_jeita);

for (size_t i = 0; i < branch_streets.size(); ++i) {
if (tc || (!ramp && !fork)) {
Expand Down Expand Up @@ -1557,17 +1562,13 @@ bool GraphBuilder::CreateSignInfoList(const OSMNode& node,
std::vector<std::string> toward_refs =
GetTagTokens(osmdata.name_offset_map.name(way.destination_ref_to_index()));

std::vector<std::string> ipa_tokens, nt_sampa_tokens, katakana_tokens, jeita_tokens;
GetPronunciationTokens(osmdata, pronunciation.destination_ref_to_pronunciation_ipa_index(),
pronunciation.destination_ref_to_pronunciation_nt_sampa_index(),
pronunciation.destination_ref_to_pronunciation_katakana_index(),
pronunciation.destination_ref_to_pronunciation_jeita_index(), ipa_tokens,
nt_sampa_tokens, katakana_tokens, jeita_tokens);

bool add_ipa = (ipa_tokens.size() && toward_refs.size() == ipa_tokens.size());
bool add_nt_sampa = (nt_sampa_tokens.size() && toward_refs.size() == nt_sampa_tokens.size());
bool add_katakana = (katakana_tokens.size() && toward_refs.size() == katakana_tokens.size());
bool add_jeita = (jeita_tokens.size() && toward_refs.size() == jeita_tokens.size());
get_pronunciations(osmdata, toward_refs.size(),
pronunciation.destination_ref_to_pronunciation_ipa_index(),
pronunciation.destination_ref_to_pronunciation_nt_sampa_index(),
pronunciation.destination_ref_to_pronunciation_katakana_index(),
pronunciation.destination_ref_to_pronunciation_jeita_index(), ipa_tokens,
nt_sampa_tokens, katakana_tokens, jeita_tokens, add_ipa, add_nt_sampa,
add_katakana, add_jeita);

for (size_t i = 0; i < toward_refs.size(); ++i) {
if (tc || (!ramp && !fork)) {
Expand Down Expand Up @@ -1600,17 +1601,13 @@ bool GraphBuilder::CreateSignInfoList(const OSMNode& node,
std::vector<std::string> toward_streets =
GetTagTokens(osmdata.name_offset_map.name(way.destination_street_to_index()));

std::vector<std::string> ipa_tokens, nt_sampa_tokens, katakana_tokens, jeita_tokens;
GetPronunciationTokens(osmdata, pronunciation.destination_street_to_pronunciation_ipa_index(),
pronunciation.destination_street_to_pronunciation_nt_sampa_index(),
pronunciation.destination_street_to_pronunciation_katakana_index(),
pronunciation.destination_street_to_pronunciation_jeita_index(),
ipa_tokens, nt_sampa_tokens, katakana_tokens, jeita_tokens);

bool add_ipa = (ipa_tokens.size() && toward_streets.size() == ipa_tokens.size());
bool add_nt_sampa = (nt_sampa_tokens.size() && toward_streets.size() == nt_sampa_tokens.size());
bool add_katakana = (katakana_tokens.size() && toward_streets.size() == katakana_tokens.size());
bool add_jeita = (jeita_tokens.size() && toward_streets.size() == jeita_tokens.size());
get_pronunciations(osmdata, toward_streets.size(),
pronunciation.destination_street_to_pronunciation_ipa_index(),
pronunciation.destination_street_to_pronunciation_nt_sampa_index(),
pronunciation.destination_street_to_pronunciation_katakana_index(),
pronunciation.destination_street_to_pronunciation_jeita_index(), ipa_tokens,
nt_sampa_tokens, katakana_tokens, jeita_tokens, add_ipa, add_nt_sampa,
add_katakana, add_jeita);

for (size_t i = 0; i < toward_streets.size(); ++i) {
if (tc || (!ramp && !fork)) {
Expand Down Expand Up @@ -1672,14 +1669,9 @@ bool GraphBuilder::CreateSignInfoList(const OSMNode& node,
: (forward ? pronunciation.destination_forward_pronunciation_jeita_index()
: pronunciation.destination_backward_pronunciation_jeita_index());

std::vector<std::string> ipa_tokens, nt_sampa_tokens, katakana_tokens, jeita_tokens;
GetPronunciationTokens(osmdata, ipa_index, nt_sampa_index, katakana_index, jeita_index,
ipa_tokens, nt_sampa_tokens, katakana_tokens, jeita_tokens);

bool add_ipa = (ipa_tokens.size() && toward_names.size() == ipa_tokens.size());
bool add_nt_sampa = (nt_sampa_tokens.size() && toward_names.size() == nt_sampa_tokens.size());
bool add_katakana = (katakana_tokens.size() && toward_names.size() == katakana_tokens.size());
bool add_jeita = (jeita_tokens.size() && toward_names.size() == jeita_tokens.size());
get_pronunciations(osmdata, toward_names.size(), ipa_index, nt_sampa_index, katakana_index,
jeita_index, ipa_tokens, nt_sampa_tokens, katakana_tokens, jeita_tokens,
add_ipa, add_nt_sampa, add_katakana, add_jeita);

for (size_t i = 0; i < toward_names.size(); ++i) {

Expand Down Expand Up @@ -1781,17 +1773,12 @@ bool GraphBuilder::CreateSignInfoList(const OSMNode& node,
// Get the name from OSMData using the name index
std::vector<std::string> names = GetTagTokens(osmdata.node_names.name(node.name_index()));

std::vector<std::string> ipa_tokens, nt_sampa_tokens, katakana_tokens, jeita_tokens;
GetPronunciationTokens(osmdata, node.name_pronunciation_ipa_index(),
node.name_pronunciation_nt_sampa_index(),
node.name_pronunciation_katakana_index(),
node.name_pronunciation_jeita_index(), ipa_tokens, nt_sampa_tokens,
katakana_tokens, jeita_tokens, true);

bool add_ipa = (ipa_tokens.size() && names.size() == ipa_tokens.size());
bool add_nt_sampa = (nt_sampa_tokens.size() && names.size() == nt_sampa_tokens.size());
bool add_katakana = (katakana_tokens.size() && names.size() == katakana_tokens.size());
bool add_jeita = (jeita_tokens.size() && names.size() == jeita_tokens.size());
get_pronunciations(osmdata, names.size(), node.name_pronunciation_ipa_index(),
node.name_pronunciation_nt_sampa_index(),
node.name_pronunciation_katakana_index(),
node.name_pronunciation_jeita_index(), ipa_tokens, nt_sampa_tokens,
katakana_tokens, jeita_tokens, add_ipa, add_nt_sampa, add_katakana, add_jeita,
true);

for (size_t i = 0; i < names.size(); ++i) {
if (add_ipa || add_nt_sampa || add_katakana || add_jeita) {
Expand Down
Loading