Skip to content

Commit

Permalink
[search] Fixed search rank for the population rank.
Browse files Browse the repository at this point in the history
Added famous cities ranking test.

Signed-off-by: Viktor Govako <viktor.govako@gmail.com>
  • Loading branch information
vng committed Jul 1, 2023
1 parent 46b1a4e commit 094defe
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 24 deletions.
6 changes: 0 additions & 6 deletions search/ranker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -593,12 +593,6 @@ class RankerResultMaker
info.m_errorsMade = errorsMade;
info.m_isAltOrOldName = isAltOrOldName;
info.m_matchedFraction = matchedLength / static_cast<float>(totalLength);

info.m_exactCountryOrCapital = info.m_errorsMade == ErrorsMade(0) && info.m_allTokensUsed &&
info.m_nameScore == NameScore::FULL_MATCH &&
// Upgrade _any_ capital rank, not only _true_ capital (=2).
// For example, search Barcelona from Istanbul or vice-versa.
(m_countryChecker(featureTypes) || m_capitalChecker(featureTypes));
}

CategoriesInfo const categoriesInfo(featureTypes,
Expand Down
8 changes: 2 additions & 6 deletions search/ranking_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ double constexpr kCategoriesRank = 1.0000000;
double constexpr kCategoriesFalseCats = -1.0000000;

double constexpr kDistanceToPivot = -0.2123693;
double constexpr kRank = 0.15;
// This constant is very important and checked in Famous_Cities_Rank test.
double constexpr kRank = 0.23;
double constexpr kPopularity = 1.0000000;

// Decreased this value:
Expand All @@ -39,7 +40,6 @@ double constexpr kFalseCats = -0.01;
double constexpr kErrorsMade = -0.15;
double constexpr kMatchedFraction = 0.1876736;
double constexpr kAllTokensUsed = 0.0478513;
double constexpr kExactCountryOrCapital = 0.1247733;
double constexpr kCommonTokens = -0.05;

double constexpr kNameScore[] = {
Expand Down Expand Up @@ -100,7 +100,6 @@ static_assert(kDistanceToPivot <= 0, "");
static_assert(kRank >= 0, "");
static_assert(kPopularity >= 0, "");
static_assert(kErrorsMade <= 0, "");
static_assert(kExactCountryOrCapital >= 0, "");

double TransformDistance(double distance)
{
Expand Down Expand Up @@ -218,7 +217,6 @@ string DebugPrint(RankingInfo const & info)
<< ", m_pureCats: " << info.m_pureCats
<< ", m_falseCats: " << info.m_falseCats
<< ", m_allTokensUsed: " << info.m_allTokensUsed
<< ", m_exactCountryOrCapital: " << info.m_exactCountryOrCapital
<< ", m_categorialRequest: " << info.m_categorialRequest
<< ", m_hasName: " << info.m_hasName
<< " }";
Expand All @@ -245,7 +243,6 @@ void RankingInfo::ToCSV(ostream & os) const
os << m_pureCats << ",";
os << m_falseCats << ",";
os << (m_allTokensUsed ? 1 : 0) << ",";
os << (m_exactCountryOrCapital ? 1 : 0) << ",";
os << (m_categorialRequest ? 1 : 0) << ",";
os << (m_hasName ? 1 : 0);
}
Expand Down Expand Up @@ -283,7 +280,6 @@ double RankingInfo::GetLinearModelRank() const
}

result += (m_allTokensUsed ? 1 : 0) * kAllTokensUsed;
result += (m_exactCountryOrCapital ? 1 : 0) * kExactCountryOrCapital;
auto const nameRank = kNameScore[static_cast<size_t>(GetNameScore())] +
kErrorsMade * GetErrorsMadePerToken() +
kMatchedFraction * m_matchedFraction;
Expand Down
5 changes: 0 additions & 5 deletions search/ranking_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ struct RankingInfo : public StoredRankingInfo
: m_isAltOrOldName(false)
, m_allTokensUsed(true)
, m_exactMatch(true)
, m_exactCountryOrCapital(true)
, m_pureCats(false)
, m_falseCats(false)
, m_categorialRequest(false)
Expand Down Expand Up @@ -120,10 +119,6 @@ struct RankingInfo : public StoredRankingInfo
// True iff all tokens retrieved from search index were matched without misprints.
bool m_exactMatch : 1;

// True iff feature has country or capital type and matches request: full match with all tokens
// used and without misprints.
bool m_exactCountryOrCapital : 1;

// True if all of the tokens that the feature was matched by
// correspond to this feature's categories.
bool m_pureCats : 1;
Expand Down
100 changes: 97 additions & 3 deletions search/search_quality/search_quality_tests/real_mwm_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -615,10 +615,13 @@ UNIT_CLASS_TEST(MwmTestsFixture, AddrInterpolation_Rank)

TEST_GREATER(results.size(), kPopularPoiResultsCount, ());

// Top first address results in 50 km.
Range const range(results);
// Top first address results in 20 km.
Range const range(results, 0, 2);
EqualClassifType(range, GetClassifTypes({{"addr:interpolation"}}));
TEST_LESS(SortedByDistance(range, center), 50000.0, ());
TEST_LESS(SortedByDistance(range, center), 20000.0, ());

// - 3(4) place: Exact address in Montevideo, Uruguay (~200km)
// - 4+ places: addr:interpolation in Argentina
}

// Funny object here. barrier=fence with address and postcode=2700.
Expand All @@ -642,4 +645,95 @@ UNIT_CLASS_TEST(MwmTestsFixture, AddrInterpolation_Rank)
}
}

// The idea behind this test is to get the most famous cities in the first (rarely second) place.
// Every city has POIs, Stops (Metro), Streets named like other cities, but anyway - the city should be on top.
// Each city should have population rank and popularity (will be implemented) that give it a big search rank.
//
// How it works: the viewport is placed into every city from the list in turn, and from each
// viewport every city from the list is searched by its name. The first pass (Buenos Aires, the
// only hardcoded center) also collects the centers of all other cities from its top results.
/// @todo Add (restore) popularity at least from Wiki size.
/// @todo Add more search query languages?
UNIT_CLASS_TEST(MwmTestsFixture, Famous_Cities_Rank)
{
  auto const & cl = classif();
  uint32_t const capitalType = cl.GetTypeByPath({"place", "city", "capital"});

  std::string const arrCities[] = {
    "Buenos Aires",
    "Rio de Janeiro",
    "New York",
    /// @todo After popularity.
    //"San Francisco",
    //"Las Vegas",
    "Los Angeles",
    "Toronto",
    "Lisboa",
    "Madrid",
    "Barcelona",
    "London",
    "Paris",
    //"Zurich",
    "Rome",
    "Milan",
    "Venezia",
    "Amsterdam",
    "Berlin",
    "Stockholm",
    "Istanbul",
    "Minsk",
    "Moscow",
    "Kyiv",
    "New Delhi",
    "Bangkok",
    "Beijing",
    "Tokyo",
    "Melbourne",
    "Sydney",
  };
  size_t const count = std::size(arrCities);

  // Viewport center for every city. Only the first one is known in advance, all others are
  // filled from the search results during the first (i == 0) pass below.
  std::vector<ms::LatLon> arrCenters(count);
  // Buenos Aires like starting point :)
  arrCenters[0] = {-34.60649, -58.43540};

  // Feature type of the idx-th result, truncated with TruncValue(type, 3) before it is compared
  // with the 3-component capitalType path.
  auto const getTruncatedType = [](auto const & results, size_t idx)
  {
    uint32_t type = results[idx].GetFeatureType();
    ftype::TruncValue(type, 3);
    return type;
  };

  // For DEBUG (used by the commented block at the beginning of the loop).
  // bool isGoGo = false;
  for (size_t i = 0; i < count; ++i)
  {
    // For DEBUG: start processing from the given city. The first pass is always kept, because
    // it fills arrCenters.
    // if (!isGoGo && arrCities[i] == "London")
    //   isGoGo = true;
    // if (i > 0 && !isGoGo)
    //   continue;

    /// @todo Temporary, USA has a lot of similar close cities.
    if (arrCities[i] == "New York")
      continue;

    LOG(LINFO, ("=== Processing:", arrCities[i]));
    SetViewportAndLoadMaps(arrCenters[i]);

    for (size_t j = 0; j < count; ++j)
    {
      auto request = MakeRequest(arrCities[j] + " ", "en");
      auto const & results = request->Results();
      TEST_GREATER(results.size(), 0, (arrCities[i], arrCities[j]));

      if (getTruncatedType(results, 0) != capitalType)
      {
        // Buenos Aires should always work.
        TEST(i != 0, ());

        // The city is allowed to be on the second place, but not lower.
        TEST_GREATER(results.size(), 1, (arrCities[i], arrCities[j]));
        uint32_t const type = getTruncatedType(results, 1);

        TEST(type == capitalType, (cl.GetReadableObjectName(type), arrCities[i], arrCities[j]));
      }

      // During the first pass the top result is the searched city (checked above), so its
      // center becomes the viewport center for the corresponding outer iteration.
      if (i == 0 && i != j)
        arrCenters[j] = mercator::ToLatLon(results[0].GetFeatureCenter());
    }
  }
}

} // namespace real_mwm_tests
4 changes: 0 additions & 4 deletions search/search_tests/ranking_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,6 @@ UNIT_TEST(NameScore_SubstringVsErrors)
info.m_numTokens = 1;
info.m_allTokensUsed = true;
info.m_exactMatch = false;
info.m_exactCountryOrCapital = false;

{
RankingInfo poi1 = info;
Expand Down Expand Up @@ -192,14 +191,12 @@ UNIT_TEST(RankingInfo_PreferCountry)
auto cafe = info;
cafe.m_distanceToPivot = 1e3;
cafe.m_tokenRanges[Model::TYPE_SUBPOI] = TokenRange(0, 1);
cafe.m_exactCountryOrCapital = false;
cafe.m_type = Model::TYPE_SUBPOI;
cafe.m_classifType.poi = PoiType::Eat;

auto country = info;
country.m_distanceToPivot = 1e6;
country.m_tokenRanges[Model::TYPE_COUNTRY] = TokenRange(0, 1);
country.m_exactCountryOrCapital = true;
country.m_type = Model::TYPE_COUNTRY;

// Country should be preferred even if cafe is much closer to viewport center.
Expand All @@ -213,7 +210,6 @@ UNIT_TEST(RankingInfo_PrefixVsFull)
info.m_matchedFraction = 1;
info.m_allTokensUsed = true;
info.m_exactMatch = false;
info.m_exactCountryOrCapital = false;
info.m_distanceToPivot = 1000;
info.m_type = Model::TYPE_SUBPOI;
info.m_tokenRanges[Model::TYPE_SUBPOI] = TokenRange(0, 2);
Expand Down

0 comments on commit 094defe

Please sign in to comment.