diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/id/indonesianPaper1.js b/packages/yoastseo/spec/fullTextTests/testTexts/id/indonesianPaper1.js index ed6685ea3fa..69b69323452 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/id/indonesianPaper1.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/id/indonesianPaper1.js @@ -78,8 +78,8 @@ const expectedResults = { }, urlKeyword: { isApplicable: true, - score: 6, - resultText: "Keyphrase in slug: (Part of) your keyphrase does not appear in the slug. Change that!", + score: 9, + resultText: "Keyphrase in slug: Great work!", }, urlLength: { isApplicable: true, diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountInUrlSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountInUrlSpec.js index 6a9f6c49758..ec48c5f1cb7 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountInUrlSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountInUrlSpec.js @@ -23,13 +23,27 @@ describe( "test to check url for keyword", function() { expect( urlKeyword( paper, researcher ) ).toEqual( { keyphraseLength: 2, percentWordMatches: 100 } ); } ); - it( "returns no matches for dashed words", function() { + it( "returns no matches for differently dashed words", function() { const paper = new Paper( "", { url: "url-with-key-word", keyword: "keyword" } ); const researcher = new EnglishResearcher( paper ); researcher.addResearchData( "morphology", morphologyData ); expect( urlKeyword( paper, researcher ) ).toEqual( { keyphraseLength: 1, percentWordMatches: 0 } ); } ); + it( "returns matches for equally dashed words", function() { + const paper = new Paper( "", { url: "url-with-key-word", keyword: "key-word" } ); + const researcher = new EnglishResearcher( paper ); + researcher.addResearchData( "morphology", morphologyData ); + expect( urlKeyword( paper, researcher ) ).toEqual( { keyphraseLength: 2, percentWordMatches: 100 } ); + } ); + + it( "returns matches for equally dashed words with more words around", function() { + const paper = new Paper( "", { url: "url-with-key-word", keyword: "exciting key-word exciting" } ); + const researcher = new EnglishResearcher( paper ); + researcher.addResearchData( "morphology", morphologyData ); + expect( urlKeyword( paper, researcher ) ).toEqual( { keyphraseLength: 4, percentWordMatches: 50 } ); + } ); + it( "returns matches with diacritics", function() { const paper = new Paper( "", { url: "url-with-key-word", keyword: "këyword" } ); const researcher = new EnglishResearcher( paper ); @@ -197,21 +211,20 @@ describe( "test to check url for keyword", function() { const paper = new Paper( "", { url: "buku-buku", keyword: "buku-buku" } ); const researcher = new EnglishResearcher( paper ); researcher.addResearchData( "morphology", morphologyData ); - expect( urlKeyword( paper, researcher ) ).toEqual( { keyphraseLength: 1, percentWordMatches: 100 } ); + expect( urlKeyword( paper, researcher ) ).toEqual( { keyphraseLength: 2, percentWordMatches: 100 } ); } ); it( "works with dash within the keyword in url", function() { const paper = new Paper( "", { url: "on-the-go", keyword: "on-the-go" } ); const researcher = new EnglishResearcher( paper ); researcher.addResearchData( "morphology", morphologyData ); - expect( urlKeyword( paper, researcher ) ).toEqual( { keyphraseLength: 1, percentWordMatches: 100 } ); + expect( urlKeyword( paper, researcher ) ).toEqual( { keyphraseLength: 3, percentWordMatches: 100 } ); } ); - // eslint-disable-next-line capitalized-comments - /* it( "works with dash within the keyword in url", function() { - const paper = new Paper( "", { url: "two-room-apartment", keyword: "two-room apartment" } ); + it( "works with dash within the keyword in url", function() { + const paper = new Paper( "", { url: "two-room-apartment", keyword: "two-room apartment" } ); const researcher = new EnglishResearcher( paper ); researcher.addResearchData( "morphology", morphologyData ); - expect( urlKeyword( paper, researcher ) ).toEqual( { keyphraseLength: 1, percentWordMatches: 100 } ); - } );*/ + expect( urlKeyword( paper, researcher ) ).toEqual( { keyphraseLength: 3, percentWordMatches: 100 } ); + } ); } ); diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCountInUrl.js b/packages/yoastseo/src/languageProcessing/researches/keywordCountInUrl.js index 7254cafbd90..9b4aaedbbaa 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCountInUrl.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCountInUrl.js @@ -2,6 +2,37 @@ import parseSlug from "../helpers/url/parseSlug"; import { findTopicFormsInString } from "../helpers/match/findKeywordFormsInString.js"; +/** + * Splits hyphenated keyphrases so that each compound is an individual word, e.g. 'pop-art' becomes 'pop' and 'art'. + * Splitting the keyphrase forms allows for hyphenated keyphrases to be detected in the slug. The slug is parsed on hyphens, and the words from + * the keyphrase are compared with the words from the slug to find a match. Without dehyphenating the keyphrase, the word from the keyphrase would be + * 'pop-art' while the words from the slug would be 'pop' and 'art', and a match would not be detected. + * + * @param {Array} topicForms The keyphraseForms and synonymsForms of the paper. + * + * @returns {Array} topicForms with split compounds. + */ +function dehyphenateKeyphraseForms( topicForms ) { + const dehyphenatedKeyphraseForms = []; + + topicForms.keyphraseForms.forEach( function( wordForms ) { + // If a word doesn't contain hyphens, don't split it. + if ( wordForms[ 0 ].indexOf( "-" ) === -1 ) { + dehyphenatedKeyphraseForms.push( wordForms ); + return; + } + + // Split each form of a hyphenated word and add each compound to the array of dehyphenated keyphrase forms. + wordForms.forEach( function( wordForm ) { + const splitWordForm = wordForm.split( "-" ); + splitWordForm.forEach( compound => dehyphenatedKeyphraseForms.push( [ compound ] ) ); + } ); + } ); + topicForms.keyphraseForms = dehyphenatedKeyphraseForms; + + return topicForms; +} + /** * Matches the keyword in the URL. Replaces dashes and underscores with whitespaces and uses whitespace as wordboundary. * @@ -11,17 +42,11 @@ import { findTopicFormsInString } from "../helpers/match/findKeywordFormsInStrin * @returns {int} Number of times the keyword is found. */ export default function( paper, researcher ) { - const topicForms = researcher.getResearch( "morphology" ); + const topicForms = dehyphenateKeyphraseForms( researcher.getResearch( "morphology" ) ); const parsedSlug = parseSlug( paper.getUrl() ); - let keyphraseInSlug = findTopicFormsInString( topicForms, parsedSlug, false, paper.getLocale() ); - /* In case we deal with a language where dashes are part of the word (e.g., in Indonesian: buku-buku), - * Try looking for the keywords in the unparsed slug. - */ - if ( keyphraseInSlug.percentWordMatches === 0 ) { - const unparsedSlug = paper.getUrl(); - keyphraseInSlug = findTopicFormsInString( topicForms, unparsedSlug, false, paper.getLocale() ); - } + const keyphraseInSlug = findTopicFormsInString( topicForms, parsedSlug, false, paper.getLocale() ); + return { keyphraseLength: topicForms.keyphraseForms.length, percentWordMatches: keyphraseInSlug.percentWordMatches,