Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert Sentence length and paragraph length to use HTML parser and enable AI button for both assessments #21866

Open
wants to merge 16 commits into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Adds processing of hyphens
  • Loading branch information
mhkuu committed Dec 16, 2024
commit fd3ed61de2936aee2988dbd1e7a9811efba4d426
29 changes: 0 additions & 29 deletions packages/yoastseo/spec/fullTextTests/runFullTextTests.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ import { getLanguagesWithWordComplexity } from "../../src/helpers";

// Import test papers.
import testPapers from "./testTexts";
import fs from "fs";

testPapers.forEach( function( testPaper ) {
// eslint-disable-next-line max-statements
Expand All @@ -66,34 +65,6 @@ testPapers.forEach( function( testPaper ) {

buildTree( paper, researcher );

/**
 * Stores content in a file inside the "tmp/" directory, creating that directory first when it is missing.
 *
 * The directory is relative to the current working directory
 * (e.g. packages/yoastseo/tmp/ when called from packages/yoastseo/).
 *
 * @param {string} filename The name of the file to write.
 * @param {string} content  The content to write to the file.
 *
 * @returns {void}
 */
const writeToTempFile = ( filename, content ) => {
	const tmpDirectory = "tmp/";
	const targetPath = tmpDirectory + filename;

	// Make sure the temporary directory exists before writing to it.
	const directoryExists = fs.existsSync( tmpDirectory );
	if ( directoryExists === false ) {
		fs.mkdirSync( tmpDirectory );
	}

	// Write the data to the file in the temporary directory.
	fs.writeFileSync( targetPath, content );
};

// Collects the results and the header into list of ;-separated rows
const sentences = researcher.getResearch( "countSentencesFromText" );
const resultLines = sentences.map( sentence => sentence.sentence.trimStart().split( " " )[ 0 ] + ";" + sentence.sentenceLength );

// Set doExport to true to write the results to a temporary file.
const doExport = true;
if ( doExport ) {
writeToTempFile( testPaper.name + ".csv", resultLines.join( "\n" ) );
}

const expectedResults = testPaper.expectedResults;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ import buildTree from "../../specHelpers/parse/buildTree";

describe( "a test for getting paragraph length", function() {
it( "returns the paragraph length of a paragraph between p tags", function() {
const mockPaper = new Paper( "<p>Lorem ipsum</p>" );
const mockPaper = new Paper( "<p>Lorem ipsum, hyphens all-over-the-place</p>" );
const mockResearcher = new EnglishResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 2 );
expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 4 );
} );

it( "returns the paragraph length of a paragraph in Japanese between p tags", function() {
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export default function( sentences, researcher ) {
const sentencesWordCount = [];
sentences.forEach( sentence => {
const customLengthHelper = researcher.getHelper( "customCountLength" );
const length = customLengthHelper ? customLengthHelper( sentence.text ) : getWordsFromTokens( sentence.tokens ).length;
const length = customLengthHelper ? customLengthHelper( sentence.text ) : getWordsFromTokens( sentence.tokens, false ).length;
if ( length > 0 ) {
sentencesWordCount.push( {
sentence: sentence,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,38 @@ import getSentencesFromTree from "../sentence/getSentencesFromTree";
import { flatMap } from "lodash";
import removePunctuation from "../sanitize/removePunctuation";

/**
 * Merges words surrounding a separator into one word.
 *
 * Every token that is exactly equal to `separator` is glued together with the token before it and the token
 * after it, e.g. `[ "all", "-", "over" ]` becomes `[ "all-over" ]`. Chains are merged from left to right,
 * so `[ "a", "-", "b", "-", "c" ]` becomes `[ "a-b-c" ]`.
 *
 * A separator that has no token before it (start of the array) or no token after it (end of the array)
 * cannot be merged and is left in place untouched.
 *
 * @param {string[]} words The array of words to split and merge.
 * @param {string} separator The separator to split on.
 *
 * @returns {void} This function mutates the `words` array in place.
 */
function mergeBy( words, separator ) {
	// Single pass with a read and a write cursor: the write cursor never overtakes the read cursor,
	// so compacting in place cannot overwrite a token that still has to be read.
	let writeIndex = 0;
	for ( let readIndex = 0; readIndex < words.length; readIndex++ ) {
		const current = words[ readIndex ];
		const hasWordBefore = writeIndex > 0;
		const hasWordAfter = readIndex + 1 < words.length;

		if ( current === separator && hasWordBefore && hasWordAfter ) {
			// Glue the separator and the following token onto the previously written token.
			words[ writeIndex - 1 ] = words[ writeIndex - 1 ] + separator + words[ readIndex + 1 ];
			// The following token has been consumed by the merge, so skip it.
			readIndex++;
		} else {
			words[ writeIndex ] = current;
			writeIndex++;
		}
	}
	// Drop the leftover tail that was compacted away.
	words.length = writeIndex;
}

/**
* Gets the words from the tokens.
*
* @param {Token[]} tokens The tokens to get the words from.
* @param {boolean} splitOnHyphens Whether to split words on hyphens.
*
* @returns {string[]} Array of words retrieved from the tokens.
*/
export function getWordsFromTokens( tokens ) {
export function getWordsFromTokens( tokens, splitOnHyphens = true ) {
// Retrieve all texts from the tokens.
let words = tokens.map( token => token.text );
// Combine words separated by a hyphen, if needed.
if ( ! splitOnHyphens ) {
mergeBy( words, "-" );
}
// Remove punctuation and spaces.
words = words.map( token => removePunctuation( token ) );
// Filter out empty tokens.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export default function( paper, researcher ) {
paragraphs.forEach( paragraph => {
const customLengthHelper = researcher.getHelper( "customCountLength" );
const tokens = paragraph.sentences.map( sentence => sentence.tokens ).flat();
const length = customLengthHelper ? customLengthHelper( paragraph.innerText() ) : getWordsFromTokens( tokens ).length;
const length = customLengthHelper ? customLengthHelper( paragraph.innerText() ) : getWordsFromTokens( tokens, false ).length;
if ( length > 0 ) {
paragraphLengths.push( {
paragraph: paragraph,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,12 +198,13 @@ export default class ParagraphTooLongAssessment extends Assessment {
const paragraphsLength = researcher.getResearch( "getParagraphLength" );
const tooLongParagraphs = this.getTooLongParagraphs( paragraphsLength, this.getConfig( researcher ) );
return tooLongParagraphs.flatMap( ( { paragraph } ) => {
const scl = paragraph.sourceCodeLocation;
return new Mark( {
position: {
startOffset: paragraph.sourceCodeLocation.startTag.endOffset,
endOffset: paragraph.sourceCodeLocation.endTag.startOffset,
startOffset: scl.startTag ? scl.startTag.endOffset : scl.startOffset,
endOffset: scl.endTag ? scl.endTag.startOffset : scl.endOffset,
startOffsetBlock: 0,
endOffsetBlock: paragraph.sourceCodeLocation.endOffset - paragraph.sourceCodeLocation.startOffset,
endOffsetBlock: scl.endOffset - scl.startOffset,
clientId: paragraph.clientId || "",
attributeId: paragraph.parentAttributeId || "",
isFirstSection: paragraph.isParentFirstSectionOfBlock || false,
Expand Down
Loading