Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert Sentence length and paragraph length to use HTML parser and enable AI button for both assessments #21866

Open
wants to merge 16 commits into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
Converts the sentence length assessment to use the HTML Parser
  • Loading branch information
mhkuu committed Nov 12, 2024
commit 0eb3bab6607120f98a621cc1ffe97c30e4f1b6e3
Original file line number Diff line number Diff line change
@@ -1,41 +1,50 @@
import sentencesLength from "../../../../src/languageProcessing/helpers/sentence/sentencesLength";
import getSentencesFromTree from "../../../../src/languageProcessing/helpers/sentence/getSentencesFromTree";
import JapaneseResearcher from "../../../../src/languageProcessing/languages/ja/Researcher";
import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher";
import Paper from "../../../../src/values/Paper";
import buildTree from "../../../specHelpers/parse/buildTree";

/*
 * Specs for the `sentencesLength` helper.
 *
 * Every spec follows the same steps: wrap the HTML in a Paper, run `buildTree` on that paper
 * with the researcher, and pass the result of `getSentencesFromTree` to the helper. The
 * assertions read `sentenceLength` and `sentence.text` from each returned entry.
 */
describe( "A test to count sentence lengths.", function() {
	it( "should not return a length for an empty sentence", function() {
		const mockPaper = new Paper( "<p></p><p>A sentence</p>" );
		const mockResearcher = new EnglishResearcher( mockPaper );
		buildTree( mockPaper, mockResearcher );

		const sentenceLengths = sentencesLength( getSentencesFromTree( mockPaper ), mockResearcher );

		// Only the second paragraph should yield an entry; the empty one yields none.
		expect( sentenceLengths.length ).toEqual( 1 );
		expect( sentenceLengths[ 0 ].sentenceLength ).toEqual( 2 );
		expect( sentenceLengths[ 0 ].sentence.text ).toEqual( "A sentence" );
	} );

	it( "should return the sentences and their length (the HTML tags should not be counted if present)", function() {
		const mockPaper = new Paper( "<p>A <strong>good</strong> text</p>" +
			"<p>this is a <span style='color: blue;'>string</span></p>" );
		const mockResearcher = new EnglishResearcher( mockPaper );
		buildTree( mockPaper, mockResearcher );

		const sentenceLengths = sentencesLength( getSentencesFromTree( mockPaper ), mockResearcher );

		// The expected sentence text carries no markup, and the lengths cover the words only.
		expect( sentenceLengths.length ).toEqual( 2 );
		expect( sentenceLengths[ 0 ].sentenceLength ).toEqual( 3 );
		expect( sentenceLengths[ 0 ].sentence.text ).toEqual( "A good text" );
		expect( sentenceLengths[ 1 ].sentenceLength ).toEqual( 4 );
		expect( sentenceLengths[ 1 ].sentence.text ).toEqual( "this is a string" );
	} );

	it( "should return the sentences and their length for Japanese (so counting characters)", function() {
		const mockPaper = new Paper( "<p>自然おのずから存在しているもの</p>" +
			"<p>歩くさわやかな森 <span style='color: red;'> 自然 </span></p>" );
		const mockJapaneseResearcher = new JapaneseResearcher( mockPaper );
		buildTree( mockPaper, mockJapaneseResearcher );

		const sentenceLengths = sentencesLength( getSentencesFromTree( mockPaper ), mockJapaneseResearcher );

		expect( sentenceLengths.length ).toEqual( 2 );
		expect( sentenceLengths[ 0 ].sentenceLength ).toEqual( 15 );
		expect( sentenceLengths[ 0 ].sentence.text ).toEqual( "自然おのずから存在しているもの" );
		expect( sentenceLengths[ 1 ].sentenceLength ).toEqual( 10 );
		// NOTE(review): the whitespace inside this expected text may have been collapsed when
		// the page was copied; confirm against the repository before relying on it.
		expect( sentenceLengths[ 1 ].sentence.text ).toEqual( "歩くさわやかな森 自然 " );
	} );
} );
Original file line number Diff line number Diff line change
@@ -1,68 +1,122 @@
/* eslint-disable capitalized-comments, spaced-comment */
import getSentences from "../../../src/languageProcessing/researches/countSentencesFromText.js";
import Paper from "../../../src/values/Paper";
import EnglishResearcher from "../../../src/languageProcessing/languages/en/Researcher";
import JapaneseResearcher from "../../../src/languageProcessing/languages/ja/Researcher";
import buildTree from "../../specHelpers/parse/buildTree";

/*
 * Specs for the `countSentencesFromText` research (imported here as `getSentences`).
 *
 * Every spec follows the same steps: create a Paper, create a researcher for that paper,
 * run `buildTree` on both, then call `getSentences( paper, researcher )` once and assert on
 * the `sentenceLength` of the returned entries. The English and Japanese specs use the same
 * structure; only the researcher and the expected lengths differ.
 */
describe( "counts words in sentences from text", function() {
	it( "returns sentences with question mark", function() {
		const mockPaper = new Paper( "Hello. How are you? Bye" );
		const mockResearcher = new EnglishResearcher( mockPaper );
		buildTree( mockPaper, mockResearcher );

		const sentences = getSentences( mockPaper, mockResearcher );

		expect( sentences[ 0 ].sentenceLength ).toBe( 1 );
		expect( sentences[ 1 ].sentenceLength ).toBe( 3 );
		expect( sentences[ 2 ].sentenceLength ).toBe( 1 );
	} );
	it( "returns sentences with exclamation mark", function() {
		const mockPaper = new Paper( "Hello. How are you! Bye" );
		const mockResearcher = new EnglishResearcher( mockPaper );
		buildTree( mockPaper, mockResearcher );

		const sentences = getSentences( mockPaper, mockResearcher );

		expect( sentences[ 0 ].sentenceLength ).toBe( 1 );
		expect( sentences[ 1 ].sentenceLength ).toBe( 3 );
		expect( sentences[ 2 ].sentenceLength ).toBe( 1 );
	} );
	it( "returns sentences with many spaces", function() {
		// NOTE(review): this fixture is identical to the one in the exclamation-mark spec above.
		// The extra spaces the title refers to may have been lost when the page was copied; confirm.
		const mockPaper = new Paper( "Hello. How are you! Bye" );
		const mockResearcher = new EnglishResearcher( mockPaper );
		buildTree( mockPaper, mockResearcher );

		const sentences = getSentences( mockPaper, mockResearcher );

		expect( sentences[ 0 ].sentenceLength ).toBe( 1 );
		expect( sentences[ 1 ].sentenceLength ).toBe( 3 );
		expect( sentences[ 2 ].sentenceLength ).toBe( 1 );
	} );
	it( "returns sentences with html-tags, should only count words", function() {
		const mockPaper = new Paper( "This is a text <img src='https://example.com/image.jpg' alt='a bunch of words in an alt-tag' />" );
		const mockResearcher = new EnglishResearcher( mockPaper );
		buildTree( mockPaper, mockResearcher );

		const sentences = getSentences( mockPaper, mockResearcher );

		// The expected length of 4 matches "This is a text"; the alt attribute words are not included.
		expect( sentences[ 0 ].sentenceLength ).toBe( 4 );
	} );
	it( "returns sentences with html-tags, should only count words", function() {
		const mockPaper = new Paper( "This is a text <img src='https://example.com/image.jpg' alt='a bunch of words in an alt-tag' />. Another sentence." );
		const mockResearcher = new EnglishResearcher( mockPaper );
		buildTree( mockPaper, mockResearcher );

		const sentences = getSentences( mockPaper, mockResearcher );

		expect( sentences[ 0 ].sentenceLength ).toBe( 4 );
		expect( sentences[ 1 ].sentenceLength ).toBe( 2 );
	} );
	it( "should not count sentences inside elements we want to exclude from the analysis", function() {
		const mockPaper = new Paper( "This is a text. <code>With some code.</code>. Another sentence." );
		const mockResearcher = new EnglishResearcher( mockPaper );
		buildTree( mockPaper, mockResearcher );

		const sentences = getSentences( mockPaper, mockResearcher );

		// Only the text outside <code> is expected: "This is a text." (4) and "Another sentence." (2).
		expect( sentences[ 0 ].sentenceLength ).toBe( 4 );
		expect( sentences[ 1 ].sentenceLength ).toBe( 2 );
	} );
	it( "returns sentences with question mark in Japanese", function() {
		const mockPaper = new Paper( "雨が降っている。 いつ終わるの? さようなら" );
		const mockResearcher = new JapaneseResearcher( mockPaper );
		buildTree( mockPaper, mockResearcher );

		const sentences = getSentences( mockPaper, mockResearcher );

		expect( sentences[ 0 ].sentenceLength ).toBe( 8 );
		expect( sentences[ 1 ].sentenceLength ).toBe( 7 );
		expect( sentences[ 2 ].sentenceLength ).toBe( 5 );
	} );
	it( "returns sentences with exclamation mark", function() {
		const mockPaper = new Paper( "雨が降っている. いつ終わるの!さようなら" );
		const mockResearcher = new JapaneseResearcher( mockPaper );
		buildTree( mockPaper, mockResearcher );

		const sentences = getSentences( mockPaper, mockResearcher );

		expect( sentences[ 0 ].sentenceLength ).toBe( 8 );
		expect( sentences[ 1 ].sentenceLength ).toBe( 7 );
		expect( sentences[ 2 ].sentenceLength ).toBe( 5 );
	} );
	it( "returns sentences with many spaces", function() {
		// NOTE(review): this fixture is identical to the one in the Japanese question-mark spec.
		// The extra spaces the title refers to may have been lost when the page was copied; confirm.
		const mockPaper = new Paper( "雨が降っている。 いつ終わるの? さようなら" );
		const mockResearcher = new JapaneseResearcher( mockPaper );
		buildTree( mockPaper, mockResearcher );

		const sentences = getSentences( mockPaper, mockResearcher );

		expect( sentences[ 0 ].sentenceLength ).toBe( 8 );
		expect( sentences[ 1 ].sentenceLength ).toBe( 7 );
		expect( sentences[ 2 ].sentenceLength ).toBe( 5 );
	} );
	it( "returns sentences with html-tags, should count characters in Japanese", function() {
		const mockPaper = new Paper( "いつ終わるの <img src='image.jpg' alt='自分を大事にして下さい' />" );
		const mockResearcher = new JapaneseResearcher( mockPaper );
		buildTree( mockPaper, mockResearcher );

		const sentences = getSentences( mockPaper, mockResearcher );

		expect( sentences[ 0 ].sentenceLength ).toBe( 6 );
	} );
	it( "returns sentences with html-tags, should count characters in Japanese", function() {
		const mockPaper = new Paper( "いつ終わるの <img src='http://domain.com/image.jpg' alt='自分を大事にして下さい' />. 春がやってきます。" );
		const mockResearcher = new JapaneseResearcher( mockPaper );
		buildTree( mockPaper, mockResearcher );

		const sentences = getSentences( mockPaper, mockResearcher );

		expect( sentences[ 0 ].sentenceLength ).toBe( 7 );
		expect( sentences[ 1 ].sentenceLength ).toBe( 9 );
	} );
} );
Loading