Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert Sentence length and paragraph length to use HTML parser and enable AI button for both assessments #21866

Open
wants to merge 16 commits into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
First pass on converting the paragraph length assessment to use the HTML Parser
  • Loading branch information
mhkuu committed Nov 14, 2024
commit e338b83c2588d333a9d0f1ba284152a196d4e123

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import Researcher from "../../../../src/languageProcessing/languages/ar/Researcher.js";
import Paper from "../../../../src/values/Paper.js";
import getMorphologyData from "../../../specHelpers/getMorphologyData";
import buildTree from "../../../specHelpers/parse/buildTree";
import functionWords from "../../../../src/languageProcessing/languages/ar/config/functionWords";
import transitionWords from "../../../../src/languageProcessing/languages/ar/config/transitionWords";
import firstWordExceptions from "../../../../src/languageProcessing/languages/ar/config/firstWordExceptions";
Expand All @@ -9,10 +10,12 @@ import twoPartTransitionWords from "../../../../src/languageProcessing/languages
const morphologyDataAR = getMorphologyData( "ar" );

describe( "a test for Arabic Researcher", function() {
const researcher = new Researcher( new Paper( "This is another paper!" ) );
const paper = new Paper( "This is another paper!" );
const researcher = new Researcher( paper );
buildTree( paper, researcher );

it( "checks if the Arabic Researcher still inherit the Abstract Researcher", function() {
expect( researcher.getResearch( "getParagraphLength" ) ).toEqual( [ { text: "This is another paper!", countLength: 4 } ] );
expect( researcher.getResearch( "getParagraphLength" )[ 0 ].paragraphLength ).toEqual( 4 );
} );

it( "returns false if the default research is deleted in Arabic Researcher", function() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,127 +2,237 @@ import getParagraphLength from "../../../src/languageProcessing/researches/getPa
import Paper from "../../../src/values/Paper.js";
import JapaneseResearcher from "../../../src/languageProcessing/languages/ja/Researcher.js";
import EnglishResearcher from "../../../src/languageProcessing/languages/en/Researcher.js";
import buildTree from "../../specHelpers/parse/buildTree";

describe( "a test for getting paragraph length", function() {
it( "returns the paragraph length of a paragraph between p tags", function() {
const mockPaper = new Paper( "<p>Lorem ipsum</p>" );
expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 0 ].countLength ).toBe( 2 );
const mockResearcher = new EnglishResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 2 );
} );

it( "returns the paragraph length of a paragraph in Japanese between p tags", function() {
const mockPaper = new Paper( "<p>これに対し日本国有鉄道</p>" );
expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 0 ].countLength ).toBe( 11 );
const mockResearcher = new JapaneseResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 11 );
} );

it( "returns the paragraph length of two paragraphs divided by double linebreaks and ends with a double linebreak", function() {
const mockPaper = new Paper( "Lorem \n\n ipsum two \n\n" );
expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 0 ].countLength ).toBe( 1 );
expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 1 ].countLength ).toBe( 2 );
const mockResearcher = new EnglishResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 1 );
expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 2 );
} );

it( "returns the paragraph length of two paragraphs in Japanese divided by double linebreaks and ends with a double linebreak", function() {
const mockPaper = new Paper( "1964年 \n\n (昭和39年) \n\n" );
expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 0 ].countLength ).toBe( 5 );
expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 1 ].countLength ).toBe( 7 );
const mockResearcher = new JapaneseResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 5 );
expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 7 );
} );

it( "returns the paragraph length of two paragraphs divided by double linebreaks that don't end with a double linebreak", function() {
const mockPaper = new Paper( "Lorem \n\n ipsum two" );
expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 0 ].countLength ).toBe( 1 );
expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 1 ].countLength ).toBe( 2 );
const mockResearcher = new EnglishResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 1 );
expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 2 );
} );

it( "returns the paragraph length of two paragraphs in Japanese divided by double linebreaks that don't end with a double linebreak", function() {
const mockPaper = new Paper( "1964年 \n\n (昭和39年)" );
expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 0 ].countLength ).toBe( 5 );
expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 1 ].countLength ).toBe( 7 );
const mockResearcher = new JapaneseResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 5 );
expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 7 );
} );

it( "returns the paragraph length of a paragraph without tags or double linebreaks", function() {
const mockPaper = new Paper( "Lorem ipsum dolor sit amet" );
expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 0 ].countLength ).toBe( 5 );
const mockResearcher = new EnglishResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 5 );
} );

it( "returns the paragraph length of a paragraph in Japanese without tags or double linebreaks", function() {
const mockPaper = new Paper( "東京オリンピック開会直前の1964年(昭和39年)10月1日に開業した。" );
expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 0 ].countLength ).toBe( 36 );
const mockResearcher = new JapaneseResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 36 );
} );

it( "returns the paragraph length of 2 paragraphs, both between p tags", function() {
const mockPaper = new Paper( "<p>Lorem ipsum</p><p>dolor sit amet</p>" );
expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 0 ].countLength ).toBe( 2 );
expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 1 ].countLength ).toBe( 3 );
const mockResearcher = new EnglishResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );
expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 2 );
expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 3 );
} );

it( "returns the paragraph length of 2 paragraphs in Japanese, both between p tags", function() {
const mockPaper = new Paper( "<p>東京オリンピック開会直前の1964年</p><p>(昭和39年)10月1日に開業した。</p>" );
expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 0 ].countLength ).toBe( 18 );
expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 1 ].countLength ).toBe( 18 );
const mockResearcher = new JapaneseResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 18 );
expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 18 );
} );

it( "returns the paragraph length of 2 paragraphs, both between p tags, divided by double linebreaks", function() {
const mockPaper = new Paper( "<p>Lorem ipsum</p> \n\n <p>dolor sit amet</p>" );
expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 0 ].countLength ).toBe( 2 );
expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 1 ].countLength ).toBe( 3 );
const mockResearcher = new EnglishResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 2 );
expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 3 );
} );

it( "returns the paragraph length of 2 paragraphs in Japanese, both between p tags, divided by double linebreaks", function() {
const mockPaper = new Paper( "<p>東京オリンピック開会直前の1964年</p> \n\n <p>(昭和39年)10月1日に開業した。</p>" );
expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 0 ].countLength ).toBe( 18 );
expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 1 ].countLength ).toBe( 18 );
const mockResearcher = new JapaneseResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 18 );
expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 18 );
} );

it( "returns the paragraph length, with empty paragraphs", function() {
const mockPaper = new Paper( "<p>test</p><p></p><p>more text</p>" );
expect( getParagraphLength( mockPaper, new EnglishResearcher() ).length ).toBe( 2 );
const mockResearcher = new EnglishResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths.length ).toBe( 2 );
} );

it( "returns the paragraph length, ignoring text inside an element we want to exclude from the analysis", function() {
const mockPaper = new Paper( "<p>test <code>ignore me</code></p>" );
expect( getParagraphLength( mockPaper, new EnglishResearcher() ).length ).toBe( 1 );
const mockResearcher = new EnglishResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths.length ).toBe( 1 );
} );

it( "returns the paragraph length, ignoring shortcodes", function() {
const mockPaper = new Paper( "<p>test [shortcode]</p>", { shortcodes: [ "shortcode" ] } );
expect( getParagraphLength( mockPaper, new EnglishResearcher() ).length ).toBe( 1 );
const mockResearcher = new EnglishResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths.length ).toBe( 1 );
} );

it( "returns the paragraph length of paragraph without p tags or double linebreaks, but with h2 tags", function() {
const mockPaper = new Paper( "<h2>Lorem ipsum dolor sit amet</h2>" );
expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 0 ].countLength ).toBe( 5 );
const mockResearcher = new EnglishResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 5 );
} );

it( "returns the paragraph length of paragraph in Japanese without p tags or double linebreaks, but with h2 tags", function() {
const mockPaper = new Paper( "<h2>(昭和39年)10月1日に開業した。</h2>" );
expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 0 ].countLength ).toBe( 18 );
const mockResearcher = new JapaneseResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 18 );
} );

xit( "returns the paragraph length of an empty paragraph with p tags", function() {
const mockPaper = new Paper( "<p></p>" );
expect( getParagraphLength( mockPaper, new EnglishResearcher() ).countLength ).not.toContain( 0 );
const mockResearcher = new EnglishResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths.paragraphLength ).not.toContain( 0 );
} );

xit( "returns the paragraph length of an empty paragraph without p tags or double line breaks", function() {
const mockPaper = new Paper( "" );
expect( getParagraphLength( mockPaper, new EnglishResearcher() ).countLength ).not.toContain( 0 );
const mockResearcher = new EnglishResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths.paragraphLength ).not.toContain( 0 );
} );
} );

/*
 * Specs for how `getParagraphLength` deals with paragraphs that contain images.
 *
 * NOTE(review): as in the rest of this listing, the diff markers appear to have been lost. The
 * one-line `expect( getParagraphLength( mockPaper, new EnglishResearcher( mockPaper ) ) ... )`
 * statements look like the removed lines, and the statements from `const mockResearcher` onwards
 * look like their replacements - confirm against the commit.
 */
describe( "a test for getting paragraph length of a text with image(s)", () => {
it( "should not count a paragraph containing only an image", function() {
// The paper contains 3 paragraphs: 2 paragraphs with text and one paragraph with only an image.
const mockPaper = new Paper( "<p>test</p><p><img src='image.com/image.png' /></p><p>more text</p>" );
expect( getParagraphLength( mockPaper, new EnglishResearcher( mockPaper ) ).length ).toBe( 2 );
const mockResearcher = new EnglishResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths.length ).toBe( 2 );
} );

// Both paragraphs hold nothing but an image, so an empty result is expected.
it( "should return 0 for paragraphs count when all paragraphs only contain images", function() {
const mockPaper = new Paper( "<p><img src='image.com/image.png' /></p><p><img src='image.com/image.png' /></p>" );
expect( getParagraphLength( mockPaper, new EnglishResearcher( mockPaper ) ).length ).toBe( 0 );
const mockResearcher = new EnglishResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths.length ).toBe( 0 );
} );

// Each paragraph mixes an image with the single word "test"; both are expected as entries.
// NOTE(review): only the first entry's length is asserted; the second paragraph's length is not checked.
it( "should not include the image in the paragraph length calculation", function() {
const mockPaper = new Paper( "<p><img src='image.com/image.png' />test</p><p><img src='image.com/image.png' /> test </p>" );
expect( getParagraphLength( mockPaper, new EnglishResearcher( mockPaper ) ).length ).toBe( 2 );
expect( getParagraphLength( mockPaper, new EnglishResearcher( mockPaper ) )[ 0 ].countLength ).toBe( 1 );
const mockResearcher = new EnglishResearcher( mockPaper );
buildTree( mockPaper, mockResearcher );

const paragraphLengths = getParagraphLength( mockPaper, mockResearcher );

expect( paragraphLengths.length ).toBe( 2 );
expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 1 );
} );
} );
Loading