/**
 * Word or character counting functionality. Count words or characters in a
 * provided text string.
 *
 * @namespace wp.utils
 *
 * @since 2.6.0
 * @output wp-admin/js/word-count.js
 */

( function() {
	var hasOwn = Object.prototype.hasOwnProperty;

	/*
	 * Global object the utility reads its localization from and attaches itself
	 * to: `window` in a browser. The guarded fallback only matters when the file
	 * is loaded in an environment where `window` is not defined.
	 */
	var root = typeof window !== 'undefined' ? window :
		( typeof globalThis !== 'undefined' ? globalThis : {} );

	/**
	 * Escapes regular expression metacharacters so that a value is matched
	 * literally when embedded in a dynamically built RegExp.
	 *
	 * @param {string} value Raw value, e.g. a shortcode tag.
	 *
	 * @return {string} The value with all metacharacters backslash-escaped.
	 */
	function escapeRegExp( value ) {
		return String( value ).replace( /[.*+?^${}()|[\]\\]/g, '\\$&' );
	}

	/**
	 * Word counting utility
	 *
	 * Every instance gets its own settings object that inherits from the
	 * defaults in `WordCounter.prototype.settings`, so overrides passed to one
	 * instance never affect the defaults or any other instance.
	 *
	 * @namespace wp.utils.wordcounter
	 * @memberof  wp.utils
	 *
	 * @class
	 *
	 * @param {Object} settings                                   Optional. Key-value object containing overrides for
	 *                                                            settings.
	 * @param {RegExp} settings.HTMLRegExp                        Optional. Regular expression to find HTML elements.
	 * @param {RegExp} settings.HTMLcommentRegExp                 Optional. Regular expression to find HTML comments.
	 * @param {RegExp} settings.spaceRegExp                       Optional. Regular expression to find irregular space
	 *                                                            characters.
	 * @param {RegExp} settings.HTMLEntityRegExp                  Optional. Regular expression to find HTML entities.
	 * @param {RegExp} settings.connectorRegExp                   Optional. Regular expression to find connectors that
	 *                                                            split words.
	 * @param {RegExp} settings.removeRegExp                      Optional. Regular expression to find and remove unwanted
	 *                                                            characters to reduce false-positives.
	 * @param {RegExp} settings.astralRegExp                      Optional. Regular expression to find surrogate pairs
	 *                                                            (astral characters) when counting characters.
	 * @param {RegExp} settings.wordsRegExp                       Optional. Regular expression to find words by spaces.
	 * @param {RegExp} settings.characters_excluding_spacesRegExp Optional. Regular expression to find characters which
	 *                                                            are non-spaces.
	 * @param {RegExp} settings.characters_including_spacesRegExp Optional. Regular expression to find characters
	 *                                                            including spaces.
	 * @param {RegExp} settings.shortcodesRegExp                  Optional. Regular expression to find shortcodes.
	 * @param {Object} settings.l10n                              Optional. Localization object containing specific
	 *                                                            configuration for the current localization.
	 * @param {string} settings.l10n.type                         Optional. Method of finding words to count.
	 * @param {Array}  settings.l10n.shortcodes                   Optional. Array of shortcodes that should be removed
	 *                                                            from the text.
	 *
	 * @return {void}
	 */
	function WordCounter( settings ) {
		var key,
			shortcodes,
			escapedShortcodes,
			i;

		/*
		 * Shadow the shared defaults with a per-instance object. Reads still fall
		 * through to the prototype defaults, but every write below lands on this
		 * instance only instead of mutating the defaults for everybody.
		 */
		this.settings = Object.create( this.settings );

		// Apply provided settings to the instance settings.
		if ( settings ) {
			for ( key in settings ) {

				// Only apply own properties; do not rely on settings.hasOwnProperty existing.
				if ( hasOwn.call( settings, key ) ) {
					this.settings[ key ] = settings[ key ];
				}
			}
		}

		// An explicit null/undefined l10n override must not break the lookups below.
		if ( ! this.settings.l10n ) {
			this.settings.l10n = {};
		}

		shortcodes = this.settings.l10n.shortcodes;

		// If there are any localization shortcodes, build the regular expression that strips them.
		if ( shortcodes && shortcodes.length ) {
			escapedShortcodes = [];

			// Shortcode tags are literal names; escape them so characters such as "+" or "." match themselves.
			for ( i = 0; i < shortcodes.length; i++ ) {
				escapedShortcodes.push( escapeRegExp( shortcodes[ i ] ) );
			}

			this.settings.shortcodesRegExp = new RegExp( '\\[\\/?(?:' + escapedShortcodes.join( '|' ) + ')[^\\]]*?\\]', 'g' );
		}
	}

	// Default settings, shared (read-only) by all instances.
	WordCounter.prototype.settings = {
		HTMLRegExp: /<\/?[a-z][^>]*?>/gi,
		HTMLcommentRegExp: /<!--[\s\S]*?-->/g,

		/*
		 * Non-breaking spaces, both as HTML entities and as the literal U+00A0
		 * character. These must be turned into regular spaces before HTML
		 * entities are stripped, otherwise "foo&nbsp;bar" collapses into one word.
		 */
		spaceRegExp: /&nbsp;|&#160;|\u00A0/gi,
		HTMLEntityRegExp: /&\S+?;/g,

		// \u2014 = em-dash.
		connectorRegExp: /--|\u2014/g,

		// Characters to be removed from input text.
		removeRegExp: new RegExp( [
			'[',

				// Basic Latin (extract).
				'\u0021-\u0040\u005B-\u0060\u007B-\u007E',

				// Latin-1 Supplement (extract).
				'\u0080-\u00BF\u00D7\u00F7',

				/*
				 * The following range consists of:
				 * General Punctuation
				 * Superscripts and Subscripts
				 * Currency Symbols
				 * Combining Diacritical Marks for Symbols
				 * Letterlike Symbols
				 * Number Forms
				 * Arrows
				 * Mathematical Operators
				 * Miscellaneous Technical
				 * Control Pictures
				 * Optical Character Recognition
				 * Enclosed Alphanumerics
				 * Box Drawing
				 * Block Elements
				 * Geometric Shapes
				 * Miscellaneous Symbols
				 * Dingbats
				 * Miscellaneous Mathematical Symbols-A
				 * Supplemental Arrows-A
				 * Braille Patterns
				 * Supplemental Arrows-B
				 * Miscellaneous Mathematical Symbols-B
				 * Supplemental Mathematical Operators
				 * Miscellaneous Symbols and Arrows
				 */
				'\u2000-\u2BFF',

				// Supplemental Punctuation.
				'\u2E00-\u2E7F',
			']'
		].join( '' ), 'g' ),

		// Match UTF-16 surrogate pairs, see https://en.wikipedia.org/wiki/UTF-16#U.2BD800_to_U.2BDFFF
		astralRegExp: /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
		wordsRegExp: /\S\s+/g,
		characters_excluding_spacesRegExp: /\S/g,

		/*
		 * Match anything that is not a formatting character, excluding:
		 * \f = form feed
		 * \n = new line
		 * \r = carriage return
		 * \t = tab
		 * \v = vertical tab
		 * \u00AD = soft hyphen
		 * \u2028 = line separator
		 * \u2029 = paragraph separator
		 */
		characters_including_spacesRegExp: /[^\f\n\r\t\v\u00AD\u2028\u2029]/g,
		l10n: root.wordCountL10n || {}
	};

	/**
	 * Counts the number of words (or other specified type) in the specified text.
	 *
	 * HTML tags and shortcodes are replaced by new lines, HTML comments are
	 * removed and non-breaking spaces become regular spaces before counting.
	 *
	 * @since 2.6.0
	 *
	 * @memberof wp.utils.wordcounter
	 *
	 * @param {string} text Text to count elements in.
	 * @param {string} type Optional. Specify type to use: 'words',
	 *                      'characters_excluding_spaces' or 'characters_including_spaces'.
	 *                      Defaults to the localized type; anything else falls back to 'words'.
	 *
	 * @return {number} The number of items counted; 0 for empty text.
	 */
	WordCounter.prototype.count = function( text, type ) {
		var settings = this.settings,
			matches;

		// Nothing to count.
		if ( ! text ) {
			return 0;
		}

		// Use default type if none was provided.
		type = type || settings.l10n.type;

		// Sanitize type to one of three possibilities: 'words', 'characters_excluding_spaces' or 'characters_including_spaces'.
		if ( type !== 'characters_excluding_spaces' && type !== 'characters_including_spaces' ) {
			type = 'words';
		}

		// The trailing new line lets the words expression match the final word as well.
		text = text + '\n';

		// Replace all HTML with a new-line.
		text = text.replace( settings.HTMLRegExp, '\n' );

		// Remove all HTML comments.
		text = text.replace( settings.HTMLcommentRegExp, '' );

		// If a shortcode regular expression has been provided use it to remove shortcodes.
		if ( settings.shortcodesRegExp ) {
			text = text.replace( settings.shortcodesRegExp, '\n' );
		}

		// Normalize non-breaking space to a normal space.
		text = text.replace( settings.spaceRegExp, ' ' );

		if ( type === 'words' ) {

			// Remove HTML Entities.
			text = text.replace( settings.HTMLEntityRegExp, '' );

			// Convert connectors to spaces to count attached text as words.
			text = text.replace( settings.connectorRegExp, ' ' );

			// Remove unwanted characters.
			text = text.replace( settings.removeRegExp, '' );
		} else {

			// Convert HTML Entities to "a" so each entity counts as one character.
			text = text.replace( settings.HTMLEntityRegExp, 'a' );

			// Convert surrogate pairs to "a" so each astral character counts once, not twice.
			text = text.replace( settings.astralRegExp, 'a' );
		}

		// Match with the selected type regular expression to count the items.
		matches = text.match( settings[ type + 'RegExp' ] );

		// No matches means nothing countable was left after clean-up.
		return matches ? matches.length : 0;
	};

	// Add the WordCounter to the WP Utils.
	root.wp = root.wp || {};
	root.wp.utils = root.wp.utils || {};
	root.wp.utils.WordCounter = WordCounter;
} )();
|