Changeset 2078109
- Timestamp:
- 04/30/2019 07:31:16 PM (7 years ago)
- Location:
- convertkit
- Files:
-
- 79 added
- 11 edited
-
tags/1.7.5 (added)
-
tags/1.7.5/LICENSE (added)
-
tags/1.7.5/admin (added)
-
tags/1.7.5/admin/class-convertkit-settings.php (added)
-
tags/1.7.5/admin/class-convertkit-tinymce.php (added)
-
tags/1.7.5/admin/class-multi-value-field-table.php (added)
-
tags/1.7.5/admin/section (added)
-
tags/1.7.5/admin/section/class-convertkit-settings-base.php (added)
-
tags/1.7.5/admin/section/class-convertkit-settings-contactform7.php (added)
-
tags/1.7.5/admin/section/class-convertkit-settings-general.php (added)
-
tags/1.7.5/admin/section/class-convertkit-settings-tools.php (added)
-
tags/1.7.5/admin/section/class-convertkit-settings-wishlist.php (added)
-
tags/1.7.5/includes (added)
-
tags/1.7.5/includes/class-ck-widget-form.php (added)
-
tags/1.7.5/includes/class-convertkit-api.php (added)
-
tags/1.7.5/includes/class-convertkit-custom-content.php (added)
-
tags/1.7.5/includes/class-convertkit.php (added)
-
tags/1.7.5/includes/integration (added)
-
tags/1.7.5/includes/integration/class-convertkit-contactform7-integration.php (added)
-
tags/1.7.5/includes/integration/class-convertkit-wishlist-integration.php (added)
-
tags/1.7.5/languages (added)
-
tags/1.7.5/languages/README.md (added)
-
tags/1.7.5/languages/convertkit.pot (added)
-
tags/1.7.5/lib (added)
-
tags/1.7.5/lib/README.md (added)
-
tags/1.7.5/lib/browser.php (added)
-
tags/1.7.5/readme.txt (added)
-
tags/1.7.5/resources (added)
-
tags/1.7.5/resources/backend (added)
-
tags/1.7.5/resources/backend/ck-2x.png (added)
-
tags/1.7.5/resources/backend/ck.png (added)
-
tags/1.7.5/resources/backend/tinymce-buttons.js (added)
-
tags/1.7.5/resources/backend/wp-convertkit.css (added)
-
tags/1.7.5/resources/backend/wp-convertkit.js (added)
-
tags/1.7.5/resources/frontend (added)
-
tags/1.7.5/resources/frontend/jquery.cookie.js (added)
-
tags/1.7.5/resources/frontend/jquery.cookie.min.js (added)
-
tags/1.7.5/resources/frontend/wp-convertkit.css (added)
-
tags/1.7.5/resources/frontend/wp-convertkit.js (added)
-
tags/1.7.5/vendor (added)
-
tags/1.7.5/vendor/autoload.php (added)
-
tags/1.7.5/vendor/composer (added)
-
tags/1.7.5/vendor/composer/ClassLoader.php (added)
-
tags/1.7.5/vendor/composer/LICENSE (added)
-
tags/1.7.5/vendor/composer/autoload_classmap.php (added)
-
tags/1.7.5/vendor/composer/autoload_namespaces.php (added)
-
tags/1.7.5/vendor/composer/autoload_psr4.php (added)
-
tags/1.7.5/vendor/composer/autoload_real.php (added)
-
tags/1.7.5/vendor/composer/autoload_static.php (added)
-
tags/1.7.5/vendor/composer/installed.json (added)
-
tags/1.7.5/vendor/kub-at (added)
-
tags/1.7.5/vendor/kub-at/php-simple-html-dom-parser (added)
-
tags/1.7.5/vendor/kub-at/php-simple-html-dom-parser/CONTRIBUTING.md (added)
-
tags/1.7.5/vendor/kub-at/php-simple-html-dom-parser/LICENSE (added)
-
tags/1.7.5/vendor/kub-at/php-simple-html-dom-parser/README.md (added)
-
tags/1.7.5/vendor/kub-at/php-simple-html-dom-parser/composer.json (added)
-
tags/1.7.5/vendor/kub-at/php-simple-html-dom-parser/src (added)
-
tags/1.7.5/vendor/kub-at/php-simple-html-dom-parser/src/KubAT (added)
-
tags/1.7.5/vendor/kub-at/php-simple-html-dom-parser/src/KubAT/PhpSimple (added)
-
tags/1.7.5/vendor/kub-at/php-simple-html-dom-parser/src/KubAT/PhpSimple/HtmlDomParser.php (added)
-
tags/1.7.5/vendor/kub-at/php-simple-html-dom-parser/src/KubAT/PhpSimple/lib (added)
-
tags/1.7.5/vendor/kub-at/php-simple-html-dom-parser/src/KubAT/PhpSimple/lib/simple_html_dom.php (added)
-
tags/1.7.5/vendor/oldmine (added)
-
tags/1.7.5/vendor/oldmine/relative-to-absolute-url (added)
-
tags/1.7.5/vendor/oldmine/relative-to-absolute-url/.gitignore (added)
-
tags/1.7.5/vendor/oldmine/relative-to-absolute-url/CHANGELOG.md (added)
-
tags/1.7.5/vendor/oldmine/relative-to-absolute-url/README.md (added)
-
tags/1.7.5/vendor/oldmine/relative-to-absolute-url/composer.json (added)
-
tags/1.7.5/vendor/oldmine/relative-to-absolute-url/src (added)
-
tags/1.7.5/vendor/oldmine/relative-to-absolute-url/src/RelativeToAbsoluteUrl.php (added)
-
tags/1.7.5/vendor/oldmine/relative-to-absolute-url/tests (added)
-
tags/1.7.5/vendor/oldmine/relative-to-absolute-url/tests/oldmine (added)
-
tags/1.7.5/vendor/oldmine/relative-to-absolute-url/tests/oldmine/tests (added)
-
tags/1.7.5/vendor/oldmine/relative-to-absolute-url/tests/oldmine/tests/RelativeToAbsoluteUrlTest.php (added)
-
tags/1.7.5/views (added)
-
tags/1.7.5/views/backend (added)
-
tags/1.7.5/views/backend/meta-boxes (added)
-
tags/1.7.5/views/backend/meta-boxes/meta-box.php (added)
-
tags/1.7.5/wp-convertkit.php (added)
-
trunk/admin/class-convertkit-settings.php (modified) (1 diff)
-
trunk/admin/section/class-convertkit-settings-tools.php (modified) (1 diff)
-
trunk/includes/class-convertkit-api.php (modified) (3 diffs)
-
trunk/readme.txt (modified) (2 diffs)
-
trunk/vendor/autoload.php (modified) (1 diff)
-
trunk/vendor/composer/autoload_real.php (modified) (3 diffs)
-
trunk/vendor/composer/autoload_static.php (modified) (2 diffs)
-
trunk/vendor/composer/installed.json (modified) (4 diffs)
-
trunk/vendor/kub-at/php-simple-html-dom-parser/README.md (modified) (1 diff)
-
trunk/vendor/kub-at/php-simple-html-dom-parser/src/KubAT/PhpSimple/lib/simple_html_dom.php (modified) (87 diffs)
-
trunk/wp-convertkit.php (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
convertkit/trunk/admin/class-convertkit-settings.php
r2057877 r2078109 131 131 <strong> 132 132 <?php 133 echo sprintf( __( 'Notice: Your database does not appear to support the %s. If you experience difficulties connecting to ConvertKit this may be why. Please contact your webhost to have your database upgraded.',133 echo sprintf( __( 'Notice: Your database does not appear to support the %s. <em>If you experience difficulties</em> connecting to ConvertKit, this may be why. Please contact your webhost to have your database upgraded. If you do not notice any issues, you may safely ignore this message.', 134 134 'convertkit' ), 135 135 '<a href="https://make.wordpress.org/core/2015/04/02/the-utf8mb4-upgrade/">utf8mb4 character set</a>' ); -
convertkit/trunk/admin/section/class-convertkit-settings-tools.php
r2057877 r2078109 67 67 */ 68 68 public function view_log() { 69 // Only try to get file contents if the file exists; otherwise default to empty string 69 70 $log_file = trailingslashit( CONVERTKIT_PLUGIN_PATH ) . 'log.txt'; 70 $log = file_ get_contents( $log_file );71 $log = file_exists( $log_file ) ? file_get_contents( $log_file ) : ''; 71 72 72 73 ?> -
convertkit/trunk/includes/class-convertkit-api.php
r2057877 r2078109 127 127 ); 128 128 129 $update_forms = update_option( 'convertkit_forms', $forms );130 $update_landing_pages = update_option( 'convertkit_landing_pages', $landing_pages );131 $update_tags = update_option( 'convertkit_tags', $tags );129 $update_forms = $this->maybe_update_option( 'convertkit_forms', $forms ); 130 $update_landing_pages = $this->maybe_update_option( 'convertkit_landing_pages', $landing_pages ); 131 $update_tags = $this->maybe_update_option( 'convertkit_tags', $tags ); 132 132 133 133 } else { … … 145 145 } 146 146 } 147 $update_forms = update_option( 'convertkit_forms', $forms );148 $update_landing_pages = update_option( 'convertkit_landing_pages', $landing_pages );147 $update_forms = $this->maybe_update_option( 'convertkit_forms', $forms ); 148 $update_landing_pages = $this->maybe_update_option( 'convertkit_landing_pages', $landing_pages ); 149 149 150 150 // Tags … … 154 154 $tags[] = $tag; 155 155 } 156 $update_tags = update_option( 'convertkit_tags', $tags );156 $update_tags = $this->maybe_update_option( 'convertkit_tags', $tags ); 157 157 } 158 158 159 159 return $update_forms && $update_landing_pages && $update_tags; 160 } 161 162 /** 163 * Attempt to store updated forms, tags, or landing pages retrieved from the ConvertKit API. 164 * If they match what is already stored, WordPress's built in update_option() function will return 165 * false, which is unhelpful. So, if it returns false, we check if what we tried to store matches what is 166 * already stored, and if so, we return true. 167 * 168 * This way, we know if there was a failure or not. 169 * 170 * @param string $option_name 171 * @param mixed $option_value 172 * 173 * return bool true if option was updated or if no update was needed, false if failure 174 */ 175 public function maybe_update_option( $option_name, $option_value ) { 176 $result = update_option( $option_name, $option_value ); 177 178 if ( !$result ) { 179 $old = get_option( $option_name, $option_value ); 180 $result = $old === $option_value ? true : false; 181 } 182 183 return $result; 160 184 } 161 185 -
convertkit/trunk/readme.txt
r2058401 r2078109 5 5 Requires at least: 3.6 6 6 Tested up to: 5.1.1 7 Stable tag: 1.7. 47 Stable tag: 1.7.5 8 8 License: GPLv2 or later 9 9 License URI: http://www.gnu.org/licenses/gpl-2.0.html … … 46 46 47 47 == Changelog == 48 ### 1.7.5 2019-04-30 49 * Fix false positive detection of character set issues related to using emojis in forms & landing pages 50 * Don't show error on tools tab on first visit with logging turned on 51 * Fix code conflict with some other plugins that resulted in PHP warnings being displayed 48 52 49 53 ### 1.7.4 2019-03-27 -
convertkit/trunk/vendor/autoload.php
r2058401 r2078109 5 5 require_once __DIR__ . '/composer/autoload_real.php'; 6 6 7 return ComposerAutoloaderInit d20903299467e7addb7f94ee9c1a18b8::getLoader();7 return ComposerAutoloaderInitcf87f89295679352abe450fddf326efa::getLoader(); -
convertkit/trunk/vendor/composer/autoload_real.php
r2058401 r2078109 3 3 // autoload_real.php @generated by Composer 4 4 5 class ComposerAutoloaderInit d20903299467e7addb7f94ee9c1a18b85 class ComposerAutoloaderInitcf87f89295679352abe450fddf326efa 6 6 { 7 7 private static $loader; … … 20 20 } 21 21 22 spl_autoload_register(array('ComposerAutoloaderInit d20903299467e7addb7f94ee9c1a18b8', 'loadClassLoader'), true, true);22 spl_autoload_register(array('ComposerAutoloaderInitcf87f89295679352abe450fddf326efa', 'loadClassLoader'), true, true); 23 23 self::$loader = $loader = new \Composer\Autoload\ClassLoader(); 24 spl_autoload_unregister(array('ComposerAutoloaderInit d20903299467e7addb7f94ee9c1a18b8', 'loadClassLoader'));24 spl_autoload_unregister(array('ComposerAutoloaderInitcf87f89295679352abe450fddf326efa', 'loadClassLoader')); 25 25 26 26 $useStaticLoader = PHP_VERSION_ID >= 50600 && !defined('HHVM_VERSION') && (!function_exists('zend_loader_file_encoded') || !zend_loader_file_encoded()); … … 28 28 require_once __DIR__ . '/autoload_static.php'; 29 29 30 call_user_func(\Composer\Autoload\ComposerStaticInit d20903299467e7addb7f94ee9c1a18b8::getInitializer($loader));30 call_user_func(\Composer\Autoload\ComposerStaticInitcf87f89295679352abe450fddf326efa::getInitializer($loader)); 31 31 } else { 32 32 $map = require __DIR__ . '/autoload_namespaces.php'; -
convertkit/trunk/vendor/composer/autoload_static.php
r2058401 r2078109 5 5 namespace Composer\Autoload; 6 6 7 class ComposerStaticInit d20903299467e7addb7f94ee9c1a18b87 class ComposerStaticInitcf87f89295679352abe450fddf326efa 8 8 { 9 9 public static $prefixLengthsPsr4 = array ( … … 39 39 { 40 40 return \Closure::bind(function () use ($loader) { 41 $loader->prefixLengthsPsr4 = ComposerStaticInit d20903299467e7addb7f94ee9c1a18b8::$prefixLengthsPsr4;42 $loader->prefixDirsPsr4 = ComposerStaticInit d20903299467e7addb7f94ee9c1a18b8::$prefixDirsPsr4;43 $loader->prefixesPsr0 = ComposerStaticInit d20903299467e7addb7f94ee9c1a18b8::$prefixesPsr0;41 $loader->prefixLengthsPsr4 = ComposerStaticInitcf87f89295679352abe450fddf326efa::$prefixLengthsPsr4; 42 $loader->prefixDirsPsr4 = ComposerStaticInitcf87f89295679352abe450fddf326efa::$prefixDirsPsr4; 43 $loader->prefixesPsr0 = ComposerStaticInitcf87f89295679352abe450fddf326efa::$prefixesPsr0; 44 44 45 45 }, null, ClassLoader::class); -
convertkit/trunk/vendor/composer/installed.json
r2058401 r2078109 2 2 { 3 3 "name": "kub-at/php-simple-html-dom-parser", 4 "version": "1. 7.1",5 "version_normalized": "1. 7.1.0",4 "version": "1.8.2", 5 "version_normalized": "1.8.2.0", 6 6 "source": { 7 7 "type": "git", 8 "url": "https://github.com/ Kub-AT/php-simple-html-dom-parser.git",9 "reference": " 7a745b20157efb0f1be3021394769bd6b8e9ed4e"8 "url": "https://github.com/ConvertKit/php-simple-html-dom-parser.git", 9 "reference": "2133982fa268436eca5a8ea2a2dd47344736edb0" 10 10 }, 11 11 "dist": { 12 12 "type": "zip", 13 "url": "https://api.github.com/repos/ Kub-AT/php-simple-html-dom-parser/zipball/7a745b20157efb0f1be3021394769bd6b8e9ed4e",14 "reference": " 7a745b20157efb0f1be3021394769bd6b8e9ed4e",13 "url": "https://api.github.com/repos/ConvertKit/php-simple-html-dom-parser/zipball/2133982fa268436eca5a8ea2a2dd47344736edb0", 14 "reference": "2133982fa268436eca5a8ea2a2dd47344736edb0", 15 15 "shasum": "" 16 16 }, … … 18 18 "php": ">=5.3.2" 19 19 }, 20 "time": "2019-0 1-02T14:33:28+00:00",20 "time": "2019-04-24T20:12:57+00:00", 21 21 "type": "library", 22 22 "installation-source": "dist", … … 26 26 } 27 27 }, 28 "notification-url": "https://packagist.org/downloads/",29 28 "license": [ 30 29 "MIT" … … 43 42 "homepage": "http://simplehtmldom.sourceforge.net/", 44 43 "keywords": [ 45 "Simple",46 44 "dom", 47 "html" 48 ] 45 "html", 46 "simple" 47 ], 48 "support": { 49 "source": "https://github.com/ConvertKit/php-simple-html-dom-parser/tree/1.8.2" 50 } 49 51 }, 50 52 { -
convertkit/trunk/vendor/kub-at/php-simple-html-dom-parser/README.md
r2042284 r2078109 2 2 ========================== 3 3 4 Version 1. 7.1 - PHP 7.3 campatible5 Changelog: https://sourceforge.net/projects/simplehtmldom/files/simplehtmldom/1.7/4 Version 1.8.1 - PHP 7.3 compatible 5 PHP Simple HTML DOM Parser changelog: https://sourceforge.net/projects/simplehtmldom/files/simplehtmldom/1.8.1/ 6 6 7 7 -
convertkit/trunk/vendor/kub-at/php-simple-html-dom-parser/src/KubAT/PhpSimple/lib/simple_html_dom.php
r2042284 r2078109 4 4 /** 5 5 * Website: http://sourceforge.net/projects/simplehtmldom/ 6 * Additional projects that may be used: http://sourceforge.net/projects/debugobject/6 * Additional projects: http://sourceforge.net/projects/debugobject/ 7 7 * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/) 8 8 * Contributions by: … … 14 14 * 15 15 * Paperg - Added case insensitive testing of the value of the selector. 16 * Paperg - Added tag_start for the starting index of tags - NOTE: This works but not accurately. 17 * This tag_start gets counted AFTER \r\n have been crushed out, and after the remove_noice calls so it will not reflect the REAL position of the tag in the source, 18 * it will almost always be smaller by some amount. 19 * We use this to determine how far into the file the tag in question is. This "percentage will never be accurate as the $dom->size is the "real" number of bytes the dom was created from. 20 * but for most purposes, it's a really good estimation. 21 * Paperg - Added the forceTagsClosed to the dom constructor. Forcing tags closed is great for malformed html, but it CAN lead to parsing errors. 16 * 17 * Paperg - Added tag_start for the starting index of tags - NOTE: This works 18 * but not accurately. This tag_start gets counted AFTER \r\n have been crushed 19 * out, and after the remove_noice calls so it will not reflect the REAL 20 * position of the tag in the source, it will almost always be smaller by some 21 * amount. We use this to determine how far into the file the tag in question 22 * is. This "percentage" will never be accurate as the $dom->size is the "real" 23 * number of bytes the dom was created from. But for most purposes, it's a 24 * really good estimation. 25 * 26 * Paperg - Added the forceTagsClosed to the dom constructor. Forcing tags 27 * closed is great for malformed html, but it CAN lead to parsing errors. 28 * 22 29 * Allow the user to tell us how much they trust the html. 23 * Paperg add the text and plaintext to the selectors for the find syntax. plaintext implies text in the innertext of a node. text implies that the tag is a text node.24 * This allows for us to find tags based on the text they contain.25 * Create find_ancestor_tag to see if a tag is - at any level - inside of another specific tag.26 * Paperg: added parse_charset so that we know about the character set of the source document.27 * NOTE: If the user's system has a routine called get_last_retrieve_url_contents_content_type availalbe, we will assume it's returning the content-type header from the28 * last transfer or curl_exec, and we will parse that and use it in preference to any other method of charset detection.29 30 * 30 * Found infinite loop in the case of broken html in restore_noise. Rewrote to protect from that. 31 * Paperg add the text and plaintext to the selectors for the find syntax. 32 * plaintext implies text in the innertext of a node. text implies that the 33 * tag is a text node. This allows for us to find tags based on the text they 34 * contain. 35 * 36 * Create find_ancestor_tag to see if a tag is - at any level - inside of 37 * another specific tag. 38 * 39 * Paperg: added parse_charset so that we know about the character set of 40 * the source document. NOTE: If the user's system has a routine called 41 * get_last_retrieve_url_contents_content_type availalbe, we will assume it's 42 * returning the content-type header from the last transfer or curl_exec, and 43 * we will parse that and use it in preference to any other method of charset 44 * detection. 45 * 46 * Found infinite loop in the case of broken html in restore_noise. Rewrote to 47 * protect from that. 48 * 31 49 * PaperG (John Schlick) Added get_display_size for "IMG" tags. 32 50 * … … 37 55 * @author John Schlick 38 56 * @author Rus Carroll 39 * @version Rev. 1. 7 (214)57 * @version Rev. 1.8.1 (247) 40 58 * @package PlaceLocalInclude 41 59 * @subpackage simple_html_dom … … 46 64 * @author S.C. Chen <[email protected]> 47 65 */ 48 define('HDOM_TYPE_ELEMENT', 1); 49 define('HDOM_TYPE_COMMENT', 2); 50 define('HDOM_TYPE_TEXT', 3); 51 define('HDOM_TYPE_ENDTAG', 4); 52 define('HDOM_TYPE_ROOT', 5); 53 define('HDOM_TYPE_UNKNOWN', 6); 54 define('HDOM_QUOTE_DOUBLE', 0); 55 define('HDOM_QUOTE_SINGLE', 1); 56 define('HDOM_QUOTE_NO', 3); 57 define('HDOM_INFO_BEGIN', 0); 58 define('HDOM_INFO_END', 1); 59 define('HDOM_INFO_QUOTE', 2); 60 define('HDOM_INFO_SPACE', 3); 61 define('HDOM_INFO_TEXT', 4); 62 define('HDOM_INFO_INNER', 5); 63 define('HDOM_INFO_OUTER', 6); 64 define('HDOM_INFO_ENDSPACE',7); 65 define('DEFAULT_TARGET_CHARSET', 'UTF-8'); 66 define('DEFAULT_BR_TEXT', "\r\n"); 67 define('DEFAULT_SPAN_TEXT', " "); 68 define('MAX_FILE_SIZE', 600000); 66 if ( ! defined( 'HDOM_TYPE_ELEMENT' ) ) { 67 define( 'HDOM_TYPE_ELEMENT', 1 ); 68 } 69 if ( ! defined( 'HDOM_TYPE_COMMENT' ) ) { 70 define( 'HDOM_TYPE_COMMENT', 2 ); 71 } 72 if ( ! defined( 'HDOM_TYPE_TEXT' ) ) { 73 define( 'HDOM_TYPE_TEXT', 3 ); 74 } 75 if ( ! defined( 'HDOM_TYPE_ENDTAG' ) ) { 76 define( 'HDOM_TYPE_ENDTAG', 4 ); 77 } 78 if ( ! defined( 'HDOM_TYPE_ROOT' ) ) { 79 define( 'HDOM_TYPE_ROOT', 5 ); 80 } 81 if ( ! defined( 'HDOM_TYPE_UNKNOWN' ) ) { 82 define( 'HDOM_TYPE_UNKNOWN', 6 ); 83 } 84 if ( ! defined( 'HDOM_QUOTE_DOUBLE' ) ) { 85 define( 'HDOM_QUOTE_DOUBLE', 0 ); 86 } 87 if ( ! defined( 'HDOM_QUOTE_SINGLE' ) ) { 88 define( 'HDOM_QUOTE_SINGLE', 1 ); 89 } 90 if ( ! defined( 'HDOM_QUOTE_NO' ) ) { 91 define( 'HDOM_QUOTE_NO', 3 ); 92 } 93 if ( ! defined( 'HDOM_INFO_BEGIN' ) ) { 94 define( 'HDOM_INFO_BEGIN', 0 ); 95 } 96 if ( ! defined( 'HDOM_INFO_END' ) ) { 97 define( 'HDOM_INFO_END', 1 ); 98 } 99 if ( ! defined( 'HDOM_INFO_QUOTE' ) ) { 100 define( 'HDOM_INFO_QUOTE', 2 ); 101 } 102 if ( ! defined( 'HDOM_INFO_SPACE' ) ) { 103 define( 'HDOM_INFO_SPACE', 3 ); 104 } 105 if ( ! defined( 'HDOM_INFO_TEXT' ) ) { 106 define( 'HDOM_INFO_TEXT', 4 ); 107 } 108 if ( ! defined( 'HDOM_INFO_INNER' ) ) { 109 define( 'HDOM_INFO_INNER', 5 ); 110 } 111 if ( ! defined( 'HDOM_INFO_OUTER' ) ) { 112 define( 'HDOM_INFO_OUTER', 6 ); 113 } 114 if ( ! defined( 'HDOM_INFO_ENDSPACE' ) ) { 115 define( 'HDOM_INFO_ENDSPACE', 7 ); 116 } 117 118 /** The default target charset */ 119 if ( ! defined( 'DEFAULT_TARGET_CHARSET' ) ) { 120 define( 'DEFAULT_TARGET_CHARSET', 'UTF-8' ); 121 } 122 123 /** The default <br> text used instead of <br> tags when returning text */ 124 if ( ! defined( 'DEFAULT_BR_TEXT' ) ) { 125 define( 'DEFAULT_BR_TEXT', "\r\n" ); 126 } 127 128 /** The default <span> text used instead of <span> tags when returning text */ 129 if ( ! defined( 'DEFAULT_SPAN_TEXT' ) ) { 130 define( 'DEFAULT_SPAN_TEXT', ' ' ); 131 } 132 133 /** The maximum file size the parser should load */ 134 if ( ! defined( 'MAX_FILE_SIZE' ) ) { 135 define( 'MAX_FILE_SIZE', 600000 ); 136 } 69 137 70 138 /** Contents between curly braces "{" and "}" are interpreted as text */ 71 define('HDOM_SMARTY_AS_TEXT', 1); 139 if ( ! defined( 'HDOM_SMARTY_AS_TEXT' ) ) { 140 define( 'HDOM_SMARTY_AS_TEXT', 1 ); 141 } 72 142 73 143 // helper functions … … 75 145 // get html dom from file 76 146 // $maxlen is defined in the code as PHP_STREAM_COPY_ALL which is defined as -1. 77 function file_get_html($url, $use_include_path = false, $context=null, $offset = 0, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) 147 function file_get_html( 148 $url, 149 $use_include_path = false, 150 $context = null, 151 $offset = 0, 152 $maxLen = -1, 153 $lowercase = true, 154 $forceTagsClosed = true, 155 $target_charset = DEFAULT_TARGET_CHARSET, 156 $stripRN = true, 157 $defaultBRText = DEFAULT_BR_TEXT, 158 $defaultSpanText = DEFAULT_SPAN_TEXT) 78 159 { 79 160 // Ensure maximum length is greater than zero … … 81 162 82 163 // We DO force the tags to be terminated. 83 $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText); 84 // For sourceforge users: uncomment the next line and comment the retrieve_url_contents line 2 lines down if it is not already done. 85 $contents = file_get_contents($url, $use_include_path, $context, $offset, $maxLen); 86 // Paperg - use our own mechanism for getting the contents as we want to control the timeout. 87 //$contents = retrieve_url_contents($url); 88 if (empty($contents) || strlen($contents) > $maxLen) 89 { 90 return false; 91 } 164 $dom = new simple_html_dom( 165 null, 166 $lowercase, 167 $forceTagsClosed, 168 $target_charset, 169 $stripRN, 170 $defaultBRText, 171 $defaultSpanText); 172 173 /** 174 * For sourceforge users: uncomment the next line and comment the 175 * retrieve_url_contents line 2 lines down if it is not already done. 176 */ 177 $contents = file_get_contents( 178 $url, 179 $use_include_path, 180 $context, 181 $offset, 182 $maxLen); 183 184 // Paperg - use our own mechanism for getting the contents as we want to 185 // control the timeout. 186 // $contents = retrieve_url_contents($url); 187 if (empty($contents) || strlen($contents) > $maxLen) { return false; } 188 92 189 // The second parameter can force the selectors to all be lowercase. 93 190 $dom->load($contents, $lowercase, $stripRN); … … 96 193 97 194 // get html dom from string 98 function str_get_html($str, $lowercase=true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) 195 function str_get_html( 196 $str, 197 $lowercase = true, 198 $forceTagsClosed = true, 199 $target_charset = DEFAULT_TARGET_CHARSET, 200 $stripRN = true, 201 $defaultBRText = DEFAULT_BR_TEXT, 202 $defaultSpanText = DEFAULT_SPAN_TEXT) 99 203 { 100 $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText); 101 if (empty($str) || strlen($str) > MAX_FILE_SIZE) 102 { 204 $dom = new simple_html_dom( 205 null, 206 $lowercase, 207 $forceTagsClosed, 208 $target_charset, 209 $stripRN, 210 $defaultBRText, 211 $defaultSpanText); 212 213 if (empty($str) || strlen($str) > MAX_FILE_SIZE) { 103 214 $dom->clear(); 104 215 return false; 105 216 } 217 106 218 $dom->load($str, $lowercase, $stripRN); 107 219 return $dom; … … 109 221 110 222 // dump html dom tree 111 function dump_html_tree($node, $show_attr =true, $deep=0)223 function dump_html_tree($node, $show_attr = true, $deep = 0) 112 224 { 113 225 $node->dump($node); 114 226 } 115 227 116 117 228 /** 118 229 * simple html dom node 119 * PaperG - added ability for "find" routine to lowercase the value of the selector. 120 * PaperG - added $tag_start to track the start position of the tag in the total byte index 230 * PaperG - added ability for "find" routine to lowercase the value of the 231 * selector. 232 * 233 * PaperG - added $tag_start to track the start position of the tag in the total 234 * byte index 121 235 * 122 236 * @package PlaceLocalInclude … … 212 326 213 327 // dump node's tree 214 function dump($show_attr =true, $deep=0)328 function dump($show_attr = true, $deep = 0) 215 329 { 216 330 $lead = str_repeat(' ', $deep); 217 331 218 echo $lead .$this->tag;219 if ($show_attr && count($this->attr)>0) 220 {332 echo $lead . $this->tag; 333 334 if ($show_attr && count($this->attr) > 0) { 221 335 echo '('; 222 foreach ($this->attr as $k=>$v) 223 echo "[$k]=>\"".$this->$k.'", '; 336 foreach ($this->attr as $k => $v) { 337 echo "[$k]=>\"" . $this->$k . '", '; 338 } 224 339 echo ')'; 225 340 } 341 226 342 echo "\n"; 227 343 228 if ($this->nodes) 229 { 230 foreach ($this->nodes as $c) 231 { 232 $c->dump($show_attr, $deep+1); 344 if ($this->nodes) { 345 foreach ($this->nodes as $c) { 346 $c->dump($show_attr, $deep + 1); 233 347 } 234 348 } … … 237 351 238 352 // Debugging function to dump a single dom node with a bunch of information about it. 239 function dump_node($echo=true) 240 { 241 353 function dump_node($echo = true) 354 { 242 355 $string = $this->tag; 243 if (count($this->attr)>0) 244 {356 357 if (count($this->attr) > 0) { 245 358 $string .= '('; 246 foreach ($this->attr as $k=>$v) 247 { 248 $string .= "[$k]=>\"".$this->$k.'", '; 359 foreach ($this->attr as $k => $v) { 360 $string .= "[$k]=>\"" . $this->$k . '", '; 249 361 } 250 362 $string .= ')'; 251 363 } 252 if (count($this->_)>0) 253 {364 365 if (count($this->_) > 0) { 254 366 $string .= ' $_ ('; 255 foreach ($this->_ as $k=>$v) 256 { 257 if (is_array($v)) 258 { 367 foreach ($this->_ as $k => $v) { 368 if (is_array($v)) { 259 369 $string .= "[$k]=>("; 260 foreach ($v as $k2=>$v2) 261 { 262 $string .= "[$k2]=>\"".$v2.'", '; 370 foreach ($v as $k2 => $v2) { 371 $string .= "[$k2]=>\"" . $v2 . '", '; 263 372 } 264 $string .= ")";373 $string .= ')'; 265 374 } else { 266 $string .= "[$k]=>\"" .$v.'", ';375 $string .= "[$k]=>\"" . $v . '", '; 267 376 } 268 377 } 269 $string .= ")"; 270 } 271 272 if (isset($this->text)) 273 { 274 $string .= " text: (" . $this->text . ")"; 378 $string .= ')'; 379 } 380 381 if (isset($this->text)) { 382 $string .= ' text: (' . $this->text . ')'; 275 383 } 276 384 277 385 $string .= " HDOM_INNER_INFO: '"; 278 if (isset($node->_[HDOM_INFO_INNER])) 279 {386 387 if (isset($node->_[HDOM_INFO_INNER])) { 280 388 $string .= $node->_[HDOM_INFO_INNER] . "'"; 281 } 282 else 283 { 389 } else { 284 390 $string .= ' NULL '; 285 391 } 286 392 287 $string .= " children: ". count($this->children);288 $string .= " nodes: ". count($this->nodes);289 $string .= " tag_start: ". $this->tag_start;393 $string .= ' children: ' . count($this->children); 394 $string .= ' nodes: ' . count($this->nodes); 395 $string .= ' tag_start: ' . $this->tag_start; 290 396 $string .= "\n"; 291 397 292 if ($echo) 293 { 398 if ($echo) { 294 399 echo $string; 295 400 return; 296 } 297 else 298 { 401 } else { 299 402 return $string; 300 403 } … … 308 411 * @return object|null The parent node 309 412 */ 310 function parent($parent =null)413 function parent($parent = null) 311 414 { 312 415 // I am SURE that this doesn't work properly. 313 // It fails to unset the current node from it's current parents nodes or children list first.314 if ($parent !== null)315 {416 // It fails to unset the current node from it's current parents nodes or 417 // children list first. 418 if ($parent !== null) { 316 419 $this->parent = $parent; 317 420 $this->parent->nodes[] = $this; … … 338 441 * nodes or null if the index is invalid. 339 442 */ 340 function children($idx=-1) 341 { 342 if ($idx===-1) 343 { 443 function children($idx = -1) 444 { 445 if ($idx === -1) { 344 446 return $this->children; 345 447 } 346 if (isset($this->children[$idx])) 347 {448 449 if (isset($this->children[$idx])) { 348 450 return $this->children[$idx]; 349 451 } 452 350 453 return null; 351 454 } … … 362 465 function first_child() 363 466 { 364 if (count($this->children)>0) 365 { 467 if (count($this->children) > 0) { 366 468 return $this->children[0]; 367 469 } … … 379 481 function last_child() 380 482 { 381 if (($count=count($this->children))>0) 382 { 383 return $this->children[$count-1]; 483 if (($count = count($this->children)) > 0) { 484 return $this->children[$count - 1]; 384 485 } 385 486 return null; … … 394 495 function next_sibling() 395 496 { 396 if ($this->parent===null) 397 { 497 if ($this->parent === null) { 398 498 return null; 399 499 } … … 401 501 $idx = 0; 402 502 $count = count($this->parent->children); 403 while ($idx<$count && $this!==$this->parent->children[$idx]) 404 {503 504 while ($idx < $count && $this !== $this->parent->children[$idx]) { 405 505 ++$idx; 406 506 } 407 if (++$idx>=$count) 408 {507 508 if (++$idx >= $count) { 409 509 return null; 410 510 } 511 411 512 return $this->parent->children[$idx]; 412 513 } … … 420 521 function prev_sibling() 421 522 { 422 if ($this->parent===null) return null; 523 if ($this->parent === null) { return null; } 524 423 525 $idx = 0; 424 526 $count = count($this->parent->children); 425 while ($idx<$count && $this!==$this->parent->children[$idx]) 527 528 while ($idx < $count && $this !== $this->parent->children[$idx]) { 426 529 ++$idx; 427 if (--$idx<0) return null; 530 } 531 532 if (--$idx < 0) { return null; } 533 428 534 return $this->parent->children[$idx]; 429 535 } … … 447 553 $returnDom = $this; 448 554 449 while (!is_null($returnDom)) 450 {451 if (is_object($debug_object)) { $debug_object->debug_log(2, "Current tag is: " . $returnDom->tag); }452 453 if ($returnDom->tag == $tag) 454 {555 while (!is_null($returnDom)) { 556 if (is_object($debug_object)) { 557 $debug_object->debug_log(2, 'Current tag is: ' . $returnDom->tag); 558 } 559 560 if ($returnDom->tag == $tag) { 455 561 break; 456 562 } 563 457 564 $returnDom = $returnDom->parent; 458 565 } 566 459 567 return $returnDom; 460 568 } … … 467 575 function innertext() 468 576 { 469 if (isset($this->_[HDOM_INFO_INNER])) return $this->_[HDOM_INFO_INNER]; 470 if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); 577 if (isset($this->_[HDOM_INFO_INNER])) { 578 return $this->_[HDOM_INFO_INNER]; 579 } 580 581 if (isset($this->_[HDOM_INFO_TEXT])) { 582 return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); 583 } 471 584 472 585 $ret = ''; 473 foreach ($this->nodes as $n) 586 587 foreach ($this->nodes as $n) { 474 588 $ret .= $n->outertext(); 589 } 590 475 591 return $ret; 476 592 } … … 484 600 { 485 601 global $debug_object; 486 if (is_object($debug_object)) 487 {602 603 if (is_object($debug_object)) { 488 604 $text = ''; 489 if ($this->tag == 'text') 490 { 491 if (!empty($this->text)) 492 { 493 $text = " with text: " . $this->text; 605 606 if ($this->tag === 'text') { 607 if (!empty($this->text)) { 608 $text = ' with text: ' . $this->text; 494 609 } 495 610 } 611 496 612 $debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $text); 497 613 } 498 614 499 if ($this->tag ==='root') return $this->innertext();615 if ($this->tag === 'root') return $this->innertext(); 500 616 501 617 // trigger callback 502 if ($this->dom && $this->dom->callback!==null) 503 { 618 if ($this->dom && $this->dom->callback !== null) { 504 619 call_user_func_array($this->dom->callback, array($this)); 505 620 } 506 621 507 if (isset($this->_[HDOM_INFO_OUTER])) return $this->_[HDOM_INFO_OUTER]; 508 if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); 622 if (isset($this->_[HDOM_INFO_OUTER])) { 623 return $this->_[HDOM_INFO_OUTER]; 624 } 625 626 if (isset($this->_[HDOM_INFO_TEXT])) { 627 return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); 628 } 509 629 510 630 // render begin tag 511 if ($this->dom && $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]) 512 { 631 if ($this->dom && $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]) { 513 632 $ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup(); 514 633 } else { 515 $ret = "";634 $ret = ''; 516 635 } 517 636 518 637 // render inner text 519 if (isset($this->_[HDOM_INFO_INNER])) 520 { 521 // If it's a br tag... don't return the HDOM_INNER_INFO that we may or may not have added. 522 if ($this->tag != "br") 523 { 638 if (isset($this->_[HDOM_INFO_INNER])) { 639 // If it's a br tag... don't return the HDOM_INNER_INFO that we 640 // may or may not have added. 641 if ($this->tag !== 'br') { 524 642 $ret .= $this->_[HDOM_INFO_INNER]; 525 643 } 526 644 } else { 527 if ($this->nodes) 528 { 529 foreach ($this->nodes as $n) 530 { 645 if ($this->nodes) { 646 foreach ($this->nodes as $n) { 531 647 $ret .= $this->convert_text($n->outertext()); 532 648 } … … 535 651 536 652 // render end tag 537 if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END]!=0) 538 $ret .= '</'.$this->tag.'>'; 653 if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END] != 0) { 654 $ret .= '</' . $this->tag . '>'; 655 } 656 539 657 return $ret; 540 658 } … … 547 665 function text() 548 666 { 549 if (isset($this->_[HDOM_INFO_INNER])) return $this->_[HDOM_INFO_INNER]; 550 switch ($this->nodetype) 551 { 667 if (isset($this->_[HDOM_INFO_INNER])) { 668 return $this->_[HDOM_INFO_INNER]; 669 } 670 671 switch ($this->nodetype) { 552 672 case HDOM_TYPE_TEXT: return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); 553 673 case HDOM_TYPE_COMMENT: return ''; 554 674 case HDOM_TYPE_UNKNOWN: return ''; 555 675 } 556 if (strcasecmp($this->tag, 'script')===0) return ''; 557 if (strcasecmp($this->tag, 'style')===0) return ''; 676 677 if (strcasecmp($this->tag, 'script') === 0) { return ''; } 678 if (strcasecmp($this->tag, 'style') === 0) { return ''; } 558 679 559 680 $ret = ''; 560 // In rare cases, (always node type 1 or HDOM_TYPE_ELEMENT - observed for some span tags, and some p tags) $this->nodes is set to NULL. 561 // NOTE: This indicates that there is a problem where it's set to NULL without a clear happening. 681 682 // In rare cases, (always node type 1 or HDOM_TYPE_ELEMENT - observed 683 // for some span tags, and some p tags) $this->nodes is set to NULL. 684 // NOTE: This indicates that there is a problem where it's set to NULL 685 // without a clear happening. 562 686 // WHY is this happening? 563 if (!is_null($this->nodes)) 564 { 565 foreach ($this->nodes as $n) 566 { 687 if (!is_null($this->nodes)) { 688 foreach ($this->nodes as $n) { 567 689 // Start paragraph after a blank line 568 if ($n->tag == 'p') 569 { 690 if ($n->tag === 'p') { 570 691 $ret .= "\n\n"; 571 692 } … … 573 694 $ret .= $this->convert_text($n->text()); 574 695 575 // If this node is a span... add a space at the end of it so multiple spans don't run into each other. This is plaintext after all. 576 if ($n->tag == "span") 577 { 696 // If this node is a span... add a space at the end of it so 697 // multiple spans don't run into each other. This is plaintext 698 // after all. 699 if ($n->tag === 'span') { 578 700 $ret .= $this->dom->default_span_text; 579 701 } … … 600 722 { 601 723 // text, comment, unknown 602 if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); 603 604 $ret = '<'.$this->tag; 724 if (isset($this->_[HDOM_INFO_TEXT])) { 725 return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); 726 } 727 728 $ret = '<' . $this->tag; 605 729 $i = -1; 606 730 607 foreach ($this->attr as $key=>$val) 608 { 731 foreach ($this->attr as $key => $val) { 609 732 ++$i; 610 733 611 734 // skip removed attribute 612 if ($val===null || $val===false) 613 continue; 735 if ($val === null || $val === false) { continue; } 614 736 615 737 $ret .= $this->_[HDOM_INFO_SPACE][$i][0]; 738 616 739 //no value attr: nowrap, checked selected... 617 if ($val ===true)740 if ($val === true) { 618 741 $ret .= $key; 619 else {742 } else { 620 743 switch ($this->_[HDOM_INFO_QUOTE][$i]) 621 744 { … … 624 747 default: $quote = ''; 625 748 } 626 $ret .= $key.$this->_[HDOM_INFO_SPACE][$i][1].'='.$this->_[HDOM_INFO_SPACE][$i][2].$quote.$val.$quote; 627 } 628 } 749 750 $ret .= $key 751 . $this->_[HDOM_INFO_SPACE][$i][1] 752 . '=' 753 . $this->_[HDOM_INFO_SPACE][$i][2] 754 . $quote 755 . $val 756 . $quote; 757 } 758 } 759 629 760 $ret = $this->dom->restore_noise($ret); 630 761 return $ret . $this->_[HDOM_INFO_ENDSPACE] . '>'; 631 762 } 632 763 633 // find elements by css selector 634 //PaperG - added ability for find to lowercase the value of the selector. 635 function find($selector, $idx=null, $lowercase=false) 764 /** 765 * Find elements by CSS selector 766 * 767 * @param string $selector The CSS selector 768 * @param int|null $idx Index of element to return form the list of matching 769 * elements (default: `null` = disabled). 770 * @param bool $lowercase Matches tag names case insensitive (lowercase) if 771 * enabled (default: `false`) 772 * @return array|object|null A list of elements matching the specified CSS 773 * selector or a single element if $idx is specified or null if no element 774 * was found. 775 */ 776 function find($selector, $idx = null, $lowercase = false) 636 777 { 637 778 $selectors = $this->parse_selector($selector); 638 if (($count =count($selectors))===0) return array();779 if (($count = count($selectors)) === 0) { return array(); } 639 780 $found_keys = array(); 640 781 641 782 // find each selector 642 for ($c =0; $c<$count; ++$c)643 {644 // The change on the below line was documented on the sourceforgecode tracker id 2788009783 for ($c = 0; $c < $count; ++$c) { 784 // The change on the below line was documented on the sourceforge 785 // code tracker id 2788009 645 786 // used to be: if (($levle=count($selectors[0]))===0) return array(); 646 if (($levle=count($selectors[$c]))===0) return array(); 647 if (!isset($this->_[HDOM_INFO_BEGIN])) return array(); 648 649 $head = array($this->_[HDOM_INFO_BEGIN]=>1); 787 if (($levle = count($selectors[$c])) === 0) { return array(); } 788 if (!isset($this->_[HDOM_INFO_BEGIN])) { return array(); } 789 790 $head = array($this->_[HDOM_INFO_BEGIN] => 1); 791 $cmd = ' '; // Combinator 650 792 651 793 // handle descendant selectors, no recursive! 652 for ($l=0; $l<$levle; ++$l) 653 { 794 for ($l = 0; $l < $levle; ++$l) { 654 795 $ret = array(); 655 foreach ($head as $k=>$v) 656 {657 $n = ($k ===-1) ? $this->dom->root : $this->dom->nodes[$k];796 797 foreach ($head as $k => $v) { 798 $n = ($k === -1) ? $this->dom->root : $this->dom->nodes[$k]; 658 799 //PaperG - Pass this optional parameter on to the seek function. 659 $n->seek($selectors[$c][$l], $ret, $ lowercase);800 $n->seek($selectors[$c][$l], $ret, $cmd, $lowercase); 660 801 } 802 661 803 $head = $ret; 662 } 663 664 foreach ($head as $k=>$v) 665 { 666 if (!isset($found_keys[$k])) 667 { 804 $cmd = $selectors[$c][$l][4]; // Next Combinator 805 } 806 807 foreach ($head as $k => $v) { 808 if (!isset($found_keys[$k])) { 668 809 $found_keys[$k] = 1; 669 810 } … … 675 816 676 817 $found = array(); 677 foreach ($found_keys as $k =>$v)818 foreach ($found_keys as $k => $v) { 678 819 $found[] = $this->dom->nodes[$k]; 820 } 679 821 680 822 // return nth-element or array 681 if (is_null($idx)) return $found;682 else if ($idx<0) $idx = count($found) + $idx;823 if (is_null($idx)) { return $found; } 824 elseif ($idx < 0) { $idx = count($found) + $idx; } 683 825 return (isset($found[$idx])) ? $found[$idx] : null; 684 826 } 685 827 686 // seek for given conditions 687 // PaperG - added parameter to allow for case insensitive testing of the value of a selector. 688 protected function seek($selector, &$ret, $lowercase=false) 828 /** 829 * Seek DOM elements by selector 830 * 831 * **Note** 832 * The selector element must be compatible to a selector from 833 * {@see simple_html_dom_node::parse_selector()} 834 * 835 * @param array $selector A selector element 836 * @param array $ret An array of matches 837 * @param bool $lowercase Matches tag names case insensitive (lowercase) if 838 * enabled (default: `false`) 839 * @return void 840 */ 841 protected function seek($selector, &$ret, $parent_cmd, $lowercase = false) 689 842 { 690 843 global $debug_object; 691 844 if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } 692 845 693 list($tag, $key, $val, $exp, $no_key) = $selector; 694 695 // xpath index 696 if ($tag && $key && is_numeric($key)) 697 { 698 $count = 0; 699 foreach ($this->children as $c) 700 { 701 if ($tag==='*' || $tag===$c->tag) { 702 if (++$count==$key) { 703 $ret[$c->_[HDOM_INFO_BEGIN]] = 1; 704 return; 846 list($tag, $id, $class, $attributes, $cmb) = $selector; 847 $nodes = array(); 848 849 if ($parent_cmd === ' ') { // Descendant Combinator 850 // Find parent closing tag if the current element doesn't have a closing 851 // tag (i.e. void element) 852 $end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0; 853 if ($end == 0) { 854 $parent = $this->parent; 855 while (!isset($parent->_[HDOM_INFO_END]) && $parent !== null) { 856 $end -= 1; 857 $parent = $parent->parent; 858 } 859 $end += $parent->_[HDOM_INFO_END]; 860 } 861 862 // Get list of target nodes 863 $nodes_start = $this->_[HDOM_INFO_BEGIN] + 1; 864 $nodes_count = $end - $nodes_start; 865 $nodes = array_slice($this->dom->nodes, $nodes_start, $nodes_count, true); 866 } elseif ($parent_cmd === '>') { // Child Combinator 867 $nodes = $this->children; 868 } elseif ($parent_cmd === '+' 869 && $this->parent 870 && in_array($this, $this->parent->children)) { // Next-Sibling Combinator 871 $index = array_search($this, $this->parent->children, true) + 1; 872 $nodes[] = $this->parent->children[$index]; 873 } elseif ($parent_cmd === '~' 874 && $this->parent 875 && in_array($this, $this->parent->children)) { // Subsequent Sibling Combinator 876 $index = array_search($this, $this->parent->children, true); 877 $nodes = array_slice($this->parent->children, $index); 878 } 879 880 // Go throgh each element starting at this element until the end tag 881 // Note: If this element is a void tag, any previous void element is 882 // skipped. 883 foreach($nodes as $node) { 884 $pass = true; 885 886 // Skip root nodes 887 if(!$node->parent) { 888 $pass = false; 889 } 890 891 // Skip if node isn't a child node (i.e. text nodes) 892 if($pass && !in_array($node, $node->parent->children, true)) { 893 $pass = false; 894 } 895 896 // Skip if tag doesn't match 897 if ($pass && $tag !== '' && $tag !== $node->tag && $tag !== '*') { 898 $pass = false; 899 } 900 901 // Skip if ID doesn't exist 902 if ($pass && $id !== '' && !isset($node->attr['id'])) { 903 $pass = false; 904 } 905 906 // Check if ID matches 907 if ($pass && $id !== '' && isset($node->attr['id'])) { 908 // Note: Only consider the first ID (as browsers do) 909 $node_id = explode(' ', trim($node->attr['id']))[0]; 910 911 if($id !== $node_id) { $pass = false; } 912 } 913 914 // Check if all class(es) exist 915 if ($pass && $class !== '' && is_array($class) && !empty($class)) { 916 if (isset($node->attr['class'])) { 917 $node_classes = explode(' ', $node->attr['class']); 918 919 if ($lowercase) { 920 $node_classes = array_map('strtolower', $node_classes); 705 921 } 706 } 707 } 708 return; 709 } 710 711 $end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0; 712 if ($end==0) { 713 $parent = $this->parent; 714 while (!isset($parent->_[HDOM_INFO_END]) && $parent!==null) { 715 $end -= 1; 716 $parent = $parent->parent; 717 } 718 $end += $parent->_[HDOM_INFO_END]; 719 } 720 721 for ($i=$this->_[HDOM_INFO_BEGIN]+1; $i<$end; ++$i) { 722 $node = $this->dom->nodes[$i]; 723 724 $pass = true; 725 726 if ($tag==='*' && !$key) { 727 if (in_array($node, $this->children, true)) 728 $ret[$i] = 1; 729 continue; 730 } 731 732 // compare tag 733 if ($tag && $tag!=$node->tag && $tag!=='*') {$pass=false;} 734 // compare key 735 if ($pass && $key) { 736 if ($no_key) { 737 if (isset($node->attr[$key])) $pass=false; 738 } else { 739 if (($key != "plaintext") && !isset($node->attr[$key])) $pass=false; 740 } 741 } 742 // compare value 743 if ($pass && $key && $val && $val!=='*') { 744 // If they have told us that this is a "plaintext" search then we want the plaintext of the node - right? 745 if ($key == "plaintext") { 746 // $node->plaintext actually returns $node->text(); 747 $nodeKeyValue = $node->text(); 748 } else { 749 // this is a normal search, we want the value of that attribute of the tag. 750 $nodeKeyValue = $node->attr[$key]; 751 } 752 if (is_object($debug_object)) {$debug_object->debug_log(2, "testing node: " . $node->tag . " for attribute: " . $key . $exp . $val . " where nodes value is: " . $nodeKeyValue);} 753 754 //PaperG - If lowercase is set, do a case insensitive test of the value of the selector. 755 if ($lowercase) { 756 $check = $this->match($exp, strtolower($val), strtolower($nodeKeyValue)); 757 } else { 758 $check = $this->match($exp, $val, $nodeKeyValue); 759 } 760 if (is_object($debug_object)) {$debug_object->debug_log(2, "after match: " . ($check ? "true" : "false"));} 761 762 // handle multiple class 763 if (!$check && strcasecmp($key, 'class')===0) { 764 foreach (explode(' ',$node->attr[$key]) as $k) { 765 // Without this, there were cases where leading, trailing, or double spaces lead to our comparing blanks - bad form. 766 if (!empty($k)) { 767 if ($lowercase) { 768 $check = $this->match($exp, strtolower($val), strtolower($k)); 769 } else { 770 $check = $this->match($exp, $val, $k); 771 } 772 if ($check) break; 922 923 foreach($class as $c) { 924 if(!in_array($c, $node_classes)) { 925 $pass = false; 926 break; 773 927 } 774 928 } 929 } else { 930 $pass = false; 775 931 } 776 if (!$check) $pass = false; 777 } 778 if ($pass) $ret[$i] = 1; 932 } 933 934 // Check attributes 935 if ($pass 936 && $attributes !== '' 937 && is_array($attributes) 938 && !empty($attributes)) { 939 foreach($attributes as $a) { 940 list ( 941 $att_name, 942 $att_expr, 943 $att_val, 944 $att_inv, 945 $att_case_sensitivity 946 ) = $a; 947 948 // Handle indexing attributes (i.e. "[2]") 949 /** 950 * Note: This is not supported by the CSS Standard but adds 951 * the ability to select items compatible to XPath (i.e. 952 * the 3rd element within it's parent). 953 * 954 * Note: This doesn't conflict with the CSS Standard which 955 * doesn't work on numeric attributes anyway. 956 */ 957 if (is_numeric($att_name) 958 && $att_expr === '' 959 && $att_val === '') { 960 $count = 0; 961 962 // Find index of current element in parent 963 foreach ($node->parent->children as $c) { 964 if ($c->tag === $node->tag) ++$count; 965 if ($c === $node) break; 966 } 967 968 // If this is the correct node, continue with next 969 // attribute 970 if ($count === (int)$att_name) continue; 971 } 972 973 // Check attribute availability 974 if ($att_inv) { // Attribute should NOT be set 975 if (isset($node->attr[$att_name])) { 976 $pass = false; 977 break; 978 } 979 } else { // Attribute should be set 980 // todo: "plaintext" is not a valid CSS selector! 981 if ($att_name !== 'plaintext' 982 && !isset($node->attr[$att_name])) { 983 $pass = false; 984 break; 985 } 986 } 987 988 // Continue with next attribute if expression isn't defined 989 if ($att_expr === '') continue; 990 991 // If they have told us that this is a "plaintext" 992 // search then we want the plaintext of the node - right? 993 // todo "plaintext" is not a valid CSS selector! 994 if ($att_name === 'plaintext') { 995 $nodeKeyValue = $node->text(); 996 } else { 997 $nodeKeyValue = $node->attr[$att_name]; 998 } 999 1000 if (is_object($debug_object)) { 1001 $debug_object->debug_log(2, 1002 'testing node: ' 1003 . $node->tag 1004 . ' for attribute: ' 1005 . $att_name 1006 . $att_expr 1007 . $att_val 1008 . ' where nodes value is: ' 1009 . $nodeKeyValue 1010 ); 1011 } 1012 1013 // If lowercase is set, do a case insensitive test of 1014 // the value of the selector. 1015 if ($lowercase) { 1016 $check = $this->match( 1017 $att_expr, 1018 strtolower($att_val), 1019 strtolower($nodeKeyValue), 1020 $att_case_sensitivity 1021 ); 1022 } else { 1023 $check = $this->match( 1024 $att_expr, 1025 $att_val, 1026 $nodeKeyValue, 1027 $att_case_sensitivity 1028 ); 1029 } 1030 1031 if (is_object($debug_object)) { 1032 $debug_object->debug_log(2, 1033 'after match: ' 1034 . ($check ? 'true' : 'false') 1035 ); 1036 } 1037 1038 if (!$check) { 1039 $pass = false; 1040 break; 1041 } 1042 } 1043 } 1044 1045 // Found a match. Add to list and clear node 1046 if ($pass) $ret[$node->_[HDOM_INFO_BEGIN]] = 1; 779 1047 unset($node); 780 1048 } 781 1049 // It's passed by reference so this is actually what this function returns. 782 if (is_object($debug_object)) {$debug_object->debug_log(1, "EXIT - ret: ", $ret);} 783 } 784 785 protected function match($exp, $pattern, $value) { 1050 if (is_object($debug_object)) { 1051 $debug_object->debug_log(1, 'EXIT - ret: ', $ret); 1052 } 1053 } 1054 1055 /** 1056 * Match value and pattern for a given CSS expression 1057 * 1058 * **Supported Expressions** 1059 * 1060 * | Expression | Description 1061 * | ---------- | ----------- 1062 * | `=` | $value and $pattern must be equal 1063 * | `!=` | $value and $pattern must not be equal 1064 * | `^=` | $value must start with $pattern 1065 * | `$=` | $value must end with $pattern 1066 * | `*=` | $value must contain $pattern 1067 * 1068 * @param string $exp The expression. 1069 * @param string $pattern The pattern 1070 * @param string $value The value 1071 * @value bool True if $value matches $pattern 1072 */ 1073 protected function match($exp, $pattern, $value, $case_sensitivity) 1074 { 786 1075 global $debug_object; 787 1076 if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} 788 1077 1078 if ($case_sensitivity === 'i') { 1079 $pattern = strtolower($pattern); 1080 $value = strtolower($value); 1081 } 1082 789 1083 switch ($exp) { 790 1084 case '=': 791 return ($value ===$pattern);1085 return ($value === $pattern); 792 1086 case '!=': 793 return ($value !==$pattern);1087 return ($value !== $pattern); 794 1088 case '^=': 795 return preg_match( "/^".preg_quote($pattern,'/')."/", $value);1089 return preg_match('/^' . preg_quote($pattern, '/') . '/', $value); 796 1090 case '$=': 797 return preg_match( "/".preg_quote($pattern,'/')."$/", $value);1091 return preg_match('/' . preg_quote($pattern, '/') . '$/', $value); 798 1092 case '*=': 799 if ($pattern[0]=='/') { 800 return preg_match($pattern, $value); 801 } 802 return preg_match("/".$pattern."/i", $value); 1093 return preg_match('/' . preg_quote($pattern, '/') . '/', $value); 1094 case '|=': 1095 /** 1096 * [att|=val] 1097 * 1098 * Represents an element with the att attribute, its value 1099 * either being exactly "val" or beginning with "val" 1100 * immediately followed by "-" (U+002D). 1101 */ 1102 return strpos($value, $pattern) === 0; 1103 case '~=': 1104 /** 1105 * [att~=val] 1106 * 1107 * Represents an element with the att attribute whose value is a 1108 * whitespace-separated list of words, one of which is exactly 1109 * "val". If "val" contains whitespace, it will never represent 1110 * anything (since the words are separated by spaces). Also if 1111 * "val" is the empty string, it will never represent anything. 1112 */ 1113 return in_array($pattern, explode(' ', trim($value)), true); 803 1114 } 804 1115 return false; 805 1116 } 806 1117 807 protected function parse_selector($selector_string) { 1118 /** 1119 * Parse CSS selector 1120 * 1121 * @param string $selector_string CSS selector string 1122 * @return array List of CSS selectors. The format depends on the type of 1123 * selector: 1124 * 1125 * ```php 1126 * 1127 * array( // list of selectors (each separated by a comma), i.e. 'img, p, div' 1128 * array( // list of combinator selectors, i.e. 'img > p > div' 1129 * array( // selector element 1130 * [0], // (string) The element tag 1131 * [1], // (string) The element id 1132 * [2], // (array<string>) The element classes 1133 * [3], // (array<array<string>>) The list of attributes, each 1134 * // with four elements: name, expression, value, inverted 1135 * [4] // (string) The selector combinator (' ' | '>' | '+' | '~') 1136 * ) 1137 * ) 1138 * ) 1139 * ``` 1140 * 1141 * @link https://www.w3.org/TR/selectors/#compound Compound selector 1142 */ 1143 protected function parse_selector($selector_string) 1144 { 808 1145 global $debug_object; 809 if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} 810 811 // pattern of CSS selectors, modified from mootools 812 // Paperg: Add the colon to the attrbute, so that it properly finds <tag attr:ibute="something" > like google does. 813 // Note: if you try to look at this attribute, yo MUST use getAttribute since $dom->x:y will fail the php syntax check. 814 // Notice the \[ starting the attbute? and the @? following? This implies that an attribute can begin with an @ sign that is not captured. 815 // This implies that an html attribute specifier may start with an @ sign that is NOT captured by the expression. 816 // farther study is required to determine of this should be documented or removed. 817 // $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; 818 $pattern = "/([\w:\*-]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w:-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; 819 preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER); 820 if (is_object($debug_object)) {$debug_object->debug_log(2, "Matches Array: ", $matches);} 1146 if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } 1147 1148 /** 1149 * Pattern of CSS selectors, modified from mootools (https://mootools.net/) 1150 * 1151 * Paperg: Add the colon to the attribute, so that it properly finds 1152 * <tag attr:ibute="something" > like google does. 1153 * 1154 * Note: if you try to look at this attribute, you MUST use getAttribute 1155 * since $dom->x:y will fail the php syntax check. 1156 * 1157 * Notice the \[ starting the attribute? and the @? following? This 1158 * implies that an attribute can begin with an @ sign that is not 1159 * captured. This implies that an html attribute specifier may start 1160 * with an @ sign that is NOT captured by the expression. Farther study 1161 * is required to determine of this should be documented or removed. 1162 * 1163 * Matches selectors in this order: 1164 * 1165 * [0] - full match 1166 * 1167 * [1] - tag name 1168 * ([\w:\*-]*) 1169 * Matches the tag name consisting of zero or more words, colons, 1170 * asterisks and hyphens. 1171 * 1172 * [2] - id name 1173 * (?:\#([\w-]+)) 1174 * Optionally matches a id name, consisting of an "#" followed by 1175 * the id name (one or more words and hyphens). 1176 * 1177 * [3] - class names (including dots) 1178 * (?:\.([\w\.-]+))? 1179 * Optionally matches a list of classs, consisting of an "." 1180 * followed by the class name (one or more words and hyphens) 1181 * where multiple classes can be chained (i.e. ".foo.bar.baz") 1182 * 1183 * [4] - attributes 1184 * ((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)? 1185 * Optionally matches the attributes list 1186 * 1187 * [5] - separator 1188 * ([\/, >+~]+) 1189 * Matches the selector list separator 1190 */ 1191 // phpcs:ignore Generic.Files.LineLength 1192 $pattern = "/([\w:\*-]*)(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?([\/, >+~]+)/is"; 1193 1194 preg_match_all( 1195 $pattern, 1196 trim($selector_string) . ' ', // Add final ' ' as pseudo separator 1197 $matches, 1198 PREG_SET_ORDER 1199 ); 1200 1201 if (is_object($debug_object)) { 1202 $debug_object->debug_log(2, 'Matches Array: ', $matches); 1203 } 821 1204 822 1205 $selectors = array(); 823 1206 $result = array(); 824 //print_r($matches);825 1207 826 1208 foreach ($matches as $m) { 827 1209 $m[0] = trim($m[0]); 828 if ($m[0]==='' || $m[0]==='/' || $m[0]==='//') continue; 829 // for browser generated xpath 830 if ($m[1]==='tbody') continue; 831 832 list($tag, $key, $val, $exp, $no_key) = array($m[1], null, null, '=', false); 833 if (!empty($m[2])) {$key='id'; $val=$m[2];} 834 if (!empty($m[3])) {$key='class'; $val=$m[3];} 835 if (!empty($m[4])) {$key=$m[4];} 836 if (!empty($m[5])) {$exp=$m[5];} 837 if (!empty($m[6])) {$val=$m[6];} 838 839 // convert to lowercase 840 if ($this->dom->lowercase) {$tag=strtolower($tag); $key=strtolower($key);} 841 //elements that do NOT have the specified attribute 842 if (isset($key[0]) && $key[0]==='!') {$key=substr($key, 1); $no_key=true;} 843 844 $result[] = array($tag, $key, $val, $exp, $no_key); 845 if (trim($m[7])===',') { 1210 1211 // Skip NoOps 1212 if ($m[0] === '' || $m[0] === '/' || $m[0] === '//') { continue; } 1213 1214 // Convert to lowercase 1215 if ($this->dom->lowercase) { 1216 $m[1] = strtolower($m[1]); 1217 } 1218 1219 // Extract classes 1220 if ($m[3] !== '') { $m[3] = explode('.', $m[3]); } 1221 1222 /* Extract attributes (pattern based on the pattern above!) 1223 1224 * [0] - full match 1225 * [1] - attribute name 1226 * [2] - attribute expression 1227 * [3] - attribute value 1228 * [4] - case sensitivity 1229 * 1230 * Note: Attributes can be negated with a "!" prefix to their name 1231 */ 1232 if($m[4] !== '') { 1233 preg_match_all( 1234 "/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s*?([iIsS])?)?\]/is", 1235 trim($m[4]), 1236 $attributes, 1237 PREG_SET_ORDER 1238 ); 1239 1240 // Replace element by array 1241 $m[4] = array(); 1242 1243 foreach($attributes as $att) { 1244 // Skip empty matches 1245 if(trim($att[0]) === '') { continue; } 1246 1247 $inverted = (isset($att[1][0]) && $att[1][0] === '!'); 1248 $m[4][] = array( 1249 $inverted ? substr($att[1], 1) : $att[1], // Name 1250 (isset($att[2])) ? $att[2] : '', // Expression 1251 (isset($att[3])) ? $att[3] : '', // Value 1252 $inverted, // Inverted Flag 1253 (isset($att[4])) ? strtolower($att[4]) : '', // Case-Sensitivity 1254 ); 1255 } 1256 } 1257 1258 // Sanitize Separator 1259 if ($m[5] !== '' && trim($m[5]) === '') { // Descendant Separator 1260 $m[5] = ' '; 1261 } else { // Other Separator 1262 $m[5] = trim($m[5]); 1263 } 1264 1265 // Clear Separator if it's a Selector List 1266 if ($is_list = ($m[5] === ',')) { $m[5] = ''; } 1267 1268 // Remove full match before adding to results 1269 array_shift($m); 1270 $result[] = $m; 1271 1272 if ($is_list) { // Selector List 846 1273 $selectors[] = $result; 847 1274 $result = array(); 848 1275 } 849 1276 } 850 if (count($result)>0) 851 $selectors[] = $result;1277 1278 if (count($result) > 0) { $selectors[] = $result; } 852 1279 return $selectors; 853 1280 } … … 855 1282 function __get($name) 856 1283 { 857 if (isset($this->attr[$name])) 858 { 1284 if (isset($this->attr[$name])) { 859 1285 return $this->convert_text($this->attr[$name]); 860 1286 } 861 switch ($name) 862 { 1287 switch ($name) { 863 1288 case 'outertext': return $this->outertext(); 864 1289 case 'innertext': return $this->innertext(); … … 872 1297 { 873 1298 global $debug_object; 874 if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} 875 876 switch ($name) 877 { 1299 if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } 1300 1301 switch ($name) { 878 1302 case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value; 879 1303 case 'innertext': 880 if (isset($this->_[HDOM_INFO_TEXT])) return $this->_[HDOM_INFO_TEXT] = $value; 1304 if (isset($this->_[HDOM_INFO_TEXT])) { 1305 return $this->_[HDOM_INFO_TEXT] = $value; 1306 } 881 1307 return $this->_[HDOM_INFO_INNER] = $value; 882 1308 } 883 if (!isset($this->attr[$name])) 884 {1309 1310 if (!isset($this->attr[$name])) { 885 1311 $this->_[HDOM_INFO_SPACE][] = array(' ', '', ''); 886 1312 $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE; 887 1313 } 1314 888 1315 $this->attr[$name] = $value; 889 1316 } … … 891 1318 function __isset($name) 892 1319 { 893 switch ($name) 894 { 1320 switch ($name) { 895 1321 case 'outertext': return true; 896 1322 case 'innertext': return true; … … 901 1327 } 902 1328 903 function __unset($name) { 904 if (isset($this->attr[$name])) 905 unset($this->attr[$name]); 906 } 907 908 // PaperG - Function to convert the text from one character set to another if the two sets are not the same. 1329 function __unset($name) 1330 { 1331 if (isset($this->attr[$name])) { unset($this->attr[$name]); } 1332 } 1333 1334 // PaperG - Function to convert the text from one character set to another 1335 // if the two sets are not the same. 909 1336 function convert_text($text) 910 1337 { 911 1338 global $debug_object; 912 if (is_object($debug_object)) { $debug_object->debug_log_entry(1);}1339 if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } 913 1340 914 1341 $converted_text = $text; 915 1342 916 $sourceCharset = ""; 917 $targetCharset = ""; 918 919 if ($this->dom) 920 { 1343 $sourceCharset = ''; 1344 $targetCharset = ''; 1345 1346 if ($this->dom) { 921 1347 $sourceCharset = strtoupper($this->dom->_charset); 922 1348 $targetCharset = strtoupper($this->dom->_target_charset); 923 1349 } 924 if (is_object($debug_object)) {$debug_object->debug_log(3, "source charset: " . $sourceCharset . " target charaset: " . $targetCharset);} 925 926 if (!empty($sourceCharset) && !empty($targetCharset) && (strcasecmp($sourceCharset, $targetCharset) != 0)) 927 { 1350 1351 if (is_object($debug_object)) { 1352 $debug_object->debug_log(3, 1353 'source charset: ' 1354 . $sourceCharset 1355 . ' target charaset: ' 1356 . $targetCharset 1357 ); 1358 } 1359 1360 if (!empty($sourceCharset) 1361 && !empty($targetCharset) 1362 && (strcasecmp($sourceCharset, $targetCharset) != 0)) { 928 1363 // Check if the reported encoding could have been incorrect and the text is actually already UTF-8 929 if ((strcasecmp($targetCharset, 'UTF-8') == 0) && ($this->is_utf8($text)))930 {1364 if ((strcasecmp($targetCharset, 'UTF-8') == 0) 1365 && ($this->is_utf8($text))) { 931 1366 $converted_text = $text; 932 } 933 else 934 { 1367 } else { 935 1368 $converted_text = iconv($sourceCharset, $targetCharset, $text); 936 1369 } … … 938 1371 939 1372 // Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output. 940 if ($targetCharset == 'UTF-8') 941 { 942 if (substr($converted_text, 0, 3) == "\xef\xbb\xbf") 943 { 1373 if ($targetCharset === 'UTF-8') { 1374 if (substr($converted_text, 0, 3) === "\xef\xbb\xbf") { 944 1375 $converted_text = substr($converted_text, 3); 945 1376 } 946 if (substr($converted_text, -3) == "\xef\xbb\xbf") 947 {1377 1378 if (substr($converted_text, -3) === "\xef\xbb\xbf") { 948 1379 $converted_text = substr($converted_text, 0, -3); 949 1380 } … … 961 1392 static function is_utf8($str) 962 1393 { 963 $c=0; $b=0; 964 $bits=0; 965 $len=strlen($str); 966 for($i=0; $i<$len; $i++) 967 { 968 $c=ord($str[$i]); 969 if($c > 128) 970 { 971 if(($c >= 254)) return false; 972 elseif($c >= 252) $bits=6; 973 elseif($c >= 248) $bits=5; 974 elseif($c >= 240) $bits=4; 975 elseif($c >= 224) $bits=3; 976 elseif($c >= 192) $bits=2; 977 else return false; 978 if(($i+$bits) > $len) return false; 979 while($bits > 1) 980 { 1394 $c = 0; $b = 0; 1395 $bits = 0; 1396 $len = strlen($str); 1397 for($i = 0; $i < $len; $i++) { 1398 $c = ord($str[$i]); 1399 if($c > 128) { 1400 if(($c >= 254)) { return false; } 1401 elseif($c >= 252) { $bits = 6; } 1402 elseif($c >= 248) { $bits = 5; } 1403 elseif($c >= 240) { $bits = 4; } 1404 elseif($c >= 224) { $bits = 3; } 1405 elseif($c >= 192) { $bits = 2; } 1406 else { return false; } 1407 if(($i + $bits) > $len) { return false; } 1408 while($bits > 1) { 981 1409 $i++; 982 $b =ord($str[$i]);983 if($b < 128 || $b > 191) return false;1410 $b = ord($str[$i]); 1411 if($b < 128 || $b > 191) { return false; } 984 1412 $bits--; 985 1413 } … … 988 1416 return true; 989 1417 } 990 /* 991 function is_utf8($string) 992 { 993 //this is buggy 994 return (utf8_encode(utf8_decode($string)) == $string); 995 } 996 */ 997 998 /** 999 * Function to try a few tricks to determine the displayed size of an img on the page. 1000 * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types. 1418 1419 /** 1420 * Function to try a few tricks to determine the displayed size of an img on 1421 * the page. NOTE: This will ONLY work on an IMG tag. Returns FALSE on all 1422 * other tag types. 1001 1423 * 1002 1424 * @author John Schlick 1003 1425 * @version April 19 2012 1004 * @return array an array containing the 'height' and 'width' of the image on the page or -1 if we can't figure it out. 1426 * @return array an array containing the 'height' and 'width' of the image 1427 * on the page or -1 if we can't figure it out. 1005 1428 */ 1006 1429 function get_display_size() … … 1011 1434 $height = -1; 1012 1435 1013 if ($this->tag !== 'img') 1014 { 1436 if ($this->tag !== 'img') { 1015 1437 return false; 1016 1438 } 1017 1439 1018 1440 // See if there is aheight or width attribute in the tag itself. 1019 if (isset($this->attr['width'])) 1020 { 1441 if (isset($this->attr['width'])) { 1021 1442 $width = $this->attr['width']; 1022 1443 } 1023 1444 1024 if (isset($this->attr['height'])) 1025 { 1445 if (isset($this->attr['height'])) { 1026 1446 $height = $this->attr['height']; 1027 1447 } 1028 1448 1029 1449 // Now look for an inline style. 1030 if (isset($this->attr['style'])) 1031 { 1450 if (isset($this->attr['style'])) { 1032 1451 // Thanks to user gnarf from stackoverflow for this regular expression. 1033 1452 $attributes = array(); 1034 preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $this->attr['style'], $matches, PREG_SET_ORDER); 1453 1454 preg_match_all( 1455 '/([\w-]+)\s*:\s*([^;]+)\s*;?/', 1456 $this->attr['style'], 1457 $matches, 1458 PREG_SET_ORDER 1459 ); 1460 1035 1461 foreach ($matches as $match) { 1036 $attributes[$match[1]] = $match[2];1462 $attributes[$match[1]] = $match[2]; 1037 1463 } 1038 1464 1039 1465 // If there is a width in the style attributes: 1040 if (isset($attributes['width']) && $width == -1) 1041 { 1466 if (isset($attributes['width']) && $width == -1) { 1042 1467 // check that the last two characters are px (pixels) 1043 if (strtolower(substr($attributes['width'], -2)) == 'px') 1044 { 1468 if (strtolower(substr($attributes['width'], -2)) === 'px') { 1045 1469 $proposed_width = substr($attributes['width'], 0, -2); 1046 1470 // Now make sure that it's an integer and not something stupid. 1047 if (filter_var($proposed_width, FILTER_VALIDATE_INT)) 1048 { 1471 if (filter_var($proposed_width, FILTER_VALIDATE_INT)) { 1049 1472 $width = $proposed_width; 1050 1473 } … … 1053 1476 1054 1477 // If there is a width in the style attributes: 1055 if (isset($attributes['height']) && $height == -1) 1056 { 1478 if (isset($attributes['height']) && $height == -1) { 1057 1479 // check that the last two characters are px (pixels) 1058 if (strtolower(substr($attributes['height'], -2)) == 'px') 1059 { 1480 if (strtolower(substr($attributes['height'], -2)) == 'px') { 1060 1481 $proposed_height = substr($attributes['height'], 0, -2); 1061 1482 // Now make sure that it's an integer and not something stupid. 1062 if (filter_var($proposed_height, FILTER_VALIDATE_INT)) 1063 { 1483 if (filter_var($proposed_height, FILTER_VALIDATE_INT)) { 1064 1484 $height = $proposed_height; 1065 1485 } … … 1070 1490 1071 1491 // Future enhancement: 1072 // Look in the tag to see if there is a class or id specified that has a height or width attribute to it. 1492 // Look in the tag to see if there is a class or id specified that has 1493 // a height or width attribute to it. 1073 1494 1074 1495 // Far future enhancement 1075 // Look at all the parent tags of this image to see if they specify a class or id that has an img selector that specifies a height or width 1076 // Note that in this case, the class or id will have the img subselector for it to apply to the image. 1496 // Look at all the parent tags of this image to see if they specify a 1497 // class or id that has an img selector that specifies a height or width 1498 // Note that in this case, the class or id will have the img subselector 1499 // for it to apply to the image. 1077 1500 1078 1501 // ridiculously far future development 1079 // If the class or id is specified in a SEPARATE css file thats not on the page, go get it and do what we were just doing for the ones on the page. 1080 1081 $result = array('height' => $height, 1082 'width' => $width); 1502 // If the class or id is specified in a SEPARATE css file thats not on 1503 // the page, go get it and do what we were just doing for the ones on 1504 // the page. 1505 1506 $result = array( 1507 'height' => $height, 1508 'width' => $width 1509 ); 1510 1083 1511 return $result; 1084 1512 } 1085 1513 1086 1514 // camel naming conventions 1087 function getAllAttributes() {return $this->attr;} 1088 function getAttribute($name) {return $this->__get($name);} 1089 function setAttribute($name, $value) {$this->__set($name, $value);} 1090 function hasAttribute($name) {return $this->__isset($name);} 1091 function removeAttribute($name) {$this->__set($name, null);} 1092 function getElementById($id) {return $this->find("#$id", 0);} 1093 function getElementsById($id, $idx=null) {return $this->find("#$id", $idx);} 1094 function getElementByTagName($name) {return $this->find($name, 0);} 1095 function getElementsByTagName($name, $idx=null) {return $this->find($name, $idx);} 1096 function parentNode() {return $this->parent();} 1097 function childNodes($idx=-1) {return $this->children($idx);} 1098 function firstChild() {return $this->first_child();} 1099 function lastChild() {return $this->last_child();} 1100 function nextSibling() {return $this->next_sibling();} 1101 function previousSibling() {return $this->prev_sibling();} 1102 function hasChildNodes() {return $this->has_child();} 1103 function nodeName() {return $this->tag;} 1104 function appendChild($node) {$node->parent($this); return $node;} 1515 function getAllAttributes() 1516 { 1517 return $this->attr; 1518 } 1519 1520 function getAttribute($name) 1521 { 1522 return $this->__get($name); 1523 } 1524 1525 function setAttribute($name, $value) 1526 { 1527 $this->__set($name, $value); 1528 } 1529 1530 function hasAttribute($name) 1531 { 1532 return $this->__isset($name); 1533 } 1534 1535 function removeAttribute($name) 1536 { 1537 $this->__set($name, null); 1538 } 1539 1540 function getElementById($id) 1541 { 1542 return $this->find("#$id", 0); 1543 } 1544 1545 function getElementsById($id, $idx = null) 1546 { 1547 return $this->find("#$id", $idx); 1548 } 1549 1550 function getElementByTagName($name) 1551 { 1552 return $this->find($name, 0); 1553 } 1554 1555 function getElementsByTagName($name, $idx = null) 1556 { 1557 return $this->find($name, $idx); 1558 } 1559 1560 function parentNode() 1561 { 1562 return $this->parent(); 1563 } 1564 1565 function childNodes($idx = -1) 1566 { 1567 return $this->children($idx); 1568 } 1569 1570 function firstChild() 1571 { 1572 return $this->first_child(); 1573 } 1574 1575 function lastChild() 1576 { 1577 return $this->last_child(); 1578 } 1579 1580 function nextSibling() 1581 { 1582 return $this->next_sibling(); 1583 } 1584 1585 function previousSibling() 1586 { 1587 return $this->prev_sibling(); 1588 } 1589 1590 function hasChildNodes() 1591 { 1592 return $this->has_child(); 1593 } 1594 1595 function nodeName() 1596 { 1597 return $this->tag; 1598 } 1599 1600 function appendChild($node) 1601 { 1602 $node->parent($this); 1603 return $node; 1604 } 1105 1605 1106 1606 } … … 1108 1608 /** 1109 1609 * simple html dom parser 1110 * Paperg - in the find routine: allow us to specify that we want case insensitive testing of the value of the selector. 1610 * 1611 * Paperg - in the find routine: allow us to specify that we want case 1612 * insensitive testing of the value of the selector. 1613 * 1111 1614 * Paperg - change $size from protected to public so we can easily access it 1112 * Paperg - added ForceTagsClosed in the constructor which tells us whether we trust the html or not. Default is to NOT trust it. 1615 * 1616 * Paperg - added ForceTagsClosed in the constructor which tells us whether we 1617 * trust the html or not. Default is to NOT trust it. 1113 1618 * 1114 1619 * @package PlaceLocalInclude … … 1186 1691 * the document {@see simple_html_dom::$doc} 1187 1692 * 1188 * _Note_: Using this variable is more efficient than calling `substr($doc, $pos, 1)` 1693 * _Note_: Using this variable is more efficient than calling 1694 * `substr($doc, $pos, 1)` 1189 1695 * 1190 1696 * @var string … … 1233 1739 protected $token_attr = ' >'; 1234 1740 1235 // Note that this is referenced by a child node, and so it needs to be public for that node to see this information. 1741 // Note that this is referenced by a child node, and so it needs to be 1742 // public for that node to see this information. 1236 1743 public $_charset = ''; 1237 1744 public $_target_charset = ''; … … 1242 1749 * @var string 1243 1750 */ 1244 protected $default_br_text = "";1751 protected $default_br_text = ''; 1245 1752 1246 1753 /** … … 1249 1756 * @var string 1250 1757 */ 1251 public $default_span_text = "";1758 public $default_span_text = ''; 1252 1759 1253 1760 /** … … 1264 1771 */ 1265 1772 protected $self_closing_tags = array( 1266 'area' =>1,1267 'base' =>1,1268 'br' =>1,1269 'col' =>1,1270 'embed' =>1,1271 'hr' =>1,1272 'img' =>1,1273 'input' =>1,1274 'link' =>1,1275 'meta' =>1,1276 'param' =>1,1277 'source' =>1,1278 'track' =>1,1279 'wbr' =>11773 'area' => 1, 1774 'base' => 1, 1775 'br' => 1, 1776 'col' => 1, 1777 'embed' => 1, 1778 'hr' => 1, 1779 'img' => 1, 1780 'input' => 1, 1781 'link' => 1, 1782 'meta' => 1, 1783 'param' => 1, 1784 'source' => 1, 1785 'track' => 1, 1786 'wbr' => 1 1280 1787 ); 1281 1788 … … 1292 1799 */ 1293 1800 protected $block_tags = array( 1294 'body' =>1,1295 'div' =>1,1296 'form' =>1,1297 'root' =>1,1298 'span' =>1,1299 'table' =>11801 'body' => 1, 1802 'div' => 1, 1803 'form' => 1, 1804 'root' => 1, 1805 'span' => 1, 1806 'table' => 1 1300 1807 ); 1301 1808 … … 1356 1863 */ 1357 1864 protected $optional_closing_tags = array( 1358 'b'=>array('b'=>1), // Not optional, see https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element 1359 'dd'=>array('dd'=>1, 'dt'=>1), 1360 'dl'=>array('dd'=>1, 'dt'=>1), // Not optional, see https://www.w3.org/TR/html/grouping-content.html#the-dl-element 1361 'dt'=>array('dd'=>1, 'dt'=>1), 1362 'li'=>array('li'=>1), 1363 'optgroup'=>array('optgroup'=>1, 'option'=>1), 1364 'option'=>array('optgroup'=>1, 'option'=>1), 1365 'p'=>array('p'=>1), 1366 'rp'=>array('rp'=>1, 'rt'=>1), 1367 'rt'=>array('rp'=>1, 'rt'=>1), 1368 'td'=>array('td'=>1, 'th'=>1), 1369 'th'=>array('td'=>1, 'th'=>1), 1370 'tr'=>array('td'=>1, 'th'=>1, 'tr'=>1), 1865 // Not optional, see 1866 // https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element 1867 'b' => array('b' => 1), 1868 'dd' => array('dd' => 1, 'dt' => 1), 1869 // Not optional, see 1870 // https://www.w3.org/TR/html/grouping-content.html#the-dl-element 1871 'dl' => array('dd' => 1, 'dt' => 1), 1872 'dt' => array('dd' => 1, 'dt' => 1), 1873 'li' => array('li' => 1), 1874 'optgroup' => array('optgroup' => 1, 'option' => 1), 1875 'option' => array('optgroup' => 1, 'option' => 1), 1876 'p' => array('p' => 1), 1877 'rp' => array('rp' => 1, 'rt' => 1), 1878 'rt' => array('rp' => 1, 'rt' => 1), 1879 'td' => array('td' => 1, 'th' => 1), 1880 'th' => array('td' => 1, 'th' => 1), 1881 'tr' => array('td' => 1, 'th' => 1, 'tr' => 1), 1371 1882 ); 1372 1883 1373 function __construct($str=null, $lowercase=true, $forceTagsClosed=true, $target_charset=DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT, $options=0) 1374 { 1375 if ($str) 1376 { 1377 if (preg_match("/^http:\/\//i",$str) || is_file($str)) 1378 { 1884 function __construct( 1885 $str = null, 1886 $lowercase = true, 1887 $forceTagsClosed = true, 1888 $target_charset = DEFAULT_TARGET_CHARSET, 1889 $stripRN = true, 1890 $defaultBRText = DEFAULT_BR_TEXT, 1891 $defaultSpanText = DEFAULT_SPAN_TEXT, 1892 $options = 0) 1893 { 1894 if ($str) { 1895 if (preg_match('/^http:\/\//i', $str) || is_file($str)) { 1379 1896 $this->load_file($str); 1380 } 1381 else 1382 { 1383 $this->load($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText, $options); 1384 } 1385 } 1386 // Forcing tags to be closed implies that we don't trust the html, but it can lead to parsing errors if we SHOULD trust the html. 1897 } else { 1898 $this->load( 1899 $str, 1900 $lowercase, 1901 $stripRN, 1902 $defaultBRText, 1903 $defaultSpanText, 1904 $options 1905 ); 1906 } 1907 } 1908 // Forcing tags to be closed implies that we don't trust the html, but 1909 // it can lead to parsing errors if we SHOULD trust the html. 1387 1910 if (!$forceTagsClosed) { 1388 $this->optional_closing_array=array(); 1389 } 1911 $this->optional_closing_array = array(); 1912 } 1913 1390 1914 $this->_target_charset = $target_charset; 1391 1915 } … … 1397 1921 1398 1922 // load html from string 1399 function load($str, $lowercase=true, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT, $options=0) 1923 function load( 1924 $str, 1925 $lowercase = true, 1926 $stripRN = true, 1927 $defaultBRText = DEFAULT_BR_TEXT, 1928 $defaultSpanText = DEFAULT_SPAN_TEXT, 1929 $options = 0) 1400 1930 { 1401 1931 global $debug_object; … … 1412 1942 // strip out the \r \n's if we are told to. 1413 1943 if ($stripRN) { 1414 $this->doc = str_replace("\r", " ", $this->doc);1415 $this->doc = str_replace("\n", " ", $this->doc);1944 $this->doc = str_replace("\r", ' ', $this->doc); 1945 $this->doc = str_replace("\n", ' ', $this->doc); 1416 1946 1417 1947 // set the length of content since we have changed it. … … 1443 1973 // make load function chainable 1444 1974 return $this; 1445 1446 1975 } 1447 1976 … … 1451 1980 $args = func_get_args(); 1452 1981 1453 if( $doc = call_user_func_array('file_get_contents', $args) !== false) {1982 if(($doc = call_user_func_array('file_get_contents', $args)) !== false) { 1454 1983 $this->load($doc, true); 1455 1984 } else { … … 1481 2010 1482 2011 // save dom as string 1483 function save($filepath ='')2012 function save($filepath = '') 1484 2013 { 1485 2014 $ret = $this->root->innertext(); 1486 if ($filepath !=='') file_put_contents($filepath, $ret, LOCK_EX);2015 if ($filepath !== '') { file_put_contents($filepath, $ret, LOCK_EX); } 1487 2016 return $ret; 1488 2017 } … … 1490 2019 // find dom node by css selector 1491 2020 // Paperg - allow us to specify that we want case insensitive testing of the value of the selector. 1492 function find($selector, $idx =null, $lowercase=false)2021 function find($selector, $idx = null, $lowercase = false) 1493 2022 { 1494 2023 return $this->root->find($selector, $idx, $lowercase); … … 1498 2027 function clear() 1499 2028 { 1500 foreach ($this->nodes as $n) {$n->clear(); $n = null;} 1501 // This add next line is documented in the sourceforge repository. 2977248 as a fix for ongoing memory leaks that occur even with the use of clear. 1502 if (isset($this->children)) foreach ($this->children as $n) {$n->clear(); $n = null;} 1503 if (isset($this->parent)) {$this->parent->clear(); unset($this->parent);} 1504 if (isset($this->root)) {$this->root->clear(); unset($this->root);} 2029 foreach ($this->nodes as $n) { 2030 $n->clear(); $n = null; 2031 } 2032 2033 // This add next line is documented in the sourceforge repository. 2034 // 2977248 as a fix for ongoing memory leaks that occur even with the 2035 // use of clear. 2036 if (isset($this->children)) { 2037 foreach ($this->children as $n) { 2038 $n->clear(); $n = null; 2039 } 2040 } 2041 2042 if (isset($this->parent)) { 2043 $this->parent->clear(); 2044 unset($this->parent); 2045 } 2046 2047 if (isset($this->root)) { 2048 $this->root->clear(); 2049 unset($this->root); 2050 } 2051 1505 2052 unset($this->doc); 1506 2053 unset($this->noise); 1507 2054 } 1508 2055 1509 function dump($show_attr =true)2056 function dump($show_attr = true) 1510 2057 { 1511 2058 $this->root->dump($show_attr); … … 1513 2060 1514 2061 // prepare HTML data and init everything 1515 protected function prepare($str, $lowercase=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) 2062 protected function prepare( 2063 $str, $lowercase = true, 2064 $defaultBRText = DEFAULT_BR_TEXT, 2065 $defaultSpanText = DEFAULT_SPAN_TEXT) 1516 2066 { 1517 2067 $this->clear(); … … 1519 2069 $this->doc = trim($str); 1520 2070 $this->size = strlen($this->doc); 1521 $this->original_size = $this->size; // Save the original size of the html that we got in. It might be useful to someone.2071 $this->original_size = $this->size; // original size of the html 1522 2072 $this->pos = 0; 1523 2073 $this->cursor = 1; … … 1532 2082 $this->root->nodetype = HDOM_TYPE_ROOT; 1533 2083 $this->parent = $this->root; 1534 if ($this->size >0) $this->char = $this->doc[0];2084 if ($this->size > 0) { $this->char = $this->doc[0]; } 1535 2085 } 1536 2086 … … 1545 2095 // Read next tag if there is no text between current position and the 1546 2096 // next opening tag. 1547 if (($s = $this->copy_until_char('<'))==='') 1548 { 2097 if (($s = $this->copy_until_char('<')) === '') { 1549 2098 if($this->read_tag()) { 1550 2099 continue; … … 1562 2111 } 1563 2112 1564 // PAPERG - dkchou - added this to try to identify the character set of the page we have just parsed so we know better how to spit it out later. 1565 // NOTE: IF you provide a routine called get_last_retrieve_url_contents_content_type which returns the CURLINFO_CONTENT_TYPE from the last curl_exec 1566 // (or the content_type header from the last transfer), we will parse THAT, and if a charset is specified, we will use it over any other mechanism. 2113 // PAPERG - dkchou - added this to try to identify the character set of the 2114 // page we have just parsed so we know better how to spit it out later. 2115 // NOTE: IF you provide a routine called 2116 // get_last_retrieve_url_contents_content_type which returns the 2117 // CURLINFO_CONTENT_TYPE from the last curl_exec 2118 // (or the content_type header from the last transfer), we will parse THAT, 2119 // and if a charset is specified, we will use it over any other mechanism. 1567 2120 protected function parse_charset() 1568 2121 { … … 1571 2124 $charset = null; 1572 2125 1573 if (function_exists('get_last_retrieve_url_contents_content_type')) 1574 { 2126 if (function_exists('get_last_retrieve_url_contents_content_type')) { 1575 2127 $contentTypeHeader = get_last_retrieve_url_contents_content_type(); 1576 2128 $success = preg_match('/charset=(.+)/', $contentTypeHeader, $matches); 1577 if ($success) 1578 { 2129 if ($success) { 1579 2130 $charset = $matches[1]; 1580 if (is_object($debug_object)) {$debug_object->debug_log(2, 'header content-type found charset of: ' . $charset);} 1581 } 1582 1583 } 1584 1585 if (empty($charset)) 1586 { 1587 $el = $this->root->find('meta[http-equiv=Content-Type]',0, true); 1588 if (!empty($el)) 1589 { 2131 if (is_object($debug_object)) { 2132 $debug_object->debug_log(2, 2133 'header content-type found charset of: ' 2134 . $charset 2135 ); 2136 } 2137 } 2138 } 2139 2140 if (empty($charset)) { 2141 $el = $this->root->find('meta[http-equiv=Content-Type]', 0, true); 2142 2143 if (!empty($el)) { 1590 2144 $fullvalue = $el->content; 1591 if (is_object($debug_object)) {$debug_object->debug_log(2, 'meta content-type tag found' . $fullvalue);} 1592 1593 if (!empty($fullvalue)) 1594 { 1595 $success = preg_match('/charset=(.+)/i', $fullvalue, $matches); 1596 if ($success) 1597 { 2145 if (is_object($debug_object)) { 2146 $debug_object->debug_log(2, 2147 'meta content-type tag found' 2148 . $fullvalue 2149 ); 2150 } 2151 2152 if (!empty($fullvalue)) { 2153 $success = preg_match( 2154 '/charset=(.+)/i', 2155 $fullvalue, 2156 $matches 2157 ); 2158 2159 if ($success) { 1598 2160 $charset = $matches[1]; 1599 } 1600 else 1601 { 1602 // If there is a meta tag, and they don't specify the character set, research says that it's typically ISO-8859-1 1603 if (is_object($debug_object)) {$debug_object->debug_log(2, 'meta content-type tag couldn\'t be parsed. using iso-8859 default.');} 2161 } else { 2162 // If there is a meta tag, and they don't specify the 2163 // character set, research says that it's typically 2164 // ISO-8859-1 2165 if (is_object($debug_object)) { 2166 $debug_object->debug_log(2, 2167 'meta content-type tag couldn\'t be parsed. using iso-8859 default.' 2168 ); 2169 } 2170 1604 2171 $charset = 'ISO-8859-1'; 1605 2172 } … … 1608 2175 } 1609 2176 1610 // If we couldn't find a charset above, then lets try to detect one based on the text we got... 1611 if (empty($charset)) 1612 { 1613 // Use this in case mb_detect_charset isn't installed/loaded on this machine. 2177 // If we couldn't find a charset above, then lets try to detect one 2178 // based on the text we got... 2179 if (empty($charset)) { 2180 // Use this in case mb_detect_charset isn't installed/loaded on 2181 // this machine. 1614 2182 $charset = false; 1615 if (function_exists('mb_detect_encoding')) 1616 { 2183 if (function_exists('mb_detect_encoding')) { 1617 2184 // Have php try to detect the encoding from the text given to us. 1618 $charset = mb_detect_encoding($this->doc . "ascii", $encoding_list = array( "UTF-8", "CP1252" ) ); 1619 if (is_object($debug_object)) {$debug_object->debug_log(2, 'mb_detect found: ' . $charset);} 1620 } 1621 1622 // and if this doesn't work... then we need to just wrongheadedly assume it's UTF-8 so that we can move on - cause this will usually give us most of what we need... 1623 if ($charset === false) 1624 { 1625 if (is_object($debug_object)) {$debug_object->debug_log(2, 'since mb_detect failed - using default of utf-8');} 2185 $charset = mb_detect_encoding( 2186 $this->doc . 'ascii', 2187 $encoding_list = array( 'UTF-8', 'CP1252' ) 2188 ); 2189 2190 if (is_object($debug_object)) { 2191 $debug_object->debug_log(2, 'mb_detect found: ' . $charset); 2192 } 2193 } 2194 2195 // and if this doesn't work... then we need to just wrongheadedly 2196 // assume it's UTF-8 so that we can move on - cause this will 2197 // usually give us most of what we need... 2198 if ($charset === false) { 2199 if (is_object($debug_object)) { 2200 $debug_object->debug_log( 2201 2, 2202 'since mb_detect failed - using default of utf-8' 2203 ); 2204 } 2205 1626 2206 $charset = 'UTF-8'; 1627 2207 } 1628 2208 } 1629 2209 1630 // Since CP1252 is a superset, if we get one of it's subsets, we want it instead. 1631 if ((strtolower($charset) == strtolower('ISO-8859-1')) || (strtolower($charset) == strtolower('Latin1')) || (strtolower($charset) == strtolower('Latin-1'))) 1632 { 1633 if (is_object($debug_object)) {$debug_object->debug_log(2, 'replacing ' . $charset . ' with CP1252 as its a superset');} 2210 // Since CP1252 is a superset, if we get one of it's subsets, we want 2211 // it instead. 2212 if ((strtolower($charset) == strtolower('ISO-8859-1')) 2213 || (strtolower($charset) == strtolower('Latin1')) 2214 || (strtolower($charset) == strtolower('Latin-1'))) { 2215 2216 if (is_object($debug_object)) { 2217 $debug_object->debug_log( 2218 2, 2219 'replacing ' . $charset . ' with CP1252 as its a superset' 2220 ); 2221 } 2222 1634 2223 $charset = 'CP1252'; 1635 2224 } 1636 2225 1637 if (is_object($debug_object)) {$debug_object->debug_log(1, 'EXIT - ' . $charset);} 2226 if (is_object($debug_object)) { 2227 $debug_object->debug_log(1, 'EXIT - ' . $charset); 2228 } 1638 2229 1639 2230 return $this->_charset = $charset; … … 1648 2239 { 1649 2240 // Set end position if no further tags found 1650 if ($this->char!=='<') 1651 { 2241 if ($this->char !== '<') { 1652 2242 $this->root->_[HDOM_INFO_END] = $this->cursor; 1653 2243 return false; 1654 2244 } 2245 1655 2246 $begin_tag_pos = $this->pos; 1656 $this->char = (++$this->pos <$this->size) ? $this->doc[$this->pos] : null; // next2247 $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 1657 2248 1658 2249 // end tag 1659 if ($this->char==='/') 1660 { 1661 $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next 2250 if ($this->char === '/') { 2251 $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 1662 2252 1663 2253 // Skip whitespace in end tags (i.e. in "</ html>") … … 1666 2256 1667 2257 // Skip attributes in end tags 1668 if (($pos = strpos($tag, ' ')) !==false)2258 if (($pos = strpos($tag, ' ')) !== false) { 1669 2259 $tag = substr($tag, 0, $pos); 2260 } 1670 2261 1671 2262 $parent_lower = strtolower($this->parent->tag); … … 1674 2265 // The end tag is supposed to close the parent tag. Handle situations 1675 2266 // when it doesn't 1676 if ($parent_lower!==$tag_lower) 1677 { 2267 if ($parent_lower !== $tag_lower) { 1678 2268 // Parent tag does not have to be closed necessarily (optional closing tag) 1679 2269 // Current tag is a block tag, so it may close an ancestor 1680 if (isset($this->optional_closing_tags[$parent_lower]) && isset($this->block_tags[$tag_lower])) 1681 { 2270 if (isset($this->optional_closing_tags[$parent_lower]) 2271 && isset($this->block_tags[$tag_lower])) { 2272 1682 2273 $this->parent->_[HDOM_INFO_END] = 0; 1683 2274 $org_parent = $this->parent; … … 1685 2276 // Traverse ancestors to find a matching opening tag 1686 2277 // Stop at root node 1687 while (($this->parent->parent) && strtolower($this->parent->tag)!==$tag_lower) 2278 while (($this->parent->parent) 2279 && strtolower($this->parent->tag) !== $tag_lower 2280 ){ 1688 2281 $this->parent = $this->parent->parent; 2282 } 1689 2283 1690 2284 // If we don't have a match add current tag as text node 1691 if (strtolower($this->parent->tag) !==$tag_lower) {2285 if (strtolower($this->parent->tag) !== $tag_lower) { 1692 2286 $this->parent = $org_parent; // restore origonal parent 1693 if ($this->parent->parent) $this->parent = $this->parent->parent; 2287 2288 if ($this->parent->parent) { 2289 $this->parent = $this->parent->parent; 2290 } 2291 1694 2292 $this->parent->_[HDOM_INFO_END] = $this->cursor; 1695 2293 return $this->as_text_node($tag); 1696 2294 } 1697 } 1698 // Grandparent exists and current tag is a block tag, so our parent doesn't have an end tag 1699 else if (($this->parent->parent) && isset($this->block_tags[$tag_lower])) 1700 { 2295 } elseif (($this->parent->parent) 2296 && isset($this->block_tags[$tag_lower]) 2297 ) { 2298 // Grandparent exists and current tag is a block tag, so our 2299 // parent doesn't have an end tag 1701 2300 $this->parent->_[HDOM_INFO_END] = 0; // No end tag 1702 2301 $org_parent = $this->parent; … … 1704 2303 // Traverse ancestors to find a matching opening tag 1705 2304 // Stop at root node 1706 while (($this->parent->parent) && strtolower($this->parent->tag)!==$tag_lower) 2305 while (($this->parent->parent) 2306 && strtolower($this->parent->tag) !== $tag_lower 2307 ) { 1707 2308 $this->parent = $this->parent->parent; 2309 } 1708 2310 1709 2311 // If we don't have a match add current tag as text node 1710 if (strtolower($this->parent->tag)!==$tag_lower) 1711 { 2312 if (strtolower($this->parent->tag) !== $tag_lower) { 1712 2313 $this->parent = $org_parent; // restore origonal parent 1713 2314 $this->parent->_[HDOM_INFO_END] = $this->cursor; 1714 2315 return $this->as_text_node($tag); 1715 2316 } 1716 } 1717 // Grandparent exists and current tag closes it 1718 else if (($this->parent->parent) && strtolower($this->parent->parent->tag)===$tag_lower) 1719 { 2317 } elseif (($this->parent->parent) 2318 && strtolower($this->parent->parent->tag) === $tag_lower 2319 ) { // Grandparent exists and current tag closes it 1720 2320 $this->parent->_[HDOM_INFO_END] = 0; 1721 2321 $this->parent = $this->parent->parent; 2322 } else { // Random tag, add as text node 2323 return $this->as_text_node($tag); 1722 2324 } 1723 else // Random tag, add as text node1724 return $this->as_text_node($tag);1725 2325 } 1726 2326 1727 2327 // Set end position of parent tag to current cursor position 1728 2328 $this->parent->_[HDOM_INFO_END] = $this->cursor; 1729 if ($this->parent->parent) $this->parent = $this->parent->parent; 1730 1731 $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next 2329 2330 if ($this->parent->parent) { 2331 $this->parent = $this->parent->parent; 2332 } 2333 2334 $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 1732 2335 return true; 1733 2336 } … … 1744 2347 // <![CDATA[ ... ]]> 1745 2348 // <!-- Comment --> 1746 if (isset($tag[0]) && $tag[0] ==='!') {2349 if (isset($tag[0]) && $tag[0] === '!') { 1747 2350 $node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until_char('>'); 1748 2351 1749 if (isset($tag[2]) && $tag[1] ==='-' && $tag[2]==='-') { // Comment ("<!--")2352 if (isset($tag[2]) && $tag[1] === '-' && $tag[2] === '-') { // Comment ("<!--") 1750 2353 $node->nodetype = HDOM_TYPE_COMMENT; 1751 2354 $node->tag = 'comment'; … … 1754 2357 $node->tag = 'unknown'; 1755 2358 } 1756 if ($this->char==='>') $node->_[HDOM_INFO_TEXT].='>'; 2359 2360 if ($this->char === '>') { $node->_[HDOM_INFO_TEXT] .= '>'; } 2361 1757 2362 $this->link_nodes($node, true); 1758 $this->char = (++$this->pos <$this->size) ? $this->doc[$this->pos] : null; // next2363 $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 1759 2364 return true; 1760 2365 } … … 1762 2367 // The start tag cannot contain another start tag, if so add as text 1763 2368 // i.e. "<<html>" 1764 if ($pos =strpos($tag, '<')!==false) {2369 if ($pos = strpos($tag, '<') !== false) { 1765 2370 $tag = '<' . substr($tag, 0, -1); 1766 2371 $node->_[HDOM_INFO_TEXT] = $tag; … … 1771 2376 1772 2377 // Handle invalid tag names (i.e. "<html#doc>") 1773 if (!preg_match( "/^\w[\w:-]*$/", $tag)) {2378 if (!preg_match('/^\w[\w:-]*$/', $tag)) { 1774 2379 $node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>'); 1775 2380 1776 2381 // Next char is the beginning of a new tag, don't touch it. 1777 if ($this->char ==='<') {2382 if ($this->char === '<') { 1778 2383 $this->link_nodes($node, false); 1779 2384 return true; … … 1781 2386 1782 2387 // Next char closes current tag, add and be done with it. 1783 if ($this->char ==='>') $node->_[HDOM_INFO_TEXT].='>';2388 if ($this->char === '>') { $node->_[HDOM_INFO_TEXT] .= '>'; } 1784 2389 $this->link_nodes($node, false); 1785 $this->char = (++$this->pos <$this->size) ? $this->doc[$this->pos] : null; // next2390 $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 1786 2391 return true; 1787 2392 } … … 1793 2398 1794 2399 // handle optional closing tags 1795 if (isset($this->optional_closing_tags[$tag_lower]) ) 1796 { 2400 if (isset($this->optional_closing_tags[$tag_lower])) { 1797 2401 // Traverse ancestors to close all optional closing tags 1798 while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)])) 1799 { 2402 while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)])) { 1800 2403 $this->parent->_[HDOM_INFO_END] = 0; 1801 2404 $this->parent = $this->parent->parent; … … 1805 2408 1806 2409 $guard = 0; // prevent infinity loop 1807 $space = array($this->copy_skip($this->token_blank), '', ''); // [0] Space between tag and first attribute 2410 2411 // [0] Space between tag and first attribute 2412 $space = array($this->copy_skip($this->token_blank), '', ''); 1808 2413 1809 2414 // attributes 1810 do 1811 { 2415 do { 1812 2416 // Everything until the first equal sign should be the attribute name 1813 2417 $name = $this->copy_until($this->token_equal); 1814 2418 1815 if ($name==='' && $this->char!==null && $space[0]==='') 1816 { 2419 if ($name === '' && $this->char !== null && $space[0] === '') { 1817 2420 break; 1818 2421 } 1819 2422 1820 if ($guard===$this->pos) // Escape infinite loop 1821 { 1822 $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next 2423 if ($guard === $this->pos) { // Escape infinite loop 2424 $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 1823 2425 continue; 1824 2426 } 2427 1825 2428 $guard = $this->pos; 1826 2429 1827 2430 // handle endless '<' 1828 if ($this->pos>=$this->size-1 && $this->char!=='>') { // Out of bounds before the tag ended 2431 // Out of bounds before the tag ended 2432 if ($this->pos >= $this->size - 1 && $this->char !== '>') { 1829 2433 $node->nodetype = HDOM_TYPE_TEXT; 1830 2434 $node->_[HDOM_INFO_END] = 0; 1831 $node->_[HDOM_INFO_TEXT] = '<' .$tag . $space[0] . $name;2435 $node->_[HDOM_INFO_TEXT] = '<' . $tag . $space[0] . $name; 1832 2436 $node->tag = 'text'; 1833 2437 $this->link_nodes($node, false); … … 1836 2440 1837 2441 // handle mismatch '<' 1838 if ($this->doc[$this->pos-1]=='<') { // Attributes cannot start after opening tag 2442 // Attributes cannot start after opening tag 2443 if ($this->doc[$this->pos - 1] == '<') { 1839 2444 $node->nodetype = HDOM_TYPE_TEXT; 1840 2445 $node->tag = 'text'; 1841 2446 $node->attr = array(); 1842 2447 $node->_[HDOM_INFO_END] = 0; 1843 $node->_[HDOM_INFO_TEXT] = substr($this->doc, $begin_tag_pos, $this->pos-$begin_tag_pos-1); 2448 $node->_[HDOM_INFO_TEXT] = substr( 2449 $this->doc, 2450 $begin_tag_pos, 2451 $this->pos - $begin_tag_pos - 1 2452 ); 1844 2453 $this->pos -= 2; 1845 $this->char = (++$this->pos <$this->size) ? $this->doc[$this->pos] : null; // next2454 $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 1846 2455 $this->link_nodes($node, false); 1847 2456 return true; 1848 2457 } 1849 2458 1850 if ($name!=='/' && $name!=='') { // this is a attribute name 1851 $space[1] = $this->copy_skip($this->token_blank); // [1] Whitespace after attribute name 2459 if ($name !== '/' && $name !== '') { // this is a attribute name 2460 // [1] Whitespace after attribute name 2461 $space[1] = $this->copy_skip($this->token_blank); 2462 1852 2463 $name = $this->restore_noise($name); // might be a noisy name 1853 if ($this->lowercase) $name = strtolower($name); 1854 if ($this->char==='=') { // attribute with value 1855 $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next 2464 2465 if ($this->lowercase) { $name = strtolower($name); } 2466 2467 if ($this->char === '=') { // attribute with value 2468 $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 1856 2469 $this->parse_attr($node, $name, $space); // get attribute value 1857 } 1858 else { 2470 } else { 1859 2471 //no value attr: nowrap, checked selected... 1860 2472 $node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_NO; 1861 2473 $node->attr[$name] = true; 1862 if ($this->char !='>') $this->char = $this->doc[--$this->pos];// prev2474 if ($this->char != '>') { $this->char = $this->doc[--$this->pos]; } // prev 1863 2475 } 2476 1864 2477 $node->_[HDOM_INFO_SPACE][] = $space; 1865 $space = array($this->copy_skip($this->token_blank), '', ''); // prepare for next attribute 1866 } 1867 else // no more attributes 2478 2479 // prepare for next attribute 2480 $space = array( 2481 $this->copy_skip($this->token_blank), 2482 '', 2483 '' 2484 ); 2485 } else { // no more attributes 1868 2486 break; 1869 } while ($this->char!=='>' && $this->char!=='/'); // go until the tag ended 2487 } 2488 } while ($this->char !== '>' && $this->char !== '/'); // go until the tag ended 1870 2489 1871 2490 $this->link_nodes($node, true); … … 1873 2492 1874 2493 // handle empty tags (i.e. "<div/>") 1875 if ($this->copy_until_char('>')==='/') 1876 { 2494 if ($this->copy_until_char('>') === '/') { 1877 2495 $node->_[HDOM_INFO_ENDSPACE] .= '/'; 1878 2496 $node->_[HDOM_INFO_END] = 0; 1879 } 1880 else 1881 { 2497 } else { 1882 2498 // reset parent 1883 if (!isset($this->self_closing_tags[strtolower($node->tag)])) $this->parent = $node; 1884 } 1885 $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next 2499 if (!isset($this->self_closing_tags[strtolower($node->tag)])) { 2500 $this->parent = $node; 2501 } 2502 } 2503 2504 $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 1886 2505 1887 2506 // If it's a BR tag, we need to set it's text to the default text. 1888 2507 // This way when we see it in plaintext, we can generate formatting that the user wants. 1889 2508 // since a br tag never has sub nodes, this works well. 1890 if ($node->tag == "br") 1891 { 2509 if ($node->tag === 'br') { 1892 2510 $node->_[HDOM_INFO_INNER] = $this->default_br_text; 1893 2511 } … … 1907 2525 { 1908 2526 // Per sourceforge: http://sourceforge.net/tracker/?func=detail&aid=3061408&group_id=218559&atid=1044037 1909 // If the attribute is already defined inside a tag, only pay attention to the first one as opposed to the last one. 2527 // If the attribute is already defined inside a tag, only pay attention 2528 // to the first one as opposed to the last one. 1910 2529 // https://stackoverflow.com/a/26341866 1911 if (isset($node->attr[$name])) 1912 { 2530 if (isset($node->attr[$name])) { 1913 2531 return; 1914 2532 } 1915 2533 1916 $space[2] = $this->copy_skip($this->token_blank); // [2] Whitespace between "=" and the value 2534 // [2] Whitespace between "=" and the value 2535 $space[2] = $this->copy_skip($this->token_blank); 2536 1917 2537 switch ($this->char) { 1918 2538 case '"': // value is anything between double quotes 1919 2539 $node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE; 1920 $this->char = (++$this->pos <$this->size) ? $this->doc[$this->pos] : null; // next2540 $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 1921 2541 $node->attr[$name] = $this->restore_noise($this->copy_until_char('"')); 1922 $this->char = (++$this->pos <$this->size) ? $this->doc[$this->pos] : null; // next2542 $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 1923 2543 break; 1924 2544 case '\'': // value is anything between single quotes 1925 2545 $node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_SINGLE; 1926 $this->char = (++$this->pos <$this->size) ? $this->doc[$this->pos] : null; // next2546 $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 1927 2547 $node->attr[$name] = $this->restore_noise($this->copy_until_char('\'')); 1928 $this->char = (++$this->pos <$this->size) ? $this->doc[$this->pos] : null; // next2548 $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 1929 2549 break; 1930 2550 default: // value is anything until the first space or end tag … … 1932 2552 $node->attr[$name] = $this->restore_noise($this->copy_until($this->token_attr)); 1933 2553 } 1934 // PaperG: Attributes should not have \r or \n in them, that counts as html whitespace. 1935 $node->attr[$name] = str_replace("\r", "", $node->attr[$name]); 1936 $node->attr[$name] = str_replace("\n", "", $node->attr[$name]); 1937 // PaperG: If this is a "class" selector, lets get rid of the preceeding and trailing space since some people leave it in the multi class case. 1938 if ($name == "class") { 2554 // PaperG: Attributes should not have \r or \n in them, that counts as 2555 // html whitespace. 2556 $node->attr[$name] = str_replace("\r", '', $node->attr[$name]); 2557 $node->attr[$name] = str_replace("\n", '', $node->attr[$name]); 2558 // PaperG: If this is a "class" selector, lets get rid of the preceeding 2559 // and trailing space since some people leave it in the multi class case. 2560 if ($name === 'class') { 1939 2561 $node->attr[$name] = trim($node->attr[$name]); 1940 2562 } … … 1953 2575 $node->parent = $this->parent; 1954 2576 $this->parent->nodes[] = $node; 1955 if ($is_child) 1956 { 2577 if ($is_child) { 1957 2578 $this->parent->children[] = $node; 1958 2579 } … … 1971 2592 $node->_[HDOM_INFO_TEXT] = '</' . $tag . '>'; 1972 2593 $this->link_nodes($node, false); 1973 $this->char = (++$this->pos <$this->size) ? $this->doc[$this->pos] : null; // next2594 $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 1974 2595 return true; 1975 2596 } … … 1986 2607 { 1987 2608 $this->pos += strspn($this->doc, $chars, $this->pos); 1988 $this->char = ($this->pos <$this->size) ? $this->doc[$this->pos] : null; // next2609 $this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 1989 2610 } 1990 2611 … … 2002 2623 $len = strspn($this->doc, $chars, $pos); 2003 2624 $this->pos += $len; 2004 $this->char = ($this->pos <$this->size) ? $this->doc[$this->pos] : null; // next2005 if ($len ===0) return '';2625 $this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 2626 if ($len === 0) { return ''; } 2006 2627 return substr($this->doc, $pos, $len); 2007 2628 } … … 2020 2641 $len = strcspn($this->doc, $chars, $pos); 2021 2642 $this->pos += $len; 2022 $this->char = ($this->pos <$this->size) ? $this->doc[$this->pos] : null; // next2643 $this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next 2023 2644 return substr($this->doc, $pos, $len); 2024 2645 } … … 2034 2655 protected function copy_until_char($char) 2035 2656 { 2036 if ($this->char ===null) return '';2037 2038 if (($pos = strpos($this->doc, $char, $this->pos)) ===false) {2039 $ret = substr($this->doc, $this->pos, $this->size -$this->pos);2657 if ($this->char === null) { return ''; } 2658 2659 if (($pos = strpos($this->doc, $char, $this->pos)) === false) { 2660 $ret = substr($this->doc, $this->pos, $this->size - $this->pos); 2040 2661 $this->char = null; 2041 2662 $this->pos = $this->size; … … 2043 2664 } 2044 2665 2045 if ($pos===$this->pos) return ''; 2666 if ($pos === $this->pos) { return ''; } 2667 2046 2668 $pos_old = $this->pos; 2047 2669 $this->char = $this->doc[$pos]; 2048 2670 $this->pos = $pos; 2049 return substr($this->doc, $pos_old, $pos -$pos_old);2671 return substr($this->doc, $pos_old, $pos - $pos_old); 2050 2672 } 2051 2673 … … 2059 2681 * to only remove the captured data. 2060 2682 */ 2061 protected function remove_noise($pattern, $remove_tag =false)2683 protected function remove_noise($pattern, $remove_tag = false) 2062 2684 { 2063 2685 global $debug_object; 2064 2686 if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } 2065 2687 2066 $count = preg_match_all($pattern, $this->doc, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE); 2067 2068 for ($i=$count-1; $i>-1; --$i) 2069 { 2070 $key = '___noise___'.sprintf('% 5d', count($this->noise)+1000); 2071 if (is_object($debug_object)) { $debug_object->debug_log(2, 'key is: ' . $key); } 2688 $count = preg_match_all( 2689 $pattern, 2690 $this->doc, 2691 $matches, 2692 PREG_SET_ORDER | PREG_OFFSET_CAPTURE 2693 ); 2694 2695 for ($i = $count - 1; $i > -1; --$i) { 2696 $key = '___noise___' . sprintf('% 5d', count($this->noise) + 1000); 2697 2698 if (is_object($debug_object)) { 2699 $debug_object->debug_log(2, 'key is: ' . $key); 2700 } 2701 2072 2702 $idx = ($remove_tag) ? 0 : 1; // 0 = entire match, 1 = submatch 2073 2703 $this->noise[$key] = $matches[$i][$idx][0]; … … 2077 2707 // reset the length of content 2078 2708 $this->size = strlen($this->doc); 2079 if ($this->size>0) 2080 {2709 2710 if ($this->size > 0) { 2081 2711 $this->char = $this->doc[0]; 2082 2712 } … … 2096 2726 if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } 2097 2727 2098 while (($pos=strpos($text, '___noise___'))!==false) 2099 { 2100 // Sometimes there is a broken piece of markup, and we don't GET the pos+11 etc... token which indicates a problem outside of us... 2101 if (strlen($text) > $pos+15) 2102 { // todo: "___noise___1000" (or any number with four or more digits) in the DOM causes an infinite loop which could be utilized by malicious software 2103 $key = '___noise___'.$text[$pos+11].$text[$pos+12].$text[$pos+13].$text[$pos+14].$text[$pos+15]; 2104 if (is_object($debug_object)) { $debug_object->debug_log(2, 'located key of: ' . $key); } 2105 2106 if (isset($this->noise[$key])) 2107 { 2108 $text = substr($text, 0, $pos).$this->noise[$key].substr($text, $pos+16); 2728 while (($pos = strpos($text, '___noise___')) !== false) { 2729 // Sometimes there is a broken piece of markup, and we don't GET the 2730 // pos+11 etc... token which indicates a problem outside of us... 2731 2732 // todo: "___noise___1000" (or any number with four or more digits) 2733 // in the DOM causes an infinite loop which could be utilized by 2734 // malicious software 2735 if (strlen($text) > $pos + 15) { 2736 $key = '___noise___' 2737 . $text[$pos + 11] 2738 . $text[$pos + 12] 2739 . $text[$pos + 13] 2740 . $text[$pos + 14] 2741 . $text[$pos + 15]; 2742 2743 if (is_object($debug_object)) { 2744 $debug_object->debug_log(2, 'located key of: ' . $key); 2109 2745 } 2110 else 2111 { 2746 2747 if (isset($this->noise[$key])) { 2748 $text = substr($text, 0, $pos) 2749 . $this->noise[$key] 2750 . substr($text, $pos + 16); 2751 } else { 2112 2752 // do this to prevent an infinite loop. 2113 $text = substr($text, 0, $pos).'UNDEFINED NOISE FOR KEY: '.$key . substr($text, $pos+16); 2753 $text = substr($text, 0, $pos) 2754 . 'UNDEFINED NOISE FOR KEY: ' 2755 . $key 2756 . substr($text, $pos + 16); 2114 2757 } 2115 } 2116 else 2117 { 2118 // There is no valid key being given back to us... We must get rid of the ___noise___ or we will have a problem. 2119 $text = substr($text, 0, $pos).'NO NUMERIC NOISE KEY' . substr($text, $pos+11); 2758 } else { 2759 // There is no valid key being given back to us... We must get 2760 // rid of the ___noise___ or we will have a problem. 2761 $text = substr($text, 0, $pos) 2762 . 'NO NUMERIC NOISE KEY' 2763 . substr($text, $pos + 11); 2120 2764 } 2121 2765 } … … 2129 2773 if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } 2130 2774 2131 foreach($this->noise as $noiseElement) 2132 { 2133 if (strpos($noiseElement, $text)!==false) 2134 { 2775 foreach($this->noise as $noiseElement) { 2776 if (strpos($noiseElement, $text) !== false) { 2135 2777 return $noiseElement; 2136 2778 } 2137 2779 } 2138 2780 } 2781 2139 2782 function __toString() 2140 2783 { … … 2144 2787 function __get($name) 2145 2788 { 2146 switch ($name) 2147 { 2789 switch ($name) { 2148 2790 case 'outertext': 2149 2791 return $this->root->innertext(); … … 2160 2802 2161 2803 // camel naming conventions 2162 function childNodes($idx=-1) {return $this->root->childNodes($idx);} 2163 function firstChild() {return $this->root->first_child();} 2164 function lastChild() {return $this->root->last_child();} 2165 function createElement($name, $value=null) {return @str_get_html("<$name>$value</$name>")->first_child();} 2166 function createTextNode($value) {return @end(str_get_html($value)->nodes);} 2167 function getElementById($id) {return $this->find("#$id", 0);} 2168 function getElementsById($id, $idx=null) {return $this->find("#$id", $idx);} 2169 function getElementByTagName($name) {return $this->find($name, 0);} 2170 function getElementsByTagName($name, $idx=-1) {return $this->find($name, $idx);} 2171 function loadFile() {$args = func_get_args();$this->load_file($args);} 2804 function childNodes($idx = -1) 2805 { 2806 return $this->root->childNodes($idx); 2807 } 2808 2809 function firstChild() 2810 { 2811 return $this->root->first_child(); 2812 } 2813 2814 function lastChild() 2815 { 2816 return $this->root->last_child(); 2817 } 2818 2819 function createElement($name, $value = null) 2820 { 2821 return @str_get_html("<$name>$value</$name>")->first_child(); 2822 } 2823 2824 function createTextNode($value) 2825 { 2826 return @end(str_get_html($value)->nodes); 2827 } 2828 2829 function getElementById($id) 2830 { 2831 return $this->find("#$id", 0); 2832 } 2833 2834 function getElementsById($id, $idx = null) 2835 { 2836 return $this->find("#$id", $idx); 2837 } 2838 2839 function getElementByTagName($name) 2840 { 2841 return $this->find($name, 0); 2842 } 2843 2844 function getElementsByTagName($name, $idx = -1) 2845 { 2846 return $this->find($name, $idx); 2847 } 2848 2849 function loadFile() 2850 { 2851 $args = func_get_args(); 2852 $this->load_file($args); 2853 } 2172 2854 } 2173 2174 ?> -
convertkit/trunk/wp-convertkit.php
r2058401 r2078109 4 4 * Plugin URI: https://convertkit.com/ 5 5 * Description: Quickly and easily integrate ConvertKit forms into your site. 6 * Version: 1.7. 46 * Version: 1.7.5 7 7 * Author: ConvertKit 8 8 * Author URI: https://convertkit.com/ … … 17 17 define( 'CONVERTKIT_PLUGIN_URL', plugin_dir_url( __FILE__ ) ); 18 18 define( 'CONVERTKIT_PLUGIN_PATH', __DIR__ ); 19 define( 'CONVERTKIT_PLUGIN_VERSION', '1.7. 4' );19 define( 'CONVERTKIT_PLUGIN_VERSION', '1.7.5' ); 20 20 21 21 require_once CONVERTKIT_PLUGIN_PATH . '/vendor/autoload.php';
Note: See TracChangeset
for help on using the changeset viewer.