Changeset 3394634
- Timestamp:
- 11/12/2025 08:23:22 PM (3 months ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
scholar-publications-fetcher/trunk/scholar-publications-fetcher.php
r3336267 r3394634 4 4 * Plugin URI: https://valsze.de/2025/07/07/google-scholar-publications-fetcher-wordpress-plugin/ 5 5 * Description: Fetch, cache, and display publications from a Google Scholar profile in a responsive card layout. 6 * Version: 2. 0.06 * Version: 2.2.0 7 7 * Requires at least: 5.2 8 8 * Requires PHP: 7.2 … … 27 27 * @var string 28 28 */ 29 const VERSION = '2. 0.0';29 const VERSION = '2.2.0'; 30 30 31 31 /** … … 97 97 98 98 /** 99 * Fetch and parse publications. Uses caching to improve performance.99 * Fetch and parse ALL publications across pages. Uses caching to improve performance. 100 100 * 101 101 * @param string $user_id Google Scholar ID. 102 102 * @param int $cache_hours Cache duration in hours. 103 * @param int $max_pages Max pagination pages to fetch (safety cap). 104 * @param int $page_size Items per page (Scholar supports 20/100; we try 100). 103 105 * @return array List of publications or empty array on failure. 104 106 */ 105 private function get_publications( string $user_id, int $cache_hours = 6 ): array {107 private function get_publications( string $user_id, int $cache_hours = 6, int $max_pages = 20, int $page_size = 100 ): array { 106 108 if ( empty( $user_id ) ) { 107 109 return []; 108 110 } 109 111 110 $transient_key = 'schopufe_pubs_' . md5( $user_id);112 $transient_key = 'schopufe_pubs_' . md5( implode( '|', [ $user_id, $page_size ] ) ); 111 113 $cached_pubs = get_transient( $transient_key ); 112 113 114 if ( false !== $cached_pubs ) { 114 115 return $cached_pubs; 115 116 } 116 117 117 $url = sprintf( 'https://scholar.google.com/citations?hl=en&user=%s&view_op=list_works&sortby=pubdate', $user_id ); 118 $response = wp_remote_get( $url, [ 'timeout' => 20, 'user-agent' => 'WordPress/' . get_bloginfo( 'version' ) . '; ' . get_bloginfo( 'url' ) ] ); 119 120 if ( is_wp_error( $response ) || 200 !== wp_remote_retrieve_response_code( $response ) ) { 121 return []; 122 } 123 124 $html = wp_remote_retrieve_body( $response ); 125 if ( empty( $html ) || ! class_exists( 'DOMDocument' ) ) { 126 return []; 127 } 128 129 libxml_use_internal_errors( true ); 130 $dom = new DOMDocument(); 131 @$dom->loadHTML( $html ); 132 libxml_clear_errors(); 133 $xpath = new DOMXPath( $dom ); 134 135 $pubs = []; 136 $rows = $xpath->query( "//tr[contains(@class,'gsc_a_tr')]" ); 137 138 foreach ( $rows as $row ) { 139 $link_node = $xpath->query( ".//a[contains(@class,'gsc_a_at')]", $row )->item(0); 140 if ( ! $link_node ) continue; 141 142 $meta = $xpath->query( ".//div[contains(@class,'gs_gray')]", $row ); 143 $year_n = $xpath->query( ".//span[contains(@class,'gsc_a_hc')]", $row )->item(0); 144 145 $pubs[] = [ 146 'title' => trim( $link_node->textContent ), 147 'link' => esc_url_raw( 'https://scholar.google.com' . $link_node->getAttribute( 'href' ) ), 148 'authors' => $meta->item(0) ? trim( $meta->item(0)->textContent ) : '', 149 'journal' => $meta->item(1) ? trim( $meta->item(1)->textContent ) : '', 150 'year' => $year_n ? trim( $year_n->textContent ) : '', 151 'abstract'=> '', 152 ]; 153 } 154 155 if ( ! empty( $pubs ) ) { 156 set_transient( $transient_key, $pubs, $cache_hours * HOUR_IN_SECONDS ); 157 } 158 159 return $pubs; 160 } 161 162 /** 163 * Fetch abstract for a single publication. 118 $all = []; 119 for ( $page = 0; $page < $max_pages; $page++ ) { 120 $offset = $page * $page_size; 121 $url = sprintf( 'https://scholar.google.com/citations?hl=en&user=%s&view_op=list_works&sortby=pubdate&cstart=%d&pagesize=%d', 122 rawurlencode( $user_id ), $offset, $page_size ); 123 $response = wp_remote_get( $url, [ 124 'timeout' => 20, 125 'user-agent' => 'WordPress/' . get_bloginfo( 'version' ) . '; ' . home_url(), 126 ] ); 127 128 if ( is_wp_error( $response ) || 200 !== wp_remote_retrieve_response_code( $response ) ) { 129 break; // stop on error 130 } 131 132 $html = wp_remote_retrieve_body( $response ); 133 if ( empty( $html ) ) { 134 break; 135 } 136 137 libxml_use_internal_errors( true ); 138 $dom = new DOMDocument(); 139 @$dom->loadHTML( $html ); 140 libxml_clear_errors(); 141 $xpath = new DOMXPath( $dom ); 142 143 $rows = $xpath->query( "//tr[contains(@class,'gsc_a_tr')]" ); 144 if ( ! $rows || 0 === $rows->length ) { 145 break; // no more items 146 } 147 148 $page_items = 0; 149 foreach ( $rows as $row ) { 150 $link_node = $xpath->query( ".//a[contains(@class,'gsc_a_at')]", $row )->item(0); 151 if ( ! $link_node ) { continue; } 152 153 $meta = $xpath->query( ".//div[contains(@class,'gs_gray')]", $row ); 154 $year_n = $xpath->query( ".//span[contains(@class,'gsc_a_hc')]", $row )->item(0); 155 156 $all[] = [ 157 'title' => trim( $link_node->textContent ), 158 'link' => esc_url_raw( 'https://scholar.google.com' . $link_node->getAttribute( 'href' ) ), 159 'authors' => $meta->item(0) ? trim( $meta->item(0)->textContent ) : '', 160 'journal' => $meta->item(1) ? trim( $meta->item(1)->textContent ) : '', 161 'year' => $year_n ? trim( $year_n->textContent ) : '', 162 'abstract' => '', 163 ]; 164 $page_items++; 165 } 166 167 // If last page had fewer than requested, we're done. 168 if ( $page_items < $page_size ) { 169 break; 170 } 171 } 172 173 if ( ! empty( $all ) ) { 174 // Cache the entire set. 175 set_transient( $transient_key, $all, $cache_hours * HOUR_IN_SECONDS ); 176 } 177 178 return $all; 179 } 180 181 /** 182 * Fetch abstract for a single publication (cached separately). 164 183 * 165 184 * @param string $url Publication detail URL. … … 169 188 $transient_key = 'schopufe_abs_' . md5( $url ); 170 189 $cached_abs = get_transient( $transient_key ); 171 172 190 if ( false !== $cached_abs ) { 173 191 return $cached_abs; 174 192 } 175 176 $response = wp_remote_get( $url, [ 'timeout' => 15, 'user-agent' => 'WordPress/' . get_bloginfo( 'version' ) . '; ' . get_bloginfo( 'url' ) ] ); 193 194 $response = wp_remote_get( $url, [ 195 'timeout' => 15, 196 'user-agent' => 'WordPress/' . get_bloginfo( 'version' ) . '; ' . home_url(), 197 ] ); 177 198 if ( is_wp_error( $response ) || empty( wp_remote_retrieve_body( $response ) ) ) { 178 199 return ''; … … 189 210 $abstract = $desc_node ? trim( $desc_node->textContent ) : ''; 190 211 191 set_transient( $transient_key, $abstract, 24 * HOUR_IN_SECONDS ); // Cache abstract for 24 hours212 set_transient( $transient_key, $abstract, 24 * HOUR_IN_SECONDS ); 192 213 return $abstract; 193 214 } … … 195 216 /** 196 217 * Shortcode handler to render the publications. 218 * 219 * Attributes: 220 * - user_id (string) : required — Google Scholar ID 221 * - count (int|'all') : optional — number of items to show (default 5) or 'all' for all 222 * - show_abstract (bool) : optional — 'true' to fetch and display abstracts 223 * - cache_hours (int) : optional — cache lifetime in hours (default 6) 224 * - order (asc|desc) : optional — display order; 'desc' (default) newest→oldest, 'asc' oldest→newest 225 * - max_pages (int) : optional — safety cap for pagination (default 20) 226 * - page_size (int) : optional — items per page (default 100) 197 227 * 198 228 * @param array $atts Shortcode attributes. … … 206 236 'show_abstract' => 'false', 207 237 'cache_hours' => 6, 238 'order' => 'desc', 239 'max_pages' => 20, 240 'page_size' => 100, 208 241 ], 209 242 $atts, … … 211 244 ); 212 245 213 $user_id = sanitize_text_field( $atts['user_id'] );246 $user_id = sanitize_text_field( $atts['user_id'] ); 214 247 if ( empty( $user_id ) ) { 215 248 return '<p class="spf-error">' . esc_html__( 'Error: Google Scholar User ID is not provided.', 'scholar-publications-fetcher' ) . '</p>'; 216 249 } 217 218 $count = max( 1, intval( $atts['count'] ) ); 250 219 251 $show_abstract = filter_var( $atts['show_abstract'], FILTER_VALIDATE_BOOLEAN ); 220 252 $cache_hours = max( 1, intval( $atts['cache_hours'] ) ); 221 222 $all_pubs = $this->get_publications( $user_id, $cache_hours ); 223 253 $order = strtolower( trim( (string) $atts['order'] ) ) === 'asc' ? 'asc' : 'desc'; 254 $max_pages = max( 1, intval( $atts['max_pages'] ) ); 255 $page_size = max( 1, intval( $atts['page_size'] ) ); 256 257 // Support count="all" 258 $count_raw = is_string( $atts['count'] ) ? strtolower( trim( $atts['count'] ) ) : $atts['count']; 259 if ( $count_raw === 'all' ) { 260 $count = PHP_INT_MAX; 261 } else { 262 $count = max( 1, intval( $count_raw ) ); 263 } 264 265 $all_pubs = $this->get_publications( $user_id, $cache_hours, $max_pages, $page_size ); 224 266 if ( empty( $all_pubs ) ) { 225 267 return '<p class="spf-error">' . esc_html__( 'Could not retrieve publications. Please check the User ID or try again later.', 'scholar-publications-fetcher' ) . '</p>'; 226 268 } 227 269 270 // Scholar returns newest→oldest. Reverse for ASC (oldest→newest). 271 if ( 'asc' === $order ) { 272 $all_pubs = array_reverse( $all_pubs ); 273 } 274 228 275 $pubs_to_show = array_slice( $all_pubs, 0, $count ); 229 276 230 277 ob_start(); 231 278 ?> 232 <div class="spf-container" >279 <div class="spf-container" data-order="<?php echo esc_attr( $order ); ?>"> 233 280 <div class="spf-publication-list"> 234 281 <?php foreach ( $pubs_to_show as $p ) : ?> 235 282 <?php 236 // Fetch abstract only if needed and not already fetched.237 283 if ( $show_abstract && empty( $p['abstract'] ) ) { 238 284 $p['abstract'] = $this->fetch_abstract( $p['link'] ); … … 263 309 </div> 264 310 265 <?php if ( count( $all_pubs ) > $count) : ?>311 <?php if ( count( $all_pubs ) > count( $pubs_to_show ) ) : ?> 266 312 <div class="spf-more-link-wrapper"> 267 <a href="<?php echo esc_url( 'https://scholar.google.com/citations?hl=en&user=' . $user_id . '&view_op=list_works&sortby=pubdate' ); ?>" class="spf-button" target="_blank" rel="noopener noreferrer"> 313 <a href="<?php echo esc_url( 'https://scholar.google.com/citations?hl=en&user=' . rawurlencode( $user_id ) . '&view_op=list_works&sortby=pubdate' ); ?>" 314 class="spf-button" target="_blank" rel="noopener noreferrer"> 268 315 <?php esc_html_e( 'View All Publications', 'scholar-publications-fetcher' ); ?> 269 316 </a>
Note: See TracChangeset
for help on using the changeset viewer.