Skip to content

Commit 426dc5b

Browse files
committed
This release fixes a recently reported IRI parsing bug. It also
replaces the regex used to find rel links with XPath, and more cleanly separates microformats discovery and parsing when php-mf2 is not included.
1 parent 2f272a0 commit 426dc5b

File tree

6 files changed

+174
-109
lines changed

6 files changed

+174
-109
lines changed

library/SimplePie.php

Lines changed: 35 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1614,25 +1614,44 @@ protected function fetch_data(&$cache)
16141614
$copyContentType = $file->headers['content-type'];
16151615
try
16161616
{
1617-
// First check for h-entry microformats in the current file.
16181617
$microformats = false;
1619-
$position = 0;
1620-
while ($position = strpos($file->body, 'h-entry', $position))
1621-
{
1622-
$start = $position < 200 ? 0 : $position - 200;
1623-
$check = substr($file->body, $start, 400);
1624-
if ($microformats = preg_match('/class="[^"]*h-entry/', $check))
1618+
if (function_exists('Mf2\parse')) {
1619+
// Check for both h-feed and h-entry, as both a feed with no entries
1620+
// and a list of entries without an h-feed wrapper are both valid.
1621+
$position = 0;
1622+
while ($position = strpos($file->body, 'h-feed', $position))
16251623
{
1626-
break;
1624+
$start = $position < 200 ? 0 : $position - 200;
1625+
$check = substr($file->body, $start, 400);
1626+
if ($microformats = preg_match('/class="[^"]*h-feed/', $check))
1627+
{
1628+
break;
1629+
}
1630+
$position += 7;
1631+
}
1632+
$position = 0;
1633+
while ($position = strpos($file->body, 'h-entry', $position))
1634+
{
1635+
$start = $position < 200 ? 0 : $position - 200;
1636+
$check = substr($file->body, $start, 400);
1637+
if ($microformats = preg_match('/class="[^"]*h-entry/', $check))
1638+
{
1639+
break;
1640+
}
1641+
$position += 7;
16271642
}
1628-
$position += 7;
16291643
}
16301644
// Now also do feed discovery, but if an h-entry was found don't
16311645
// overwrite the current value of file.
16321646
$discovered = $locate->find($this->autodiscovery,
16331647
$this->all_discovered_feeds);
16341648
if ($microformats)
16351649
{
1650+
if ($hub = $locate->get_rel_link('hub'))
1651+
{
1652+
$self = $locate->get_rel_link('self');
1653+
$this->store_links($file, $hub, $self);
1654+
}
16361655
// Push the current file onto all_discovered feeds so the user can
16371656
// be shown this as one of the options.
16381657
if (isset($this->all_discovered_feeds)) {
@@ -1681,7 +1700,6 @@ protected function fetch_data(&$cache)
16811700

16821701
$this->raw_data = $file->body;
16831702
$this->permanent_url = $file->permanent_url;
1684-
$this->store_links($file);
16851703
$headers = $file->headers;
16861704
$sniffer = $this->registry->create('Content_Type_Sniffer', array(&$file));
16871705
$sniffed = $sniffer->get_type();
@@ -3221,52 +3239,19 @@ public static function merge_items($urls, $start = 0, $end = 0, $limit = 0)
32213239
*
32223240
* There is no way to find PuSH links in the body of a microformats feed,
32233241
* so they are added to the headers when found, to be used later by get_links.
3224-
* @param SimplePie_File
3242+
* @param SimplePie_File $file
3243+
* @param string $hub
3244+
* @param string $self
32253245
*/
3226-
private function store_links(&$file) {
3246+
private function store_links(&$file, $hub, $self) {
32273247
if (isset($file->headers['link']['hub']) ||
32283248
(isset($file->headers['link']) &&
32293249
preg_match('/rel=hub/', $file->headers['link'])))
32303250
{
32313251
return;
32323252
}
3233-
$hub = '';
3234-
$self = '';
3235-
$position = 0;
3236-
$regex1 = '/<(?:link|a) href="([^"]*)" rel="[^"]*hub[^"]*"/';
3237-
$regex2 = '/<(?:link|a) rel="[^"]*hub[^"]*" href="([^"]*)"/';
3238-
while ($position = strpos($file->body, 'rel="hub"', $position + 7))
3239-
{
3240-
$start = $position < 200 ? 0 : $position - 200;
3241-
$check = substr($file->body, $start, 400);
3242-
if (preg_match($regex1, $check, $match))
3243-
{
3244-
$hub = $match[1] === '' ? $file->url : $match[1];
3245-
}
3246-
else if (preg_match($regex2, $check, $match))
3247-
{
3248-
$hub = $match[1] === '' ? $file->url : $match[1];
3249-
}
3250-
if ($hub !== '') break;
3251-
}
3252-
$position = 0;
3253-
$regex1 = '/<(?:link|a) href="([^"]*)" rel="[^"]*self[^"]*"/';
3254-
$regex2 = '/<(?:link|a) rel="[^"]*self[^"]*" href="([^"]*)"/';
3255-
while ($position = strpos($file->body, 'rel="self"', $position + 7))
3256-
{
3257-
$start = $position < 200 ? 0 : $position - 200;
3258-
$check = substr($file->body, $start, 400);
3259-
if (preg_match($regex1, $check, $match))
3260-
{
3261-
$self = $match[1] === '' ? $file->url : $match[1];
3262-
}
3263-
if (preg_match($regex2, $check, $match))
3264-
{
3265-
$self = $match[1] === '' ? $file->url : $match[1];
3266-
}
3267-
if ($self !== '') break;
3268-
}
3269-
if ($hub !== '')
3253+
3254+
if ($hub)
32703255
{
32713256
if (isset($file->headers['link']))
32723257
{
@@ -3280,7 +3265,7 @@ private function store_links(&$file) {
32803265
$file->headers['link'] = '';
32813266
}
32823267
$file->headers['link'] .= '<'.$hub.'>; rel=hub';
3283-
if ($self !== '')
3268+
if ($self)
32843269
{
32853270
$file->headers['link'] .= ', <'.$self.'>; rel=self';
32863271
}

library/SimplePie/IRI.php

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -776,24 +776,20 @@ protected function scheme_normalization()
776776
*/
777777
public function is_valid()
778778
{
779-
$isauthority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;
780-
if ($this->ipath !== '' &&
781-
(
782-
$isauthority && (
783-
$this->ipath[0] !== '/' ||
784-
substr($this->ipath, 0, 2) === '//'
785-
) ||
786-
(
787-
$this->scheme === null &&
788-
!$isauthority &&
789-
strpos($this->ipath, ':') !== false &&
790-
(strpos($this->ipath, '/') === false ? true : strpos($this->ipath, ':') < strpos($this->ipath, '/'))
791-
)
792-
)
793-
)
794-
{
795-
return false;
796-
}
779+
if ($this->ipath === '') return true;
780+
781+
$isauthority = $this->iuserinfo !== null || $this->ihost !== null ||
782+
$this->port !== null;
783+
if ($isauthority && $this->ipath[0] === '/') return true;
784+
785+
if (!$isauthority && (substr($this->ipath, 0, 2) === '//')) return false;
786+
787+
// Relative urls cannot have a colon in the first path segment (and the
788+
// slashes themselves are not included so skip the first character).
789+
if (!$this->scheme && !$isauthority &&
790+
strpos($this->ipath, ':') !== false &&
791+
strpos($this->ipath, '/', 1) !== false &&
792+
strpos($this->ipath, ':') < strpos($this->ipath, '/', 1)) return false;
797793

798794
return true;
799795
}

library/SimplePie/Locator.php

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ public function get_links()
281281
{
282282
$href = trim($link->getAttribute('href'));
283283
$parsed = $this->registry->call('Misc', 'parse_url', array($href));
284-
if ($parsed['scheme'] === '' || preg_match('/^(http(s)|feed)?$/i', $parsed['scheme']))
284+
if ($parsed['scheme'] === '' || preg_match('/^(https?|feed)?$/i', $parsed['scheme']))
285285
{
286286
if (method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo())
287287
{
@@ -318,6 +318,57 @@ public function get_links()
318318
return null;
319319
}
320320

321+
public function get_rel_link($rel)
322+
{
323+
if ($this->dom === null)
324+
{
325+
throw new SimplePie_Exception('DOMDocument not found, unable to use '.
326+
'locator');
327+
}
328+
if (!class_exists('DOMXpath'))
329+
{
330+
throw new SimplePie_Exception('DOMXpath not found, unable to use '.
331+
'get_rel_link');
332+
}
333+
334+
$xpath = new DOMXpath($this->dom);
335+
$query = '//a[@rel and @href] | //link[@rel and @href]';
336+
foreach ($xpath->query($query) as $link)
337+
{
338+
$href = trim($link->getAttribute('href'));
339+
$parsed = $this->registry->call('Misc', 'parse_url', array($href));
340+
if ($parsed['scheme'] === '' ||
341+
preg_match('/^https?$/i', $parsed['scheme']))
342+
{
343+
if (method_exists($link, 'getLineNo') &&
344+
$this->base_location < $link->getLineNo())
345+
{
346+
$href =
347+
$this->registry->call('Misc', 'absolutize_url',
348+
array(trim($link->getAttribute('href')),
349+
$this->base));
350+
}
351+
else
352+
{
353+
$href =
354+
$this->registry->call('Misc', 'absolutize_url',
355+
array(trim($link->getAttribute('href')),
356+
$this->http_base));
357+
}
358+
if ($href === false)
359+
{
360+
return null;
361+
}
362+
$rel_values = explode(' ', strtolower($link->getAttribute('rel')));
363+
if (in_array($rel, $rel_values))
364+
{
365+
return $href;
366+
}
367+
}
368+
}
369+
return null;
370+
}
371+
321372
public function extension(&$array)
322373
{
323374
foreach ($array as $key => $value)

library/SimplePie/Parser.php

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,27 @@ public function set_registry(SimplePie_Registry $registry)
7676

7777
public function parse(&$data, $encoding, $url = '')
7878
{
79-
$position = 0;
80-
while ($position = strpos($data, 'h-entry', $position)) {
81-
$start = $position < 200 ? 0 : $position - 200;
82-
$check = substr($data, $start, 400);
83-
if (preg_match('/class="[^"]*h-entry/', $check)) {
84-
return $this->parse_microformats($data, $url);
79+
if (function_exists('Mf2\parse')) {
80+
// Check for both h-feed and h-entry, as both a feed with no entries
81+
// and a list of entries without an h-feed wrapper are both valid.
82+
$position = 0;
83+
while ($position = strpos($data, 'h-feed', $position)) {
84+
$start = $position < 200 ? 0 : $position - 200;
85+
$check = substr($data, $start, 400);
86+
if (preg_match('/class="[^"]*h-feed/', $check)) {
87+
return $this->parse_microformats($data, $url);
88+
}
89+
$position += 7;
90+
}
91+
$position = 0;
92+
while ($position = strpos($data, 'h-entry', $position)) {
93+
$start = $position < 200 ? 0 : $position - 200;
94+
$check = substr($data, $start, 400);
95+
if (preg_match('/class="[^"]*h-entry/', $check)) {
96+
return $this->parse_microformats($data, $url);
97+
}
98+
$position += 7;
8599
}
86-
$position += 7;
87100
}
88101

89102
// Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
@@ -439,10 +452,8 @@ private function parse_hcard($data, $category = false) {
439452
}
440453

441454
private function parse_microformats(&$data, $url) {
442-
if (!function_exists('Mf2\parse')) return false;
443-
444455
$feed_title = '';
445-
$icon = '';
456+
$feed_author = NULL;
446457
$author_cache = array();
447458
$items = array();
448459
$entries = array();
@@ -458,23 +469,20 @@ private function parse_microformats(&$data, $url) {
458469
if (!isset($mf_item['children'][0]['type'])) continue;
459470
if (in_array('h-feed', $mf_item['children'][0]['type'])) {
460471
$h_feed = $mf_item['children'][0];
472+
// In this case the parent of the h-feed may be an h-card, so use it as
473+
// the feed_author.
474+
if (in_array('h-card', $mf_item['type'])) $feed_author = $mf_item;
461475
break;
462476
}
463477
}
464478
if (isset($h_feed['children'])) {
465479
$entries = $h_feed['children'];
466-
// Also set the feed title and icon from the h-feed if available.
480+
// Also set the feed title and store author from the h-feed if available.
467481
if (isset($mf['items'][0]['properties']['name'][0])) {
468482
$feed_title = $mf['items'][0]['properties']['name'][0];
469483
}
470484
if (isset($mf['items'][0]['properties']['author'][0])) {
471-
$author = $mf['items'][0]['properties']['author'][0];
472-
if (is_array($author) &&
473-
isset($author['type']) && in_array('h-card', $author['type'])) {
474-
if (isset($author['properties']['photo'][0])) {
475-
$icon = $author['properties']['photo'][0];
476-
}
477-
}
485+
$feed_author = $mf['items'][0]['properties']['author'][0];
478486
}
479487
}
480488
else {
@@ -501,12 +509,13 @@ private function parse_microformats(&$data, $url) {
501509
if (isset($title['value'])) $title = $title['value'];
502510
$item['title'] = array(array('data' => $title));
503511
}
504-
if (isset($entry['properties']['author'][0])) {
512+
if (isset($entry['properties']['author'][0]) || isset($feed_author)) {
505513
// author is a special case, it can be plain text or an h-card array.
506514
// If it's plain text it can also be a url that should be followed to
507515
// get the actual h-card.
508-
$author = $entry['properties']['author'][0];
509-
if (is_array($author)) {
516+
$author = isset($entry['properties']['author'][0]) ?
517+
$entry['properties']['author'][0] : $feed_author;
518+
if (!is_string($author)) {
510519
$author = $this->parse_hcard($author);
511520
}
512521
else if (strpos($author, 'http') === 0) {
@@ -574,6 +583,11 @@ private function parse_microformats(&$data, $url) {
574583
$item['title'] = array(array('data' => $title));
575584
}
576585
$description .= $entry['properties']['content'][0]['html'];
586+
if (isset($entry['properties']['in-reply-to'][0]['value'])) {
587+
$in_reply_to = $entry['properties']['in-reply-to'][0]['value'];
588+
$description .= '<p><span class="in-reply-to"></span> '.
589+
'<a href="'.$in_reply_to.'">'.$in_reply_to.'</a><p>';
590+
}
577591
$item['description'] = array(array('data' => $description));
578592
}
579593
if (isset($entry['properties']['category'])) {
@@ -608,9 +622,10 @@ private function parse_microformats(&$data, $url) {
608622
// Mimic RSS data format when storing microformats.
609623
$link = array(array('data' => $url));
610624
$image = '';
611-
if ($icon !== '') {
612-
array(array('child' => array('' =>
613-
array('url' => array(array('data' => $icon))))));
625+
if (!is_string($feed_author) &&
626+
isset($feed_author['properties']['photo'][0])) {
627+
$image = array(array('child' => array('' => array('url' =>
628+
array(array('data' => $feed_author['properties']['photo'][0]))))));
614629
}
615630
// Use the a name given for the h-feed, or get the title from the html.
616631
if ($feed_title !== '') {

library/SimplePie/Sanitize.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -368,8 +368,9 @@ public function sanitize($data, $type, $base = '')
368368

369369
// Finally, convert to a HTML string
370370
$data = trim($document->saveHTML());
371-
372-
list($_, $data, $_) = explode($unique_tag, $data);
371+
$result = explode($unique_tag, $data);
372+
// The tags may not be found again if there was invalid markup.
373+
$data = count($result) === 3 ? $result[1] : '';
373374

374375
if ($this->remove_div)
375376
{

0 commit comments

Comments
 (0)