Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f9d683e

Browse files
committed
merged branch lazyhammer/crawler-relative-links-2.1 (PR #7244)
This PR was merged into the 2.1 branch.

Commits
-------

a4ec677 [DomCrawler] Fix relative path handling in links

Discussion
----------

[2.1][DomCrawler] Fix relative path handling in links

| Q             | A     |
| ------------- | ----- |
| Bug fix?      | yes   |
| New feature?  | no    |
| BC breaks?    | no    |
| Deprecations? | no    |
| Tests pass?   | yes   |
| Fixed tickets | #7219 |
| License       | MIT   |
| Doc PR        | n/a   |

Added relative path canonicalization according to RFC 3986, section 5.2.4

---------------------------------------------------------------------------

by vicb at 2013-03-02T14:48:46Z

@fabpot seems like 2.1 has a deps issue if you look at travis logs
2 parents 82f6389 + a4ec677 commit f9d683e

File tree

2 files changed

+52
-2
lines changed

2 files changed

+52
-2
lines changed

src/Symfony/Component/DomCrawler/Link.php

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,13 +120,18 @@ public function getUri()
120120
return $baseUri.$uri;
121121
}
122122

123+
$baseUri = preg_replace('#^(.*?//[^/]+)(?:\/.*)?$#', '$1', $this->currentUri);
124+
123125
// absolute path
124126
if ('/' === $uri[0]) {
125-
return preg_replace('#^(.*?//[^/]+)(?:\/.*)?$#', '$1', $this->currentUri).$uri;
127+
return $baseUri.$uri;
126128
}
127129

128130
// relative path
129-
return substr($this->currentUri, 0, strrpos($this->currentUri, '/') + 1).$uri;
131+
$path = parse_url(substr($this->currentUri, strlen($baseUri)), PHP_URL_PATH);
132+
$path = $this->canonicalizePath(substr($path, 0, strrpos($path, '/')).'/'.$uri);
133+
134+
return $baseUri.('' === $path || '/' !== $path[0] ? '/' : '').$path;
130135
}
131136

132137
/**
@@ -139,6 +144,36 @@ protected function getRawUri()
139144
return $this->node->getAttribute('href');
140145
}
141146

147+
/**
 * Returns the canonicalized URI path (see RFC 3986, section 5.2.4).
 *
 * "." segments are dropped and every ".." segment removes the segment
 * collected just before it, if there is one. All other segments, including
 * empty ones, are kept in order and re-joined with "/".
 *
 * A path whose final segment is exactly "." or ".." is treated as a
 * directory reference, so the result ends with a slash. A final segment
 * that merely ends with a dot character (e.g. "file." or "v1..") is an
 * ordinary name and is left untouched.
 *
 * @param string $path URI path
 *
 * @return string The path with its dot segments removed
 */
protected function canonicalizePath($path)
{
    // Nothing to resolve in an empty or root-only path.
    if ('' === $path || '/' === $path) {
        return $path;
    }

    $segments = explode('/', $path);

    // Only a whole trailing dot segment refers to a directory; appending an
    // empty segment makes the joined result end with "/". Checking the last
    // character instead would wrongly turn "/foo/file." into "/foo/file./".
    $last = end($segments);
    if ('.' === $last || '..' === $last) {
        $segments[] = '';
    }

    $output = array();

    foreach ($segments as $segment) {
        if ('..' === $segment) {
            // array_pop() on an empty array is a no-op, so surplus ".."
            // segments are discarded. This can also pop the empty segment
            // that stands for the leading slash.
            array_pop($output);
        } elseif ('.' !== $segment) {
            $output[] = $segment;
        }
    }

    return implode('/', $output);
}
176+
142177
/**
143178
* Sets current \DOMNode instance
144179
*

src/Symfony/Component/DomCrawler/Tests/LinkTest.php

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,21 @@ public function getGetUriTests()
101101
array('?foo=2', 'http://localhost/bar?foo=1', 'http://localhost/bar?foo=2'),
102102
array('?foo=2', 'http://localhost/bar/?foo=1', 'http://localhost/bar/?foo=2'),
103103
array('?bar=2', 'http://localhost?foo=1', 'http://localhost?bar=2'),
104+
105+
array('.', 'http://localhost/foo/bar/baz', 'http://localhost/foo/bar/'),
106+
array('./', 'http://localhost/foo/bar/baz', 'http://localhost/foo/bar/'),
107+
array('./foo', 'http://localhost/foo/bar/baz', 'http://localhost/foo/bar/foo'),
108+
array('..', 'http://localhost/foo/bar/baz', 'http://localhost/foo/'),
109+
array('../', 'http://localhost/foo/bar/baz', 'http://localhost/foo/'),
110+
array('../foo', 'http://localhost/foo/bar/baz', 'http://localhost/foo/foo'),
111+
array('../..', 'http://localhost/foo/bar/baz', 'http://localhost/'),
112+
array('../../', 'http://localhost/foo/bar/baz', 'http://localhost/'),
113+
array('../../foo', 'http://localhost/foo/bar/baz', 'http://localhost/foo'),
114+
array('../../foo', 'http://localhost/bar/foo/', 'http://localhost/foo'),
115+
array('../bar/../../foo', 'http://localhost/bar/foo/', 'http://localhost/foo'),
116+
array('../bar/./../../foo', 'http://localhost/bar/foo/', 'http://localhost/foo'),
117+
array('../../', 'http://localhost/', 'http://localhost/'),
118+
array('../../', 'http://localhost', 'http://localhost/'),
104119
);
105120
}
106121
}

0 commit comments

Comments
 (0)