Once you have detected the URLs, you can use parse_url()
and parse_str()
to break each URL apart, add the utm and medium parameters, and rebuild it without caring too much about the content of the GET parameters or the hash:
/**
 * Appends the tracking parameters to a single add-link.com URL.
 *
 * The URL is split with parse_url(), the existing query string (if any) is
 * expanded with parse_str(), utm/medium are set (overwriting existing values
 * for those two keys), and the URL is put back together.
 *
 * @param string $link Absolute ("http://...") or protocol-relative ("//...")
 *                     URL without a fragment.
 *
 * @return string The rebuilt URL, or $link unchanged when parse_url() rejects it.
 */
function add_link_com_tracking_url($link)
{
    $res = parse_url($link);
    if ($res === false) {
        // Seriously malformed URL: leave it exactly as it was.
        return $link;
    }

    $result = '';
    if (isset($res['scheme'])) {
        $result .= $res['scheme'].'://';
    } elseif (isset($res['host'])) {
        // Protocol-relative URL: parse_url() reports a host but no scheme,
        // so the leading "//" has to be restored by hand.
        $result .= '//';
    }
    if (isset($res['host'])) {
        $result .= $res['host'];
    }
    if (isset($res['path'])) {
        $result .= $res['path'];
    }

    $query = [];
    if (isset($res['query'])) {
        parse_str($res['query'], $query);
    }
    $query['utm'] = 'some';
    $query['medium'] = 'stuff';

    // $query always holds at least the two keys set above, so the "?" is unconditional.
    return $result.'?'.http_build_query($query);
}

// "#" is the pattern delimiter below, so it is passed to preg_quote() explicitly.
$url_modifier_domain = preg_quote('add-link.com', '#');
$html_text = preg_replace_callback(
    // The match ends right before the first quote or "#", so it never contains
    // a fragment: any "#hash" stays in the text directly after the rewritten URL.
    '#((?:https?:)?//'.$url_modifier_domain.'(/[^\'"\#]*)?)(?=[\'"\#])#i',
    function ($matches) {
        return add_link_com_tracking_url($matches[0]);
    },
    $html
);
As you can see, the code is longer but simpler
Edit
I made some changes: the script now searches for every href="xxx" inside the text. If the link is not from add-link.com, the script skips it; otherwise it rebuilds the link as faithfully as possible.
$html = 'blabla <a href="http://add-link.com/">a</a>
<a href="http://add-link.com/">a</a>
<a href="http://add-link.com/#hashed">a</a>
<a href="http://abcd.com/#hashed">a</a>
<a href="http://add-link.com/?test=1">a</a>
<a href="http://add-link.com/try.php">a</a>
<a href="http://add-link.com/try.php?test=1">a</a>
<a href="http://add-link.com/try.php#hashed">a</a>
<a href="http://add-link.com/try.php?test=1#hashed">a</a>
<a href="http://add-link.com/try.php?test=1#hashed">a</a>
<a href="//add-link.com?test=test" style="color: rgb(198, 156, 109);">a</a>
';

/**
 * Adds the tracking parameters to one href value when it points at $domain.
 *
 * A link is treated as "ours" only when its parsed host equals $domain or is
 * a subdomain of it (case-insensitive). The domain name merely appearing
 * somewhere in the URL (query string, path, mailto: address, look-alike host)
 * is not enough, and such links are returned untouched.
 *
 * NOTE(review): the href value is used as-is; entity-encoded separators such
 * as "&amp;" are not decoded before parse_str() sees them.
 *
 * @param string $link   Raw value of the href attribute.
 * @param string $domain Plain (not regex-quoted) domain name, e.g. "add-link.com".
 *
 * @return string The rewritten URL, or $link unchanged for foreign/unparsable links.
 */
function add_tracking_to_href($link, $domain)
{
    $original = $link;

    // Split off the fragment first. The limit of 2 keeps any further "#"
    // inside the fragment instead of silently dropping it.
    $hash = null;
    if (strpos($link, '#') !== false) {
        list($link, $hash) = explode('#', $link, 2);
    }

    $res = parse_url($link);
    if ($res === false || !isset($res['host'])) {
        // Unparsable, relative, or host-less (mailto:, tel:, ...) link.
        return $original;
    }

    // ignoring outer links
    $host = strtolower($res['host']);
    $suffix = '.'.strtolower($domain);
    if ($host !== strtolower($domain) && substr($host, -strlen($suffix)) !== $suffix) {
        return $original;
    }

    $result = '';
    if (isset($res['scheme'])) {
        $result .= $res['scheme'].'://';
    } else {
        // Protocol-relative link ("//host/..."): restore the leading slashes.
        $result .= '//';
    }
    if (isset($res['user'])) {
        $result .= $res['user'];
        if (isset($res['pass'])) {
            $result .= ':'.$res['pass'];
        }
        $result .= '@';
    }
    $result .= $res['host'];
    if (isset($res['port'])) {
        $result .= ':'.$res['port'];
    }
    // A bare host gets an explicit root path so the query never follows the host directly.
    $result .= isset($res['path']) ? $res['path'] : '/';

    $query = [];
    if (isset($res['query'])) {
        parse_str($res['query'], $query);
    }
    $query['utm'] = 'some';
    $query['medium'] = 'stuff';
    // $query always holds at least the two keys set above, so the "?" is unconditional.
    $result .= '?'.http_build_query($query);

    if ($hash !== null) {
        $result .= '#'.$hash;
    }

    return $result;
}

// Compared against the parsed host as a plain string, so no preg_quote() here.
$url_modifier_domain = 'add-link.com';
$html_text = preg_replace_callback(
    '/href="([^"]+)"/i',
    function ($matches) use ($url_modifier_domain) {
        return 'href="'.add_tracking_to_href($matches[1], $url_modifier_domain).'"';
    },
    $html
);
var_dump($html_text);