GetSimple Support Forum
SOLVED Vietnamese language url problem - Printable Version

+- GetSimple Support Forum (http://get-simple.info/forums)
+-- Forum: GetSimple (http://get-simple.info/forums/forumdisplay.php?fid=3)
+--- Forum: Developer Discussions (http://get-simple.info/forums/forumdisplay.php?fid=8)
+--- Thread: SOLVED Vietnamese language url problem (/showthread.php?tid=13710)



Vietnamese language url problem - nguyentriquang - 2020-03-30

Dear Admin,

I'm Quang from Vietnam, and I am a fan of GSCMS. While using it for my website, I noticed a problem with Vietnamese text in URLs: the accents (diacritics) on Vietnamese characters produce incorrectly formatted URLs. I solved it with the adjustments below:

In the basic.php file:

Replace the following block of code:

/**
 * Turn arbitrary text into a URL slug (stock version, before the fix).
 *
 * Strips tags from the lowercased text, removes or replaces the listed
 * punctuation, URL-encodes the result and trims trailing dashes.
 * lowercase() is not defined in this snippet.
 *
 * @param string $text text to convert
 * @return string the slug
 */
function clean_url($text)  { 
$text = strip_tags(lowercase($text)); 
// Search list is processed in order: ' ?' is handled before ' ' and '?'.
$code_entities_match = array(' ?',' ','--','&quot;','!','@','#','$','%','^','&','*','(',')','+','{','}','|',':','"','<','>','?','[',']','\\',';',"'",',','/','*','+','~','`','=','.'); 
// Only the 2nd and 3rd entries (' ' and '--') map to '-'; every other listed
// replacement is ''. str_replace() also uses '' for any search entry that has
// no corresponding replacement entry.
$code_entities_replace = array('','-','-','','','','','','','','','','','','','','','','','','','','','','','',''); 
$text = str_replace($code_entities_match, $code_entities_replace, $text); 
// Any bytes that survive the stripping (e.g. accented letters) are percent-encoded here.
$text = urlencode($text);
// Single pass: collapses each non-overlapping '--' pair into '-'.
$text = str_replace('--','-',$text);
$text = rtrim($text, "-");
return $text; 
}

Change to:

/**
 * Turn arbitrary UTF-8 text into an ASCII URL slug.
 *
 * Accented Latin / Vietnamese letters are transliterated to their base ASCII
 * letter before punctuation is stripped, so they no longer end up
 * percent-encoded in the slug.
 *
 * lowercase() is expected to be provided by the surrounding basic.php.
 *
 * @param string $text text to convert (UTF-8)
 * @return string slug with spaces turned into single dashes and no trailing dash
 */
function clean_url($text)  {
    $text = strip_tags(lowercase($text));

    // Explicit flags/charset: the defaults of html_entity_decode() differ
    // between PHP versions and depend on the default_charset ini setting.
    $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

    // Text typed or pasted on some systems arrives decomposed (base letter +
    // combining marks). Compose it first so the lookup table below matches.
    // The intl extension is optional, hence the guard.
    if (class_exists('Normalizer')) {
        $composed = Normalizer::normalize($text, Normalizer::FORM_C);
        if (is_string($composed)) {
            $text = $composed;
        }
    }

    // Accented letter => ASCII base letter. Upper-case variants are listed as
    // well because lowercase() may not be multibyte-aware on every install.
    static $map = null;
    if ($map === null) {
        $groups = array(
            'a' => 'ä|à|á|ạ|ả|ã|â|ầ|ấ|ậ|ẩ|ẫ|ă|ằ|ắ|ặ|ẳ|ẵ|Ä|À|Á|Ạ|Ả|Ã|Â|Ầ|Ấ|Ậ|Ẩ|Ẫ|Ă|Ằ|Ắ|Ặ|Ẳ|Ẵ',
            'c' => 'ç|Ç',
            'd' => 'đ|Đ',
            'e' => 'è|é|ẹ|ẻ|ẽ|ê|ề|ế|ệ|ể|ễ|È|É|Ẹ|Ẻ|Ẽ|Ê|Ề|Ế|Ệ|Ể|Ễ',
            'i' => 'ì|í|î|ị|ỉ|ĩ|Ì|Í|Î|Ị|Ỉ|Ĩ',
            'o' => 'ö|ò|ó|ọ|ỏ|õ|ô|ồ|ố|ộ|ổ|ỗ|ơ|ờ|ớ|ợ|ở|ỡ|Ö|Ò|Ó|Ọ|Ỏ|Õ|Ô|Ồ|Ố|Ộ|Ổ|Ỗ|Ơ|Ờ|Ớ|Ợ|Ở|Ỡ',
            'u' => 'ü|ù|ú|ụ|ủ|ũ|ư|ừ|ứ|ự|ử|ữ|Ü|Ù|Ú|Ụ|Ủ|Ũ|Ư|Ừ|Ứ|Ự|Ử|Ữ',
            'y' => 'ỳ|ý|ỵ|ỷ|ỹ|Ỳ|Ý|Ỵ|Ỷ|Ỹ',
        );
        $map = array();
        foreach ($groups as $ascii => $variants) {
            foreach (explode('|', $variants) as $variant) {
                $map[$variant] = $ascii;
            }
        }
    }
    $text = strtr($text, $map);

    // Order matters: ' ?' must go before spaces become dashes, and '&quot;'
    // must go before '&' and ';' are stripped individually.
    $text = str_replace(' ?', '', $text);
    $text = str_replace(' ', '-', $text);
    $strip = array('&quot;','!','@','#','$','%','^','&','*','(',')','+','{','}','|',':','"','<','>','?','[',']','\\',';',"'",',','/','~','`','=','.');
    $text = str_replace($strip, '', $text);

    $text = urlencode($text);

    // Collapse dash runs of any length; a single str_replace('--','-') pass
    // still leaves '--' behind for runs of four or more.
    $text = preg_replace('/-{2,}/', '-', $text);

    return rtrim($text, '-');
}

The same fix applies to uploaded image file names:

Change from:

/**
 * Sanitise an uploaded image file name (stock version, before the fix).
 *
 * Same pipeline as clean_url(), except that lowercasing is applied only when
 * getDef('GSUPLOADSLC',true) is truthy, and that '@' and '.' are not in the
 * strip list. getDef() and lowercase() are not defined in this snippet.
 *
 * @param string $text original file name
 * @return string sanitised file name
 */
function clean_img_name($text)  {
$text = getDef('GSUPLOADSLC',true) ? strip_tags(lowercase($text)) : strip_tags($text);
// Search list is processed in order: ' ?' is handled before ' ' and '?'.
$code_entities_match = array(' ?',' ','--','&quot;','!','#','$','%','^','&','*','(',')','+','{','}','|',':','"','<','>','?','[',']','\\',';',"'",',','/','*','+','~','`','='); 
// Only ' ' and '--' map to '-'; every other listed replacement is ''.
// str_replace() also uses '' for search entries without a replacement entry.
$code_entities_replace = array('','-','-','','','','','','','','','','','','','','','','','','','','','',''); 
$text = str_replace($code_entities_match, $code_entities_replace, $text); 
// Any bytes that survive the stripping (e.g. accented letters) are percent-encoded here.
$text = urlencode($text);
// Single pass: collapses each non-overlapping '--' pair into '-'.
$text = str_replace('--','-',$text);
$text = str_replace('%40','@',$text); // ensure @ is not encoded
$text = rtrim($text, "-");
return $text; 
}

To:

/**
 * Sanitise an uploaded image file name given as UTF-8 text.
 *
 * Accented Latin / Vietnamese letters are transliterated to their base ASCII
 * letter before punctuation is stripped, so they no longer end up
 * percent-encoded in the file name. '@' and '.' are kept.
 *
 * The name is lowercased only when getDef('GSUPLOADSLC', true) is truthy;
 * otherwise the case of transliterated letters is preserved as well
 * (e.g. 'Ả' becomes 'A', not 'a').
 *
 * getDef() and lowercase() are expected to be provided by the surrounding
 * basic.php.
 *
 * @param string $text original file name (UTF-8)
 * @return string sanitised file name with no trailing dash
 */
function clean_img_name($text)  {
    $forceLower = (bool) getDef('GSUPLOADSLC',true);
    $text = $forceLower ? strip_tags(lowercase($text)) : strip_tags($text);

    // Explicit flags/charset: the defaults of html_entity_decode() differ
    // between PHP versions and depend on the default_charset ini setting.
    $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

    // File names coming from some systems are decomposed (base letter +
    // combining marks). Compose them first so the lookup table below matches.
    // The intl extension is optional, hence the guard.
    if (class_exists('Normalizer')) {
        $composed = Normalizer::normalize($text, Normalizer::FORM_C);
        if (is_string($composed)) {
            $text = $composed;
        }
    }

    // Accented letter => ASCII base letter, grouped by target letter and case.
    $groups = array(
        'a' => 'ä|à|á|ạ|ả|ã|â|ầ|ấ|ậ|ẩ|ẫ|ă|ằ|ắ|ặ|ẳ|ẵ',
        'c' => 'ç',
        'd' => 'đ',
        'e' => 'è|é|ẹ|ẻ|ẽ|ê|ề|ế|ệ|ể|ễ',
        'i' => 'ì|í|î|ị|ỉ|ĩ',
        'o' => 'ö|ò|ó|ọ|ỏ|õ|ô|ồ|ố|ộ|ổ|ỗ|ơ|ờ|ớ|ợ|ở|ỡ',
        'u' => 'ü|ù|ú|ụ|ủ|ũ|ư|ừ|ứ|ự|ử|ữ',
        'y' => 'ỳ|ý|ỵ|ỷ|ỹ',
        'A' => 'Ä|À|Á|Ạ|Ả|Ã|Â|Ầ|Ấ|Ậ|Ẩ|Ẫ|Ă|Ằ|Ắ|Ặ|Ẳ|Ẵ',
        'C' => 'Ç',
        'D' => 'Đ',
        'E' => 'È|É|Ẹ|Ẻ|Ẽ|Ê|Ề|Ế|Ệ|Ể|Ễ',
        'I' => 'Ì|Í|Î|Ị|Ỉ|Ĩ',
        'O' => 'Ö|Ò|Ó|Ọ|Ỏ|Õ|Ô|Ồ|Ố|Ộ|Ổ|Ỗ|Ơ|Ờ|Ớ|Ợ|Ở|Ỡ',
        'U' => 'Ü|Ù|Ú|Ụ|Ủ|Ũ|Ư|Ừ|Ứ|Ự|Ử|Ữ',
        'Y' => 'Ỳ|Ý|Ỵ|Ỷ|Ỹ',
    );
    $map = array();
    foreach ($groups as $ascii => $variants) {
        // When lowercasing is on, upper-case accented letters that slipped
        // past a non-multibyte lowercase() must still come out lower-case.
        $replacement = $forceLower ? strtolower($ascii) : $ascii;
        foreach (explode('|', $variants) as $variant) {
            $map[$variant] = $replacement;
        }
    }
    $text = strtr($text, $map);

    // Order matters: ' ?' must go before spaces become dashes, and '&quot;'
    // must go before '&' and ';' are stripped individually.
    $text = str_replace(' ?', '', $text);
    $text = str_replace(' ', '-', $text);
    $strip = array('&quot;','!','#','$','%','^','&','*','(',')','+','{','}','|',':','"','<','>','?','[',']','\\',';',"'",',','/','~','`','=');
    $text = str_replace($strip, '', $text);

    $text = urlencode($text);

    // Collapse dash runs of any length; a single str_replace('--','-') pass
    // still leaves '--' behind for runs of four or more.
    $text = preg_replace('/-{2,}/', '-', $text);
    $text = str_replace('%40','@',$text); // ensure @ is not encoded

    return rtrim($text, '-');
}

And this block:

/**
 * Reduce text to 7-bit ASCII via HTML entities (stock version, before the fix).
 *
 * Step 1 converts the text to HTML entities: with mb_convert_encoding() when
 * that function exists, otherwise with htmlentities()/utf8_decode().
 * Step 2 rewrites those entities to plain letters with four regexes.
 *
 * @param string $text     text to convert
 * @param string $from_enc source encoding, only used by the mb_convert_encoding() branch
 * @return string converted text
 */
function to7bit($text,$from_enc="UTF-8") {
if (function_exists('mb_convert_encoding')) {
    // NOTE(review): the 'HTML-ENTITIES' target of mbstring (and utf8_decode()
    // in the fallback below) are reported as deprecated from PHP 8.2 on -
    // confirm against the PHP version in use.
    $text = mb_convert_encoding($text,'HTML-ENTITIES',$from_enc);
    } else {
$text = htmlspecialchars_decode(utf8_decode(htmlentities($text, ENT_COMPAT, 'utf-8', false)));
}
    // Patterns are applied in order:
    //   &szlig;        -> ss
    //   &XXlig;        -> the two letters XX   (e.g. &aelig; -> ae)
    //   &Xuml;         -> X followed by 'e'    (X in a, o, u, A, O, U)
    //   any other &..; -> the first character after '&'
    $text = preg_replace(
        array('/&szlig;/','/&(..)lig;/',
             '/&([aouAOU])uml;/','/&(.)[^;]*;/'),
        array('ss',"$1","$1".'e',"$1"),
        $text);
    return $text;
}

Change to:

/**
 * Rewrite HTML character entities found in $text as plain ASCII letters.
 *
 * Unlike the stock version, the text is NOT converted to HTML entities first:
 * only entities already present in the input are rewritten, and everything
 * else (including multibyte characters) passes through untouched.
 *
 * @param string $text     text to convert
 * @param string $from_enc unused here; kept so existing callers keep working
 * @return string converted text
 */
function to7bit($text,$from_enc="UTF-8") {
    // Applied in order, so the specific entities win over the catch-all.
    $entityPatterns = array(
        '/&szlig;/',           // sharp s
        '/&(..)lig;/',         // ligatures, e.g. &aelig;
        '/&([aouAOU])uml;/',   // umlauts
        '/&(.)[^;]*;/',        // any other entity
    );
    $asciiReplacements = array(
        'ss',
        '$1',
        '$1e',
        '$1',
    );

    return preg_replace($entityPatterns, $asciiReplacements, $text);
}

This is what I use, and it works in all cases with UTF-8 text. If you approve of it, or can improve it, please include it in a new version of GSCMS to fix Vietnamese URLs.

Thank you so much for your great job.


RE: Vietnamese language url problem - Felix - 2020-03-30

Hi Quang,

Thanks for your contribution to this language issue
and posting your code how to fix it.
Can you show us your website url ?
F.


RE: Vietnamese language url problem - nguyentriquang - 2020-04-01

(2020-03-30, 17:26:34)Felix Wrote: Hi Quang,

Thanks for your contribution to this language issue
and posting your code how to fix it.
Can you show us your website url ?
F.

Hi Felix,
This is my website: https://thaisimili.com/. It is a Vietnamese website using the UTF-8 charset, and after fixing the URL problem it works perfectly.