I tried to dump a utf-8 encoded database from mysql 4.1.14 to mysql 5.0.16.
Despite many efforts and trying --default-character-set option, it was always
restoring corrupted data.
I've then decided to write my own code to fix the wrong dump. It is meant
to be used on an already restored db, and will fix the wrong encodings.
I've used it from the command line as follows.
$ ./dumper.sql > dump.sql
After copying the file to the other machine with scp I've restored as usual.
mysql -u root -p mydbname < dump.sql
This did the trick.
CAUTION: if you want to try this script on your own database, be sure to have
a backup first, or even better try it on a copy. This script doesn't do any update
query on the database, they are done only when you load the resulting file with
mysql.
The function get_table_field() tries to avoid any field that does not need
recoding, double check that the list is correct for your database. It's for sure
not a complete list.
#!/usr/local/bin/php <?php
$param=array("host" => "localhost", "username" => "root", "password" => "****", "db_name" => "mydbname");
main($param);
function main($param) { header("Content-type: text/html; charset=UTF-8"); $link1=mysql_connect($param["host"], $param["username"], $param["password"]); if (!$link1) { echo "can't connect to database\n"; exit(); } mysql_select_db($param["db_name"], $link1); $link2=mysql_connect($param["host"], $param["username"], $param["password"]); mysql_select_db($param["db_name"], $link2); list($pri, $table_field) = get_table_field($link1); dump_all($pri, $table_field, $link1, $link2); mysql_close($link1); mysql_close($link2); }
function dump_all($pri, $table_field, &$link1, &$link2) { for ($i=0; $i<count($table_field); $i++) { $table = $table_field[$i][0]; $field = $table_field[$i][1]; $sql="SELECT `".$field."`, ".implode(",", $pri[$table])." FROM `" .$table."`"; //echo $sql."<br/>"; $res1 = mysql_query($sql, $link1); while ($row=mysql_fetch_assoc($res1)) { $encoding = mb_detect_encoding($row[$field]); if ($encoding=="ASCII") continue; $content = $row[$field]; $content = str_replace("à", "à", $content); $content = str_replace("è", "è", $content); $content = str_replace("é", "é", $content); $content = str_replace("ì", "ì", $content); $content = str_replace("ò", "ò", $content); $content = str_replace("ù", "ù", $content); //$content = htmlentities($content, ENT_COMPAT, "UTF-8"); $where = get_where_cond($pri[$table], $row); $sql = "UPDATE `".$table."` SET `".$field."`='" .mysql_real_escape_string($content)."'".$where.";"; //$res2 = mysql_query($sql, $link2); if (!$res2) { echo $sql."\n"; } } } }
function get_table_field($link) { $table = get_tables($link); $table_field=array(); $pri = array(); for ($i=0; $i<count($table); $i++) { $res = mysql_query("DESC ".$table[$i]); $pri[$table[$i]]=array(); while ($row = mysql_fetch_array($res)) { $field = $row["Field"]; $type = $row["Type"]; if ($row["Key"]=="PRI") { $pri[$table[$i]][] = $row["Field"]; } if (substr($type,0,3)=="int") continue; if (substr($type,0,4)=="date") continue; if (substr($type,0,4)=="enum") continue; if (substr($type,0,4)=="blob") continue; if (substr($type,0,5)=="float") continue; if (substr($type,0,6)=="double") continue; if (substr($type,0,7)=="char(1)") continue; if (substr($type,0,7)=="tinyint") continue; if (substr($type,0,7)=="decimal") continue; if (substr($type,0,8)=="smallint") continue; if (substr($type,0,8)=="longblob") continue; if (substr($type,0,9)=="mediumint") continue; if (substr($type,0,10)=="mediumblob") continue; /** if (substr($type,0,4)=="char") continue; if (substr($type,0,7)=="varchar") continue; if (substr($type,0,4)=="text") continue; echo $table[$i]." " .$field." ".$type."<br/>"; /**/ $table_field[] = array($table[$i], $field); } } return array($pri, $table_field); }
function get_tables($link) { $table=array(); $res = mysql_query("SHOW TABLES"); while ($row = mysql_fetch_array($res)) { $table[]=$row[0]; } return $table; }
function get_where_cond($pri, $row) { $where=" WHERE "; $connector=""; for ($i=0; $i<count($pri); $i++) { $key=$pri[$i]; $value=$row[$key]; if (!is_numeric($value)) $value="'".mysql_real_escape_string($value)."'"; $where.=$connector."`".$key."`=".$value; $connector=" AND "; } return $where; }
|