#!/usr/bin/php -q
<?php
/**
* Filename checker.
* Checks for invalid UTF-8 and/or ASCII names of files and folders,
* as well as characters that cause issues with different operating systems.
*
* @author Hellkeepa
* @link https://dl.getdropbox.com/u/228121/checkfile.php
* @version 1.3
* @copyright GPL v3
*/
// Upper bound on directory nesting; the scan gives up once this depth is reached.
const MAX_LEVEL = 50;
// When true, symbolic links are followed; when false, they are reported and skipped.
const FOLLOW_SYMLINK = true;
/**
 * Recursively scans a folder and reports entry names that fail the selected checks.
 *
 * Each offending name is echoed on its own line, prefixed with the kind of
 * problem ("UTF8", "ASCII" or "Invalid character"), and the matching counter
 * in $count is incremented.
 *
 * @param string $folder Folder to scan; a trailing slash is appended if missing.
 * @param array  $count  Counters updated in place: [0] files, [1] names failing the
 *                       UTF-8 check, [2] names failing the ASCII check, [3] folders,
 *                       [4] names containing a reserved character.
 * @param int    $check  Bit mask selecting the checks: 1 = UTF-8, 2 = ASCII.
 * @param int    $level  Current nesting depth; external callers leave this at 0.
 * @return int 0 on success, 2 when MAX_LEVEL nested levels were reached.
 */
function check_utf8 ($folder, &$count, $check = 1, $level = 0) {
    $errorlvl = 0;
    $level++;

    if ($level >= MAX_LEVEL) {
        echo "WARNING: $level nested levels, check for recursion.\n";
        return 2;
    }

    if (substr ($folder, -1) != '/') {
        $folder .= '/';
    }

    $dh = opendir ($folder);
    if ($dh === false) {
        // An unreadable folder should not abort the whole scan; report it and move on.
        echo "WARNING: Unable to open '{$folder}'.... Skipping.\n";
        return $errorlvl;
    }

    // Compare against false explicitly: an entry named "0" is falsy and would
    // otherwise end the loop early.
    while (($file = readdir ($dh)) !== false) {
        // Skip the self/parent entries and anything called "proc".
        if ($file === '.' || $file === '..' || $file === 'proc') {
            continue;
        }

        if (!FOLLOW_SYMLINK && is_link ($folder.$file)) {
            echo "'{$folder}{$file}' is a symlink.... Skipping.\n";
            continue;
        }

        if (($check & 1) && !is_utf8 ($file)) {
            $count[1]++;
            echo "UTF8: {$folder}{$file}\n";
        }

        if (($check & 2) && !is_ascii ($file)) {
            $count[2]++;
            echo "ASCII: {$folder}{$file}\n";
        }

        // NOTE(review): as written, the leading "\/" only matches a forward slash.
        // If a literal backslash was meant to be reported too, the class needs "\\\\".
        if (preg_match ('#[\/:?*<>"|]#', $file)) {
            $count[4]++;
            echo "Invalid character: {$folder}{$file}\n";
        }

        $path = $folder.$file;
        if (is_dir ($path)) {
            $errorlvl = check_utf8 ($path, $count, $check, $level);
            if ($errorlvl) {
                // Propagate the failure, but release the handle first.
                closedir ($dh);
                return $errorlvl;
            }
            $count[3]++;
        } else {
            $count[0]++;
        }
    }

    closedir ($dh);
    return $errorlvl;
}
/**
 * Returns true if $string is valid UTF-8 and false otherwise.
 *
 * The pattern is the W3C byte-level UTF-8 validator. Besides malformed
 * sequences it also rejects ASCII control characters other than tab, line
 * feed and carriage return. An empty string does not match and yields false.
 *
 * @param string $string String to be tested.
 * @return bool True when every byte sequence in $string is accepted by the pattern.
 * @link http://w3.org/International/questions/qa-forms-utf-8.html
 * @link http://no2.php.net/manual/en/function.utf8-encode.php#85866
 */
function is_utf8 ($string) {
    // The \xNN escapes are left for PCRE to interpret (single-quoted string),
    // and the "u" modifier is deliberately absent so the match is byte-wise.
    return preg_match (
        '%^(?:
            [\x09\x0A\x0D\x20-\x7E]            # ASCII
          | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
          | \xE0[\xA0-\xBF][\x80-\xBF]         # excluding overlongs
          | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
          | \xED[\x80-\x9F][\x80-\xBF]         # excluding surrogates
          | \xF0[\x90-\xBF][\x80-\xBF]{2}      # planes 1-3
          | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
          | \xF4[\x80-\x8F][\x80-\xBF]{2}      # plane 16
        )+\z%xs', $string) === 1;
}
/**
 * Returns true if $string consists solely of printable ASCII characters
 * (0x20-0x7E) plus tab, line feed and carriage return.
 *
 * An empty string does not match and yields false.
 *
 * @param string $string String to be tested.
 * @return bool True when every byte of $string is in the accepted set.
 */
function is_ascii ($string) {
    // Single quotes keep the \xNN escapes intact for PCRE to interpret.
    return preg_match ('#^[\x09\x0A\x0D\x20-\x7E]+\z#s', $string) === 1;
}
/**
 * Prints the command-line usage text.
 *
 * @return int Always 1, so callers can use the result as an error level.
 */
function show_usage () {
    // $argv is not visible inside function scope, so the script name is read
    // from $_SERVER instead; fall back to a fixed name if it is unavailable.
    $script = isset ($_SERVER['argv'][0]) ? $_SERVER['argv'][0] : 'checkfile.php';

    echo "Usage: " . escapeshellcmd ($script) . " <folder> [type]\n\n";
    echo " Where type is either 'ASCII', 'UTF8', or 'full',\n";
    echo " defaults to UTF-8 check only.\n\n";
    return 1;
}
// Validate the number of arguments. exit() is used rather than return because
// a top-level return ends the script without setting the process exit status.
if ($argc < 2) {
    echo "ERROR: Folder not specified.\n\n";
    exit (show_usage ());
}
if ($argc > 3) {
    echo "ERROR: Too many arguments.\n\n";
    exit (show_usage ());
}

// Check if scan type is selected, and make sure it's valid.
// The value is a bit mask: 1 = UTF-8 check, 2 = ASCII check, 3 = both.
$check = 1;
if ($argc == 3) {
    switch (strtolower ($argv[2])) {
        case "both":
        case "full":
            $check = 3;
            break;
        case "ascii":
            $check = 2;
            break;
        case "utf8":
        case "unicode":
            $check = 1;
            break;
        default:
            echo "ERROR: Unknown check type.\n\n";
            exit (show_usage ());
    }
}

// Make sure argument 1 is an existing folder.
$folder = $argv[1];
if (!is_dir ($folder)) {
    echo "ERROR: Not a folder.\n\n";
    exit (show_usage ());
}

// Set counters to zero, and start the check.
// Indexes: 0 = files, 1 = bad UTF-8 names, 2 = bad ASCII names,
// 3 = folders, 4 = names with reserved characters.
$count = array (0, 0, 0, 0, 0);
$retval = check_utf8 ($folder, $count, $check);

// Print out results, one item per line.
echo "{$count[0]} files checked.\n";
echo "{$count[3]} folders checked.\n";
if ($check & 1) {
    echo "{$count[1]} invalid UTF-8 names.\n";
}
if ($check & 2) {
    echo "{$count[2]} invalid ASCII-names.\n";
}
echo "{$count[4]} filename(s) with invalid characters\n\n";

// Hand the error level back to the calling shell.
exit ($retval);
?>
Checks for invalid UTF-8 and/or ASCII names of files and folders, as well as characters that cause issues with different operating systems.
Tags
PHP file check utf-8
Most popular snippets