# Azure Computer Vision settings
# Region name that forms the first label of the service host name.
define('AZ_REGION', 'japaneast');
# Base URI of the Vision v2.0 REST API in that region.
define('VISION_SERVICE_URI', 'https://' . AZ_REGION . '.api.cognitive.microsoft.com/vision/v2.0/');
# Full URI of the OCR operation.
define('OCR_URI', VISION_SERVICE_URI . 'ocr');
# Subscription key sent with every request. Copy it from the Azure Portal,
# or print it with the az command below:
# $ az cognitiveservices account keys list -n COG_NAME -g RES_GRP --query key1 -o tsv
define('VS_API_KEY', 'API KEY HERE');
# Reducer callback: append the 'text' property of one word object to the
# string accumulated so far and return the longer string.
#   $words - text accumulated so far (null on the first call when
#            array_reduce() is given no initial value)
#   $item  - object exposing a 'text' property
function text_concat($words, $item){
    return $words . $item->text;
}
# Post an image to the Azure Computer Vision OCR endpoint and split the
# recognised text into a probable name and all remaining lines.
#
# Parameters:
#   $image_path - path of the image file; its raw bytes are uploaded as the
#                 request body.
# Returns:
#   array($res_lines, $maybe_name)
#     $res_lines  - text of every recognised line whose text differs from the
#                   name, in response order
#     $maybe_name - text of the tallest line consisting of 2 to 8
#                   Han/Hiragana/Katakana characters, or null when no line
#                   qualifies
# Failure handling:
#   If the image cannot be read, the request fails, or the response carries no
#   "regions" list, array(array(), null) is returned. Transport and response
#   problems are reported through an E_USER_WARNING.
function get_ocr_result($image_path){
    $empty_result = array(array(), null);

    $image = file_get_contents($image_path);
    if($image === false){
        # file_get_contents() has already raised its own warning
        return $empty_result;
    }

    # initiate cURL
    # (CURLOPT_BINARYTRANSFER is intentionally not set: it is a deprecated
    #  no-op, CURLOPT_RETURNTRANSFER already returns the raw body)
    $curl_session = curl_init();
    curl_setopt_array($curl_session,
        array(
            CURLOPT_URL => OCR_URI,
            CURLOPT_POST => true,
            CURLOPT_HTTPHEADER => array(
                "Content-Type: application/octet-stream",
                "Ocp-Apim-Subscription-Key: " . VS_API_KEY
            ),
            CURLOPT_POSTFIELDS => $image,
            CURLOPT_RETURNTRANSFER => true,
        )
    );
    $curl_result = curl_exec($curl_session);
    # read diagnostics before the handle is closed
    $curl_error = curl_error($curl_session);
    $http_status = curl_getinfo($curl_session, CURLINFO_HTTP_CODE);
    curl_close($curl_session);

    if($curl_result === false){
        trigger_error("OCR request failed: " . $curl_error, E_USER_WARNING);
        return $empty_result;
    }

    $json = json_decode($curl_result);
    if(!is_object($json) || !isset($json->regions) || !is_array($json->regions)){
        # e.g. an error document returned by the service instead of an OCR result
        trigger_error("Unexpected OCR response (HTTP " . $http_status . ")", E_USER_WARNING);
        return $empty_result;
    }

    # first pass: collect the text of every line and pick the name candidate
    $all_lines = array();
    $maybe_name = null;
    $max_height = 0;
    foreach($json->regions as $region){
        if(!isset($region->lines) || !is_array($region->lines)){
            continue;
        }
        foreach($region->lines as $line){
            $words = (isset($line->words) && is_array($line->words)) ? $line->words : array();
            # the "" seed keeps the result a string even for a line without words
            $text = array_reduce($words, "text_concat", "");
            $all_lines[] = $text;

            # the fourth comma-separated field of boundingBox is used as the line height
            $box = isset($line->boundingBox) ? explode(",", $line->boundingBox) : array();
            $height = isset($box[3]) ? (int)$box[3] : 0;

            # highest font is used for name; on equal height the earlier line wins
            if($max_height < $height){
                # decoded JSON text is UTF-8, so do not rely on the internal encoding
                $text_len = mb_strlen($text, "UTF-8");
                if(2 <= $text_len && $text_len <= 8){
                    if(preg_match("/^[\p{Han}\p{Hiragana}\p{Katakana}]+$/u", $text)) {
                        $max_height = $height;
                        $maybe_name = $text;
                    }
                }
            }
        }
    }

    # second pass: everything that is not the final name is an ordinary line.
    # Filtering only after the name is known keeps earlier, superseded
    # candidates in the result instead of silently dropping them.
    $res_lines = array();
    foreach($all_lines as $text){
        if($text !== $maybe_name){
            $res_lines[] = $text;
        }
    }
    return array($res_lines, $maybe_name);
}
# Run OCR on the image at $tmpfile (expected to be set before this point; not
# visible here), then dump the ordinary lines followed by the name candidate.
list($lines_in_image, $maybe_name) = get_ocr_result($tmpfile);
var_dump($lines_in_image, $maybe_name);
# Related