set_time_limit(600);
require 'Guzzle/vendor/autoload.php';
use GuzzleHttp\Client;
use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\Exception\RequestException;
// Preflight (OPTIONS) requests get an empty response and nothing else runs.
$method = $_SERVER['REQUEST_METHOD'];
if ($method == "OPTIONS") {
    die();
}

// The document number is taken from whatever parameter arrives last:
// every query-string value is visited first, then every form value, so a
// POST field overrides a GET field and the parameter name is irrelevant.
$doc = null;
$sources = [];
if (isset($_GET)) {
    $sources[] = $_GET;
}
if (isset($_POST)) {
    $sources[] = $_POST;
}
foreach ($sources as $params) {
    foreach ($params as $value) {
        $doc = $value;
    }
}
/**
 * Validates the format of a cedula / RIF document number.
 *
 * Whitespace is removed and the value is upper-cased before checking:
 *  - V or E followed by 7 or 8 digits (cedula), or
 *  - J, G or P followed by exactly 9 digits (RIF).
 *
 * On any validation failure a JSON error object is echoed and the script
 * terminates. On success the function returns nothing (null); the caller
 * tests the result with empty(), so that contract must not change. The
 * normalised value is NOT handed back to the caller.
 *
 * @param mixed $doc Raw request value.
 * @return void
 */
function VerificaDoc($doc)
{
    // Request parameters can be arrays (e.g. ?doc[]=x). Those can never be a
    // document number, so treat every non-scalar value as "nothing supplied"
    // instead of letting the string functions below fail on them.
    if (!is_scalar($doc)) {
        $doc = '';
    }

    $doc = strtoupper(preg_replace('/\s+/', '', (string) $doc));
    if (empty($doc)) {
        echo json_encode(['res' => 'error', 'data' => 'Debe ingresar la cedula o rif']);
        exit();
    }

    if (!preg_match('/^[JGPVE]/', $doc)) {
        echo json_encode(['res' => 'error', 'data' => 'Formato no valido, la primera letra debe ser [J,G,P,V,E]']);
        exit();
    }

    // The leading letter decides which of the two formats applies.
    $esCedula = ($doc[0] === 'V' || $doc[0] === 'E');
    if ($esCedula) {
        $pattern = '/^[VE][0-9]{7,8}$/';
        $errorMessage = 'La cedula debe contener 7 u 8 digitos numericos';
    } else {
        $pattern = '/^[JGP][0-9]{9}$/';
        $errorMessage = 'El rif debe contener 9 digitos numericos';
    }

    if (!preg_match($pattern, $doc)) {
        echo json_encode(['res' => 'error', 'data' => $errorMessage]);
        exit();
    }
}
/**
 * Appends the modulo-11 check digit to a cedula, producing a 10-character RIF.
 *
 * The numeric part is left-padded with zeros to 8 digits. Each digit is
 * multiplied by its positional weight (3, 2, 7, 6, 5, 4, 3, 2), the letter
 * contributes 4 * (1 for V, 2 for E, 0 for anything else), and the check
 * digit is 11 - (sum mod 11), with results of 10 or 11 mapped to 0.
 *
 * @param string $c Letter followed by the cedula digits, e.g. "V12345678".
 * @return string Upper-cased letter + 8 digits + check digit.
 */
function CalculaDigitoCedula($c)
{
    $l = strtoupper(substr($c, 0, 1));
    // Pad to the full 8-digit width; the previous code only ever added one zero.
    $n = str_pad((string) substr($c, 1), 8, '0', STR_PAD_LEFT);

    $digitoEspecial = ($l == 'V') ? 1 : (($l == 'E') ? 2 : 0);

    // Sum only the numeric positions. Summing an array that also holds the
    // letter makes array_sum() emit a warning on PHP 8.3+, which would end up
    // in front of the JSON response.
    $pesos = [3, 2, 7, 6, 5, 4, 3, 2];
    $suma = $digitoEspecial * 4;
    foreach ($pesos as $i => $peso) {
        $suma += ((int) $n[$i]) * $peso;
    }

    $resta = 11 - ($suma % 11);
    $digitoVerificador = ($resta >= 10) ? 0 : $resta;

    return $l . $n . $digitoVerificador;
}
/**
 * Sends an image to the OCR.space "parse/image" endpoint and returns the
 * recognised text of its first line, reduced to ASCII letters and digits.
 *
 * A leading "l", "L" or "I" is rewritten to "1". An empty string is returned
 * when the response carries no recognisable text. On I/O or HTTP failure a
 * JSON error object is echoed and the script terminates.
 *
 * @param string $image_path Path of the image file to upload.
 * @return string Cleaned-up text (possibly empty).
 */
function LeeOCRSpace($image_path)
{
    // Check readability first so a missing file yields a JSON error rather
    // than a PHP warning followed by an empty upload.
    $raw = is_readable($image_path) ? file_get_contents($image_path) : false;
    if ($raw === false) {
        echo json_encode(['res' => 'error', 'data' => 'No se pudo leer la imagen para el OCR']);
        exit();
    }
    $image_data = base64_encode($raw);

    $client = new Client();
    try {
        $response = $client->post('https://api.ocr.space/parse/image', [
            // NOTE(review): hard-coded credential; it should come from
            // configuration or the environment, not from source control.
            'headers' => ['apikey' => 'K83523935688957'],
            'form_params' => [
                'base64Image' => 'data:image/jpeg;base64,' . $image_data,
                'language' => 'eng',
                'isOverlayRequired' => 'false',
                'OCREngine' => '3',
            ],
        ]);

        if ($response->getStatusCode() !== 200) {
            echo json_encode(['res' => 'error', 'data' => 'No se pudo ejecutar el OCR: ' . $response->getStatusCode()]);
            exit();
        }

        $data = json_decode((string) $response->getBody(), true);
        $resultado = isset($data['ParsedResults'][0]) ? $data['ParsedResults'][0] : [];

        // Prefer the overlay line the original code relied on. Overlay data is
        // not requested (isOverlayRequired=false), so fall back to the first
        // line of ParsedText when the overlay is absent.
        if (isset($resultado['TextOverlay']['Lines'][0]['LineText'])) {
            $linea = $resultado['TextOverlay']['Lines'][0]['LineText'];
        } elseif (isset($resultado['ParsedText'])) {
            $lineas = preg_split('/\R/', trim((string) $resultado['ParsedText']));
            $linea = $lineas[0];
        } else {
            $linea = '';
        }

        $text = preg_replace('/[^a-zA-Z0-9]/', '', (string) $linea);

        // A leading "l", "L" or "I" is rewritten to the digit 1.
        if ($text !== '' && in_array($text[0], ['l', 'L', 'I'], true)) {
            $text = '1' . substr($text, 1);
        }

        return $text;
    } catch (\GuzzleHttp\Exception\GuzzleException $e) {
        // GuzzleException also covers connection failures, which are not
        // RequestException instances in Guzzle 7.
        echo json_encode(['res' => 'error', 'data' => 'Error: ' . $e->getMessage()]);
        exit();
    }
}
/**
 * Performs one lookup attempt against the SENIAT "BuscaRif" service.
 *
 * Steps:
 *  1. Download the captcha image, keeping the session cookies.
 *  2. Enlarge / threshold / blur the image with Imagick and save it.
 *  3. Read the processed image through LeeOCRSpace().
 *  4. POST the recognised code together with the document number.
 *
 * Both temporary images (captcha.jpg, lee.jpg) are written next to this
 * script and are left on disk for the caller to remove.
 *
 * On any HTTP or file error a JSON error object is echoed and the script
 * terminates.
 *
 * @param string $doc Document number sent as the "p_rif" form field.
 * @return DOMDocument Parsed HTML of the result page.
 */
function SENIAT($doc)
{
    // Use absolute paths everywhere so that writing, Imagick, the OCR upload
    // and the caller's cleanup all refer to the same files regardless of the
    // current working directory.
    $dir = dirname(__FILE__);
    $captchaPath = $dir . '/captcha.jpg';
    $leePath = $dir . '/lee.jpg';

    ## DOWNLOAD THE IMAGE AND CREATE THE COOKIE ##
    // NOTE(review): certificate verification is disabled. Both URLs below are
    // plain http, so it has no effect today; revisit if they move to https.
    $client = new Client(['verify' => false]);
    $cookieJar = new CookieJar();

    try {
        $response = $client->request('GET', 'http://contribuyente.seniat.gob.ve/BuscaRif/Captcha.jpg', [
            'cookies' => $cookieJar
        ]);
        if ($response->getStatusCode() !== 200) {
            echo json_encode(['res' => 'error', 'data' => 'No se pudo descargar la imagen captcha. Codigo: ' . $response->getStatusCode()]);
            exit();
        }

        $imageData = $response->getBody()->getContents();
        if (file_put_contents($captchaPath, $imageData) === false) {
            echo json_encode(['res' => 'error', 'data' => 'No se pudo guardar la imagen captcha']);
            exit();
        }

        // Flatten the jar into a raw Cookie header value, sent in addition to
        // the jar itself on the second request.
        $cookieData = '';
        foreach ($cookieJar as $cookie) {
            $cookieData .= $cookie->getName() . '=' . $cookie->getValue() . '; ';
        }

        ## IMPROVE THE IMAGE SO THE OCR API CAN READ IT ##
        $per = 1.3;      // canvas size relative to the enlarged image
        $thr = 0.5;      // threshold (fraction of the quantum range) for the canvas
        $ancho = 300;    // final width
        $alto = 90;      // final height
        $blurIzq = 3;    // blurImage() radius
        $blurDer = 2;    // blurImage() sigma
        $tam = 4;        // enlargement factor

        $image = new Imagick($captchaPath);
        $newWidth = $image->getImageWidth() * $tam;
        $newHeight = $image->getImageHeight() * $tam;
        $image->resizeImage($newWidth, $newHeight, Imagick::FILTER_LANCZOS, 1);
        $image->resizeImage($newWidth, $newHeight, Imagick::FILTER_QUADRATIC, 1);
        $threshold = 0.5 * $image->getQuantumRange()['quantumRangeLong'];
        $image->thresholdImage($threshold);
        $image->gaussianBlurImage(5, 5);

        $dimensions = $image->getImageGeometry();
        $width = $dimensions['width'];
        $height = $dimensions['height'];

        // Imagick expects integer geometry. The explicit casts truncate exactly
        // like the implicit conversion did, without the PHP 8.1+ deprecation
        // notice for fractional floats.
        $canvas = new Imagick();
        $canvas->newImage((int) ($width * $per), (int) ($height * $per), 'none');
        $x = (int) (($width * $per - $width) / $per);
        $y = (int) (($height * $per - $height) / $per);
        $canvas->compositeImage($image, Imagick::COMPOSITE_OVER, $x, $y);
        $canvas->blurImage($blurIzq, $blurDer);
        $threshold = $thr * $canvas->getQuantumRange()['quantumRangeLong'];
        $canvas->thresholdImage($threshold);
        $canvas->scaleImage($ancho, $alto);
        $canvas->writeImage($leePath);

        $image->clear();
        $image->destroy();
        $canvas->clear();
        $canvas->destroy();

        $codigo = LeeOCRSpace($leePath);

        $response = $client->post('http://contribuyente.seniat.gob.ve/BuscaRif/BuscaRif.jsp', [
            'cookies' => $cookieJar,
            'form_params' => [
                'codigo' => $codigo,
                'p_rif' => $doc
            ],
            'headers' => [
                'Cookie' => $cookieData
            ]
        ]);
        if ($response->getStatusCode() !== 200) {
            echo json_encode(['res' => 'error', 'data' => 'No se pudieron recuperar los datos. Codigo: ' . $response->getStatusCode()]);
            exit();
        }

        $responseBody = $response->getBody()->getContents();
        $responseBody = mb_convert_encoding($responseBody, 'UTF-8', 'auto');

        // Collect HTML parser complaints internally instead of silencing them
        // with "@", then restore the previous libxml setting.
        $prevLibxml = libxml_use_internal_errors(true);
        $dom = new DOMDocument();
        $dom->loadHTML($responseBody);
        libxml_clear_errors();
        libxml_use_internal_errors($prevLibxml);

        return $dom;
    } catch (\GuzzleHttp\Exception\GuzzleException $e) {
        // GuzzleException also covers connection failures, which are not
        // RequestException instances in Guzzle 7.
        echo json_encode(['res' => 'error', 'data' => 'Error: ' . $e->getMessage()]);
        exit();
    }
}
// Main flow: validate the document, query SENIAT (retrying while the captcha
// is rejected), scrape the result tables and emit them as JSON.
// VerificaDoc() terminates the script on invalid input and returns nothing on
// success, hence the empty() test.
if (empty(VerificaDoc($doc))) {
    // VerificaDoc() ignores whitespace while validating but does not return
    // the cleaned value, so normalise it here the same way before using it.
    $doc = strtoupper(preg_replace('/\s+/', '', $doc));
    if (!preg_match('/^[JGP]/', $doc)) {
        $doc = CalculaDigitoCedula($doc);
    }

    // Retry while the service reports a captcha mismatch, but give up after a
    // bounded number of attempts instead of looping until the time limit.
    $maxIntentos = 10;
    // The accented letter of "codigo" arrives as "?" or as a multi-byte
    // sequence depending on charset detection, so match it loosely.
    $patronCaptcha = '/^EL c.{1,6}digo no coincide con la imagen\.$/';
    $xpath = null;
    $secondTable = null;
    $captchaResuelto = false;
    for ($i = 0; $i < $maxIntentos; $i++) {
        $dom = SENIAT($doc);
        $xpath = new DOMXPath($dom);
        $secondTable = $xpath->query('(//table)[2]')->item(0);
        if ($secondTable === null || !preg_match($patronCaptcha, trim($secondTable->textContent))) {
            $captchaResuelto = true;
            break;
        }
    }

    // The temporary images are no longer needed whatever the outcome.
    foreach (['/captcha.jpg', '/lee.jpg'] as $temporal) {
        $ruta = dirname(__FILE__) . $temporal;
        if (file_exists($ruta)) {
            unlink($ruta);
        }
    }

    if (!$captchaResuelto) {
        echo json_encode(['res' => 'error', 'data' => 'No se pudo resolver el captcha despues de ' . $maxIntentos . ' intentos']);
        exit();
    }
    if ($secondTable === null) {
        echo json_encode(['res' => 'error', 'data' => 'No se pudo interpretar la respuesta del SENIAT']);
        exit();
    }

    $head = htmlentities($secondTable->textContent, ENT_SUBSTITUTE, 'utf-8');
    // NOTE(review): the original replacement list appears to have targeted a
    // non-breaking space; both of its encodings are handled here. Confirm
    // against a live response.
    $head = trim(str_replace(['&nbsp;', "\xC2\xA0", '_'], ' ', $head));

    if (strpos($head, 'REGISTRO VENCIDO') !== false) {
        $registro = 'VENCIDO';
        // $head is already entity-encoded; encoding it a second time would
        // turn every "&" of an existing entity into "&amp;".
        $head = trim(str_replace('REGISTRO VENCIDO', '', $head));
    } else {
        $registro = 'ACTIVO';
    }

    // First token is the RIF, the remainder is the name.
    $rif = strstr($head, ' ', true);
    $nombre = substr($head, strpos($head, ' ') + 1);
    $nombre = preg_replace('/\([^)]+\)/', '', $nombre);

    // The first parenthesised fragment (parentheses included) is kept as "siglas".
    preg_match('/\((.*?)\)/', $head, $matches);
    $siglas = count($matches) > 0 ? preg_replace('/\s{2,}/', '', $matches[0]) : '';

    // The third table is split on ":"; parts 1 and 2 feed "actividad" and "condicion".
    $thirdTable = $xpath->query('(//table)[3]')->item(0);
    $data = $thirdTable !== null ? trim($thirdTable->textContent) : '';
    $a = explode(':', $data);

    if ($registro != 'VENCIDO' && $nombre == 'No existe el contribuyente solicitado') {
        $registro = 'NO APLICA';
    }

    $out = [
        'registro' => $registro,
        'rif' => $rif,
        'nombre' => preg_replace('/[^A-Za-z0-9áéíóúü\s.]/', '', $nombre),
        'siglas' => $siglas,
        'actividad' => count($a) > 1 ? trim(str_replace('Condici?n', '', $a[1])) : '',
        // Index 2 exists only when there are at least three parts.
        'condicion' => count($a) > 2 ? trim(str_replace('?', 'o', $a[2])) : '',
    ];
    // The second argument is a flags bitmask; the former boolean true was
    // coerced to 1, i.e. JSON_HEX_TAG, which is now spelled out.
    echo json_encode($out, JSON_HEX_TAG);
}