Created
May 12, 2024 07:53
I tried extracting messages from my iMessage screenshots, and more. I failed. But it's a lot of code that maybe an AI will be inspired to learn from in some future "Pile".
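For orientation, here is roughly how the job below would be used; a hedged sketch, assuming the referenced Conversation and Media models exist and ImageAnnotatorClient is bound in the container as the code implies (the $conversation/$media instances are hypothetical):

// Queue the OCR job for a screenshot attached to a conversation.
ParseScreenshotViaGoogle::dispatch($conversation, $media);

// Or, synchronously turn raw image bytes into unsaved Message models or plain text.
$messages = ParseScreenshotViaGoogle::parseImageIntoMessages(file_get_contents('screenshot.png'));
$text = ParseScreenshotViaGoogle::parseImageIntoBlockText(file_get_contents('screenshot.png'));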
<?php

namespace App\Jobs\OCR;

use App\Models\Media;
use App\Models\Message;
use App\Services\Helpers;
use Laravel\Nova\Makeable;
use Illuminate\Support\Str;
use App\Models\Conversation;
use Illuminate\Bus\Batchable;
use Illuminate\Bus\Queueable;
use App\Casts\Message\ExtraData;
use Google\Cloud\Vision\V1\Page;
use Illuminate\Support\Collection;
use Illuminate\Support\Facades\Cache;
use App\Types\Fluent\GoogleOcrMessage;
use Illuminate\Queue\SerializesModels;
use Illuminate\Queue\InteractsWithQueue;
use App\Types\Message\Type as MessageType;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use App\Types\Message\Source as MessageSource;
use Illuminate\Contracts\Queue\ShouldBeUnique;
use Google\Cloud\Vision\V1\ImageAnnotatorClient;
use Google\Cloud\Vision\V1\AnnotateImageResponse;
/**
 * Extract text from a screenshot using Google Cloud Vision OCR.
 *
 * @deprecated Use `ParseMediaViaAI` instead.
 */
class ParseScreenshotViaGoogle implements ShouldBeUnique, ShouldQueue
{
    use Batchable,
        Dispatchable,
        InteractsWithQueue,
        Makeable,
        Queueable,
        SerializesModels;

    public $tries = 3;

    public ImageAnnotatorClient $client;

    public AnnotateImageResponse $annotation;
    // Only one OCR job per media item is kept on the queue at a time.
    public function uniqueId()
    {
        return $this->media->getKey();
    }

    public function __construct(public Conversation $convo, public Media $media)
    {
    }
    // https://cloud.google.com/vision/docs/ocr#detect_text_in_a_local_image
    public function handle(ImageAnnotatorClient $client): void
    {
        if ($this->batch()?->cancelled()) {
            return;
        }

        $this->client = $client;
        // Run OCR first so $this->annotation is populated before reading the page.
        $page = $this->mediaToAnnotation()->mediaToPage();

        Cache::put(
            key: 'vision:' . $this->media->getKey(),
            value: $page->serializeToJsonString(),
            ttl: 3600 * 24 * 7,
        );
        $pWidth = $page->getWidth();
        $pHeight = $page->getHeight();

        // Save image height and width
        $this->media->update([
            'custom_properties->width' => $pWidth,
            'custom_properties->height' => $pHeight,
        ]);

        $toSave = $this->pageToMessageClass($page);
        // dd(
        //     $toSave
        //         //
        //         ->toArray(),
        // );

        $this->deletePreviousMessages();
        $this->convo->messages()->saveMany($toSave);
    }
    public function deletePreviousMessages(): void
    {
        $this->media
            ->messages()
            ->where('source', MessageSource::OCR)
            ->withTrashed()
            ->get()
            ->each->forceDelete();
    }
    public function mediaToAnnotation(): self
    {
        // $cacheKey = 'vision:ocr:' . $this->media->getKey();
        // if (app()->isLocal() && cache()->has($cacheKey)) {
        //     return cache()->get($cacheKey);
        // }

        $media = $this->media;
        $url = $media->getTemporaryUrl(
            expiration: now()->addMinutes(5),
            // Food for thought. I could also take the raw image, run redaction and then OCR.
            // conversionName: $media->hasGeneratedConversion('eco') ? 'eco' : '',
        );

        try {
            $annotation = $this->client->documentTextDetection($url);
        } finally {
            $this->client->close();
        }

        // if (app()->isLocal()) {
        //     cache()->put($cacheKey, $annotation, $devCacheTTLHours * 3600);
        // }

        $this->annotation = $annotation;

        return $this;
    }
    // Pass a cache TTL (seconds) to reuse annotations for identical images during local development.
    public function imageToAnnotation($imageContent, $cacheTTL = false): self
    {
        $cacheKey = 'google:ocr:' . hash('murmur3f', $imageContent);

        if ($cacheTTL !== false && app()->isLocal() && cache()->has($cacheKey)) {
            $this->annotation = cache()->get($cacheKey);

            return $this;
        }

        try {
            $annotation = $this->client->documentTextDetection($imageContent);
        } finally {
            $this->client->close();
        }

        $this->annotation = $annotation;

        if ($cacheTTL !== false && app()->isLocal()) {
            cache()->put($cacheKey, $annotation, $cacheTTL);
        }

        return $this;
    }
    public function mediaToPage(): Page
    {
        $textAnnotation = $this->annotation->getFullTextAnnotation();

        if (!$textAnnotation) {
            throw new \RuntimeException('OCR returned no pages for this image.');
        }

        /** @var Page[] $pages */
        $pages = $textAnnotation->getPages();

        return $pages[0];
    }
    /**
     * @return Collection<int, GoogleOcrMessage>
     */
    public static function toMessages(Page $page): Collection
    {
        $json = json_decode($page->serializeToJsonString());
        // dd($page->getText(), $json);

        $pageWidth = (int) $json->width;
        $pageLang = (string) data_get($json, 'property.detectedLanguages.0.languageCode');
        // $page->getProperty()->getDetectedLanguages()[0]->getLanguageCode();

        // @phpstan-ignore-next-line
        return collect($json->blocks)
            ->flatten(0)
            ->pluck('paragraphs')
            ->flatten(0)
            ->map(
                fn($paragraph) => static::processParagraph(
                    $paragraph,
                    $pageWidth,
                    $pageLang,
                ),
            )
            // Order messages by their y position.
            ->sortBy('bounds.top')
            ->values();
    }
    /**
     * @param object $paragraph Decoded JSON of a \Google\Cloud\Vision\V1\Paragraph.
     */
    public static function processParagraph(
        object $paragraph,
        int $pageWidth,
        string $pageLang,
    ): GoogleOcrMessage {
        /** @var \Google\Cloud\Vision\V1\Vertex[] $bounds */
        $bounds = $paragraph->boundingBox->vertices;

        $allLangs = data_get(
            $paragraph,
            'words.*.property.detectedLanguages.0.languageCode',
        );
        $lang = collect($allLangs)->unique()->filter()->first();
        $text = collect($paragraph->words)
            ->pluck('symbols')
            ->map(function ($symbols) {
                // Detect the distance between the characters.
                $s = collect($symbols)
                    // ->dump()
                    ->map(function ($symbol) {
                        return [
                            'text' => $symbol->text,
                            'start' => $symbol->boundingBox->vertices[0]->x ?? 0,
                            'end' => $symbol->boundingBox->vertices[1]->x,
                        ];
                    })
                    ->toObjects();

                return [
                    'text' => $s->join('text'),
                    'start' => $s->first()->start,
                    'end' => $s->last()->end,
                ];
            })
            ->toObjects()
            // ->dd()
            ->reduce(
                function (array $acc, object $s) {
                    // If it's not punctuation, prepend a space before the word.
                    if (!ctype_punct($s->text)) {
                        return [
                            'last' => $s->end,
                            'str' => $acc['str']->append(" {$s->text}"),
                        ];
                    }

                    // Closing punctuation, like commas, doesn't get a leading space.
                    return [
                        'last' => $s->end,
                        'str' => $acc['str']->append($s->text),
                    ];
                },
                [
                    'last' => 0,
                    'str' => Str::of(''),
                ],
            )['str']
            ->trim()
            ->__toString();
        // NB: Google doesn't return x=0 or y=0 when text is cut off at the edge of the image.
        // Extract the coordinates (edges) of the message.
        [$left, $top] = [$bounds[0]->x ?? 0, $bounds[0]->y ?? 0]; // Top left corner.
        [$right, $bottom] = [$bounds[2]->x, $bounds[2]->y]; // Bottom right corner.

        // Calculate the distance from the text to each side of the image.
        // Distance from the end of the message to the right side of the image.
        $toRight = $pageWidth - $right;
        // Distance from the start of the message to the left side of the image.
        $toLeft = $left;

        // In theory, if the distance to the right side is smaller, it's a message from me.
        // If the distance to the left side is smaller, it's a message from the other person.
        return GoogleOcrMessage::make([
            'is_me' => $toRight < $toLeft,
            'body' => $text,
            'confidence' => round($paragraph->confidence, 2),
            'width' => abs($left - $right),
            'height' => abs($top - $bottom),
            'bounds' => (object) [
                'left' => $left,
                'top' => $top,
                'right' => $right,
                'bottom' => $bottom,
            ],
            'language' => $lang ?: $pageLang,
        ]);
    }
    /**
     * @param Collection<int, GoogleOcrMessage> $messages
     * @return Collection<int, GoogleOcrMessage>
     */
    public static function mergeMultilineMessages(Collection $messages): Collection
    {
        // Estimate the line height using the median of all paragraph heights.
        // It'll be used to combine a single message that spans multiple lines.
        // The median is used so outliers don't skew the estimate.
        $lineHeight = $messages
            ->map(fn(object $m) => $m->bounds->top - $m->bounds->bottom)
            ->map('abs')
            ->median();
        // dd($messages);

        $merged = $messages
            // Traverse each message and calculate the y distance between the previous message's bottom and the current message's top.
            ->map(function (object $current, $index) use ($messages, $lineHeight) {
                // If it's the first message, skip.
                if ($index === 0) {
                    $current->distance_to_prev = 0;
                    $current->top_to_top = 0;
                    $current->is_the_same_message = false;

                    return $current;
                }
                // Calculate the distance between the previous message's bottom and the current message's top.
                $prev = $messages[$index - 1];
                $distance = abs($prev->bounds->bottom - $current->bounds->top);
                $current->distance_to_prev = $distance;

                // That alone doesn't account for paragraphs sitting on the same line,
                // so also calculate the distance between the top of this message and the top of the previous one.
                $topToTop = abs($prev->bounds->top - $current->bounds->top);
                $current->top_to_top = $topToTop;

                // The flag means the current paragraph belongs to the same message as the previous one:
                // the tops are more than a line height apart (so it sits on a new line, not beside the
                // previous text on the x axis), yet the gap to the previous paragraph's bottom is smaller
                // than a line height, which is what a wrapped continuation of the same bubble looks like.
                $current->is_the_same_message =
                    $topToTop > $lineHeight && $distance < $lineHeight;

                return $current;
            })
            // Combine paragraphs that belong to the same message. This was drafted by AI but reviewed by me. Sick.
            ->reduce(function (Collection $acc, object $current) {
                // If the current paragraph is not part of the previous message, just add it to the collection.
                if ($current->is_the_same_message === false) {
                    return $acc->push($current);
                }

                // Otherwise, merge it into the previous message.
                // Get the last message. It will be updated by reference.
                $last = $acc->last();

                // Combine the bodies.
                $last->body = trim("{$last->body} {$current->body}");

                // Update the bounds.
                $last->bounds->bottom = $current->bounds->bottom;
                $last->bounds->left = min($last->bounds->left, $current->bounds->left);
                $last->bounds->right = max($last->bounds->right, $current->bounds->right);

                // Update the width.
                $last->width = abs($last->bounds->left - $last->bounds->right);
                // Update the height.
                $last->height = abs($last->bounds->top - $last->bounds->bottom);
                // Average the confidence of the merged paragraphs.
                $last->confidence = ($last->confidence + $current->confidence) / 2;

                return $acc;
            }, collect())
            ->map(function ($m) {
                // Unset the helper keys that are no longer needed.
                unset($m->distance_to_prev);
                unset($m->top_to_top);
                unset($m->is_the_same_message);

                return $m;
            });

        return $merged;
    }
    /**
     * @param Collection<int, GoogleOcrMessage> $messages
     * @return Collection<int, GoogleOcrMessage>
     */
    public static function enrichMessages(Collection $messages): Collection
    {
        return $messages
            ->map(function (GoogleOcrMessage $m) {
                $m->is_irrelevant = Helpers::isMessageIrrelevant($m->body);
                // Detect short interjections like "haha" so their language can be forced to EN below.
                $m->is_interjection = Helpers::isMessageInterjection($m->body);
                // Detect timestamp/date rows.
                $m->is_time = Helpers::isMessageTime($m->body);

                if (
                    // If the language is not EN and the message is an interjection, set it to EN.
                    (strtolower($m->language) !== 'en' && $m->is_interjection) ||
                    // If the language is not EN and the message is a timestamp, set it to EN.
                    (strtolower($m->language) !== 'en' && $m->is_time)
                ) {
                    $m->language = 'EN';
                }

                // If the message is a timestamp, set its type accordingly.
                $m->type = $m->is_time ? MessageType::TIMESTAMP : MessageType::TEXT;
                // Timestamps are never "from me".
                $m->is_me = $m->is_time ? false : $m->is_me;

                return $m;
            })
            ->values();
    }
    public function pageToMessageClass(Page $page): Collection
    {
        $messages = $this->toMessages($page);
        $merged = $this->mergeMultilineMessages($messages);
        $enriched = $this->enrichMessages($merged);

        // Remove irrelevant messages and reset keys so order_column stays sequential.
        $filtered = $enriched->where('is_irrelevant', false)->values();

        $pWidth = $page->getWidth();
        $pHeight = $page->getHeight();

        $processed = $filtered->map(
            fn(GoogleOcrMessage $obj, int $index) => new Message([
                'body' => $obj->body,
                'is_me' => $obj->is_me,
                'language' => $obj->language,
                'extra_data' => new ExtraData(
                    box_left: $obj->bounds->left,
                    box_top: $obj->bounds->top,
                    box_right: $obj->bounds->right,
                    box_bottom: $obj->bounds->bottom,
                    is_irrelevant: $obj->is_irrelevant,
                    confidence: $obj->confidence,
                    photo_width: $pWidth,
                    photo_height: $pHeight,
                ),
                'media_id' => $this->media->getKey(),
                'order_column' => $index + 1,
                'source' => MessageSource::OCR,
                'type' => $obj->type,
            ]),
        );

        return $processed;
    }
    public static function parseImageIntoMessages(string $binaryData): Collection
    {
        $client = app(ImageAnnotatorClient::class);

        $self = new self(media: new Media(), convo: new Conversation());
        $self->client = $client;
        $self->imageToAnnotation($binaryData, 600);

        $page = $self->mediaToPage();
        $messages = $self->pageToMessageClass($page);

        return $messages;
    }

    public static function parseImageIntoBlockText(string $binaryData): string
    {
        $client = app(ImageAnnotatorClient::class);

        $self = new self(media: new Media(), convo: new Conversation());
        $self->client = $client;
        $self->imageToAnnotation($binaryData, 600);

        $blockText = $self->annotation->getFullTextAnnotation()->getText();

        return $blockText;
    }
}