feat: getAssetOcr endpoint (#23331)

* feat: getAssetOcr endpoint

* pr feedback
This commit is contained in:
Alex 2025-10-28 15:57:03 -05:00 committed by GitHub
parent 8d25f81bec
commit 9098717c55
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 293 additions and 0 deletions

BIN
mobile/openapi/README.md generated

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -2491,6 +2491,53 @@
"description": "This endpoint requires the `asset.read` permission."
}
},
"/assets/{id}/ocr": {
"get": {
"operationId": "getAssetOcr",
"parameters": [
{
"name": "id",
"required": true,
"in": "path",
"schema": {
"format": "uuid",
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"items": {
"$ref": "#/components/schemas/AssetOcrResponseDto"
},
"type": "array"
}
}
},
"description": ""
}
},
"security": [
{
"bearer": []
},
{
"cookie": []
},
{
"api_key": []
}
],
"tags": [
"Assets"
],
"x-immich-permission": "asset.read",
"description": "This endpoint requires the `asset.read` permission."
}
},
"/assets/{id}/original": {
"get": {
"operationId": "downloadAsset",
@ -11117,6 +11164,88 @@
],
"type": "object"
},
"AssetOcrResponseDto": {
"properties": {
"assetId": {
"format": "uuid",
"type": "string"
},
"boxScore": {
"description": "Confidence score for text detection box",
"format": "double",
"type": "number"
},
"id": {
"format": "uuid",
"type": "string"
},
"text": {
"description": "Recognized text",
"type": "string"
},
"textScore": {
"description": "Confidence score for text recognition",
"format": "double",
"type": "number"
},
"x1": {
"description": "Normalized x coordinate of box corner 1 (0-1)",
"format": "double",
"type": "number"
},
"x2": {
"description": "Normalized x coordinate of box corner 2 (0-1)",
"format": "double",
"type": "number"
},
"x3": {
"description": "Normalized x coordinate of box corner 3 (0-1)",
"format": "double",
"type": "number"
},
"x4": {
"description": "Normalized x coordinate of box corner 4 (0-1)",
"format": "double",
"type": "number"
},
"y1": {
"description": "Normalized y coordinate of box corner 1 (0-1)",
"format": "double",
"type": "number"
},
"y2": {
"description": "Normalized y coordinate of box corner 2 (0-1)",
"format": "double",
"type": "number"
},
"y3": {
"description": "Normalized y coordinate of box corner 3 (0-1)",
"format": "double",
"type": "number"
},
"y4": {
"description": "Normalized y coordinate of box corner 4 (0-1)",
"format": "double",
"type": "number"
}
},
"required": [
"assetId",
"boxScore",
"id",
"text",
"textScore",
"x1",
"x2",
"x3",
"x4",
"y1",
"y2",
"y3",
"y4"
],
"type": "object"
},
"AssetOrder": {
"enum": [
"asc",

View File

@ -546,6 +546,32 @@ export type AssetMetadataResponseDto = {
export type AssetMetadataUpsertDto = {
items: AssetMetadataUpsertItemDto[];
};
/**
 * One OCR entry of an asset: the recognized text, its confidence scores and
 * the four corners of the detected text box (coordinates normalized to 0-1).
 */
export type AssetOcrResponseDto = {
    id: string;
    assetId: string;

    /** Recognized text */
    text: string;
    /** Confidence score for text detection box */
    boxScore: number;
    /** Confidence score for text recognition */
    textScore: number;

    /** Normalized x coordinate of box corner 1 (0-1) */
    x1: number;
    /** Normalized y coordinate of box corner 1 (0-1) */
    y1: number;

    /** Normalized x coordinate of box corner 2 (0-1) */
    x2: number;
    /** Normalized y coordinate of box corner 2 (0-1) */
    y2: number;

    /** Normalized x coordinate of box corner 3 (0-1) */
    x3: number;
    /** Normalized y coordinate of box corner 3 (0-1) */
    y3: number;

    /** Normalized x coordinate of box corner 4 (0-1) */
    x4: number;
    /** Normalized y coordinate of box corner 4 (0-1) */
    y4: number;
};
export type AssetMediaReplaceDto = {
assetData: Blob;
deviceAssetId: string;
@ -2390,6 +2416,19 @@ export function getAssetMetadataByKey({ id, key }: {
...opts
}));
}
/**
 * Fetches the OCR entries of the asset with the given id.
 *
 * This endpoint requires the `asset.read` permission.
 */
export function getAssetOcr({ id }: {
    id: string;
}, opts?: Oazapfts.RequestOpts) {
    // The id is percent-encoded so it is safe to embed as a path segment.
    const url = `/assets/${encodeURIComponent(id)}/ocr`;
    const pending = oazapfts.fetchJson<{
        status: 200;
        data: AssetOcrResponseDto[];
    }>(url, { ...opts });
    return oazapfts.ok(pending);
}
/**
* This endpoint requires the `asset.download` permission.
*/

View File

@ -16,6 +16,7 @@ import {
UpdateAssetDto,
} from 'src/dtos/asset.dto';
import { AuthDto } from 'src/dtos/auth.dto';
import { AssetOcrResponseDto } from 'src/dtos/ocr.dto';
import { Permission, RouteKey } from 'src/enum';
import { Auth, Authenticated } from 'src/middleware/auth.guard';
import { AssetService } from 'src/services/asset.service';
@ -95,6 +96,12 @@ export class AssetController {
return this.service.getMetadata(auth, id);
}
// GET :id/ocr — returns the OCR entries for the asset named by the `id` path parameter.
// The route is guarded by Permission.AssetRead; the lookup itself is delegated to
// `this.service.getOcr`, which receives the authenticated caller and the asset id.
@Get(':id/ocr')
@Authenticated({ permission: Permission.AssetRead })
getAssetOcr(@Auth() auth: AuthDto, @Param() { id }: UUIDParamDto): Promise<AssetOcrResponseDto[]> {
return this.service.getOcr(auth, id);
}
@Put(':id/metadata')
@Authenticated({ permission: Permission.AssetUpdate })
updateAssetMetadata(

View File

@ -0,0 +1,42 @@
import { ApiProperty } from '@nestjs/swagger';
/**
 * Response shape for a single OCR entry of an asset.
 *
 * Each entry carries the recognized text, two confidence scores and the four
 * corners of the detected text box. Every field declares its OpenAPI type,
 * format and (where present) description explicitly through `@ApiProperty`.
 */
export class AssetOcrResponseDto {
// Identifiers (both documented as UUID strings).
@ApiProperty({ type: 'string', format: 'uuid' })
id!: string;
@ApiProperty({ type: 'string', format: 'uuid' })
assetId!: string;
// Box corners 1-4 as (x, y) pairs; values are documented as normalized to the 0-1 range.
@ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 1 (0-1)' })
x1!: number;
@ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 1 (0-1)' })
y1!: number;
@ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 2 (0-1)' })
x2!: number;
@ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 2 (0-1)' })
y2!: number;
@ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 3 (0-1)' })
x3!: number;
@ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 3 (0-1)' })
y3!: number;
@ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 4 (0-1)' })
x4!: number;
@ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 4 (0-1)' })
y4!: number;
// Confidence scores: one for the detected box, one for the recognized text.
@ApiProperty({ type: 'number', format: 'double', description: 'Confidence score for text detection box' })
boxScore!: number;
@ApiProperty({ type: 'number', format: 'double', description: 'Confidence score for text recognition' })
textScore!: number;
// The recognized text itself.
@ApiProperty({ type: 'string', description: 'Recognized text' })
text!: string;
}

View File

@ -700,6 +700,42 @@ describe(AssetService.name, () => {
});
});
// Covers AssetService.getOcr: the access check, the happy path and the empty result.
describe('getOcr', () => {
  const assetId = 'asset-1';

  it('should require asset read permission', async () => {
    // An empty set from the access check means the caller may not read the asset.
    mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set());

    const result = sut.getOcr(authStub.admin, assetId);

    await expect(result).rejects.toBeInstanceOf(BadRequestException);
    // The repository must not be queried when access is denied.
    expect(mocks.ocr.getByAssetId).not.toHaveBeenCalled();
  });

  it('should return OCR data for an asset', async () => {
    const rows = [factory.assetOcr({ text: 'Hello World' }), factory.assetOcr({ text: 'Test Image' })];
    const [first, second] = rows;
    mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set([assetId]));
    mocks.ocr.getByAssetId.mockResolvedValue([first, second]);

    const result = sut.getOcr(authStub.admin, assetId);

    await expect(result).resolves.toEqual([first, second]);
    expect(mocks.access.asset.checkOwnerAccess).toHaveBeenCalledWith(
      authStub.admin.user.id,
      new Set([assetId]),
      undefined,
    );
    expect(mocks.ocr.getByAssetId).toHaveBeenCalledWith(assetId);
  });

  it('should return empty array when no OCR data exists', async () => {
    mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set([assetId]));
    mocks.ocr.getByAssetId.mockResolvedValue([]);

    const result = sut.getOcr(authStub.admin, assetId);

    await expect(result).resolves.toEqual([]);
    expect(mocks.ocr.getByAssetId).toHaveBeenCalledWith(assetId);
  });
});
describe('run', () => {
it('should run the refresh faces job', async () => {
mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set(['asset-1']));

View File

@ -16,6 +16,7 @@ import {
mapStats,
} from 'src/dtos/asset.dto';
import { AuthDto } from 'src/dtos/auth.dto';
import { AssetOcrResponseDto } from 'src/dtos/ocr.dto';
import { AssetMetadataKey, AssetStatus, AssetVisibility, JobName, JobStatus, Permission, QueueName } from 'src/enum';
import { BaseService } from 'src/services/base.service';
import { ISidecarWriteJob, JobItem, JobOf } from 'src/types';
@ -289,6 +290,11 @@ export class AssetService extends BaseService {
return this.assetRepository.getMetadata(id);
}
/**
 * Returns the OCR entries stored for the given asset.
 *
 * `requireAccess` is awaited first with `Permission.AssetRead` for `id`, so the
 * OCR repository is only queried once that check has resolved; if the check
 * rejects, the rejection propagates to the caller.
 *
 * @param auth - the authenticated caller
 * @param id - id of the asset whose OCR entries are requested
 * @returns whatever `ocrRepository.getByAssetId` yields for `id`
 */
async getOcr(auth: AuthDto, id: string): Promise<AssetOcrResponseDto[]> {
await this.requireAccess({ auth, permission: Permission.AssetRead, ids: [id] });
return this.ocrRepository.getByAssetId(id);
}
async upsertMetadata(auth: AuthDto, id: string, dto: AssetMetadataUpsertDto): Promise<AssetMetadataResponseDto[]> {
await this.requireAccess({ auth, permission: Permission.AssetUpdate, ids: [id] });
return this.assetRepository.upsertMetadata(id, dto.items);

View File

@ -309,10 +309,44 @@ const assetSidecarWriteFactory = (asset: Partial<SidecarWriteAsset> = {}) => ({
...asset,
});
/**
 * Builds an asset OCR entry for tests.
 *
 * Every field has a default: fresh ids from `newUuid()`, an axis-aligned box
 * with corners (0.1, 0.2), (0.3, 0.2), (0.3, 0.4), (0.1, 0.4), fixed scores and
 * the text 'Sample Text'. Any field passed in `ocr` replaces its default.
 */
const assetOcrFactory = (
  ocr: Partial<{
    id: string;
    assetId: string;
    x1: number;
    y1: number;
    x2: number;
    y2: number;
    x3: number;
    y3: number;
    x4: number;
    y4: number;
    boxScore: number;
    textScore: number;
    text: string;
  }> = {},
) => {
  // Defaults are always evaluated, so both ids are generated even when overridden.
  const defaults = {
    id: newUuid(),
    assetId: newUuid(),
    x1: 0.1,
    y1: 0.2,
    x2: 0.3,
    y2: 0.2,
    x3: 0.3,
    y3: 0.4,
    x4: 0.1,
    y4: 0.4,
    boxScore: 0.95,
    textScore: 0.92,
    text: 'Sample Text',
  };

  return { ...defaults, ...ocr };
};
export const factory = {
activity: activityFactory,
apiKey: apiKeyFactory,
asset: assetFactory,
assetOcr: assetOcrFactory,
auth: authFactory,
authApiKey: authApiKeyFactory,
authUser: authUserFactory,