// Mirror of https://github.com/zebrajr/tensorflow.git
// Synced 2025-12-06 12:20:11 +01:00
// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This schema defines how to configure TFLite for delegation. These
// definitions can be used in multiple ways: as output of a compatibility list,
// in benchmarking tools and to decouple delegate instantiation from code.
//
// The schema is work-in-progress, covering the most broadly used delegates and
// options.
syntax = "proto2";

package tflite.proto;
// ExecutionPreference is used to match accelerators against the preferences of
// the current application or usecase. Some of the values here can appear both
// in the compatibility list and as input, some only as input.
//
// These are separate from NNAPIExecutionPreference - the compatibility list
// design doesn't assume a one-to-one mapping between which usecases
// compatibility list entries have been developed for and what settings are used
// for NNAPI.
enum ExecutionPreference {
  // Match any selected preference. Allowlist (semantically - value is same as
  // on input).
  ANY = 0;
  // Match low latency preference. Both compatibility list and input.
  LOW_LATENCY = 1;
  // Match low power preference. Both compatibility list and input.
  LOW_POWER = 2;
  // Never accelerate. Can be used for input to compatibility list or for
  // standalone Acceleration configuration.
  FORCE_CPU = 3;
}
// TFLite delegate to use.
enum Delegate {
  // No delegate configured.
  NONE = 0;
  // Android Neural Networks API delegate; see NNAPISettings.
  NNAPI = 1;
  // GPU delegate; see GPUSettings.
  GPU = 2;
  // Qualcomm Hexagon DSP delegate; see HexagonSettings.
  HEXAGON = 3;
  // XNNPACK CPU-kernel delegate; see XNNPackSettings.
  XNNPACK = 4;
  // Edge TPU delegate.
  // TODO(b/157893534): Support exposing edgetpu tflite delegate creation
  // options.
  EDGETPU = 5;
}
// Execution preference passed through to NNAPI (mirrors the NNAPI NDK
// ANEURALNETWORKS_PREFER_* preferences).
enum NNAPIExecutionPreference {
  // No preference specified.
  UNDEFINED = 0;
  // Prefer executing in a way that minimizes battery drain.
  NNAPI_LOW_POWER = 1;
  // Prefer returning a single answer as fast as possible, even if this causes
  // more power consumption.
  NNAPI_FAST_SINGLE_ANSWER = 2;
  // Prefer maximizing the throughput of successive frames, for example when
  // processing successive frames coming from the camera.
  NNAPI_SUSTAINED_SPEED = 3;
}
// Relative scheduling priority to request from NNAPI for this model's
// workloads.
enum NNAPIExecutionPriority {
  // No priority specified.
  NNAPI_PRIORITY_UNDEFINED = 0;
  // Low scheduling priority.
  NNAPI_PRIORITY_LOW = 1;
  // Medium scheduling priority.
  NNAPI_PRIORITY_MEDIUM = 2;
  // High scheduling priority.
  NNAPI_PRIORITY_HIGH = 3;
}
// One possible acceleration configuration.
message ComputeSettings {
  // The usecase preference this accelerator configuration targets.
  optional ExecutionPreference preference = 1;
  // How TFLite itself should be configured (delegate choice and options).
  optional TFLiteSettings tflite_settings = 2;
  // Identifiers used for instrumentation and telemetry.
  optional string model_namespace_for_statistics = 3;
  optional string model_identifier_for_statistics = 4;
}
// NNAPI delegate settings.
message NNAPISettings {
  // Which instance (NNAPI accelerator) to use. One driver may provide several
  // accelerators (though a driver may also hide several back-ends behind one
  // name, at the choice of the driver vendor).
  // Note that driver introspection is only available in Android Q and later.
  optional string accelerator_name = 1;

  // NNAPI model compilation caching settings, forwarded to
  // tflite::StatefulNnApiDelegate.
  optional string cache_directory = 2;
  optional string model_token = 3;

  // NNAPI execution preference to pass. See
  // https://developer.android.com/ndk/reference/group/neural-networks.html
  optional NNAPIExecutionPreference execution_preference = 4;

  // Number of instances to cache for the same model (for input size
  // changes). This is mandatory for getting reasonable performance in that
  // case.
  optional int32 no_of_nnapi_instances_to_cache = 5;

  // Whether to automatically fall back to the TFLite CPU path on errors; see
  // FallbackSettings.
  optional FallbackSettings fallback_settings = 6;

  // Whether to allow use of NNAPI CPU (nnapi-reference accelerator) on Android
  // 10+ when an accelerator name is not specified. The NNAPI CPU typically
  // performs less well than the TfLite built-in kernels; but allowing allows a
  // model to be partially accelerated which may be a win.
  optional bool allow_nnapi_cpu_on_android_10_plus = 7;

  // Scheduling priority to request from NNAPI.
  optional NNAPIExecutionPriority execution_priority = 8;
}
// Which GPU backend to select. Default behaviour on Android is to try OpenCL
// and if it's not available fall back to OpenGL.
enum GPUBackend {
  // No backend forced; use the default selection behaviour above.
  UNSET = 0;
  // Force the OpenCL backend.
  OPENCL = 1;
  // Force the OpenGL backend.
  OPENGL = 2;
  // Not yet supported.
  // VULKAN = 3;
  // METAL = 4;
}
// GPU Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/gpu/delegate.h
message GPUSettings {
  // Whether reduced-precision computation is acceptable.
  optional bool is_precision_loss_allowed = 1;
  // Whether to run quantized models with quantized inference (on by default).
  optional bool enable_quantized_inference = 2 [default = true];
  // Force a specific GPU backend instead of the default selection.
  optional GPUBackend force_backend = 3;
  // TODO(b/152019007): add remaining options.
}
// Hexagon Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/hexagon/hexagon_delegate.h
message HexagonSettings {
  // Debug verbosity level passed to the Hexagon delegate.
  optional int32 debug_level = 1;
  // Power-saving level passed to the Hexagon delegate.
  optional int32 powersave_level = 2;
  // Whether to print a profile of the delegated graph.
  optional bool print_graph_profile = 3;
  // Whether to print debug information for the delegated graph.
  optional bool print_graph_debug = 4;
}
// XNNPack Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h
message XNNPackSettings {
  // Number of threads the XNNPACK delegate may use.
  optional int32 num_threads = 1;
}
// CPU execution settings.
message CPUSettings {
  // Number of threads to use for CPU execution.
  optional int32 num_threads = 1;
}
// How to configure TFLite.
message TFLiteSettings {
  // Which delegate to use.
  optional Delegate delegate = 1;

  // How to configure the chosen delegate.
  // (In principle we would like to use 'oneof', but flatc turns that into an
  // nested anonymous table rather than a union. See
  // https://github.com/google/flatbuffers/issues/4628).
  optional NNAPISettings nnapi_settings = 2;
  optional GPUSettings gpu_settings = 3;
  optional HexagonSettings hexagon_settings = 4;
  optional XNNPackSettings xnnpack_settings = 5;

  // How to configure CPU execution.
  optional CPUSettings cpu_settings = 6;

  // Shared delegation settings.
  optional int32 max_delegated_partitions = 7;
}
// Whether to automatically fallback to TFLite CPU path on delegation errors.
//
// Typically fallback is enabled in production use but disabled in tests and
// benchmarks to ensure they test the intended path.
message FallbackSettings {
  // Whether to allow automatically falling back to TfLite CPU path on
  // compilation failure. Default is not allowing automatic fallback.
  //
  // This is useful in naive production usecases where the caller would prefer
  // for the model to run even if it's not accelerated. More advanced users will
  // implement fallback themselves; e.g., by using a different model on CPU.
  //
  // Note that compilation errors may occur either at initial
  // ModifyGraphWithDelegate() time, or when calling AllocateTensors() after
  // resizing.
  optional bool allow_automatic_fallback_on_compilation_error = 7;
  // Whether to allow automatically falling back to TfLite CPU path on
  // execution error. Default is not allowing automatic fallback.
  //
  // Experimental, use with care (only when you have complete control over the
  // client code).
  //
  // The caveat above for compilation error holds. Additionally, execution-time
  // errors are harder to handle automatically as they require invalidating the
  // TfLite interpreter which most client code has not been designed to deal
  // with.
  optional bool allow_automatic_fallback_on_execution_error = 8;
}