Merge changes from github.

Change: 137532946
Xiaoqiang Zheng 2016-10-28 10:29:28 -08:00 committed by TensorFlower Gardener
parent f80ef2d696
commit e2d51a87f0
97 changed files with 1731 additions and 405 deletions

View File

@ -33,10 +33,10 @@ and discussion.**
People who are a little more adventurous can also try our nightly binaries:
* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/))
* Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
* Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac1-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac1-slave/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac1-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac1-slave/))
* Mac GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/))
* [Android](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/lastSuccessfulBuild/artifact/bazel-out/local_linux/bin/tensorflow/examples/android/tensorflow_demo.apk) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/))
#### *Try your first TensorFlow program*

View File

@ -15,6 +15,7 @@ cmake_policy(SET CMP0022 NEW)
# Options
option(tensorflow_VERBOSE "Enable for verbose output" OFF)
option(tensorflow_ENABLE_GPU "Enable GPU support" OFF)
option(tensorflow_ENABLE_SSL_SUPPORT "Enable boringssl support" OFF)
option(tensorflow_ENABLE_GRPC_SUPPORT "Enable gRPC support" ON)
option(tensorflow_BUILD_CC_EXAMPLE "Build the C++ tutorial example" ON)
@ -48,8 +49,13 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_definitions(-DEIGEN_AVOID_STL_ARRAY)
if(WIN32)
add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC -D__VERSION__=\"MSVC\")
add_definitions(-DWIN32 -DOS_WIN -D_MBCS -DWIN64 -DWIN32_LEAN_AND_MEAN -DNOGDI -DPLATFORM_WINDOWS)
add_definitions(-DTENSORFLOW_USE_EIGEN_THREADPOOL -DEIGEN_HAS_C99_MATH -D_ITERATOR_DEBUG_LEVEL=0)
add_definitions(/bigobj /nologo /EHsc /GF /FC /MP /Gm-)
# Suppress warnings to reduce build log size.
add_definitions(/wd4267 /wd4244 /wd4800 /wd4503 /wd4554 /wd4996 /wd4348 /wd4018)
add_definitions(/wd4099 /wd4146 /wd4267 /wd4305 /wd4307)
add_definitions(/wd4715 /wd4722 /wd4723 /wd4838 /wd4309 /wd4334)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
endif()
@ -80,7 +86,16 @@ set(tensorflow_EXTERNAL_LIBRARIES
${protobuf_STATIC_LIBRARIES}
)
set(tensorflow_EXTERNAL_DEPENDENCIES
zlib_copy_headers_to_destination
gif_copy_headers_to_destination
png_copy_headers_to_destination
jpeg_copy_headers_to_destination
jsoncpp
farmhash_copy_headers_to_destination
highwayhash_copy_headers_to_destination
protobuf
eigen
)
include_directories(
# Source and generated code.
@ -118,19 +133,67 @@ if(UNIX)
list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS})
endif()
if (tensorflow_ENABLE_GPU)
if (WIN32)
find_package(CUDA 8.0 REQUIRED)
# By default we assume compute capability 3.5 and 5.2. If you change this,
# change it in CUDA_NVCC_FLAGS and cuda_config.h below as well.
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\")
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr)
set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include)
include_directories(${CUDA_INCLUDE})
add_definitions(-DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=3.5,5.2)
# add cudnn
include_directories(${CUDNN_HOME})
set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDNN_HOME}/lib/x64/cudnn.lib)
# create cuda_config.h
FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h
"#ifndef CUDA_CUDA_CONFIG_H_\n"
"#define CUDA_CUDA_CONFIG_H_\n"
"#define TF_CUDA_CAPABILITIES CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n"
"#define TF_CUDA_VERSION \"64_80\"\n"
"#define TF_CUDNN_VERSION \"64_5\"\n"
"#endif // CUDA_CUDA_CONFIG_H_\n"
)
# TensorFlow assumes in various places that header files live in cuda/include. On Windows
# the CUDA SDK installs them under cuda/<version>/include, so to avoid changing TensorFlow
# we copy a few files to cuda/include.
FILE(COPY
${CUDA_TOOLKIT_TARGET_DIR}/include/cuda.h ${CUDA_TOOLKIT_TARGET_DIR}/include/cuComplex.h
${CUDA_TOOLKIT_TARGET_DIR}/include/cublas_v2.h ${CUDNN_HOME}/include/cudnn.h
${CUDA_TOOLKIT_TARGET_DIR}/include/cufft.h ${CUDA_TOOLKIT_TARGET_DIR}/include/curand.h
DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include
)
include_directories(${tensorflow_source_dir}/third_party/gpus)
# add cuda libraries to tensorflow_EXTERNAL_LIBRARIES
list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES})
endif()
endif()
# Let's get to work!
include(tf_core_framework.cmake)
include(tf_tools.cmake)
# NOTE: Disabled until issue #3996 is fixed.
# include(tf_stream_executor.cmake)
if (tensorflow_ENABLE_GPU)
if (WIN32)
include(tf_stream_executor.cmake)
endif()
endif()
include(tf_core_cpu.cmake)
include(tf_models.cmake)
include(tf_core_ops.cmake)
include(tf_core_direct_session.cmake)
include(tf_core_kernels.cmake)
if(tensorflow_ENABLE_GRPC_SUPPORT)
include(tf_core_distributed_runtime.cmake)
endif()
include(tf_cc_ops.cmake)
if(tensorflow_BUILD_CC_EXAMPLE)
include(tf_tutorials.cmake)

View File

@ -15,14 +15,13 @@ Current Status
The CMake files in this directory can build the core TensorFlow runtime, an
example C++ binary, and a PIP package containing the runtime and Python
bindings.
Note: Windows support is in an **alpha** state, and we welcome your feedback.
### Pre-requisites
* CMake version 3.1 up to 3.6
* [Git](http://git-scm.com)
@ -45,21 +44,13 @@ Note: Windows support is in an **alpha** state, and we welcome your feedback.
- [Anaconda 4.1.1 (Python 3.5 64-bit)](https://www.continuum.io/downloads)
- [Git for Windows version 2.9.2.windows.1](https://git-scm.com/download/win)
- [swigwin-3.0.10](http://www.swig.org/download.html)
- [NVidia CUDA Toolkit 8.0](https://developer.nvidia.com/cuda-downloads)
- [NVidia CUDNN 5.1](https://developer.nvidia.com/cudnn)
* Ubuntu 14.04
- Makefile generator
- Docker 1.9.1 (for automated testing)
### Current known limitations
- The Python package supports **Python 3.5 only**, because that is the only
version for which standard Python binaries exist and those binaries are
compatible with the TensorFlow runtime. (On Windows, the standard Python
@ -114,6 +105,17 @@ Step-by-step Windows build
D:\temp> "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat" D:\temp> "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat"
``` ```
* When building with GPU support, after installing the CUDNN zip file from NVidia, append its
bin directory to your PATH environment variable.
If TensorFlow fails to find the CUDA DLLs during initialization, check your PATH environment variable:
it should contain the directory of the CUDA DLLs and the directory of the CUDNN DLL.
For example:
```
D:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin
D:\local\cuda\bin
```
* We assume that `cmake` and `git` are installed and in your `%PATH%`. If
for example `cmake` is not in your path and it is installed in
`C:\Program Files (x86)\CMake\bin\cmake.exe`, you can add this directory
@ -145,9 +147,14 @@ Step-by-step Windows build
D:\...\build> cmake .. -A x64 -DCMAKE_BUILD_TYPE=Release ^
More? -DSWIG_EXECUTABLE=C:/tools/swigwin-3.0.10/swig.exe ^
More? -DPYTHON_EXECUTABLE=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/python.exe ^
More? -DPYTHON_LIBRARIES=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/libs/python35.lib
```
To build with GPU support, add "^" at the end of the last line above and follow it with:
```
More? -Dtensorflow_ENABLE_GPU=ON ^
More? -DCUDNN_HOME="D:\...\cudnn"
```
Note that the `-DCMAKE_BUILD_TYPE=Release` flag must match the build
configuration that you choose when invoking `msbuild`. The known-good
values are `Release` and `RelWithDebInfo`. The `Debug` build type is
@ -184,6 +191,11 @@ Step-by-step Windows build
SSL support (for making secure HTTP requests) in the TensorFlow runtime.
This support is incomplete, and will be used for Google Cloud Storage
support.
* `-Dtensorflow_ENABLE_GPU=(ON|OFF)`. Defaults to `OFF`. Include
GPU support. If GPU support is enabled, you also need to install the CUDA 8.0 Toolkit and CUDNN 5.1.
CMake expects the location of CUDNN in `-DCUDNN_HOME=path_where_you_unzipped_cudnn`.
4. Invoke MSBuild to build TensorFlow.
@ -202,7 +214,6 @@ Step-by-step Windows build
D:\...\build> MSBuild /p:Configuration=Release tf_python_build_pip_package.vcxproj
```
Linux Continuous Integration build
==================================

View File

@ -26,7 +26,7 @@ from setuptools import find_packages, setup, Command
from setuptools.command.install import install as InstallCommandBase
from setuptools.dist import Distribution
_VERSION = '0.11.0rc1-cmake-experimental'
REQUIRED_PACKAGES = [
'numpy >= 1.11.0',

View File

@ -21,13 +21,27 @@ file(GLOB_RECURSE tf_core_cpu_exclude_srcs
"${tensorflow_source_dir}/tensorflow/core/common_runtime/session_factory.cc" "${tensorflow_source_dir}/tensorflow/core/common_runtime/session_factory.cc"
"${tensorflow_source_dir}/tensorflow/core/common_runtime/session_options.cc" "${tensorflow_source_dir}/tensorflow/core/common_runtime/session_options.cc"
) )
list(REMOVE_ITEM tf_core_cpu_srcs ${tf_core_cpu_exclude_srcs}) list(REMOVE_ITEM tf_core_cpu_srcs ${tf_core_cpu_exclude_srcs})
# We need to include stubs for the GPU tracer, which are in the exclude glob. # We need to include stubs for the GPU tracer, which are in the exclude glob.
list(APPEND tf_core_cpu_srcs list(APPEND tf_core_cpu_srcs
"${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_tracer.cc" "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_tracer.cc"
"${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_tracer.h" "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_tracer.h"
) )
if (tensorflow_ENABLE_GPU)
file(GLOB_RECURSE tf_core_gpu_srcs
"${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/*.cc"
"${tensorflow_source_dir}/tensorflow/core/platform/default/gpu/cupti_wrapper.cc"
"${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu_device_factory.cc"
)
file(GLOB_RECURSE tf_core_gpu_exclude_srcs
"${tensorflow_source_dir}/tensorflow/core/*test*.cc"
"${tensorflow_source_dir}/tensorflow/core/*test*.cc"
)
list(REMOVE_ITEM tf_core_gpu_srcs ${tf_core_gpu_exclude_srcs})
list(APPEND tf_core_cpu_srcs ${tf_core_gpu_srcs})
endif()
add_library(tf_core_cpu OBJECT ${tf_core_cpu_srcs})
add_dependencies(tf_core_cpu tf_core_framework)

View File

@ -38,9 +38,11 @@ add_executable(grpc_tensorflow_server
$<TARGET_OBJECTS:tf_core_ops>
$<TARGET_OBJECTS:tf_core_direct_session>
$<TARGET_OBJECTS:tf_core_distributed_runtime>
$<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
)
target_link_libraries(grpc_tensorflow_server PUBLIC
tf_protos_cc
${tf_core_gpu_kernels_lib}
${tensorflow_EXTERNAL_LIBRARIES}
)

View File

@ -38,6 +38,7 @@ if(tensorflow_BUILD_CONTRIB_KERNELS)
"${tensorflow_source_dir}/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc" "${tensorflow_source_dir}/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc"
"${tensorflow_source_dir}/tensorflow/contrib/metrics/kernels/set_kernels.cc" "${tensorflow_source_dir}/tensorflow/contrib/metrics/kernels/set_kernels.cc"
"${tensorflow_source_dir}/tensorflow/contrib/metrics/ops/set_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/metrics/ops/set_ops.cc"
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.cc"
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/gru_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/gru_ops.cc"
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/lstm_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/lstm_ops.cc"
"${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/gru_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/gru_ops.cc"
@ -83,7 +84,7 @@ list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_exclude_srcs})
if(WIN32)
file(GLOB_RECURSE tf_core_kernels_windows_exclude_srcs
# not working on windows yet
"${tensorflow_source_dir}/tensorflow/core/kernels/depthwise_conv_op.cc" # Cannot find symbol: tensorflow::LaunchConv2DOp<struct Eigen::ThreadPoolDevice, double>::launch(...).
"${tensorflow_source_dir}/tensorflow/core/kernels/fact_op.cc"
"${tensorflow_source_dir}/tensorflow/core/kernels/immutable_constant_op.cc"
@ -93,14 +94,38 @@ if(WIN32)
"${tensorflow_source_dir}/tensorflow/core/kernels/sparse_matmul_op.h" "${tensorflow_source_dir}/tensorflow/core/kernels/sparse_matmul_op.h"
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h" "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc" "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc"
"${tensorflow_source_dir}/tensorflow/core/kernels/svd*.cc"
"${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op.*"
)
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_windows_exclude_srcs})
endif(WIN32)
file(GLOB_RECURSE tf_core_gpu_kernels_srcs
"${tensorflow_source_dir}/tensorflow/core/kernels/*.cu.cc"
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc"
)
if(WIN32)
file(GLOB_RECURSE tf_core_gpu_kernels_exclude_srcs
# not working on windows yet
"${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc"
)
list(REMOVE_ITEM tf_core_gpu_kernels_srcs ${tf_core_gpu_kernels_exclude_srcs})
endif(WIN32)
add_library(tf_core_kernels OBJECT ${tf_core_kernels_srcs})
if(WIN32)
target_compile_options(tf_core_kernels PRIVATE /MP)
if (tensorflow_ENABLE_GPU)
set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
set(tf_core_gpu_kernels_lib tf_core_gpu_kernels)
cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs})
set_target_properties(${tf_core_gpu_kernels_lib}
PROPERTIES DEBUG_POSTFIX ""
COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}"
)
add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu)
endif()
endif()
add_dependencies(tf_core_kernels tf_core_cpu)

View File

@ -302,12 +302,14 @@ add_library(pywrap_tensorflow SHARED
$<TARGET_OBJECTS:tf_core_direct_session>
$<$<BOOL:${tensorflow_ENABLE_GRPC_SUPPORT}>:$<TARGET_OBJECTS:tf_core_distributed_runtime>>
$<TARGET_OBJECTS:tf_core_kernels>
$<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
)
target_include_directories(pywrap_tensorflow PUBLIC
${PYTHON_INCLUDE_DIR}
${NUMPY_INCLUDE_DIR}
)
target_link_libraries(pywrap_tensorflow
${tf_core_gpu_kernels_lib}
${tensorflow_EXTERNAL_LIBRARIES}
tf_protos_cc
${PYTHON_LIBRARIES}

View File

@ -47,11 +47,17 @@ file(GLOB tf_stream_executor_srcs
"${tensorflow_source_dir}/tensorflow/stream_executor/platform/default/*.h" "${tensorflow_source_dir}/tensorflow/stream_executor/platform/default/*.h"
) )
if (tensorflow_ENABLE_GPU)
file(GLOB tf_stream_executor_gpu_srcs
"${tensorflow_source_dir}/tensorflow/stream_executor/cuda/*.cc"
)
list(APPEND tf_stream_executor_srcs ${tf_stream_executor_gpu_srcs})
endif()
#file(GLOB_RECURSE tf_stream_executor_test_srcs
# "${tensorflow_source_dir}/tensorflow/stream_executor/*_test.cc"
# "${tensorflow_source_dir}/tensorflow/stream_executor/*_test.h"
#)
#
#list(REMOVE_ITEM tf_stream_executor_srcs ${tf_stream_executor_test_srcs})
add_library(tf_stream_executor OBJECT ${tf_stream_executor_srcs})

View File

@ -12,9 +12,11 @@ add_executable(tf_tutorials_example_trainer
$<TARGET_OBJECTS:tf_cc_ops>
$<TARGET_OBJECTS:tf_core_ops>
$<TARGET_OBJECTS:tf_core_direct_session>
$<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
)
target_link_libraries(tf_tutorials_example_trainer PUBLIC
tf_protos_cc
${tf_core_gpu_kernels_lib}
${tensorflow_EXTERNAL_LIBRARIES}
)

View File

@ -942,6 +942,7 @@ def convolution2d_transpose(
kernel_size,
stride=1,
padding='SAME',
data_format=DATA_FORMAT_NHWC,
activation_fn=nn.relu,
normalizer_fn=None,
normalizer_params=None,
@ -961,7 +962,9 @@ def convolution2d_transpose(
second variable called 'biases' is added to the result of the operation.
Args:
inputs: A 4-D `Tensor` of type `float` and shape
`[batch, height, width, in_channels]` for `NHWC` data format or
`[batch, in_channels, height, width]` for `NCHW` data format.
num_outputs: integer, the number of output filters.
kernel_size: a list of length 2 holding the [kernel_height, kernel_width]
of the filters. Can be an int if both values are the same.
@ -969,6 +972,7 @@ def convolution2d_transpose(
Can be an int if both strides are the same. Note that presently
both strides must have the same value.
padding: one of 'VALID' or 'SAME'.
data_format: A string. `NHWC` (default) and `NCHW` are supported.
activation_fn: activation function, set to None to skip it and maintain
a linear activation.
normalizer_fn: normalization function to use instead of `biases`. If
@ -993,14 +997,23 @@ def convolution2d_transpose(
Raises:
ValueError: if 'kernel_size' is not a list of length 2.
ValueError: if `data_format` is neither `NHWC` nor `NCHW`.
ValueError: if `C` dimension of `inputs` is None.
""" """
with variable_scope.variable_scope( with variable_scope.variable_scope(
scope, 'Conv2d_transpose', [inputs], reuse=reuse) as sc: scope, 'Conv2d_transpose', [inputs], reuse=reuse) as sc:
if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
raise ValueError('data_format has to be either NCHW or NHWC.')
dtype = inputs.dtype.base_dtype
kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
stride_h, stride_w = utils.two_element_tuple(stride)
if data_format == DATA_FORMAT_NCHW:
c_axis, h_axis, w_axis = 1, 2, 3
else:
h_axis, w_axis, c_axis = 1, 2, 3
num_filters_in = inputs.get_shape()[c_axis].value
if num_filters_in is None:
raise ValueError('`C` dimension of `inputs` must be known but is None.')
weights_shape = [kernel_h, kernel_w, num_outputs, num_filters_in]
weights_collections = utils.get_variable_collections(
variables_collections, 'weights')
@ -1015,7 +1028,7 @@ def convolution2d_transpose(
inputs_shape = array_ops.shape(inputs)
batch_size = inputs_shape[0]
height, width = inputs_shape[h_axis], inputs_shape[w_axis]
def get_deconv_dim(dim_size, stride_size, kernel_size, padding):
if isinstance(dim_size, ops.Tensor):
@ -1031,17 +1044,25 @@ def convolution2d_transpose(
out_height = get_deconv_dim(height, stride_h, kernel_h, padding)
out_width = get_deconv_dim(width, stride_w, kernel_w, padding)
if data_format == DATA_FORMAT_NHWC:
output_shape = [batch_size, out_height, out_width, num_outputs]
strides = [1, stride_h, stride_w, 1]
else:
output_shape = [batch_size, num_outputs, out_height, out_width]
strides = [1, 1, stride_h, stride_w]
output_shape = array_ops.pack(output_shape)
outputs = nn.conv2d_transpose(inputs, weights, output_shape,
strides,
padding=padding,
data_format=data_format)
# Infer the static output shape:
out_shape = inputs.get_shape().as_list()
out_shape[c_axis] = num_outputs
out_shape[h_axis] = get_deconv_dim(out_shape[h_axis], stride_h, kernel_h, padding)
out_shape[w_axis] = get_deconv_dim(out_shape[w_axis], stride_w, kernel_w, padding)
outputs.set_shape(out_shape)
if normalizer_fn is not None:
@ -1057,7 +1078,7 @@ def convolution2d_transpose(
initializer=biases_initializer,
regularizer=biases_regularizer,
collections=biases_collections)
outputs = nn.bias_add(outputs, biases, data_format=data_format)
if activation_fn is not None:
outputs = activation_fn(outputs)
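The hunk above threads the new `data_format` argument through the weight shape, output shape, strides, and `bias_add`. A minimal usage sketch (mine, not part of the commit), mirroring the NCHW test cases in the next file; it assumes a GPU is available, since the underlying kernels only accept `NCHW` on GPU:

```python
# Hypothetical usage sketch for the new data_format argument.
import tensorflow as tf

# [batch, channels, height, width] input for the NCHW layout.
images = tf.random_uniform([5, 3, 10, 12], seed=1)
output = tf.contrib.layers.conv2d_transpose(
    images, num_outputs=32, kernel_size=[3, 3], stride=1,
    padding='SAME', data_format='NCHW')
# The static shape is now inferred along the right axes: [5, 32, 10, 12].
```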

View File

@ -588,6 +588,175 @@ class ConvolutionTest(tf.test.TestCase):
class Convolution2dTransposeTests(tf.test.TestCase):
def testInvalidDataFormat(self):
height, width = 7, 9
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
with self.assertRaisesRegexp(
ValueError, 'data_format has to be either NCHW or NHWC.'):
tf.contrib.layers.convolution2d_transpose(
images, 32, 3, data_format='CHWN')
def testOutputSizeWithStrideOneSamePaddingNCHW(self):
# `NCHW` data format is only supported for `GPU` device.
if tf.test.is_gpu_available():
with self.test_session(use_gpu=True) as sess:
num_filters = 32
input_size = [5, 3, 10, 12]
expected_size = [5, num_filters, 10, 12]
images = tf.random_uniform(input_size, seed=1)
output = tf.contrib.layers.conv2d_transpose(
images, num_filters, [3, 3], stride=1,
padding='SAME', data_format='NCHW')
self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
sess.run(tf.initialize_all_variables())
self.assertListEqual(list(output.eval().shape), expected_size)
def testOutputSizeWithStrideOneValidPaddingNCHW(self):
if tf.test.is_gpu_available():
with self.test_session(use_gpu=True) as sess:
num_filters = 32
input_size = [5, 3, 10, 12]
expected_size = [5, num_filters, 12, 14]
images = tf.random_uniform(input_size, seed=1)
output = tf.contrib.layers.conv2d_transpose(
images, num_filters, [3, 3], stride=1,
padding='VALID', data_format='NCHW')
self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
sess.run(tf.initialize_all_variables())
self.assertListEqual(list(output.eval().shape), expected_size)
def testOutputSizeWithStrideTwoValidPaddingNCHW(self):
if tf.test.is_gpu_available():
with self.test_session(use_gpu=True) as sess:
num_filters = 32
input_size = [5, 3, 9, 11]
expected_size = [5, num_filters, 19, 23]
images = tf.random_uniform(input_size, seed=1)
output = tf.contrib.layers.conv2d_transpose(
images, num_filters, [3, 3], stride=[2, 2],
padding='VALID', data_format='NCHW')
self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
self.assertListEqual(list(output.get_shape().as_list()), expected_size)
sess.run(tf.initialize_all_variables())
self.assertListEqual(list(output.eval().shape), expected_size)
def testOutputSizeWith1x1StrideTwoSamePaddingNCHW(self):
if tf.test.is_gpu_available():
with self.test_session(use_gpu=True) as sess:
num_filters = 1
input_size = [1, 1, 1, 1]
expected_size = [1, num_filters, 2, 2]
images = tf.random_uniform(input_size, seed=1)
output = tf.contrib.layers.conv2d_transpose(
images, num_filters, [2, 2], stride=[2, 2],
padding='SAME', data_format='NCHW')
self.assertListEqual(list(output.get_shape().as_list()), expected_size)
sess.run(tf.initialize_all_variables())
self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
self.assertListEqual(list(output.eval().shape), expected_size)
def testOutputSizeWith1x1StrideTwoValidPaddingNCHW(self):
if tf.test.is_gpu_available():
with self.test_session(use_gpu=True) as sess:
num_filters = 1
input_size = [1, 1, 1, 1]
expected_size = [1, num_filters, 2, 2]
images = tf.random_uniform(input_size, seed=1)
output = tf.contrib.layers.conv2d_transpose(
images, num_filters, [2, 2], stride=[2, 2],
padding='VALID', data_format='NCHW')
sess.run(tf.initialize_all_variables())
self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
self.assertListEqual(list(output.eval().shape), expected_size)
def testOutputSizeWith2x2StrideTwoSamePaddingNCHW(self):
if tf.test.is_gpu_available():
with self.test_session(use_gpu=True) as sess:
num_filters = 1
input_size = [1, 1, 2, 2]
expected_size = [1, num_filters, 4, 4]
images = tf.random_uniform(input_size, seed=1)
output = tf.contrib.layers.conv2d_transpose(
images, num_filters, [2, 2], stride=[2, 2],
padding='SAME', data_format='NCHW')
sess.run(tf.initialize_all_variables())
self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
self.assertListEqual(list(output.eval().shape), expected_size)
def testOutputSizeWith2x2StrideTwoValidPaddingNCHW(self):
if tf.test.is_gpu_available():
with self.test_session(use_gpu=True) as sess:
num_filters = 1
input_size = [1, 1, 2, 2]
expected_size = [1, num_filters, 4, 4]
images = tf.random_uniform(input_size, seed=1)
output = tf.contrib.layers.conv2d_transpose(
images, num_filters, [2, 2], stride=[2, 2],
padding='VALID', data_format='NCHW')
sess.run(tf.initialize_all_variables())
self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
self.assertListEqual(list(output.eval().shape), expected_size)
def testOutputSizeWithStride2x1NCHW(self):
if tf.test.is_gpu_available():
with self.test_session(use_gpu=True) as sess:
num_filters = 1
input_size = [1, 1, 3, 2]
expected_size = [1, num_filters, 6, 5]
images = tf.random_uniform(input_size, seed=1)
output = tf.contrib.layers.conv2d_transpose(
images, num_filters, [2, 4], stride=[2, 1],
padding='VALID', data_format='NCHW')
sess.run(tf.initialize_all_variables())
self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
self.assertListEqual(list(output.eval().shape), expected_size)
def testOutputSizeWithStride2x4NCHW(self):
if tf.test.is_gpu_available():
with self.test_session(use_gpu=True) as sess:
num_filters = 1
input_size = [1, 1, 3, 2]
expected_size = [1, num_filters, 6, 8]
images = tf.random_uniform(input_size, seed=1)
output = tf.contrib.layers.conv2d_transpose(
images, num_filters, [2, 4], stride=[2, 4],
padding='VALID', data_format='NCHW')
sess.run(tf.initialize_all_variables())
self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
self.assertListEqual(list(output.eval().shape), expected_size)
def testOutputSizeWithStride2x5NCHW(self):
if tf.test.is_gpu_available():
with self.test_session(use_gpu=True) as sess:
num_filters = 1
input_size = [1, 1, 3, 2]
expected_size = [1, num_filters, 6, 10]
images = tf.random_uniform(input_size, seed=1)
output = tf.contrib.layers.conv2d_transpose(
images, num_filters, [2, 4], stride=[2, 5],
padding='VALID', data_format='NCHW')
sess.run(tf.initialize_all_variables())
self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
self.assertListEqual(list(output.eval().shape), expected_size)
def testOutputSizeWithStrideOneSamePadding(self):
num_filters = 32
input_size = [5, 10, 12, 3]
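The expected sizes in these tests follow the `get_deconv_dim` rule from the layer above. A small standalone check of that arithmetic (my sketch, not from the test file):

```python
# Sketch of the output-size arithmetic the NCHW tests exercise:
# SAME padding: out = in * stride; VALID: out = in * stride + max(kernel - stride, 0).
def deconv_dim(dim_size, stride, kernel, padding):
    dim_size *= stride
    if padding == 'VALID':
        dim_size += max(kernel - stride, 0)
    return dim_size

assert deconv_dim(9, 2, 3, 'VALID') == 19   # height in testOutputSizeWithStrideTwoValidPaddingNCHW
assert deconv_dim(11, 2, 3, 'VALID') == 23  # width in the same test
assert deconv_dim(2, 1, 4, 'VALID') == 5    # width in testOutputSizeWithStride2x1NCHW
```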

View File

@ -244,7 +244,7 @@ class GraphIOTest(tf.test.TestCase):
session.run(tf.initialize_local_variables())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(session, coord=coord)
self.assertAllEqual(session.run(inputs), [b"ABC"])
self.assertAllEqual(session.run(inputs), [b"DEF"])
@ -253,6 +253,7 @@ class GraphIOTest(tf.test.TestCase):
session.run(inputs)
coord.request_stop()
coord.join(threads)
def test_read_keyed_batch_features_mutual_exclusive_args(self):
filename = self._create_temp_file("abcde")
@ -307,6 +308,7 @@ class GraphIOTest(tf.test.TestCase):
coord.request_stop()
coord.join(threads)
parsed_records = [item for sublist in [d["sequence"] for d in data] parsed_records = [item for sublist in [d["sequence"] for d in data]
for item in sublist]
# Check that the number of records matches expected and all records
@ -331,7 +333,7 @@ class GraphIOTest(tf.test.TestCase):
session.run(tf.initialize_local_variables())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(session, coord=coord)
self.assertEqual("%s:1" % name, inputs.name)
file_name_queue_name = "%s/file_name_queue" % name
@ -352,6 +354,7 @@ class GraphIOTest(tf.test.TestCase):
session.run(inputs)
coord.request_stop()
coord.join(threads)
def test_read_text_lines_multifile_with_shared_queue(self):
gfile.Glob = self._orig_glob
@ -375,7 +378,7 @@ class GraphIOTest(tf.test.TestCase):
session.run(tf.initialize_local_variables())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(session, coord=coord)
self.assertEqual("%s:1" % name, inputs.name)
shared_file_name_queue_name = "%s/file_name_queue" % name
@ -398,6 +401,7 @@ class GraphIOTest(tf.test.TestCase):
session.run(inputs)
coord.request_stop()
coord.join(threads)
def _get_qr(self, name):
for qr in ops.get_collection(ops.GraphKeys.QUEUE_RUNNERS):
session.run(tf.initialize_local_variables())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(session, coord=coord)
self.assertAllEqual(session.run(inputs), [b"A", b"B", b"C"])
self.assertAllEqual(session.run(inputs), [b"D", b"E"])
@ -498,6 +502,7 @@ class GraphIOTest(tf.test.TestCase):
session.run(inputs)
coord.request_stop()
coord.join(threads)
def test_keyed_read_text_lines(self):
gfile.Glob = self._orig_glob
@ -517,7 +522,7 @@ class GraphIOTest(tf.test.TestCase):
session.run(tf.initialize_local_variables())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(session, coord=coord)
self.assertAllEqual(session.run([keys, inputs]),
[[filename.encode("utf-8") + b":1"], [b"ABC"]])
@ -529,6 +534,7 @@ class GraphIOTest(tf.test.TestCase):
session.run(inputs)
coord.request_stop()
coord.join(threads)
def test_keyed_parse_json(self):
gfile.Glob = self._orig_glob
@ -557,7 +563,7 @@ class GraphIOTest(tf.test.TestCase):
session.run(tf.initialize_local_variables())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(session, coord=coord)
key, age = session.run([keys, inputs["age"]])
self.assertAllEqual(age, [[0]])
@ -572,6 +578,7 @@ class GraphIOTest(tf.test.TestCase):
session.run(inputs)
coord.request_stop()
coord.join(threads)
if __name__ == "__main__":
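The change repeated throughout this file captures the threads returned by `start_queue_runners` so the coordinator can join them before each test exits. A minimal sketch of the pattern (my example, not from the test suite):

```python
# Sketch of the coordinator/queue-runner shutdown pattern the tests adopt.
import tensorflow as tf

with tf.Session() as session:
    queue = tf.train.string_input_producer(["a", "b"], num_epochs=1)
    session.run(tf.initialize_local_variables())  # num_epochs uses a local variable
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(session, coord=coord)
    try:
        print(session.run(queue.dequeue()))
    finally:
        coord.request_stop()
        coord.join(threads)  # without this, runner threads can outlive the test
```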

View File

@ -21,6 +21,7 @@ from __future__ import print_function
import os
import random
import six
import tempfile
import numpy as np
@ -63,8 +64,8 @@ class ExportTest(tf.test.TestCase):
# Only the written checkpoints are exported.
self.assertTrue(tf.gfile.Exists(export_dir + '00000001/export'))
self.assertTrue(tf.gfile.Exists(export_dir + '00000010/export'))
self.assertEquals(export_monitor.last_export_dir,
six.b(os.path.join(export_dir, '00000010')))
# Validate the signature
signature = self._get_default_signature(export_dir + '00000010/export.meta')
self.assertTrue(signature.HasField('regression_signature'))
@ -86,8 +87,8 @@ class ExportTest(tf.test.TestCase):
# Only the written checkpoints are exported.
self.assertTrue(tf.gfile.Exists(export_dir + '00000001/export'))
self.assertTrue(tf.gfile.Exists(export_dir + '00000010/export'))
self.assertEquals(export_monitor.last_export_dir,
six.b(os.path.join(export_dir, '00000010')))
# Validate the signature
signature = self._get_default_signature(export_dir + '00000010/export.meta')
self.assertTrue(signature.HasField('generic_signature'))
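`ExportMonitor.last_export_dir` is a byte string here, so the expected path is normalized with `six.b`. A quick illustration (mine) of what that wrapper does:

```python
# six.b() turns a text literal into bytes on Python 3 (and leaves it a
# plain str on Python 2), so the comparison works under both interpreters.
import os
import six

expected = six.b(os.path.join('/tmp/export', '00000010'))
assert expected == b'/tmp/export/00000010'
```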

View File

@ -351,6 +351,10 @@ class BFCAllocator : public VisitableAllocator {
inline int Log2FloorNonZero(uint64 n) {
#if defined(__GNUC__)
return 63 ^ __builtin_clzll(n);
#elif defined(PLATFORM_WINDOWS)
unsigned long index;
_BitScanReverse64(&index, n);
return index;
#else
int r = 0;
while (n > 0) {
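Both branches return the index of the highest set bit, i.e. floor(log2(n)): GCC via `63 ^ __builtin_clzll(n)` and the new MSVC branch via `_BitScanReverse64`. A quick Python sanity check of that identity (my sketch):

```python
# For 64-bit n > 0: clzll(n) == 64 - n.bit_length(), and
# 63 ^ clzll(n) == n.bit_length() - 1 == floor(log2(n)).
for n in (1, 2, 3, 255, 256, 2**63, 2**64 - 1):
    clz = 64 - n.bit_length()
    assert 63 ^ clz == n.bit_length() - 1
```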

View File

@ -873,7 +873,9 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
if (visible_device_list.empty()) {
visible_gpu_order.resize(gpu_manager->VisibleDeviceCount());
// By default, visible to virtual mapping is unchanged.
int deviceNo = 0;
std::generate(visible_gpu_order.begin(), visible_gpu_order.end(),
[&deviceNo]{ return deviceNo++; });
} else {
std::vector<string> order_str = str_util::Split(visible_device_list, ',');
for (int i = 0; i < order_str.size(); ++i) {

View File

@ -254,6 +254,10 @@ CUPTIManager *GetCUPTIManager() {
return manager;
}
#ifdef _MSC_VER
#define __thread __declspec(thread)
#endif
// TODO(pbar) Move this to platform specific header file?
// Static thread local variable for POD types.
#define TF_STATIC_THREAD_LOCAL_POD(_Type_, _var_) \

View File

@ -16,8 +16,10 @@ limitations under the License.
#include "tensorflow/core/common_runtime/gpu/pool_allocator.h" #include "tensorflow/core/common_runtime/gpu/pool_allocator.h"
#include <errno.h> #include <errno.h>
#ifndef _MSC_VER
#include <strings.h>
#include <sys/mman.h> // for munmap
#endif
#include <map>
#include <utility>

View File

@ -126,7 +126,7 @@ Allocator* ProcessState::GetGPUAllocator(const GPUOptions& options, int gpu_id,
gpu::StreamExecutor* se =
gpu_platform->ExecutorForDevice(gpu_id).ValueOrDie();
int bus_id = se->GetDeviceDescription().numa_node();
if (bus_id >= 0 && bus_id < static_cast<int64>(gpu_visitors_.size())) {
for (auto v : gpu_visitors_[bus_id]) {
gpu_allocators_[gpu_id]->AddAllocVisitor(v);
}

View File

@ -152,7 +152,7 @@ class Allocator {
// allocated by this allocator.
virtual size_t RequestedSize(void* ptr) {
CHECK(false) << "allocator doesn't track sizes";
return size_t(0);
}
// Returns the allocated size of the buffer at 'ptr' if known,

View File

@ -149,6 +149,7 @@ class DeviceBase {
// attributes requested. See allocator.h for more details.
virtual Allocator* GetAllocator(AllocatorAttributes /*attr*/) {
LOG(FATAL) << "GetAllocator() is not implemented.";
return nullptr;
}
// Return the Allocator implementation to use based on the allocator
@ -180,6 +181,8 @@ class DeviceBase {
virtual const DeviceAttributes& attributes() const {
LOG(FATAL) << "Device does not implement attributes()";
static DeviceAttributes dummy;
return dummy;
}
// Materializes the given TensorProto into 'tensor' stored in Device

View File

@ -348,6 +348,15 @@ TEST(Tensor_Float, Reshape) {
}
TEST(Tensor_Scalar, Basics) {
{
Tensor t(DT_BOOL, TensorShape({}));
EXPECT_EQ(1, t.NumElements());
auto Tt = t.scalar<bool>();
EXPECT_EQ(1, Tt.size());
EXPECT_EQ(0, Tt.rank());
t.scalar<bool>()() = true;
EXPECT_TRUE(Tt());
}
{
Tensor t(DT_FLOAT, TensorShape({}));
EXPECT_EQ(1, t.NumElements());

View File

@ -16,6 +16,7 @@ limitations under the License.
#if GOOGLE_CUDA
#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
namespace tensorflow {
namespace functor {
@ -31,6 +32,28 @@ struct SelectFunctor<GPUDevice, T> {
}
};
template <typename T>
struct SelectScalarFunctor<GPUDevice, T> {
void operator()(const GPUDevice& d, typename TTypes<T>::Flat out,
typename TTypes<bool>::ConstScalar cond,
typename TTypes<T>::ConstFlat then_flat,
typename TTypes<T>::ConstFlat else_flat) {
#if !defined(EIGEN_HAS_INDEX_LIST)
Eigen::array<int, 1> rank1{1};
#else
Eigen::IndexList<Eigen::type2index<1>> rank1;
#endif
const int size = then_flat.dimension(0);
Eigen::array<int, 1> broadcast_dims{size};
To32Bit(out).device(d) = cond.reshape(rank1)
.broadcast(broadcast_dims)
.select(then_flat, else_flat);
}
};
template <typename T>
struct BatchSelectFunctor<GPUDevice, T> {
void operator()(const GPUDevice& d,
@ -68,6 +91,7 @@ struct BatchSelectFunctor<GPUDevice, T> {
#define SELECT_FUNCTOR(T) \
template struct SelectFunctor<GPUDevice, T>; \
template struct SelectScalarFunctor<GPUDevice, T>; \
template struct BatchSelectFunctor<GPUDevice, T>;
SELECT_FUNCTOR(Eigen::half);

View File

@ -41,6 +41,11 @@ class SelectOp : public OpKernel {
OP_REQUIRES_OK(ctx, ctx->input("t", &then)); OP_REQUIRES_OK(ctx, ctx->input("t", &then));
OP_REQUIRES_OK(ctx, ctx->input("e", &else_)); OP_REQUIRES_OK(ctx, ctx->input("e", &else_));
if (TensorShapeUtils::IsScalar(cond->shape())){
ComputeScalar(ctx, cond, then, else_);
return;
}
bool broadcasting = (TensorShapeUtils::IsVector(cond->shape()) &&
!TensorShapeUtils::IsVector(then->shape()));
@ -108,6 +113,25 @@ class SelectOp : public OpKernel {
} }
} }
void ComputeScalar(OpKernelContext* ctx, const Tensor* cond,
const Tensor* then, const Tensor* else_) {
OP_REQUIRES(
ctx, then->shape().IsSameSize(else_->shape()),
errors::InvalidArgument(
"'then' and 'else' must have the same size. but received: ",
then->shape().DebugString(), " vs. ",
else_->shape().DebugString()));
Tensor* output = nullptr;
OP_REQUIRES_OK(ctx, ctx->allocate_output(0, then->shape(), &output));
if (output->NumElements() > 0) {
functor::SelectScalarFunctor<Device, T> func;
TTypes<bool>::ConstScalar cond_scalar = cond->scalar<bool>();
func(ctx->eigen_device<Device>(), output->flat<T>(), cond_scalar,
then->flat<T>(), else_->flat<T>());
}
}
private: private:
TF_DISALLOW_COPY_AND_ASSIGN(SelectOp); TF_DISALLOW_COPY_AND_ASSIGN(SelectOp);
}; };
@ -152,6 +176,17 @@ struct SelectFunctor<CPUDevice, T> {
} }
}; };
// CPU Specializations of Select functors with scalar
template <typename T>
struct SelectScalarFunctor<CPUDevice, T> {
void operator()(const CPUDevice& d, typename TTypes<T>::Flat out,
TTypes<bool>::ConstScalar cond,
typename TTypes<T>::ConstFlat then_flat,
typename TTypes<T>::ConstFlat else_flat) {
out.device(d) = cond() ? then_flat : else_flat;
}
};
template <typename T> template <typename T>
struct BatchSelectFunctor<CPUDevice, T> { struct BatchSelectFunctor<CPUDevice, T> {
void operator()(const CPUDevice& d, void operator()(const CPUDevice& d,

View File

@ -719,6 +719,14 @@ struct SelectFunctor {
                  typename TTypes<T>::ConstFlat else_flat);
};

template <typename Device, typename T>
struct SelectScalarFunctor {
  void operator()(const Device& d, typename TTypes<T>::Flat out,
                  typename TTypes<bool>::ConstScalar cond,
                  typename TTypes<T>::ConstFlat then_flat,
                  typename TTypes<T>::ConstFlat else_flat);
};

template <typename Device, typename T>
struct BatchSelectFunctor {
  void operator()(const Device& d,

View File

@ -21,7 +21,11 @@ limitations under the License.
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/cuda_kernel_helper.h"

#if !defined(_MSC_VER)
#define UNROLL _Pragma("unroll")
#else
#define UNROLL
#endif

namespace tensorflow {

View File

@ -25,8 +25,25 @@ limitations under the License.
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/types.h"

#if GOOGLE_CUDA
#include "tensorflow/core/platform/stream_executor.h"
#endif  // GOOGLE_CUDA

namespace tensorflow {

#if GOOGLE_CUDA
namespace {
template <typename Scalar>
perftools::gputools::DeviceMemory<Scalar> AsDeviceMemory(
    const Scalar* cuda_memory) {
  perftools::gputools::DeviceMemoryBase wrapped(
      const_cast<Scalar*>(cuda_memory));
  perftools::gputools::DeviceMemory<Scalar> typed(wrapped);
  return typed;
}
}  // namespace
#endif  // GOOGLE_CUDA

template <class Scalar>
class MatrixTriangularSolveOp : public LinearAlgebraOp<Scalar> {
 public:
@ -60,7 +77,9 @@ class MatrixTriangularSolveOp : public LinearAlgebraOp<Scalar> {
  int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const final {
    double rows = static_cast<double>(input_matrix_shapes[0].dim_size(0));
    double num_rhss = static_cast<double>(input_matrix_shapes[1].dim_size(1));
    double cost = rows * rows * num_rhss *
                  (Eigen::TensorOpCost::AddCost<Scalar>() +
                   Eigen::TensorOpCost::MulCost<Scalar>());
    return cost >= static_cast<double>(kint64max) ? kint64max
                                                  : static_cast<int64>(cost);
  }
@ -103,6 +122,121 @@ class MatrixTriangularSolveOp : public LinearAlgebraOp<Scalar> {
  TF_DISALLOW_COPY_AND_ASSIGN(MatrixTriangularSolveOp);
};
#ifdef GOOGLE_CUDA
template <class Scalar>
class MatrixTriangularSolveOpGPU : public LinearAlgebraOp<Scalar> {
public:
typedef LinearAlgebraOp<Scalar> Base;
explicit MatrixTriangularSolveOpGPU(OpKernelConstruction* context)
: Base(context), lower_(true), adjoint_(false) {
OP_REQUIRES_OK(context, context->GetAttr("lower", &lower_));
OP_REQUIRES_OK(context, context->GetAttr("adjoint", &adjoint_));
}
using TensorShapes = typename Base::TensorShapes;
using Matrix = typename Base::Matrix;
using MatrixMap = typename Base::MatrixMap;
using MatrixMaps = typename Base::MatrixMaps;
using ConstMatrixMap = typename Base::ConstMatrixMap;
using ConstMatrixMaps = typename Base::ConstMatrixMaps;
virtual void ValidateInputMatrixShapes(
OpKernelContext* context,
const TensorShapes& input_matrix_shapes) const final {
Base::ValidateSquareSolver(context, input_matrix_shapes);
}
TensorShapes GetOutputMatrixShapes(
const TensorShapes& input_matrix_shapes) const final {
return TensorShapes({TensorShape({input_matrix_shapes[0].dim_size(1),
input_matrix_shapes[1].dim_size(1)})});
}
int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const final {
double rows = static_cast<double>(input_matrix_shapes[0].dim_size(0));
double num_rhss = static_cast<double>(input_matrix_shapes[1].dim_size(1));
double cost = rows * rows * num_rhss *
(Eigen::TensorOpCost::AddCost<Scalar>() +
Eigen::TensorOpCost::MulCost<Scalar>());
return cost >= static_cast<double>(kint64max) ? kint64max
: static_cast<int64>(cost);
}
void ComputeMatrix(OpKernelContext* context, const ConstMatrixMaps& inputs,
MatrixMaps* outputs) final {
const ConstMatrixMap& matrix = inputs[0];
const ConstMatrixMap& rhs = inputs[1];
MatrixMap& output = outputs->at(0);
if (matrix.rows() == 0 || rhs.cols() == 0) {
// To be consistent with the MatrixInverse op, we define the solution for
// an empty set of equations as the empty matrix.
return;
}
auto matrix_ptr = AsDeviceMemory(matrix.data());
auto rhs_ptr = AsDeviceMemory(rhs.data());
auto out_ptr = AsDeviceMemory(output.data());
auto* stream = context->op_device_context()->stream();
uint64 rhs_elems = rhs.rows() * rhs.cols();
bool copy_status =
stream->ThenMemcpyD2D(&out_ptr, rhs_ptr, sizeof(Scalar) * rhs_elems)
.ok();
if (!copy_status) {
context->SetStatus(
errors::Internal("Failed to copy rhs into output before solve"));
}
// Cublas does
// output = matrix \ rhs
// where matrix, rhs and output are assumed to be in column major.
// We want the output to be in row-major, so we can compute
// output' = rhs' / matrix' (' stands for transpose)
// Upper/lower needs to be swapped for this.
perftools::gputools::blas::UpperLower upper_lower_matrix;
perftools::gputools::blas::Transpose transpose_matrix;
if (lower_) {
upper_lower_matrix = perftools::gputools::blas::UpperLower::kUpper;
} else {
upper_lower_matrix = perftools::gputools::blas::UpperLower::kLower;
}
if (adjoint_) {
transpose_matrix = perftools::gputools::blas::Transpose::kTranspose;
} else {
transpose_matrix = perftools::gputools::blas::Transpose::kNoTranspose;
}
uint64 leading_dim_matrix = matrix.cols();
uint64 leading_dim_output = output.cols();
uint64 colmajor_rows = output.cols();
uint64 colmajor_cols = output.rows();
bool blas_launch_status =
stream
->ThenBlasTrsm(perftools::gputools::blas::Side::kRight /*side*/,
upper_lower_matrix /*uplo*/,
transpose_matrix /*trans*/,
perftools::gputools::blas::Diagonal::kNonUnit /*diag*/,
colmajor_rows /*m*/, colmajor_cols /*n*/,
Scalar(1.0) /*alpha*/,
matrix_ptr, leading_dim_matrix /*lda*/,
&out_ptr, leading_dim_output /*ldb*/)
.ok();
if (!blas_launch_status) {
context->SetStatus(errors::Internal("Blas TRSM launch failed"));
}
}
private:
bool lower_;
bool adjoint_;
TF_DISALLOW_COPY_AND_ASSIGN(MatrixTriangularSolveOpGPU);
};
#endif // GOOGLE_CUDA
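A short derivation (not in the commit) of the layout trick ComputeMatrix relies on: cuBLAS consumes column-major buffers, so the row-major TensorFlow buffers holding the triangular matrix A and the right-hand sides B are read by cuBLAS as their transposes, and the solve is therefore phrased from the right:

```latex
% trsm with side = Right solves  X A = B  for X. Feeding it the row-major
% buffers (which cuBLAS sees as transposes) computes
\[
  X^{\top} A^{\top} = B^{\top} \iff A X = B ,
\]
% and since transposition exchanges the lower and upper triangles, the
% uplo flag is swapped whenever no adjoint was requested.
```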
REGISTER_LINALG_OP("MatrixTriangularSolve", (MatrixTriangularSolveOp<float>), REGISTER_LINALG_OP("MatrixTriangularSolve", (MatrixTriangularSolveOp<float>),
float); float);
REGISTER_LINALG_OP("MatrixTriangularSolve", (MatrixTriangularSolveOp<double>), REGISTER_LINALG_OP("MatrixTriangularSolve", (MatrixTriangularSolveOp<double>),
@ -112,4 +246,30 @@ REGISTER_LINALG_OP("BatchMatrixTriangularSolve",
REGISTER_LINALG_OP("BatchMatrixTriangularSolve", REGISTER_LINALG_OP("BatchMatrixTriangularSolve",
(MatrixTriangularSolveOp<double>), double); (MatrixTriangularSolveOp<double>), double);
#ifdef GOOGLE_CUDA
REGISTER_KERNEL_BUILDER(
Name("MatrixTriangularSolve")
.Device(DEVICE_GPU)
.TypeConstraint<float>("T"),
MatrixTriangularSolveOpGPU<float>);
REGISTER_KERNEL_BUILDER(
Name("MatrixTriangularSolve")
.Device(DEVICE_GPU)
.TypeConstraint<double>("T"),
MatrixTriangularSolveOpGPU<double>);
REGISTER_KERNEL_BUILDER(
Name("BatchMatrixTriangularSolve")
.Device(DEVICE_GPU)
.TypeConstraint<float>("T"),
MatrixTriangularSolveOpGPU<float>);
REGISTER_KERNEL_BUILDER(
Name("BatchMatrixTriangularSolve")
.Device(DEVICE_GPU)
.TypeConstraint<double>("T"),
MatrixTriangularSolveOpGPU<double>);
#endif //GOOGLE_CUDA
} // namespace tensorflow } // namespace tensorflow

View File

@ -115,10 +115,12 @@ class AllSampler : public RangeSampler {
  int64 Sample(random::SimplePhilox* rnd) const override {
    LOG(FATAL) << "Should not be called";
    return 0;
  }

  float Probability(int64 value) const override {
    LOG(FATAL) << "Should not be called";
    return 0;
  }

  void SampleBatchGetExpectedCountAvoid(

View File

@ -55,7 +55,10 @@ string JoinPathImpl(std::initializer_list<StringPiece> paths) {
// the first part of the output.
std::pair<StringPiece, StringPiece> SplitPath(StringPiece path) {
  auto pos = path.rfind('/');
#ifdef PLATFORM_WINDOWS
  if (pos == StringPiece::npos)
    pos = path.rfind('\\');
#endif
  // Handle the case with no '/' in 'path'.
  if (pos == StringPiece::npos)
    return std::make_pair(StringPiece(path.data(), 0), path);
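For intuition, the effect of the extra branch on a few hypothetical inputs (illustrative only; note that a path mixing both separators still splits at the last '/'):

```cpp
// SplitPath("a/b/c")    -> ("a/b", "c")     // unchanged POSIX behaviour
// SplitPath("a\\b\\c")  -> ("a\\b", "c")    // now handled on Windows
// SplitPath("noslash")  -> ("", "noslash")  // no separator of either kind
```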

View File

@ -913,7 +913,8 @@ REGISTER_OP("Select")
    .SetShapeFn([](InferenceContext* c) {
      // The inputs 'then' and 'else' must have the same shape.
      ShapeHandle data = c->input(1);
      ShapeHandle other = c->input(2);
      TF_RETURN_IF_ERROR(c->Merge(data, other, &data));

      // The input 'cond' must either have the same shape as 'then' and
      // 'else', or be a vector if 'then' and 'else' are at least vectors.
@ -929,30 +930,49 @@ REGISTER_OP("Select")
      const int32 cond_rank = c->Rank(cond);
      const int32 data_rank = c->Rank(data);

      if (cond_rank == 0) {
        // 'cond' is a scalar: 't' and 'e' can have any shape.
        c->set_output(0, data);
        return Status::OK();
      }

      if (cond_rank != 1) {
        // If 'cond' is neither a scalar nor a vector, its shape must match
        // 'then' and 'else'.
        TF_RETURN_IF_ERROR(c->Merge(data, cond, &data));
        c->set_output(0, data);
        return Status::OK();
      }

      if (data_rank == 0) {
        // If 'then' and 'else' are scalars, 'cond' must be as well.
        TF_RETURN_IF_ERROR(c->Merge(data, cond, &data));
        c->set_output(0, data);
        return Status::OK();
      }

      if (cond_rank == 1) {
        // If 'cond' is a vector and 'then' is not a scalar, the first
        // dimension of 'then' and 'else' must match the size of 'cond'.
        TF_RETURN_IF_ERROR(c->Merge(cond, c->Vector(c->Dim(data, 0)), &cond));
        c->set_output(0, data);
        return Status::OK();
      }

      c->set_output(0, data);
      return Status::OK();
    })
    .Doc(R"doc(
Selects elements from `t` or `e`, depending on `condition`.

The `t` and `e` tensors must all have the same shape, and the
output will also have that shape.

The `condition` tensor must be a scalar if `t` and `e` are scalars.
If `t` and `e` are vectors or higher rank, then `condition` must be either a
scalar, a vector with size matching the first dimension of `t`, or must have
the same shape as `t`.

The `condition` tensor acts as a mask that chooses, based on the value at each
element, whether the corresponding element / row in the output should be

View File

@ -188,7 +188,10 @@ TEST(MathOpsTest, Select_ShapeFn) {
  ShapeInferenceTestOp op("Select");
  INFER_OK(op, "?;?;?", "in1|in2");

  // scalar case
  INFER_OK(op, "[];[1];?", "in1");
  INFER_OK(op, "[];?;?", "in1|in2");
  INFER_OK(op, "[1];?;?",
           "in1|in2");  // When cond is vector, t/e may not match it.
  INFER_OK(op, "[1,2];?;?", "in1|in2?");
@ -200,8 +203,8 @@ TEST(MathOpsTest, Select_ShapeFn) {
  INFER_OK(op, "?;[1,2];?", "in1");
  INFER_OK(op, "?;?;[1,2]", "in2");
  INFER_ERROR("Shapes must be equal rank, but are 0 and 1", op, "[1];[];?");
  INFER_ERROR("Shapes must be equal rank, but are 1 and 2", op, "[];[1];[1,2]");
  INFER_ERROR("Shapes must be equal rank, but are 1 and 2", op, "[1,2];[1];?");
  INFER_OK(op, "[2];[?];[?]", "in1|in2");

View File

@ -20,9 +20,11 @@ limitations under the License.
#include <stddef.h>
#include <stdint.h>

#if defined(WIN32)
#include "extras/CUPTI/include/cupti.h"
#else
#include "cuda/extras/CUPTI/include/cupti.h"
#endif

namespace perftools {
namespace gputools {
namespace profiler {

View File

@ -261,6 +261,14 @@ class Env {
  virtual Status GetSymbolFromLibrary(void* handle, const char* symbol_name,
                                      void** symbol) = 0;

  // \brief Builds the name of a dynamic library.
  //
  // "name" should be the name of the library.
  // "version" should be the version of the library, or empty.
  // Returns a name that LoadLibrary() can use.
  virtual string FormatLibraryFileName(const string& name,
                                       const string& version) = 0;

 private:
  std::unique_ptr<FileSystemRegistry> file_system_registry_;
  TF_DISALLOW_COPY_AND_ASSIGN(Env);
@ -318,7 +326,10 @@ class EnvWrapper : public Env {
                              void** symbol) override {
    return target_->GetSymbolFromLibrary(handle, symbol_name, symbol);
  }
  string FormatLibraryFileName(const string& name,
                               const string& version) override {
    return target_->FormatLibraryFileName(name, version);
  }

 private:
  Env* target_;
};
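A sketch of how the new method composes with the existing loading API (hypothetical library and symbol names, error handling reduced to checks):

```cpp
#include "tensorflow/core/platform/env.h"

// Hypothetical: build a platform-appropriate file name, load the library,
// and resolve a symbol from it. On Windows this probes "myop.dll"; other
// platforms format the name according to their own conventions.
void LoadMyOpLibrary() {
  tensorflow::Env* env = tensorflow::Env::Default();
  tensorflow::string filename = env->FormatLibraryFileName("myop", "");
  void* handle = nullptr;
  TF_CHECK_OK(env->LoadLibrary(filename.c_str(), &handle));
  void* symbol = nullptr;
  TF_CHECK_OK(env->GetSymbolFromLibrary(handle, "MyOpInit", &symbol));
}
```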

View File

@ -25,8 +25,6 @@ namespace internal {

Status LoadLibrary(const char* library_filename, void** handle);
Status GetSymbolFromLibrary(void* handle, const char* symbol_name,
                            void** symbol);
string FormatLibraryFileName(const string& name, const string& version);

}  // namespace internal

View File

@ -20,7 +20,8 @@ limitations under the License.
// mobile.
#if !defined(PLATFORM_POSIX) && !defined(PLATFORM_GOOGLE) && \
    !defined(PLATFORM_POSIX_ANDROID) && !defined(PLATFORM_GOOGLE_ANDROID) && \
    !defined(PLATFORM_WINDOWS)

// Choose which platform we are on.
#if defined(ANDROID) || defined(__ANDROID__)
View File

@ -119,6 +119,10 @@ class PosixEnv : public Env {
    return tensorflow::internal::GetSymbolFromLibrary(handle, symbol_name,
                                                      symbol);
  }

  string FormatLibraryFileName(const string& name, const string& version) {
    return tensorflow::internal::FormatLibraryFileName(name, version);
  }
};

}  // namespace

View File

@ -22,7 +22,7 @@ limitations under the License.
#if defined(PLATFORM_GOOGLE)
#include "tensorflow/core/platform/google/stacktrace.h"
#elif defined(PLATFORM_POSIX) || defined(PLATFORM_POSIX_ANDROID) || \
    defined(PLATFORM_GOOGLE_ANDROID) || defined(PLATFORM_WINDOWS)
#include "tensorflow/core/platform/default/stacktrace.h"
#else
#error Define the appropriate PLATFORM_<foo> macro for this platform

View File

@ -26,6 +26,7 @@ limitations under the License.
#include <thread>
#include <vector>
#include <string>

#include "tensorflow/core/lib/core/error_codes.pb.h"
#include "tensorflow/core/platform/load_library.h"
@ -52,7 +53,20 @@ class StdThread : public Thread {

class WindowsEnv : public Env {
 public:
  WindowsEnv()
      : GetSystemTimePreciseAsFileTime_(NULL) {
    // GetSystemTimePreciseAsFileTime function is only available in the latest
    // versions of Windows. For that reason, we try to look it up in
    // kernel32.dll at runtime and use an alternative option if the function
    // is not available.
    HMODULE module = GetModuleHandle("kernel32.dll");
    if (module != NULL) {
      auto func = (FnGetSystemTimePreciseAsFileTime)GetProcAddress(
          module, "GetSystemTimePreciseAsFileTime");
      GetSystemTimePreciseAsFileTime_ = func;
    }
  }

  ~WindowsEnv() override {
    LOG(FATAL) << "Env::Default() must not be destroyed";
  }
@ -62,11 +76,32 @@ class WindowsEnv : public Env {
  }

  uint64 NowMicros() override {
    if (GetSystemTimePreciseAsFileTime_ != NULL) {
      // GetSystemTimePreciseAsFileTime function is only available in latest
      // versions of Windows, so we need to check for its existence here.
      // All std::chrono clocks on Windows proved to return
      // values that may repeat, which is not good enough for some uses.
      constexpr int64_t kUnixEpochStartTicks = 116444736000000000i64;
      constexpr int64_t kFtToMicroSec = 10;
      // This interface needs to return system time and not
      // just any microseconds because it is often used as an argument
      // to TimedWait() on condition variable.
      FILETIME system_time;
      GetSystemTimePreciseAsFileTime_(&system_time);

      LARGE_INTEGER li;
      li.LowPart = system_time.dwLowDateTime;
      li.HighPart = system_time.dwHighDateTime;
      // Subtract unix epoch start
      li.QuadPart -= kUnixEpochStartTicks;
      // Convert to microsecs
      li.QuadPart /= kFtToMicroSec;
      return li.QuadPart;
    }
    using namespace std::chrono;
    return duration_cast<microseconds>(
        system_clock::now().time_since_epoch()).count();
  }
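The magic epoch constant can be sanity-checked with a little calendar arithmetic; a standalone check (not part of the commit):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // 1601-01-01 to 1970-01-01 spans 369 years, 89 of them leap years
  // (92 multiples of 4, minus the non-leap centuries 1700, 1800, 1900).
  const int64_t days = 369 * 365 + 89;       // 134774 days
  const int64_t seconds = days * 86400;      // 11644473600 seconds
  const int64_t ticks = seconds * 10000000;  // 100-ns FILETIME ticks
  assert(ticks == 116444736000000000LL);     // == kUnixEpochStartTicks
  return 0;
}
```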
  void SleepForMicroseconds(int64 micros) override { Sleep(micros / 1000); }
@ -94,19 +129,53 @@ class WindowsEnv : public Env {
    });
  }

  Status LoadLibrary(const char* library_filename, void** handle) override {
    std::string file_name = library_filename;
    std::replace(file_name.begin(), file_name.end(), '/', '\\');
    HMODULE hModule = LoadLibraryEx(file_name.c_str(), NULL,
                                    LOAD_WITH_ALTERED_SEARCH_PATH);
    if (!hModule) {
      return errors::NotFound(file_name + " not found");
    }
    *handle = hModule;
    return Status::OK();
  }

  Status GetSymbolFromLibrary(void* handle, const char* symbol_name,
                              void** symbol) override {
    FARPROC found_symbol = GetProcAddress((HMODULE)handle, symbol_name);
    if (found_symbol == NULL) {
      return errors::NotFound(std::string(symbol_name) + " not found");
    }
    *symbol = (void**)found_symbol;
    return Status::OK();
  }

  string FormatLibraryFileName(const string& name,
                               const string& version) override {
    string filename;
    if (version.size() == 0) {
      filename = name + ".dll";
    } else {
      filename = name + version + ".dll";
    }
    return filename;
  }

 private:
  typedef VOID(WINAPI* FnGetSystemTimePreciseAsFileTime)(LPFILETIME);
  FnGetSystemTimePreciseAsFileTime GetSystemTimePreciseAsFileTime_;
};

}  // namespace

REGISTER_FILE_SYSTEM("", WindowsFileSystem);
REGISTER_FILE_SYSTEM("file", LocalWinFileSystem);

Env* Env::Default() {
  static Env* default_env = new WindowsEnv;
  return default_env;

View File

@ -0,0 +1,33 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/platform/windows/error.h"
namespace tensorflow {
namespace internal {
std::string GetWindowsErrorMessage(DWORD err) {
LPSTR buffer = NULL;
DWORD flags = FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_IGNORE_INSERTS;
FormatMessageA(flags, NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
reinterpret_cast<LPSTR>(&buffer), 0, NULL);
std::string message = buffer;
LocalFree(buffer);
return message;
}
} // namespace internal
} // namespace tensorflow

View File

@ -0,0 +1,32 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_PLATFORM_WINDOWS_ERROR_H_
#define TENSORFLOW_CORE_PLATFORM_WINDOWS_ERROR_H_
#include <string>
#include <Windows.h>
namespace tensorflow {
namespace internal {
std::string GetWindowsErrorMessage(DWORD err);
}  // namespace internal
}  // namespace tensorflow
#endif // TENSORFLOW_CORE_PLATFORM_WINDOWS_ERROR_H_

View File

@ -15,25 +15,27 @@ limitations under the License.
#include "tensorflow/core/platform/net.h"

#include <cerrno>
#include <cstdlib>
#include <unordered_set>

#include <sys/types.h>
#include <winsock2.h>

#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/windows/error.h"

#undef ERROR

#pragma comment(lib, "Ws2_32.lib")

namespace tensorflow {
namespace internal {
namespace {

bool IsPortAvailable(int* port, bool is_tcp) {
  const int protocol = is_tcp ? IPPROTO_TCP : 0;
  SOCKET sock = socket(AF_INET, is_tcp ? SOCK_STREAM : SOCK_DGRAM, protocol);

  struct sockaddr_in addr;
  int addr_len = static_cast<int>(sizeof(addr));
@ -41,17 +43,20 @@ bool IsPortAvailable(int* port, bool is_tcp) {
  CHECK_GE(*port, 0);
  CHECK_LE(*port, 65535);
  if (sock == INVALID_SOCKET) {
    LOG(ERROR) << "socket() failed: "
               << GetWindowsErrorMessage(WSAGetLastError());
    return false;
  }

  // SO_REUSEADDR lets us start up a server immediately after it exits.
  const int one = 1;
  int result = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
                          reinterpret_cast<const char*>(&one), sizeof(one));
  if (result == SOCKET_ERROR) {
    LOG(ERROR) << "setsockopt() failed: "
               << GetWindowsErrorMessage(WSAGetLastError());
    closesocket(sock);
    return false;
  }
@ -59,18 +64,23 @@ bool IsPortAvailable(int* port, bool is_tcp) {
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = INADDR_ANY;
  addr.sin_port = htons((uint16_t)*port);
  result = bind(sock, (struct sockaddr*)&addr, sizeof(addr));
  if (result == SOCKET_ERROR) {
    LOG(WARNING) << "bind(port=" << *port << ") failed: "
                 << GetWindowsErrorMessage(WSAGetLastError());
    closesocket(sock);
    return false;
  }

  // Get the bound port number.
  result = getsockname(sock, (struct sockaddr*)&addr, &addr_len);
  if (result == SOCKET_ERROR) {
    LOG(WARNING) << "getsockname() failed: "
                 << GetWindowsErrorMessage(WSAGetLastError());
    closesocket(sock);
    return false;
  }

  CHECK_LE(addr_len, sizeof(addr));
  actual_port = ntohs(addr.sin_port);
  CHECK_GT(actual_port, 0);
@ -79,7 +89,8 @@ bool IsPortAvailable(int* port, bool is_tcp) {
  } else {
    CHECK_EQ(*port, actual_port);
  }

  closesocket(sock);
  return true;
}
@ -89,6 +100,12 @@ const int kMaximumTrials = 1000;

}  // namespace

int PickUnusedPortOrDie() {
  WSADATA wsaData;
  if (WSAStartup(MAKEWORD(2, 2), &wsaData) != NO_ERROR) {
    LOG(ERROR) << "Error at WSAStartup()";
    return false;
  }

  static std::unordered_set<int> chosen_ports;

  // Type of port to first pick in the next iteration.
@ -121,6 +138,7 @@ int PickUnusedPortOrDie() {
  }
  chosen_ports.insert(port);
  WSACleanup();
  return port;
}

View File

@ -19,8 +19,8 @@ limitations under the License.
#ifdef SNAPPY
#include <snappy.h>
#endif

#include <WinSock2.h>
#include <Windows.h>

#include "tensorflow/core/platform/cpu_info.h"
#include "tensorflow/core/platform/demangle.h"
@ -37,10 +37,13 @@ namespace port {

void InitMain(const char* usage, int* argc, char*** argv) {}

string Hostname() {
  char name[1024];
  DWORD name_size = sizeof(name);
  name[0] = 0;
  if (::GetComputerNameA(name, &name_size)) {
    name[name_size] = 0;
  }
  return name;
}

int NumSchedulableCPUs() {

View File

@ -30,6 +30,7 @@ limitations under the License.
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/posix/error.h"
#include "tensorflow/core/platform/windows/error.h"
#include "tensorflow/core/platform/windows/windows_file_system.h"

// TODO(mrry): Prevent this Windows.h #define from leaking out of our headers.
@ -39,19 +40,71 @@ namespace tensorflow {

namespace {
// RAII helpers for HANDLEs
const auto CloseHandleFunc = [](HANDLE h) { ::CloseHandle(h); };
typedef std::unique_ptr<void, decltype(CloseHandleFunc)> UniqueCloseHandlePtr;
inline Status IOErrorFromWindowsError(const string& context, DWORD err) {
return IOError(
context + string(" : ") + internal::GetWindowsErrorMessage(err), err);
}
// PLEASE NOTE: hfile is expected to be an async handle
// (i.e. opened with FILE_FLAG_OVERLAPPED)
SSIZE_T pread(HANDLE hfile, char* src, size_t num_bytes, uint64_t offset) {
assert(num_bytes <= std::numeric_limits<DWORD>::max());
OVERLAPPED overlapped = {0};
ULARGE_INTEGER offset_union;
offset_union.QuadPart = offset;
overlapped.Offset = offset_union.LowPart;
overlapped.OffsetHigh = offset_union.HighPart;
overlapped.hEvent = ::CreateEvent(NULL, TRUE, FALSE, NULL);
if (NULL == overlapped.hEvent) {
return -1;
}
SSIZE_T result = 0;
unsigned long bytes_read = 0;
DWORD last_error = ERROR_SUCCESS;
BOOL read_result = ::ReadFile(hfile, src, static_cast<DWORD>(num_bytes),
&bytes_read, &overlapped);
if ((FALSE == read_result) &&
((last_error = GetLastError()) != ERROR_IO_PENDING)) {
result = (last_error == ERROR_HANDLE_EOF) ? 0 : -1;
} else {
if (ERROR_IO_PENDING == last_error) { // Otherwise bytes_read already has the result.
BOOL overlapped_result = ::GetOverlappedResult(hfile, &overlapped,
&bytes_read, TRUE);
if (FALSE == overlapped_result) {
result = (::GetLastError() == ERROR_HANDLE_EOF) ? 0 : -1;
} else {
result = bytes_read;
}
}
}
::CloseHandle(overlapped.hEvent);
return result;
}
// read() based random-access
class WindowsRandomAccessFile : public RandomAccessFile {
 private:
  string filename_;
  HANDLE hfile_;

 public:
  WindowsRandomAccessFile(const string& fname, HANDLE hfile)
      : filename_(fname), hfile_(hfile) {}

  ~WindowsRandomAccessFile() override {
    if (hfile_ != NULL && hfile_ != INVALID_HANDLE_VALUE) {
      ::CloseHandle(hfile_);
    }
  }
@ -59,13 +112,10 @@ class WindowsRandomAccessFile : public RandomAccessFile {
              char* scratch) const override {
    Status s;
    char* dst = scratch;
    while (n > 0 && s.ok()) {
      SSIZE_T r = pread(hfile_, dst, n, offset);
      if (r > 0) {
        offset += r;
        dst += r;
        n -= r;
      } else if (r == 0) {
@ -84,104 +134,246 @@ class WindowsRandomAccessFile : public RandomAccessFile {
class WindowsWritableFile : public WritableFile {
 private:
  string filename_;
  HANDLE hfile_;

 public:
  WindowsWritableFile(const string& fname, HANDLE hFile)
      : filename_(fname), hfile_(hFile) {}

  ~WindowsWritableFile() override {
    if (hfile_ != NULL && hfile_ != INVALID_HANDLE_VALUE) {
      WindowsWritableFile::Close();
    }
  }

  Status Append(const StringPiece& data) override {
    DWORD bytes_written = 0;
    DWORD data_size = static_cast<DWORD>(data.size());
    BOOL write_result = ::WriteFile(hfile_, data.data(), data_size,
                                    &bytes_written, NULL);
    if (FALSE == write_result) {
      return IOErrorFromWindowsError(
          "Failed to WriteFile: " + filename_, ::GetLastError());
    }
    assert(size_t(bytes_written) == data.size());
    return Status::OK();
  }

  Status Close() override {
    assert(INVALID_HANDLE_VALUE != hfile_);

    Status result = Flush();
    if (!result.ok()) {
      return result;
    }

    if (FALSE == ::CloseHandle(hfile_)) {
      return IOErrorFromWindowsError(
          "CloseHandle failed for: " + filename_, ::GetLastError());
    }

    hfile_ = INVALID_HANDLE_VALUE;
    return Status::OK();
  }

  Status Flush() override {
    if (FALSE == ::FlushFileBuffers(hfile_)) {
      return IOErrorFromWindowsError(
          "FlushFileBuffers failed for: " + filename_, ::GetLastError());
    }
    return Status::OK();
  }

  Status Sync() override {
    return Flush();
  }
};
class WinReadOnlyMemoryRegion : public ReadOnlyMemoryRegion {
private:
const std::string filename_;
HANDLE hfile_;
HANDLE hmap_;
const void* const address_;
const uint64 length_;
public:
WinReadOnlyMemoryRegion(const std::string& filename, HANDLE hfile,
HANDLE hmap, const void* address, uint64 length)
: filename_(filename), hfile_(hfile), hmap_(hmap), address_(address),
length_(length) {}
~WinReadOnlyMemoryRegion() {
BOOL ret = ::UnmapViewOfFile(address_);
assert(ret);
ret = ::CloseHandle(hmap_);
assert(ret);
ret = ::CloseHandle(hfile_);
assert(ret);
}
const void* data() override { return address_; }
uint64 length() override { return length_; }
};
}  // namespace
Status WindowsFileSystem::NewRandomAccessFile(
    const string& fname, std::unique_ptr<RandomAccessFile>* result) {
  string translated_fname = TranslateName(fname);
  result->reset();

  // Open the file for read-only random access.
  // Random access is to disable read-ahead, as the system reads too much data.
  // Open in async mode which makes Windows allow more parallelism even
  // if we need to do sync I/O on top of it.
  DWORD file_flags = FILE_ATTRIBUTE_READONLY | FILE_FLAG_RANDOM_ACCESS |
                     FILE_FLAG_OVERLAPPED;
  // Shared access is necessary for tests to pass;
  // almost all tests would work with a possible exception of fault_injection.
  DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
  HANDLE hfile = ::CreateFileA(translated_fname.c_str(), GENERIC_READ,
                               share_mode, NULL, OPEN_EXISTING, file_flags,
                               NULL);

  if (INVALID_HANDLE_VALUE == hfile) {
    string context = "NewRandomAccessFile failed to Create/Open: " + fname;
    return IOErrorFromWindowsError(context, ::GetLastError());
  }

  result->reset(new WindowsRandomAccessFile(translated_fname, hfile));
  return Status::OK();
}
Status WindowsFileSystem::NewWritableFile(
    const string& fname, std::unique_ptr<WritableFile>* result) {
  string translated_fname = TranslateName(fname);
  result->reset();

  DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
  HANDLE hfile = ::CreateFileA(translated_fname.c_str(), GENERIC_WRITE,
                               share_mode, NULL, CREATE_ALWAYS,
                               FILE_ATTRIBUTE_NORMAL, NULL);

  if (INVALID_HANDLE_VALUE == hfile) {
    string context = "Failed to create a NewWritableFile: " + fname;
    return IOErrorFromWindowsError(context, ::GetLastError());
  }

  result->reset(new WindowsWritableFile(translated_fname, hfile));
  return Status::OK();
}
Status WindowsFileSystem::NewAppendableFile(
    const string& fname, std::unique_ptr<WritableFile>* result) {
  string translated_fname = TranslateName(fname);
  result->reset();

  DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
  HANDLE hfile = ::CreateFileA(translated_fname.c_str(), GENERIC_WRITE,
                               share_mode, NULL, OPEN_ALWAYS,
                               FILE_ATTRIBUTE_NORMAL, NULL);

  if (INVALID_HANDLE_VALUE == hfile) {
    string context = "Failed to create a NewAppendableFile: " + fname;
    return IOErrorFromWindowsError(context, ::GetLastError());
  }

  UniqueCloseHandlePtr file_guard(hfile, CloseHandleFunc);

  DWORD file_ptr = ::SetFilePointer(hfile, NULL, NULL, FILE_END);
  if (INVALID_SET_FILE_POINTER == file_ptr) {
    string context = "Failed to create a NewAppendableFile: " + fname;
    return IOErrorFromWindowsError(context, ::GetLastError());
  }

  result->reset(new WindowsWritableFile(translated_fname, hfile));
  file_guard.release();
  return Status::OK();
}
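The three factory methods map the classic stdio modes onto Win32 creation dispositions; in brief (standard CreateFile semantics, summarized here for reference):

```cpp
// OPEN_EXISTING  - NewRandomAccessFile: fail if the file is missing ("r")
// CREATE_ALWAYS  - NewWritableFile: create, or truncate an existing file ("w")
// OPEN_ALWAYS    - NewAppendableFile: open or create ("a"); the following
//                  SetFilePointer(..., FILE_END) supplies the append position
```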
Status WindowsFileSystem::NewReadOnlyMemoryRegionFromFile(
    const string& fname, std::unique_ptr<ReadOnlyMemoryRegion>* result) {
  string translated_fname = TranslateName(fname);
  result->reset();
  Status s = Status::OK();
// Open the file for read-only random access
DWORD file_flags = FILE_ATTRIBUTE_READONLY | FILE_FLAG_RANDOM_ACCESS;
// Open in async mode which makes Windows allow more parallelism even
// if we need to do sync I/O on top of it.
file_flags |= FILE_FLAG_OVERLAPPED;
DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
HANDLE hfile = ::CreateFileA(translated_fname.c_str(), GENERIC_READ,
share_mode, NULL, OPEN_EXISTING, file_flags,
NULL);
if (INVALID_HANDLE_VALUE == hfile) {
return IOErrorFromWindowsError(
"NewReadOnlyMemoryRegionFromFile failed to Create/Open: " + fname,
::GetLastError());
}
UniqueCloseHandlePtr file_guard(hfile, CloseHandleFunc);
// Use mmap when virtual address-space is plentiful.
uint64_t file_size;
s = GetFileSize(translated_fname, &file_size);
if (s.ok()) {
// Will not map empty files
if (file_size == 0) {
return IOError(
"NewReadOnlyMemoryRegionFromFile failed to map empty file: " + fname,
EINVAL);
}
HANDLE hmap = ::CreateFileMappingA(hfile, NULL, PAGE_READONLY,
0, // Whole file at its present length
0,
NULL); // Mapping name
if (!hmap) {
string context = "Failed to create file mapping for "
"NewReadOnlyMemoryRegionFromFile: " + fname;
return IOErrorFromWindowsError(context, ::GetLastError());
}
UniqueCloseHandlePtr map_guard(hmap, CloseHandleFunc);
const void* mapped_region = ::MapViewOfFileEx(
hmap, FILE_MAP_READ,
0, // High DWORD of access start
0, // Low DWORD
file_size,
NULL); // Let the OS choose the mapping
if (!mapped_region) {
string context = "Failed to MapViewOfFile for "
"NewReadOnlyMemoryRegionFromFile: " + fname;
return IOErrorFromWindowsError(context, ::GetLastError());
}
result->reset(new WinReadOnlyMemoryRegion(fname, hfile, hmap,
mapped_region, file_size));
map_guard.release();
file_guard.release();
}
return s;
}
bool WindowsFileSystem::FileExists(const string& fname) {
  constexpr int kOk = 0;
  return _access(TranslateName(fname).c_str(), kOk) == 0;
}
Status WindowsFileSystem::GetChildren(const string& dir,
@ -189,27 +381,39 @@ Status WindowsFileSystem::GetChildren(const string& dir,
  string translated_dir = TranslateName(dir);
  result->clear();

  string pattern = translated_dir;
  if (!pattern.empty() && pattern.back() != '\\' && pattern.back() != '/') {
    pattern += "\\*";
  } else {
    pattern += '*';
  }

  WIN32_FIND_DATA find_data;
  HANDLE find_handle = ::FindFirstFileA(pattern.c_str(), &find_data);
  if (find_handle == INVALID_HANDLE_VALUE) {
    string context = "FindFirstFile failed for: " + translated_dir;
    return IOErrorFromWindowsError(context, ::GetLastError());
  }

  do {
    const StringPiece basename = find_data.cFileName;
    if (basename != "." && basename != "..") {
      result->push_back(find_data.cFileName);
    }
  } while (::FindNextFileA(find_handle, &find_data));

  if (!::FindClose(find_handle)) {
    string context = "FindClose failed for: " + translated_dir;
    return IOErrorFromWindowsError(context, ::GetLastError());
  }

  return Status::OK();
}
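FindFirstFileA enumerates by wildcard pattern rather than by directory handle, which is why a trailing `\*` is appended; for a couple of hypothetical inputs:

```cpp
// "C:\\data"   -> pattern "C:\\data\\*"  (separator inserted first)
// "C:\\data\\" -> pattern "C:\\data\\*"  (separator already present)
// The "." and ".." entries produced by the scan are filtered out above.
```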
Status WindowsFileSystem::DeleteFile(const string& fname) {
  Status result;
  if (unlink(TranslateName(fname).c_str()) != 0) {
    result = IOError("Failed to delete a file: " + fname, errno);
  }
  return result;
}
@ -217,7 +421,7 @@ Status WindowsFileSystem::DeleteFile(const string& fname) {
Status WindowsFileSystem::CreateDir(const string& name) {
  Status result;
  if (_mkdir(TranslateName(name).c_str()) != 0) {
    result = IOError("Failed to create a directory: " + name, errno);
  }
  return result;
}
@ -225,42 +429,52 @@ Status WindowsFileSystem::CreateDir(const string& name) {
Status WindowsFileSystem::DeleteDir(const string& name) {
  Status result;
  if (_rmdir(TranslateName(name).c_str()) != 0) {
    result = IOError("Failed to remove a directory: " + name, errno);
  }
  return result;
}

Status WindowsFileSystem::GetFileSize(const string& fname, uint64* size) {
  string translated_fname = TranslateName(fname);
  Status result;
  WIN32_FILE_ATTRIBUTE_DATA attrs;
  if (TRUE == ::GetFileAttributesExA(translated_fname.c_str(),
                                     GetFileExInfoStandard, &attrs)) {
    ULARGE_INTEGER file_size;
    file_size.HighPart = attrs.nFileSizeHigh;
    file_size.LowPart = attrs.nFileSizeLow;
    *size = file_size.QuadPart;
  } else {
    string context = "Can not get size for: " + fname;
    result = IOErrorFromWindowsError(context, ::GetLastError());
  }
  return result;
}
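The ULARGE_INTEGER union above simply splices two 32-bit halves into a 64-bit byte count; an equivalent shift-based sketch (illustrative helper, not from the commit):

```cpp
#include <cstdint>

// Recombine the nFileSizeHigh/nFileSizeLow pair reported by
// GetFileAttributesExA into a single 64-bit size.
uint64_t CombineFileSize(uint32_t high, uint32_t low) {
  return (static_cast<uint64_t>(high) << 32) | low;
}
```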
Status WindowsFileSystem::RenameFile(const string& src, const string& target) {
  Status result;
  // rename() is not capable of replacing the existing file as on Linux,
  // so use the OS API directly.
  if (!::MoveFileExA(TranslateName(src).c_str(), TranslateName(target).c_str(),
                     MOVEFILE_REPLACE_EXISTING)) {
    string context(strings::StrCat("Failed to rename: ", src, " to: ", target));
    result = IOErrorFromWindowsError(context, ::GetLastError());
  }
  return result;
}

Status WindowsFileSystem::Stat(const string& fname, FileStatistics* stat) {
  Status result;
  struct _stat sbuf;
  if (_stat(TranslateName(fname).c_str(), &sbuf) != 0) {
    result = IOError(fname, errno);
  } else {
    stat->mtime_nsec = sbuf.st_mtime * 1e9;
    stat->length = sbuf.st_size;
    stat->is_directory = PathIsDirectory(TranslateName(fname).c_str());
  }
  return result;
}
}  // namespace tensorflow

View File

@ -64,7 +64,14 @@ class WindowsFileSystem : public FileSystem {
  }
};

class LocalWinFileSystem : public WindowsFileSystem {
 public:
  string TranslateName(const string& name) const override {
    StringPiece scheme, host, path;
    ParseURI(name, &scheme, &host, &path);
    return path.ToString();
  }
};

}  // namespace tensorflow

View File

@ -20,7 +20,7 @@ limitations under the License.
#define TF_MAJOR_VERSION 0
#define TF_MINOR_VERSION 11
#define TF_PATCH_VERSION 0rc1

// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
// "-beta", "-rc", "-rc.1")

View File

@ -21,7 +21,7 @@ Some examples use the `pandas` library for data processing (`sudo pip install pa
* [Deep Neural Network with Customized Decay Function](iris_custom_decay_dnn.py)

## Specialized Models
* [Building a Random Forest Model](random_forest_mnist.py)
* [Building a Wide & Deep Model](wide_n_deep_tutorial.py)
* [Building a Residual Network Model](resnet.py)

View File

@ -84,7 +84,6 @@ py_test(
    args = [
        "--fake_data",
        "--max_steps=10",
    ],
    main = "fully_connected_feed.py",
    srcs_version = "PY2AND3",

View File

@ -117,7 +117,7 @@ def run_training():
  """Train MNIST for a number of steps."""
  # Get the sets of images and labels for training, validation, and
  # test on MNIST.
  data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)

  # Tell TensorFlow that the model will be built into the default Graph.
  with tf.Graph().as_default():
@ -146,13 +146,13 @@ def run_training():
    init = tf.initialize_all_variables()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Instantiate a SummaryWriter to output summaries and the Graph.
    summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, sess.graph)

    # And then after everything is built:
@ -190,7 +190,7 @@ def run_training():
      # Save a checkpoint and evaluate the model periodically.
      if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
        saver.save(sess, checkpoint_file, global_step=step)
        # Evaluate against the training set.
        print('Training Data Eval:')
@ -216,6 +216,9 @@ def run_training():

def main(_):
  if tf.gfile.Exists(FLAGS.log_dir):
    tf.gfile.DeleteRecursively(FLAGS.log_dir)
  tf.gfile.MakeDirs(FLAGS.log_dir)
  run_training()
@ -252,10 +255,16 @@ if __name__ == '__main__':
      help='Batch size. Must divide evenly into the dataset sizes.'
  )
  parser.add_argument(
      '--input_data_dir',
      type=str,
      default='/tmp/tensorflow/mnist/input_data',
      help='Directory to put the input data.'
  )
  parser.add_argument(
      '--log_dir',
      type=str,
      default='/tmp/tensorflow/mnist/logs/fully_connected_feed',
      help='Directory to put the log data.'
  )
  parser.add_argument(
      '--fake_data',
View File

@ -72,7 +72,7 @@ def main(_):

if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data',
                      help='Directory for storing input data')
  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

View File

@ -137,9 +137,9 @@ def train():
  # Merge all the summaries and write them out to /tmp/mnist_logs (by default)
  merged = tf.summary.merge_all()
  train_writer = tf.train.SummaryWriter(FLAGS.log_dir + '/train',
                                        sess.graph)
  test_writer = tf.train.SummaryWriter(FLAGS.log_dir + '/test')
  tf.initialize_all_variables().run()

  # Train the model, and also write summaries.
@ -180,9 +180,9 @@ def train():

def main(_):
  if tf.gfile.Exists(FLAGS.log_dir):
    tf.gfile.DeleteRecursively(FLAGS.log_dir)
  tf.gfile.MakeDirs(FLAGS.log_dir)
  train()
@ -197,10 +197,9 @@ if __name__ == '__main__':
                      help='Initial learning rate')
  parser.add_argument('--dropout', type=float, default=0.9,
                      help='Keep probability for training dropout.')
  parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data',
                      help='Directory for storing input data')
  parser.add_argument('--log_dir', type=str, default='/tmp/tensorflow/mnist/logs/mnist_with_summaries',
                      help='Summaries log directory')
  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

View File

@ -11,8 +11,8 @@ the full softmax loss.

At inference time, you can compute full softmax probabilities with the
expression `tf.nn.softmax(tf.matmul(inputs, tf.transpose(weights)) + biases)`.

See our
[Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf)

Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.

View File

@ -17,7 +17,7 @@ for k in 0..in_channels-1
                        filter[di, dj, k, q]

Must have `strides[0] = strides[3] = 1`. For the most common case of the same
horizontal and vertical strides, `strides = [1, stride, stride, 1]`.

##### Args:

View File

@ -42,8 +42,7 @@ with an otherwise unused class.
where a sampled class equals one of the target classes. If set to where a sampled class equals one of the target classes. If set to
`True`, this is a "Sampled Logistic" loss instead of NCE, and we are `True`, this is a "Sampled Logistic" loss instead of NCE, and we are
learning to generate log-odds instead of log probabilities. See learning to generate log-odds instead of log probabilities. See
our [Candidate Sampling Algorithms Reference] our [Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf).
(../../extras/candidate_sampling.pdf).
Default is False. Default is False.
* <b>`partition_strategy`</b>: A string specifying the partitioning strategy, relevant * <b>`partition_strategy`</b>: A string specifying the partitioning strategy, relevant
if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported. if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.

View File

@ -11,8 +11,8 @@ each component is divided by the weighted, squared sum of inputs within
sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2) sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
output = input / (bias + alpha * sqr_sum) ** beta output = input / (bias + alpha * sqr_sum) ** beta
For details, see [Krizhevsky et al., ImageNet classification with deep For details, see
convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks). [Krizhevsky et al., ImageNet classification with deep convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
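A plain-NumPy sketch of the formula above may help; this is illustrative only (the library kernel is implemented in C++), and the default parameter values are assumptions:

```python
import numpy as np

def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5):
  # x: [batch, height, width, depth]; the window is clamped at the depth edges.
  out = np.empty_like(x)
  depth = x.shape[3]
  for d in range(depth):
    lo, hi = max(0, d - depth_radius), min(depth, d + depth_radius + 1)
    sqr_sum = np.sum(x[..., lo:hi] ** 2, axis=-1)
    out[..., d] = x[..., d] / (bias + alpha * sqr_sum) ** beta
  return out
```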
##### Args: ##### Args:

View File

@ -22,7 +22,7 @@ In detail, with the default NHWC format,
filter[di, dj, q, k] filter[di, dj, q, k]
Must have `strides[0] = strides[3] = 1`. For the most common case of the same Must have `strides[0] = strides[3] = 1`. For the most common case of the same
horizontal and vertices strides, `strides = [1, stride, stride, 1]`. horizontal and vertical strides, `strides = [1, stride, stride, 1]`.
##### Args: ##### Args:

View File

@ -63,37 +63,37 @@ Then, select the correct binary to install:
```bash ```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7 # Ubuntu/Linux 64-bit, CPU only, Python 2.7
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl $ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7 # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below. # Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl $ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
# Mac OS X, CPU only, Python 2.7: # Mac OS X, CPU only, Python 2.7:
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py2-none-any.whl $ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py2-none-any.whl
# Mac OS X, GPU enabled, Python 2.7: # Mac OS X, GPU enabled, Python 2.7:
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py2-none-any.whl $ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py2-none-any.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.4 # Ubuntu/Linux 64-bit, CPU only, Python 3.4
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl $ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4 # Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below. # Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl $ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.5 # Ubuntu/Linux 64-bit, CPU only, Python 3.5
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl $ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5 # Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below. # Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl $ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl
# Mac OS X, CPU only, Python 3.4 or 3.5: # Mac OS X, CPU only, Python 3.4 or 3.5:
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py3-none-any.whl $ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py3-none-any.whl
# Mac OS X, GPU enabled, Python 3.4 or 3.5: # Mac OS X, GPU enabled, Python 3.4 or 3.5:
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py3-none-any.whl $ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py3-none-any.whl
``` ```
Install TensorFlow: Install TensorFlow:
@ -159,37 +159,37 @@ Now, install TensorFlow just as you would for a regular Pip installation. First
```bash ```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7 # Ubuntu/Linux 64-bit, CPU only, Python 2.7
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7 # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below. # Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
# Mac OS X, CPU only, Python 2.7: # Mac OS X, CPU only, Python 2.7:
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py2-none-any.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py2-none-any.whl
# Mac OS X, GPU enabled, Python 2.7: # Mac OS X, GPU enabled, Python 2.7:
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py2-none-any.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py2-none-any.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.4 # Ubuntu/Linux 64-bit, CPU only, Python 3.4
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4 # Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below. # Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.5 # Ubuntu/Linux 64-bit, CPU only, Python 3.5
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5 # Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below. # Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl
# Mac OS X, CPU only, Python 3.4 or 3.5: # Mac OS X, CPU only, Python 3.4 or 3.5:
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py3-none-any.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py3-none-any.whl
# Mac OS X, GPU enabled, Python 3.4 or 3.5: # Mac OS X, GPU enabled, Python 3.4 or 3.5:
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py3-none-any.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py3-none-any.whl
``` ```
Finally install TensorFlow: Finally install TensorFlow:
@ -298,37 +298,37 @@ select the correct binary to install:
```bash ```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7 # Ubuntu/Linux 64-bit, CPU only, Python 2.7
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7 # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below. # Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
# Mac OS X, CPU only, Python 2.7: # Mac OS X, CPU only, Python 2.7:
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py2-none-any.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py2-none-any.whl
# Mac OS X, GPU enabled, Python 2.7: # Mac OS X, GPU enabled, Python 2.7:
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py2-none-any.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py2-none-any.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.4 # Ubuntu/Linux 64-bit, CPU only, Python 3.4
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4 # Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below. # Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.5 # Ubuntu/Linux 64-bit, CPU only, Python 3.5
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5 # Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below. # Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl
# Mac OS X, CPU only, Python 3.4 or 3.5: # Mac OS X, CPU only, Python 3.4 or 3.5:
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py3-none-any.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py3-none-any.whl
# Mac OS X, GPU enabled, Python 3.4 or 3.5: # Mac OS X, GPU enabled, Python 3.4 or 3.5:
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py3-none-any.whl (tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py3-none-any.whl
``` ```
Finally install TensorFlow: Finally install TensorFlow:
@ -396,13 +396,13 @@ code.
code. code.
We also have tags with `latest` replaced by a released version (e.g., We also have tags with `latest` replaced by a released version (e.g.,
`0.11.0-gpu`). `0.11.0rc1-gpu`).
With Docker the installation is as follows: With Docker the installation is as follows:
* Install Docker on your machine. * Install Docker on your machine.
* Create a [Docker * Create a [Docker
group](http://docs.docker.com/engine/installation/ubuntulinux/#create-a-docker-group) group](https://docs.docker.com/engine/installation/linux/ubuntulinux/#/create-a-docker-group)
to allow launching containers without `sudo`. to allow launching containers without `sudo`.
* Launch a Docker container with the TensorFlow image. The image * Launch a Docker container with the TensorFlow image. The image
gets downloaded automatically on first launch. gets downloaded automatically on first launch.
@ -780,7 +780,7 @@ $ bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_pack
$ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg $ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
# The name of the .whl file will depend on your platform. # The name of the .whl file will depend on your platform.
$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0rc0-py2-none-any.whl $ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0rc1-py2-none-any.whl
``` ```
## Setting up TensorFlow for Development ## Setting up TensorFlow for Development

View File

@ -222,12 +222,12 @@ To define a feature column for a categorical feature, we can create a
feature values of a column and there are only a few of them, you can use feature values of a column and there are only a few of them, you can use
`sparse_column_with_keys`. Each key in the list will get assigned an `sparse_column_with_keys`. Each key in the list will get assigned an
auto-incremental ID starting from 0. For example, for the `gender` column we can auto-incremental ID starting from 0. For example, for the `gender` column we can
assign the feature string "female" to an integer ID of 0 and "male" to 1 by assign the feature string "Female" to an integer ID of 0 and "Male" to 1 by
doing: doing:
```python ```python
gender = tf.contrib.layers.sparse_column_with_keys( gender = tf.contrib.layers.sparse_column_with_keys(
column_name="gender", keys=["female", "male"]) column_name="gender", keys=["Female", "Male"])
``` ```
What if we don't know the set of possible values in advance? Not a problem. We What if we don't know the set of possible values in advance? Not a problem. We

View File

@ -16,7 +16,8 @@ large-scale regression and classification problems with sparse input features
you're interested in learning more about how Wide & Deep Learning works, please you're interested in learning more about how Wide & Deep Learning works, please
check out our [research paper](http://arxiv.org/abs/1606.07792). check out our [research paper](http://arxiv.org/abs/1606.07792).
![Wide & Deep Spectrum of Models](../../images/wide_n_deep.svg "Wide & Deep") ![Wide & Deep Spectrum of Models]
(../../images/wide_n_deep.svg "Wide & Deep")
The figure above shows a comparison of a wide model (logistic regression with The figure above shows a comparison of a wide model (logistic regression with
sparse features and transformations), a deep model (feed-forward neural network sparse features and transformations), a deep model (feed-forward neural network
@ -85,7 +86,9 @@ part and the deep part of the model.
import tensorflow as tf import tensorflow as tf
# Categorical base columns. # Categorical base columns.
gender = tf.contrib.layers.sparse_column_with_keys(column_name="gender", keys=["female", "male"]) gender = tf.contrib.layers.sparse_column_with_keys(column_name="gender", keys=["Female", "Male"])
race = tf.contrib.layers.sparse_column_with_keys(column_name="race", keys=[
"Amer-Indian-Eskimo", "Asian-Pac-Islander", "Black", "Other", "White"])
education = tf.contrib.layers.sparse_column_with_hash_bucket("education", hash_bucket_size=1000) education = tf.contrib.layers.sparse_column_with_hash_bucket("education", hash_bucket_size=1000)
relationship = tf.contrib.layers.sparse_column_with_hash_bucket("relationship", hash_bucket_size=100) relationship = tf.contrib.layers.sparse_column_with_hash_bucket("relationship", hash_bucket_size=100)
workclass = tf.contrib.layers.sparse_column_with_hash_bucket("workclass", hash_bucket_size=100) workclass = tf.contrib.layers.sparse_column_with_hash_bucket("workclass", hash_bucket_size=100)

View File

@ -391,4 +391,5 @@ def maybe_download_and_extract():
print() print()
statinfo = os.stat(filepath) statinfo = os.stat(filepath)
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
tarfile.open(filepath, 'r:gz').extractall(dest_directory)
tarfile.open(filepath, 'r:gz').extractall(dest_directory)

View File

@ -339,7 +339,7 @@ def main(_):
tf.scalar_summary("Validation Loss", mvalid.cost) tf.scalar_summary("Validation Loss", mvalid.cost)
with tf.name_scope("Test"): with tf.name_scope("Test"):
test_input = PTBInput(config=config, data=test_data, name="TestInput") test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
with tf.variable_scope("Model", reuse=True, initializer=initializer): with tf.variable_scope("Model", reuse=True, initializer=initializer):
mtest = PTBModel(is_training=False, config=eval_config, mtest = PTBModel(is_training=False, config=eval_config,
input_=test_input) input_=test_input)
@ -347,7 +347,7 @@ def main(_):
sv = tf.train.Supervisor(logdir=FLAGS.save_path) sv = tf.train.Supervisor(logdir=FLAGS.save_path)
with sv.managed_session() as session: with sv.managed_session() as session:
for i in range(config.max_max_epoch): for i in range(config.max_max_epoch):
lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0) lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
m.assign_lr(session, config.learning_rate * lr_decay) m.assign_lr(session, config.learning_rate * lr_decay)
print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr))) print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))

View File

@ -213,7 +213,7 @@ tf_py_test(
additional_deps = ["//tensorflow:tensorflow_py"], additional_deps = ["//tensorflow:tensorflow_py"],
) )
tf_py_test( cuda_py_test(
name = "matrix_triangular_solve_op_test", name = "matrix_triangular_solve_op_test",
size = "small", size = "small",
srcs = ["matrix_triangular_solve_op_test.py"], srcs = ["matrix_triangular_solve_op_test.py"],

View File

@ -21,6 +21,7 @@ from __future__ import print_function
import numpy as np import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf import tensorflow as tf
from tensorflow.python.client import device_lib
class Conv2DTransposeTest(tf.test.TestCase): class Conv2DTransposeTest(tf.test.TestCase):
@ -157,6 +158,119 @@ class Conv2DTransposeTest(tf.test.TestCase):
err_tolerance = 0.0005 err_tolerance = 0.0005
self.assertLess(err, err_tolerance) self.assertLess(err, err_tolerance)
def testConv2DTransposeSingleStrideNCHW(self):
# `NCHW` data format is only supported on `GPU` devices.
if tf.test.is_gpu_available():
with self.test_session(use_gpu=True):
strides = [1, 1, 1, 1]
# Input, output: [batch, depth, height, width]
x_shape = [2, 3, 6, 4]
y_shape = [2, 2, 6, 4]
# Filter: [kernel_height, kernel_width, output_depth, input_depth]
f_shape = [3, 3, 2, 3]
x = tf.constant(1.0, shape=x_shape, name="x", dtype=tf.float32)
f = tf.constant(1.0, shape=f_shape, name="filter", dtype=tf.float32)
output = tf.nn.conv2d_transpose(x, f, y_shape, strides=strides,
padding="SAME", data_format='NCHW')
value = output.eval()
for n in xrange(x_shape[0]):
for k in xrange(f_shape[2]):
for w in xrange(y_shape[3]):
for h in xrange(y_shape[2]):
target = 4 * 3.0
h_in = h > 0 and h < y_shape[2] - 1
w_in = w > 0 and w < y_shape[3] - 1
if h_in and w_in:
target += 5 * 3.0
elif h_in or w_in:
target += 2 * 3.0
self.assertAllClose(target, value[n, k, h, w])
def testConv2DTransposeSameNCHW(self):
# `NCHW` data format is only supported on `GPU` devices.
if tf.test.is_gpu_available():
with self.test_session(use_gpu=True):
strides = [1, 1, 2, 2]
# Input, output: [batch, depth, height, width]
x_shape = [2, 3, 6, 4]
y_shape = [2, 2, 12, 8]
# Filter: [kernel_height, kernel_width, output_depth, input_depth]
f_shape = [3, 3, 2, 3]
x = tf.constant(1.0, shape=x_shape, name="x", dtype=tf.float32)
f = tf.constant(1.0, shape=f_shape, name="filter", dtype=tf.float32)
output = tf.nn.conv2d_transpose(x, f, y_shape, strides=strides,
padding="SAME", data_format='NCHW')
value = output.eval()
for n in xrange(x_shape[0]):
for k in xrange(f_shape[2]):
for w in xrange(y_shape[3]):
for h in xrange(y_shape[2]):
target = 3.0
# We add a case for locations divisible by the stride.
h_in = h % strides[2] == 0 and h > 0 and h < y_shape[2] - 1
w_in = w % strides[3] == 0 and w > 0 and w < y_shape[3] - 1
if h_in and w_in:
target += 9.0
elif h_in or w_in:
target += 3.0
self.assertAllClose(target, value[n, k, h, w])
def testConv2DTransposeValidNCHW(self):
# `NCHW` data format is only supported on `GPU` devices.
if tf.test.is_gpu_available():
with self.test_session(use_gpu=True):
strides = [1, 1, 2, 2]
# Input, output: [batch, depth, height, width]
x_shape = [2, 3, 6, 4]
y_shape = [2, 2, 13, 9]
# Filter: [kernel_height, kernel_width, output_depth, input_depth]
f_shape = [3, 3, 2, 3]
x = tf.constant(1.0, shape=x_shape, name="x", dtype=tf.float32)
f = tf.constant(1.0, shape=f_shape, name="filter", dtype=tf.float32)
output = tf.nn.conv2d_transpose(x, f, y_shape, strides=strides,
padding="VALID", data_format='NCHW')
value = output.eval()
cache_values = np.zeros(y_shape, dtype=np.float32)
# The amount of padding added
pad = 1
for n in xrange(x_shape[0]):
for k in xrange(f_shape[2]):
for w in xrange(pad, y_shape[3] - pad):
for h in xrange(pad, y_shape[2] - pad):
target = 3.0
# We add a case for locations divisible by the stride.
h_in = h % strides[2] == 0 and h > pad and h < y_shape[2] - 1 - pad
w_in = w % strides[3] == 0 and w > pad and w < y_shape[3] - 1 - pad
if h_in and w_in:
target += 9.0
elif h_in or w_in:
target += 3.0
cache_values[n, k, h, w] = target
# copy values in the border
cache_values[n, k, :, 0] = cache_values[n, k, :, 1]
cache_values[n, k, :, -1] = cache_values[n, k, :, -2]
cache_values[n, k, 0, :] = cache_values[n, k, 1, :]
cache_values[n, k, -1, :] = cache_values[n, k, -2, :]
self.assertAllClose(cache_values, value)
if __name__ == "__main__": if __name__ == "__main__":
tf.test.main() tf.test.main()

View File

@ -1356,6 +1356,18 @@ class SelectOpTest(tf.test.TestCase):
elif x.dtype == np.float64: elif x.dtype == np.float64:
self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5) self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)
def testScalar(self):
c = True
x = np.random.rand(1, 3, 2) * 100
y = np.random.rand(1, 3, 2) * 100
for t in [np.float16, np.float32, np.float64, np.int32, np.int64,
np.complex64, np.complex128]:
xt = x.astype(t)
yt = y.astype(t)
self._compare(c, xt, yt, use_gpu=False)
if t in [np.float16, np.float32, np.float64]:
self._compare(c, xt, yt, use_gpu=True)
def testBasic(self): def testBasic(self):
c = np.random.randint(0, 2, 6).astype(np.bool).reshape(1, 3, 2) c = np.random.randint(0, 2, 6).astype(np.bool).reshape(1, 3, 2)
x = np.random.rand(1, 3, 2) * 100 x = np.random.rand(1, 3, 2) * 100

View File

@ -24,15 +24,17 @@ import tensorflow as tf
class MatrixTriangularSolveOpTest(tf.test.TestCase): class MatrixTriangularSolveOpTest(tf.test.TestCase):
def _verifySolveAllWays(self, x, y, batch_dims=None): def _verifySolveAllWays(self, x, y, batch_dims=None):
for lower in True, False: for use_gpu in True, False:
for adjoint in True, False: for lower in True, False:
self._verifySolve(x, for adjoint in True, False:
y, self._verifySolve(x,
lower=lower, y,
adjoint=adjoint, lower=lower,
batch_dims=batch_dims) adjoint=adjoint,
batch_dims=batch_dims,
use_gpu=use_gpu)
def _verifySolve(self, x, y, lower=True, adjoint=False, batch_dims=None): def _verifySolve(self, x, y, lower=True, adjoint=False, batch_dims=None, use_gpu=False):
for np_type in [np.float32, np.float64]: for np_type in [np.float32, np.float64]:
a = x.astype(np_type) a = x.astype(np_type)
b = y.astype(np_type) b = y.astype(np_type)
@ -52,7 +54,7 @@ class MatrixTriangularSolveOpTest(tf.test.TestCase):
a_np = np.tile(a_np, batch_dims + [1, 1]) a_np = np.tile(a_np, batch_dims + [1, 1])
b = np.tile(b, batch_dims + [1, 1]) b = np.tile(b, batch_dims + [1, 1])
with self.test_session(): with self.test_session(use_gpu=use_gpu):
tf_ans = tf.matrix_triangular_solve(a, b, lower=lower, adjoint=adjoint) tf_ans = tf.matrix_triangular_solve(a, b, lower=lower, adjoint=adjoint)
out = tf_ans.eval() out = tf_ans.eval()
np_ans = np.linalg.solve(a_np, b) np_ans = np.linalg.solve(a_np, b)

View File

@ -264,6 +264,42 @@ class EluTest(tf.test.TestCase):
print("elu (float64) gradient err = ", err) print("elu (float64) gradient err = ", err)
self.assertLess(err, 1e-6) self.assertLess(err, 1e-6)
def testGradGradFloat32(self):
with self.test_session():
x = tf.constant(
[-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
shape=[2, 5], name="x")
y = tf.nn.elu(x, name="elu")
z = tf.gradients(y, x)
x_init = np.asarray(
[[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
dtype=np.float32, order="F")
err = tf.test.compute_gradient_error(x,
[2, 5],
z[0],
[2, 5],
x_init_value=x_init)
print("elu (float32) gradient of gradient err = ", err)
self.assertLess(err, 1e-4)
def testGradGradFloat64(self):
with self.test_session():
x = tf.constant(
[-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
shape=[2, 5], dtype=tf.float64, name="x")
y = tf.nn.elu(x, name="elu")
z = tf.gradients(y, x)
x_init = np.asarray(
[[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
dtype=np.float64, order="F")
err = tf.test.compute_gradient_error(x,
[2, 5],
z[0],
[2, 5],
x_init_value=x_init)
print("elu (float64) gradient of gradient err = ", err)
self.assertLess(err, 1e-6)
if __name__ == "__main__": if __name__ == "__main__":
tf.test.main() tf.test.main()

View File

@ -1795,7 +1795,7 @@ def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):
performed performed
instead: instead:
```prettyprint ```prettyprint
tf.cumprod([a, b, c], exclusive=True) ==> [0, a, a * b] tf.cumprod([a, b, c], exclusive=True) ==> [1, a, a * b]
``` ```
By setting the `reverse` kwarg to `True`, the cumprod is performed in the By setting the `reverse` kwarg to `True`, the cumprod is performed in the
@ -1807,7 +1807,7 @@ def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):
The `reverse` and `exclusive` kwargs can also be combined: The `reverse` and `exclusive` kwargs can also be combined:
```prettyprint ```prettyprint
tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 0] tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 1]
``` ```
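With concrete numbers, an illustrative check of the corrected identities above:

```python
import tensorflow as tf

with tf.Session() as sess:
  x = tf.constant([2., 3., 5.])
  print(sess.run(tf.cumprod(x)))                                # [2. 6. 30.]
  print(sess.run(tf.cumprod(x, exclusive=True)))                # [1. 2. 6.]
  print(sess.run(tf.cumprod(x, exclusive=True, reverse=True)))  # [15. 5. 1.]
```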
Args: Args:

View File

@ -25,7 +25,7 @@ from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import sparse_ops
from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import gen_math_ops
@ops.RegisterGradient("Conv2DBackpropInput") @ops.RegisterGradient("Conv2DBackpropInput")
def _Conv2DBackpropInputGrad(op, grad): def _Conv2DBackpropInputGrad(op, grad):
@ -268,6 +268,14 @@ def _ReluGrad(op, grad):
return gen_nn_ops._relu_grad(grad, op.outputs[0]) return gen_nn_ops._relu_grad(grad, op.outputs[0])
@ops.RegisterGradient("EluGrad")
def _EluGradGrad(op, grad):
x = op.inputs[1]
return (gen_nn_ops._elu_grad(grad, op.outputs[0]),
gen_math_ops.select(x < 0., gen_nn_ops._elu_grad(grad, op.outputs[0] + 1),
array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype)))
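For reference, the calculus this gradient-of-gradient encodes, as a NumPy sketch rather than the registered op:

```python
import numpy as np

# elu(x)   = x if x > 0 else exp(x) - 1
# elu'(x)  = 1 if x > 0 else exp(x)  (= elu(x) + 1)
# elu''(x) = 0 if x > 0 else exp(x), which is why the select() above
# zeroes the second term wherever x >= 0.
x = np.linspace(-2., 2., 9)
elu = np.where(x > 0, x, np.exp(x) - 1)
d_elu = np.where(x > 0, 1., elu + 1)    # first derivative
dd_elu = np.where(x > 0, 0., elu + 1)   # second derivative
```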
@ops.RegisterGradient("Relu6") @ops.RegisterGradient("Relu6")
def _Relu6Grad(op, grad): def _Relu6Grad(op, grad):
return gen_nn_ops._relu6_grad(grad, op.inputs[0]) return gen_nn_ops._relu6_grad(grad, op.inputs[0])

View File

@ -1010,6 +1010,7 @@ def conv2d_transpose(value,
output_shape, output_shape,
strides, strides,
padding="SAME", padding="SAME",
data_format="NHWC",
name=None): name=None):
"""The transpose of `conv2d`. """The transpose of `conv2d`.
@ -1020,7 +1021,8 @@ def conv2d_transpose(value,
Args: Args:
value: A 4-D `Tensor` of type `float` and shape value: A 4-D `Tensor` of type `float` and shape
`[batch, height, width, in_channels]`. `[batch, height, width, in_channels]` for `NHWC` data format or
`[batch, in_channels, height, width]` for `NCHW` data format.
filter: A 4-D `Tensor` with the same type as `value` and shape filter: A 4-D `Tensor` with the same type as `value` and shape
`[height, width, output_channels, in_channels]`. `filter`'s `[height, width, output_channels, in_channels]`. `filter`'s
`in_channels` dimension must match that of `value`. `in_channels` dimension must match that of `value`.
@ -1030,6 +1032,7 @@ def conv2d_transpose(value,
dimension of the input tensor. dimension of the input tensor.
padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
See the [comment here](https://www.tensorflow.org/api_docs/python/nn.html#convolution) See the [comment here](https://www.tensorflow.org/api_docs/python/nn.html#convolution)
data_format: A string. 'NHWC' and 'NCHW' are supported.
name: Optional name for the returned tensor. name: Optional name for the returned tensor.
Returns: Returns:
@ -1041,9 +1044,12 @@ def conv2d_transpose(value,
""" """
with ops.name_scope(name, "conv2d_transpose", with ops.name_scope(name, "conv2d_transpose",
[value, filter, output_shape]) as name: [value, filter, output_shape]) as name:
if data_format not in ("NCHW", "NHWC"):
raise ValueError("data_format has to be either NCHW or NHWC.")
value = ops.convert_to_tensor(value, name="value") value = ops.convert_to_tensor(value, name="value")
filter = ops.convert_to_tensor(filter, name="filter") filter = ops.convert_to_tensor(filter, name="filter")
if not value.get_shape()[3].is_compatible_with(filter.get_shape()[3]): axis = 3 if data_format=="NHWC" else 1
if not value.get_shape()[axis].is_compatible_with(filter.get_shape()[3]):
raise ValueError("input channels does not match filter's input channels, " raise ValueError("input channels does not match filter's input channels, "
"{} != {}".format(value.get_shape()[3], filter.get_shape( "{} != {}".format(value.get_shape()[3], filter.get_shape(
)[3])) )[3]))
@ -1055,10 +1061,10 @@ def conv2d_transpose(value,
if isinstance(output_shape, (list, np.ndarray)): if isinstance(output_shape, (list, np.ndarray)):
# output_shape's shape should be == [4] if reached this point. # output_shape's shape should be == [4] if reached this point.
if not filter.get_shape()[2].is_compatible_with(output_shape[3]): if not filter.get_shape()[2].is_compatible_with(output_shape[axis]):
raise ValueError( raise ValueError(
"output_shape does not match filter's output channels, " "output_shape does not match filter's output channels, "
"{} != {}".format(output_shape[3], filter.get_shape()[2])) "{} != {}".format(output_shape[axis], filter.get_shape()[2]))
if padding != "VALID" and padding != "SAME": if padding != "VALID" and padding != "SAME":
raise ValueError("padding must be either VALID or SAME:" raise ValueError("padding must be either VALID or SAME:"
@ -1069,6 +1075,7 @@ def conv2d_transpose(value,
out_backprop=value, out_backprop=value,
strides=strides, strides=strides,
padding=padding, padding=padding,
data_format=data_format,
name=name) name=name)
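An illustrative use of the new argument, with shapes borrowed from the NCHW tests added above (NCHW currently assumes a GPU device):

```python
import tensorflow as tf

x = tf.ones([2, 3, 6, 4])    # NCHW: [batch, in_channels, height, width]
f = tf.ones([3, 3, 2, 3])    # [height, width, output_channels, in_channels]
y = tf.nn.conv2d_transpose(x, f, output_shape=[2, 2, 12, 8],
                           strides=[1, 1, 2, 2], padding="SAME",
                           data_format="NCHW")
```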

View File

@ -68,7 +68,7 @@ def exponential_decay(learning_rate, global_step, decay_steps, decay_rate,
Must be positive. See the decay computation above. Must be positive. See the decay computation above.
decay_rate: A scalar `float32` or `float64` `Tensor` or a decay_rate: A scalar `float32` or `float64` `Tensor` or a
Python number. The decay rate. Python number. The decay rate.
staircase: Boolean. It `True` decay the learning rate at discrete intervals staircase: Boolean. If `True` decay the learning rate at discrete intervals
name: String. Optional name of the operation. Defaults to name: String. Optional name of the operation. Defaults to
'ExponentialDecay'. 'ExponentialDecay'.
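In sketch form, the decay computation those arguments feed is:

```python
# Documented behavior, not the library source:
decayed_learning_rate = learning_rate * decay_rate ** (global_step / decay_steps)
# With staircase=True, global_step / decay_steps is an integer division,
# so the learning rate decays in discrete intervals rather than smoothly.
```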

View File

@ -15,7 +15,10 @@ limitations under the License.
#include "tensorflow/stream_executor/cuda/cuda_diagnostics.h" #include "tensorflow/stream_executor/cuda/cuda_diagnostics.h"
#if !defined(PLATFORM_WINDOWS)
#include <dirent.h> #include <dirent.h>
#endif
#include <limits.h> #include <limits.h>
#include <stddef.h> #include <stddef.h>
#include <stdio.h> #include <stdio.h>
@ -25,11 +28,13 @@ limitations under the License.
#include <IOKit/kext/KextManager.h> #include <IOKit/kext/KextManager.h>
#include <mach-o/dyld.h> #include <mach-o/dyld.h>
#else #else
#if !defined(PLATFORM_WINDOWS)
#include <link.h> #include <link.h>
#include <sys/stat.h>
#include <sys/sysmacros.h> #include <sys/sysmacros.h>
#endif
#include <unistd.h> #include <unistd.h>
#endif
#include <sys/stat.h>
#endif
#include <algorithm> #include <algorithm>
#include <memory> #include <memory>
#include <vector> #include <vector>
@ -135,7 +140,7 @@ void Diagnostician::LogDiagnosticInformation() {
<< "(" << port::Hostname() << ")"; << "(" << port::Hostname() << ")";
} }
CFRelease(kext_infos); CFRelease(kext_infos);
#else #elif !defined(PLATFORM_WINDOWS)
if (access(kDriverVersionPath, F_OK) != 0) { if (access(kDriverVersionPath, F_OK) != 0) {
LOG(INFO) << "kernel driver does not appear to be running on this host " LOG(INFO) << "kernel driver does not appear to be running on this host "
<< "(" << port::Hostname() << "): " << "(" << port::Hostname() << "): "
@ -158,7 +163,7 @@ void Diagnostician::LogDiagnosticInformation() {
/* static */ void Diagnostician::LogDriverVersionInformation() { /* static */ void Diagnostician::LogDriverVersionInformation() {
LOG(INFO) << "hostname: " << port::Hostname(); LOG(INFO) << "hostname: " << port::Hostname();
#ifndef PLATFORM_WINDOWS
if (VLOG_IS_ON(1)) { if (VLOG_IS_ON(1)) {
const char *value = getenv("LD_LIBRARY_PATH"); const char *value = getenv("LD_LIBRARY_PATH");
string library_path = value == nullptr ? "" : value; string library_path = value == nullptr ? "" : value;
@ -180,17 +185,17 @@ void Diagnostician::LogDiagnosticInformation() {
closedir(dir); closedir(dir);
} }
} }
port::StatusOr<DriverVersion> dso_version = FindDsoVersion(); port::StatusOr<DriverVersion> dso_version = FindDsoVersion();
LOG(INFO) << "libcuda reported version is: " LOG(INFO) << "libcuda reported version is: "
<< DriverVersionStatusToString(dso_version); << DriverVersionStatusToString(dso_version);
port::StatusOr<DriverVersion> kernel_version = FindKernelDriverVersion(); port::StatusOr<DriverVersion> kernel_version = FindKernelDriverVersion();
LOG(INFO) << "kernel reported version is: " LOG(INFO) << "kernel reported version is: "
<< DriverVersionStatusToString(kernel_version); << DriverVersionStatusToString(kernel_version);
#endif
// OS X kernel driver does not report version accurately // OS X kernel driver does not report version accurately
#if !defined(__APPLE__) #if !defined(__APPLE__) && !defined(PLATFORM_WINDOWS)
if (kernel_version.ok() && dso_version.ok()) { if (kernel_version.ok() && dso_version.ok()) {
WarnOnDsoKernelMismatch(dso_version, kernel_version); WarnOnDsoKernelMismatch(dso_version, kernel_version);
} }
@ -227,6 +232,7 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() {
result = StringToDriverVersion(version); result = StringToDriverVersion(version);
} }
#else #else
#if !defined(PLATFORM_WINDOWS)
// Callback used when iterating through DSOs. Looks for the driver-interfacing // Callback used when iterating through DSOs. Looks for the driver-interfacing
// DSO and yields its version number into the callback data, when found. // DSO and yields its version number into the callback data, when found.
auto iterate_phdr = auto iterate_phdr =
@ -258,6 +264,7 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() {
}; };
dl_iterate_phdr(iterate_phdr, &result); dl_iterate_phdr(iterate_phdr, &result);
#endif
#endif #endif
return result; return result;

View File

@ -3200,6 +3200,7 @@ bool CudnnSupport::DoNormalize(
Stream* stream, const dnn::NormalizeDescriptor& normalize_descriptor, Stream* stream, const dnn::NormalizeDescriptor& normalize_descriptor,
const DeviceMemory<float>& input_data, DeviceMemory<float>* output_data) { const DeviceMemory<float>& input_data, DeviceMemory<float>* output_data) {
LOG(FATAL) << "not yet implemented"; // TODO(leary) LOG(FATAL) << "not yet implemented"; // TODO(leary)
return false;
} }
bool CudnnSupport::DoNormalizeWithDimensions( bool CudnnSupport::DoNormalizeWithDimensions(

View File

@ -19,8 +19,8 @@ limitations under the License.
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <set> #include <set>
#include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/cuda/cuda_diagnostics.h" #include "tensorflow/stream_executor/cuda/cuda_diagnostics.h"
#include "tensorflow/stream_executor/dso_loader.h" #include "tensorflow/stream_executor/dso_loader.h"
#include "tensorflow/stream_executor/lib/casts.h" #include "tensorflow/stream_executor/lib/casts.h"
@ -38,6 +38,14 @@ limitations under the License.
#include "tensorflow/stream_executor/platform/port.h" #include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/lib/inlined_vector.h" #include "tensorflow/stream_executor/lib/inlined_vector.h"
#if defined(PLATFORM_WINDOWS)
// TODO: on Windows, ARRAYSIZE is defined in winnt.h, but including it
// here creates a conflict with cuda.h - for now define it here.
#define ARRAYSIZE(a) \
((sizeof(a) / sizeof(*(a))) / \
static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
#endif
bool FLAGS_gpuexec_cuda_driver_inject_init_error = false; bool FLAGS_gpuexec_cuda_driver_inject_init_error = false;
bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false; bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false;
bool FLAGS_gpuexec_cuda_device_0_only = false; bool FLAGS_gpuexec_cuda_device_0_only = false;

View File

@ -18,8 +18,12 @@ limitations under the License.
#if defined(__APPLE__) #if defined(__APPLE__)
#include <mach-o/dyld.h> #include <mach-o/dyld.h>
#endif #endif
#if defined(PLATFORM_WINDOWS)
#include <windows.h>
#define PATH_MAX MAX_PATH
#else
#include <unistd.h> #include <unistd.h>
#endif
#include "tensorflow/stream_executor/cuda/cuda_diagnostics.h" #include "tensorflow/stream_executor/cuda/cuda_diagnostics.h"
#include "tensorflow/stream_executor/cuda/cuda_driver.h" #include "tensorflow/stream_executor/cuda/cuda_driver.h"
#include "tensorflow/stream_executor/cuda/cuda_event.h" #include "tensorflow/stream_executor/cuda/cuda_event.h"
@ -204,7 +208,12 @@ static string GetBinaryDir(bool strip_exe) {
_NSGetExecutablePath(unresolved_path, &buffer_size); _NSGetExecutablePath(unresolved_path, &buffer_size);
CHECK_ERR(realpath(unresolved_path, exe_path) ? 1 : -1); CHECK_ERR(realpath(unresolved_path, exe_path) ? 1 : -1);
#else #else
CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1)); #if defined(PLATFORM_WINDOWS)
HMODULE hModule = GetModuleHandle(NULL);
GetModuleFileName(hModule, exe_path, MAX_PATH);
#else
CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1));
#endif
#endif #endif
// Make sure it's null-terminated: // Make sure it's null-terminated:
exe_path[sizeof(exe_path) - 1] = 0; exe_path[sizeof(exe_path) - 1] = 0;
@ -908,8 +917,10 @@ static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) {
// could use the file::* utilities). // could use the file::* utilities).
FILE *file = fopen(filename.c_str(), "r"); FILE *file = fopen(filename.c_str(), "r");
if (file == nullptr) { if (file == nullptr) {
#if !defined(PLATFORM_WINDOWS)
LOG(ERROR) << "could not open file to read NUMA node: " << filename LOG(ERROR) << "could not open file to read NUMA node: " << filename
<< "\nYour kernel may have been built without NUMA support."; << "\nYour kernel may have been built without NUMA support.";
#endif
return kUnknownNumaNode; return kUnknownNumaNode;
} }

View File

@ -15,8 +15,6 @@ limitations under the License.
#include "tensorflow/stream_executor/cuda/cuda_rng.h" #include "tensorflow/stream_executor/cuda/cuda_rng.h"
#include <dlfcn.h>
#include "tensorflow/stream_executor/cuda/cuda_activation.h" #include "tensorflow/stream_executor/cuda/cuda_activation.h"
#include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h" #include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h"
#include "tensorflow/stream_executor/cuda/cuda_helpers.h" #include "tensorflow/stream_executor/cuda/cuda_helpers.h"

View File

@ -18,13 +18,17 @@ limitations under the License.
#include "tensorflow/stream_executor/dso_loader.h" #include "tensorflow/stream_executor/dso_loader.h"
#include <dlfcn.h>
#include <limits.h> #include <limits.h>
#if defined(__APPLE__) #if defined(__APPLE__)
#include <mach-o/dyld.h> #include <mach-o/dyld.h>
#endif #endif
#include <stdlib.h> #include <stdlib.h>
#if defined(PLATFORM_WINDOWS)
#include <windows.h>
#define PATH_MAX MAX_PATH
#else
#include <unistd.h> #include <unistd.h>
#endif
#include <initializer_list> #include <initializer_list>
#include <vector> #include <vector>
@ -45,7 +49,7 @@ string GetCudaVersion() { return TF_CUDA_VERSION; }
string GetCudnnVersion() { return TF_CUDNN_VERSION; } string GetCudnnVersion() { return TF_CUDNN_VERSION; }
/* static */ port::Status DsoLoader::GetCublasDsoHandle(void** dso_handle) { /* static */ port::Status DsoLoader::GetCublasDsoHandle(void** dso_handle) {
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName( return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
"cublas", GetCudaVersion()), "cublas", GetCudaVersion()),
GetCudaLibraryDirPath()), GetCudaLibraryDirPath()),
dso_handle); dso_handle);
@ -55,35 +59,42 @@ string GetCudnnVersion() { return TF_CUDNN_VERSION; }
// libcudnn is versioned differently than the other libraries and may have a // libcudnn is versioned differently than the other libraries and may have a
// different version number than other CUDA libraries. See b/22397368 for // different version number than other CUDA libraries. See b/22397368 for
// some details about the complications surrounding this. // some details about the complications surrounding this.
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName( return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
"cudnn", GetCudnnVersion()), "cudnn", GetCudnnVersion()),
GetCudaLibraryDirPath()), GetCudaLibraryDirPath()),
dso_handle); dso_handle);
} }
/* static */ port::Status DsoLoader::GetCufftDsoHandle(void** dso_handle) { /* static */ port::Status DsoLoader::GetCufftDsoHandle(void** dso_handle) {
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName( return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
"cufft", GetCudaVersion()), "cufft", GetCudaVersion()),
GetCudaLibraryDirPath()), GetCudaLibraryDirPath()),
dso_handle); dso_handle);
} }
/* static */ port::Status DsoLoader::GetCurandDsoHandle(void** dso_handle) { /* static */ port::Status DsoLoader::GetCurandDsoHandle(void** dso_handle) {
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName( return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
"curand", GetCudaVersion()), "curand", GetCudaVersion()),
GetCudaLibraryDirPath()), GetCudaLibraryDirPath()),
dso_handle); dso_handle);
} }
/* static */ port::Status DsoLoader::GetLibcudaDsoHandle(void** dso_handle) { /* static */ port::Status DsoLoader::GetLibcudaDsoHandle(void** dso_handle) {
#if defined(PLATFORM_WINDOWS)
return GetDsoHandle( return GetDsoHandle(
FindDsoPath(tensorflow::internal::FormatLibraryFileName("cuda", "1"), FindDsoPath(port::Env::Default()->FormatLibraryFileName("nvcuda", ""),
GetCudaDriverLibraryPath()), GetCudaDriverLibraryPath()),
dso_handle); dso_handle);
#else
return GetDsoHandle(
FindDsoPath(port::Env::Default()->FormatLibraryFileName("cuda", "1"),
GetCudaDriverLibraryPath()),
dso_handle);
#endif
} }
/* static */ port::Status DsoLoader::GetLibcuptiDsoHandle(void** dso_handle) { /* static */ port::Status DsoLoader::GetLibcuptiDsoHandle(void** dso_handle) {
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName( return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
"cupti", GetCudaVersion()), "cupti", GetCudaVersion()),
GetCudaCuptiLibraryPath()), GetCudaCuptiLibraryPath()),
dso_handle); dso_handle);
@ -101,8 +112,6 @@ string GetCudnnVersion() { return TF_CUDNN_VERSION; }
return port::Status(port::error::INVALID_ARGUMENT, return port::Status(port::error::INVALID_ARGUMENT,
"Only LoadKind::kLocal is currently supported"); "Only LoadKind::kLocal is currently supported");
} }
int dynload_flags =
RTLD_LAZY | (load_kind == LoadKind::kLocal ? RTLD_LOCAL : RTLD_GLOBAL);
string path_string = path.ToString(); string path_string = path.ToString();
port::Status s = port::Status s =
port::Env::Default()->LoadLibrary(path_string.c_str(), dso_handle); port::Env::Default()->LoadLibrary(path_string.c_str(), dso_handle);
@ -125,6 +134,9 @@ string GetCudnnVersion() { return TF_CUDNN_VERSION; }
char unresolved_path[buffer_size]; char unresolved_path[buffer_size];
_NSGetExecutablePath(unresolved_path, &buffer_size); _NSGetExecutablePath(unresolved_path, &buffer_size);
CHECK_ERR(realpath(unresolved_path, exe_path) ? 1 : -1); CHECK_ERR(realpath(unresolved_path, exe_path) ? 1 : -1);
#elif defined(PLATFORM_WINDOWS)
HMODULE hModule = GetModuleHandle(NULL);
GetModuleFileName(hModule, exe_path, MAX_PATH);
#else #else
CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1)); CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1));
#endif #endif
@ -159,6 +171,9 @@ static std::vector<string>* CreatePrimordialRpaths() {
} }
/* static */ bool DsoLoader::TrySymbolicDereference(string* candidate) { /* static */ bool DsoLoader::TrySymbolicDereference(string* candidate) {
#if defined(PLATFORM_WINDOWS)
return false;
#else
char buf[PATH_MAX]; char buf[PATH_MAX];
char* result = realpath(candidate->c_str(), buf); char* result = realpath(candidate->c_str(), buf);
if (result == nullptr) { if (result == nullptr) {
@ -168,6 +183,7 @@ static std::vector<string>* CreatePrimordialRpaths() {
<< result << "\""; << result << "\"";
*candidate = result; *candidate = result;
return true; return true;
#endif
} }
/* static */ string DsoLoader::FindDsoPath(port::StringPiece library_name, /* static */ string DsoLoader::FindDsoPath(port::StringPiece library_name,
@ -206,6 +222,8 @@ static std::vector<string>* CreatePrimordialRpaths() {
/* static */ string DsoLoader::GetCudaDriverLibraryPath() { /* static */ string DsoLoader::GetCudaDriverLibraryPath() {
#if defined(__APPLE__) #if defined(__APPLE__)
return "external/local_config_cuda/cuda/driver/lib"; return "external/local_config_cuda/cuda/driver/lib";
#elif defined(PLATFORM_WINDOWS)
return "";
#else #else
return "external/local_config_cuda/cuda/driver/lib64"; return "external/local_config_cuda/cuda/driver/lib64";
#endif #endif

View File

@ -15,8 +15,13 @@ limitations under the License.
#include "tensorflow/stream_executor/lib/process_state.h" #include "tensorflow/stream_executor/lib/process_state.h"
#if defined(PLATFORM_WINDOWS)
#include <direct.h>
#include <stdlib.h>
#include <WinSock2.h>
#else
#include <unistd.h> #include <unistd.h>
#endif
#include <memory> #include <memory>
namespace perftools { namespace perftools {
@ -27,7 +32,7 @@ string Hostname() {
char hostname[1024]; char hostname[1024];
gethostname(hostname, sizeof hostname); gethostname(hostname, sizeof hostname);
hostname[sizeof hostname - 1] = 0; hostname[sizeof hostname - 1] = 0;
return hostname; return std::string(hostname);
} }
bool GetCurrentDirectory(string* dir) { bool GetCurrentDirectory(string* dir) {

View File

@ -16,6 +16,10 @@ limitations under the License.
#ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_STATIC_THREADLOCAL_H_ #ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_STATIC_THREADLOCAL_H_
#define TENSORFLOW_STREAM_EXECUTOR_LIB_STATIC_THREADLOCAL_H_ #define TENSORFLOW_STREAM_EXECUTOR_LIB_STATIC_THREADLOCAL_H_
#ifdef _MSC_VER
#define __thread __declspec(thread)
#endif
// For POD types in TLS mode, s_obj_VAR is the thread-local variable. // For POD types in TLS mode, s_obj_VAR is the thread-local variable.
#define SE_STATIC_THREAD_LOCAL_POD(_Type_, _var_) \ #define SE_STATIC_THREAD_LOCAL_POD(_Type_, _var_) \
static __thread _Type_ s_obj_##_var_; \ static __thread _Type_ s_obj_##_var_; \

View File

@ -81,7 +81,7 @@ def ParseEventFilesSpec(logdir):
else: else:
run_name = None run_name = None
path = specification path = specification
if not io_wrapper.IsGCSPath(path): if not (io_wrapper.IsGCSPath(path) or path.startswith('hdfs://')):
path = os.path.realpath(path) path = os.path.realpath(path)
files[path] = run_name files[path] = run_name
return files return files

View File

@ -563,7 +563,7 @@ def _py_wrap_cc_impl(ctx):
for dep in ctx.attr.deps: for dep in ctx.attr.deps:
inputs += dep.cc.transitive_headers inputs += dep.cc.transitive_headers
inputs += ctx.files._swiglib inputs += ctx.files._swiglib
swig_include_dirs = set([f.root.path for f in inputs if f.root.path]) swig_include_dirs = set(_get_repository_roots(ctx, inputs))
swig_include_dirs += sorted([f.dirname for f in ctx.files._swiglib]) swig_include_dirs += sorted([f.dirname for f in ctx.files._swiglib])
args = ["-c++", args = ["-c++",
"-python", "-python",
@ -616,6 +616,35 @@ _py_wrap_cc = rule(
implementation = _py_wrap_cc_impl, implementation = _py_wrap_cc_impl,
) )
def _get_repository_roots(ctx, files):
"""Returns abnormal root directories under which files reside.
When running a ctx.action, source files within the main repository are all
relative to the current directory; however, files that are generated or exist
in remote repositories will have their root directory be a subdirectory,
e.g. bazel-out/local-fastbuild/genfiles/external/jpeg_archive. This function
returns the set of these devious directories, ranked and sorted by popularity
in order to hopefully minimize the number of I/O system calls within the
compiler, because includes have quadratic complexity.
"""
result = {}
for f in files:
root = f.root.path
if root:
if root not in result:
result[root] = 0
result[root] -= 1
work = f.owner.workspace_root
if work:
if root:
root += "/"
root += work
if root:
if root not in result:
result[root] = 0
result[root] -= 1
return [k for v, k in sorted([(v, k) for k, v in result.items()])]
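The return statement is a decorate-sort-undecorate ranking; a plain-Python sketch with hypothetical counts:

```python
# Counts are negated as they accumulate, so an ascending sort puts the
# most popular root first (values here are made up).
counts = {"bazel-out/local-fastbuild/genfiles": -3, "external/jpeg_archive": -1}
ranked = [k for v, k in sorted([(v, k) for k, v in counts.items()])]
# ranked == ["bazel-out/local-fastbuild/genfiles", "external/jpeg_archive"]
```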
# Bazel rule for collecting the header files that a target depends on. # Bazel rule for collecting the header files that a target depends on.
def _transitive_hdrs_impl(ctx): def _transitive_hdrs_impl(ctx):
outputs = set() outputs = set()

View File

@ -47,10 +47,6 @@
# TF_BUILD_BAZEL_CLEAN, if set to any non-empty and non-0 value, directs the # TF_BUILD_BAZEL_CLEAN, if set to any non-empty and non-0 value, directs the
# script to perform bazel clean prior to main build and test steps. # script to perform bazel clean prior to main build and test steps.
# #
# TF_BUILD_SERIAL_INSTALL_TESTS, if set to any non-empty and non-0 value,
# will force the Python install tests to run serially, overriding the
# concurrent testing behavior.
#
# TF_GPU_COUNT, Set the number of GPUs in the system. We run only this many # TF_GPU_COUNT, Set the number of GPUs in the system. We run only this many
# concurrent tests when running GPU tests. # concurrent tests when running GPU tests.
# #
@ -411,21 +407,21 @@ SKIP_COUNTER=0
FAILED_TESTS="" FAILED_TESTS=""
FAILED_TEST_LOGS="" FAILED_TEST_LOGS=""
N_JOBS=$(grep -c ^processor /proc/cpuinfo) if [[ "${IS_GPU}" == "1" ]]; then
if [[ -z ${N_JOBS} ]]; then
# Try the Mac way of getting number of CPUs
N_JOBS=$(sysctl -n hw.ncpu)
fi
if [[ -z ${N_JOBS} ]]; then
N_JOBS=8
echo "Cannot determine the number of processors"
echo "Using default concurrent job counter ${N_JOBS}"
fi
if [[ ! -z "${TF_BUILD_SERIAL_INSTALL_TESTS}" ]] &&
[[ "${TF_BUILD_SERIAL_INSTALL_TESTS}" != "0" ]]; then
N_JOBS=$TF_GPU_COUNT N_JOBS=$TF_GPU_COUNT
else
N_JOBS=$(grep -c ^processor /proc/cpuinfo)
if [[ -z ${N_JOBS} ]]; then
# Try the Mac way of getting number of CPUs
N_JOBS=$(sysctl -n hw.ncpu)
fi
# If still cannot determine the number of CPUs, pick 8.
if [[ -z ${N_JOBS} ]]; then
N_JOBS=8
echo "Cannot determine the number of processors"
echo "Using default concurrent job counter ${N_JOBS}"
fi
fi fi
echo "Running Python tests-on-install with ${N_JOBS} concurrent jobs..." echo "Running Python tests-on-install with ${N_JOBS} concurrent jobs..."
@@ -485,9 +481,14 @@ while true; do
   TEST_LOGS="${TEST_LOGS} ${TEST_LOG}"

   # Launch test asynchronously
-  "${SCRIPT_DIR}/../gpu_build/parallel_gpu_execute.sh" \
-    "${SCRIPT_DIR}/py_test_delegate.sh" \
-    "${PYTHON_BIN_PATH}" "${PY_TEST_DIR}/${TEST_BASENAME}" "${TEST_LOG}" &
+  if [[ "${IS_GPU}" == "1" ]]; then
+    "${SCRIPT_DIR}/../gpu_build/parallel_gpu_execute.sh" \
+      "${SCRIPT_DIR}/py_test_delegate.sh" \
+      "${PYTHON_BIN_PATH}" "${PY_TEST_DIR}/${TEST_BASENAME}" "${TEST_LOG}" &
+  else
+    "${SCRIPT_DIR}/py_test_delegate.sh" \
+      "${PYTHON_BIN_PATH}" "${PY_TEST_DIR}/${TEST_BASENAME}" "${TEST_LOG}" &
+  fi

   if [[ "${TEST_COUNTER}" -ge "${N_PAR_TESTS}" ]]; then
     # Run in exclusive mode

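The effect of routing GPU tests through parallel_gpu_execute.sh is to keep at most one test per GPU in flight while the loop keeps launching jobs in the background. A rough Python analogue (hypothetical; the real wrapper does its own GPU slot assignment) uses a counting semaphore:

import subprocess
import threading

GPU_SLOTS = threading.Semaphore(4)  # e.g. TF_GPU_COUNT=4

def run_gpu_test(cmd):
    with GPU_SLOTS:                       # wait for a free GPU slot
        subprocess.run(cmd, check=False)  # run the delegated test

# Eight tests queued, at most four running at once.
threads = [threading.Thread(target=run_gpu_test, args=(["true"],))
           for _ in range(8)]
for t in threads: t.start()
for t in threads: t.join()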
tensorflow/tools/ci_build/builds/test_tutorials.sh Normal file → Executable file
View File

@@ -146,7 +146,7 @@ test_mnist_with_summaries() {
   run_in_directory "${TEST_DIR}" "${LOG_FILE}" \
     tensorflow/examples/tutorials/mnist/mnist_with_summaries.py \
-    --data_dir="${TUT_TEST_DATA_DIR}/mnist" --summaries_dir="${SUMMARIES_DIR}"
+    --data_dir="${TUT_TEST_DATA_DIR}/mnist" --log_dir="${SUMMARIES_DIR}"

   # Verify final accuracy
   FINAL_ACCURACY=$(grep "Accuracy at step" "${LOG_FILE}" \

View File

@@ -103,10 +103,8 @@ WORKSPACE="${WORKSPACE:-$(upsearch WORKSPACE)}"
 BUILD_TAG="${BUILD_TAG:-tf_ci}"

 # Add extra params for cuda devices and libraries for GPU container.
-if [ "${CONTAINER_TYPE}" == "gpu" ]; then
-  # GPU pip tests-on-install concurrency is limited to the number of GPUs.
-  GPU_EXTRA_PARAMS="${GPU_EXTRA_PARAMS} -e TF_BUILD_SERIAL_INSTALL_TESTS=1"
-else
+# And clear them if we are not building for GPU.
+if [ "${CONTAINER_TYPE}" != "gpu" ]; then
   GPU_EXTRA_PARAMS=""
 fi

View File

@@ -16,7 +16,14 @@
 #
 # Builds the test server for distributed (GRPC) TensorFlow
 #
-# Usage: build_server.sh <docker_image_name> [--test]
+# Usage: build_server.sh <docker_image_name> <whl_url> [--test]
+#
+# Arguments:
+#   docker_image_name: Name of the docker image to build.
+#     E.g.: tensorflow/tf_grpc_test_server:0.11.0rc1
+#
+#   whl_url: URL from which the TensorFlow whl file will be downloaded.
+#     E.g.: https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
 #
 # The optional flag --test lets the script to use the Dockerfile for the
 # testing GRPC server. Without the flag, the script will build the non-test
@@ -33,22 +40,35 @@ die() {
 }

 # Check arguments
-if [[ $# != 1 ]] && [[ $# != 2 ]]; then
-  die "Usage: $0 <docker_image_name> [--test]"
+if [[ $# -lt 2 ]]; then
+  die "Usage: $0 <docker_image_name> <whl_url> [--test]"
 fi

 DOCKER_IMG_NAME=$1
-shift
+WHL_URL=$2
+shift 2

 # Current script directory
 DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-DOCKER_FILE="${DIR}/server/Dockerfile"
+
+BUILD_DIR=$(mktemp -d)
+echo ""
+echo "Using whl file URL: ${WHL_URL}"
+echo "Building in temporary directory: ${BUILD_DIR}"
+
+cp -r ${DIR}/* "${BUILD_DIR}"/ || \
+  die "Failed to copy files to ${BUILD_DIR}"
+
+DOCKER_FILE="${BUILD_DIR}/server/Dockerfile"
 if [[ $1 == "--test" ]]; then
-  DOCKER_FILE="${DIR}/server/Dockerfile.test"
+  DOCKER_FILE="${BUILD_DIR}/server/Dockerfile.test"
 fi
 echo "Using Docker file: ${DOCKER_FILE}"

+# Download whl file into the build context directory.
+wget -P "${BUILD_DIR}" ${WHL_URL} || \
+  die "Failed to download tensorflow whl file from URL: ${WHL_URL}"
+
 if [[ ! -f "${DOCKER_FILE}" ]]; then
   die "ERROR: Unable to find dockerfile: ${DOCKER_FILE}"
 fi
@@ -56,5 +76,8 @@ echo "Dockerfile: ${DOCKER_FILE}"
 # Call docker build
 docker build --no-cache -t "${DOCKER_IMG_NAME}" \
-  -f "${DOCKER_FILE}" \
-  "${DIR}"
+  -f "${DOCKER_FILE}" "${BUILD_DIR}" || \
+  die "Failed to build docker image: ${DOCKER_IMG_NAME}"
+
+# Clean up docker build context directory.
+rm -rf "${BUILD_DIR}"
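The new flow stages a throwaway docker build context rather than building straight from the source tree. A condensed Python sketch of the same pattern (hypothetical helper; the script above does this in bash with mktemp, cp, and wget):

import os
import shutil
import subprocess
import tempfile
import urllib.request

def build_image(image_name, whl_url, src_dir, dockerfile="server/Dockerfile"):
    build_dir = tempfile.mkdtemp()  # mktemp -d
    try:
        shutil.copytree(src_dir, build_dir, dirs_exist_ok=True)  # cp -r
        urllib.request.urlretrieve(  # wget -P into the context
            whl_url, os.path.join(build_dir, os.path.basename(whl_url)))
        subprocess.check_call(
            ["docker", "build", "--no-cache", "-t", image_name,
             "-f", os.path.join(build_dir, dockerfile), build_dir])
    finally:
        shutil.rmtree(build_dir)  # rm -rf the temporary context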

View File

@@ -34,9 +34,10 @@ RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
     python get-pip.py && \
     rm get-pip.py

-# Install TensorFlow CPU version from nightly build
-RUN pip --no-cache-dir install \
-    https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
+# Install TensorFlow wheel
+COPY tensorflow-*.whl /
+RUN pip install /tensorflow-*.whl && \
+    rm -f /tensorflow-*.whl

 # Copy files, including the GRPC server binary at
 # server/grpc_tensorflow_server.py

View File

@@ -40,9 +40,10 @@ RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
 # Install python panda for the census wide&deep test
 RUN pip install --upgrade pandas==0.18.1

-# Install TensorFlow CPU version.
-RUN pip --no-cache-dir install \
-    https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
+# Install TensorFlow wheel
+COPY tensorflow-*.whl /
+RUN pip install /tensorflow-*.whl && \
+    rm -f /tensorflow-*.whl

 # Copy files, including the GRPC server binary at
 # server/grpc_tensorflow_server.py

View File

@@ -33,7 +33,7 @@ RUN pip --no-cache-dir install \
     && \
     python -m ipykernel.kernelspec

-ENV TENSORFLOW_VERSION 0.11.0rc0
+ENV TENSORFLOW_VERSION 0.11.0rc1

 # --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #
 # These lines will be edited automatically by parameterized_docker_build.sh. #

View File

@@ -33,7 +33,7 @@ RUN pip --no-cache-dir install \
     && \
     python -m ipykernel.kernelspec

-ENV TENSORFLOW_VERSION 0.11.0rc0
+ENV TENSORFLOW_VERSION 0.11.0rc1

 # --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #
 # These lines will be edited automatically by parameterized_docker_build.sh. #

View File

@@ -17,7 +17,7 @@ RUN ./install_google_cloud_sdk.bash --disable-prompts --install-dir=/var/gcloud

 # Install nightly TensorFlow pip
 RUN pip install \
-    https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
+    https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl

 # Copy test files
 RUN mkdir -p /gcs-smoke/python

View File

@@ -81,7 +81,6 @@ fi
 cat ${LOG_FILE}
 echo ""
-
 # Clean up the newly created tfrecord file in GCS bucket.
 # First, activate gcloud service account
 "${GCLOUD_BIN}" auth activate-service-account \
@@ -96,13 +95,3 @@ fi
 "${GSUTIL_BIN}" rm "${NEW_TFREC_URL}" && \
     echo "Cleaned up new tfrecord file in GCS: ${NEW_TFREC_URL}" || \
     die "FAIL: Unable to clean up new tfrecord file in GCS: ${NEW_TFREC_URL}"
-
-# Also clean up newly created GCS dir.
-NEW_DIR_URL=$(grep "Creating dir" "${LOG_FILE}" | \
-    awk '{print $NF}')
-if [[ -z ${NEW_DIR_URL} ]]; then
-  die "FAIL: Unable to determine the URL to the new directory created in GCS."
-fi
-
-"${GSUTIL_BIN}" rm -r "${NEW_DIR_URL}" && \
-    echo "Cleaned up new directory created in GCS: ${NEW_DIR_URL}" || \
-    die "FAIL: Unable to clean up new directory created in GCS: ${NEW_DIR_URL}"

View File

@@ -35,7 +35,6 @@ flags.DEFINE_integer("num_examples", 10, "Number of examples to generate")

 FLAGS = flags.FLAGS

-
 def create_examples(num_examples, input_mean):
   """Create ExampleProto's containg data."""
   ids = np.arange(num_examples).reshape([num_examples, 1])
@@ -64,12 +63,48 @@ def create_dir_test():
   print("%s directory exists: %s" % (dir_name, dir_exists))

   # List contents of just created directory.
-  starttime = int(round(time.time() * 1000))
   print("Listing directory %s." % dir_name)
+  starttime = int(round(time.time() * 1000))
   print(file_io.list_directory(dir_name))
   elapsed = int(round(time.time() * 1000)) - starttime
   print("Listed directory %s in %s milliseconds" % (dir_name, elapsed))

+  # Delete directory.
+  print("Deleting directory %s." % dir_name)
+  starttime = int(round(time.time() * 1000))
+  file_io.delete_recursively(dir_name)
+  elapsed = int(round(time.time() * 1000)) - starttime
+  print("Deleted directory %s in %s milliseconds" % (dir_name, elapsed))
+
+
+def create_object_test():
+  """Verifies file_io's object manipulation methods ."""
+  starttime = int(round(time.time() * 1000))
+  dir_name = "%s/tf_gcs_test_%s" % (FLAGS.gcs_bucket_url, starttime)
+  print("Creating dir %s." % dir_name)
+  file_io.create_dir(dir_name)
+
+  # Create a file in this directory.
+  file_name = "%s/test_file.txt" % dir_name
+  print("Creating file %s." % file_name)
+  file_io.write_string_to_file(file_name, "test file creation.")
+
+  list_files_pattern = "%s/test_file*.txt" % dir_name
+  print("Getting files matching pattern %s." % list_files_pattern)
+  files_list = file_io.get_matching_files(list_files_pattern)
+  print(files_list)
+
+  assert len(files_list) == 1
+  assert files_list[0] == file_name
+
+  # Cleanup test files.
+  print("Deleting file %s." % file_name)
+  file_io.delete_file(file_name)
+
+  # Delete directory.
+  print("Deleting directory %s." % dir_name)
+  file_io.delete_recursively(dir_name)
+
+
 if __name__ == "__main__":
   # Sanity check on the GCS bucket URL.
   if not FLAGS.gcs_bucket_url or not FLAGS.gcs_bucket_url.startswith("gs://"):
@@ -132,4 +167,5 @@ if __name__ == "__main__":
     print("Successfully caught the expected OutOfRangeError while "
           "reading one more record than is available")
   create_dir_test()
+  create_object_test()
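The new create_object_test exercises the file_io round trip end to end. For a quick local taste of the same calls (hypothetical paths; the real test runs against a gs:// bucket supplied via --gcs_bucket_url):

from tensorflow.python.lib.io import file_io

file_io.create_dir("/tmp/tf_file_io_demo")
file_io.write_string_to_file("/tmp/tf_file_io_demo/test_file.txt", "hello")
print(file_io.get_matching_files("/tmp/tf_file_io_demo/test_file*.txt"))
file_io.delete_recursively("/tmp/tf_file_io_demo")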

View File

@@ -147,7 +147,7 @@ def get_git_version(git_base_path):
   """
   unknown_label = b"unknown"
   try:
-    val = subprocess.check_output(["git", "-C", git_base_path, "describe",
+    val = subprocess.check_output(["git", str("--git-dir="+git_base_path+"/.git"), str("--work-tree="+git_base_path), "describe",
                                    "--long", "--dirty", "--tags"]).strip()
     return val if val else unknown_label
   except subprocess.CalledProcessError:
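The switch away from "git -C" matters because -C was only added in git 1.8.5; the --git-dir/--work-tree pair is accepted by much older installations. A standalone sketch of the equivalent call (hypothetical helper, same flags as the new code):

import subprocess

def describe(repo_path):
    """Return `git describe` output for repo_path, or b"unknown"."""
    try:
        out = subprocess.check_output(
            ["git", "--git-dir=" + repo_path + "/.git",
             "--work-tree=" + repo_path,
             "describe", "--long", "--dirty", "--tags"]).strip()
        return out or b"unknown"
    except (subprocess.CalledProcessError, OSError):
        return b"unknown"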

View File

@@ -107,7 +107,8 @@ function main() {
   mkdir -p ${TMPDIR}/third_party
   pushd ${RUNFILES%org_tensorflow}
   for header in $(find protobuf -name \*.h); do
-    cp --parents "$header" ${TMPDIR}/google;
+    mkdir -p "${TMPDIR}/google/$(dirname ${header})"
+    cp "$header" "${TMPDIR}/google/$(dirname ${header})/"
   done
   popd
   cp -R $RUNFILES/third_party/eigen3 ${TMPDIR}/third_party
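cp --parents is a GNU coreutils extension that BSD/macOS cp lacks, which is presumably why the loop now recreates the directory tree by hand. The same behavior in Python (hypothetical helper):

import os
import shutil

def copy_with_parents(rel_path, dest_root):
    """Copy rel_path under dest_root, preserving its directory structure."""
    dest_dir = os.path.join(dest_root, os.path.dirname(rel_path))
    os.makedirs(dest_dir, exist_ok=True)  # mkdir -p "$(dirname ...)"
    shutil.copy(rel_path, dest_dir)       # cp into the mirrored directory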

View File

@@ -26,7 +26,7 @@ from setuptools import find_packages, setup, Command
 from setuptools.command.install import install as InstallCommandBase
 from setuptools.dist import Distribution

-_VERSION = '0.11.0rc0'
+_VERSION = '0.11.0rc1'

 REQUIRED_PACKAGES = [
     'numpy >= 1.11.0',

tensorflow/tools/swig/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
+swig_path

View File

@@ -98,9 +98,9 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
   native.http_archive(
     name = "protobuf",
-    url = "http://github.com/google/protobuf/archive/c2b3e70efd2038a54ef8973771ac58192885125e.tar.gz",
-    sha256 = "eafc1bc4c27970d62effe64ba6610823fdd66711f440d8ca4a168167786a2fcb",
-    strip_prefix = "protobuf-c2b3e70efd2038a54ef8973771ac58192885125e",
+    url = "http://github.com/google/protobuf/archive/008b5a228b37c054f46ba478ccafa5e855cb16db.tar.gz",
+    sha256 = "2737ad055eb8a9bc63ed068e32c4ea280b62d8236578cb4d4120eb5543f759ab",
+    strip_prefix = "protobuf-008b5a228b37c054f46ba478ccafa5e855cb16db",
   )

   native.new_http_archive(

View File

@@ -1,3 +1,6 @@
+#ifdef _WIN32
+#define sleep(seconds) Sleep(1000*seconds)
+#endif  // _WIN32
 #include "unsupported/Eigen/CXX11/Tensor"

 #ifdef _WIN32

View File

@@ -113,29 +113,33 @@ function setup_python {
     echo -e "\n\nERROR: Problem getting python include path. Is distutils installed?"
     exit 1
   fi
-  local python_lib_path
-  # Split python_path into an array of paths, this allows path containing spaces
-  IFS=','
-  python_lib_path=($(python_path))
-  unset IFS
-  echo "Found possible Python library paths:"
-  for x in "${python_lib_path[@]}"; do
-    echo "  $x"
-  done
-  set -- "${python_lib_path[@]}"
-  echo "Please input the desired Python library path to use. Default is ["$1"]"
-  read b || true
-  if [ "$b" == "" ]; then
-    python_lib="$(default_python_path "${python_lib_path[0]}")"
-    echo $python_lib
-  else
-    if test -d "$b" -a -x "$b"; then
-      python_lib="$b"
-    else
-      echo -e "\n\nERROR: The path you have entered does not exist."
-      exit 1
-    fi
-  fi
+
+  if [ -z "$PYTHON_LIB_PATH" ]; then
+    local python_lib_path
+    # Split python_path into an array of paths, this allows path containing spaces
+    IFS=','
+    python_lib_path=($(python_path))
+    unset IFS
+    echo "Found possible Python library paths:"
+    for x in "${python_lib_path[@]}"; do
+      echo "  $x"
+    done
+    set -- "${python_lib_path[@]}"
+    echo "Please input the desired Python library path to use. Default is ["$1"]"
+    read b || true
+    if [ "$b" == "" ]; then
+      PYTHON_LIB_PATH="$(default_python_path "${python_lib_path[0]}")"
+      echo $PYTHON_LIB_PATH
+    else
+      PYTHON_LIB_PATH="$b"
+    fi
+  fi
+
+  if test -d "$PYTHON_LIB_PATH" -a -x "$PYTHON_LIB_PATH"; then
+    python_lib="$PYTHON_LIB_PATH"
+  else
+    echo -e "\n\nERROR: Invalid python library path: ${PYTHON_LIB_PATH}."
+    exit 1
+  fi

   local numpy_include=$("${PYTHON_BIN_PATH}" -c 'from __future__ import print_function; import numpy; print(numpy.get_include());')
   if [ "$numpy_include" == "" ]; then