mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Summary: Overal context: open-source BlackBoxPredictor as the entry point for inference in Caffe2 (thread safe abstraction for Caffe2 inference). This should be used in ThroughputBenchmark for the purpose of framework comparison This specific diff: There should be no harm in moving transformation code to OSS. On the advantages side we will be able to compare production Caffe2 setup with PyTorch in the most fair way via ThroughputBenchmark. This approach avoid any complicated transformation regirstries. Building those proper would be significant engineering effort as well as production risk. In the past we had SEVs related to transforms being turned off due to various refactors. Given that we don't plan to build any other significant investments into transformation logic except existing ones (like TVM and Glow), and those also relate to open-source technologies, I came up to the conclusion of moving to OSS the whole thing. Reviewed By: bertmaher Differential Revision: D16367134 fbshipit-source-id: fc6bacc1be3ff6336beb57cdad58168d3a2b8c28
120 lines
2.8 KiB
C++
120 lines
2.8 KiB
C++
#include "caffe2/utils/string_utils.h"
|
|
|
|
#include <algorithm>
|
|
#include <sstream>
|
|
#include <vector>
|
|
|
|
namespace caffe2 {
|
|
|
|
std::vector<std::string>
|
|
split(char separator, const std::string& string, bool ignore_empty) {
|
|
std::vector<std::string> pieces;
|
|
std::stringstream ss(string);
|
|
std::string item;
|
|
while (getline(ss, item, separator)) {
|
|
if (!ignore_empty || !item.empty()) {
|
|
pieces.push_back(std::move(item));
|
|
}
|
|
}
|
|
return pieces;
|
|
}
|
|
|
|
std::string trim(const std::string& str) {
|
|
size_t left = str.find_first_not_of(' ');
|
|
if (left == std::string::npos) {
|
|
return str;
|
|
}
|
|
size_t right = str.find_last_not_of(' ');
|
|
return str.substr(left, (right - left + 1));
|
|
}
|
|
|
|
size_t editDistance(
|
|
const std::string& s1, const std::string& s2, size_t max_distance)
|
|
{
|
|
std::vector<size_t> current(s1.length() + 1);
|
|
std::vector<size_t> previous(s1.length() + 1);
|
|
std::vector<size_t> previous1(s1.length() + 1);
|
|
|
|
return editDistanceHelper(
|
|
s1.c_str(),
|
|
s1.length(),
|
|
s2.c_str(),
|
|
s2.length(),
|
|
current,
|
|
previous,
|
|
previous1,
|
|
max_distance
|
|
);
|
|
}
|
|
#define NEXT_UNSAFE(s, i, c) { \
|
|
(c)=(uint8_t)(s)[(i)++]; \
|
|
}
|
|
|
|
int32_t editDistanceHelper(const char* s1,
|
|
size_t s1_len,
|
|
const char* s2,
|
|
size_t s2_len,
|
|
std::vector<size_t> ¤t,
|
|
std::vector<size_t> &previous,
|
|
std::vector<size_t> &previous1,
|
|
size_t max_distance) {
|
|
if (max_distance) {
|
|
if (std::max(s1_len, s2_len) - std::min(s1_len, s2_len) > max_distance) {
|
|
return max_distance+1;
|
|
}
|
|
}
|
|
|
|
for (size_t j = 0; j <= s1_len; ++j) {
|
|
current[j] = j;
|
|
}
|
|
|
|
int32_t str2_offset = 0;
|
|
char prev2 = 0;
|
|
for (size_t i = 1; i <= s2_len; ++i) {
|
|
swap(previous1, previous);
|
|
swap(current, previous);
|
|
current[0] = i;
|
|
|
|
char c2 = s2[str2_offset];
|
|
char prev1 = 0;
|
|
int32_t str1_offset = 0;
|
|
|
|
NEXT_UNSAFE(s2, str2_offset, c2);
|
|
|
|
size_t current_min = s1_len;
|
|
for (size_t j = 1; j <= s1_len; ++j) {
|
|
size_t insertion = previous[j] + 1;
|
|
size_t deletion = current[j - 1] + 1;
|
|
size_t substitution = previous[j - 1];
|
|
size_t transposition = insertion;
|
|
char c1 = s1[str1_offset];
|
|
|
|
NEXT_UNSAFE(s1, str1_offset, c1);
|
|
|
|
if (c1 != c2) {
|
|
substitution += 1;
|
|
}
|
|
|
|
|
|
if (prev1 == c2 && prev2 == c1 && j > 1 && i > 1) {
|
|
transposition = previous1[j - 2] + 1;
|
|
}
|
|
prev1 = c1;
|
|
|
|
current[j] = std::min(std::min(insertion, deletion),
|
|
std::min(substitution, transposition));
|
|
current_min = std::min(current_min, current[j]);
|
|
}
|
|
|
|
|
|
if (max_distance != 0 && current_min > max_distance) {
|
|
return max_distance+1;
|
|
}
|
|
|
|
prev2 = c2;
|
|
}
|
|
|
|
return current[s1_len];
|
|
}
|
|
} // namespace caffe2
|