diff --git a/src/core/interface/index.cc b/src/core/interface/index.cc
index d482f1292..b969c9214 100644
--- a/src/core/interface/index.cc
+++ b/src/core/interface/index.cc
@@ -171,6 +171,9 @@ int Index::CreateAndInitConverterReformer(const QuantizerParam &param,
         case QuantizerType::kRabitq:
           // no converter here
           return 0;
+        case QuantizerType::kUniformInt8:
+          converter_name = "UniformInt8StreamingConverter";
+          break;
         default:
           LOG_ERROR("Unsupported quantizer type: ");
           return core::IndexError_Unsupported;
@@ -187,13 +190,17 @@ int Index::CreateAndInitConverterReformer(const QuantizerParam &param,
   }
 
   proxima_index_meta_ = converter_->meta();
-  reformer_ =
-      core::IndexFactory::CreateReformer(proxima_index_meta_.reformer_name());
-  if (reformer_ == nullptr ||
-      reformer_->init(proxima_index_meta_.reformer_params()) != 0) {
-    LOG_ERROR("Failed to create and init reformer");
-    return core::IndexError_Runtime;
+
+  if (!proxima_index_meta_.reformer_name().empty()) {
+    reformer_ =
+        core::IndexFactory::CreateReformer(proxima_index_meta_.reformer_name());
+    if (reformer_ == nullptr ||
+        reformer_->init(proxima_index_meta_.reformer_params()) != 0) {
+      LOG_ERROR("Failed to create and init reformer");
+      return core::IndexError_Runtime;
+    }
   }
+
   streamer_vector_meta_.set_meta(proxima_index_meta_.data_type(),
                                  proxima_index_meta_.dimension());
   streamer_vector_meta_.set_meta_type(proxima_index_meta_.meta_type());
@@ -294,6 +301,27 @@ int Index::Open(const std::string &file_path, StorageOptions storage_options) {
     return core::IndexError_Runtime;
   }
 
+  // If a converter exists but reformer was not created during Init()
+  // (converters like UniformInt8 whose reformer params are only available
+  // after train()), create it now from the persisted meta that the streamer
+  // has loaded.  When there is no converter (QuantizerType::kNone), reformer_
+  // is nullptr by design — skip this block entirely.
+  if (converter_ != nullptr && reformer_ == nullptr) {
+    const auto &meta = streamer_->meta();
+    if (meta.reformer_name().empty()) {
+      LOG_ERROR(
+          "Index::Open: converter exists but reformer not initialized and "
+          "no reformer in persisted meta");
+      return core::IndexError_Runtime;
+    }
+    reformer_ = core::IndexFactory::CreateReformer(meta.reformer_name());
+    if (!reformer_ || reformer_->init(meta.reformer_params()) != 0) {
+      LOG_ERROR("Failed to create reformer '%s' from persisted meta",
+                meta.reformer_name().c_str());
+      return core::IndexError_Runtime;
+    }
+  }
+
   // converter/reformer/metric are created in IndexFactory::CreateIndex
   // TODO: init
 
diff --git a/src/core/metric/metric_params.h b/src/core/metric/metric_params.h
index b5719e873..05f8db96a 100644
--- a/src/core/metric/metric_params.h
+++ b/src/core/metric/metric_params.h
@@ -34,5 +34,9 @@ static const std::string QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME =
 static const std::string QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_PARAMS =
     "proxima.quantized_integer.metric.origin_metric_params";
 
+//! UniformInt8 Metric
+static const std::string UNIFORM_INT8_METRIC_ORIGIN_METRIC_NAME =
+    "proxima.uniform_int8.metric.origin_metric_name";
+
 }  // namespace core
 }  // namespace zvec
\ No newline at end of file
diff --git a/src/core/metric/uniform_int8_metric.cc b/src/core/metric/uniform_int8_metric.cc
new file mode 100644
index 000000000..32fb410c6
--- /dev/null
+++ b/src/core/metric/uniform_int8_metric.cc
@@ -0,0 +1,158 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <ailego/math/euclidean_distance_matrix.h>
+#include <ailego/math_batch/euclidean_distance_batch.h>
+#include <zvec/core/framework/index_error.h>
+#include <zvec/core/framework/index_factory.h>
+#include <zvec/turbo/turbo.h>
+#include "metric_params.h"
+
+namespace zvec {
+namespace core {
+
+/*! Index Metric for Uniform Int8 Quantization (Global Scale)
+ *
+ * Uses direct int8 L2 distance computation. Since all vectors share
+ * a single global scale/bias, no per-vector reconstruction is needed.
+ * This is the key benefit: distance = sum((a[i] - b[i])^2) on raw int8
+ * values, with optional post-scaling by 1/scale^2 for real L2 distances.
+ */
+class UniformInt8Metric : public IndexMetric {
+ public:
+  //! Initialize Metric: requires DT_INT8 data and a SquaredEuclidean origin
+  int init(const IndexMeta &meta, const ailego::Params &index_params) override {
+    if (meta.data_type() != IndexMeta::DataType::DT_INT8) {
+      LOG_ERROR("UniformInt8Metric: unsupported type %d", meta.data_type());
+      return IndexError_Unsupported;
+    }
+
+    std::string metric_name;
+    index_params.get(UNIFORM_INT8_METRIC_ORIGIN_METRIC_NAME, &metric_name);
+    if (metric_name.empty()) {
+      LOG_ERROR("UniformInt8Metric: param %s is required",
+                UNIFORM_INT8_METRIC_ORIGIN_METRIC_NAME.c_str());
+      return IndexError_InvalidArgument;
+    }
+
+    if (metric_name != "SquaredEuclidean") {
+      LOG_ERROR("UniformInt8Metric: only SquaredEuclidean supported, got %s",
+                metric_name.c_str());
+      return IndexError_Unsupported;
+    }
+
+    meta_ = meta;
+    params_ = index_params;
+
+    LOG_INFO("UniformInt8Metric initialized: dimension=%u", meta_.dimension());
+    return 0;
+  }
+
+  //! Cleanup Metric (nothing to release; always returns 0)
+  int cleanup(void) override {
+    return 0;
+  }
+
+  //! Check that an index meta has the data type and unit size given to init()
+  bool is_matched(const IndexMeta &meta) const override {
+    return meta.data_type() == meta_.data_type() &&
+           meta.unit_size() == meta_.unit_size();
+  }
+
+  //! Check a query meta: type/unit size against init()'s meta, and its
+  bool is_matched(const IndexMeta &meta,  // dimension against `meta`
+                  const IndexQueryMeta &qmeta) const override {
+    return qmeta.data_type() == meta_.data_type() &&
+           qmeta.unit_size() == meta_.unit_size() &&
+           qmeta.dimension() == meta.dimension();
+  }
+
+  //! Retrieve distance function for query (1x1)
+  MatrixDistance distance(void) const override {
+    return distance_matrix(1, 1);
+  }
+
+  //! Retrieve matrix distance function
+  //! Uses direct int8 L2: sum((a[i]-b[i])^2) — no reconstruction needed
+  MatrixDistance distance_matrix(size_t m, size_t n) const override {
+    if (m == 1 && n == 1) {
+      auto turbo_ret = turbo::get_distance_func(
+          turbo::MetricType::kSquaredEuclidean, turbo::DataType::kInt8,
+          turbo::QuantizeType::kUniform);
+      if (turbo_ret) {  // prefer the turbo kernel when one is returned
+        return turbo_ret;
+      }
+      return reinterpret_cast<MatrixDistanceHandle>(
+          ailego::SquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute);
+    }
+    // Only 1x1 is available for int8 in ailego; other shapes yield nullptr
+    return nullptr;
+  }
+
+  //! Retrieve batch distance function
+  //! Prefers the turbo kernel, else ailego's int8 <12, 2> batch template
+  MatrixBatchDistance batch_distance(void) const override {
+    auto turbo_ret = turbo::get_batch_distance_func(
+        turbo::MetricType::kSquaredEuclidean, turbo::DataType::kInt8,
+        turbo::QuantizeType::kUniform);
+    if (turbo_ret) {
+      return turbo_ret;
+    }
+    return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
+        ailego::DistanceBatch::SquaredEuclideanDistanceBatch<int8_t, 12,
+                                                             2>::ComputeBatch);
+  }
+
+  //! Retrieve params of Metric (the index_params captured by init())
+  const ailego::Params &params(void) const override {
+    return params_;
+  }
+
+  //! Train the metric (no training needed)
+  int train(const void * /*vec*/, size_t /*dim*/) override {
+    return 0;
+  }
+
+  //! Retrieve if it supports training
+  bool support_train(void) const override {
+    return false;
+  }
+
+  //! Normalize result (no-op: normalization is handled by reformer)
+  void normalize(float * /*score*/) const override {}
+
+  //! Retrieve if it supports normalization
+  bool support_normalize(void) const override {
+    return false;
+  }
+
+  //! Retrieve query metric object of this index metric (none: nullptr)
+  Pointer query_metric(void) const override {
+    return nullptr;
+  }
+
+  //! No query preprocessing needed for direct int8 L2
+  DistanceBatchQueryPreprocessFunc get_query_preprocess_func() const override {
+    return nullptr;
+  }
+
+ private:
+  IndexMeta meta_{};         // meta accepted by init()
+  ailego::Params params_{};  // params accepted by init()
+};
+
+INDEX_FACTORY_REGISTER_METRIC_ALIAS(UniformInt8, UniformInt8Metric);
+
+}  // namespace core
+}  // namespace zvec
diff --git a/src/core/quantizer/CMakeLists.txt b/src/core/quantizer/CMakeLists.txt
index 80b4f612a..f5c9ad898 100644
--- a/src/core/quantizer/CMakeLists.txt
+++ b/src/core/quantizer/CMakeLists.txt
@@ -10,7 +10,7 @@ cc_library(
         NAME core_quantizer 
         STATIC SHARED STRICT ALWAYS_LINK
         SRCS *.cc
-        LIBS zvec_ailego core_framework
+        LIBS zvec_ailego zvec_turbo core_framework
         INCS . ${PROJECT_ROOT_DIR}/src/core
         LDFLAGS "${CORE_QUANTIZER_LDFLAGS}"
         VERSION "${PROXIMA_ZVEC_VERSION}"
diff --git a/src/core/quantizer/quantizer_params.h b/src/core/quantizer/quantizer_params.h
index 622361660..a089a2d9f 100644
--- a/src/core/quantizer/quantizer_params.h
+++ b/src/core/quantizer/quantizer_params.h
@@ -115,6 +115,12 @@ static const std::string INTEGER_STREAMING_REFORMER_ENABLE_NORMALIZE =
 static const std::string INTEGER_STREAMING_REFORMER_IS_EUCLIDEAN =
     "integer_streaming.reformer.is_euclidean";
 
+//! UniformInt8StreamingConverter / Reformer
+static const std::string UNIFORM_INT8_REFORMER_SCALE =
+    "uniform_int8.reformer.scale";
+static const std::string UNIFORM_INT8_REFORMER_BIAS =
+    "uniform_int8.reformer.bias";
+
 //! DoubleBitConverter
 static const std::string DOUBLE_BIT_CONVERTER_TRAIN_SAMPLE_COUNT =
     "double_bit.converter.train_sample_count";
diff --git a/src/core/quantizer/uniform_int8_converter.cc b/src/core/quantizer/uniform_int8_converter.cc
new file mode 100644
index 000000000..5b23210de
--- /dev/null
+++ b/src/core/quantizer/uniform_int8_converter.cc
@@ -0,0 +1,336 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <vector>
+#include <ailego/pattern/defer.h>
+#include <core/quantizer/quantizer_params.h>
+#include <zvec/core/framework/index_factory.h>
+#include <zvec/turbo/turbo.h>
+#include "../metric/metric_params.h"
+
+namespace zvec {
+namespace core {
+
+/*! Converter for Uniform Int8 Quantization (Global Scale)
+ *
+ * Unlike IntegerStreamingConverter which uses per-vector scale/bias,
+ * this converter computes a single global scale/bias from the entire dataset.
+ * All vectors share the same quantization parameters, enabling direct int8
+ * L2 distance computation without per-vector reconstruction.
+ */
+class UniformInt8StreamingConverter : public IndexConverter {
+ public:
+  //! Constructor.
+  //! `dst_type` is required by the INDEX_FACTORY_REGISTER_CONVERTER_ALIAS
+  //! macro signature but is unused here: the output type is always
+  //! IndexMeta::DataType::DT_INT8, hard-coded in init().
+  UniformInt8StreamingConverter(IndexMeta::DataType /*dst_type*/) {}
+
+  //! Destructor
+  ~UniformInt8StreamingConverter() override {}
+
+  //! Initialize Converter: builds the int8 output meta; always returns 0
+  int init(const IndexMeta &index_meta, const ailego::Params &params) override {
+    meta_ = index_meta;
+    original_dimension_ = index_meta.dimension();
+
+    // Reset stats so a re-init() call does not leak counters from a
+    // previous lifecycle.
+    *stats_.mutable_trained_count() = 0;
+    *stats_.mutable_transformed_count() = 0;
+
+    // Store converter info in meta
+    meta_.set_converter("UniformInt8StreamingConverter", 0, params);
+
+    // Set data type to INT8, dimension stays the same (no per-vector extras)
+    meta_.set_meta(IndexMeta::DataType::DT_INT8, original_dimension_);
+
+    // Set metric to our direct int8 L2 metric
+    ailego::Params metric_params;
+    metric_params.set(UNIFORM_INT8_METRIC_ORIGIN_METRIC_NAME,
+                      index_meta.metric_name());
+    meta_.set_metric("UniformInt8", 0, metric_params);
+
+    // Start untrained so re-init() cannot keep a previous lifecycle's values,
+    // then restore persisted scale/bias if present (search path skips train()).
+    scale_ = bias_ = 0.0f;
+    params.get(UNIFORM_INT8_REFORMER_SCALE, &scale_);
+    params.get(UNIFORM_INT8_REFORMER_BIAS, &bias_);
+
+    // Only register reformer when scale/bias are available (either from
+    // persisted params or after train()). During first-time Init() before
+    // train(), we intentionally leave reformer_name empty so that the
+    // Index layer does not attempt to create an uninitialized reformer.
+    if (scale_ != 0.0f) {
+      ailego::Params reformer_params;
+      reformer_params.set(UNIFORM_INT8_REFORMER_SCALE, scale_);
+      reformer_params.set(UNIFORM_INT8_REFORMER_BIAS, bias_);
+      meta_.set_reformer("UniformInt8StreamingReformer", 0, reformer_params);
+    }
+
+    return 0;
+  }
+
+  //! Cleanup Converter (zeroes the trained/transformed counters)
+  int cleanup(void) override {
+    *stats_.mutable_trained_count() = 0;
+    *stats_.mutable_transformed_count() = 0;
+    return 0;
+  }
+
+  //! Train: derive the global scale/bias from an FP32 holder's min/max
+  int train(IndexHolder::Pointer holder) override {
+    if (!holder) {
+      LOG_ERROR("UniformInt8StreamingConverter: null holder in train");
+      return IndexError_InvalidArgument;
+    }
+    if (holder->data_type() != IndexMeta::DataType::DT_FP32 ||
+        holder->dimension() != original_dimension_) {
+      LOG_ERROR("UniformInt8StreamingConverter: holder type/dim mismatch");
+      return IndexError_Mismatch;
+    }
+
+    ailego::ElapsedTime timer;
+    AILEGO_DEFER([&]() { stats_.set_trained_costtime(timer.milli_seconds()); });
+
+    *stats_.mutable_trained_count() = 0;  // count this train() call only
+    float global_min = std::numeric_limits<float>::max();
+    float global_max = std::numeric_limits<float>::lowest();
+
+    auto iter = holder->create_iterator();
+    if (!iter) {
+      LOG_ERROR("UniformInt8StreamingConverter: failed to create iterator");
+      return IndexError_Runtime;
+    }
+
+    bool all_integer = true;
+    for (; iter->is_valid(); iter->next()) {
+      const float *vec = reinterpret_cast<const float *>(iter->data());
+      for (size_t i = 0; i < original_dimension_; ++i) {
+        float v = vec[i];
+        if (!std::isfinite(v)) {
+          LOG_ERROR(
+              "UniformInt8StreamingConverter: non-finite value in training "
+              "set (record_idx=%zu, dim_idx=%zu, value=%f)",
+              (size_t)*stats_.mutable_trained_count(), i, v);
+          return IndexError_InvalidArgument;
+        }
+        global_min = std::min(global_min, v);
+        global_max = std::max(global_max, v);
+        all_integer = all_integer && std::floor(v) == v;
+      }
+      (*stats_.mutable_trained_count())++;
+    }
+
+    // Reject an empty training set: min/max (hence scale/bias) are undefined.
+    if (*stats_.mutable_trained_count() == 0) {
+      LOG_ERROR("UniformInt8StreamingConverter: empty training set");
+      return IndexError_InvalidArgument;
+    }
+
+    // Compute global scale and bias:
+    //   forward:  int8 = clip(round(float * scale + bias), 0, 127)
+    //   inverse:  float ≈ (int8 - bias) / scale
+    // Values are mapped to [0, 127] to enable the VNNI abs trick in the
+    // distance kernel (sub_epi8 + abs_epi8 + vpdpbusd), which requires
+    // max |diff| ≤ 127 to avoid int8 overflow.
+    // Lossless fast-path: all-integer data spanning <= 127 uses scale=1.
+    constexpr float epsilon = std::numeric_limits<float>::epsilon();
+    float range = global_max - global_min;
+    if (all_integer && range <= 127.0f) {
+      scale_ = 1.0f;
+      bias_ = -global_min;  // global_min is integer — maps to 0
+    } else {
+      scale_ = 127.0f / std::max(range, epsilon);
+      bias_ = -global_min * scale_;
+    }
+
+    LOG_INFO(
+        "UniformInt8StreamingConverter train done: costtime %zums, "
+        "global_min=%f, global_max=%f, scale=%f, bias=%f",
+        (size_t)timer.milli_seconds(), global_min, global_max, scale_, bias_);
+
+    // Now configure the reformer with the computed scale/bias
+    ailego::Params reformer_params;
+    reformer_params.set(UNIFORM_INT8_REFORMER_SCALE, scale_);
+    reformer_params.set(UNIFORM_INT8_REFORMER_BIAS, bias_);
+    meta_.set_reformer("UniformInt8StreamingReformer", 0, reformer_params);
+
+    // Also store scale/bias in converter params for persistence
+    ailego::Params conv_params = meta_.converter_params();
+    conv_params.set(UNIFORM_INT8_REFORMER_SCALE, scale_);
+    conv_params.set(UNIFORM_INT8_REFORMER_BIAS, bias_);
+    meta_.set_converter(meta_.converter_name(), 0, conv_params);
+
+    return 0;
+  }
+
+  //! Transform: wrap holder to produce quantized int8 data
+  int transform(IndexHolder::Pointer holder) override {
+    if (holder->data_type() != IndexMeta::DataType::DT_FP32 ||
+        holder->dimension() != original_dimension_) {
+      return IndexError_Mismatch;
+    }
+
+    *stats_.mutable_transformed_count() += holder->count();
+    holder_ = std::make_shared<UniformInt8Holder>(holder, original_dimension_,
+                                                  scale_, bias_);
+    return 0;
+  }
+
+  //! Dump index into storage (no-op: nothing is written here)
+  int dump(const IndexDumper::Pointer &dumper) override {
+    (void)dumper;
+    return 0;
+  }
+
+  //! Retrieve statistics
+  const Stats &stats(void) const override {
+    return stats_;
+  }
+
+  //! Retrieve a holder as result (set by transform(); empty before that)
+  IndexHolder::Pointer result(void) const override {
+    return holder_;
+  }
+
+  //! Retrieve Index Meta
+  const IndexMeta &meta(void) const override {
+    return meta_;
+  }
+
+ private:
+  //! IndexHolder that applies uniform int8 quantization on-the-fly
+  class UniformInt8Holder : public IndexHolder {
+   public:
+    class Iterator : public IndexHolder::Iterator {
+     public:
+      Iterator(const UniformInt8Holder *owner,
+               IndexHolder::Iterator::Pointer &&iter)
+          : owner_(owner),
+            buffer_(owner->dimension(), 0),
+            front_iter_(std::move(iter)) {
+        this->encode_record();  // quantize the first record eagerly
+      }
+
+      ~Iterator(void) override {}
+
+      const void *data(void) const override {
+        return buffer_.data();  // int8 encoding of the current record
+      }
+
+      bool is_valid(void) const override {
+        return front_iter_->is_valid();
+      }
+
+      uint64_t key(void) const override {
+        return front_iter_->key();
+      }
+
+      void next(void) override {
+        front_iter_->next();
+        this->encode_record();
+      }
+
+     private:
+      void encode_record(void) {
+        if (!front_iter_->is_valid()) {
+          return;
+        }
+        const float *vec = reinterpret_cast<const float *>(front_iter_->data());
+        int8_t *out = buffer_.data();
+        const float scale = owner_->scale_;
+        const float bias = owner_->bias_;
+        const size_t dim = owner_->original_dim_;
+
+        if (owner_->quantize_func_ != nullptr) {
+          owner_->quantize_func_(vec, dim, scale, bias, out);
+          return;
+        }
+        for (size_t i = 0; i < dim; ++i) {  // scalar fallback
+          float v = std::round(vec[i] * scale + bias);
+          v = std::max(0.0f, std::min(127.0f, v));
+          out[i] = static_cast<int8_t>(v);
+        }
+      }
+      // Non-owning back-pointer: the iterator must not outlive its holder.
+      const UniformInt8Holder *owner_{nullptr};
+      std::vector<int8_t> buffer_{};
+      IndexHolder::Iterator::Pointer front_iter_{};
+    };
+
+    UniformInt8Holder(IndexHolder::Pointer front, size_t original_dim,
+                      float scale, float bias)
+        : front_(std::move(front)),
+          original_dim_(original_dim),
+          scale_(scale),
+          bias_(bias),
+          quantize_func_(
+              turbo::get_uniform_quantize_func(turbo::DataType::kInt8)) {}
+
+    size_t count(void) const override {
+      return front_->count();
+    }
+
+    size_t dimension(void) const override {
+      return original_dim_;
+    }
+
+    IndexMeta::DataType data_type(void) const override {
+      return IndexMeta::DataType::DT_INT8;
+    }
+
+    size_t element_size(void) const override {
+      return IndexMeta::ElementSizeof(IndexMeta::DataType::DT_INT8,
+                                      original_dim_);
+    }
+
+    bool multipass(void) const override {
+      return front_->multipass();
+    }
+
+    IndexHolder::Iterator::Pointer create_iterator(void) override {
+      auto iter = front_->create_iterator();
+      return iter ? IndexHolder::Iterator::Pointer(
+                        new UniformInt8Holder::Iterator(this, std::move(iter)))
+                  : IndexHolder::Iterator::Pointer();
+    }
+
+   private:
+    IndexHolder::Pointer front_{};
+    size_t original_dim_{0};
+    float scale_{0.0f};
+    float bias_{0.0f};
+    //! Resolved once at Holder construction; nullptr → use scalar fallback.
+    turbo::UniformQuantizeFunc quantize_func_{nullptr};
+  };
+
+  //! Members
+  IndexMeta meta_{};
+  Stats stats_{};
+  IndexHolder::Pointer holder_{};
+  size_t original_dimension_{0};
+  float scale_{0.0f};  // 0 means "not trained / not restored yet"
+  float bias_{0.0f};
+};
+
+INDEX_FACTORY_REGISTER_CONVERTER_ALIAS(UniformInt8StreamingConverter,
+                                       UniformInt8StreamingConverter,
+                                       IndexMeta::DataType::DT_INT8);
+
+}  // namespace core
+}  // namespace zvec
diff --git a/src/core/quantizer/uniform_int8_reformer.cc b/src/core/quantizer/uniform_int8_reformer.cc
new file mode 100644
index 000000000..b642baf14
--- /dev/null
+++ b/src/core/quantizer/uniform_int8_reformer.cc
@@ -0,0 +1,224 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <cmath>
+#include <core/quantizer/quantizer_params.h>
+#include <zvec/core/framework/index_factory.h>
+#include <zvec/turbo/turbo.h>
+
+namespace zvec {
+namespace core {
+
+/*! Reformer for Uniform Int8 Quantization (Global Scale)
+ *
+ * Uses a global scale/bias (computed by UniformInt8StreamingConverter) to
+ * quantize query vectors and build-time record vectors to int8.
+ * No per-vector extras are appended — the output is pure int8.
+ */
+class UniformInt8StreamingReformer : public IndexReformer {
+ public:
+  //! Constructor.
+  //! `dst_type` is required by the INDEX_FACTORY_REGISTER_REFORMER_ALIAS
+  //! macro signature but is unused here: the quantization output is
+  //! always int8, governed by the (scale, bias) pair received in init().
+  UniformInt8StreamingReformer(IndexMeta::DataType /*dst_type*/) {}
+
+  //! Initialize Reformer
+  //!
+  //! Requires both scale and bias in `params`, with a finite non-zero scale
+  //! and a finite bias; otherwise returns IndexError_InvalidArgument and
+  //! leaves the reformer marked uninitialized, so later quantize/normalize/
+  //! revert calls fail with IndexError_Runtime instead of silently producing
+  //! garbage. On success it also caches 1/scale^2 and resolves the optional
+  //! SIMD quantize kernel.
+  int init(const ailego::Params &params) override {
+    bool has_scale = params.get(UNIFORM_INT8_REFORMER_SCALE, &scale_);
+    bool has_bias = params.get(UNIFORM_INT8_REFORMER_BIAS, &bias_);
+
+    if (!has_scale || !has_bias) {
+      LOG_ERROR(
+          "UniformInt8StreamingReformer init: missing required params "
+          "(scale_present=%d, bias_present=%d)",
+          (int)has_scale, (int)has_bias);
+      initialized_ = false;
+      return IndexError_InvalidArgument;
+    }
+
+    if (!std::isfinite(scale_) || scale_ == 0.0f || !std::isfinite(bias_)) {
+      LOG_ERROR(
+          "UniformInt8StreamingReformer: invalid params scale=%f, bias=%f",
+          scale_, bias_);
+      initialized_ = false;
+      return IndexError_InvalidArgument;
+    }
+
+    // int8_l2 = scale^2 * real_l2, so real_l2 = int8_l2 / scale^2.
+    scale_reciprocal_sq_ = 1.0f / (scale_ * scale_);
+    initialized_ = true;
+
+    // Resolve the SIMD quantize kernel once; falls back to scalar when the
+    // current CPU lacks AVX-512 (turbo returns nullptr on those builds).
+    quantize_func_ = turbo::get_uniform_quantize_func(turbo::DataType::kInt8);
+
+    LOG_INFO("UniformInt8StreamingReformer init: scale=%f, bias=%f, simd=%s",
+             scale_, bias_, quantize_func_ != nullptr ? "avx512" : "scalar");
+    return 0;
+  }
+
+  //! Cleanup Reformer (no-op; note it does not clear the initialized flag)
+  int cleanup(void) override {
+    return 0;
+  }
+
+  //! Load index from container (no-op: all state comes from init() params)
+  int load(IndexStorage::Pointer) override {
+    return 0;
+  }
+
+  //! Unload index (no-op)
+  int unload(void) override {
+    return 0;
+  }
+
+  //! Transform a single query: float → int8
+  int transform(const void *query, const IndexQueryMeta &qmeta,
+                std::string *out, IndexQueryMeta *ometa) const override {
+    return do_quantize(query, qmeta, 1, out, ometa);
+  }
+
+  //! Transform batch queries: float → int8
+  int transform(const void *query, const IndexQueryMeta &qmeta, uint32_t count,
+                std::string *out, IndexQueryMeta *ometa) const override {
+    return do_quantize(query, qmeta, count, out, ometa);
+  }
+
+  //! Convert a single record: float → int8 (used during build)
+  int convert(const void *record, const IndexQueryMeta &rmeta, std::string *out,
+              IndexQueryMeta *ometa) const override {
+    return do_quantize(record, rmeta, 1, out, ometa);
+  }
+
+  //! Convert batch records: float → int8
+  int convert(const void *records, const IndexQueryMeta &rmeta, uint32_t count,
+              std::string *out, IndexQueryMeta *ometa) const override {
+    return do_quantize(records, rmeta, count, out, ometa);
+  }
+
+  //! Normalize results: convert int8 L2 distances back to float L2 distances
+  int normalize(const void * /*query*/, const IndexQueryMeta & /*qmeta*/,
+                IndexDocumentList &result) const override {
+    if (!initialized_) {
+      LOG_ERROR(
+          "UniformInt8StreamingReformer::normalize called before init "
+          "with valid params");
+      return IndexError_Runtime;
+    }
+    for (auto &it : result) {
+      *it.mutable_score() *= scale_reciprocal_sq_;  // score / scale^2
+    }
+    return 0;
+  }
+
+  //! Support revert (int8 → float)
+  bool need_revert() const override {
+    return true;
+  }
+
+  //! Revert: convert int8 vector back to float
+  int revert(const void *in, const IndexQueryMeta &qmeta,
+             std::string *out) const override {
+    if (!initialized_) {
+      LOG_ERROR(
+          "UniformInt8StreamingReformer::revert called before init "
+          "with valid params");
+      return IndexError_Runtime;
+    }
+    size_t dim = qmeta.dimension();
+    out->resize(dim * sizeof(float));
+    float *out_buf = reinterpret_cast<float *>(out->data());
+    const int8_t *buf = reinterpret_cast<const int8_t *>(in);
+
+    // Approximate dequantization (lossy):
+    //   forward:  int8 = clip(round(float * scale + bias), 0, 127)
+    //   inverse:  float ≈ (int8 - bias) / scale
+    // initialized_ guarantees scale_ != 0 and finite.
+    float inv_scale = 1.0f / scale_;
+    for (size_t i = 0; i < dim; ++i) {
+      out_buf[i] = (static_cast<float>(buf[i]) - bias_) * inv_scale;
+    }
+
+    return 0;
+  }
+
+ private:
+  //! Common quantization path shared by transform()/convert() (single & batch)
+  int do_quantize(const void *src, const IndexQueryMeta &smeta, uint32_t count,
+                  std::string *out, IndexQueryMeta *ometa) const {
+    if (!initialized_) {
+      LOG_ERROR(
+          "UniformInt8StreamingReformer: quantize called before init "
+          "with valid params");
+      return IndexError_Runtime;
+    }
+    if (smeta.data_type() != IndexMeta::DataType::DT_FP32 ||
+        smeta.unit_size() !=
+            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {
+      return IndexError_Unsupported;
+    }
+
+    *ometa = smeta;
+    ometa->set_meta(IndexMeta::DataType::DT_INT8, smeta.dimension());
+    const size_t out_stride = ometa->element_size();
+    out->resize(static_cast<size_t>(count) * out_stride);
+
+    const float *vec = reinterpret_cast<const float *>(src);
+    int8_t *ovec = reinterpret_cast<int8_t *>(&(*out)[0]);
+    const size_t dim = smeta.dimension();
+    for (uint32_t i = 0; i < count; ++i) {  // inputs read back-to-back
+      quantize(vec + i * dim, dim, ovec + i * out_stride);
+    }
+    return 0;
+  }
+
+  //! Quantize float vector to int8 using global scale/bias.
+  //! Output values are in [0, 127] to enable the VNNI abs trick.
+  //! Uses the SIMD kernel resolved in init() when available, otherwise
+  //! falls back to the scalar reference implementation.
+  inline void quantize(const float *in, size_t dim, int8_t *out) const {
+    if (quantize_func_ != nullptr) {
+      quantize_func_(in, dim, scale_, bias_, out);
+      return;
+    }
+    for (size_t i = 0; i < dim; ++i) {
+      float v = std::round(in[i] * scale_ + bias_);
+      v = std::max(0.0f, std::min(127.0f, v));
+      out[i] = static_cast<int8_t>(v);
+    }
+  }
+
+  //! Members
+  float scale_{0.0f};
+  float bias_{0.0f};
+  float scale_reciprocal_sq_{1.0f};  // 1 / scale^2, cached by init()
+  bool initialized_{false};          // true only after a successful init()
+  turbo::UniformQuantizeFunc quantize_func_{nullptr};
+};
+
+INDEX_FACTORY_REGISTER_REFORMER_ALIAS(UniformInt8StreamingReformer,
+                                      UniformInt8StreamingReformer,
+                                      IndexMeta::DataType::DT_INT8);
+
+}  // namespace core
+}  // namespace zvec
diff --git a/src/include/zvec/core/interface/index_param.h b/src/include/zvec/core/interface/index_param.h
index cd617b237..491d71608 100644
--- a/src/include/zvec/core/interface/index_param.h
+++ b/src/include/zvec/core/interface/index_param.h
@@ -86,6 +86,7 @@ enum class QuantizerType {
   kInt8,
   kInt4,
   kRabitq,
+  kUniformInt8,  // Global uniform int8 quantization (shared scale/bias).
 };
 
 struct SerializableBase {
diff --git a/src/include/zvec/turbo/turbo.h b/src/include/zvec/turbo/turbo.h
index 6ecbfdd1e..2fbf6d680 100644
--- a/src/include/zvec/turbo/turbo.h
+++ b/src/include/zvec/turbo/turbo.h
@@ -25,6 +25,14 @@ using BatchDistanceFunc = std::function<void(
 using QueryPreprocessFunc =
     zvec::ailego::DistanceBatch::DistanceBatchQueryPreprocessFunc;
 
+// Uniform int8 quantize kernel: fp32 -> int8 with a global affine transform:
+//   out[i] = clip(round(in[i] * scale + bias), 0, 127)
+// This signature is specific to the uniform-int8 quantizer and is NOT a
+// generic quantize contract. Raw function pointer (rather than std::function)
+// to avoid type-erasure call overhead on the per-record / per-query hot path.
+using UniformQuantizeFunc = void (*)(const float *in, size_t dim, float scale,
+                                     float bias, int8_t *out);
+
 enum class MetricType {
   kSquaredEuclidean,
   kCosine,
@@ -39,6 +47,7 @@ enum class DataType {
 
 enum class QuantizeType {
   kDefault,
+  kUniform,
 };
 
 DistanceFunc get_distance_func(MetricType metric_type, DataType data_type,
@@ -52,4 +61,12 @@ QueryPreprocessFunc get_query_preprocess_func(MetricType metric_type,
                                               DataType data_type,
                                               QuantizeType quantize_type);
 
+// Returns the SIMD kernel for the uniform quantizer on the current CPU for
+// the given output data_type, or nullptr if no SIMD implementation is
+// available (callers must keep a scalar fallback). This is a
+// uniform-specific accessor intentionally kept outside of the generic
+// (metric/data/quantize) dispatch above; data_type is retained so the
+// interface can grow to cover other output types (e.g. fp16) in the future.
+UniformQuantizeFunc get_uniform_quantize_func(DataType data_type);
+
 }  // namespace zvec::turbo
diff --git a/src/turbo/avx512_vnni/uniform_int8/quantize.cc b/src/turbo/avx512_vnni/uniform_int8/quantize.cc
new file mode 100644
index 000000000..140923a23
--- /dev/null
+++ b/src/turbo/avx512_vnni/uniform_int8/quantize.cc
@@ -0,0 +1,83 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// AVX-512 quantization for the uniform-int8 quantizer.
+//
+// Pipeline (16 floats per iteration):
+//   1. Load 16 fp32 values                                   (vmovups)
+//   2. Fused multiply-add:  v = in * scale + bias            (vfmadd)
+//   3. Clamp fp32 to [0, 127]                                (vminps + vmaxps)
+//   4. Convert fp32 -> int32 with current rounding mode      (vcvtps2dq)
+//   5. Saturating pack int32 -> int8                         (vpmovsdb)
+//   6. Store 16 int8 values                                  (vmovdqu)
+//
+// Output values are guaranteed to be in [0, 127] to enable the VNNI
+// abs trick (sub_epi8 + abs_epi8 + vpdpbusd) in the distance kernel.
+//
+// Compiled with -march=avx512vnni (set per-file in src/turbo/CMakeLists.txt).
+
+#include "avx512_vnni/uniform_int8/quantize.h"
+#include <algorithm>
+#include <cmath>
+
+#if defined(__AVX512F__)
+#include <immintrin.h>
+
+namespace zvec::turbo::avx512_vnni {
+
+void uniform_int8_quantize(const float *in, std::size_t dim, float scale,
+                           float bias, std::int8_t *out) {
+  const __m512 vscale = _mm512_set1_ps(scale);
+  const __m512 vbias = _mm512_set1_ps(bias);
+  const __m512 vlo = _mm512_setzero_ps();
+  const __m512 vhi = _mm512_set1_ps(127.0f);
+
+  std::size_t i = 0;
+  for (; i + 16 <= dim; i += 16) {
+    __m512 v = _mm512_loadu_ps(in + i);
+    v = _mm512_fmadd_ps(v, vscale, vbias);
+    // Clamp as fp32 BEFORE converting: vcvtps2dq returns INT32_MIN for NaN,
+    // +inf and values >= 2^31, which an int32 clamp would turn into 0 instead
+    // of 127. min_ps(v, vhi) yields vhi for NaN, like the scalar tail.
+    v = _mm512_max_ps(_mm512_min_ps(v, vhi), vlo);
+    // fp32 -> int32 (current rounding mode) -> int8; already in [0, 127].
+    const __m512i vi = _mm512_cvtps_epi32(v);
+    const __m128i packed = _mm512_cvtsepi32_epi8(vi);
+    _mm_storeu_si128(reinterpret_cast<__m128i *>(out + i), packed);
+  }
+
+  // Tail: scalar fallback (matches the scalar reference exactly).
+  for (; i < dim; ++i) {
+    float v = std::round(in[i] * scale + bias);
+    v = std::max(0.0f, std::min(127.0f, v));
+    out[i] = static_cast<std::int8_t>(v);
+  }
+}
+
+}  // namespace zvec::turbo::avx512_vnni
+
+#else  // no AVX-512 support — provide a no-op stub so dispatch can fall back
+
+namespace zvec::turbo::avx512_vnni {
+
+void uniform_int8_quantize(const float * /*in*/, std::size_t /*dim*/,
+                           float /*scale*/, float /*bias*/,
+                           std::int8_t * /*out*/) {
+  // Intentionally empty (leaves `out` untouched). NOTE(review): dispatch tests
+  // the runtime CPU flag, not this #if; confirm the stub can never be picked.
+}
+
+}  // namespace zvec::turbo::avx512_vnni
+
+#endif
diff --git a/src/turbo/avx512_vnni/uniform_int8/quantize.h b/src/turbo/avx512_vnni/uniform_int8/quantize.h
new file mode 100644
index 000000000..f544711a1
--- /dev/null
+++ b/src/turbo/avx512_vnni/uniform_int8/quantize.h
@@ -0,0 +1,33 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+namespace zvec::turbo::avx512_vnni {
+
+// AVX-512 vectorized quantization for the uniform-int8 quantizer.
+//   forward:  out[i] = clip(round(in[i] * scale + bias), 0, 127)
+//
+// Reads `dim` floats from `in` and writes `dim` int8 codes to `out`.
+// Rounding: full 16-element groups are converted with the current rounding
+// mode (round-to-nearest-even by default), while the scalar tail uses
+// std::round() (ties away from zero), so the two can differ on exact .5
+// ties. Builds without AVX-512 compile an empty stub that writes nothing.
+void uniform_int8_quantize(const float *in, std::size_t dim, float scale,
+                           float bias, std::int8_t *out);
+
+}  // namespace zvec::turbo::avx512_vnni
diff --git a/src/turbo/avx512_vnni/uniform_int8/squared_euclidean.cc b/src/turbo/avx512_vnni/uniform_int8/squared_euclidean.cc
new file mode 100644
index 000000000..1d6c0a0f4
--- /dev/null
+++ b/src/turbo/avx512_vnni/uniform_int8/squared_euclidean.cc
@@ -0,0 +1,222 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// AVX512-VNNI optimized squared Euclidean distance for uniform-quantized INT8.
+//
+// Since all vectors share a single global scale/bias, the distance is simply:
+//   sum((a[i] - b[i])^2)
+// computed entirely in the integer domain.  No per-vector reconstruction or
+// scalar dequantization is needed.
+//
+// Algorithm for each 64-element chunk (VNNI abs trick):
+//   1. Load 64 int8 values from each vector                (zmm load)
+//   2. Subtract int8 vectors: diff = a - b                  (vpsubb)
+//   3. Absolute value: |diff|                               (vpabsb)
+//   4. Squared accumulate via VNNI: acc += |diff| * |diff|  (vpdpbusd)
+//
+// Constraint: input values MUST be in [0, 127] so that the int8
+// subtraction does not overflow (max |diff| = 127 fits in both
+// uint8 and int8 for the VNNI multiply).
+//
+// This processes 64 bytes per iteration (2x throughput vs int16 widening)
+// and uses only 3 core SIMD ops in the inner loop.
+//
+// This file is compiled with per-file -march=avx512vnni (set in
+// CMakeLists.txt).
+
+#include "avx512_vnni/uniform_int8/squared_euclidean.h"
+#include "zvec/ailego/internal/platform.h"
+
+#if defined(__AVX512VNNI__) || (defined(_MSC_VER) && defined(__AVX512F__))
+#include <immintrin.h>
+#include <array>
+#include <cstdint>
+
+namespace zvec::turbo::avx512_vnni {
+
+// ---------------------------------------------------------------------------
+// Batch kernel: writes sum((vectors[i][k] - query[k])^2) over k < dim into
+// distances[i] for i < batch_size. Each 64-byte step loads all batch_size
+// vectors, then per vector issues a T0 prefetch at the same offset of
+// prefetch_ptrs[i] (skipped when null) and accumulates sub_epi8 -> abs_epi8
+// -> vpdpbusd. Bytes past the last full 64-byte step use the scalar loop.
+// Inputs must lie in [0, 127] so the int8 subtraction cannot wrap.
+// ---------------------------------------------------------------------------
+template <size_t batch_size>
+static ailego_force_inline void uniform_sq_l2_int8_batch_impl(
+    const void *query, const void *const *vectors,
+    const std::array<const void *, batch_size> &prefetch_ptrs, size_t dim,
+    float *distances) {
+  const int8_t *q = reinterpret_cast<const int8_t *>(query);
+
+  __m512i accs[batch_size];
+  for (size_t i = 0; i < batch_size; ++i) {
+    accs[i] = _mm512_setzero_si512();
+  }
+
+  // Process 64 int8 elements (one 512-bit register) per iteration.
+  size_t d = 0;
+  for (; d + 64 <= dim; d += 64) {
+    // Load 64 query bytes (unaligned load, shared by every vector below)
+    __m512i q_zmm =
+        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(q + d));
+
+    // Phase 1: load all data vectors into registers first
+    __m512i data_regs[batch_size];
+    for (size_t i = 0; i < batch_size; ++i) {
+      data_regs[i] = _mm512_loadu_si512(reinterpret_cast<const __m512i *>(
+          reinterpret_cast<const int8_t *>(vectors[i]) + d));
+    }
+
+    // Phase 2: prefetch the look-ahead vector, then acc += |data - query|^2
+    for (size_t i = 0; i < batch_size; ++i) {
+      if (prefetch_ptrs[i]) {
+        _mm_prefetch(
+            reinterpret_cast<const char *>(
+                reinterpret_cast<const int8_t *>(prefetch_ptrs[i]) + d),
+            _MM_HINT_T0);
+      }
+      __m512i diff = _mm512_sub_epi8(data_regs[i], q_zmm);
+      diff = _mm512_abs_epi8(diff);
+      accs[i] = _mm512_dpbusd_epi32(accs[i], diff, diff);
+    }
+  }
+
+  // Horizontal reduce: sum the 16 int32 lanes of each accumulator
+  std::array<int, batch_size> results{};
+  for (size_t i = 0; i < batch_size; ++i) {
+    results[i] = _mm512_reduce_add_epi32(accs[i]);
+  }
+
+  // Scalar tail: remaining dim % 64 elements, added to the reduced sums
+  for (; d < dim; ++d) {
+    int qv = static_cast<int>(q[d]);
+    for (size_t i = 0; i < batch_size; ++i) {
+      int diff = qv - static_cast<int>(
+                          reinterpret_cast<const int8_t *>(vectors[i])[d]);
+      results[i] += diff * diff;
+    }
+  }
+
+  for (size_t i = 0; i < batch_size; ++i) {
+    distances[i] = static_cast<float>(results[i]);
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Public: *distance = sum((a[k] - b[k])^2) over dim int8 values in [0, 127]
+// ---------------------------------------------------------------------------
+void uniform_squared_euclidean_int8_distance(const void *a, const void *b,
+                                             size_t dim, float *distance) {
+  const int8_t *lhs = reinterpret_cast<const int8_t *>(a);
+  const int8_t *rhs = reinterpret_cast<const int8_t *>(b);
+
+  // Four independent accumulators to break the data-dependency chain.
+  __m512i acc0 = _mm512_setzero_si512();
+  __m512i acc1 = _mm512_setzero_si512();
+  __m512i acc2 = _mm512_setzero_si512();
+  __m512i acc3 = _mm512_setzero_si512();
+
+  size_t d = 0;
+
+  // Main loop: process 256 bytes (4 × 64) per iteration.
+  for (; d + 256 <= dim; d += 256) {
+    __m512i diff0 = _mm512_abs_epi8(_mm512_sub_epi8(
+        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(lhs + d + 0)),
+        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(rhs + d + 0))));
+    __m512i diff1 = _mm512_abs_epi8(_mm512_sub_epi8(
+        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(lhs + d + 64)),
+        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(rhs + d + 64))));
+    __m512i diff2 = _mm512_abs_epi8(_mm512_sub_epi8(
+        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(lhs + d + 128)),
+        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(rhs + d + 128))));
+    __m512i diff3 = _mm512_abs_epi8(_mm512_sub_epi8(
+        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(lhs + d + 192)),
+        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(rhs + d + 192))));
+
+    acc0 = _mm512_dpbusd_epi32(acc0, diff0, diff0);
+    acc1 = _mm512_dpbusd_epi32(acc1, diff1, diff1);
+    acc2 = _mm512_dpbusd_epi32(acc2, diff2, diff2);
+    acc3 = _mm512_dpbusd_epi32(acc3, diff3, diff3);
+  }
+
+  // Bridge loop: remaining full 64-byte chunks, all accumulated into acc0.
+  for (; d + 64 <= dim; d += 64) {
+    __m512i diff = _mm512_abs_epi8(_mm512_sub_epi8(
+        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(lhs + d)),
+        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(rhs + d))));
+    acc0 = _mm512_dpbusd_epi32(acc0, diff, diff);
+  }
+
+  // Reduce four accumulators -> one, then horizontally to a scalar.
+  __m512i acc = _mm512_add_epi32(_mm512_add_epi32(acc0, acc1),
+                                 _mm512_add_epi32(acc2, acc3));
+  int result = _mm512_reduce_add_epi32(acc);
+
+  // Scalar tail: remaining dim % 64 elements in plain int arithmetic.
+  for (; d < dim; ++d) {
+    int diff = static_cast<int>(lhs[d]) - static_cast<int>(rhs[d]);
+    result += diff * diff;
+  }
+
+  *distance = static_cast<float>(result);
+}
+
+// ---------------------------------------------------------------------------
+// Public: batch squared Euclidean distance (int8 codes, no query preprocess)
+// ---------------------------------------------------------------------------
+void uniform_squared_euclidean_int8_batch_distance(const void *const *vectors,
+                                                   const void *query, size_t n,
+                                                   size_t dim,
+                                                   float *distances) {
+  static constexpr size_t kGroup = 4;              // vectors per kernel call
+  static constexpr size_t kLookahead = kGroup * 2;  // prefetch distance
+
+  size_t base = 0;
+  for (; base + kGroup <= n; base += kGroup) {
+    // Lane j prefetches the vector kLookahead positions ahead, if it exists.
+    std::array<const void *, kGroup> ahead;
+    for (size_t j = 0; j < kGroup; ++j) {
+      const size_t next = base + j + kLookahead;
+      ahead[j] = (next < n) ? vectors[next] : nullptr;
+    }
+    uniform_sq_l2_int8_batch_impl<kGroup>(query, vectors + base, ahead, dim,
+                                          distances + base);
+  }
+  // Leftover (n % kGroup) vectors: hand each one to the single-vector
+  // kernel, which needs neither an extra template instantiation nor the
+  // per-call prefetch array.
+  for (; base < n; ++base) {
+    uniform_squared_euclidean_int8_distance(vectors[base], query, dim,
+                                            &distances[base]);
+  }
+}
+
+}  // namespace zvec::turbo::avx512_vnni
+
+#else  // no AVX512 support
+
+namespace zvec::turbo::avx512_vnni {
+
+void uniform_squared_euclidean_int8_distance(const void * /*a*/,
+                                             const void * /*b*/, size_t /*dim*/,
+                                             float * /*distance*/) {}  // no-op
+
+void uniform_squared_euclidean_int8_batch_distance(
+    const void *const * /*vectors*/, const void * /*query*/, size_t /*n*/,
+    size_t /*dim*/, float * /*distances*/) {}  // no-op: distances untouched
+
+}  // namespace zvec::turbo::avx512_vnni
+
+#endif
diff --git a/src/turbo/avx512_vnni/uniform_int8/squared_euclidean.h b/src/turbo/avx512_vnni/uniform_int8/squared_euclidean.h
new file mode 100644
index 000000000..14bad3f2d
--- /dev/null
+++ b/src/turbo/avx512_vnni/uniform_int8/squared_euclidean.h
@@ -0,0 +1,35 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstddef>
+
+namespace zvec::turbo::avx512_vnni {
+
+// Writes sum((a[i] - b[i])^2) over `dim` uniform-quantized INT8 elements to
+// *distance. Unlike record_quantized, there is NO metadata tail after the
+// `dim` bytes. Values must be in [0, 127] (required by the VNNI abs trick).
+void uniform_squared_euclidean_int8_distance(const void *a, const void *b,
+                                             size_t dim, float *distance);
+
+// Batch version: distances[i] = squared Euclidean distance between vectors[i]
+// and `query` for i < n, all of length `dim`. No query preprocessing is needed
+// (unlike the record_quantized path, which shifts int8→uint8 for dpbusd).
+void uniform_squared_euclidean_int8_batch_distance(const void *const *vectors,
+                                                   const void *query, size_t n,
+                                                   size_t dim,
+                                                   float *distances);
+
+}  // namespace zvec::turbo::avx512_vnni
diff --git a/src/turbo/turbo.cc b/src/turbo/turbo.cc
index a731cfed1..adc9b785e 100644
--- a/src/turbo/turbo.cc
+++ b/src/turbo/turbo.cc
@@ -16,6 +16,8 @@
 #include <zvec/turbo/turbo.h>
 #include "avx512_vnni/record_quantized_int8/cosine.h"
 #include "avx512_vnni/record_quantized_int8/squared_euclidean.h"
+#include "avx512_vnni/uniform_int8/quantize.h"
+#include "avx512_vnni/uniform_int8/squared_euclidean.h"
 
 namespace zvec::turbo {
 
@@ -32,6 +34,13 @@ DistanceFunc get_distance_func(MetricType metric_type, DataType data_type,
         }
       }
     }
+    if (quantize_type == QuantizeType::kUniform) {
+      if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_VNNI) {
+        if (metric_type == MetricType::kSquaredEuclidean) {
+          return avx512_vnni::uniform_squared_euclidean_int8_distance;
+        }
+      }
+    }
   }
   return nullptr;
 }
@@ -50,6 +59,13 @@ BatchDistanceFunc get_batch_distance_func(MetricType metric_type,
         }
       }
     }
+    if (quantize_type == QuantizeType::kUniform) {
+      if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_VNNI) {
+        if (metric_type == MetricType::kSquaredEuclidean) {
+          return avx512_vnni::uniform_squared_euclidean_int8_batch_distance;
+        }
+      }
+    }
   }
   return nullptr;
 }
@@ -72,4 +88,16 @@ QueryPreprocessFunc get_query_preprocess_func(MetricType metric_type,
   return nullptr;
 }
 
+UniformQuantizeFunc get_uniform_quantize_func(DataType data_type) {
+  if (data_type != DataType::kInt8) {
+    return nullptr;
+  }
+  // The kernel itself only needs AVX-512F; it is gated on AVX512_VNNI because
+  // it lives in the avx512_vnni directory and shares that march flag.
+  if (!zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_VNNI) {
+    return nullptr;
+  }
+  return avx512_vnni::uniform_int8_quantize;
+}
+
 }  // namespace zvec::turbo
diff --git a/tests/core/quantizer/uniform_int8_reformer_test.cc b/tests/core/quantizer/uniform_int8_reformer_test.cc
new file mode 100644
index 000000000..ae47a88f0
--- /dev/null
+++ b/tests/core/quantizer/uniform_int8_reformer_test.cc
@@ -0,0 +1,539 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cmath>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <string>
+#include <vector>
+#include <gtest/gtest.h>
+#include <zvec/ailego/container/vector.h>
+#include "zvec/core/framework/index_factory.h"
+#include "zvec/core/framework/index_holder.h"
+
+using namespace zvec::core;
+
+// ---------------------------------------------------------------------------
+// UniformInt8 Converter + Reformer: General (MultiPassHolder, uniform dist)
+// ---------------------------------------------------------------------------
+TEST(UniformInt8Reformer, General) {
+  std::mt19937 gen(42);
+  std::uniform_real_distribution<float> dist(0.0f, 1.0f);
+
+  const size_t COUNT = 5000;
+  const size_t DIMENSION = 64;
+
+  IndexMeta meta;
+  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);
+
+  auto converter =
+      IndexFactory::CreateConverter("UniformInt8StreamingConverter");
+  ASSERT_TRUE(converter);
+  ASSERT_EQ(0u, converter->init(meta, zvec::ailego::Params()));
+
+  auto holder =
+      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
+          DIMENSION);
+  for (size_t i = 0; i < COUNT; ++i) {
+    zvec::ailego::NumericalVector<float> vec(DIMENSION);
+    for (size_t j = 0; j < DIMENSION; ++j) {
+      vec[j] = dist(gen);
+    }
+    holder->emplace(i + 1, vec);
+  }
+  EXPECT_EQ(COUNT, holder->count());
+  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());
+
+  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
+
+  auto &stats = converter->stats();
+  EXPECT_EQ(COUNT, stats.trained_count());
+  EXPECT_EQ(COUNT, stats.transformed_count());
+
+  auto holder2 = converter->result();
+  ASSERT_TRUE(holder2);
+  EXPECT_EQ(COUNT, holder2->count());
+  EXPECT_EQ(IndexMeta::DataType::DT_INT8, holder2->data_type());
+  EXPECT_EQ(DIMENSION, holder2->dimension());
+  // INT8: 1 byte per dim; FP32: 4 bytes per dim
+  EXPECT_EQ(holder->element_size(), holder2->element_size() * 4);
+
+  // Verify quantized values are in [0, 127]
+  auto iter_check = holder2->create_iterator();
+  for (; iter_check->is_valid(); iter_check->next()) {
+    const int8_t *quantized =
+        reinterpret_cast<const int8_t *>(iter_check->data());
+    for (size_t d = 0; d < DIMENSION; ++d) {
+      EXPECT_GE(quantized[d], 0) << "dim=" << d;
+      EXPECT_LE(quantized[d], 127) << "dim=" << d;
+    }
+  }
+
+  // Create reformer from converter's trained params
+  auto reformer = IndexFactory::CreateReformer("UniformInt8StreamingReformer");
+  ASSERT_TRUE(reformer);
+  ASSERT_EQ(0u, reformer->init(converter->meta().reformer_params()));
+
+  // Verify transform() produces the same int8 as the converter
+  auto iter = holder->create_iterator();
+  auto iter2 = holder2->create_iterator();
+  std::string buffer;
+
+  for (; iter->is_valid(); iter->next(), iter2->next()) {
+    ASSERT_TRUE(iter2->is_valid());
+    ASSERT_TRUE(iter->data());
+    ASSERT_TRUE(iter2->data());
+
+    std::string expected(reinterpret_cast<const char *>(iter2->data()),
+                         holder2->element_size());
+
+    IndexQueryMeta qmeta;
+    EXPECT_EQ(0, reformer->transform(
+                     iter->data(),
+                     IndexQueryMeta(holder->data_type(), holder->dimension()),
+                     &buffer, &qmeta));
+    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
+    EXPECT_EQ(DIMENSION, qmeta.dimension());
+    EXPECT_EQ(expected, buffer);
+
+    // Batch transform: the same input viewed as 4 sub-vectors of dimension/4
+    EXPECT_EQ(0, reformer->transform(iter->data(),
+                                     IndexQueryMeta(holder->data_type(),
+                                                    holder->dimension() / 4),
+                                     4, &buffer, &qmeta));
+    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
+    EXPECT_EQ(DIMENSION / 4, qmeta.dimension());
+    EXPECT_EQ(expected, buffer);
+
+    // convert() should produce the same result
+    buffer.clear();
+    EXPECT_EQ(0, reformer->convert(
+                     iter->data(),
+                     IndexQueryMeta(holder->data_type(), holder->dimension()),
+                     &buffer, &qmeta));
+    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
+    EXPECT_EQ(DIMENSION, qmeta.dimension());
+    EXPECT_EQ(expected, buffer);
+
+    // Batch convert: same 4-way split, still expected to match byte-for-byte
+    buffer.clear();
+    EXPECT_EQ(0, reformer->convert(iter->data(),
+                                   IndexQueryMeta(holder->data_type(),
+                                                  holder->dimension() / 4),
+                                   4, &buffer, &qmeta));
+    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
+    EXPECT_EQ(DIMENSION / 4, qmeta.dimension());
+    EXPECT_EQ(expected, buffer);
+  }
+}
+
+// ---------------------------------------------------------------------------
+// OnePassHolder: train on a one-pass holder, verify against a mirror copy
+// ---------------------------------------------------------------------------
+TEST(UniformInt8Reformer, OnePassHolder) {
+  std::mt19937 gen(123);
+  std::normal_distribution<float> dist(5.0f, 2.0f);
+
+  const size_t COUNT = 5000;
+  const size_t DIMENSION = 128;
+
+  IndexMeta meta;
+  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);
+
+  auto converter =
+      IndexFactory::CreateConverter("UniformInt8StreamingConverter");
+  ASSERT_TRUE(converter);
+  ASSERT_EQ(0u, converter->init(meta, zvec::ailego::Params()));
+
+  auto holder =
+      std::make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(
+          DIMENSION);
+  auto holder_mirror =
+      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
+          DIMENSION);
+  for (size_t i = 0; i < COUNT; ++i) {
+    zvec::ailego::NumericalVector<float> vec(DIMENSION);
+    for (size_t j = 0; j < DIMENSION; ++j) {
+      vec[j] = dist(gen);
+    }
+    holder->emplace(i + 1, vec);
+    holder_mirror->emplace(i + 1, vec);  // same data, iterated after training
+  }
+
+  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
+
+  auto holder2 = converter->result();
+  ASSERT_TRUE(holder2);
+  EXPECT_EQ(COUNT, holder2->count());
+  EXPECT_EQ(IndexMeta::DataType::DT_INT8, holder2->data_type());
+  EXPECT_EQ(DIMENSION, holder2->dimension());
+
+  auto reformer = IndexFactory::CreateReformer("UniformInt8StreamingReformer");
+  ASSERT_TRUE(reformer);
+  ASSERT_EQ(0u, reformer->init(converter->meta().reformer_params()));
+
+  auto iter = holder_mirror->create_iterator();
+  auto iter2 = holder2->create_iterator();
+  std::string buffer;
+
+  for (; iter->is_valid(); iter->next(), iter2->next()) {
+    ASSERT_TRUE(iter2->is_valid());
+    std::string expected(reinterpret_cast<const char *>(iter2->data()),
+                         holder2->element_size());
+
+    IndexQueryMeta qmeta;
+    EXPECT_EQ(0, reformer->transform(
+                     iter->data(),
+                     IndexQueryMeta(holder->data_type(), holder->dimension()),
+                     &buffer, &qmeta));
+    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
+    EXPECT_EQ(expected, buffer);
+  }
+}
+
+// ---------------------------------------------------------------------------
+// TrainedParams: verify scale/bias are exposed in the meta after training
+// ---------------------------------------------------------------------------
+TEST(UniformInt8Reformer, TrainedParams) {
+  std::mt19937 gen(99);
+  std::uniform_real_distribution<float> dist(-3.0f, 7.0f);
+
+  const size_t COUNT = 5000;
+  const size_t DIMENSION = 32;
+
+  IndexMeta meta;
+  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);
+
+  auto converter =
+      IndexFactory::CreateConverter("UniformInt8StreamingConverter");
+  ASSERT_TRUE(converter);
+  ASSERT_EQ(0u, converter->init(meta, zvec::ailego::Params()));
+
+  auto holder =
+      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
+          DIMENSION);
+  for (size_t i = 0; i < COUNT; ++i) {
+    zvec::ailego::NumericalVector<float> vec(DIMENSION);
+    for (size_t j = 0; j < DIMENSION; ++j) {
+      vec[j] = dist(gen);
+    }
+    holder->emplace(i + 1, vec);
+  }
+
+  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
+  EXPECT_EQ(COUNT, converter->stats().trained_count());
+
+  // Reformer params must hold a positive, finite scale and a finite bias
+  auto reformer_params = converter->meta().reformer_params();
+  float scale = 0.0f, bias = 0.0f;
+  EXPECT_TRUE(reformer_params.get("uniform_int8.reformer.scale", &scale));
+  EXPECT_TRUE(reformer_params.get("uniform_int8.reformer.bias", &bias));
+  EXPECT_GT(scale, 0.0f);
+  EXPECT_TRUE(std::isfinite(scale));
+  EXPECT_TRUE(std::isfinite(bias));
+
+  // Converter params must carry the same scale/bias under the same keys
+  auto conv_params = converter->meta().converter_params();
+  float conv_scale = 0.0f, conv_bias = 0.0f;
+  EXPECT_TRUE(conv_params.get("uniform_int8.reformer.scale", &conv_scale));
+  EXPECT_TRUE(conv_params.get("uniform_int8.reformer.bias", &conv_bias));
+  EXPECT_FLOAT_EQ(scale, conv_scale);
+  EXPECT_FLOAT_EQ(bias, conv_bias);
+
+  // Verify meta reflects the correct reformer and metric
+  EXPECT_EQ("UniformInt8StreamingReformer", converter->meta().reformer_name());
+  EXPECT_EQ("UniformInt8", converter->meta().metric_name());
+}
+
+// ---------------------------------------------------------------------------
+// Revert: verify int8 → float dequantization round-trip quality
+// ---------------------------------------------------------------------------
+TEST(UniformInt8Reformer, Revert) {
+  std::mt19937 gen(77);
+  std::uniform_real_distribution<float> dist(0.0f, 10.0f);
+
+  const size_t COUNT = 100;
+  const size_t DIMENSION = 16;
+
+  IndexMeta meta;
+  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);
+
+  auto converter =
+      IndexFactory::CreateConverter("UniformInt8StreamingConverter");
+  ASSERT_TRUE(converter);
+  ASSERT_EQ(0u, converter->init(meta, zvec::ailego::Params()));
+
+  auto holder =
+      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
+          DIMENSION);
+  for (size_t i = 0; i < COUNT; ++i) {
+    zvec::ailego::NumericalVector<float> vec(DIMENSION);
+    for (size_t j = 0; j < DIMENSION; ++j) {
+      vec[j] = dist(gen);
+    }
+    holder->emplace(i + 1, vec);
+  }
+
+  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
+
+  auto reformer = IndexFactory::CreateReformer("UniformInt8StreamingReformer");
+  ASSERT_TRUE(reformer);
+  ASSERT_EQ(0u, reformer->init(converter->meta().reformer_params()));
+
+  // Verify round-trip: float → int8 → float
+  auto iter = holder->create_iterator();
+  std::string quantized_buf, reverted_buf;
+
+  for (; iter->is_valid(); iter->next()) {
+    const float *original = reinterpret_cast<const float *>(iter->data());
+
+    IndexQueryMeta qmeta;
+    ASSERT_EQ(0, reformer->transform(
+                     iter->data(),
+                     IndexQueryMeta(holder->data_type(), holder->dimension()),
+                     &quantized_buf, &qmeta));
+
+    ASSERT_EQ(0, reformer->revert(quantized_buf.data(), qmeta, &reverted_buf));
+
+    const float *reverted =
+        reinterpret_cast<const float *>(reverted_buf.data());
+
+    // Ideal rounding error is step_size / 2 with step_size ≈ range / 127;
+    // the check below deliberately allows 1.5 * step_size of slack.
+    float range = 10.0f;  // approximate (inputs are drawn from [0, 10))
+    float max_error = range / 127.0f;
+    for (size_t d = 0; d < DIMENSION; ++d) {
+      EXPECT_NEAR(original[d], reverted[d], max_error * 1.5f)
+          << "dim=" << d << " original=" << original[d]
+          << " reverted=" << reverted[d];
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Normalize: after training, feed one document with a known score through
+// reformer->normalize() and check the score comes back divided by scale^2,
+// where scale is the trained "uniform_int8.reformer.scale" parameter.
+// ---------------------------------------------------------------------------
+TEST(UniformInt8Reformer, Normalize) {
+  const size_t COUNT = 1000;
+  const size_t DIMENSION = 32;
+
+  std::mt19937 gen(55);
+  std::uniform_real_distribution<float> dist(0.0f, 5.0f);
+
+  IndexMeta meta;
+  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);
+
+  auto converter =
+      IndexFactory::CreateConverter("UniformInt8StreamingConverter");
+  ASSERT_TRUE(converter);
+  ASSERT_EQ(0u, converter->init(meta, zvec::ailego::Params()));
+
+  // Training set: COUNT random float vectors of DIMENSION components.
+  auto holder =
+      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
+          DIMENSION);
+  for (size_t i = 0; i < COUNT; ++i) {
+    zvec::ailego::NumericalVector<float> vec(DIMENSION);
+    for (size_t j = 0; j < DIMENSION; ++j) {
+      vec[j] = dist(gen);
+    }
+    holder->emplace(i + 1, vec);
+  }
+
+  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
+
+  auto reformer_params = converter->meta().reformer_params();
+  float scale = 0.0f;
+  ASSERT_TRUE(reformer_params.get("uniform_int8.reformer.scale", &scale));
+  // The expectation below divides by scale^2. With a zero (or NaN) scale the
+  // expected value and the tolerance would both be non-finite and the final
+  // comparison would no longer check anything, so stop here instead.
+  ASSERT_GT(scale, 0.0f);
+
+  auto reformer = IndexFactory::CreateReformer("UniformInt8StreamingReformer");
+  ASSERT_TRUE(reformer);
+  ASSERT_EQ(0u, reformer->init(reformer_params));
+
+  // Create mock results and verify normalize rescales by 1/scale^2
+  IndexDocumentList results;
+  float int8_score = 100.0f;
+  IndexDocument doc;
+  *doc.mutable_score() = int8_score;
+  results.push_back(doc);
+
+  // The query pointer is passed as nullptr: this test relies on normalize()
+  // not reading the query.
+  ASSERT_EQ(
+      0, reformer->normalize(
+             nullptr, IndexQueryMeta(IndexMeta::DataType::DT_FP32, DIMENSION),
+             results));
+
+  // Exactly one document went in; make sure it is still there before
+  // indexing into the list.
+  ASSERT_EQ(1u, results.size());
+
+  float expected_score = int8_score / (scale * scale);
+  EXPECT_NEAR(results[0].score(), expected_score, expected_score * 1e-5f);
+}
+
+// ---------------------------------------------------------------------------
+// InitConverterWithTrainedParams: train one converter, then hand its
+// converter params to a second converter that only transforms (no train).
+// A reformer initialised from the trained reformer params must produce the
+// same int8 bytes as the second converter, via both transform() and
+// convert().
+// ---------------------------------------------------------------------------
+TEST(UniformInt8Reformer, InitConverterWithTrainedParams) {
+  const size_t COUNT = 5000;
+  const size_t DIMENSION = 12;
+
+  std::mt19937 rng(42);
+  std::uniform_real_distribution<float> unit_dist(0.0f, 1.0f);
+
+  IndexMeta fp32_meta;
+  fp32_meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);
+
+  // Step 1: a converter that is actually trained on the data.
+  auto trainer = IndexFactory::CreateConverter("UniformInt8StreamingConverter");
+  ASSERT_TRUE(trainer);
+  ASSERT_EQ(0u, trainer->init(fp32_meta, zvec::ailego::Params()));
+
+  auto source =
+      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
+          DIMENSION);
+  for (size_t row = 0; row < COUNT; ++row) {
+    zvec::ailego::NumericalVector<float> values(DIMENSION);
+    for (size_t col = 0; col < DIMENSION; ++col) {
+      values[col] = unit_dist(rng);
+    }
+    source->emplace(row + 1, values);
+  }
+
+  ASSERT_EQ(0, trainer->train(source));
+  auto trained_reformer_params = trainer->meta().reformer_params();
+  auto trained_converter_params = trainer->meta().converter_params();
+
+  // Step 2: a fresh converter initialised from the trained converter params;
+  // it goes straight to transform() without a train() call.
+  auto restored =
+      IndexFactory::CreateConverter("UniformInt8StreamingConverter");
+  ASSERT_TRUE(restored);
+  ASSERT_EQ(0, restored->init(fp32_meta, trained_converter_params));
+  ASSERT_EQ(0, restored->transform(source));
+
+  auto &restored_stats = restored->stats();
+  EXPECT_EQ(0u, restored_stats.trained_count());
+  EXPECT_EQ(COUNT, restored_stats.transformed_count());
+
+  auto quantized = restored->result();
+  ASSERT_TRUE(quantized);
+  EXPECT_EQ(COUNT, quantized->count());
+  EXPECT_EQ(IndexMeta::DataType::DT_INT8, quantized->data_type());
+  EXPECT_EQ(DIMENSION, quantized->dimension());
+
+  // Step 3: a reformer built from the trained reformer params.
+  auto reformer = IndexFactory::CreateReformer("UniformInt8StreamingReformer");
+  ASSERT_TRUE(reformer);
+  ASSERT_EQ(0u, reformer->init(trained_reformer_params));
+
+  // Describes the float input handed to the reformer; identical for every
+  // vector, so it is built once.
+  const IndexQueryMeta input_meta(source->data_type(), source->dimension());
+
+  auto src_it = source->create_iterator();
+  auto quant_it = quantized->create_iterator();
+  std::string out_bytes;
+
+  // Walk both holders in lockstep and compare byte-for-byte.
+  while (src_it->is_valid()) {
+    ASSERT_TRUE(quant_it->is_valid());
+    const char *quant_bytes = reinterpret_cast<const char *>(quant_it->data());
+    std::string expected(quant_bytes, quantized->element_size());
+
+    IndexQueryMeta out_meta;
+
+    // transform() path
+    EXPECT_EQ(0, reformer->transform(src_it->data(), input_meta, &out_bytes,
+                                     &out_meta));
+    EXPECT_EQ(IndexMeta::DataType::DT_INT8, out_meta.data_type());
+    EXPECT_EQ(DIMENSION, out_meta.dimension());
+    EXPECT_EQ(expected, out_bytes);
+
+    // convert() path
+    out_bytes.clear();
+    EXPECT_EQ(0, reformer->convert(src_it->data(), input_meta, &out_bytes,
+                                   &out_meta));
+    EXPECT_EQ(expected, out_bytes);
+
+    src_it->next();
+    quant_it->next();
+  }
+}
+
+// ---------------------------------------------------------------------------
+// LosslessIntegerFastPath: when all training values are integers within
+// [0, 127], the trained scale is expected to be 1.0, the quantized int8
+// values are expected to equal the original integers, and revert() is
+// expected to return the original floats exactly.
+// ---------------------------------------------------------------------------
+TEST(UniformInt8Reformer, LosslessIntegerFastPath) {
+  const size_t COUNT = 100;
+  const size_t DIMENSION = 8;
+
+  IndexMeta meta;
+  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);
+
+  auto converter =
+      IndexFactory::CreateConverter("UniformInt8StreamingConverter");
+  ASSERT_TRUE(converter);
+  ASSERT_EQ(0u, converter->init(meta, zvec::ailego::Params()));
+
+  auto holder =
+      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
+          DIMENSION);
+
+  // Fill with integer values in [0, 50]
+  std::mt19937 gen(10);
+  std::uniform_int_distribution<int> idist(0, 50);
+  for (size_t i = 0; i < COUNT; ++i) {
+    zvec::ailego::NumericalVector<float> vec(DIMENSION);
+    for (size_t j = 0; j < DIMENSION; ++j) {
+      vec[j] = static_cast<float>(idist(gen));
+    }
+    holder->emplace(i + 1, vec);
+  }
+
+  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
+
+  // scale should be 1.0 for lossless integer path
+  auto reformer_params = converter->meta().reformer_params();
+  float scale = 0.0f;
+  ASSERT_TRUE(reformer_params.get("uniform_int8.reformer.scale", &scale));
+  EXPECT_FLOAT_EQ(1.0f, scale);
+
+  // Verify exact round-trip for integer values
+  auto reformer = IndexFactory::CreateReformer("UniformInt8StreamingReformer");
+  ASSERT_TRUE(reformer);
+  ASSERT_EQ(0u, reformer->init(reformer_params));
+
+  auto iter = holder->create_iterator();
+  std::string quantized_buf, reverted_buf;
+
+  for (; iter->is_valid(); iter->next()) {
+    const float *original = reinterpret_cast<const float *>(iter->data());
+
+    IndexQueryMeta qmeta;
+    ASSERT_EQ(0, reformer->transform(
+                     iter->data(),
+                     IndexQueryMeta(holder->data_type(), holder->dimension()),
+                     &quantized_buf, &qmeta));
+
+    // Guard the raw int8 reads below against a short output buffer.
+    ASSERT_GE(quantized_buf.size(), DIMENSION * sizeof(int8_t));
+
+    // Verify quantized values match original integers. No offset is
+    // subtracted: the test assumes the training minimum is 0 — TODO confirm
+    // against the converter if the input range is ever changed.
+    const int8_t *quantized =
+        reinterpret_cast<const int8_t *>(quantized_buf.data());
+    for (size_t d = 0; d < DIMENSION; ++d) {
+      EXPECT_EQ(static_cast<int8_t>(original[d]), quantized[d])
+          << "dim=" << d;
+    }
+
+    // Revert should give exact values back
+    ASSERT_EQ(0, reformer->revert(quantized_buf.data(), qmeta, &reverted_buf));
+
+    // Guard the raw float reads below against a short output buffer.
+    ASSERT_GE(reverted_buf.size(), DIMENSION * sizeof(float));
+    const float *reverted =
+        reinterpret_cast<const float *>(reverted_buf.data());
+    for (size_t d = 0; d < DIMENSION; ++d) {
+      EXPECT_FLOAT_EQ(original[d], reverted[d]) << "dim=" << d;
+    }
+  }
+}