xref: /aosp_15_r20/external/pytorch/torch/csrc/serialization.cpp (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1*da0073e9SAndroid Build Coastguard Worker #include <torch/csrc/python_headers.h>
2*da0073e9SAndroid Build Coastguard Worker #include <system_error>
3*da0073e9SAndroid Build Coastguard Worker #include <vector>
4*da0073e9SAndroid Build Coastguard Worker 
5*da0073e9SAndroid Build Coastguard Worker #include <ATen/ops/from_blob.h>
6*da0073e9SAndroid Build Coastguard Worker #include <c10/core/CPUAllocator.h>
7*da0073e9SAndroid Build Coastguard Worker #include <torch/csrc/THP.h>
8*da0073e9SAndroid Build Coastguard Worker #include <torch/csrc/serialization.h>
9*da0073e9SAndroid Build Coastguard Worker 
10*da0073e9SAndroid Build Coastguard Worker template <class io>
11*da0073e9SAndroid Build Coastguard Worker Py_ssize_t doPartialRead(io fildes, void* buf, size_t nbytes);
12*da0073e9SAndroid Build Coastguard Worker 
13*da0073e9SAndroid Build Coastguard Worker template <class io>
14*da0073e9SAndroid Build Coastguard Worker Py_ssize_t doPartialWrite(io fildes, void* buf, size_t nbytes);
15*da0073e9SAndroid Build Coastguard Worker 
16*da0073e9SAndroid Build Coastguard Worker static Py_ssize_t doPartialPythonReadBuffered(
17*da0073e9SAndroid Build Coastguard Worker     PyObject* fildes,
18*da0073e9SAndroid Build Coastguard Worker     void* buf,
19*da0073e9SAndroid Build Coastguard Worker     size_t nbytes);
20*da0073e9SAndroid Build Coastguard Worker static Py_ssize_t doPartialPythonReadInto(
21*da0073e9SAndroid Build Coastguard Worker     PyObject* fildes,
22*da0073e9SAndroid Build Coastguard Worker     void* buf,
23*da0073e9SAndroid Build Coastguard Worker     size_t nbytes);
24*da0073e9SAndroid Build Coastguard Worker static Py_ssize_t doPartialPythonWrite(
25*da0073e9SAndroid Build Coastguard Worker     PyObject* fildes,
26*da0073e9SAndroid Build Coastguard Worker     void* buf,
27*da0073e9SAndroid Build Coastguard Worker     size_t nbytes);
28*da0073e9SAndroid Build Coastguard Worker 
29*da0073e9SAndroid Build Coastguard Worker template <>
doPartialRead(int fildes,void * buf,size_t nbytes)30*da0073e9SAndroid Build Coastguard Worker Py_ssize_t doPartialRead<int>(int fildes, void* buf, size_t nbytes) {
31*da0073e9SAndroid Build Coastguard Worker   return read(fildes, buf, nbytes);
32*da0073e9SAndroid Build Coastguard Worker }
33*da0073e9SAndroid Build Coastguard Worker 
34*da0073e9SAndroid Build Coastguard Worker template <>
doPartialRead(PyObject * fildes,void * buf,size_t nbytes)35*da0073e9SAndroid Build Coastguard Worker Py_ssize_t doPartialRead<PyObject*>(
36*da0073e9SAndroid Build Coastguard Worker     PyObject* fildes,
37*da0073e9SAndroid Build Coastguard Worker     void* buf,
38*da0073e9SAndroid Build Coastguard Worker     size_t nbytes) {
39*da0073e9SAndroid Build Coastguard Worker   // Try to use fildes.readinto() instead of fildes.read()
40*da0073e9SAndroid Build Coastguard Worker   // because it is more memory efficient.
41*da0073e9SAndroid Build Coastguard Worker   // TODO: Stop calling PyObject_HasAttrString() in a loop on our read loop
42*da0073e9SAndroid Build Coastguard Worker   auto has_readinto = PyObject_HasAttrString(fildes, "readinto") == 1;
43*da0073e9SAndroid Build Coastguard Worker   if (has_readinto) {
44*da0073e9SAndroid Build Coastguard Worker     return doPartialPythonReadInto(fildes, buf, nbytes);
45*da0073e9SAndroid Build Coastguard Worker   }
46*da0073e9SAndroid Build Coastguard Worker   return doPartialPythonReadBuffered(fildes, buf, nbytes);
47*da0073e9SAndroid Build Coastguard Worker }
48*da0073e9SAndroid Build Coastguard Worker 
49*da0073e9SAndroid Build Coastguard Worker template <>
doPartialWrite(int fildes,void * buf,size_t nbytes)50*da0073e9SAndroid Build Coastguard Worker Py_ssize_t doPartialWrite<int>(int fildes, void* buf, size_t nbytes) {
51*da0073e9SAndroid Build Coastguard Worker   return write(fildes, buf, nbytes);
52*da0073e9SAndroid Build Coastguard Worker }
53*da0073e9SAndroid Build Coastguard Worker 
54*da0073e9SAndroid Build Coastguard Worker template <>
doPartialWrite(PyObject * fildes,void * buf,size_t nbytes)55*da0073e9SAndroid Build Coastguard Worker Py_ssize_t doPartialWrite<PyObject*>(
56*da0073e9SAndroid Build Coastguard Worker     PyObject* fildes,
57*da0073e9SAndroid Build Coastguard Worker     void* buf,
58*da0073e9SAndroid Build Coastguard Worker     size_t nbytes) {
59*da0073e9SAndroid Build Coastguard Worker   return doPartialPythonWrite(fildes, buf, nbytes);
60*da0073e9SAndroid Build Coastguard Worker }
61*da0073e9SAndroid Build Coastguard Worker 
isUnsupportedOperation()62*da0073e9SAndroid Build Coastguard Worker static inline bool isUnsupportedOperation() {
63*da0073e9SAndroid Build Coastguard Worker   THPObjectPtr io(PyImport_ImportModule("io"));
64*da0073e9SAndroid Build Coastguard Worker   if (!io)
65*da0073e9SAndroid Build Coastguard Worker     throw python_error();
66*da0073e9SAndroid Build Coastguard Worker   THPObjectPtr exception(PyObject_GetAttrString(io, "UnsupportedOperation"));
67*da0073e9SAndroid Build Coastguard Worker   if (!exception)
68*da0073e9SAndroid Build Coastguard Worker     throw python_error();
69*da0073e9SAndroid Build Coastguard Worker   return PyErr_ExceptionMatches(exception.get());
70*da0073e9SAndroid Build Coastguard Worker }
71*da0073e9SAndroid Build Coastguard Worker 
72*da0073e9SAndroid Build Coastguard Worker // Call Python fildes.read(nbytes) and copy it to buf.
doPartialPythonReadBuffered(PyObject * fildes,void * buf,size_t raw_nbytes)73*da0073e9SAndroid Build Coastguard Worker static inline Py_ssize_t doPartialPythonReadBuffered(
74*da0073e9SAndroid Build Coastguard Worker     PyObject* fildes,
75*da0073e9SAndroid Build Coastguard Worker     void* buf,
76*da0073e9SAndroid Build Coastguard Worker     size_t raw_nbytes) {
77*da0073e9SAndroid Build Coastguard Worker   // If we request a large amount of data, f.read() will internally try to
78*da0073e9SAndroid Build Coastguard Worker   // allocate a buffer of that size.  This is counterproductive, because
79*da0073e9SAndroid Build Coastguard Worker   // it's not the buffer we ultimately want to write the data into.  Read
80*da0073e9SAndroid Build Coastguard Worker   // less than that and avoid allocating too much extra memory.
81*da0073e9SAndroid Build Coastguard Worker   // TODO: Maybe 260 KB is a bit small...
82*da0073e9SAndroid Build Coastguard Worker   const size_t nbytes = std::min<size_t>(raw_nbytes, 262144u); // 2^18 (~260 KB)
83*da0073e9SAndroid Build Coastguard Worker 
84*da0073e9SAndroid Build Coastguard Worker   THPObjectPtr r(PyObject_CallMethod(fildes, "read", "i", nbytes));
85*da0073e9SAndroid Build Coastguard Worker   if (!r)
86*da0073e9SAndroid Build Coastguard Worker     throw python_error();
87*da0073e9SAndroid Build Coastguard Worker 
88*da0073e9SAndroid Build Coastguard Worker   auto size = PyBytes_GET_SIZE(r.get());
89*da0073e9SAndroid Build Coastguard Worker   const void* py_buf = PyBytes_AsString(r.get());
90*da0073e9SAndroid Build Coastguard Worker 
91*da0073e9SAndroid Build Coastguard Worker   // we read EOF
92*da0073e9SAndroid Build Coastguard Worker   if (size == 0) {
93*da0073e9SAndroid Build Coastguard Worker     return 0;
94*da0073e9SAndroid Build Coastguard Worker   }
95*da0073e9SAndroid Build Coastguard Worker 
96*da0073e9SAndroid Build Coastguard Worker   // Slurp it into the buffer we actually want
97*da0073e9SAndroid Build Coastguard Worker   memcpy(buf, py_buf, size);
98*da0073e9SAndroid Build Coastguard Worker 
99*da0073e9SAndroid Build Coastguard Worker   return size;
100*da0073e9SAndroid Build Coastguard Worker }
101*da0073e9SAndroid Build Coastguard Worker 
102*da0073e9SAndroid Build Coastguard Worker // Either does fildes.readinto(buf) or fildes.write(buf)
doPartialPythonIO(PyObject * fildes,void * buf,size_t nbytes,bool is_read)103*da0073e9SAndroid Build Coastguard Worker static inline Py_ssize_t doPartialPythonIO(
104*da0073e9SAndroid Build Coastguard Worker     PyObject* fildes,
105*da0073e9SAndroid Build Coastguard Worker     void* buf,
106*da0073e9SAndroid Build Coastguard Worker     size_t nbytes,
107*da0073e9SAndroid Build Coastguard Worker     bool is_read) {
108*da0073e9SAndroid Build Coastguard Worker   auto rw_flag = is_read ? PyBUF_WRITE : PyBUF_READ;
109*da0073e9SAndroid Build Coastguard Worker   THPObjectPtr memview(PyMemoryView_FromMemory(
110*da0073e9SAndroid Build Coastguard Worker       reinterpret_cast<char*>(buf), static_cast<Py_ssize_t>(nbytes), rw_flag));
111*da0073e9SAndroid Build Coastguard Worker   if (!memview)
112*da0073e9SAndroid Build Coastguard Worker     throw python_error();
113*da0073e9SAndroid Build Coastguard Worker 
114*da0073e9SAndroid Build Coastguard Worker   std::string method = "write";
115*da0073e9SAndroid Build Coastguard Worker   if (is_read) {
116*da0073e9SAndroid Build Coastguard Worker     method = "readinto";
117*da0073e9SAndroid Build Coastguard Worker   }
118*da0073e9SAndroid Build Coastguard Worker   THPObjectPtr r(
119*da0073e9SAndroid Build Coastguard Worker       PyObject_CallMethod(fildes, method.c_str(), "O", memview.get()));
120*da0073e9SAndroid Build Coastguard Worker   if (r) {
121*da0073e9SAndroid Build Coastguard Worker     return PyLong_AsSsize_t(r.get());
122*da0073e9SAndroid Build Coastguard Worker   }
123*da0073e9SAndroid Build Coastguard Worker 
124*da0073e9SAndroid Build Coastguard Worker   // fildes.readinto can return UnsupportedOperation so fall back to
125*da0073e9SAndroid Build Coastguard Worker   // fildes.read.
126*da0073e9SAndroid Build Coastguard Worker   if (is_read && isUnsupportedOperation()) {
127*da0073e9SAndroid Build Coastguard Worker     PyErr_Clear();
128*da0073e9SAndroid Build Coastguard Worker     return doPartialPythonReadBuffered(fildes, buf, nbytes);
129*da0073e9SAndroid Build Coastguard Worker   }
130*da0073e9SAndroid Build Coastguard Worker   throw python_error();
131*da0073e9SAndroid Build Coastguard Worker }
132*da0073e9SAndroid Build Coastguard Worker 
133*da0073e9SAndroid Build Coastguard Worker // Call Python fildes.readinto(buf)
doPartialPythonReadInto(PyObject * fildes,void * buf,size_t nbytes)134*da0073e9SAndroid Build Coastguard Worker static Py_ssize_t doPartialPythonReadInto(
135*da0073e9SAndroid Build Coastguard Worker     PyObject* fildes,
136*da0073e9SAndroid Build Coastguard Worker     void* buf,
137*da0073e9SAndroid Build Coastguard Worker     size_t nbytes) {
138*da0073e9SAndroid Build Coastguard Worker   return doPartialPythonIO(fildes, buf, nbytes, /* is_read */ true);
139*da0073e9SAndroid Build Coastguard Worker }
140*da0073e9SAndroid Build Coastguard Worker 
141*da0073e9SAndroid Build Coastguard Worker // Call Python fildes.write(buf)
doPartialPythonWrite(PyObject * fildes,void * buf,size_t nbytes)142*da0073e9SAndroid Build Coastguard Worker static Py_ssize_t doPartialPythonWrite(
143*da0073e9SAndroid Build Coastguard Worker     PyObject* fildes,
144*da0073e9SAndroid Build Coastguard Worker     void* buf,
145*da0073e9SAndroid Build Coastguard Worker     size_t nbytes) {
146*da0073e9SAndroid Build Coastguard Worker   return doPartialPythonIO(fildes, buf, nbytes, /* is_read */ false);
147*da0073e9SAndroid Build Coastguard Worker }
148*da0073e9SAndroid Build Coastguard Worker 
149*da0073e9SAndroid Build Coastguard Worker // Requires that we read EXACTLY nbytes; fails if we don't.
150*da0073e9SAndroid Build Coastguard Worker template <typename io>
doRead(io fildes,void * raw_buf,size_t nbytes)151*da0073e9SAndroid Build Coastguard Worker void doRead(io fildes, void* raw_buf, size_t nbytes) {
152*da0073e9SAndroid Build Coastguard Worker   char* buf = static_cast<char*>(raw_buf);
153*da0073e9SAndroid Build Coastguard Worker   while (nbytes > 0) {
154*da0073e9SAndroid Build Coastguard Worker     errno = 0; // doPartialRead may not set errno
155*da0073e9SAndroid Build Coastguard Worker     // we read in 1GB blocks to avoid bugs on Mac OS X Lion
156*da0073e9SAndroid Build Coastguard Worker     // see https://github.com/pytorch/pytorch/issues/1031 for more details
157*da0073e9SAndroid Build Coastguard Worker     Py_ssize_t r =
158*da0073e9SAndroid Build Coastguard Worker         doPartialRead(fildes, buf, std::min<size_t>(nbytes, 1073741824));
159*da0073e9SAndroid Build Coastguard Worker     if (r < 0) {
160*da0073e9SAndroid Build Coastguard Worker       int err = errno;
161*da0073e9SAndroid Build Coastguard Worker       TORCH_INTERNAL_ASSERT(
162*da0073e9SAndroid Build Coastguard Worker           err != 0, "read(): impossible! r < 0, but no errno was set");
163*da0073e9SAndroid Build Coastguard Worker       TORCH_INTERNAL_ASSERT(
164*da0073e9SAndroid Build Coastguard Worker           err != EAGAIN,
165*da0073e9SAndroid Build Coastguard Worker           "read(): non-blocking fd ",
166*da0073e9SAndroid Build Coastguard Worker           fildes,
167*da0073e9SAndroid Build Coastguard Worker           " read EAGAIN; cowardly refusing to spin-wait");
168*da0073e9SAndroid Build Coastguard Worker       if (err == EINTR) {
169*da0073e9SAndroid Build Coastguard Worker         continue;
170*da0073e9SAndroid Build Coastguard Worker       } else {
171*da0073e9SAndroid Build Coastguard Worker         AT_ERROR("read(): fd ", fildes, " failed with ", strerror(err));
172*da0073e9SAndroid Build Coastguard Worker       }
173*da0073e9SAndroid Build Coastguard Worker     } else if (r == 0) {
174*da0073e9SAndroid Build Coastguard Worker       break;
175*da0073e9SAndroid Build Coastguard Worker     }
176*da0073e9SAndroid Build Coastguard Worker     buf += r;
177*da0073e9SAndroid Build Coastguard Worker     // This is guaranteed by POSIX, but I just want to be double-sure
178*da0073e9SAndroid Build Coastguard Worker     // to not underflow a signed integer.
179*da0073e9SAndroid Build Coastguard Worker     AT_ASSERT(static_cast<size_t>(r) <= nbytes);
180*da0073e9SAndroid Build Coastguard Worker     nbytes -= r;
181*da0073e9SAndroid Build Coastguard Worker   }
182*da0073e9SAndroid Build Coastguard Worker   if (nbytes != 0) {
183*da0073e9SAndroid Build Coastguard Worker     AT_ERROR(
184*da0073e9SAndroid Build Coastguard Worker         "unexpected EOF, expected ",
185*da0073e9SAndroid Build Coastguard Worker         nbytes,
186*da0073e9SAndroid Build Coastguard Worker         " more bytes. The file might be corrupted.");
187*da0073e9SAndroid Build Coastguard Worker   }
188*da0073e9SAndroid Build Coastguard Worker }
189*da0073e9SAndroid Build Coastguard Worker 
190*da0073e9SAndroid Build Coastguard Worker template <typename io>
doWrite(io fildes,void * raw_buf,size_t nbytes)191*da0073e9SAndroid Build Coastguard Worker void doWrite(io fildes, void* raw_buf, size_t nbytes) {
192*da0073e9SAndroid Build Coastguard Worker   char* buf = static_cast<char*>(raw_buf);
193*da0073e9SAndroid Build Coastguard Worker   while (nbytes > 0) {
194*da0073e9SAndroid Build Coastguard Worker     errno = 0; // doPartialWrite may not set errno
195*da0073e9SAndroid Build Coastguard Worker     // we write in 1GB blocks to avoid bugs on Mac OS X Lion
196*da0073e9SAndroid Build Coastguard Worker     // see https://github.com/pytorch/pytorch/issues/1031 for more details
197*da0073e9SAndroid Build Coastguard Worker     Py_ssize_t r =
198*da0073e9SAndroid Build Coastguard Worker         doPartialWrite(fildes, buf, std::min<size_t>(nbytes, 1073741824));
199*da0073e9SAndroid Build Coastguard Worker     if (r < 0) {
200*da0073e9SAndroid Build Coastguard Worker       int err = errno;
201*da0073e9SAndroid Build Coastguard Worker       TORCH_INTERNAL_ASSERT(
202*da0073e9SAndroid Build Coastguard Worker           err != 0, "write(): impossible! r < 0, but no errno was set");
203*da0073e9SAndroid Build Coastguard Worker       TORCH_INTERNAL_ASSERT(
204*da0073e9SAndroid Build Coastguard Worker           err != EAGAIN,
205*da0073e9SAndroid Build Coastguard Worker           "write(): non-blocking fd ",
206*da0073e9SAndroid Build Coastguard Worker           fildes,
207*da0073e9SAndroid Build Coastguard Worker           " read EAGAIN; cowardly refusing to spin-wait");
208*da0073e9SAndroid Build Coastguard Worker       if (err == EINTR) {
209*da0073e9SAndroid Build Coastguard Worker         continue;
210*da0073e9SAndroid Build Coastguard Worker       } else {
211*da0073e9SAndroid Build Coastguard Worker         AT_ERROR("write(): fd ", fildes, " failed with ", strerror(err));
212*da0073e9SAndroid Build Coastguard Worker       }
213*da0073e9SAndroid Build Coastguard Worker     }
214*da0073e9SAndroid Build Coastguard Worker     buf += r;
215*da0073e9SAndroid Build Coastguard Worker     AT_ASSERT(static_cast<size_t>(r) <= nbytes);
216*da0073e9SAndroid Build Coastguard Worker     nbytes -= r;
217*da0073e9SAndroid Build Coastguard Worker   }
218*da0073e9SAndroid Build Coastguard Worker }
219*da0073e9SAndroid Build Coastguard Worker 
220*da0073e9SAndroid Build Coastguard Worker // save_save is necessary since the old eager format saved storages as
221*da0073e9SAndroid Build Coastguard Worker // [size + data], but the v1.5 eager format removes this since size is saved in
222*da0073e9SAndroid Build Coastguard Worker // the filesize.
223*da0073e9SAndroid Build Coastguard Worker template <class io>
THPStorage_writeFileRaw(c10::StorageImpl * self,io fd,bool save_size,uint64_t element_size)224*da0073e9SAndroid Build Coastguard Worker void THPStorage_writeFileRaw(
225*da0073e9SAndroid Build Coastguard Worker     c10::StorageImpl* self,
226*da0073e9SAndroid Build Coastguard Worker     io fd,
227*da0073e9SAndroid Build Coastguard Worker     bool save_size,
228*da0073e9SAndroid Build Coastguard Worker     uint64_t element_size) {
229*da0073e9SAndroid Build Coastguard Worker   c10::DeviceGuard guard(self->device());
230*da0073e9SAndroid Build Coastguard Worker   uint8_t* data{};
231*da0073e9SAndroid Build Coastguard Worker   at::Tensor cpu_tensor;
232*da0073e9SAndroid Build Coastguard Worker   size_t size_bytes = self->nbytes();
233*da0073e9SAndroid Build Coastguard Worker   size_t numel = size_bytes / element_size;
234*da0073e9SAndroid Build Coastguard Worker   if (self->device_type() == at::kCPU) {
235*da0073e9SAndroid Build Coastguard Worker     // We are using a mutable pointer here because we're ultimately
236*da0073e9SAndroid Build Coastguard Worker     // calling into a Python API that requires that, even though it
237*da0073e9SAndroid Build Coastguard Worker     // won't mutate the data.
238*da0073e9SAndroid Build Coastguard Worker     data = static_cast<uint8_t*>(self->mutable_data());
239*da0073e9SAndroid Build Coastguard Worker   } else {
240*da0073e9SAndroid Build Coastguard Worker     // Here we use a tensor.to() to impl D2H for all non-CPU device.
241*da0073e9SAndroid Build Coastguard Worker     auto device_tensor = at::from_blob(
242*da0073e9SAndroid Build Coastguard Worker         self->mutable_data(),
243*da0073e9SAndroid Build Coastguard Worker         {static_cast<int64_t>(size_bytes)},
244*da0073e9SAndroid Build Coastguard Worker         {1},
245*da0073e9SAndroid Build Coastguard Worker         nullptr,
246*da0073e9SAndroid Build Coastguard Worker         at::device(self->device()).dtype(c10::kByte),
247*da0073e9SAndroid Build Coastguard Worker         {self->device()});
248*da0073e9SAndroid Build Coastguard Worker     cpu_tensor = device_tensor.to(at::kCPU);
249*da0073e9SAndroid Build Coastguard Worker     data = (uint8_t*)cpu_tensor.data_ptr();
250*da0073e9SAndroid Build Coastguard Worker   }
251*da0073e9SAndroid Build Coastguard Worker   if (save_size) {
252*da0073e9SAndroid Build Coastguard Worker     if (torch::utils::THP_nativeByteOrder() ==
253*da0073e9SAndroid Build Coastguard Worker         torch::utils::THPByteOrder::THP_LITTLE_ENDIAN)
254*da0073e9SAndroid Build Coastguard Worker       doWrite(fd, &numel, sizeof(int64_t));
255*da0073e9SAndroid Build Coastguard Worker     else {
256*da0073e9SAndroid Build Coastguard Worker       int64_t nsize{}; // convert big endian cpu to little endian storage
257*da0073e9SAndroid Build Coastguard Worker       torch::utils::THP_encodeInt64Buffer(
258*da0073e9SAndroid Build Coastguard Worker           (uint8_t*)&nsize,
259*da0073e9SAndroid Build Coastguard Worker           (const int64_t*)&numel,
260*da0073e9SAndroid Build Coastguard Worker           torch::utils::THPByteOrder::THP_LITTLE_ENDIAN,
261*da0073e9SAndroid Build Coastguard Worker           1);
262*da0073e9SAndroid Build Coastguard Worker       doWrite(fd, &nsize, sizeof(int64_t));
263*da0073e9SAndroid Build Coastguard Worker     }
264*da0073e9SAndroid Build Coastguard Worker   }
265*da0073e9SAndroid Build Coastguard Worker   // fast track for bytes and little endian
266*da0073e9SAndroid Build Coastguard Worker   if (element_size == 1 ||
267*da0073e9SAndroid Build Coastguard Worker       torch::utils::THP_nativeByteOrder() ==
268*da0073e9SAndroid Build Coastguard Worker           torch::utils::THPByteOrder::THP_LITTLE_ENDIAN) {
269*da0073e9SAndroid Build Coastguard Worker     doWrite(fd, data, size_bytes);
270*da0073e9SAndroid Build Coastguard Worker   } else {
271*da0073e9SAndroid Build Coastguard Worker     size_t buffer_size = std::min(numel, (size_t)5000);
272*da0073e9SAndroid Build Coastguard Worker     std::vector<uint8_t> le_buffer;
273*da0073e9SAndroid Build Coastguard Worker     le_buffer.resize(buffer_size * element_size);
274*da0073e9SAndroid Build Coastguard Worker     for (size_t i = 0; i < numel; i += buffer_size) {
275*da0073e9SAndroid Build Coastguard Worker       size_t to_convert = std::min(numel - i, buffer_size);
276*da0073e9SAndroid Build Coastguard Worker       if (element_size == 2) {
277*da0073e9SAndroid Build Coastguard Worker         torch::utils::THP_encodeInt16Buffer(
278*da0073e9SAndroid Build Coastguard Worker             le_buffer.data(),
279*da0073e9SAndroid Build Coastguard Worker             (const int16_t*)data + i,
280*da0073e9SAndroid Build Coastguard Worker             torch::utils::THPByteOrder::THP_LITTLE_ENDIAN,
281*da0073e9SAndroid Build Coastguard Worker             to_convert);
282*da0073e9SAndroid Build Coastguard Worker       } else if (element_size == 4) {
283*da0073e9SAndroid Build Coastguard Worker         torch::utils::THP_encodeInt32Buffer(
284*da0073e9SAndroid Build Coastguard Worker             le_buffer.data(),
285*da0073e9SAndroid Build Coastguard Worker             (const int32_t*)data + i,
286*da0073e9SAndroid Build Coastguard Worker             torch::utils::THPByteOrder::THP_LITTLE_ENDIAN,
287*da0073e9SAndroid Build Coastguard Worker             to_convert);
288*da0073e9SAndroid Build Coastguard Worker       } else if (element_size == 8) {
289*da0073e9SAndroid Build Coastguard Worker         torch::utils::THP_encodeInt64Buffer(
290*da0073e9SAndroid Build Coastguard Worker             le_buffer.data(),
291*da0073e9SAndroid Build Coastguard Worker             (const int64_t*)data + i,
292*da0073e9SAndroid Build Coastguard Worker             torch::utils::THPByteOrder::THP_LITTLE_ENDIAN,
293*da0073e9SAndroid Build Coastguard Worker             to_convert);
294*da0073e9SAndroid Build Coastguard Worker       }
295*da0073e9SAndroid Build Coastguard Worker       doWrite(fd, le_buffer.data(), to_convert * element_size);
296*da0073e9SAndroid Build Coastguard Worker     }
297*da0073e9SAndroid Build Coastguard Worker   }
298*da0073e9SAndroid Build Coastguard Worker }
299*da0073e9SAndroid Build Coastguard Worker 
300*da0073e9SAndroid Build Coastguard Worker template void THPStorage_writeFileRaw<int>(
301*da0073e9SAndroid Build Coastguard Worker     c10::StorageImpl* self,
302*da0073e9SAndroid Build Coastguard Worker     int fd,
303*da0073e9SAndroid Build Coastguard Worker     bool save_size,
304*da0073e9SAndroid Build Coastguard Worker     uint64_t element_size);
305*da0073e9SAndroid Build Coastguard Worker template void THPStorage_writeFileRaw<PyObject*>(
306*da0073e9SAndroid Build Coastguard Worker     c10::StorageImpl* self,
307*da0073e9SAndroid Build Coastguard Worker     PyObject* fd,
308*da0073e9SAndroid Build Coastguard Worker     bool save_size,
309*da0073e9SAndroid Build Coastguard Worker     uint64_t element_size);
310*da0073e9SAndroid Build Coastguard Worker 
311*da0073e9SAndroid Build Coastguard Worker template <class io>
THPStorage_readFileRaw(io file,c10::intrusive_ptr<c10::StorageImpl> storage,uint64_t element_size)312*da0073e9SAndroid Build Coastguard Worker c10::intrusive_ptr<c10::StorageImpl> THPStorage_readFileRaw(
313*da0073e9SAndroid Build Coastguard Worker     io file,
314*da0073e9SAndroid Build Coastguard Worker     c10::intrusive_ptr<c10::StorageImpl> storage,
315*da0073e9SAndroid Build Coastguard Worker     uint64_t element_size) {
316*da0073e9SAndroid Build Coastguard Worker   c10::OptionalDeviceGuard guard;
317*da0073e9SAndroid Build Coastguard Worker   if (storage.defined()) {
318*da0073e9SAndroid Build Coastguard Worker     guard.reset_device(storage->device());
319*da0073e9SAndroid Build Coastguard Worker   }
320*da0073e9SAndroid Build Coastguard Worker   int64_t size{};
321*da0073e9SAndroid Build Coastguard Worker   doRead(file, &size, sizeof(int64_t));
322*da0073e9SAndroid Build Coastguard Worker   if (torch::utils::THP_nativeByteOrder() ==
323*da0073e9SAndroid Build Coastguard Worker       torch::utils::THPByteOrder::THP_BIG_ENDIAN) {
324*da0073e9SAndroid Build Coastguard Worker     int64_t tsize = size; // convert little endian storage to big endian cpu
325*da0073e9SAndroid Build Coastguard Worker     torch::utils::THP_decodeInt64Buffer(&size, (const uint8_t*)&tsize, true, 1);
326*da0073e9SAndroid Build Coastguard Worker   }
327*da0073e9SAndroid Build Coastguard Worker   size_t nbytes = element_size * size;
328*da0073e9SAndroid Build Coastguard Worker   if (!storage.defined()) {
329*da0073e9SAndroid Build Coastguard Worker     storage = c10::make_intrusive<at::StorageImpl>(
330*da0073e9SAndroid Build Coastguard Worker         c10::StorageImpl::use_byte_size_t(),
331*da0073e9SAndroid Build Coastguard Worker         nbytes,
332*da0073e9SAndroid Build Coastguard Worker         c10::GetDefaultCPUAllocator(),
333*da0073e9SAndroid Build Coastguard Worker         /*resizable=*/true);
334*da0073e9SAndroid Build Coastguard Worker   } else {
335*da0073e9SAndroid Build Coastguard Worker     size_t _storage_nbytes = storage->nbytes();
336*da0073e9SAndroid Build Coastguard Worker     TORCH_CHECK(
337*da0073e9SAndroid Build Coastguard Worker         _storage_nbytes == nbytes,
338*da0073e9SAndroid Build Coastguard Worker         "storage has wrong byte size: expected %ld got %ld",
339*da0073e9SAndroid Build Coastguard Worker         nbytes,
340*da0073e9SAndroid Build Coastguard Worker         _storage_nbytes);
341*da0073e9SAndroid Build Coastguard Worker   }
342*da0073e9SAndroid Build Coastguard Worker 
343*da0073e9SAndroid Build Coastguard Worker   // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
344*da0073e9SAndroid Build Coastguard Worker   std::unique_ptr<char[]> cpu_data;
345*da0073e9SAndroid Build Coastguard Worker 
346*da0073e9SAndroid Build Coastguard Worker   uint8_t* data{};
347*da0073e9SAndroid Build Coastguard Worker   if (storage->device_type() == at::kCPU) {
348*da0073e9SAndroid Build Coastguard Worker     data = static_cast<uint8_t*>(storage->mutable_data());
349*da0073e9SAndroid Build Coastguard Worker   } else {
350*da0073e9SAndroid Build Coastguard Worker     // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
351*da0073e9SAndroid Build Coastguard Worker     cpu_data = std::unique_ptr<char[]>(new char[nbytes]);
352*da0073e9SAndroid Build Coastguard Worker     data = (uint8_t*)cpu_data.get();
353*da0073e9SAndroid Build Coastguard Worker   }
354*da0073e9SAndroid Build Coastguard Worker 
355*da0073e9SAndroid Build Coastguard Worker   // fast track for bytes and little endian
356*da0073e9SAndroid Build Coastguard Worker   if (element_size == 1 ||
357*da0073e9SAndroid Build Coastguard Worker       torch::utils::THP_nativeByteOrder() ==
358*da0073e9SAndroid Build Coastguard Worker           torch::utils::THPByteOrder::THP_LITTLE_ENDIAN) {
359*da0073e9SAndroid Build Coastguard Worker     doRead(file, data, storage->nbytes());
360*da0073e9SAndroid Build Coastguard Worker   } else {
361*da0073e9SAndroid Build Coastguard Worker     int64_t buffer_size = std::min(size, (int64_t)5000);
362*da0073e9SAndroid Build Coastguard Worker     // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
363*da0073e9SAndroid Build Coastguard Worker     std::unique_ptr<uint8_t[]> le_buffer(
364*da0073e9SAndroid Build Coastguard Worker         new uint8_t[buffer_size * element_size]);
365*da0073e9SAndroid Build Coastguard Worker 
366*da0073e9SAndroid Build Coastguard Worker     for (int64_t i = 0; i < size; i += buffer_size) {
367*da0073e9SAndroid Build Coastguard Worker       size_t to_convert = std::min(size - i, buffer_size);
368*da0073e9SAndroid Build Coastguard Worker       doRead(file, le_buffer.get(), element_size * to_convert);
369*da0073e9SAndroid Build Coastguard Worker 
370*da0073e9SAndroid Build Coastguard Worker       // NOLINTNEXTLINE(bugprone-branch-clone)
371*da0073e9SAndroid Build Coastguard Worker       if (element_size == 2) {
372*da0073e9SAndroid Build Coastguard Worker         torch::utils::THP_decodeInt16Buffer(
373*da0073e9SAndroid Build Coastguard Worker             (int16_t*)data + i, le_buffer.get(), true, to_convert);
374*da0073e9SAndroid Build Coastguard Worker       } else if (element_size == 4) {
375*da0073e9SAndroid Build Coastguard Worker         torch::utils::THP_decodeInt32Buffer(
376*da0073e9SAndroid Build Coastguard Worker             (int32_t*)data + i, le_buffer.get(), true, to_convert);
377*da0073e9SAndroid Build Coastguard Worker       } else if (element_size == 8) {
378*da0073e9SAndroid Build Coastguard Worker         torch::utils::THP_decodeInt64Buffer(
379*da0073e9SAndroid Build Coastguard Worker             (int64_t*)data + i, le_buffer.get(), true, to_convert);
380*da0073e9SAndroid Build Coastguard Worker       }
381*da0073e9SAndroid Build Coastguard Worker     }
382*da0073e9SAndroid Build Coastguard Worker   }
383*da0073e9SAndroid Build Coastguard Worker 
384*da0073e9SAndroid Build Coastguard Worker   if (storage->device_type() != at::kCPU) {
385*da0073e9SAndroid Build Coastguard Worker     // Here we use a tensor.copy_() to impl H2D for all non-CPU device.
386*da0073e9SAndroid Build Coastguard Worker     auto cpu_tensor = at::from_blob(
387*da0073e9SAndroid Build Coastguard Worker         (void*)data,
388*da0073e9SAndroid Build Coastguard Worker         {static_cast<int64_t>(nbytes)},
389*da0073e9SAndroid Build Coastguard Worker         at::device(at::kCPU).dtype(c10::kByte));
390*da0073e9SAndroid Build Coastguard Worker     auto device_tensor = at::from_blob(
391*da0073e9SAndroid Build Coastguard Worker         storage->mutable_data(),
392*da0073e9SAndroid Build Coastguard Worker         {static_cast<int64_t>(nbytes)},
393*da0073e9SAndroid Build Coastguard Worker         {1},
394*da0073e9SAndroid Build Coastguard Worker         nullptr,
395*da0073e9SAndroid Build Coastguard Worker         at::device(storage->device()).dtype(c10::kByte),
396*da0073e9SAndroid Build Coastguard Worker         {storage->device()});
397*da0073e9SAndroid Build Coastguard Worker     device_tensor.copy_(cpu_tensor);
398*da0073e9SAndroid Build Coastguard Worker   }
399*da0073e9SAndroid Build Coastguard Worker   return storage;
400*da0073e9SAndroid Build Coastguard Worker }
401*da0073e9SAndroid Build Coastguard Worker 
402*da0073e9SAndroid Build Coastguard Worker template c10::intrusive_ptr<c10::StorageImpl> THPStorage_readFileRaw<int>(
403*da0073e9SAndroid Build Coastguard Worker     int fd,
404*da0073e9SAndroid Build Coastguard Worker     c10::intrusive_ptr<c10::StorageImpl> storage,
405*da0073e9SAndroid Build Coastguard Worker     uint64_t element_size);
406*da0073e9SAndroid Build Coastguard Worker template c10::intrusive_ptr<c10::StorageImpl> THPStorage_readFileRaw<PyObject*>(
407*da0073e9SAndroid Build Coastguard Worker     PyObject* fd,
408*da0073e9SAndroid Build Coastguard Worker     c10::intrusive_ptr<c10::StorageImpl> storage,
409*da0073e9SAndroid Build Coastguard Worker     uint64_t element_size);
410