Commit a123e69

Merge pull request pandas-dev#492 from dimosped/frombuffer_hotfix
Revert to using fromstring instead of frombuffer
2 parents e4a3f78 + c2e2a49
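For context (not part of the commit): `np.frombuffer` wraps the immutable `bytes` object it is given without copying, so the resulting ndarray is read-only; `np.fromstring` copies the data into a fresh buffer, so the round-tripped array stays writeable (its binary mode is deprecated in modern NumPy, but it was the fix of record here). A minimal sketch of the difference:

```python
import numpy as np

raw = np.arange(4, dtype='int64').tobytes()  # stand-in for a decompressed segment

# frombuffer shares memory with the immutable bytes object -> read-only array
view = np.frombuffer(raw, dtype='int64')
assert not view.flags['WRITEABLE']  # view[0] = 42 would raise ValueError

# fromstring copies the data -> writeable array, at the cost of the copy
# (emits a DeprecationWarning on modern NumPy)
copy = np.fromstring(raw, dtype='int64')
assert copy.flags['WRITEABLE']
copy[0] = 42  # fine
```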

8 files changed: +30 -11 lines changed

CHANGES.md

Lines changed: 3 additions & 0 deletions
```diff
@@ -1,5 +1,8 @@
 ## Changelog
 
+### 1.58
+  * Bugfix: #491 roll back the use of frombuffer to fromstring, fixes the read-only ndarray issue
+
 ### 1.57
   * Feature: #206 String support for tickstore
   * Bugfix: #486 improve mongo_retry robustness with failures for version store write/append
```

arctic/serialization/numpy_arrays.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -139,11 +139,11 @@ def objify(self, doc, columns=None):
 
         for col in cols:
             d = decompress(doc[DATA][doc[METADATA][LENGTHS][col][0]: doc[METADATA][LENGTHS][col][1] + 1])
-            d = np.frombuffer(d, doc[METADATA][DTYPE][col])
+            d = np.fromstring(d, doc[METADATA][DTYPE][col])
 
             if MASK in doc[METADATA] and col in doc[METADATA][MASK]:
                 mask_data = decompress(doc[METADATA][MASK][col])
-                mask = np.frombuffer(mask_data, 'bool')
+                mask = np.fromstring(mask_data, 'bool')
                 d = ma.masked_array(d, mask)
             data[col] = d
```
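A hedged sketch of how the decoded mask is applied here (the `decompress` step is elided and the payloads are hypothetical):

```python
import numpy as np
import numpy.ma as ma

# Hypothetical decoded column payloads, standing in for decompress(...) output
d = np.fromstring(np.array([1.0, 2.0, 3.0]).tobytes(), dtype='float64')
mask = np.fromstring(np.array([False, True, False]).tobytes(), dtype='bool')

# As in objify(): True entries in the mask hide the corresponding values
d = ma.masked_array(d, mask)
assert d.count() == 2  # only the unmasked elements remain visible
```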

arctic/store/_ndarray_store.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -200,7 +200,7 @@ def _do_read(self, collection, version, symbol, index_range=None):
                 symbol, version['version'], segment_count, i + 1, collection.database.name + '.' + collection.name))
 
         dtype = self._dtype(version['dtype'], version.get('dtype_metadata', {}))
-        rtn = np.frombuffer(data, dtype=dtype).reshape(version.get('shape', (-1)))
+        rtn = np.fromstring(data, dtype=dtype).reshape(version.get('shape', (-1)))
         return rtn
 
     def _promote_types(self, dtype, dtype_str):
```
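A small sketch of the reshape fallback in this read path (payload and version dict are hypothetical): when no 'shape' was recorded, `version.get('shape', (-1))` yields -1, so the bytes decode to a flat array.

```python
import numpy as np

data = np.arange(6, dtype='int64').tobytes()  # hypothetical segment payload
version = {'dtype': 'int64'}                  # no 'shape' recorded

rtn = np.fromstring(data, dtype=np.dtype(version['dtype'])).reshape(version.get('shape', (-1)))
assert rtn.shape == (6,)  # falls back to a flat array

version['shape'] = (2, 3)  # with a recorded shape it is restored on read
rtn = np.fromstring(data, dtype='int64').reshape(version.get('shape', (-1)))
assert rtn.shape == (2, 3)
```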

arctic/store/_pandas_ndarray_store.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -51,7 +51,7 @@ def _segment_index(self, recarr, existing_index, start, new_segments):
                           dtype=INDEX_DTYPE)
         # append to existing index if exists
         if existing_index:
-            existing_index_arr = np.frombuffer(decompress(existing_index), dtype=INDEX_DTYPE)
+            existing_index_arr = np.fromstring(decompress(existing_index), dtype=INDEX_DTYPE)
             if start > 0:
                 existing_index_arr = existing_index_arr[existing_index_arr['index'] < start]
             index = np.concatenate((existing_index_arr, index))
@@ -74,7 +74,7 @@ def _index_range(self, version, symbol, date_range=None, **kwargs):
         with the date_range. As the segment index is (id -> last datetime)
         we need to take care in choosing the correct chunks. """
         if date_range and 'segment_index' in version:
-            index = np.frombuffer(decompress(version['segment_index']), dtype=INDEX_DTYPE)
+            index = np.fromstring(decompress(version['segment_index']), dtype=INDEX_DTYPE)
             dtcol = self._datetime64_index(index)
             if dtcol and len(index):
                 dts = index[dtcol]
```
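To illustrate the append-time filtering in `_segment_index`, a sketch under the assumption that INDEX_DTYPE pairs a row offset with a timestamp (the exact dtype lives in the store module; the values below are hypothetical):

```python
import numpy as np

# Assumed layout of INDEX_DTYPE: (row offset of segment end, last datetime)
INDEX_DTYPE = np.dtype([('index', 'int64'), ('date_time', 'int64')])

existing_index_arr = np.array([(0, 100), (50, 200), (90, 300)], dtype=INDEX_DTYPE)
start = 60  # hypothetical row offset where the append begins

# As in _segment_index(): drop entries at or past the append point, then
# splice in the freshly built index for the new segments
existing_index_arr = existing_index_arr[existing_index_arr['index'] < start]
index = np.array([(95, 400)], dtype=INDEX_DTYPE)
index = np.concatenate((existing_index_arr, index))
assert list(index['index']) == [0, 50, 95]
```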

arctic/tickstore/tickstore.py

Lines changed: 4 additions & 4 deletions
```diff
@@ -435,7 +435,7 @@ def _read_bucket(self, doc, column_set, column_dtypes, include_symbol, include_i
         rtn = {}
         if doc[VERSION] != 3:
             raise ArcticException("Unhandled document version: %s" % doc[VERSION])
-        rtn[INDEX] = np.cumsum(np.frombuffer(decompress(doc[INDEX]), dtype='uint64'))
+        rtn[INDEX] = np.cumsum(np.fromstring(decompress(doc[INDEX]), dtype='uint64'))
         doc_length = len(rtn[INDEX])
         column_set.update(doc[COLUMNS].keys())
 
@@ -444,7 +444,7 @@ def _read_bucket(self, doc, column_set, column_dtypes, include_symbol, include_i
         for c in column_set:
             try:
                 coldata = doc[COLUMNS][c]
-                mask = np.frombuffer(decompress(coldata[ROWMASK]), dtype='uint8')
+                mask = np.fromstring(decompress(coldata[ROWMASK]), dtype='uint8')
                 union_mask = union_mask | mask
             except KeyError:
                 rtn[c] = None
@@ -460,10 +460,10 @@ def _read_bucket(self, doc, column_set, column_dtypes, include_symbol, include_i
             try:
                 coldata = doc[COLUMNS][c]
                 dtype = np.dtype(coldata[DTYPE])
-                values = np.frombuffer(decompress(coldata[DATA]), dtype=dtype)
+                values = np.fromstring(decompress(coldata[DATA]), dtype=dtype)
                 self._set_or_promote_dtype(column_dtypes, c, dtype)
                 rtn[c] = self._empty(rtn_length, dtype=column_dtypes[c])
-                rowmask = np.unpackbits(np.frombuffer(decompress(coldata[ROWMASK]),
+                rowmask = np.unpackbits(np.fromstring(decompress(coldata[ROWMASK]),
                                                       dtype='uint8'))[:doc_length].astype('bool')
                 rowmask = rowmask[union_mask]
                 rtn[c][rowmask] = values
```
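The row masks above pack one bit per row of the bucket; a sketch of the decode step (compression elided, data hypothetical):

```python
import numpy as np

doc_length = 10  # rows in the bucket
bits = np.array([1, 0, 1, 1, 0, 0, 0, 1, 0, 1], dtype='uint8')
rowmask_bytes = np.packbits(bits).tobytes()  # as stored, before compression

# Mirrors _read_bucket(): unpack to bits, trim the padding that packbits
# added to fill the final byte, and view the result as booleans
rowmask = np.unpackbits(np.fromstring(rowmask_bytes,
                                      dtype='uint8'))[:doc_length].astype('bool')
assert rowmask.sum() == bits.sum()
```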

tests/integration/store/test_ndarray_store.py

Lines changed: 9 additions & 0 deletions
```diff
@@ -124,6 +124,15 @@ def test_save_read_large_ndarray(library):
     assert np.all(ndarr == saved_arr)
 
 
+def test_mutable_ndarray(library):
+    dtype = np.dtype([('abc', 'int64')])
+    ndarr = np.arange(32).view(dtype=dtype)
+    ndarr.setflags(write=True)
+    library.write('MYARR', ndarr)
+    saved_arr = library.read('MYARR').data
+    assert saved_arr.flags['WRITEABLE']
+
+
 @pytest.mark.xfail(reason="delete_version not safe with append...")
 def test_delete_version_shouldnt_break_read(library):
     data = np.arange(30)
```

tests/integration/store/test_pandas_store.py

Lines changed: 7 additions & 0 deletions
```diff
@@ -899,3 +899,10 @@ def test_read_write_multiindex_store_keeps_timezone(library):
     assert list(library.read('spam').data.index[0]) == row0[:-1]
     assert list(library.read('spam').data.index[1]) == row1[:-1]
 
+
+def test_mutable_df(library):
+    s = DataFrame(data=[1, 2, 3], index=[4, 5, 6])
+    s.__array__().setflags(write=True)
+    library.write('pandas', s)
+    read_s = library.read('pandas')
+    assert read_s.data.__array__().flags['WRITEABLE']
```

tests/integration/test_arctic_multithreading.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -42,7 +42,7 @@ def my_auth_hook(host, app_name, database_name):
     AUTH_COUNT += 1
 
 
-@pytest.mark.timeout(300)
+@pytest.mark.timeout(600)
 def test_multiprocessing_safety(mongo_host, library_name):
     # Create/initialize library at the parent process, then spawn children, and start them aligned in time
     total_processes = 64
@@ -70,7 +70,7 @@ def test_multiprocessing_safety(mongo_host, library_name):
     assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)
 
 
-@pytest.mark.timeout(300)
+@pytest.mark.timeout(600)
 def test_multiprocessing_safety_parent_children_race(mongo_host, library_name):
     # Create Arctic and directly fork/start children (no wait)
     total_iterations = 12
```
