summaryrefslogtreecommitdiff
path: root/port
diff options
context:
space:
mode:
authorDmitri Smirnov <yuslepukhin@users.noreply.github.com>2016-04-29 16:43:13 -0700
committerSiying Dong <siying.d@fb.com>2016-04-29 16:43:13 -0700
commit4ea6e051ee8dde163c0afae1a9297b964d8f94b6 (patch)
tree8327b5773e67b46c9e8dc53f031f7651eaeac648 /port
parentf3bb024fd6eba9ba95abd653efa47e3b37e1a999 (diff)
Fix multiple issues with WinMmapFile fo sequential writing (#1108)
make preallocation inline with other writable files make sure that we map no more than pre-allocated size.
Diffstat (limited to 'port')
-rw-r--r--port/win/env_win.cc184
1 files changed, 84 insertions, 100 deletions
diff --git a/port/win/env_win.cc b/port/win/env_win.cc
index a468d0c77..fb323e22c 100644
--- a/port/win/env_win.cc
+++ b/port/win/env_win.cc
@@ -262,8 +262,11 @@ class WinMmapFile : public WritableFile {
// page size or SSD page size
const size_t
allocation_granularity_; // View must start at such a granularity
- size_t mapping_size_; // We want file mapping to be of a specific size
- // because then the file is expandable
+
+ size_t reserved_size_; // Preallocated size
+
+ size_t mapping_size_; // The max size of the mapping object
+ // we want to guess the final file size to minimize the remapping
size_t view_size_; // How much memory to map into a view at a time
char* mapped_begin_; // Must begin at the file offset that is aligned with
@@ -283,15 +286,6 @@ class WinMmapFile : public WritableFile {
return ftruncate(filename_, hFile_, toSize);
}
- // Can only truncate or reserve to a sector size aligned if
- // used on files that are opened with Unbuffered I/O
- // Normally it does not present a problem since in memory mapped files
- // we do not disable buffering
- Status ReserveFileSpace(uint64_t toSize) {
- IOSTATS_TIMER_GUARD(allocate_nanos);
- return fallocate(filename_, hFile_, toSize);
- }
-
Status UnmapCurrentRegion() {
Status status;
@@ -301,82 +295,57 @@ class WinMmapFile : public WritableFile {
"Failed to unmap file view: " + filename_, GetLastError());
}
+ // Move on to the next portion of the file
+ file_offset_ += view_size_;
+
// UnmapView automatically sends data to disk but not the metadata
// which is good and provides some equivalent of fdatasync() on Linux
// therefore, we donot need separate flag for metadata
- pending_sync_ = false;
mapped_begin_ = nullptr;
mapped_end_ = nullptr;
dst_ = nullptr;
- last_sync_ = nullptr;
-
- // Move on to the next portion of the file
- file_offset_ += view_size_;
- // Increase the amount we map the next time, but capped at 1MB
- view_size_ *= 2;
- view_size_ = std::min(view_size_, c_OneMB);
+ last_sync_ = nullptr;
+ pending_sync_ = false;
}
return status;
}
Status MapNewRegion() {
+
Status status;
assert(mapped_begin_ == nullptr);
- size_t minMappingSize = file_offset_ + view_size_;
-
- // Check if we need to create a new mapping since we want to write beyond
- // the current one
- // If the mapping view is now too short
- // CreateFileMapping will extend the size of the file automatically if the
- // mapping size is greater than
- // the current length of the file, which reserves the space and makes
- // writing faster, except, windows can not map an empty file.
- // Thus the first time around we must actually extend the file ourselves
- if (hMap_ == NULL || minMappingSize > mapping_size_) {
- if (NULL == hMap_) {
- // Creating mapping for the first time so reserve the space on disk
- status = ReserveFileSpace(minMappingSize);
- if (!status.ok()) {
- return status;
- }
+ size_t minDiskSize = file_offset_ + view_size_;
+
+ if (minDiskSize > reserved_size_) {
+ status = Allocate(file_offset_, view_size_);
+ if (!status.ok()) {
+ return status;
}
+ }
+
+ // Need to remap
+ if (hMap_ == NULL || reserved_size_ > mapping_size_) {
- if (hMap_) {
+ if (hMap_ != NULL) {
// Unmap the previous one
BOOL ret = ::CloseHandle(hMap_);
assert(ret);
hMap_ = NULL;
}
- // Calculate the new mapping size which will hopefully reserve space for
- // several consecutive sliding views
- // Query preallocation block size if set
- size_t preallocationBlockSize = 0;
- size_t lastAllocatedBlockSize = 0; // Not used
- GetPreallocationStatus(&preallocationBlockSize, &lastAllocatedBlockSize);
-
- if (preallocationBlockSize) {
- preallocationBlockSize =
- Roundup(preallocationBlockSize, allocation_granularity_);
- } else {
- preallocationBlockSize = 2 * view_size_;
- }
-
- mapping_size_ += preallocationBlockSize;
-
ULARGE_INTEGER mappingSize;
- mappingSize.QuadPart = mapping_size_;
+ mappingSize.QuadPart = reserved_size_;
hMap_ = CreateFileMappingA(
hFile_,
NULL, // Security attributes
PAGE_READWRITE, // There is not a write only mode for mapping
mappingSize.HighPart, // Enable mapping the whole file but the actual
- // amount mapped is determined by MapViewOfFile
+ // amount mapped is determined by MapViewOfFile
mappingSize.LowPart,
NULL); // Mapping name
@@ -385,6 +354,8 @@ class WinMmapFile : public WritableFile {
"WindowsMmapFile failed to create file mapping for: " + filename_,
GetLastError());
}
+
+ mapping_size_ = reserved_size_;
}
ULARGE_INTEGER offset;
@@ -416,6 +387,7 @@ class WinMmapFile : public WritableFile {
hMap_(NULL),
page_size_(page_size),
allocation_granularity_(allocation_granularity),
+ reserved_size_(0),
mapping_size_(0),
view_size_(0),
mapped_begin_(nullptr),
@@ -435,25 +407,10 @@ class WinMmapFile : public WritableFile {
// Only for memory mapped writes
assert(options.use_mmap_writes);
- // Make sure buffering is not disabled. It is ignored for mapping
- // purposes but also imposes restriction on moving file position
- // it is not a problem so much with reserving space since it is probably a
- // factor
- // of allocation_granularity but we also want to truncate the file in
- // Close() at
- // arbitrary position so we do not have to feel this with zeros.
- assert(options.use_os_buffer);
-
// View size must be both the multiple of allocation_granularity AND the
- // page size
- if ((allocation_granularity_ % page_size_) == 0) {
- view_size_ = 2 * allocation_granularity;
- } else if ((page_size_ % allocation_granularity_) == 0) {
- view_size_ = 2 * page_size_;
- } else {
- // we can multiply them together
- assert(false);
- }
+ // page size and the granularity is usually a multiple of a page size.
+ const size_t viewSize = 32 * 1024; // 32Kb similar to the Windows File Cache in buffered mode
+ view_size_ = Roundup(viewSize, allocation_granularity_);
}
~WinMmapFile() {
@@ -479,14 +436,20 @@ class WinMmapFile : public WritableFile {
if (!s.ok()) {
return s;
}
+ } else {
+ size_t n = std::min(left, avail);
+ memcpy(dst_, src, n);
+ dst_ += n;
+ src += n;
+ left -= n;
+ pending_sync_ = true;
}
+ }
- size_t n = std::min(left, avail);
- memcpy(dst_, src, n);
- dst_ += n;
- src += n;
- left -= n;
- pending_sync_ = true;
+ // Now make sure that the last partial page is padded with zeros if needed
+ size_t bytesToPad = Roundup(size_t(dst_), page_size_) - size_t(dst_);
+ if (bytesToPad > 0) {
+ memset(dst_, 0, bytesToPad);
}
return Status::OK();
@@ -508,7 +471,13 @@ class WinMmapFile : public WritableFile {
// which we use does not write zeros and it is good.
uint64_t targetSize = GetFileSize();
- s = UnmapCurrentRegion();
+ if (mapped_begin_ != nullptr) {
+ // Sync before unmapping to make sure everything
+ // is on disk and there is not a lazy writing
+ // so we are deterministic with the tests
+ Sync();
+ s = UnmapCurrentRegion();
+ }
if (NULL != hMap_) {
BOOL ret = ::CloseHandle(hMap_);
@@ -521,15 +490,18 @@ class WinMmapFile : public WritableFile {
hMap_ = NULL;
}
- TruncateFile(targetSize);
+ if (hFile_ != NULL) {
- BOOL ret = ::CloseHandle(hFile_);
- hFile_ = NULL;
+ TruncateFile(targetSize);
- if (!ret && s.ok()) {
- auto lastError = GetLastError();
- s = IOErrorFromWindowsError(
- "Failed to close file map handle: " + filename_, lastError);
+ BOOL ret = ::CloseHandle(hFile_);
+ hFile_ = NULL;
+
+ if (!ret && s.ok()) {
+ auto lastError = GetLastError();
+ s = IOErrorFromWindowsError(
+ "Failed to close file map handle: " + filename_, lastError);
+ }
}
return s;
@@ -542,7 +514,7 @@ class WinMmapFile : public WritableFile {
Status s;
// Some writes occurred since last sync
- if (pending_sync_) {
+ if (dst_ > last_sync_) {
assert(mapped_begin_);
assert(dst_);
assert(dst_ > mapped_begin_);
@@ -552,16 +524,15 @@ class WinMmapFile : public WritableFile {
TruncateToPageBoundary(page_size_, last_sync_ - mapped_begin_);
size_t page_end =
TruncateToPageBoundary(page_size_, dst_ - mapped_begin_ - 1);
- last_sync_ = dst_;
// Flush only the amount of that is a multiple of pages
if (!::FlushViewOfFile(mapped_begin_ + page_begin,
- (page_end - page_begin) + page_size_)) {
+ (page_end - page_begin) + page_size_)) {
s = IOErrorFromWindowsError("Failed to FlushViewOfFile: " + filename_,
GetLastError());
+ } else {
+ last_sync_ = dst_;
}
-
- pending_sync_ = false;
}
return s;
@@ -571,19 +542,15 @@ class WinMmapFile : public WritableFile {
* Flush data as well as metadata to stable storage.
*/
virtual Status Fsync() override {
- Status s;
-
- // Flush metadata if pending
- const bool pending = pending_sync_;
-
- s = Sync();
+ Status s = Sync();
// Flush metadata
- if (s.ok() && pending) {
+ if (s.ok() && pending_sync_) {
if (!::FlushFileBuffers(hFile_)) {
s = IOErrorFromWindowsError("Failed to FlushFileBuffers: " + filename_,
GetLastError());
}
+ pending_sync_ = false;
}
return s;
@@ -604,7 +571,24 @@ class WinMmapFile : public WritableFile {
}
virtual Status Allocate(uint64_t offset, uint64_t len) override {
- return Status::OK();
+ Status status;
+ TEST_KILL_RANDOM("WinMmapFile::Allocate", rocksdb_kill_odds);
+
+ // Make sure that we reserve an aligned amount of space
+ // since the reservation block size is driven outside so we want
+ // to check if we are ok with reservation here
+ size_t spaceToReserve = Roundup(offset + len, view_size_);
+ // Nothing to do
+ if (spaceToReserve <= reserved_size_) {
+ return status;
+ }
+
+ IOSTATS_TIMER_GUARD(allocate_nanos);
+ status = fallocate(filename_, hFile_, spaceToReserve);
+ if (status.ok()) {
+ reserved_size_ = spaceToReserve;
+ }
+ return status;
}
};