/*
Copyright (c) 2006, Jean Gressmann All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * 	Redistributions of source code must retain the above copyright
    	notice, this list of conditions and the following disclaimer. 
    *	Redistributions in binary form must reproduce the above copyright
		notice, this list of conditions and the following disclaimer in the
		documentation and/or other materials provided with the distribution.
    * 	The names of its contributors may not be used to endorse or promote products
		derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <cassert>
#include <new>
#include <limits>
#include <algorithm>
#include <portablethreads/mmap.h>
#include <portablethreads/lockfree/atomic_number.h>
#include <portablethreads/lockfree/memory_chunk_batch.h>
#include <portablethreads/warning_header.h>
#include <portablethreads/warning_header.h>

#if	defined(_DEBUG) && defined(_WIN32)
#	include <windows.h>
#endif

#ifdef _MSC_VER
#	pragma warning(disable:4244) 
#endif 


// In Windows debug builds, replace the standard assert with one that calls
// DebugBreak() when the condition is false.
// The replacement is a single void expression (the same shape as the standard
// macro), so `assert(x);` can safely be the only statement of an if/else
// branch. A statement-style macro with its own trailing ';' would orphan or
// capture the following `else`.
#if defined(_DEBUG) && defined(_WIN32)
#	ifdef assert
#		undef assert
#	endif
#	define assert(x) ((x) ? (void)0 : DebugBreak())
#endif

#ifdef max
#	undef max
#endif
#ifdef min
#	undef min
#endif



using namespace std;

namespace PortableThreads
{
	namespace LockFree
	{
		namespace Private
		{
			namespace
			{
				using PortableThreads::Private::PTAlignedSize;

				// Bookkeeping record placed at the very start of a batch's memory
				// block by pt_setup_memory_chunk_batch().
				struct BatchHeader
				{
					// Set to BATCH_HEADER_MAGIC on setup; asserted by the other batch functions.
					uint32 magic_;			
					// Size in bytes of the whole memory block, as passed to setup.
					uint32 memorySize_;
					// Payload bytes per chunk (the ChunkHeader is not included).
					uint32 chunkSize_;
					// Number of chunks carved out of the block.
					uint32 numberOfChunks_;
					// Twice the number of free chunks. Taking the last free chunk stores 1
					// instead; once the lowest bit is set allocation always fails, while
					// every free keeps adding 2.
					LockFree::PTAtomicNumber numberOfFreeChunks_;
					// Index of the first chunk on the free list.
					LockFree::PTAtomicNumber firstFreeChunk_;
#ifdef _DEBUG
					// Debug-only flag, set by pt_destroy_memory_chunk_batch().
					volatile bool freedToSystem_;
#endif		
				};		
				// Per-chunk record stored directly in front of each chunk's payload.
				struct ChunkHeader
				{
					ChunkHeader() { /* existence eliminates warning about POD initialization */ }
					// Whole pages between the start of the batch block and this header
					// (written during batch setup, read by getBatchHeaderFromRaw()).
					uint16 pagesFromBatchHeader_;
					// Index of the next chunk on the batch's free list.
					volatile uint16 nextFreeChunk_;
					// Caller-defined value; see pt_set_userdata_on_memory() and
					// pt_get_userdata_from_memory().
					volatile uint32 userData_;
				};
				
				// Value stored in BatchHeader::magic_ so that functions can assert they
				// were handed a real batch header.
				const uint32 BATCH_HEADER_MAGIC = 0xdeadbeef;
				// NOTE(review): not referenced in the visible part of this file.
				const uint32 MAGIC_FOR_POINTERS = 0xdeadbef0; // least significant bits must be zero b/c this is put into PTPointerCAS

				// Maps a payload pointer to the ChunkHeader that sits directly in
				// front of it (one aligned ChunkHeader size earlier in memory).
				inline ChunkHeader* getChunkHeaderFromRaw(void* mem)
				{
					char* const payload = static_cast<char*>(mem);
					char* const headerBytes = payload - PTAlignedSize<ChunkHeader>::SIZE;
					return reinterpret_cast<ChunkHeader*>(headerBytes);
				}
				// Locates the BatchHeader of the batch that the payload pointer fRaw
				// was allocated from.
				//
				// pagesFromBatchHeader_ is computed during batch setup from the address
				// of the ChunkHeader, so the page rounding has to start from the
				// header's address as well. Rounding the payload address instead is off
				// by one page whenever a payload begins exactly on a page boundary (its
				// header then lies at the end of the preceding page).
				// Assumes the batch block itself starts on a page boundary.
				inline BatchHeader* getBatchHeaderFromRaw(void* fRaw)
				{
					ChunkHeader* const chunk = getChunkHeaderFromRaw(fRaw);
					char* raw = reinterpret_cast<char*>(chunk);
					// Round down to the start of the page that holds the chunk header ...
					raw -= reinterpret_cast<pt_uint_t>(raw) % pt_pagesize();
					// ... then step back to the first page of the batch.
					raw -= chunk->pagesFromBatchHeader_ * pt_pagesize();
					
					return reinterpret_cast<BatchHeader*>(raw);
				}
				
			}

			
			void pt_setup_memory_chunk_batch(void* memory, pt_uint_t memorySize, pt_uint_t chunkSize)
			{
				assert((memorySize % pt_pagesize()) == 0);	
				assert((chunkSize % sizeof(double)) == 0);	
				assert(PTAlignedSize<BatchHeader>::SIZE <= memorySize);
				const pt_uint_t NumberOfChunkHeaders = (memorySize - PTAlignedSize<BatchHeader>::SIZE) / (chunkSize + PTAlignedSize<ChunkHeader>::SIZE);

				char* raw = static_cast<char*>(memory);

				BatchHeader* header = new (raw) BatchHeader();
				header->magic_ = BATCH_HEADER_MAGIC;
				header->numberOfFreeChunks_ = NumberOfChunkHeaders * 2;
				header->firstFreeChunk_ = 0;
				header->memorySize_ = memorySize;
				header->chunkSize_ = chunkSize;
				header->numberOfChunks_ = NumberOfChunkHeaders;
#ifdef _DEBUG
				header->freedToSystem_ = false;
#endif
				
				raw += PTAlignedSize<BatchHeader>::SIZE;

				for(pt_uint_t i = 0; i < NumberOfChunkHeaders; ++i)
				{
#ifndef NDEBUG
					const char* chunkstart = reinterpret_cast<char*>(header) + PTAlignedSize<BatchHeader>::SIZE;
					const long diff = raw - chunkstart;
					const bool ok = diff % (PTAlignedSize<ChunkHeader>::SIZE + chunkSize) == 0;
					assert(ok && "ChunkHeader starts at invalid offset into memory block!");
#endif
					ChunkHeader* chunk = new (raw) ChunkHeader();
					raw += PTAlignedSize<ChunkHeader>::SIZE;
					raw += chunkSize;
					assert((reinterpret_cast<pt_uint_t>(raw) % sizeof(double)) == 0);


					// set to whereever head is pointing to also in the chunk
					chunk->pagesFromBatchHeader_ = (reinterpret_cast<char*>(chunk) - static_cast<char*>(memory)) / pt_pagesize();
					chunk->nextFreeChunk_ = i+1;
					chunk->userData_ = 0;
				}

				pt_mfence();
			}

			void* pt_alloc_from_memory_chunk_batch(void* memory)
			{
				BatchHeader* header = reinterpret_cast<BatchHeader*>(memory);
				assert(header->magic_ == BATCH_HEADER_MAGIC && "Invalid memory header!");
#ifdef _DEBUG
				assert(!header->freedToSystem_);
#endif				

				// allocate a chunk or fail
				{
					LockFree::PTAtomicNumber::int_t ov, nv;
					do 
					{
						ov = header->numberOfFreeChunks_.get();
						if(ov & LockFree::PTAtomicNumber::int_t(1))
							return 0;

						if(ov == 2)
							nv = 1;
						else
							nv = ov - 2;
					} 
					while(!header->numberOfFreeChunks_.cas(nv, ov));
				}


				{
					char* data = static_cast<char*>(memory) + PTAlignedSize<BatchHeader>::SIZE;
					ChunkHeader* chunk;
					pt_uint_t next_free, current_free;
					do
					{
						current_free = header->firstFreeChunk_.get();
						assert(current_free < header->numberOfChunks_);
						chunk = reinterpret_cast<ChunkHeader*>(data + current_free * (PTAlignedSize<ChunkHeader>::SIZE + header->chunkSize_));
						next_free = chunk->nextFreeChunk_;
						assert(next_free <= header->numberOfChunks_);
					}
					while(!header->firstFreeChunk_.cas(next_free, current_free));
					
					data += current_free * (PTAlignedSize<ChunkHeader>::SIZE + header->chunkSize_);
					data += PTAlignedSize<ChunkHeader>::SIZE;

					return data;
				}
			}

			bool pt_can_free_memory_chunk_batch(void* memory)
			{
				BatchHeader* header = reinterpret_cast<BatchHeader*>(memory);
				assert(header->magic_ == BATCH_HEADER_MAGIC && "Invalid memory header!");
#ifdef _DEBUG
				assert(!header->freedToSystem_);
#endif

				const LockFree::PTAtomicNumber::int_t count = header->numberOfFreeChunks_.get();

				return count == static_cast<LockFree::PTAtomicNumber::int_t>(header->numberOfChunks_ * 2 + 1);
			}

			void pt_set_userdata_on_memory(void* mem, uint32 fUserData)
			{
				// no header validation, needs to work with non-batched chunks as we
				getChunkHeaderFromRaw(mem)->userData_ = fUserData;
				LockFree::Private::pt_mfence();
			}
			// Reads the user data value stored in the ChunkHeader in front of `mem`.
			uint32 pt_get_userdata_from_memory(void* mem)
			{
				// Deliberately no batch header validation: this needs to work with
				// non-batched chunks as well.
				ChunkHeader* const chunk = getChunkHeaderFromRaw(mem);
				return chunk->userData_;
			}


			void pt_free_to_memory_chunk_batch(void* toFree)
			{
				assert(toFree && "Cannot handle free of NULL");
				
				BatchHeader* header = getBatchHeaderFromRaw(toFree);
				assert(header->magic_ == BATCH_HEADER_MAGIC && "Invalid memory header!");
#ifdef _DEBUG
				assert(!header->freedToSystem_);
#endif
				ChunkHeader* chunk = getChunkHeaderFromRaw(toFree);
				char* beginOfData = reinterpret_cast<char*>(header) + PTAlignedSize<BatchHeader>::SIZE;
				const unsigned short index = (reinterpret_cast<char*>(chunk) - beginOfData) / header->chunkSize_;

				// push the chunk;
				{
					pt_uint_t old_free;
					do 
					{
						old_free = header->firstFreeChunk_.get();
						chunk->nextFreeChunk_ = old_free;
						pt_mfence();
					} 
					while(!header->firstFreeChunk_.cas(index, old_free));
				}

				{
					LockFree::PTAtomicNumber::int_t ov, nv;
					do 
					{
						nv = ov = header->numberOfFreeChunks_.get();
						nv += 2;
					} 
					while(!header->numberOfFreeChunks_.cas(nv, ov));
				}	
			}
			// Marks the batch at `memory` as released. Only debug builds do anything
			// here: they set the freedToSystem_ flag that the other batch functions
			// assert on. In other builds the function body is empty.
			void pt_destroy_memory_chunk_batch(void* memory)
			{
#ifdef _DEBUG
				BatchHeader* const batch = static_cast<BatchHeader*>(memory);
				assert(batch->magic_ == BATCH_HEADER_MAGIC && "Invalid memory header!");
				assert(!batch->freedToSystem_);

				batch->freedToSystem_ = true;
				pt_mfence();
#endif				
			}

			bool pt_validate_write_to_memory_chunk(void* mem, pt_uint_t bytes)
			{
				BatchHeader* header = getBatchHeaderFromRaw(mem);
				assert(header->magic_ == BATCH_HEADER_MAGIC && "Invalid memory header!");
#ifdef _DEBUG
				assert(!header->freedToSystem_);
#endif

				return bytes <= header->chunkSize_;
			}
			
			// Searches for a batch size, counted in pages of pt_pagesize() bytes, that
			// leaves little unused space behind the last chunk.
			//
			// Starting from the smallest page count that can hold ChunkSize bytes (at
			// least one page), the page count is raised as long as the leftover bytes
			// keep shrinking and the chunk count still fits into 16 bits. The last
			// page count before either condition fails is returned.
			//
			// NOTE: This function returns silly (but sizewise ok) results when
			// ChunkSize approaches pt_pagesize() or exceeds it.
			// NOTE(review): the return value is a number of pages, not a number of
			// bytes - confirm that callers expect this.
			pt_uint_t pt_get_optimal_size_of_memory_chunk_batch(pt_uint_t ChunkSize)
			{
				assert((ChunkSize % sizeof(double)) == 0);
				const pt_uint_t pageBytes = pt_pagesize();
				const pt_uint_t headerBytes = PTAlignedSize<BatchHeader>::SIZE;
				const pt_uint_t slotBytes = PTAlignedSize<ChunkHeader>::SIZE + ChunkSize;

				const pt_uint_t pagesForOneChunk = ChunkSize / pageBytes + ((ChunkSize % pageBytes) != 0);
				pt_uint_t pages = std::max(static_cast<pt_uint_t>(1), pagesForOneChunk);
				pt_uint_t previousWaste = pages * pageBytes;

				for(;;)
				{
					const pt_uint_t usableBytes = pages * pageBytes - headerBytes;
					const pt_uint_t chunkCount = usableBytes / slotBytes;
					const pt_uint_t waste = usableBytes - chunkCount * slotBytes;

					const bool noImprovement = previousWaste <= waste;
					const bool tooManyChunks = chunkCount > std::numeric_limits<uint16>::max();
					if(noImprovement || tooManyChunks)
						return pages - 1;

					previousWaste = waste;
					++pages;
				}
			}

			// Returns the aligned size of a ChunkHeader, i.e. the distance between
			// the start of a chunk header and the payload that follows it.
			pt_uint_t pt_get_memory_chunk_header_size()
			{
				const pt_uint_t headerBytes = PTAlignedSize<ChunkHeader>::SIZE;
				return headerBytes;
			}
			// Constructs a ChunkHeader in place at fMem. The header's fields are not
			// given any values here (the ChunkHeader constructor is empty).
			void pt_create_memory_chunk_header(void* fMem)
			{
				::new (fMem) ChunkHeader();
			}
		}

		
	}
}
