
#if 0

#if 0

/*
 *	Copyright (c) Januar 2005 Jean Gressmann (jsg@rz.uni-potsdam.de)
 *
 *  Einige Beispiele zur Benutzung der Thread-Klassen
 *
 */

#ifdef _MSC_VER
	#pragma warning(disable:4786)
	#pragma warning(disable:4503)
#endif

#include <iostream>
#include <string>
#include <vector>
#include <cassert>
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <set>
#include <iomanip>
#include <portablethreads/smartpointer.h>
#include <portablethreads/thread.h>
#include <portablethreads/utility.h>
#include <portablethreads/lock_free.h>
#include <portablethreads/random.h>
#include <portablethreads/time.h>
#include <portablethreads/barrier.h>
#ifdef _MSC_VER
	#include <crtdbg.h>
#endif

using namespace std;
using namespace PortableThreads;
//using namespace PortableThreads::Private;
using namespace PortableThreads::LockFree;
//using namespace PortableThreads::LockFree::Private;


namespace beispiel_12
{

	typedef vector<unsigned long> ShortList;
	typedef pair<size_t, ShortList > Element;
	typedef PTQueue<Element> Queue;
	
	class Reader : public PThread
	{
	public:
		Reader(int id, Queue& queue, PTAtomicNumber& n)
			:	id_(id)
			,	shutdown_(false)
			,	queue_(queue)
			,	n_(n)
		{}

		void threadMain()
		{
			Element e;			
			while(!shutdown_)
			{
				bool didRemove = queue_.popFront(e);
				if(didRemove)
				{
					--n_;
					int ok = e.first == e.second.size();
					if(ok)
					{
						for(size_t i = 0; i < e.second.size(); ++i)
						{
							assert(e.second[i] == e.first);
						}
					}
					assert(ok);
				}
				else
					give();
			}
			std::printf("R %d done\n", id_);
		}
		void shutdown() { shutdown_ = true; }
	private:
		const int id_;
		volatile bool shutdown_;
		Queue& queue_;
		PTAtomicNumber& n_;
	};

	class Writer : public PThread
	{
	public:
		Writer(int id, Queue& queue, int runs)
			:	id_(id)
			,	queue_(queue)
			,	runs_(runs)
		{}
		void threadMain()
		{
			PTRandom r(pt_seed());
			for(int i = 0; i < runs_; ++i)
			{
				unsigned long n = r.urand() % 100;
				Queue::value_type vt = make_pair(n, ShortList(n, n));
				queue_.pushBack(vt);
				if(i % 5 == 0)
					pt_milli_sleep(50);
			}
		}
	private:
		const int id_;
		Queue& queue_;
		int runs_;
	};
}

/*****************************************************************************/

// Test code (reader/writer) for the lock-free data structures. The queue and
// the stack are each tested with two readers and two writers (note: the code
// below currently starts four of each).
// Each data structure is tested once with memory management enabled (fat,
// internal memory is reused)
// and once without (lean, every insertion causes a heap allocation).

// Stress test for the lock-free queue: four Reader threads drain `queue`
// while four Writer threads fill it. After all threads have finished, the
// elements still left in the queue are popped and validated on the calling
// thread, and finally the queue destructor is exercised.
void lockfree_queue()
{
	using namespace beispiel_12;
	
	if(1)
	{
		std::printf("Lock-free Queue\n");
		Queue queue;
		// The writers push 20+30+30+20 = 100 elements in total and every Reader
		// decrements n once per popped element, so after the run n holds the
		// number of elements that are still queued.
		PTAtomicNumber n(100);
		Reader r1(1, queue, n), r2(2, queue, n), r3(3, queue, n), r4(4, queue, n);

		// Readers are started first, so they poll the queue before any writer runs.
		r1.run();
		r2.run();
		r3.run();
		r4.run();

		Writer w1(1, queue, 20), w2(2, queue, 30), w3(3, queue, 30), w4(4, queue, 20);
		w1.run();
		w2.run();
		w3.run();
		w4.run();

		w1.join();
		w2.join();
		w3.join();
		w4.join();

		// All elements have been pushed; now ask the readers to stop polling.
		std::printf("Writers joined\n");
		r1.shutdown();
		r2.shutdown();
		r3.shutdown();
		r4.shutdown();
		std::printf("Signaled readers shutdown\n");
		r1.join();
		std::printf("R1 joined\n");
		r2.join();
		std::printf("R2 joined\n");
		r3.join();
		std::printf("R3 joined\n");
		r4.join();
		std::printf("R4 joined\n");
		// Drain what the readers did not consume and apply the same integrity
		// check the readers use (first == list size, every entry == first).
		Element e;
		for(PTAtomicNumber::int_type i = 0; i < n.get(); ++i)
		{
			bool removed = queue.popFront(e);
			assert(removed);
			int ok = e.first == e.second.size();
			if(ok)
			{
				// NOTE: this inner i deliberately-or-not shadows the outer loop index.
				for(size_t i = 0; i < e.second.size(); ++i)
				{
					assert(e.second[i] == e.first);
				}
			}
			assert(ok);
		}
		std::printf("Forcing queue d'tor\n");
	}
	// Leaving the scope above destroyed the queue.
	std::printf("Queue d'tor went ok\n");
	
	
}


namespace beispiel_12_1
{
	typedef vector<unsigned long> ShortList;
	typedef pair<size_t, ShortList > Element;
	typedef PTStack<Element> Stack;
	
	class Reader : public PThread
	{
	public:
		Reader(int id, Stack& s, PTAtomicNumber& n)
			:	id_(id)
			,	shutdown_(false)
			,	stack_(s)
			,	n_(n)
		{}
		void threadMain()
		{
			Element e;
			
			while(!shutdown_)
			{
				bool didRemove = stack_.pop(e);
				if(didRemove)
				{
					--n_;
					int ok = e.first == e.second.size();
					if(ok)
					{
						for(size_t i = 0; i < e.second.size(); ++i)
						{
							assert(e.second[i] == e.first);
						}
					}
					assert(ok);
					
				}
				else
					give();
				
			}
			std::printf("R %d done\n", id_);
		}
		void shutdown() volatile { shutdown_ = true; }
	private:
		const int id_;
		volatile bool shutdown_;
		Stack& stack_;
		PTAtomicNumber& n_;
	};

	class Writer : public PThread
	{
	public:
		Writer(int id, Stack& s, int runs, PTAtomicNumber& n)
			:	id_(id)
			,	stack_(s)
			,	runs_(runs)
			,	n_(n)
		{}
		void threadMain()
		{
			PTRandom r(pt_seed());
			for(int i = 0; i < runs_; ++i)
			{
				unsigned long n = r.urand() % 100;
				Stack::value_type vt = make_pair(n, ShortList(n, n));
				stack_.push(vt);
				++n_;
				if(i % 5 == 0)
					pt_milli_sleep(50);
			}
		}
	private:
		const int id_;
		Stack& stack_;
		int runs_;
		PTAtomicNumber& n_;
	};
}

// Stress test for the lock-free stack: four Reader threads pop from `stack_`
// while four Writer threads push onto it. After all threads have finished,
// the elements still left are popped and validated on the calling thread,
// and finally the stack destructor is exercised.
void lockfree_stack()
{
	using namespace beispiel_12_1;

	// stack_ fat
	{
		std::printf("Lock-free Stack\n");
		Stack stack_;
		// Writers increment n per push, readers decrement it per pop, so after
		// the run n is the number of elements left on the stack
		// (presumably n starts at zero when default-constructed; confirm).
		PTAtomicNumber n;
		Reader r1(1, stack_, n), r2(2, stack_, n), r3(3, stack_, n), r4(4, stack_, n);

		// Readers are started first, so they poll the stack before any writer runs.
		r1.run();
		r2.run();
		r3.run();
		r4.run();

		Writer w1(1, stack_, 20, n), w2(2, stack_, 30, n), w3(3, stack_, 30, n), w4(4, stack_, 20, n);
		w1.run();
		w2.run();
		w3.run();
		w4.run();

		w1.join();
		w2.join();
		w3.join();
		w4.join();

		// All elements have been pushed; now ask the readers to stop polling.
		std::printf("Writers joined\n");
		r1.shutdown();
		r2.shutdown();
		r3.shutdown();
		r4.shutdown();
		std::printf("Signaled readers shutdown\n");
		r1.join();
		std::printf("R1 joined\n");
		r2.join();
		std::printf("R2 joined\n");
		r3.join();
		std::printf("R3 joined\n");
		r4.join();
		std::printf("R4 joined\n");
		// Drain what the readers did not consume and apply the same integrity
		// check the readers use (first == list size, every entry == first).
		Element e;
		for(PTAtomicNumber::int_type i = 0; i < n.get(); ++i)
		{
			bool removed = stack_.pop(e);
			assert(removed);
			int ok = e.first == e.second.size();
			if(ok)
			{
				// NOTE: this inner i shadows the outer loop index.
				for(size_t i = 0; i < e.second.size(); ++i)
				{
					assert(e.second[i] == e.first);
				}
			}
			assert(ok);
		}
		std::printf("Forcing stack d'tor\n");
	}
	// Leaving the scope above destroyed the stack.
	std::printf("Stack d'tor went ok\n");
}

namespace beispiel_14
{
	using namespace PortableThreads::Private;
	using namespace PortableThreads::LockFree::Private;

	// Worker thread that hammers a shared PTPointerCAS: after all workers have
	// met at the barrier, each performs `runs` successful compare-and-swap
	// operations and records the count() of the token it replaced. The
	// recorded counts are later checked for uniqueness across all workers.
	class StressCAS : public PThread
	{
	public:
		typedef std::vector<PTPointerCAS::int_t> IdList;
		StressCAS(PTBarrier& barrier, int id, int runs, PTPointerCAS& cas)
			:	barrier_(barrier)
			,	id_(id)
			,	runs_(runs)
			,	cas2_(cas)
		{}
	private:
		void threadMain()
		{
			barrier_.wait();
			token_t current, old;
			PTPointerCAS::int_t nv;
			for(int i = 0; i < runs_; ++i)
			{
				int tries = 0;
				do
				{
					if(tries)
					{
						// A previous attempt failed: remember the token it was based on.
						printf("%d trying for the %d. time\n", id_, tries+1);
						old = current;
					}
					current = cas2_.get();
					// Only multiples of 8 are ever stored (see nv below).
					assert(current.value() >= 0 && current.value() % 8 == 0);
					if(tries)
					{
						// A failed CAS means somebody else changed the value in
						// between, so the count must have advanced.
						if(current.count() <= old.count())
						{
							// Arguments are cast to long so the format specifier is
							// correct whatever the width of the token's integer type is.
							printf("Current count should be larger than old count: %ld:%ld\n",
								static_cast<long>(current.count()),
								static_cast<long>(old.count()));

						}
						assert(current.count() > old.count());
					}
					nv = 8*(i);
				
					++tries;					
				}
				while(!cas2_.cas(nv, current));
				ids_.push_back(current.count());
				printf("%d changed from %ld:%ld to %ld\n", id_, 
					static_cast<long>(current.value()),
					static_cast<long>(current.count()),
					static_cast<long>(nv));
			}
		}
	public:
		// Counts of all tokens this thread successfully replaced, in order.
		const IdList& ids() const { return ids_; }
	private:
		PTBarrier& barrier_;
		const int id_;
		const int runs_;
		PTPointerCAS& cas2_;
		IdList ids_;
	};
}

void beispiel14()
{
	using namespace beispiel_14;

	const int n = 8;
	const int runs = 20;
	PTBarrier barrier(n);
	PTPointerCAS value;
	std::vector< StackPtr<StressCAS> > threads(n);
	typedef std::set<PTPointerCAS::int_t> IdSet;
	IdSet ids;
	for(int i = 0; i < n; ++i)
	{
		threads[i] = new StressCAS(barrier, i, runs, value);
		threads[i]->run();
	}
	for(int i = 0; i < n; ++i)
	{
		threads[i]->join();
		for(StressCAS::IdList::const_iterator it = threads[i]->ids().begin();
			it != threads[i]->ids().end(); ++it)
		{
			ids.insert(*it);
		}
	}
	if(value.get().count() < n*runs)
	{
		cerr << "error! Expected at least: " << (n*runs) << ", is: " << value.get().count() << endl;
	}
	assert(value.get().count() >= n*runs);
	if(ids.size() != n*runs)
	{
		cerr << "error! Expected set size: " << (n*runs) << ", is: " << ids.size() << endl;
	}
	assert(ids.size() == n*runs);
	cout << "OK" << endl;
}

// Helpers for splitting a 64-bit integer into two 32-bit halves and for
// packing two 32-bit values into one 64-bit integer.
//
// The packing helpers copy raw object bytes, so which 32-bit value ends up in
// the numerically high half depends on the byte order of the target.
namespace beispiel_15
{
	
	// Upper 32 bits of `large`.
	inline int32 high(int64 large)
	{
		return (int32)(large >> 32);
	}
	// Lower 32 bits of `large`.
	inline int32 low(int64 large)
	{
		return (int32)large;
	}

	// Packs {value, count} into one int64 by copying the bytes of the pair.
	// std::memcpy is used instead of dereferencing a casted pointer: reading an
	// int32 array through an int64* violates the aliasing rules and may be
	// misaligned.
	static inline int64 multiplex(int32 value, int32 count)
	{
		const int32 buf[2] = { value, count };
		int64 packed;
		std::memcpy(&packed, buf, sizeof(packed));
		return packed;
	}


	// Unsigned counterpart of multiplex().
	static inline uint64 umultiplex(uint32 value, uint32 count)
	{
		const uint32 buf[2] = { value, count };
		uint64 packed;
		std::memcpy(&packed, buf, sizeof(packed));
		return packed;
	}

	// Loads `value` from the two consecutive int32 that `buf` points to.
	static inline void assign(int64& value, const int32* buf)
	{
		std::memcpy(&value, buf, sizeof(value));
	}

	// Loads `value` from the two consecutive uint32 that `buf` points to.
	static inline void uassign(uint64& value, const uint32* buf)
	{
		std::memcpy(&value, buf, sizeof(value));
	}

	// Stores `value` into the two consecutive int32 that `buf` points to.
	static inline void assign(int32* buf , int64 value)
	{
		std::memcpy(buf, &value, sizeof(value));
	}

	// Stores `value` into the two consecutive uint32 that `buf` points to.
	static inline void uassign(uint32* buf , uint64 value)
	{
		std::memcpy(buf, &value, sizeof(value));
	}

	// Overlay of a 64-bit integer with two 32-bit members.
	// NOTE(review): high_ is the first member in memory; whether it really
	// overlays the numerically high half depends on byte order - confirm.
	union union_int
	{
		int64 large_;
		struct { int32 high_, low_; } parts_;
	};

	// Unsigned counterpart of union_int.
	union union_uint
	{
		uint64 large_;
		struct { uint32 high_, low_; } parts_;
	};

}

// Sanity check for the bit layout used to combine a value and a counter in
// one word, followed by a single-threaded walk through PTPointerCAS:
// assign/get/cas, and a check that each successful cas advances count() by one.
void beispiel15()
{
	using namespace PortableThreads::LockFree::Private;

	// Build "value 64 in the upper part, counter 1 in the low 19 bits".
	int64 n = 1<<6;
	assert(n == 64);
	n <<= 19;
	n++;

	cout << "Count should be 1, is: " << (n & (((int64)1<<19)-1)) << endl;
	// The two lines below shift RIGHT; the labels used to say "Left shift".
	cout << "Right shift 16, now: " << (n >> 16) << endl;
	cout << "Right shift 19, now: " << (n >> 19) << endl;
	cout << "Value should be 64, is: " << ((n >> 19) & ~(((int64)1<<3)-1)) << endl;

	// Attention: every value used with PTPointerCAS in this file is a multiple
	// of 8, i.e. its three low-order bits are zero.
	// NOTE(review): presumably PTPointerCAS requires this - confirm against the library.
	PTPointerCAS ptc;
	token_t current, old;

	ptc.assign(64);

	current = ptc.get();
	cout << "Value should be 64, is: " << current.value() << endl;
	assert(current.value() == 64);
	
	bool ret = ptc.cas(8, current);
	assert(ret);
	current = ptc.get();
	cout << "Value should be 8, is: " << current.value() << endl;
	assert(current.value() == 8);

	ptc.assign(0);

	// Step the value 0 -> 8 -> 16 ... with one cas per iteration.
	for(int i = 0; i < 10; ++i)
	{
		old = current;
		current = ptc.get();
		if(current.value() != (i*8))
		{
			cout << "i: " << i << ". value should be " << (i*8) << ", is " << current.value() << endl;
		}
		assert(current.value() == (i*8));
		if(i)
		{
			// From the second iteration on, exactly one cas happened since `old`.
			assert(current.count() == old.count() + 1);			
		}
		bool ok = ptc.cas(8*(i+1), current);
		if(!ok)
		{
			cout << "Failed with i=" << i << endl;
		}
		assert(ok);
	}
}

// Single-threaded sanity check of PTAtomicNumber: construction from a value,
// inc(), post-increment, assignment and pre-decrement.
void atomic_number()
{
	PTAtomicNumber n(-42);
	assert(n.get() == -42);
	n.inc(42);
	assert(n.get() == 0);
	n.inc(42);
	// Post-increment is expected to return the value before the increment.
	PTAtomicNumber::int_type res = n++;
	assert(res == 42);
	assert(n.get() == 43);
	
	n = -33;
	
	assert(n.get() == -33);
	// Pre-decrement is expected to return the value after the decrement.
	res = --n;
	assert(res == -34);
	assert(n.get() == -34);

}

// Single-threaded sanity check of the byte-sized lock primitives
// pt_atomic_set_lock / pt_atomic_clear_lock.
void spinlock()
{
	using namespace PortableThreads::LockFree::Private;

	volatile uint8 lock = 0;
	// Taking a free lock must succeed and leave the byte non-zero.
	bool ret = pt_atomic_set_lock(&lock);
	assert(lock != 0);
	assert(ret);
	pt_atomic_clear_lock(&lock);
	assert(lock == 0);
	// Clearing an already cleared lock must leave it cleared.
	pt_atomic_clear_lock(&lock);
	assert(lock == 0);
	ret = pt_atomic_set_lock(&lock);
	assert(lock != 0);
	assert(ret);
	// Taking a lock that is already held must fail and leave it held.
	ret = pt_atomic_set_lock(&lock);
	assert(lock != 0);
	assert(!ret);
}

// Thin wrapper around an integral value whose only purpose is to select the
// binary-printing operator<< below.
template<class T>
struct bits
{
	T t_;
	bits(T t = 0)
		:	t_(t)
	{}
};

// Writes all 8*sizeof(T) bits of b.t_ to `os`, most significant bit first.
//
// Fixes over the previous version:
//  - bit 0 is printed as well (the old loop stopped at i > 0),
//  - the value is shifted instead of an int-typed `1<<i` mask, which was
//    undefined for bit positions >= the width of int,
//  - the wrapper is taken by const reference so temporaries can be printed.
template<class T>
std::ostream& operator<<(std::ostream& os, const bits<T>& b)
{
	for(unsigned i = 8*sizeof(T); i-- > 0;)
	{
		os << (((b.t_ >> i) & 1) == 0 ? '0' : '1');
	}
	return os;
}

// Simulation of a pointer-CAS word layout for UltraSPARC: a counter occupies
// the top 20 bits (bits 44..63) of a 64-bit word and is combined with the
// value by XOR. The function packs a small value and a value whose top bits
// are all ones with a counter and prints what is recovered from each.
void ultraSPARC()
{
	// UltraSPARC PointerCAS simulation

	const int64 MSB_MASK = static_cast<int64>(1)<<63;
	// Bits 44..63 set. Built from an unsigned all-ones value: the previous
	// expression (((int64)1<<21)-1) << 44 shifted 21 one-bits by 44, which
	// does not fit into 64 bits and overflowed a signed integer.
	const int64 HIGH_ONES = static_cast<int64>(~static_cast<uint64>(0) << 44);
	const int64 LOW_ONES = ~HIGH_ONES;

	// Prints whether the most significant bit is clear for -1, 1<<63 and 0.
	cout << ((((int64)-1) & MSB_MASK)  == 0) << endl;
	cout << ((((int64)1<<63) & MSB_MASK)  == 0) << endl;
	cout << ((((int64)0) & MSB_MASK)  == 0) << endl;
	
	int64 low = 42;
	int64 high = low | HIGH_ONES;

	int64 count = ((int64)33) << 44;

	// Case 1: value with clear top bits.
	int64 mux = low ^ count;

	// If the MSB is set, the top bits are treated as inverted by the XOR.
	int64 counter = (mux & MSB_MASK) == 0 ? mux >> 44 : (~mux) >> 44;
	int64 value = (mux & MSB_MASK) == 0 ? mux & LOW_ONES : mux | HIGH_ONES;
	cout << counter << ", " << value << endl;

	// Case 2: value whose top bits are all ones.
	mux = high ^ count;
	counter = (mux & MSB_MASK) == 0 ? mux >> 44 : (~mux) >> 44;
	value = (mux & MSB_MASK) == 0 ? mux & LOW_ONES : mux | HIGH_ONES;
	cout << counter << ", " << value << ". Same as high: " << (value == high) << endl;


}

// Exercises the iterator interface of PTQueue: prints a queue holding three
// ints, an empty queue, and the first queue again through a const reference.
void queue_iterator()
{
	typedef PTQueue<int> queuetype;
	queuetype q;

	q.pushBack(1);
	q.pushBack(2);
	q.pushBack(3);

	// Each element is written to cout followed by a single space.
	cout << "Queue with three elements: ";
	copy(q.begin(), q.end(), ostream_iterator<int>(cout, " "));
	cout << endl;

	// begin()/end() on a queue nothing was pushed to.
	queuetype f;
	cout << "Queue with zero elements: ";
	copy(f.begin(), f.end(), ostream_iterator<int>(cout, " "));
	cout << endl;

	// Same traversal through the const overloads of begin()/end().
	const queuetype& cq = q;
	cout << "Queue with three elements: ";
	copy(cq.begin(), cq.end(), ostream_iterator<int>(cout, " "));
	cout << endl;
}

// Worker threads for the allocator benchmark. Both classes run the same
// workload - `rounds` rounds of `allocations` allocations of 100 bytes each,
// every block being freed again in reverse order - and differ only in where
// the memory comes from.
//
// NOTE: sizesMin/sizesMax are stored (min_, max_) but not used by the
// workload; the block size is fixed at 100 bytes.
namespace lockfree_allocator_
{
	// Workload on the global heap (::operator new / ::operator delete).
	class GlobalHeapUser : public PThread
	{
	public:
		GlobalHeapUser(PTBarrier& barrier1, PTBarrier& barrier2, unsigned rounds, unsigned allocations, unsigned sizesMin, unsigned sizesMax)
			:	barrier1_(&barrier1)
			,	barrier2_(&barrier2)
			,	rounds_(rounds)
			,	allocations_(allocations)
			,	min_(sizesMin)
			,	max_(sizesMax)
		{}
	private:
		void threadMain()
		{
			std::vector<void*> buffer;
			buffer.reserve(allocations_);
			// Start line: wait until all participants are ready.
			barrier1_->wait();
			const size_t length = 100;
			for(unsigned i = 0; i < rounds_; ++i)
			{
				buffer.clear();
				for(unsigned j = 0; j < allocations_; ++j)
				{
					void* p = allocate(length);
					// Touch roughly every second block.
					if(rand_.urand() % 2)
						memset(p, 42, length);
					buffer.push_back(p);					
				}

				// Release in reverse order of allocation.
				for(unsigned j = 0; j < allocations_; ++j)
				{
					deallocate(buffer.back());
					buffer.pop_back();
				}

			}
			// Finish line.
			barrier2_->wait();
		}
	private:
		void* allocate(size_t s)
		{
			return ::operator new(s);
		}
		void deallocate(void* p)
		{
			::operator delete(p);
		}
	private:
		PTBarrier* barrier1_;
		PTBarrier* barrier2_;
		unsigned rounds_, allocations_, min_, max_;
		PTRandom rand_;
	};

	// Same workload, served by a shared PTHeap instance.
	class PTHeapUser : public PThread
	{
	public:
		PTHeapUser(PTBarrier& barrier1, PTBarrier& barrier2, PTHeap& heap, unsigned rounds, unsigned allocations, unsigned sizesMin, unsigned sizesMax)
			:	barrier1_(&barrier1)
			,	barrier2_(&barrier2)
			,	heap_(&heap)
			,	rounds_(rounds)
			,	allocations_(allocations)
			,	min_(sizesMin)
			,	max_(sizesMax)
		{}
	private:
		void threadMain()
		{
			std::vector<void*> buffer;
			buffer.reserve(allocations_);
			// Start line: wait until all participants are ready.
			barrier1_->wait();
			const size_t length = 100;
			for(unsigned i = 0; i < rounds_; ++i)
			{
				buffer.clear();
				for(unsigned j = 0; j < allocations_; ++j)
				{
					void* p = allocate(length);
					// Touch roughly every second block.
					if(rand_.urand() % 2)
						memset(p, 42, length);
					buffer.push_back(p);					
				}

				// Release in reverse order of allocation.
				for(unsigned j = 0; j < allocations_; ++j)
				{
					deallocate(buffer.back());
					buffer.pop_back();
				}

			}
			// Finish line.
			barrier2_->wait();
		}
		void* allocate(size_t s)
		{
			return heap_->allocate(s);
		}
		void deallocate(void* p)
		{
			heap_->deallocate(p);
		}
	private:
		PTBarrier* barrier1_;
		PTBarrier* barrier2_;
		PTHeap* heap_;
		unsigned rounds_, allocations_, min_, max_;
		PTRandom rand_;
	};
}

// Benchmark: runs the same allocation workload on four threads, first against
// the global heap and then against a shared PTHeap, and prints the elapsed
// time for each.
void lockfree_allocator()
{
	using namespace lockfree_allocator_;

	unsigned threads = 4;
	unsigned its = 1<<10, allocations = 40;
	unsigned sizesmin = 12, sizesmax = 64;


	std::vector<PTHeapUser*> lfThreads;
	std::vector<GlobalHeapUser*> globalThreads;
	// Both barriers are sized for the workers plus this (the calling) thread.
	PTBarrier b1(threads+1), b2(threads+1);
	PTime stopwatch;
	PTHeap h;
	
	// Workers are owned through raw pointers and deleted at the end of the function.
	for(unsigned i = 0; i < threads; ++i)
	{
		lfThreads.push_back(new PTHeapUser(b1, b2, h, its, allocations,sizesmin, sizesmax));
		globalThreads.push_back(new GlobalHeapUser(b1, b2, its, allocations,sizesmin, sizesmax));
	}

	// Phase 1: global heap.
	for(unsigned i = 0; i < threads; ++i)
	{
		globalThreads[i]->run();
	}
		
	
	// The measured interval spans from the start barrier to the finish barrier.
	stopwatch.start();
	b1.wait();
	b2.wait();
	stopwatch.stop();

	for(unsigned i = 0; i < threads; ++i)
	{
		globalThreads[i]->join();
	}
	
	cout	<< "Global heap: " << setprecision(3) 
			<< (stopwatch.difference() / (double)stopwatch.frequency()) << endl;

	// Phase 2: PTHeap; the two barriers are used a second time.
	for(unsigned i = 0; i < threads; ++i)
	{
		lfThreads[i]->run();
	}

	stopwatch.start();
	b1.wait();
	b2.wait();
	stopwatch.stop();

	for(unsigned i = 0; i < threads; ++i)
	{
		lfThreads[i]->join();
	}
	
	cout	<< "PTHeap heap: " << setprecision(3) 
			<< (stopwatch.difference() / (double)stopwatch.frequency()) << endl;

	for(unsigned i = 0; i < threads; ++i)
	{
		delete lfThreads[i];
		delete globalThreads[i];
	}
}

#if 0
// Reader/Writer threads for a PTDeque<int> stress test. Writers insert the
// numbers 0..count-1, alternating between both ends; readers remove `count`
// numbers, alternating between both ends as well.
namespace MichaelDeque
{
	typedef PTDeque<int> IntQueue;

	// Pushes 0..count-1: odd numbers at the back, even numbers at the front.
	class Writer : public PThread
	{
	public:
		Writer(PTBarrier& b, IntQueue& q, unsigned count)
			:	barrier_(&b)
			,	queue_(&q)
			,	count_(count)
		{}
	private:
		void threadMain()
		{
			barrier_->wait();
			for(unsigned value = 0; value < count_; ++value)
			{
				if(value % 2)
					queue_->pushBack(value);
				else
					queue_->pushFront(value);
			}
		}
	private:
		PTBarrier* barrier_;
		IntQueue* queue_;
		unsigned count_;
	};

	// Removes `count` elements. The end to pop from is chosen by the parity of
	// the number of elements removed so far; a failed pop is simply retried.
	class Reader : public PThread
	{
	public:
		Reader(PTBarrier& b, IntQueue& q, unsigned count)
			:	barrier_(&b)
			,	queue_(&q)
			,	count_(count)
		{}
	private:
		void threadMain()
		{
			barrier_->wait();
			unsigned taken = 0;
			while(taken < count_)
			{
				int x = 0;
				const bool ok = (taken % 2) ? queue_->popBack(x) : queue_->popFront(x);
				if(ok)
				{
					++taken;
					// Writers only ever insert numbers below count_.
					assert(x < count_);
				}
			}
		}
	private:
		PTBarrier* barrier_;
		IntQueue* queue_;
		unsigned count_;
	};
}

// Stress test for PTDeque: four Writer and four Reader threads are released
// together through a barrier; once all of them are done the deque must be
// empty, so a final popBack is expected to fail.
void michaeldeque()
{
	using namespace MichaelDeque;

	IntQueue q;
	unsigned threads = 4;
	unsigned count = 1000;
	// All writers, all readers and the calling thread meet at the barrier.
	PTBarrier b(2*threads + 1);
	// Start empty and only reserve capacity: the previous code constructed
	// the vectors with `threads` null pointers and then used push_back, which
	// left those null entries in front of the real threads.
	vector<Writer*> writer;
	vector<Reader*> reader;
	writer.reserve(threads);
	reader.reserve(threads);

	for(unsigned i = 0; i < threads; ++i)
	{
		writer.push_back(new Writer(b, q, count));
		writer.back()->run();
		reader.push_back(new Reader(b, q, count));
		reader.back()->run();
	}
	

	b.wait();

	// Join and destroy the threads in reverse order of creation.
	for(unsigned i = 0; i < threads; ++i)
	{
		writer.back()->join();
		delete writer.back();
		writer.pop_back();
		reader.back()->join();
		delete reader.back();
		reader.pop_back();
	}
	
	// Readers removed exactly as many elements as writers inserted.
	int x = 0;
	bool ok = q.popBack(x);
	cout << "Should fail (0 = good): " << ok << endl;

}
#endif

// Test driver. All test calls are currently commented out, so the program
// does nothing and returns 0.
int main()
{
#ifdef _MSC_VER
	// MSVC debug-heap setup (leak check, heap check on every allocation,
	// reports redirected to stderr). Disabled via if(0).
	if(0)
	{
		_CrtSetDbgFlag(_CrtSetDbgFlag(_CRTDBG_REPORT_FLAG) |
					_CRTDBG_LEAK_CHECK_DF | _CRTDBG_ALLOC_MEM_DF |
		_CRTDBG_CHECK_ALWAYS_DF);
		_CrtSetReportMode( _CRT_WARN, _CRTDBG_MODE_FILE );
		_CrtSetReportFile( _CRT_WARN, _CRTDBG_FILE_STDERR );
		_CrtSetReportMode( _CRT_ERROR, _CRTDBG_MODE_FILE );
		_CrtSetReportFile( _CRT_ERROR, _CRTDBG_FILE_STDERR );
		_CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_FILE );
		_CrtSetReportFile( _CRT_ASSERT, _CRTDBG_FILE_STDERR );
	}
#endif
	// Available tests; move a call out of the comment to run it.
	/*	
	beispiel14();
	lockfree_queue();
	
	lockfree_stack();
	
	atomic_number();
	spinlock();
	ultraSPARC();
	
	queue_iterator();
	
	
	lockfree_allocator();
	*/
	
	//michaeldeque();
	// Disabled ad-hoc PTDeque experiment.
	/*
	int x = 0;
	PTDeque<int> d;
	
	d.pushBack(1);
	d.pushBack(2);
	d.pushBack(3);
	d.popBack(x);
	cout << x << "\n";
	d.popBack(x);
	cout << x << "\n";
	d.popBack(x);
	cout << x << endl;
	*/

	
	return 0;
}
#endif

#include <iostream>
#include <portablethreads/thread.h>
#include <portablethreads/message_queue.h>
#include <portablethreads/time.h>

using namespace std;

// Messages exchanged between Producer and Consumer are plain ints.
typedef int MessageType;
// Sentinel the Producer sends last; the Consumer stops when it receives it.
const MessageType TerminiateMessage = -1;
typedef PortableThreads::PTMessageQueue<MessageType> Queue;

// Thread that pushes the numbers 0..itemsToProduce-1 into the message queue,
// one every 50 ms, followed by the terminate message.
class Producer : public PortableThreads::PThread 
{
public:
	Producer(Queue& queue, int itemsToProduce)
		:	queue_(&queue)
		,	itemsToProduce_(itemsToProduce)
	{}
private:
	void threadMain()
	{
		int produced = 0;
		while(produced < itemsToProduce_)
		{
			// Pretend that producing an item takes a while.
			PortableThreads::pt_milli_sleep(50);
			queue_->pushBack(produced);
			++produced;
		}

		// Nothing more to produce: tell the consumer to stop.
		queue_->pushBack(TerminiateMessage);
	}
private:
	Queue* queue_;
	int itemsToProduce_;
};

// Thread that prints every message it receives from the queue until the
// terminate message arrives.
class Consumer : public PortableThreads::PThread 
{
public:
	Consumer(Queue& queue)
		:	queue_(&queue)
	{}
private:
	void threadMain()
	{
		cout << "consuming: ";
		MessageType message;
		for(;;)
		{
			// Wait for a limited period, then poll anyway in case a
			// message was missed.
			queue_->wait(0, 10); 
			if(!queue_->popFront(message))
				continue;	// nothing there yet

			if(message == TerminiateMessage)
				break;

			cout << message << " ";
		}
		cout << "done" << endl;
	}
private:
	Queue* queue_;
};

#endif

// Fixed-width integer aliases used by pt_atomic_cas below.
// __int64 is a compiler-specific 64-bit type.
typedef int int32;
typedef unsigned uint32;
typedef __int64 int64;
typedef unsigned __int64 uint64;

// Atomic 64-bit compare-and-swap for 32-bit x86 (MSVC inline assembler).
//
// If *inMemory equals `ov`, `nv` is stored to *inMemory.
//
// @param inMemory  address of the 64-bit word to update
// @param nv        new value to store
// @param ov        value *inMemory is expected to hold
// @return          true if the exchange took place, false otherwise
//
// cmpxchg8b compares EDX:EAX with the memory operand and, on a match, stores
// ECX:EBX there and sets ZF; `sete` turns ZF into the return value.
inline bool __cdecl pt_atomic_cas(volatile int64* inMemory, int64 nv, int64 ov)
{
	// The halves live in separate named locals on purpose: inside an __asm
	// block `buf[1]` is a BYTE offset (array indices are not scaled by the
	// element size), so the previous `mov edx, buf[1]` etc. loaded misaligned
	// garbage instead of the second, third and fourth dword.
	const uint32 oldLow  = static_cast<uint32>(static_cast<uint64>(ov));
	const uint32 oldHigh = static_cast<uint32>(static_cast<uint64>(ov) >> 32);
	const uint32 newLow  = static_cast<uint32>(static_cast<uint64>(nv));
	const uint32 newHigh = static_cast<uint32>(static_cast<uint64>(nv) >> 32);

	bool yes;
	__asm
	{
		mov eax, oldLow
		mov edx, oldHigh
		mov ebx, newLow
		mov ecx, newHigh
		mov edi, inMemory
		lock cmpxchg8b [edi]
		sete yes
	}
	return yes;
}




#if 0


/*
 *	Copyright (c) Januar 2005 Jean Gressmann (jsg@rz.uni-potsdam.de)
 *
 *  Einige Beispiele zur Benutzung der Thread-Klassen
 *
 */

#ifdef _MSC_VER
	#pragma warning(disable:4786)
	#pragma warning(disable:4503)
#endif

#include <iostream>
#include <fstream>
#include <string>
#include <cassert>
#include <deque>
#include <list>
#include <ctime>
#include <cstdlib>
#include <cstdio>
#include <set>
#include <portablethreads/mutex.h>
#include <portablethreads/condition.h>
#include <portablethreads/thread.h>
#include <portablethreads/tsallocator.h>
#include <portablethreads/message_queue.h>
#include <portablethreads/utility.h>
#include <portablethreads/lock_free.h>
#ifdef _MSC_VER
	#include <crtdbg.h>
#endif

using namespace std;
using namespace PortableThreads;






/*****************************************************************************/

// In this ping-pong example a monitor is used for synchronization.
// The advantage of the monitor over a mutex is that the monitor is potentially
// faster when only one thread at a time tries to access it.
// With a mutex the memory bus has to be locked every time - with the monitor,
// atomically decrementing a counter ensures that only one thread is inside
// the monitor at any time. If another thread tries to enter the monitor, it
// has to wait until the active thread leaves the monitor and signals this to
// any waiting threads.


namespace beispiel_8
{
	// Thread that prints "ping" or "pong" ten times, depending on a flag
	// shared with the other PingPong threads, and flips the flag after each
	// line. Output and flip happen inside the shared monitor.
	class PingPong : public PThread
	{
	public:
		PingPong(int id, PMonitor& monitor, volatile bool& ping)
			:	id_(id)
			,	monitor_(monitor)
			,	ping_(ping)
		{}
		void threadMain()
		{
			for(int round = 0; round < 10; ++round)
			{
				// Try to enter the monitor; if another thread is already
				// inside, this call blocks until it is our turn.
				monitor_.enter();
				const char* const word = ping_ ? "ping" : "pong";
				cout << id_ << " " << word << endl;
				ping_ = !ping_;
				monitor_.leave();
			}
		}
	private:
		const int id_;
		PMonitor& monitor_;
		volatile bool& ping_;
	};
}

// Interactive ping-pong demo: waits for a key press, then runs the PingPong
// threads three times - with three, two and finally one thread competing for
// the same monitor.
void beispiel8()
{
	cout << "Ping-Pong Beispiel mit 2 Threads die durch einen Monitor synchronisiert werden. ";
	cout << "Press enter to start" << endl;
	cin.get();

	using namespace beispiel_8;

	PMonitor monitor;
	volatile bool ping = true;

	// Three, then two, then one thread(s) compete to print ping or pong.
	PingPong p1(1, monitor, ping), p2(2, monitor, ping), p3(3, monitor, ping);
	
	p3.run();
	p2.run();
	p1.run();

	p1.join();
	p2.join();
	p3.join();


	// The same thread objects are started again after they have been joined.
	p2.run();
	p1.run();

	p1.join();
	p2.join();

	p1.run();

	p1.join();
}


namespace beispiel_12
{

	typedef vector<unsigned long> ShortList;
	typedef pair<size_t, ShortList > Element;
	typedef PLockFreeQueue<Element> Queue;
	
	class Reader : public PThread
	{
	public:
		Reader(int id, Queue& queue)
			:	id_(id)
			,	shutdown_(false)
			,	queue_(queue)
		{}

		void threadMain()
		{
			Element e;			
			while(!shutdown_)
			{
				bool didRemove = queue_.popFront(e);
				if(didRemove)
				{
					int ok = e.first == e.second.size();
					if(ok)
					{
						for(size_t i = 0; i < e.second.size(); ++i)
						{
							assert(e.second[i] == e.first);
						}
					}
					assert(ok);
				}
				else
					give();
			}
			std::printf("R %d done\n", id_);
		}
		void shutdown() volatile { shutdown_ = true; }
	private:
		const int id_;
		volatile bool shutdown_;
		Queue& queue_;
	};

	class Writer : public PThread
	{
	public:
		Writer(int id, Queue& queue, int runs)
			:	id_(id)
			,	queue_(queue)
			,	runs_(runs)
		{}
		void threadMain()
		{
			PRandom r(time(0));
			for(int i = 0; i < runs_; ++i)
			{
				unsigned long n = r.urand() % 100;
				Queue::value_type vt = make_pair(n, ShortList(n, n));
				queue_.pushBack(vt);
				if(i % 5 == 0)
					pt_milli_sleep(50);
			}
		}
	private:
		const int id_;
		Queue& queue_;
		int runs_;
	};
}

/*****************************************************************************/

// Test code (reader/writer) for the lock-free data structures. The queue and
// the stack are each tested with two readers and two writers (note: the code
// below currently starts four of each).
// Each data structure is tested once with memory management enabled (fat,
// internal memory is reused)
// and once without (lean, every insertion causes a heap allocation).

// Runs the reader/writer stress test twice on PLockFreeQueue: first on a
// queue constructed with `true` (labelled "lean"), then on a
// default-constructed queue (labelled "fat"). Each pass starts four readers
// and four writers, waits for the writers, shuts the readers down and then
// lets the queue go out of scope to exercise its destructor.
void beispiel12()
{
	using namespace beispiel_12;
	// queue lean
	if(1)
	{
		Queue queue(true);
		Reader r1(1, queue), r2(2, queue), r3(3, queue), r4(4, queue);

		// Readers are started first, so they poll the queue before any writer runs.
		r1.run();
		r2.run();
		r3.run();
		r4.run();

		Writer w1(1, queue, 20), w2(2, queue, 30), w3(3, queue, 30), w4(4, queue, 20);
		w1.run();
		w2.run();
		w3.run();
		w4.run();

		w1.join();
		w2.join();
		w3.join();
		w4.join();

		// All elements have been pushed; now ask the readers to stop polling.
		std::printf("Writers joined\n");
		r1.shutdown();
		r2.shutdown();
		r3.shutdown();
		r4.shutdown();
		std::printf("Signaled readers shutdown\n");
		r1.join();
		std::printf("R1 joined\n");
		r2.join();
		std::printf("R2 joined\n");
		r3.join();
		std::printf("R3 joined\n");
		r4.join();
		std::printf("R4 joined\n");
		// Elements the readers did not consume are still in the queue here.
		std::printf("Forcing queue d'tor\n");
	}
	std::printf("Queue d'tor went ok\n");

	// queue fat
	if(1)
	{
		Queue queue;
		Reader r1(1, queue), r2(2, queue), r3(3, queue), r4(4, queue);

		r1.run();
		r2.run();
		r3.run();
		r4.run();

		Writer w1(1, queue, 20), w2(2, queue, 30), w3(3, queue, 30), w4(4, queue, 20);
		w1.run();
		w2.run();
		w3.run();
		w4.run();

		w1.join();
		w2.join();
		w3.join();
		w4.join();

		std::printf("Writers joined\n");
		r1.shutdown();
		r2.shutdown();
		r3.shutdown();
		r4.shutdown();
		std::printf("Signaled readers shutdown\n");
		r1.join();
		std::printf("R1 joined\n");
		r2.join();
		std::printf("R2 joined\n");
		r3.join();
		std::printf("R3 joined\n");
		r4.join();
		std::printf("R4 joined\n");
		std::printf("Forcing queue d'tor\n");
	}
	std::printf("Queue d'tor went ok\n");
	
	
}

/*****************************************************************************/

// Test code for PAtomicNumber. The code is meant to make sure that atomic
// modifications of the number really are atomic.


namespace beispiel_13
{
	class AtomicBomber : public PThread
	{
	public:
		AtomicBomber(PAtomicNumber& n, int runs, bool up)
			:	n_(n)
			,	runs_(runs)
			,	up_(up)
		{}
		void threadMain()
		{
			PRandom r(time(0));
			for(int i = 0; i < runs_; ++i)
			{
				uint64 n = r.urand();
				if(up_)
				{
					n_.inc(2*n);
					n_.dec(2*n-1);
				}
				else
				{
					n_.dec(2*n);
					n_.inc(2*n-1);
				}
			}
		}
	private:
		PAtomicNumber& n_;
		const int runs_;
		bool up_;
	};

}


// Atomicity test for PAtomicNumber: two "up" bombers (10M + 5M runs, +1 per
// run) and two "down" bombers (5M + 10M runs, -1 per run) work on the same
// number concurrently. If every inc/dec is atomic, the contributions cancel
// and the number ends at the value it started with, which the final assert
// expects to be 0.
void beispiel13()
{
	using namespace beispiel_13;

	PAtomicNumber n;
	AtomicBomber b1(n, 10000000, true), b2(n, 5000000, true), b3(n, 5000000, false), b4(n, 10000000, false);

	b1.run();
	b2.run();
	b3.run();
	b4.run();

	b1.join();
	b2.join();
	b3.join();
	b4.join();

	// Print the offending value before the assert fires.
	if(n.get() != 0)
	{
		cout << n.get() << endl;
	}
	assert(n.get() == 0);	
}
/*****************************************************************************/

// Rudimentary sanity check for PAtomicNumber

// Single-threaded sanity check: inc()/dec() return values and swap() on
// PAtomicNumber, then the same compare-and-swap scenario directly on
// pt_atomic_cas with a plain int64.
void beispiel11()
{
	PAtomicNumber n;
	// inc()/dec() are expected to return the value after the modification.
	const PAtomicNumber::int_type shouldBe43 = n.inc(43);
	cout << "Return value should be 43, is: " << shouldBe43 << endl;
	cout << "n should be 43, is: " << n.get() << endl;
	assert(shouldBe43 == 43);
	assert(n.get() == 43);
	const PAtomicNumber::int_type shouldBe42 = n.dec(1);
	cout << "Return value should be 42, is: " << shouldBe42 << endl;
	cout << "n should be 42, is: " << n.get() << endl;
	assert(shouldBe42 == 42);
	assert(n.get() == 42);


	// swap(newValue, expectedOldValue): succeeds only if the number currently
	// holds the expected old value.
	n = 0;
	bool ok = n.swap(-1, 0);
	cout << "Swap should succeed, did: " << ok << endl;
	assert(ok);
	ok = n.swap(-2, 0);
	cout << "Swap should NOT succeed, did: " << ok << endl;
	// This failure case was only printed before; it is now asserted like in
	// the pt_atomic_cas section below.
	assert(!ok);
	ok = n.swap(-1, -1);
	cout << "Swap should succeed, did: " << ok << endl;
	assert(ok);

	if(1)
	{
		// Same three steps on the raw primitive; these locals shadow the outer n / ok.
		int64 n = 0;
		bool ok = pt_atomic_cas(&n, -1, 0);
		cout << "Swap should succeed, did: " << ok << endl;
		assert(ok);
		assert(n == -1);
		
		ok = pt_atomic_cas(&n, -2, 0);
		cout << "Swap should NOT succeed, did: " << ok << endl;
		assert(!ok);
		assert(n == -1);
		ok = pt_atomic_cas(&n, -1, -1);
		cout << "Swap should succeed, did: " << ok << endl;
		assert(ok);
		assert(n == -1);
	}
}

namespace xorQueue
{
	// Experimental queue prototype built on three pieces of shared state:
	// head_ and tail_ (PTPointerCAS cells) and special_, a 64-bit control word
	// that packs a node pointer, a 3-bit operation tag and a counter (layout
	// in multiplex()). pushFront()/popFront() announce an operation by
	// CAS-ing a tagged value into special_ while its tag is 0, then call
	// dispatch() to carry the announced operation out and reset the tag to 0.
	//
	// Tags used below: 0 = nothing announced, 1 = push into empty queue,
	// 2 = pop from a one-element queue, 3 = push onto a non-empty queue,
	// 5 = pop from a queue with more than one element.
	//
	// NOTE(review): the allocator parameter A is never used in this class.
	// NOTE(review): nodes are created with plain `new` in pushFront() and no
	// `delete` appears anywhere in this class.
	// NOTE(review): dispatch() refers to h, t and n, which are locals of
	// pushFront()/popFront() and are not declared in dispatch() or as
	// members; it also declares `newhead` twice in the tag-5 branch. As
	// written this looks like it cannot compile -- confirm before enabling.
	template<typename T, typename A = std::allocator<T> >
	class XORQueue
	{
	public:
		typedef T& reference;
		typedef const T& const_reference;
	private:
		typedef PortableThreads::LockFree::Private::PTPointerCAS PTPointerCAS;
		typedef PortableThreads::LockFree::Private::token_t token_t;
		typedef PortableThreads::LockFree::Private::SingleLinkedNode<T> Node;
	public:
		// Starts with head, tail and the control word all zero (empty, idle).
		XORQueue()
			:	head_(0)
			,	tail_(0)
			,	special_(0)
		{}
		// Allocates a node holding a copy of r and loops until it has managed
		// to announce the push in special_. Always returns true.
		bool pushFront(const_reference r)
		{
			Node* node = new Node;
			node->construct(r);

			
			token_t h, t, n;
			int64 s;
			bool done = false;
			while(!done)
			{
				// Snapshot of the shared state for this attempt.
				h = head_.get();
				t = tail_.get();
				s = special_;
				// The new node's link is set to the current head value.
				node->next(h.value());
				// Only announce when no other operation is pending.
				if(tag(s) == 0)
				{
					if(h.value() == 0 && t.value() == 0) // empty
					{
						int64 new_s = multiplex(node, 1, s);
						if(LockFree::Private::pt_atomic_cas(&special_, new_s, s))
						{
							cout << "we\n";
							s = new_s;
							done = true;
						}
						
					}
					else
					{
						// If exactly one of head/tail is zero nothing is
						// announced; the loop falls through to dispatch()
						// and retries.
						if(h.value() != 0 && t.value() != 0)
						{
							// n is assigned here but not read again in this function.
							n = reinterpret_cast<Node*>(h.value())->next();
							int64 new_s = multiplex(node, 3, s);
							if(LockFree::Private::pt_atomic_cas(&special_, new_s, s))
							{
								cout << "w+1\n";
								s = new_s;
								done = true;
							}
							
						}
					}
				}
				// Runs with either our freshly announced word or the word
				// observed above (possibly someone else's pending operation).
				dispatch(s);
			}
			return true;
		}
		// Returns false if head and tail are both zero at the time of the
		// snapshot. Otherwise loops until a pop has been announced, then
		// copies the data of the node that was head in that snapshot into r
		// and returns true.
		bool popFront(reference r)
		{
			token_t h, t, n;
			int64 s;
			bool done = false;
			while(!done)
			{
				h = head_.get();
				t = tail_.get();
				s = special_;

				if(h.value() == 0 && t.value() == 0) // empty
					return false;

				if(tag(s) == 0)
				{
					if(h.value() == t.value()) // single element
					{
						int64 new_s = multiplex(0, 2, s);
						if(LockFree::Private::pt_atomic_cas(&special_, new_s, s))
						{
							cout << "re\n";
							s = new_s;
							done = true;
						}
					}
					else
					{
						// The head's link value becomes the announced new
						// head; a zero link means "retry" (dispatch() is
						// skipped for that iteration).
						n = reinterpret_cast<Node*>(h.value())->next();
						if(!n.value()) 
							continue;
						int64 new_s = multiplex(reinterpret_cast<Node*>(n.value()), 5, s);
						if(LockFree::Private::pt_atomic_cas(&special_, new_s, s))
						{
							cout << "r-1\n";
							s = new_s;
							done = true;
						}
					}
					
				}
				else
				{
					//cout << "unstable\n";
				}
				dispatch(s);
			}
			assert(h.value());
			r = reinterpret_cast<Node*>(h.value())->data();
			return true;
		}
	private:
		// Carries out the operation encoded in control word s and finally
		// tries to CAS special_ back to a tag-0 word. Returns early, without
		// resetting, whenever special_ is observed to differ from s.
		void dispatch(int64 s)
		{
			// STACK-DEPENDENT!!!!!!!!!!!
			// NOTE(review): the marker above presumably refers to h, t and n
			// below, which exist only as locals of the calling functions and
			// are not visible in this scope -- confirm the intended design.
			if(s != special_)
				return;
			if(tag(s) == static_cast<int64>(1))
			{
				// push into empty queue
				if(s != special_)
					return;
				head_.cas(reinterpret_cast<PTPointerCAS::int_t>(pointer(s)), head_.get());
				if(s != special_)
					return;
				tail_.cas(reinterpret_cast<PTPointerCAS::int_t>(pointer(s)), t);
			}
			else if(tag(s) == static_cast<int64>(2))
			{
				// pop from queue with one element
				if(s != special_)
					return;
				head_.cas(0, h);
				if(s != special_)
					return;
				tail_.cas(0, t);
			}
			
			else if(tag(s) == static_cast<int64>(3))
			{
				// pushfront, one in queue
				
				Node* newhead = pointer(s);
				assert(newhead);
				h = head_.get();
				Node* oldhead = reinterpret_cast<Node*>(h.value());
				// Only touch the old head's link if the new node still
				// points at it.
				if(oldhead == reinterpret_cast<Node*>(newhead->next().value()))
				{
					token_t x = oldhead->next();

					// XOR the new head's address into the old head's link.
					if(special_ == s)
						oldhead->cas(x.value() ^ reinterpret_cast<PTPointerCAS::int_t>(newhead), x);
				}
				head_.cas(reinterpret_cast<PTPointerCAS::int_t>(newhead), h);
			}
			else if(tag(s) == static_cast<int64>(5))
			{
				// popfront
				Node* newhead = pointer(s);
				assert(newhead);

				// NOTE(review): oldhead is not used in this branch, and the
				// next line declares newhead a second time in the same scope.
				Node* oldhead = reinterpret_cast<Node*>(h.value());
				Node* newhead = reinterpret_cast<Node*>(n.value());
				token_t x = newhead->next();
				if(s != special_)
					return;
				// XOR the old head's address into the new head's link.
				newhead->cas(h.value() ^ x.value(), x);
				
				if(s != special_)
					return;
				head_.cas(reinterpret_cast<PTPointerCAS::int_t>(newhead), h);
			}
			
			// Operation done (or tag was 0): publish a tag-0 word with the
			// counter advanced.
			int64 new_s = multiplex(0, 0, s);
			LockFree::Private::pt_atomic_cas(&special_, new_s, s);
		}
		// Builds a control word: p shifted into bits 32 and up, tag t (0..7)
		// in bits 29..31, and the low-29-bit counter of ov plus one in the
		// low bits.
		// NOTE(review): shifting the pointer left by 32 discards any address
		// bits above the low 32; the pointer(nv) == p assertion below would
		// fire for such pointers.
		static int64 multiplex(Node* p, int64 t, int64 ov)
		{
			int64 nv = reinterpret_cast<int64>(p) << 32;
			assert(t >= 0);
			assert(t <= 7);
			nv ^= (t << 29);
			int64 oldcount = ov & ((static_cast<int64>(1) << 29)-1);
			// NOTE(review): after masking, the incremented counter is at most
			// 1 << 29, so this condition is always true; when it equals
			// 1 << 29 the XOR flips bit 29, the lowest tag bit. A strict `<`
			// looks intended -- confirm.
			if(++oldcount <= (static_cast<int64>(1) << 29))
				nv ^= oldcount;

			assert(pointer(nv) == p);
			assert(tag(nv) == t);
			return nv;
		}
		// Extracts the 3-bit operation tag (bits 29..31) from a control word.
		static inline int64 tag(int64 s)
		{
			return (s >> 29) & static_cast<int64>(7);
		}
		// Extracts the node pointer (bits 32 and up) from a control word.
		static inline Node* pointer(int64 s)
		{
			return reinterpret_cast<Node*>(s >> 32);
		}
	private:
		PTPointerCAS head_, tail_;
		// Control word; see multiplex() for the bit layout.
		volatile int64 special_;
	};

	// Queue instantiation shared by the Writer and Reader test threads.
	typedef XORQueue<int> IntQueue;

	// Producer for the XORQueue stress test: once past the shared barrier it
	// pushes the values 0 .. count-1 onto the queue. A value whose push is
	// reported as failed is logged and tried again.
	class Writer : public PThread
	{
	public:
		Writer(PTBarrier& b, IntQueue& q, unsigned count)
			: barrier_(&b), queue_(&q), count_(count)
		{
		}

	private:
		// Thread body (PThread's threadMain hook).
		void threadMain()
		{
			barrier_->wait();

			unsigned next = 0;
			while(next < count_)
			{
				if(!queue_->pushFront(next))
				{
					cout << "w push failed\n";
					continue;
				}
				++next;
			}
		}

		PTBarrier* barrier_;
		IntQueue* queue_;
		unsigned count_;
	};

	// Consumer for the XORQueue stress test: once past the shared barrier it
	// pops until it has received count values, asserting that each one is
	// below count. A failed pop is logged and retried.
	class Reader : public PThread
	{
	public:
		Reader(PTBarrier& b, IntQueue& q, unsigned count)
			: barrier_(&b), queue_(&q), count_(count)
		{
		}

	private:
		// Thread body (PThread's threadMain hook).
		void threadMain()
		{
			barrier_->wait();

			unsigned received = 0;
			while(received < count_)
			{
				int value = 0;
				if(!queue_->popFront(value))
				{
					cout << "r pop failed\n";
					continue;
				}

				++received;
				assert(value < count_);
			}
		}

		PTBarrier* barrier_;
		IntQueue* queue_;
		unsigned count_;
	};
}

// Exercises xorQueue::XORQueue in three steps: a single-threaded push/pop
// round trip, then `threads` Writer/Reader pairs running concurrently against
// one queue, and finally a pop whose result is printed (0 is the expected
// outcome).
void xorqueue()
{
	int x = 0;
	xorQueue::XORQueue<int> d;
	
	d.pushFront(1);
	d.pushFront(2);
	d.pushFront(3);
	d.popFront(x);
	cout << x << "\n";
	d.popFront(x);
	cout << x << "\n";
	d.popFront(x);
	cout << x << endl;
	

	xorQueue::XORQueue<int> q;
	const unsigned threads = 4;
	const unsigned count = 1000;
	// All writers and readers plus this thread meet at the barrier.
	PTBarrier b(2*threads + 1);

	// Start empty and only reserve capacity. Constructing the vectors with a
	// size would pre-fill them with null pointers ahead of the workers that
	// are pushed below.
	vector<xorQueue::Writer*> writer;
	vector<xorQueue::Reader*> reader;
	writer.reserve(threads);
	reader.reserve(threads);

	for(unsigned i = 0; i < threads; ++i)
	{
		writer.push_back(new xorQueue::Writer(b, q, count));
		writer.back()->run();
		reader.push_back(new xorQueue::Reader(b, q, count));
		reader.back()->run();
	}
	

	// Release the workers.
	b.wait();

	// Join and destroy the workers, last started first.
	for(unsigned i = 0; i < threads; ++i)
	{
		writer.back()->join();
		delete writer.back();
		writer.pop_back();
		reader.back()->join();
		delete reader.back();
		reader.pop_back();
	}
	
	// Every pushed value has been consumed by now, so this pop should fail.
	int y = 0;
	const bool ok = q.popFront(y);
	cout << "Should fail (0 = good): " << ok << endl;

}

#endif
