#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <sched.h>
#include <unistd.h>
#include "common.h"

/* This program shows whether reads and writes to different variables require
 * fences for correct ordering of operations across cores. */

/* This program works correctly on x86 even without fences because x86's
 * total store order (TSO) memory model preserves store -> store and
 * load -> load ordering. */

/* A and B are volatile so that the compiler does not reorder accesses to
 * them */
static volatile long A = 0;
static volatile long B = 0;

/* Counters used to synchronize the worker threads with the main thread */
static long counterR = 0;
static long counterW = 0;

/*
 * Pin the calling thread to the CPU identified by core_id.
 *
 * core_id must lie in [0, N), where N is the number of online processors
 * reported by sysconf(_SC_NPROCESSORS_ONLN). An out-of-range core_id, or a
 * failure to apply the affinity mask, trips an assertion.
 */
void
set_cpu_affinity(int core_id)
{
	int ret;
	cpu_set_t cpuset;

	/* sysconf() returns long (-1 on error, which fails the check below) */
	long cores = sysconf(_SC_NPROCESSORS_ONLN);
	assert(core_id >= 0);
	assert(core_id < cores);

	/* build a mask that contains only the requested core */
	CPU_ZERO(&cpuset);
	CPU_SET(core_id, &cpuset);
	ret = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t),
				     &cpuset);
	/* pthread_setaffinity_np() returns 0 on success and a positive error
	 * number on failure, so success must be tested with == 0; a ">= 0"
	 * check would accept every possible return value. */
	assert(ret == 0);
}

/*
 * Writer thread, pinned to core 1. In every round it waits for the main
 * thread to set counterW to 1, stores 1 to A and then to B, spins for a
 * pseudo-random number of iterations, and finally resets counterW to 0 to
 * signal that the round is complete. The argument is unused.
 */
void *
thread_write(void *arg)
{
	unsigned int seed = 1;

	(void)arg;
	set_cpu_affinity(1);
	for (;;) {
		/* spin until the main thread starts the round */
		while (__atomic_load_n(&counterW, __ATOMIC_SEQ_CST) != 1) {
		}

		/* the two stores whose relative order is being tested */
		A = 1;
		/* a fence between the stores is not required on x86:
		 * __atomic_thread_fence(__ATOMIC_SEQ_CST); */
		B = 1;

		/* pseudo-random delay before reporting completion */
		while (rand_r(&seed) % 8 != 0) {
		}

		/* tell the main thread that this round is done */
		__atomic_store_n(&counterW, 0, __ATOMIC_SEQ_CST);
	}
	return NULL;
}

/*
 * Reader thread, pinned to core 2. In every round it waits for the main
 * thread to set counterR to 1, spins until it observes a non-zero B, asserts
 * that A is then non-zero as well, and resets counterR to 0 to signal that
 * the round is complete. The argument is unused.
 */
void *
thread_read(void *arg)
{
	(void)arg;
	set_cpu_affinity(2);
	for (;;) {
		/* spin until the main thread starts the round */
		while (__atomic_load_n(&counterR, __ATOMIC_SEQ_CST) != 1) {
		}

		/* wait for the writer's store to B to become visible */
		while (B == 0) {
		}
		/* a fence between the loads is not required on x86:
		 * __atomic_thread_fence(__ATOMIC_SEQ_CST); */

		/* A was stored before B, so it must be visible by now */
		assert(A);

		/* tell the main thread that this round is done */
		__atomic_store_n(&counterR, 0, __ATOMIC_SEQ_CST);
	}
	return NULL;
}





/*
 * Start the writer and reader threads, then drive them through an unbounded
 * sequence of rounds. Each round resets A and B, sets counterW and counterR
 * to 1 to release both threads, and waits until both counters are back to 0.
 * Progress is printed every 1000000 rounds. The program takes no arguments
 * and runs until it is killed or the reader's assertion fails.
 */
int
main(int argc, char *argv[])
{
	(void)argv;
	if (argc != 1) {
		fprintf(stderr, "usage: mem-order\n");
		exit(1);
	}

	pthread_t p1, p2;
	/* create two threads */
	Pthread_create(&p1, NULL, thread_write, NULL);
	Pthread_create(&p2, NULL, thread_read, NULL);

	/* The loop never terminates, so the round counter must not be a signed
	 * int: incrementing past INT_MAX is undefined behavior. An unsigned
	 * 64-bit counter cannot overflow in any realistic run time. */
	for (unsigned long long i = 0; ; i++) {
		/* initialize A and B */
		A = 0;
		B = 0;
		/* release the writer and the reader for this round */
		__atomic_store_n(&counterW, 1, __ATOMIC_SEQ_CST);
		__atomic_store_n(&counterR, 1, __ATOMIC_SEQ_CST);
		/* wait until both threads have reset their counters */
		while (__atomic_load_n(&counterW, __ATOMIC_SEQ_CST) ||
		       __atomic_load_n(&counterR, __ATOMIC_SEQ_CST)) {
		}
		if (i % 1000000 == 0) {
			printf("i = %llu\n", i);
		}
	}

	/* Not reached while the loop above is unbounded; kept so that the
	 * threads are joined if the loop is ever given an exit condition. */
	Pthread_join(p1, NULL);
	Pthread_join(p2, NULL);
	return 0;
}
