#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <sched.h>
#include <unistd.h>
#include "common.h"

/* This program shows whether reads and writes to different variables require
 * fences for correct ordering of operations across cores. */

/* This program will NOT work correctly on x86 without fences: the total store
 * order (TSO) of x86 allows a store to be reordered after a later load from a
 * different address, i.e., it does not preserve store -> load ordering. */

/* The two shared variables whose accesses are being tested.  They are
 * volatile so that the compiler does not reorder accesses to A and B. */
static volatile long A = 0;
static volatile long B = 0;

/* a receives the value thread_B reads from A; b receives the value thread_A
 * reads from B. */
static long a;
static long b;

/* Handshake counters used to synchronize the worker threads with main */
static long counterA = 0;
static long counterB = 0;

/* Pin threads to cores */
/*
 * Pin the calling thread to a single core.
 *
 * core_id: zero-based index of the core to run on; must satisfy
 *          0 <= core_id < number of online processors.
 *
 * Aborts via assert() if core_id is out of range or if the affinity request
 * is rejected.
 */
void
set_cpu_affinity(int core_id)
{
	int ret;
	cpu_set_t cpuset;

	/* sysconf() returns a long; keep the full width for the range check */
	long cores = sysconf(_SC_NPROCESSORS_ONLN);
	assert(core_id >= 0);
	assert(core_id < cores);

	CPU_ZERO(&cpuset);
	CPU_SET(core_id, &cpuset);
	ret = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t),
				     &cpuset);
	/* pthread_setaffinity_np() returns 0 on success and a positive error
	 * number on failure, so a ">= 0" check would never detect an error. */
	assert(ret == 0);
}

/*
 * Worker for variable A: pins itself to core 1, then loops forever.  Each
 * round it waits for main to set counterA to 1, stores 1 to A, loads B into
 * b, and resets counterA to 0 to tell main that the round is complete.
 *
 * arg is unused.  The function never returns in practice.
 */
void *
thread_A(void *arg)
{
	(void)arg;

	set_cpu_affinity(1);
	for (;;) {
		/* spin until the main thread sets counterA */
		while (__atomic_load_n(&counterA, __ATOMIC_SEQ_CST) != 1) {
			/* busy-wait */
		}

		A = 1;
		// uncomment the next line to avoid assertion failure
		// __atomic_thread_fence(__ATOMIC_SEQ_CST);
		b = B;

		/* signal completion of this round to the main thread */
		__atomic_store_n(&counterA, 0, __ATOMIC_SEQ_CST);
	}
	return NULL;
}

/*
 * Worker for variable B: pins itself to core 2, then loops forever.  Each
 * round it waits for main to set counterB to 1, stores 1 to B, loads A into
 * a, and resets counterB to 0 to tell main that the round is complete.
 *
 * arg is unused.  The function never returns in practice.
 */
void *
thread_B(void *arg)
{
	(void)arg;

	set_cpu_affinity(2);
	for (;;) {
		/* spin until the main thread sets counterB */
		while (__atomic_load_n(&counterB, __ATOMIC_SEQ_CST) != 1) {
			/* busy-wait */
		}

		B = 1;
		// uncomment the next line to avoid assertion failure
		// __atomic_thread_fence(__ATOMIC_SEQ_CST);
		a = A;

		/* signal completion of this round to the main thread */
		__atomic_store_n(&counterB, 0, __ATOMIC_SEQ_CST);
	}
	return NULL;
}







/*
 * Starts the two worker threads and then repeats the test round forever:
 * reset A and B, release both workers, wait until both have finished, and
 * assert that at least one of them observed the other's store.
 *
 * Takes no command-line arguments; prints a usage message and exits with
 * status 1 otherwise.  The program only terminates when the assertion fires.
 */
int
main(int argc, char *argv[])
{
	if (argc != 1) {
		fprintf(stderr, "usage: mem-order\n");
		exit(1);
	}

	pthread_t p1, p2;
	/* create two threads */
	Pthread_create(&p1, NULL, thread_A, NULL);
	Pthread_create(&p2, NULL, thread_B, NULL);

	/* The loop has no exit condition, so a signed int counter would
	 * eventually overflow (undefined behavior).  An unsigned counter
	 * wraps around in a well-defined way. */
	for (unsigned long long i = 0; ; i++) {
		/* initialize A and B */
		A = 0;
		B = 0;
		/* release both workers for this round */
		__atomic_store_n(&counterA, 1, __ATOMIC_SEQ_CST);
		__atomic_store_n(&counterB, 1, __ATOMIC_SEQ_CST);
		/* wait until both workers have reset their counters */
		while (__atomic_load_n(&counterA, __ATOMIC_SEQ_CST) ||
		       __atomic_load_n(&counterB, __ATOMIC_SEQ_CST));
		// this assert will fire if the load and store operations
		// on A and B are not ordered correctly (i.e., in a sequentially
		// consistent manner) across cores
		assert(!(a == 0 && b == 0));
		if (i % 1000000 == 0) {
			printf("i = %llu\n", i);
		}
	}

	/* wait for the two threads to finish executing */
	Pthread_join(p1, NULL);
	Pthread_join(p2, NULL);
	return 0;
}
