/*
 * mini-llvm.c: llvm "Backend" for the mono JIT
 *
 * Copyright 2009-2011 Novell Inc (http://www.novell.com)
 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
 */

#include "mini.h"
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/mempool-internals.h>
#include <mono/utils/mono-tls.h>
#include <mono/utils/mono-dl.h>

#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif
#ifndef __STDC_CONSTANT_MACROS
#define __STDC_CONSTANT_MACROS
#endif

#include "llvm-c/Core.h"
#include "llvm-c/ExecutionEngine.h"
#include "llvm-c/BitWriter.h"
#include "llvm-c/Analysis.h"

#include "mini-llvm-cpp.h"

 /*
  * Information associated by mono with LLVM modules.
  */
typedef struct {
	/* The LLVM module this information is attached to */
	LLVMModuleRef module;
	/*
	 * NOTE(review): presumably the LLVM values of the exception throwing
	 * helpers; they are not referenced in this part of the file.
	 */
	LLVMValueRef throw;
	LLVMValueRef rethrow;
	LLVMValueRef throw_corlib_exception;
	/* Maps MonoClass* to the named LLVM struct type created for that valuetype */
	GHashTable *llvm_types;
	LLVMValueRef got_var;
	const char *got_symbol;
	/* Maps PLT symbol names to the LLVM function declared for them */
	GHashTable *plt_entries;
} MonoLLVMModule;

/*
 * Information associated by the backend with mono basic blocks.
 */
typedef struct {
	/* The first LLVM bblock generated for the mono bblock */
	LLVMBasicBlockRef bblock;
	/*
	 * The last LLVM bblock generated for the mono bblock. Initially the same
	 * as BBLOCK, it changes when the bblock has to be split, e.g. after an invoke.
	 */
	LLVMBasicBlockRef end_bblock;
	LLVMValueRef finally_ind;
	gboolean added;
	/* Set if this bblock is the unwind destination of an invoke instruction */
	gboolean invoke_target;
	/*
	 * For the first bblock of a finally clause: the bblocks ENDFINALLY
	 * has to be able to branch to.
	 */
	GSList *call_handler_return_bbs;
	/*
	 * For the first bblock of a finally clause: the bblock CALL_HANDLER
	 * has to branch to.
	 */
	LLVMBasicBlockRef call_handler_target_bb;
	/* The switch instructions emitted for ENDFINALLY instructions */
	GSList *endfinally_switch_ins_list;
	GSList *phi_nodes;
} BBInfo;

/*
 * Structure containing emit state
 */
typedef struct {
	MonoMemPool *mempool;

	/* Maps method names to the corresponding LLVMValueRef */
	GHashTable *emitted_method_decls;

	/* The mono compilation this context belongs to */
	MonoCompile *cfg;
	/* The LLVM function being emitted; new LLVM bblocks are appended to it */
	LLVMValueRef lmethod;
	MonoLLVMModule *lmodule;
	LLVMModuleRef module;
	/* Per-bblock information, indexed by MonoBasicBlock->block_num */
	BBInfo *bblocks;
	int sindex;
	int default_index;
	/* Counter used to generate unique names for the bblocks made by gen_bb () */
	int ex_index;
	/* The builder instructions are currently emitted with */
	LLVMBuilderRef builder;
	/* Indexed by vreg: the LLVM value of the vreg */
	LLVMValueRef *values;
	/* Indexed by vreg: the address the vreg is stored at, if it has one */
	LLVMValueRef *addresses;
	/* Indexed by vreg: the CLI type of the vreg, can be NULL */
	MonoType **vreg_cli_types;
	LLVMCallInfo *linfo;
	MonoMethodSignature *sig;
	/* Every builder returned by create_builder () */
	GSList *builders;
	GHashTable *region_to_handler;
	LLVMBuilderRef alloca_builder;
	LLVMValueRef last_alloca;
	LLVMValueRef rgctx_arg;
	LLVMTypeRef *vreg_types;
	gboolean *is_dead;
	gboolean *unreachable;
	int *pindexes;

	char temp_name [32];
} EmitContext;

/*
 * Bookkeeping record for a PHI instruction.
 * NOTE(review): the code using this structure is outside this part of the file,
 * so the field descriptions below are inferred from the names, please confirm.
 */
typedef struct {
	/* presumably the bblock containing the PHI */
	MonoBasicBlock *bb;
	/* presumably the PHI instruction itself */
	MonoInst *phi;
	/* presumably the predecessor bblock the incoming value arrives from */
	MonoBasicBlock *in_bb;
	/* presumably the source vreg holding the incoming value */
	int sreg;
} PhiNode;

/*
 * Instruction metadata
 * This is the same as ins_info, but LREG != IREG.
 *
 * mini-ops.h is included with MINI_OP/MINI_OP3 redefined so that every opcode
 * expands to exactly four chars: the register types of dest, src1, src2 and
 * src3. MINI_OP has no third source, so it emits ' ' (NONE) in the last slot.
 * LLVM_INS_INFO () relies on this four-chars-per-opcode layout.
 */
#ifdef MINI_OP
#undef MINI_OP
#endif
#ifdef MINI_OP3
#undef MINI_OP3
#endif
#define MINI_OP(a,b,dest,src1,src2) dest, src1, src2, ' ',
#define MINI_OP3(a,b,dest,src1,src2,src3) dest, src1, src2, src3,
/* Register type codes used by the opcode descriptions in mini-ops.h */
#define NONE ' '
#define IREG 'i'
#define FREG 'f'
#define VREG 'v'
#define XREG 'x'
#define LREG 'l'
/* keep in sync with the enum in mini.h */
const char
llvm_ins_info[] = {
#include "mini-ops.h"
};
#undef MINI_OP
#undef MINI_OP3

/*
 * GET_LONG_IMM:
 *
 *   Return the 64 bit immediate of INS. With 4 byte pointers it is assembled
 * from the inst_ms_word/inst_ls_word halves, otherwise inst_imm is used directly.
 */
#if SIZEOF_VOID_P == 4
#define GET_LONG_IMM(ins) (((guint64)(ins)->inst_ms_word << 32) | (guint64)(guint32)(ins)->inst_ls_word)
#else
#define GET_LONG_IMM(ins) ((ins)->inst_imm)
#endif

/*
 * LLVM_INS_INFO:
 *
 *   Return a pointer to the four register type chars (dest, src1, src2, src3)
 * llvm_ins_info contains for OPCODE. The first opcode is OP_START + 1.
 */
#define LLVM_INS_INFO(opcode) (&llvm_ins_info [((opcode) - OP_START - 1) * 4])

/* Change the '#if 0' to '#if 1' to print the reason of every LLVM failure */
#if 0
#define TRACE_FAILURE(msg) do { printf ("%s\n", msg); } while (0)
#else
#define TRACE_FAILURE(msg)
#endif

/* IS_TARGET_X86 is a compile time 0/1 constant usable in C expressions */
#ifdef TARGET_X86
#define IS_TARGET_X86 1
#else
#define IS_TARGET_X86 0
#endif

/*
 * LLVM_FAILURE:
 *
 *   Record REASON as the exception message of the cfg, mark the cfg as not
 * compilable by LLVM and jump to the FAILURE label, which the enclosing
 * function has to define.
 */
#define LLVM_FAILURE(ctx, reason) do { \
	TRACE_FAILURE (reason); \
	(ctx)->cfg->exception_message = g_strdup (reason); \
	(ctx)->cfg->disable_llvm = TRUE; \
	goto FAILURE; \
} while (0)

/*
 * CHECK_FAILURE:
 *
 *   Jump to the FAILURE label of the enclosing function if an earlier step
 * has already disabled LLVM for the cfg.
 */
#define CHECK_FAILURE(ctx) do { \
    if ((ctx)->cfg->disable_llvm) \
		goto FAILURE; \
} while (0)

/*
 * Integer comparison predicates: EQ, NE, the four signed relations and then
 * the four unsigned ones.
 * NOTE(review): the code indexing this table is outside this part of the file,
 * presumably the index is the mono comparison relation; confirm before reordering.
 */
static LLVMIntPredicate cond_to_llvm_cond [] = {
	LLVMIntEQ,
	LLVMIntNE,
	LLVMIntSLE,
	LLVMIntSGE,
	LLVMIntSLT,
	LLVMIntSGT,
	LLVMIntULE,
	LLVMIntUGE,
	LLVMIntULT,
	LLVMIntUGT,
};

/*
 * Floating point comparison predicates, laid out in the same order as
 * cond_to_llvm_cond. The entries which are signed in the integer table are
 * ordered predicates here, the unsigned ones are unordered predicates, and
 * the 'not equal' entry is the unordered LLVMRealUNE.
 * NOTE(review): presumably indexed the same way as cond_to_llvm_cond; the
 * indexing code is outside this part of the file.
 */
static LLVMRealPredicate fpcond_to_llvm_cond [] = {
	LLVMRealOEQ,
	LLVMRealUNE,
	LLVMRealOLE,
	LLVMRealOGE,
	LLVMRealOLT,
	LLVMRealOGT,
	LLVMRealULE,
	LLVMRealUGE,
	LLVMRealULT,
	LLVMRealUGT,
};

/* The LLVM execution engine handle */
static LLVMExecutionEngineRef ee;
/* NOTE(review): presumably the TLS key of the cfg compiled by the current thread */
static MonoNativeTlsKey current_cfg_tls_id;

/* Module information for JIT and for AOT compilation */
static MonoLLVMModule jit_module, aot_module;
/* NOTE(review): presumably set once init_jit_module () has initialized jit_module */
static gboolean jit_module_inited;
/*
 * NOTE(review): presumably the names and argument counts of the memset/memcpy
 * intrinsics to call; they are assigned outside this part of the file.
 */
static int memset_param_count, memcpy_param_count;
static const char *memset_func_name;
static const char *memcpy_func_name;

static void init_jit_module (void);

/*
 * IntPtrType:
 *
 *   Return the LLVM integer type which is as wide as a gpointer: i64 when
 * pointers are 8 bytes, i32 otherwise.
 */
static LLVMTypeRef
IntPtrType (void)
{
	if (sizeof (gpointer) == 8)
		return LLVMInt64Type ();

	return LLVMInt32Type ();
}

/*
 * get_vtype_size:
 *
 *   Return the size in bytes of the LLVM representation of the vtype T.
 * This is the value size of the class, except that sizes smaller than a
 * pointer are increased until mono_is_power_of_two () stops returning -1
 * for them or the pointer size is reached.
 */
static guint32
get_vtype_size (MonoType *t)
{
	int vt_size = mono_class_value_size (mono_class_from_mono_type (t), NULL);

	for (;;) {
		/* Sizes of at least a pointer are used as is */
		if (!(vt_size < sizeof (gpointer)))
			break;
		if (mono_is_power_of_two (vt_size) != -1)
			break;
		vt_size ++;
	}

	return vt_size;
}

/*
 * simd_class_to_llvm_type:
 *
 *   Return the LLVM vector type corresponding to the Mono.SIMD class KLASS.
 * The class is identified by its name only; the signed and unsigned variants
 * of a class share the same LLVM type. Unknown names are printed and hit
 * NOT_IMPLEMENTED.
 */
static LLVMTypeRef
simd_class_to_llvm_type (EmitContext *ctx, MonoClass *klass)
{
	const char *cname = klass->name;

	if (!strcmp (cname, "Vector2d"))
		return LLVMVectorType (LLVMDoubleType (), 2);
	if (!strcmp (cname, "Vector2l") || !strcmp (cname, "Vector2ul"))
		return LLVMVectorType (LLVMInt64Type (), 2);
	if (!strcmp (cname, "Vector4i") || !strcmp (cname, "Vector4ui"))
		return LLVMVectorType (LLVMInt32Type (), 4);
	if (!strcmp (cname, "Vector4f"))
		return LLVMVectorType (LLVMFloatType (), 4);
	if (!strcmp (cname, "Vector8s") || !strcmp (cname, "Vector8us"))
		return LLVMVectorType (LLVMInt16Type (), 8);
	if (!strcmp (cname, "Vector16sb") || !strcmp (cname, "Vector16b"))
		return LLVMVectorType (LLVMInt8Type (), 16);

	printf ("%s\n", klass->name);
	NOT_IMPLEMENTED;
	return NULL;
}

/*
 * type_to_simd_type:
 *
 *   Return the 128 bit LLVM vector type whose elements have the mono element
 * type TYPE (a MONO_TYPE_ constant). Only I1/I2/I4/I8/R4/R8 are supported.
 */
static inline G_GNUC_UNUSED LLVMTypeRef
type_to_simd_type (int type)
{
	LLVMTypeRef elem_type;
	int nelems;

	switch (type) {
	case MONO_TYPE_I1:
		elem_type = LLVMInt8Type ();
		nelems = 16;
		break;
	case MONO_TYPE_I2:
		elem_type = LLVMInt16Type ();
		nelems = 8;
		break;
	case MONO_TYPE_I4:
		elem_type = LLVMInt32Type ();
		nelems = 4;
		break;
	case MONO_TYPE_I8:
		elem_type = LLVMInt64Type ();
		nelems = 2;
		break;
	case MONO_TYPE_R8:
		elem_type = LLVMDoubleType ();
		nelems = 2;
		break;
	case MONO_TYPE_R4:
		elem_type = LLVMFloatType ();
		nelems = 4;
		break;
	default:
		g_assert_not_reached ();
		return NULL;
	}

	return LLVMVectorType (elem_type, nelems);
}

/*
 * type_to_llvm_type:
 *
 *   Return the LLVM type corresponding to T.
 *
 * Byref types map to i8*, references and pointers map to the pointer sized
 * integer type. Valuetypes map to Mono.SIMD vector types, to the type of
 * their enum base type, or to a named LLVM struct made of get_vtype_size ()
 * i8 members; the structs are cached per class in ctx->lmodule->llvm_types.
 *
 * For unsupported types, NULL is returned, ctx->cfg->exception_message is set
 * and LLVM is disabled for the cfg, so callers should use CHECK_FAILURE ().
 */
static LLVMTypeRef
type_to_llvm_type (EmitContext *ctx, MonoType *t)
{
	if (t->byref)
		return LLVMPointerType (LLVMInt8Type (), 0);
	switch (t->type) {
	case MONO_TYPE_VOID:
		return LLVMVoidType ();
	case MONO_TYPE_I1:
		return LLVMInt8Type ();
	case MONO_TYPE_I2:
		return LLVMInt16Type ();
	case MONO_TYPE_I4:
		return LLVMInt32Type ();
	case MONO_TYPE_U1:
		return LLVMInt8Type ();
	case MONO_TYPE_U2:
		return LLVMInt16Type ();
	case MONO_TYPE_U4:
		return LLVMInt32Type ();
	case MONO_TYPE_BOOLEAN:
		return LLVMInt8Type ();
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		return LLVMInt64Type ();
	case MONO_TYPE_CHAR:
		return LLVMInt16Type ();
	case MONO_TYPE_R4:
		return LLVMFloatType ();
	case MONO_TYPE_R8:
		return LLVMDoubleType ();
	case MONO_TYPE_I:
	case MONO_TYPE_U:
		return IntPtrType ();
	case MONO_TYPE_OBJECT:
	case MONO_TYPE_CLASS:
	case MONO_TYPE_ARRAY:
	case MONO_TYPE_SZARRAY:
	case MONO_TYPE_STRING:
	case MONO_TYPE_PTR:
		return IntPtrType ();
	case MONO_TYPE_VAR:
	case MONO_TYPE_MVAR:
		/* Because of generic sharing */
		if (mini_type_var_is_vt (ctx->cfg, t))
			return type_to_llvm_type (ctx, mini_get_gsharedvt_alloc_type_for_type (ctx->cfg, t));
		else
			return IntPtrType ();
	case MONO_TYPE_GENERICINST:
		if (!mono_type_generic_inst_is_valuetype (t))
			return IntPtrType ();
		/* Fall through */
	case MONO_TYPE_VALUETYPE:
	case MONO_TYPE_TYPEDBYREF: {
		MonoClass *klass;
		LLVMTypeRef ltype;

		klass = mono_class_from_mono_type (t);

		if (MONO_CLASS_IS_SIMD (ctx->cfg, klass))
			return simd_class_to_llvm_type (ctx, klass);

		if (klass->enumtype)
			return type_to_llvm_type (ctx, mono_class_enum_basetype (klass));
		ltype = g_hash_table_lookup (ctx->lmodule->llvm_types, klass);
		if (!ltype) {
			int i, size;
			LLVMTypeRef *eltypes;
			char *name;

			size = get_vtype_size (t);

			/* The vtype is modeled as an opaque blob of SIZE bytes */
			eltypes = g_new (LLVMTypeRef, size);
			for (i = 0; i < size; ++i)
				eltypes [i] = LLVMInt8Type ();

			name = mono_type_full_name (&klass->byval_arg);
			ltype = LLVMStructCreateNamed (LLVMGetGlobalContext (), name);
			LLVMStructSetBody (ltype, eltypes, size, FALSE);
			g_hash_table_insert (ctx->lmodule->llvm_types, klass, ltype);
			g_free (eltypes);
			/* LLVM keeps its own copy of the struct name, so ours can be released */
			g_free (name);
		}
		return ltype;
	}

	default: {
		char *type_name;

		printf ("X: %d\n", t->type);
		/* The type name is only needed to build the message, don't leak it */
		type_name = mono_type_full_name (t);
		ctx->cfg->exception_message = g_strdup_printf ("type %s", type_name);
		g_free (type_name);
		ctx->cfg->disable_llvm = TRUE;
		return NULL;
	}
	}
}

/*
 * type_is_unsigned:
 *
 *   Return whether T is one of the unsigned integer types U1, U2, U4 or U8.
 * Byref types are never considered unsigned.
 */
static gboolean
type_is_unsigned (EmitContext *ctx, MonoType *t)
{
	if (t->byref)
		return FALSE;

	if (t->type == MONO_TYPE_U1 || t->type == MONO_TYPE_U2 ||
		t->type == MONO_TYPE_U4 || t->type == MONO_TYPE_U8)
		return TRUE;

	return FALSE;
}

/*
 * type_to_llvm_arg_type:
 *
 *   Return the LLVM type used to pass a value of type T as an argument.
 * This is type_to_llvm_type (), with i8 and i16 widened to i32.
 */
static LLVMTypeRef
type_to_llvm_arg_type (EmitContext *ctx, MonoType *t)
{
	LLVMTypeRef ltype = type_to_llvm_type (ctx, t);

	/*
	 * Small integers are widened because LLVM generated code only sets the
	 * lower bits, while JITted code expects all the bits to be set.
	 */
	if (ltype == LLVMInt8Type () || ltype == LLVMInt16Type ())
		return LLVMInt32Type ();

	return ltype;
}

/*
 * llvm_type_to_stack_type:
 *
 *   Return the LLVM type a value of type TYPE has once it is pushed on the
 * IL stack: i8/i16 become i32, float becomes double, every other type
 * (including NULL) is returned unchanged.
 */
static G_GNUC_UNUSED LLVMTypeRef
llvm_type_to_stack_type (LLVMTypeRef type)
{
	if (type == NULL)
		return NULL;

	if (type == LLVMInt8Type () || type == LLVMInt16Type ())
		return LLVMInt32Type ();

	if (type == LLVMFloatType ())
		return LLVMDoubleType ();

	return type;
}

/*
 * regtype_to_llvm_type:
 *
 *   Return the LLVM type corresponding to the regtype char C used in the
 * instruction descriptions: 'i' is i32, 'l' is i64, 'f' is double.
 * NULL is returned for every other regtype.
 */
static LLVMTypeRef
regtype_to_llvm_type (char c)
{
	if (c == 'i')
		return LLVMInt32Type ();
	if (c == 'l')
		return LLVMInt64Type ();
	if (c == 'f')
		return LLVMDoubleType ();

	return NULL;
}

/*
 * op_to_llvm_type:
 *
 *   Return the LLVM type corresponding to the unary/binary opcode OPCODE:
 * the target type for conversion opcodes, the operand type for the
 * overflow checking arithmetic opcodes. Other opcodes are printed and
 * trigger an assertion.
 */
static LLVMTypeRef
op_to_llvm_type (int opcode)
{
	switch (opcode) {
	/* 8 bit results */
	case OP_ICONV_TO_I1:
	case OP_ICONV_TO_U1:
	case OP_LCONV_TO_I1:
	case OP_LCONV_TO_U1:
	case OP_FCONV_TO_I1:
	case OP_FCONV_TO_U1:
		return LLVMInt8Type ();
	/* 16 bit results */
	case OP_ICONV_TO_I2:
	case OP_ICONV_TO_U2:
	case OP_LCONV_TO_I2:
	case OP_LCONV_TO_U2:
	case OP_FCONV_TO_I2:
	case OP_FCONV_TO_U2:
		return LLVMInt16Type ();
	/* 32 bit results */
	case OP_ICONV_TO_I4:
	case OP_ICONV_TO_U4:
	case OP_LCONV_TO_I4:
	case OP_LCONV_TO_U4:
	case OP_FCONV_TO_I4:
	case OP_IADD_OVF:
	case OP_IADD_OVF_UN:
	case OP_ISUB_OVF:
	case OP_ISUB_OVF_UN:
	case OP_IMUL_OVF:
	case OP_IMUL_OVF_UN:
		return LLVMInt32Type ();
	/* 64 bit results */
	case OP_ICONV_TO_I8:
	case OP_ICONV_TO_U8:
	case OP_FCONV_TO_I8:
	case OP_LADD_OVF:
	case OP_LADD_OVF_UN:
	case OP_LSUB_OVF:
	case OP_LSUB_OVF_UN:
	case OP_LMUL_OVF:
	case OP_LMUL_OVF_UN:
		return LLVMInt64Type ();
	/* Floating point results */
	case OP_ICONV_TO_R4:
		return LLVMFloatType ();
	case OP_ICONV_TO_R8:
		return LLVMDoubleType ();
	/* Pointer sized results */
	case OP_FCONV_TO_I:
	case OP_FCONV_TO_U:
		if (sizeof (gpointer) == 8)
			return LLVMInt64Type ();
		return LLVMInt32Type ();
	default:
		printf ("%s\n", mono_inst_name (opcode));
		g_assert_not_reached ();
		return NULL;
	}
}

/*
 * load_store_to_llvm_type:
 *
 *   Return the LLVM type of the memory accessed by the load/store opcode
 * OPCODE. The size of the access in bytes is stored into SIZE, and SEXT/ZEXT
 * are set to whether the value is to be sign/zero extended. SEXT and ZEXT are
 * always written; opcodes which are not loads/stores trigger an assertion.
 */
static LLVMTypeRef
load_store_to_llvm_type (int opcode, int *size, gboolean *sext, gboolean *zext)
{
	LLVMTypeRef mem_type;
	int nbytes;
	gboolean sign_extend = FALSE, zero_extend = FALSE;

	*sext = FALSE;
	*zext = FALSE;

	switch (opcode) {
	case OP_LOADI1_MEMBASE:
	case OP_STOREI1_MEMBASE_REG:
	case OP_STOREI1_MEMBASE_IMM:
		nbytes = 1;
		sign_extend = TRUE;
		mem_type = LLVMInt8Type ();
		break;
	case OP_LOADU1_MEMBASE:
	case OP_LOADU1_MEM:
		nbytes = 1;
		zero_extend = TRUE;
		mem_type = LLVMInt8Type ();
		break;
	case OP_LOADI2_MEMBASE:
	case OP_STOREI2_MEMBASE_REG:
	case OP_STOREI2_MEMBASE_IMM:
		nbytes = 2;
		sign_extend = TRUE;
		mem_type = LLVMInt16Type ();
		break;
	case OP_LOADU2_MEMBASE:
	case OP_LOADU2_MEM:
		nbytes = 2;
		zero_extend = TRUE;
		mem_type = LLVMInt16Type ();
		break;
	case OP_LOADI4_MEMBASE:
	case OP_LOADU4_MEMBASE:
	case OP_LOADI4_MEM:
	case OP_LOADU4_MEM:
	case OP_STOREI4_MEMBASE_REG:
	case OP_STOREI4_MEMBASE_IMM:
		nbytes = 4;
		mem_type = LLVMInt32Type ();
		break;
	case OP_LOADI8_MEMBASE:
	case OP_LOADI8_MEM:
	case OP_STOREI8_MEMBASE_REG:
	case OP_STOREI8_MEMBASE_IMM:
		nbytes = 8;
		mem_type = LLVMInt64Type ();
		break;
	case OP_LOADR4_MEMBASE:
	case OP_STORER4_MEMBASE_REG:
		nbytes = 4;
		mem_type = LLVMFloatType ();
		break;
	case OP_LOADR8_MEMBASE:
	case OP_STORER8_MEMBASE_REG:
		nbytes = 8;
		mem_type = LLVMDoubleType ();
		break;
	case OP_LOAD_MEMBASE:
	case OP_LOAD_MEM:
	case OP_STORE_MEMBASE_REG:
	case OP_STORE_MEMBASE_IMM:
		nbytes = sizeof (gpointer);
		mem_type = IntPtrType ();
		break;
	default:
		/* SIZE is left untouched for unknown opcodes */
		g_assert_not_reached ();
		return NULL;
	}

	*size = nbytes;
	*sext = sign_extend;
	*zext = zero_extend;

	return mem_type;
}

/*
 * ovf_op_to_intrins:
 *
 *   Return the name of the llvm.*.with.overflow intrinsic implementing the
 * overflow checking opcode OPCODE. The I opcodes use the i32 variants,
 * the L opcodes the i64 ones, and the _UN opcodes the unsigned variants.
 */
static const char*
ovf_op_to_intrins (int opcode)
{
	const char *intrins_name;

	switch (opcode) {
	case OP_IADD_OVF:
		intrins_name = "llvm.sadd.with.overflow.i32";
		break;
	case OP_IADD_OVF_UN:
		intrins_name = "llvm.uadd.with.overflow.i32";
		break;
	case OP_ISUB_OVF:
		intrins_name = "llvm.ssub.with.overflow.i32";
		break;
	case OP_ISUB_OVF_UN:
		intrins_name = "llvm.usub.with.overflow.i32";
		break;
	case OP_IMUL_OVF:
		intrins_name = "llvm.smul.with.overflow.i32";
		break;
	case OP_IMUL_OVF_UN:
		intrins_name = "llvm.umul.with.overflow.i32";
		break;
	case OP_LADD_OVF:
		intrins_name = "llvm.sadd.with.overflow.i64";
		break;
	case OP_LADD_OVF_UN:
		intrins_name = "llvm.uadd.with.overflow.i64";
		break;
	case OP_LSUB_OVF:
		intrins_name = "llvm.ssub.with.overflow.i64";
		break;
	case OP_LSUB_OVF_UN:
		intrins_name = "llvm.usub.with.overflow.i64";
		break;
	case OP_LMUL_OVF:
		intrins_name = "llvm.smul.with.overflow.i64";
		break;
	case OP_LMUL_OVF_UN:
		intrins_name = "llvm.umul.with.overflow.i64";
		break;
	default:
		g_assert_not_reached ();
		return NULL;
	}

	return intrins_name;
}

/*
 * simd_op_to_intrins:
 *
 *   Return the name of the LLVM intrinsic implementing the SIMD opcode OPCODE.
 * Opcodes are only mapped when compiling for x86/amd64, where the intrinsics
 * are the llvm.x86.sse* ones. Every other opcode triggers an assertion.
 */
static const char*
simd_op_to_intrins (int opcode)
{
	switch (opcode) {
#if defined(TARGET_X86) || defined(TARGET_AMD64)
	case OP_MINPD:
		return "llvm.x86.sse2.min.pd";
	case OP_MINPS:
		return "llvm.x86.sse.min.ps";
	case OP_PMIND_UN:
		return "llvm.x86.sse41.pminud";
	case OP_PMINW_UN:
		return "llvm.x86.sse41.pminuw";
	case OP_PMINB_UN:
		return "llvm.x86.sse2.pminu.b";
	case OP_PMINW:
		return "llvm.x86.sse2.pmins.w";
	case OP_MAXPD:
		return "llvm.x86.sse2.max.pd";
	case OP_MAXPS:
		return "llvm.x86.sse.max.ps";
	case OP_HADDPD:
		return "llvm.x86.sse3.hadd.pd";
	case OP_HADDPS:
		return "llvm.x86.sse3.hadd.ps";
	case OP_HSUBPD:
		return "llvm.x86.sse3.hsub.pd";
	case OP_HSUBPS:
		return "llvm.x86.sse3.hsub.ps";
	case OP_PMAXD_UN:
		return "llvm.x86.sse41.pmaxud";
	case OP_PMAXW_UN:
		return "llvm.x86.sse41.pmaxuw";
	case OP_PMAXB_UN:
		return "llvm.x86.sse2.pmaxu.b";
	case OP_ADDSUBPS:
		return "llvm.x86.sse3.addsub.ps";
	case OP_ADDSUBPD:
		return "llvm.x86.sse3.addsub.pd";
	case OP_EXTRACT_MASK:
		return "llvm.x86.sse2.pmovmskb.128";
	/* The shift opcodes share one intrinsic between their immediate and _REG forms */
	case OP_PSHRW:
	case OP_PSHRW_REG:
		return "llvm.x86.sse2.psrli.w";
	case OP_PSHRD:
	case OP_PSHRD_REG:
		return "llvm.x86.sse2.psrli.d";
	case OP_PSHRQ:
	case OP_PSHRQ_REG:
		return "llvm.x86.sse2.psrli.q";
	case OP_PSHLW:
	case OP_PSHLW_REG:
		return "llvm.x86.sse2.pslli.w";
	case OP_PSHLD:
	case OP_PSHLD_REG:
		return "llvm.x86.sse2.pslli.d";
	case OP_PSHLQ:
	case OP_PSHLQ_REG:
		return "llvm.x86.sse2.pslli.q";
	case OP_PSARW:
	case OP_PSARW_REG:
		return "llvm.x86.sse2.psrai.w";
	case OP_PSARD:
	case OP_PSARD_REG:
		return "llvm.x86.sse2.psrai.d";
	case OP_PADDB_SAT:
		return "llvm.x86.sse2.padds.b";
	case OP_PADDW_SAT:
		return "llvm.x86.sse2.padds.w";
	case OP_PSUBB_SAT:
		return "llvm.x86.sse2.psubs.b";
	case OP_PSUBW_SAT:
		return "llvm.x86.sse2.psubs.w";
	case OP_PADDB_SAT_UN:
		return "llvm.x86.sse2.paddus.b";
	case OP_PADDW_SAT_UN:
		return "llvm.x86.sse2.paddus.w";
	case OP_PSUBB_SAT_UN:
		return "llvm.x86.sse2.psubus.b";
	case OP_PSUBW_SAT_UN:
		return "llvm.x86.sse2.psubus.w";
	case OP_PAVGB_UN:
		return "llvm.x86.sse2.pavg.b";
	case OP_PAVGW_UN:
		return "llvm.x86.sse2.pavg.w";
	case OP_SQRTPS:
		return "llvm.x86.sse.sqrt.ps";
	case OP_SQRTPD:
		return "llvm.x86.sse2.sqrt.pd";
	case OP_RSQRTPS:
		return "llvm.x86.sse.rsqrt.ps";
	case OP_RCPPS:
		return "llvm.x86.sse.rcp.ps";
	case OP_CVTDQ2PD:
		return "llvm.x86.sse2.cvtdq2pd";
	case OP_CVTDQ2PS:
		return "llvm.x86.sse2.cvtdq2ps";
	case OP_CVTPD2DQ:
		return "llvm.x86.sse2.cvtpd2dq";
	case OP_CVTPS2DQ:
		return "llvm.x86.sse2.cvtps2dq";
	case OP_CVTPD2PS:
		return "llvm.x86.sse2.cvtpd2ps";
	case OP_CVTPS2PD:
		return "llvm.x86.sse2.cvtps2pd";
	case OP_CVTTPD2DQ:
		return "llvm.x86.sse2.cvttpd2dq";
	case OP_CVTTPS2DQ:
		return "llvm.x86.sse2.cvttps2dq";
	case OP_COMPPS:
		return "llvm.x86.sse.cmp.ps";
	case OP_COMPPD:
		return "llvm.x86.sse2.cmp.pd";
	case OP_PACKW:
		return "llvm.x86.sse2.packsswb.128";
	case OP_PACKD:
		return "llvm.x86.sse2.packssdw.128";
	case OP_PACKW_UN:
		return "llvm.x86.sse2.packuswb.128";
	case OP_PACKD_UN:
		return "llvm.x86.sse41.packusdw";
	case OP_PMULW_HIGH:
		return "llvm.x86.sse2.pmulh.w";
	case OP_PMULW_HIGH_UN:
		return "llvm.x86.sse2.pmulhu.w";
#endif
	default:
		/* Unknown opcode, or a target without SIMD intrinsics */
		g_assert_not_reached ();
		return NULL;
	}
}

/*
 * simd_op_to_llvm_type:
 *
 *   Return the 128 bit LLVM vector type associated with the SIMD opcode
 * OPCODE. The opcodes are grouped by the mono element type which is passed to
 * type_to_simd_type (). Opcodes which are not listed trigger an assertion.
 * On targets other than x86/amd64, NULL is returned.
 */
static LLVMTypeRef
simd_op_to_llvm_type (int opcode)
{
#if defined(TARGET_X86) || defined(TARGET_AMD64)
	int elem_type;

	switch (opcode) {
	case OP_EXTRACT_R8:
	case OP_EXPAND_R8:
	case OP_CVTPD2DQ:
	case OP_CVTPD2PS:
	case OP_CVTTPD2DQ:
	case OP_SQRTPD:
	case OP_DUPPD:
		elem_type = MONO_TYPE_R8;
		break;
	case OP_EXPAND_R4:
	case OP_CVTPS2DQ:
	case OP_CVTPS2PD:
	case OP_CVTTPS2DQ:
	case OP_SQRTPS:
	case OP_RSQRTPS:
	case OP_RCPPS:
	case OP_DUPPS_LOW:
	case OP_DUPPS_HIGH:
		elem_type = MONO_TYPE_R4;
		break;
	case OP_EXTRACT_I8:
	case OP_EXPAND_I8:
		elem_type = MONO_TYPE_I8;
		break;
	case OP_EXTRACT_I4:
	case OP_EXPAND_I4:
	case OP_CVTDQ2PD:
	case OP_CVTDQ2PS:
		elem_type = MONO_TYPE_I4;
		break;
	case OP_EXTRACT_I2:
	case OP_EXTRACT_U2:
	case OP_EXTRACTX_U2:
	case OP_EXPAND_I2:
		elem_type = MONO_TYPE_I2;
		break;
	case OP_EXTRACT_I1:
	case OP_EXTRACT_U1:
	case OP_EXPAND_I1:
	case OP_EXTRACT_MASK:
		elem_type = MONO_TYPE_I1;
		break;
	default:
		g_assert_not_reached ();
		return NULL;
	}

	return type_to_simd_type (elem_type);
#else
	return NULL;
#endif
}

/*
 * get_bb:
 *
 *   Return the LLVM basic block corresponding to BB, creating it on first use.
 * The block is named BB<num>, or EH_CLAUSE<clause>_BB<num> if BB is the
 * start of an exception handler. A newly created block is also the end
 * bblock of BB.
 */
static LLVMBasicBlockRef
get_bb (EmitContext *ctx, MonoBasicBlock *bb)
{
	int bb_index = bb->block_num;
	char bb_name [128];

	/* Already created */
	if (ctx->bblocks [bb_index].bblock != NULL)
		return ctx->bblocks [bb_index].bblock;

	if (bb->flags & BB_EXCEPTION_HANDLER) {
		int clause_index = (mono_get_block_region_notry (ctx->cfg, bb->region) >> 8) - 1;
		sprintf (bb_name, "EH_CLAUSE%d_BB%d", clause_index, bb->block_num);
	} else {
		sprintf (bb_name, "BB%d", bb->block_num);
	}

	ctx->bblocks [bb_index].bblock = LLVMAppendBasicBlock (ctx->lmethod, bb_name);
	ctx->bblocks [bb_index].end_bblock = ctx->bblocks [bb_index].bblock;

	return ctx->bblocks [bb_index].bblock;
}

/*
 * get_end_bb:
 *
 *   Return the last LLVM bblock corresponding to BB.
 * This can differ from the bblock returned by get_bb (), because a mono bblock
 * is split into multiple LLVM bblocks to handle throwing exceptions.
 */
static LLVMBasicBlockRef
get_end_bb (EmitContext *ctx, MonoBasicBlock *bb)
{
	LLVMBasicBlockRef last_bb;

	/* Called for its side effect: this creates the bblock if needed */
	get_bb (ctx, bb);

	last_bb = ctx->bblocks [bb->block_num].end_bblock;

	return last_bb;
}

/*
 * gen_bb:
 *
 *   Append a new LLVM bblock to the method being emitted. The bblock is named
 * PREFIX followed by the next value of the ctx->ex_index counter, so repeated
 * calls produce distinct names.
 */
static LLVMBasicBlockRef
gen_bb (EmitContext *ctx, const char *prefix)
{
	char bb_name [128];

	/*
	 * PREFIX is supplied by the caller, so bound the write: an overlong
	 * prefix truncates the name instead of overflowing the stack buffer.
	 */
	snprintf (bb_name, sizeof (bb_name), "%s%d", prefix, ++ ctx->ex_index);
	return LLVMAppendBasicBlock (ctx->lmethod, bb_name);
}

/*
 * resolve_patch:
 *
 *   Return the target of the patch identified by TYPE and TARGET. The patch
 * is resolved with mono_resolve_patch_target () for the method and domain
 * of CFG.
 */
static gpointer
resolve_patch (MonoCompile *cfg, MonoJumpInfoType type, gconstpointer target)
{
	MonoJumpInfo patch_info;
	gpointer resolved;

	/* Only the type and the target of the temporary patch are filled in */
	memset (&patch_info, 0, sizeof (patch_info));
	patch_info.type = type;
	patch_info.data.target = target;

	resolved = mono_resolve_patch_target (cfg->method, cfg->domain, NULL, &patch_info, FALSE);

	return resolved;
}

/*
 * convert_full:
 *
 *   Emit code to convert the LLVM value V to DTYPE and return the result.
 * V is returned as is if it already has type DTYPE. The supported conversions
 * are integer extension (zero extension if IS_UNSIGNED, sign extension
 * otherwise) and truncation between i8/i16/i32/i64, float<->double,
 * pointer<->pointer, pointer<->other type and vector<->vector bitcasts. With
 * MONO_ARCH_SOFT_FLOAT, i32 can also be converted to float/double.
 * Unsupported conversions dump both types and trigger an assertion.
 */
static LLVMValueRef
convert_full (EmitContext *ctx, LLVMValueRef v, LLVMTypeRef dtype, gboolean is_unsigned)
{
	LLVMTypeRef stype = LLVMTypeOf (v);
	LLVMTypeRef i8_type, i16_type, i32_type, i64_type;
	int src_rank, dst_rank;

	if (stype == dtype)
		return v;

	i8_type = LLVMInt8Type ();
	i16_type = LLVMInt16Type ();
	i32_type = LLVMInt32Type ();
	i64_type = LLVMInt64Type ();

	/* Rank the integer types by width, 0 means 'not one of i8/i16/i32/i64' */
	src_rank = stype == i8_type ? 1 : stype == i16_type ? 2 : stype == i32_type ? 3 : stype == i64_type ? 4 : 0;
	dst_rank = dtype == i8_type ? 1 : dtype == i16_type ? 2 : dtype == i32_type ? 3 : dtype == i64_type ? 4 : 0;

	if (src_rank && dst_rank) {
		/* Extend */
		if (dst_rank > src_rank) {
			if (is_unsigned)
				return LLVMBuildZExt (ctx->builder, v, dtype, "");
			return LLVMBuildSExt (ctx->builder, v, dtype, "");
		}
		/* Trunc, the ranks differ since the types differ */
		return LLVMBuildTrunc (ctx->builder, v, dtype, "");
	}

	if (stype == LLVMFloatType () && dtype == LLVMDoubleType ())
		return LLVMBuildFPExt (ctx->builder, v, dtype, "");
	if (stype == LLVMDoubleType () && dtype == LLVMFloatType ())
		return LLVMBuildFPTrunc (ctx->builder, v, dtype, "");

	/* Pointers: the pointer<->pointer case has to be checked first */
	if (LLVMGetTypeKind (stype) == LLVMPointerTypeKind && LLVMGetTypeKind (dtype) == LLVMPointerTypeKind)
		return LLVMBuildBitCast (ctx->builder, v, dtype, "");
	if (LLVMGetTypeKind (dtype) == LLVMPointerTypeKind)
		return LLVMBuildIntToPtr (ctx->builder, v, dtype, "");
	if (LLVMGetTypeKind (stype) == LLVMPointerTypeKind)
		return LLVMBuildPtrToInt (ctx->builder, v, dtype, "");

#ifdef MONO_ARCH_SOFT_FLOAT
	if (stype == LLVMInt32Type () && dtype == LLVMFloatType ())
		return LLVMBuildBitCast (ctx->builder, v, dtype, "");
	if (stype == LLVMInt32Type () && dtype == LLVMDoubleType ())
		return LLVMBuildBitCast (ctx->builder, LLVMBuildZExt (ctx->builder, v, LLVMInt64Type (), ""), dtype, "");
#endif

	if (LLVMGetTypeKind (stype) == LLVMVectorTypeKind && LLVMGetTypeKind (dtype) == LLVMVectorTypeKind)
		return LLVMBuildBitCast (ctx->builder, v, dtype, "");

	/* Unsupported conversion: show the source value and the target type */
	LLVMDumpValue (v);
	LLVMDumpValue (LLVMConstNull (dtype));
	g_assert_not_reached ();
	return NULL;
}

/*
 * convert:
 *
 *   Same as convert_full (), treating V as signed, so integer extensions
 * are sign extensions.
 */
static LLVMValueRef
convert (EmitContext *ctx, LLVMValueRef v, LLVMTypeRef dtype)
{
	const gboolean treat_as_unsigned = FALSE;

	return convert_full (ctx, v, dtype, treat_as_unsigned);
}

/*
 * emit_volatile_load:
 *
 *   Emit a load of VREG from its address in ctx->addresses and return the
 * loaded value. The load is emitted unconditionally, so it is up to the
 * caller to only call this for vregs which have an address.
 * If the CLI type of the vreg is U1/U2/CHAR/BOOLEAN, the value is zero
 * extended to i32; if it is U8, a zero extension to i64 is requested.
 */
static LLVMValueRef
emit_volatile_load (EmitContext *ctx, int vreg)
{
	LLVMValueRef loaded;
	MonoType *cli_type;

	loaded = LLVMBuildLoad (ctx->builder, ctx->addresses [vreg], "");

	cli_type = ctx->vreg_cli_types [vreg];
	if (!cli_type || cli_type->byref)
		return loaded;

	/*
	 * Might have to zero extend since llvm doesn't have
	 * unsigned types.
	 */
	switch (cli_type->type) {
	case MONO_TYPE_U1:
	case MONO_TYPE_U2:
	case MONO_TYPE_CHAR:
	case MONO_TYPE_BOOLEAN:
		return LLVMBuildZExt (ctx->builder, loaded, LLVMInt32Type (), "");
	case MONO_TYPE_U8:
		return LLVMBuildZExt (ctx->builder, loaded, LLVMInt64Type (), "");
	default:
		return loaded;
	}
}

/*
 * emit_volatile_store:
 *
 *   If the variable of VREG is volatile or has its address taken
 * (MONO_INST_VOLATILE/MONO_INST_INDIRECT), emit a store of the current value
 * of the vreg to its address. The value is converted to the LLVM type of
 * the variable first. Nothing is emitted for other vregs.
 */
static void
emit_volatile_store (EmitContext *ctx, int vreg)
{
	MonoInst *var = get_vreg_to_inst (ctx->cfg, vreg);
	LLVMTypeRef var_type;
	LLVMValueRef stored;

	if (!var || !(var->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT)))
		return;

	g_assert (ctx->addresses [vreg]);

	var_type = type_to_llvm_type (ctx, var->inst_vtype);
	stored = convert (ctx, ctx->values [vreg], var_type);
	LLVMBuildStore (ctx->builder, stored, ctx->addresses [vreg]);
}

/*
 * Describes how the parameters of a mono signature were laid out in the
 * LLVM signature computed by sig_to_llvm_sig_full ().
 */
typedef struct {
	/*
	 * Maps parameter indexes in the original signature to parameter indexes
	 * in the LLVM signature. Allocated by sig_to_llvm_sig_full ().
	 */
	int *pindexes;
	/* The indexes of various special arguments in the LLVM signature */
	int vret_arg_pindex;
	int this_arg_pindex;
	int rgctx_arg_pindex;
	int imt_arg_pindex;
} LLVMSigInfo;

/*
 * sig_to_llvm_sig_full:
 *
 *   Return the LLVM signature corresponding to the mono signature SIG using the
 * calling convention information in CINFO. Return parameter mapping information in SINFO.
 *
 * CINFO and SINFO can be NULL. The LLVM parameters are laid out as: the rgctx
 * argument, the imt argument, 'this' and then the normal parameters, where a
 * vtype passed in registers takes one pointer sized integer per register and
 * a vtype passed by value becomes a pointer. If the return value is a vtype
 * returned through an address, the LLVM function returns void and the address
 * is passed as an extra argument whose position depends on
 * CINFO->vret_arg_index.
 *
 * If SINFO is not NULL, it is cleared first, and on success SINFO->pindexes
 * is set to a newly allocated array which has to be freed by the caller with
 * g_free ().
 *
 * On failure, NULL is returned, ctx->cfg->disable_llvm is set and every
 * temporary allocation is released.
 */
static LLVMTypeRef
sig_to_llvm_sig_full (EmitContext *ctx, MonoMethodSignature *sig, LLVMCallInfo *cinfo,
					  LLVMSigInfo *sinfo)
{
	LLVMTypeRef ret_type;
	LLVMTypeRef *param_types = NULL;
	LLVMTypeRef res;
	int i, j, pindex, vret_arg_pindex = 0;
	/* NULL until allocated, since the FAILURE path frees it unconditionally */
	int *pindexes = NULL;
	gboolean vretaddr = FALSE;

	if (sinfo)
		memset (sinfo, 0, sizeof (LLVMSigInfo));

	ret_type = type_to_llvm_type (ctx, sig->ret);
	CHECK_FAILURE (ctx);

	if (cinfo && cinfo->ret.storage == LLVMArgVtypeInReg) {
		/* LLVM models this by returning an aggregate value */
		if (cinfo->ret.pair_storage [0] == LLVMArgInIReg && cinfo->ret.pair_storage [1] == LLVMArgNone) {
			LLVMTypeRef members [2];

			members [0] = IntPtrType ();
			ret_type = LLVMStructType (members, 1, FALSE);
		} else {
			g_assert_not_reached ();
		}
	} else if (cinfo && mini_type_is_vtype (ctx->cfg, sig->ret)) {
		g_assert (cinfo->ret.storage == LLVMArgVtypeRetAddr);
		vretaddr = TRUE;
		ret_type = LLVMVoidType ();
	}

	pindexes = g_new0 (int, sig->param_count);
	/* Up to two slots per parameter, plus the rgctx/imt/this/vret arguments */
	param_types = g_new0 (LLVMTypeRef, (sig->param_count * 2) + 3);
	pindex = 0;
	if (cinfo && cinfo->rgctx_arg) {
		if (sinfo)
			sinfo->rgctx_arg_pindex = pindex;
		param_types [pindex] = IntPtrType ();
		pindex ++;
	}
	if (cinfo && cinfo->imt_arg) {
		if (sinfo)
			sinfo->imt_arg_pindex = pindex;
		param_types [pindex] = IntPtrType ();
		pindex ++;
	}
	if (vretaddr) {
		/* Compute the index in the LLVM signature where the vret arg needs to be passed */
		vret_arg_pindex = pindex;
		if (cinfo->vret_arg_index == 1) {
			/* Add the slots consumed by the first argument */
			LLVMArgInfo *ainfo = &cinfo->args [0];
			switch (ainfo->storage) {
			case LLVMArgVtypeInReg:
				for (j = 0; j < 2; ++j) {
					if (ainfo->pair_storage [j] == LLVMArgInIReg)
						vret_arg_pindex ++;
				}
				break;
			default:
				vret_arg_pindex ++;
			}
		}

		if (sinfo)
			sinfo->vret_arg_pindex = vret_arg_pindex;
	}

	/*
	 * The vret argument is emitted by whichever of the checks below first
	 * sees PINDEX reach VRET_ARG_PINDEX.
	 */
	if (vretaddr && vret_arg_pindex == pindex)
		param_types [pindex ++] = IntPtrType ();
	if (sig->hasthis) {
		if (sinfo)
			sinfo->this_arg_pindex = pindex;
		param_types [pindex ++] = IntPtrType ();
	}
	if (vretaddr && vret_arg_pindex == pindex)
		param_types [pindex ++] = IntPtrType ();
	for (i = 0; i < sig->param_count; ++i) {
		if (vretaddr && vret_arg_pindex == pindex)
			param_types [pindex ++] = IntPtrType ();
		pindexes [i] = pindex;
		if (cinfo && cinfo->args [i + sig->hasthis].storage == LLVMArgVtypeInReg) {
			for (j = 0; j < 2; ++j) {
				switch (cinfo->args [i + sig->hasthis].pair_storage [j]) {
				case LLVMArgInIReg:
					param_types [pindex ++] = LLVMIntType (sizeof (gpointer) * 8);
					break;
				case LLVMArgNone:
					break;
				default:
					g_assert_not_reached ();
				}
			}
		} else if (cinfo && cinfo->args [i + sig->hasthis].storage == LLVMArgVtypeByVal) {
			param_types [pindex] = type_to_llvm_arg_type (ctx, sig->params [i]);
			/* The type can be NULL on failure, so check before building the pointer type */
			CHECK_FAILURE (ctx);
			param_types [pindex] = LLVMPointerType (param_types [pindex], 0);
			pindex ++;
		} else {
			param_types [pindex ++] = type_to_llvm_arg_type (ctx, sig->params [i]);
		}
	}
	if (vretaddr && vret_arg_pindex == pindex)
		param_types [pindex ++] = IntPtrType ();

	CHECK_FAILURE (ctx);

	res = LLVMFunctionType (ret_type, param_types, pindex, FALSE);
	g_free (param_types);

	if (sinfo) {
		/* Ownership of the array passes to the caller */
		sinfo->pindexes = pindexes;
	} else {
		g_free (pindexes);
	}

	return res;

 FAILURE:
	g_free (param_types);
	/* PINDEXES was never handed over to SINFO on this path */
	g_free (pindexes);

	return NULL;
}

/*
 * sig_to_llvm_sig:
 *
 *   Return the LLVM signature corresponding to the mono signature SIG, without
 * using calling convention information and without returning parameter
 * mapping information.
 */
static LLVMTypeRef
sig_to_llvm_sig (EmitContext *ctx, MonoMethodSignature *sig)
{
	LLVMCallInfo *no_cinfo = NULL;
	LLVMSigInfo *no_sinfo = NULL;

	return sig_to_llvm_sig_full (ctx, sig, no_cinfo, no_sinfo);
}

/*
 * LLVMFunctionType1:
 *
 *   Create an LLVM function type returning ReturnType and taking one
 * parameter of type ParamType1.
 */
static G_GNUC_UNUSED LLVMTypeRef 
LLVMFunctionType1(LLVMTypeRef ReturnType,
				  LLVMTypeRef ParamType1,
				  int IsVarArg)
{
	/* The by-value parameter itself serves as the one element array */
	return LLVMFunctionType (ReturnType, &ParamType1, 1, IsVarArg);
}

/*
 * LLVMFunctionType2:
 *
 *   Create an LLVM function type returning ReturnType and taking two
 * parameters of type ParamType1 and ParamType2.
 */
static G_GNUC_UNUSED LLVMTypeRef
LLVMFunctionType2(LLVMTypeRef ReturnType,
				  LLVMTypeRef ParamType1,
				  LLVMTypeRef ParamType2,
				  int IsVarArg)
{
	LLVMTypeRef params [] = { ParamType1, ParamType2 };

	return LLVMFunctionType (ReturnType, params, 2, IsVarArg);
}

/*
 * LLVMFunctionType3:
 *
 *   Create an LLVM function type returning ReturnType and taking three
 * parameters of type ParamType1, ParamType2 and ParamType3.
 */
static G_GNUC_UNUSED LLVMTypeRef
LLVMFunctionType3(LLVMTypeRef ReturnType,
				  LLVMTypeRef ParamType1,
				  LLVMTypeRef ParamType2,
				  LLVMTypeRef ParamType3,
				  int IsVarArg)
{
	LLVMTypeRef params [] = { ParamType1, ParamType2, ParamType3 };

	return LLVMFunctionType (ReturnType, params, 3, IsVarArg);
}

/*
 * create_builder:
 *
 *   Create an LLVM builder and record it in ctx->builders so it can be
 * freed later. The list node is allocated from the mempool of the cfg.
 */
static LLVMBuilderRef
create_builder (EmitContext *ctx)
{
	LLVMBuilderRef new_builder;

	new_builder = LLVMCreateBuilder ();
	ctx->builders = g_slist_prepend_mempool (ctx->cfg->mempool, ctx->builders, new_builder);

	return new_builder;
}

/*
 * get_plt_entry:
 *
 *   Return the LLVM function for the PLT entry of the patch identified by
 * TYPE and DATA, declaring it with signature LLVM_SIG on first use. The
 * declarations are cached by symbol name in ctx->lmodule->plt_entries.
 * Return NULL if mono_aot_get_plt_symbol () has no symbol for the patch.
 */
static LLVMValueRef
get_plt_entry (EmitContext *ctx, LLVMTypeRef llvm_sig, MonoJumpInfoType type, gconstpointer data)
{
	LLVMValueRef callee;
	char *callee_name;

	callee_name = mono_aot_get_plt_symbol (type, data);
	if (callee_name == NULL)
		return NULL;

	/* Add a patch so referenced wrappers can be compiled in full aot mode */
	if (ctx->cfg->compile_aot)
		mono_add_patch_info (ctx->cfg, 0, type, data);

	// FIXME: Locking
	callee = g_hash_table_lookup (ctx->lmodule->plt_entries, callee_name);
	if (callee)
		/*
		 * NOTE(review): if mono_aot_get_plt_symbol () returns a newly allocated
		 * string, it is not released on this path; confirm who owns it.
		 */
		return callee;

	callee = LLVMAddFunction (ctx->module, callee_name, llvm_sig);
	LLVMSetVisibility (callee, LLVMHiddenVisibility);

	/* The symbol name becomes the key of the cache entry */
	g_hash_table_insert (ctx->lmodule->plt_entries, callee_name, callee);

	return callee;
}

/*
 * get_handler_clause:
 *
 *   Return the index of the exception clause protecting BB, or -1 if there
 * is none. If BB is directly inside a try region, the index is decoded
 * from bb->region. Otherwise, the first clause with flags
 * MONO_EXCEPTION_CLAUSE_NONE which contains the IL offset of BB is returned.
 */
static int
get_handler_clause (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoMethodHeader *header = cfg->header;
	int clause_idx;

	/* Directly */
	if (bb->region != -1 && MONO_BBLOCK_IS_IN_REGION (bb, MONO_REGION_TRY))
		return (bb->region >> 8) - 1;

	/* Indirectly */
	for (clause_idx = 0; clause_idx < header->num_clauses; ++clause_idx) {
		MonoExceptionClause *clause = &header->clauses [clause_idx];

		if (!MONO_OFFSET_IN_CLAUSE (clause, bb->real_offset))
			continue;
		if (clause->flags == MONO_EXCEPTION_CLAUSE_NONE)
			return clause_idx;
	}

	return -1;
}

/*
 * set_metadata_flag:
 *
 *   Attach metadata of the kind named FLAG_NAME to the instruction V. The
 * metadata is a node whose only operand is the string "mono".
 */
static void
set_metadata_flag (LLVMValueRef v, const char *flag_name)
{
	int kind_id = LLVMGetMDKindID (flag_name, strlen (flag_name));
	LLVMValueRef md_string = LLVMMDString ("mono", 4);
	LLVMValueRef md_node = LLVMMDNode (&md_string, 1);

	LLVMSetMetadata (v, kind_id, md_node);
}

/*
 * emit_call:
 *
 *   Emit a call to CALLEE with the PINDEX arguments in ARGS and return the
 * call instruction. Whether a plain LLVM call or an invoke is emitted
 * depends on whether BB is protected by an exception clause.
 * For an invoke, the unwind destination is the handler bblock of the clause,
 * and emission continues in a newly created NOEX_BB bblock, which becomes the
 * end bblock of BB and is targeted by a new builder.
 * On return, both ctx->builder and BUILDER_REF hold the builder to continue
 * emitting with.
 */
static LLVMValueRef
emit_call (EmitContext *ctx, MonoBasicBlock *bb, LLVMBuilderRef *builder_ref, LLVMValueRef callee, LLVMValueRef *args, int pindex)
{
	MonoCompile *cfg = ctx->cfg;
	LLVMBuilderRef cur_builder = *builder_ref;
	LLVMValueRef call_ins;
	int clause_index = get_handler_clause (cfg, bb);

	if (clause_index == -1) {
		/* Not protected by a clause, a normal call is enough */
		call_ins = LLVMBuildCall (cur_builder, callee, args, pindex, "");
		ctx->builder = cur_builder;
	} else {
		MonoExceptionClause *ec = &cfg->header->clauses [clause_index];
		MonoBasicBlock *handler_bb;
		LLVMBasicBlockRef ex_bb, noex_bb;

		/*
		 * Have to use an invoke instead of a call, branching to the
		 * handler bblock of the clause containing this bblock.
		 */

		g_assert (ec->flags == MONO_EXCEPTION_CLAUSE_NONE || ec->flags == MONO_EXCEPTION_CLAUSE_FINALLY);

		handler_bb = cfg->cil_offset_to_bb [ec->handler_offset];
		g_assert (handler_bb);

		ctx->bblocks [handler_bb->block_num].invoke_target = TRUE;

		ex_bb = get_bb (ctx, handler_bb);
		noex_bb = gen_bb (ctx, "NOEX_BB");

		/* Use an invoke */
		call_ins = LLVMBuildInvoke (cur_builder, callee, args, pindex, noex_bb, ex_bb, "");

		/* The rest of the bblock is emitted into the 'no exception' successor */
		ctx->builder = create_builder (ctx);
		LLVMPositionBuilderAtEnd (ctx->builder, noex_bb);

		ctx->bblocks [bb->block_num].end_bblock = noex_bb;
	}

	*builder_ref = ctx->builder;

	return call_ins;
}

/*
 * emit_load:
 *
 *   Emit a load of SIZE bytes from ADDR and return the loaded value.
 * If IS_FAULTING is set and BB has a region (bb->region != -1), the load is done
 * by calling one of the llvm.mono.load.* intrinsics through emit_call (), which
 * can replace *BUILDER_REF; NAME is not used on that path. SIZE must be 1, 2, 4 or 8
 * there. Otherwise a normal load named NAME is built, passing IS_FAULTING on to
 * mono_llvm_build_load ().
 */
static LLVMValueRef
emit_load (EmitContext *ctx, MonoBasicBlock *bb, LLVMBuilderRef *builder_ref, int size, LLVMValueRef addr, const char *name, gboolean is_faulting)
{
	const char *intrins_name;
	LLVMValueRef call_args [16], loaded;
	LLVMTypeRef ptr_type;

	if (!is_faulting || bb->region == -1) {
		/*
		 * Loads which can fault are emitted as volatile loads, because otherwise
		 * LLVM will generate invalid code when encountering a load from a
		 * NULL address.
		 */
		return mono_llvm_build_load (*builder_ref, addr, name, is_faulting);
	}

	/*
	 * Loads which can fault inside a region are done by calling a mono specific
	 * intrinsic using an invoke, so they are handled properly inside try blocks.
	 * This can't be used outside clauses, since LLVM optimizes intrinsics which
	 * are marked with IntrReadArgMem.
	 */
	switch (size) {
	case 1:
		intrins_name = "llvm.mono.load.i8.p0i8";
		break;
	case 2:
		intrins_name = "llvm.mono.load.i16.p0i16";
		break;
	case 4:
		intrins_name = "llvm.mono.load.i32.p0i32";
		break;
	case 8:
		intrins_name = "llvm.mono.load.i64.p0i64";
		break;
	default:
		g_assert_not_reached ();
	}

	/* The intrinsics take integer pointers, so fp addresses are reinterpreted first */
	ptr_type = LLVMTypeOf (addr);
	if (ptr_type == LLVMPointerType (LLVMDoubleType (), 0) || ptr_type == LLVMPointerType (LLVMFloatType (), 0))
		addr = LLVMBuildBitCast (*builder_ref, addr, LLVMPointerType (LLVMIntType (size * 8), 0), "");

	call_args [0] = addr;
	call_args [1] = LLVMConstInt (LLVMInt32Type (), 0, FALSE);
	call_args [2] = LLVMConstInt (LLVMInt1Type (), TRUE, FALSE);
	loaded = emit_call (ctx, bb, builder_ref, LLVMGetNamedFunction (ctx->module, intrins_name), call_args, 3);

	/* Turn the integer result back into the fp type the original address pointed to */
	if (ptr_type == LLVMPointerType (LLVMDoubleType (), 0))
		loaded = LLVMBuildBitCast (*builder_ref, loaded, LLVMDoubleType (), "");
	else if (ptr_type == LLVMPointerType (LLVMFloatType (), 0))
		loaded = LLVMBuildBitCast (*builder_ref, loaded, LLVMFloatType (), "");

	return loaded;
}

/*
 * emit_store:
 *
 *   Emit a store of the SIZE byte value VALUE to ADDR.
 * If IS_FAULTING is set and BB has a region (bb->region != -1), the store is done
 * by calling one of the llvm.mono.store.* intrinsics through emit_call (), which
 * can replace *BUILDER_REF; SIZE must be 1, 2, 4 or 8 there. Otherwise a plain
 * store is built.
 */
static void
emit_store (EmitContext *ctx, MonoBasicBlock *bb, LLVMBuilderRef *builder_ref, int size, LLVMValueRef value, LLVMValueRef addr, gboolean is_faulting)
{
	const char *intrins_name;
	LLVMValueRef call_args [16];

	if (!is_faulting || bb->region == -1) {
		LLVMBuildStore (*builder_ref, value, addr);
		return;
	}

	switch (size) {
	case 1:
		intrins_name = "llvm.mono.store.i8.p0i8";
		break;
	case 2:
		intrins_name = "llvm.mono.store.i16.p0i16";
		break;
	case 4:
		intrins_name = "llvm.mono.store.i32.p0i32";
		break;
	case 8:
		intrins_name = "llvm.mono.store.i64.p0i64";
		break;
	default:
		g_assert_not_reached ();
	}

	/* The intrinsics work on integers, so fp values and their address are reinterpreted */
	if (LLVMTypeOf (value) == LLVMDoubleType () || LLVMTypeOf (value) == LLVMFloatType ()) {
		LLVMTypeRef int_type = LLVMIntType (size * 8);

		value = LLVMBuildBitCast (*builder_ref, value, int_type, "");
		addr = LLVMBuildBitCast (*builder_ref, addr, LLVMPointerType (LLVMIntType (size * 8), 0), "");
	}

	call_args [0] = value;
	call_args [1] = addr;
	call_args [2] = LLVMConstInt (LLVMInt32Type (), 0, FALSE);
	call_args [3] = LLVMConstInt (LLVMInt1Type (), TRUE, FALSE);
	emit_call (ctx, bb, builder_ref, LLVMGetNamedFunction (ctx->module, intrins_name), call_args, 4);
}

/*
 * emit_cond_system_exception:
 *
 *   Emit code to throw the corlib exception System.EXC_TYPE if the condition CMP
 * is TRUE: the conditional branch emitted here jumps to the throwing bblock when
 * CMP holds and to the continuation bblock otherwise.
 * The throw is done by calling the throw corlib exception trampoline, which is
 * looked up/declared on first use and cached in lmodule->throw_corlib_exception.
 * On return, ctx->builder is a new builder positioned at the end of the
 * continuation bblock, which is also recorded as the end bblock of BB.
 * Might set the ctx exception.
 */
static void
emit_cond_system_exception (EmitContext *ctx, MonoBasicBlock *bb, const char *exc_type, LLVMValueRef cmp)
{
	LLVMBasicBlockRef ex_bb, noex_bb;
	LLVMBuilderRef builder;
	MonoClass *exc_class;
	LLVMValueRef args [2];
	
	ex_bb = gen_bb (ctx, "EX_BB");
	noex_bb = gen_bb (ctx, "NOEX_BB");

	/* CMP true -> throw, CMP false -> continue normally */
	LLVMBuildCondBr (ctx->builder, cmp, ex_bb, noex_bb);

	exc_class = mono_class_from_name (mono_get_corlib (), "System", exc_type);
	g_assert (exc_class);

	/* Emit exception throwing code */
	builder = create_builder (ctx);
	LLVMPositionBuilderAtEnd (builder, ex_bb);

	if (!ctx->lmodule->throw_corlib_exception) {
		LLVMValueRef callee;
		LLVMTypeRef sig;
		const char *icall_name;

		/* The trampoline has the signature void (int32 token, intptr throw_address) */
		MonoMethodSignature *throw_sig = mono_metadata_signature_alloc (mono_get_corlib (), 2);
		throw_sig->ret = &mono_get_void_class ()->byval_arg;
		throw_sig->params [0] = &mono_get_int32_class ()->byval_arg;
		icall_name = "llvm_throw_corlib_exception_abs_trampoline";
		throw_sig->params [1] = &mono_get_intptr_class ()->byval_arg;
		sig = sig_to_llvm_sig (ctx, throw_sig);

		if (ctx->cfg->compile_aot) {
			callee = get_plt_entry (ctx, sig, MONO_PATCH_INFO_INTERNAL_METHOD, icall_name);
		} else {
			/* Reuse the signature computed above instead of converting throw_sig again */
			callee = LLVMAddFunction (ctx->module, "llvm_throw_corlib_exception_trampoline", sig);

			/*
			 * Differences between the LLVM/non-LLVM throw corlib exception trampoline:
			 * - On x86, LLVM generated code doesn't push the arguments
			 * - When using the LLVM mono branch, the trampoline takes the throw address as an
			 *   arguments, not a pc offset.
			 */
			LLVMAddGlobalMapping (ee, callee, resolve_patch (ctx->cfg, MONO_PATCH_INFO_INTERNAL_METHOD, icall_name));
		}

		/* Make sure the callee is fully set up before it is published in the module info */
		mono_memory_barrier ();
		ctx->lmodule->throw_corlib_exception = callee;
	}

	/* On x86 the token is passed without the MONO_TOKEN_TYPE_DEF table bits */
	if (IS_TARGET_X86)
		args [0] = LLVMConstInt (LLVMInt32Type (), exc_class->type_token - MONO_TOKEN_TYPE_DEF, FALSE);
	else
		args [0] = LLVMConstInt (LLVMInt32Type (), exc_class->type_token, FALSE);

	/*
	 * The LLVM mono branch contains changes so a block address can be passed as an
	 * argument to a call.
	 */
	args [1] = LLVMBuildPtrToInt (builder, LLVMBlockAddress (ctx->lmethod, ex_bb), IntPtrType (), "");
	/* emit_call () might switch BUILDER to a new bblock if BB is covered by a clause */
	emit_call (ctx, bb, &builder, ctx->lmodule->throw_corlib_exception, args, 2);

	LLVMBuildUnreachable (builder);

	/* Continue emitting the normal code path into the no-exception bblock */
	ctx->builder = create_builder (ctx);
	LLVMPositionBuilderAtEnd (ctx->builder, noex_bb);

	ctx->bblocks [bb->block_num].end_bblock = noex_bb;

	ctx->ex_index ++;
}

/*
 * emit_reg_to_vtype:
 *
 *   Emit code which writes the (up to two) pointer sized pieces of a value of
 * type T, held in REGS, to the memory at ADDRESS. AINFO->pair_storage tells which
 * pieces are present; only LLVMArgInIReg pieces are supported.
 *
 * NOTE(review): the bitcasts are emitted with ctx->builder while the GEPs and
 * stores use BUILDER; this presumably relies on both being the same builder -- confirm.
 */
static void
emit_reg_to_vtype (EmitContext *ctx, LLVMBuilderRef builder, MonoType *t, LLVMValueRef address, LLVMArgInfo *ainfo, LLVMValueRef *regs)
{
	int part, remaining;

	remaining = get_vtype_size (t);

	/* SIMD types are addressed as a plain byte pointer */
	if (MONO_CLASS_IS_SIMD (ctx->cfg, mono_class_from_mono_type (t)))
		address = LLVMBuildBitCast (ctx->builder, address, LLVMPointerType (LLVMInt8Type (), 0), "");

	for (part = 0; part < 2; ++part) {
		LLVMValueRef gep_index [2], part_addr;
		LLVMTypeRef int_type;
		/* A piece is at most pointer sized */
		int nbytes = remaining > sizeof (gpointer) ? sizeof (gpointer) : remaining;

		if (ainfo->pair_storage [part] == LLVMArgNone)
			continue;

		int_type = LLVMIntType (nbytes * 8);

		if (MONO_CLASS_IS_SIMD (ctx->cfg, mono_class_from_mono_type (t))) {
			gep_index [0] = LLVMConstInt (LLVMInt32Type (), part * sizeof (gpointer), FALSE);
			part_addr = LLVMBuildGEP (builder, address, gep_index, 1, "");
		} else {
			gep_index [0] = LLVMConstInt (LLVMInt32Type (), 0, FALSE);
			gep_index [1] = LLVMConstInt (LLVMInt32Type (), part * sizeof (gpointer), FALSE);
			part_addr = LLVMBuildGEP (builder, address, gep_index, 2, "");
		}

		/* LLVMArgNone was filtered out above, so only integer registers are valid here */
		if (ainfo->pair_storage [part] == LLVMArgInIReg)
			LLVMBuildStore (builder, convert (ctx, regs [part], int_type), LLVMBuildBitCast (ctx->builder, part_addr, LLVMPointerType (int_type, 0), ""));
		else
			g_assert_not_reached ();

		remaining -= sizeof (gpointer);
	}
}

/*
 * emit_vtype_to_reg:
 *
 *   Emit code which reads a value of type T stored at ADDRESS as (up to two)
 * pointer sized pieces. Each loaded piece is converted to IntPtrType () and stored
 * into REGS; the number of pieces produced is stored into NREGS.
 * AINFO->pair_storage tells which pieces are present; only LLVMArgInIReg pieces
 * are supported.
 *
 * NOTE(review): the bitcasts are emitted with ctx->builder while the GEPs and
 * loads use BUILDER; this presumably relies on both being the same builder -- confirm.
 */
static void
emit_vtype_to_reg (EmitContext *ctx, LLVMBuilderRef builder, MonoType *t, LLVMValueRef address, LLVMArgInfo *ainfo, LLVMValueRef *regs, guint32 *nregs)
{
	int produced = 0;
	int part, remaining;

	remaining = get_vtype_size (t);

	/* SIMD types are addressed as a plain byte pointer */
	if (MONO_CLASS_IS_SIMD (ctx->cfg, mono_class_from_mono_type (t)))
		address = LLVMBuildBitCast (ctx->builder, address, LLVMPointerType (LLVMInt8Type (), 0), "");

	for (part = 0; part < 2; ++part) {
		LLVMValueRef gep_index [2], part_addr, typed_addr, piece;
		/* A piece is at most pointer sized */
		int nbytes = remaining > sizeof (gpointer) ? sizeof (gpointer) : remaining;

		if (ainfo->pair_storage [part] == LLVMArgNone)
			continue;

		if (MONO_CLASS_IS_SIMD (ctx->cfg, mono_class_from_mono_type (t))) {
			gep_index [0] = LLVMConstInt (LLVMInt32Type (), part * sizeof (gpointer), FALSE);
			part_addr = LLVMBuildGEP (builder, address, gep_index, 1, "");
		} else {
			gep_index [0] = LLVMConstInt (LLVMInt32Type (), 0, FALSE);
			gep_index [1] = LLVMConstInt (LLVMInt32Type (), part * sizeof (gpointer), FALSE);
			part_addr = LLVMBuildGEP (builder, address, gep_index, 2, "");
		}

		/* LLVMArgNone was filtered out above, so only integer registers are valid here */
		if (ainfo->pair_storage [part] == LLVMArgInIReg) {
			typed_addr = LLVMBuildBitCast (ctx->builder, part_addr, LLVMPointerType (LLVMIntType (nbytes * 8), 0), "");
			piece = LLVMBuildLoad (builder, typed_addr, "");
			regs [produced ++] = convert (ctx, piece, IntPtrType ());
		} else {
			g_assert_not_reached ();
		}

		remaining -= sizeof (gpointer);
	}

	*nregs = produced;
}

/*
 * build_alloca:
 *
 *   Emit an alloca for a value of type T into the entry bblock and return it.
 * The alignment is 16 for SIMD classes and mono_class_min_align () otherwise,
 * rounded up to a power of two. The alloca is inserted through ctx->alloca_builder,
 * positioned relative to ctx->last_alloca, which is updated to the new instruction.
 */
static LLVMValueRef
build_alloca (EmitContext *ctx, MonoType *t)
{
	MonoClass *klass = mono_class_from_mono_type (t);
	int alignment;

	alignment = MONO_CLASS_IS_SIMD (ctx->cfg, klass) ? 16 : mono_class_min_align (klass);

	/* Sometimes the alignment is not a power of 2, bump it until it is */
	for (; mono_is_power_of_two (alignment) == -1; alignment ++)
		;

	/*
	 * All alloca's have to be placed into the entry bb, since otherwise they would
	 * get executed every time control reaches them.
	 */
	LLVMPositionBuilder (ctx->alloca_builder, get_bb (ctx, ctx->cfg->bb_entry), ctx->last_alloca);
	ctx->last_alloca = mono_llvm_build_alloca (ctx->alloca_builder, type_to_llvm_type (ctx, t), NULL, alignment, "");

	return ctx->last_alloca;
}

/*
 * mark_as_used:
 *
 *   Put GLOBAL into the 'llvm.used' array of MODULE to prevent it from being
 * optimized away. The array is created here as a one element array of i8
 * pointers with appending linkage, placed in the "llvm.metadata" section.
 */
static void
mark_as_used (LLVMModuleRef module, LLVMValueRef global)
{
	LLVMTypeRef i8ptr = LLVMPointerType (LLVMInt8Type (), 0);
	LLVMValueRef used_var, elem;

	used_var = LLVMAddGlobal (module, LLVMArrayType (i8ptr, 1), "llvm.used");

	elem = LLVMConstBitCast (global, i8ptr);
	LLVMSetInitializer (used_var, LLVMConstArray (i8ptr, &elem, 1));

	LLVMSetLinkage (used_var, LLVMAppendingLinkage);
	LLVMSetSection (used_var, "llvm.metadata");
}

/*
 * emit_entry_bb:
 *
 *   Emit the code which has to run at method entry, using BUILDER:
 * - allocate stack slots for volatile/indirect/vtype variables,
 * - store vtype arguments received in registers into memory and convert the
 *   remaining arguments to their stack type,
 * - emit volatile stores for vret_addr, this and the non-vtype arguments,
 * - save the this/rgctx argument into an alloca marked with the "mono.this" metadata,
 * - register exception handler bblocks in ctx->region_to_handler and create the
 *   indicator variable/CALL_HANDLER target bblock for handlers with in_scount == 0.
 * CHECK_FAILURE presumably jumps to the FAILURE label at the end when the ctx
 * has been marked as failed -- confirm against the macro definition.
 */
static void
emit_entry_bb (EmitContext *ctx, LLVMBuilderRef builder)
{
	int i, pindex;
	MonoCompile *cfg = ctx->cfg;
	MonoMethodSignature *sig = ctx->sig;
	LLVMCallInfo *linfo = ctx->linfo;
	MonoBasicBlock *bb;

	/* Separate builder used by build_alloca () to place allocas */
	ctx->alloca_builder = create_builder (ctx);

	/*
	 * Handle indirect/volatile variables by allocating memory for them
	 * using 'alloca', and storing their address in a temporary.
	 */
	for (i = 0; i < cfg->num_varinfo; ++i) {
		MonoInst *var = cfg->varinfo [i];
		LLVMTypeRef vtype;

		if (var->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT) || mini_type_is_vtype (cfg, var->inst_vtype)) {
			/* The result is only used to detect unsupported types through CHECK_FAILURE */
			vtype = type_to_llvm_type (ctx, var->inst_vtype);
			CHECK_FAILURE (ctx);
			/* Could be already created by an OP_VPHI */
			if (!ctx->addresses [var->dreg])
				ctx->addresses [var->dreg] = build_alloca (ctx, var->inst_vtype);
			ctx->vreg_cli_types [var->dreg] = var->inst_vtype;
		}
	}

	/* Process the explicit arguments; the this argument (if any) occupies slot 0 of the arg arrays */
	for (i = 0; i < sig->param_count; ++i) {
		LLVMArgInfo *ainfo = &linfo->args [i + sig->hasthis];
		int reg = cfg->args [i + sig->hasthis]->dreg;

		if (ainfo->storage == LLVMArgVtypeInReg) {
			LLVMValueRef regs [2];

			/* 
			 * Emit code to save the argument from the registers to 
			 * the real argument.
			 */
			pindex = ctx->pindexes [i];
			regs [0] = LLVMGetParam (ctx->lmethod, pindex);
			/* The second LLVM parameter only exists if the second half of the pair is used */
			if (ainfo->pair_storage [1] != LLVMArgNone)
				regs [1] = LLVMGetParam (ctx->lmethod, pindex + 1);
			else
				regs [1] = NULL;

			ctx->addresses [reg] = build_alloca (ctx, sig->params [i]);

			emit_reg_to_vtype (ctx, builder, sig->params [i], ctx->addresses [reg], ainfo, regs);

			if (MONO_CLASS_IS_SIMD (ctx->cfg, mono_class_from_mono_type (sig->params [i]))) {
				/* Treat these as normal values */
				ctx->values [reg] = LLVMBuildLoad (builder, ctx->addresses [reg], "");
			}
		} else if (ainfo->storage == LLVMArgVtypeByVal) {
			/* The LLVM parameter itself is used as the address of the argument */
			ctx->addresses [reg] = LLVMGetParam (ctx->lmethod, ctx->pindexes [i]);

			if (MONO_CLASS_IS_SIMD (ctx->cfg, mono_class_from_mono_type (sig->params [i]))) {
				/* Treat these as normal values */
				ctx->values [reg] = LLVMBuildLoad (builder, ctx->addresses [reg], "");
			}
		} else {
			/* Other arguments: convert the existing value to the stack type of the parameter */
			ctx->values [reg] = convert (ctx, ctx->values [reg], llvm_type_to_stack_type (type_to_llvm_type (ctx, sig->params [i])));
		}
	}

	/* Emit volatile stores for vret_addr, this and the non-vtype arguments */
	if (cfg->vret_addr)
		emit_volatile_store (ctx, cfg->vret_addr->dreg);
	if (sig->hasthis)
		emit_volatile_store (ctx, cfg->args [0]->dreg);
	for (i = 0; i < sig->param_count; ++i)
		if (!mini_type_is_vtype (cfg, sig->params [i]))
			emit_volatile_store (ctx, cfg->args [i + sig->hasthis]->dreg);

	if (sig->hasthis && !cfg->rgctx_var && cfg->generic_sharing_context) {
		LLVMValueRef this_alloc;

		/*
		 * The exception handling code needs the location where the this argument was
		 * stored for gshared methods. We create a separate alloca to hold it, and mark it
		 * with the "mono.this" custom metadata to tell llvm that it needs to save its
		 * location into the LSDA.
		 */
		this_alloc = mono_llvm_build_alloca (builder, IntPtrType (), LLVMConstInt (LLVMInt32Type (), 1, FALSE), 0, "");
		/* This volatile store will keep the alloca alive */
		mono_llvm_build_store (builder, ctx->values [cfg->args [0]->dreg], this_alloc, TRUE);

		set_metadata_flag (this_alloc, "mono.this");
	}

	if (cfg->rgctx_var) {
		/* NOTE(review): 'store' is assigned below but never read afterwards */
		LLVMValueRef rgctx_alloc, store;

		/*
		 * We handle the rgctx arg similarly to the this pointer.
		 */
		g_assert (ctx->addresses [cfg->rgctx_var->dreg]);
		rgctx_alloc = ctx->addresses [cfg->rgctx_var->dreg];
		/* This volatile store will keep the alloca alive */
		store = mono_llvm_build_store (builder, ctx->rgctx_arg, rgctx_alloc, TRUE);

		set_metadata_flag (rgctx_alloc, "mono.this");
	}

	/*
	 * For finally clauses, create an indicator variable telling OP_ENDFINALLY whenever
	 * it needs to continue normally, or return back to the exception handling system.
	 */
	for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
		/* Every handler bblock is recorded under its region with the try bits removed */
		if (bb->region != -1 && (bb->flags & BB_EXCEPTION_HANDLER))
			g_hash_table_insert (ctx->region_to_handler, GUINT_TO_POINTER (mono_get_block_region_notry (cfg, bb->region)), bb);
		/* Handlers with in_scount == 0 receive no exception object on the stack */
		if (bb->region != -1 && (bb->flags & BB_EXCEPTION_HANDLER) && bb->in_scount == 0) {
			char name [128];
			LLVMValueRef val;

			/* The indicator is an int32 slot initialized to 0 */
			sprintf (name, "finally_ind_bb%d", bb->block_num);
			val = LLVMBuildAlloca (builder, LLVMInt32Type (), name);
			LLVMBuildStore (builder, LLVMConstInt (LLVMInt32Type (), 0, FALSE), val);

			ctx->bblocks [bb->block_num].finally_ind = val;

			/*
			 * Create a new bblock which CALL_HANDLER can branch to, because branching to the
			 * LLVM bblock containing the call to llvm.eh.selector causes problems for the
			 * LLVM optimizer passes.
			 */
			sprintf (name, "BB_%d_CALL_HANDLER_TARGET", bb->block_num);
			ctx->bblocks [bb->block_num].call_handler_target_bb = LLVMAppendBasicBlock (ctx->lmethod, name);
		}
	}

 FAILURE:
	;
}

/*
 * mono_personality:
 *
 *   Placeholder personality routine. It has to be an exported symbol for AOT,
 * hence the separate prototype preceding the definition.
 */
void
mono_personality (void);

void
mono_personality (void)
{
	/* This is never expected to be called */
	g_assert_not_reached ();
}

/*
 * process_call:
 *
 *   Emit LLVM IR for the call instruction INS located in BB.
 * The callee is determined from the kind of call:
 * - calls with MONO_INST_HAS_METHOD which are not *_MEMBASE calls use a PLT entry
 *   in AOT mode, or a new function mapped to a JIT trampoline otherwise,
 * - *_MEMBASE calls load the function pointer from inst_basereg + inst_offset,
 * - *_REG calls use the value in sreg1,
 * - everything else is a call to call->fptr, resolved through the JIT icall table
 *   or cfg->abs_patches.
 * The arguments are then collected according to call->cinfo, the call is emitted
 * through emit_call () and the result is stored into values []/addresses [] of
 * ins->dreg. Unsupported cases mark the ctx as failed through LLVM_FAILURE.
 * On success, *BUILDER_REF is set to ctx->builder.
 */
static void
process_call (EmitContext *ctx, MonoBasicBlock *bb, LLVMBuilderRef *builder_ref, MonoInst *ins)
{
	MonoCompile *cfg = ctx->cfg;
	LLVMModuleRef module = ctx->module;
	LLVMValueRef *values = ctx->values;
	LLVMValueRef *addresses = ctx->addresses;
	MonoCallInst *call = (MonoCallInst*)ins;
	MonoMethodSignature *sig = call->signature;
	LLVMValueRef callee = NULL, lcall;
	LLVMValueRef *args;
	LLVMCallInfo *cinfo;
	GSList *l;
	int i, len, nargs;
	gboolean vretaddr;
	LLVMTypeRef llvm_sig;
	gpointer target;
	gboolean virtual, calli;
	LLVMBuilderRef builder = *builder_ref;
	LLVMSigInfo sinfo;

	if (call->signature->call_convention != MONO_CALL_DEFAULT)
		LLVM_FAILURE (ctx, "non-default callconv");

	/* NOTE(review): cinfo is dereferenced here without a NULL check, while later code tests 'cinfo &&' */
	cinfo = call->cinfo;
	if (call->rgctx_arg_reg)
		cinfo->rgctx_arg = TRUE;
	if (call->imt_arg_reg)
		cinfo->imt_arg = TRUE;

	/* Whether the return value storage is LLVMArgVtypeRetAddr (an address is passed as an extra argument below) */
	vretaddr = cinfo && cinfo->ret.storage == LLVMArgVtypeRetAddr;

	/* sinfo receives the LLVM parameter indexes of this/rgctx/imt/vret and of each argument */
	llvm_sig = sig_to_llvm_sig_full (ctx, sig, cinfo, &sinfo);
	CHECK_FAILURE (ctx);

	/* Classify the call based on the opcode */
	virtual = (ins->opcode == OP_VOIDCALL_MEMBASE || ins->opcode == OP_CALL_MEMBASE || ins->opcode == OP_VCALL_MEMBASE || ins->opcode == OP_LCALL_MEMBASE || ins->opcode == OP_FCALL_MEMBASE);
	calli = (ins->opcode == OP_VOIDCALL_REG || ins->opcode == OP_CALL_REG || ins->opcode == OP_VCALL_REG || ins->opcode == OP_LCALL_REG || ins->opcode == OP_FCALL_REG);

	/* FIXME: Avoid creating duplicate methods */

	if (ins->flags & MONO_INST_HAS_METHOD) {
		if (virtual) {
			/* The callee is loaded from memory below */
			callee = NULL;
		} else {
			if (cfg->compile_aot) {
				callee = get_plt_entry (ctx, llvm_sig, MONO_PATCH_INFO_METHOD, call->method);
				if (!callee)
					LLVM_FAILURE (ctx, "can't encode patch");
			} else {
				/* JIT mode: add an anonymous function and map it to a JIT trampoline for the method */
				callee = LLVMAddFunction (module, "", llvm_sig);
 
				target =
					mono_create_jit_trampoline_in_domain (mono_domain_get (),
														  call->method);
				LLVMAddGlobalMapping (ee, callee, target);
			}
		}
	} else if (calli) {
		/* The callee is taken from sreg1 below */
	} else {
		/* Call to a raw address: check whether it is a registered JIT icall */
		MonoJitICallInfo *info = mono_find_jit_icall_by_addr (call->fptr);

		if (info) {
			/*
			  MonoJumpInfo ji;

			  memset (&ji, 0, sizeof (ji));
			  ji.type = MONO_PATCH_INFO_JIT_ICALL_ADDR;
			  ji.data.target = info->name;

			  target = mono_resolve_patch_target (cfg->method, cfg->domain, NULL, &ji, FALSE);
			*/
			if (cfg->compile_aot) {
				callee = get_plt_entry (ctx, llvm_sig, MONO_PATCH_INFO_INTERNAL_METHOD, (char*)info->name);
				if (!callee)
					LLVM_FAILURE (ctx, "can't encode patch");
			} else {
				callee = LLVMAddFunction (module, "", llvm_sig);
				target = (gpointer)mono_icall_get_wrapper (info);
				LLVMAddGlobalMapping (ee, callee, target);
			}
		} else {
			if (cfg->compile_aot) {
				/* In AOT mode the address must have an entry in abs_patches, otherwise the method fails */
				callee = NULL;
				if (cfg->abs_patches) {
					MonoJumpInfo *abs_ji = g_hash_table_lookup (cfg->abs_patches, call->fptr);
					if (abs_ji) {
						callee = get_plt_entry (ctx, llvm_sig, abs_ji->type, abs_ji->data.target);
						if (!callee)
							LLVM_FAILURE (ctx, "can't encode patch");
					}
				}
				if (!callee)
					LLVM_FAILURE (ctx, "aot");
			} else {
				callee = LLVMAddFunction (module, "", llvm_sig);
				target = NULL;
				if (cfg->abs_patches) {
					MonoJumpInfo *abs_ji = g_hash_table_lookup (cfg->abs_patches, call->fptr);
					if (abs_ji) {
						/*
						 * FIXME: Some trampolines might have
						 * their own calling convention on some platforms.
						 */
#ifndef TARGET_AMD64
						if (abs_ji->type == MONO_PATCH_INFO_MONITOR_ENTER || abs_ji->type == MONO_PATCH_INFO_MONITOR_EXIT || abs_ji->type == MONO_PATCH_INFO_GENERIC_CLASS_INIT)
							LLVM_FAILURE (ctx, "trampoline with own cconv");
#endif
						target = mono_resolve_patch_target (cfg->method, cfg->domain, NULL, abs_ji, FALSE);
						LLVMAddGlobalMapping (ee, callee, target);
					}
				}
				/* No abs patch resolved a target: map the function to the raw address */
				if (!target)
					LLVMAddGlobalMapping (ee, callee, (gpointer)call->fptr);
			}
		}
	}

	if (virtual) {
		int size = sizeof (gpointer);
		LLVMValueRef index;

		/* inst_offset has to be a multiple of the pointer size since it is turned into a slot index */
		g_assert (ins->inst_offset % size == 0);
		index = LLVMConstInt (LLVMInt32Type (), ins->inst_offset / size, FALSE);

		/* Load the function pointer from the slot and cast it to the type of the callee */
		callee = convert (ctx, LLVMBuildLoad (builder, LLVMBuildGEP (builder, convert (ctx, values [ins->inst_basereg], LLVMPointerType (LLVMPointerType (IntPtrType (), 0), 0)), &index, 1, ""), ""), LLVMPointerType (llvm_sig, 0));
	} else if (calli) {
		callee = convert (ctx, values [ins->sreg1], LLVMPointerType (llvm_sig, 0));
	} else {
		if (ins->flags & MONO_INST_HAS_METHOD) {
		}
	}

	/* 
	 * Collect and convert arguments
	 */
	/*
	 * Upper bound for the number of LLVM arguments: each parameter can take up to two slots.
	 * NOTE(review): call->rgctx_reg and call->imt_arg_reg are added as-is; if these hold
	 * register numbers rather than 0/1 flags the array is merely over-allocated -- confirm.
	 */
	nargs = (sig->param_count * 2) + sig->hasthis + vretaddr + call->rgctx_reg + call->imt_arg_reg;
	len = sizeof (LLVMValueRef) * nargs;
	args = alloca (len);
	memset (args, 0, len);
	/* The out_ireg_args list is walked in parallel with the arguments in the loop below */
	l = call->out_ireg_args;

	if (call->rgctx_arg_reg) {
		g_assert (values [call->rgctx_arg_reg]);
		g_assert (sinfo.rgctx_arg_pindex < nargs);
		args [sinfo.rgctx_arg_pindex] = values [call->rgctx_arg_reg];
	}
	if (call->imt_arg_reg) {
		g_assert (values [call->imt_arg_reg]);
		g_assert (sinfo.imt_arg_pindex < nargs);
		args [sinfo.imt_arg_pindex] = values [call->imt_arg_reg];
	}

	if (vretaddr) {
		/* Pass the address of the return value buffer, allocating the buffer if needed */
		if (!addresses [call->inst.dreg])
			addresses [call->inst.dreg] = build_alloca (ctx, sig->ret);
		g_assert (sinfo.vret_arg_pindex < nargs);
		args [sinfo.vret_arg_pindex] = LLVMBuildPtrToInt (builder, addresses [call->inst.dreg], IntPtrType (), "");
	}

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		guint32 regpair;
		int reg, pindex;
		LLVMArgInfo *ainfo = call->cinfo ? &call->cinfo->args [i] : NULL;

		/* Map the argument index to its LLVM parameter index; this comes first when present */
		if (sig->hasthis) {
			if (i == 0)
				pindex = sinfo.this_arg_pindex;
			else
				pindex = sinfo.pindexes [i - 1];
		} else {
			pindex = sinfo.pindexes [i];
		}

		/* The low 24 bits of the list entry hold the vreg of the argument */
		regpair = (guint32)(gssize)(l->data);
		reg = regpair & 0xffffff;
		args [pindex] = values [reg];
		/* NOTE(review): ainfo is dereferenced here before the g_assert (ainfo) below */
		if (ainfo->storage == LLVMArgVtypeInReg) {
			int j;
			LLVMValueRef regs [2];
			guint32 nregs;

			g_assert (ainfo);

			g_assert (addresses [reg]);

			/* Load the vtype from memory into registers, each one becomes a separate LLVM argument */
			emit_vtype_to_reg (ctx, builder, sig->params [i - sig->hasthis], addresses [reg], ainfo, regs, &nregs);
			for (j = 0; j < nregs; ++j)
				args [pindex ++] = regs [j];

			// FIXME: alignment
			// FIXME: Get rid of the VMOVE
		} else if (ainfo->storage == LLVMArgVtypeByVal) {
			/* The address of the vtype is passed, the byval attribute is added after the call is emitted */
			g_assert (addresses [reg]);
			args [pindex] = addresses [reg];
		} else {
			g_assert (args [pindex]);
			if (i == 0 && sig->hasthis)
				args [pindex] = convert (ctx, args [pindex], IntPtrType ());
			else
				args [pindex] = convert (ctx, args [pindex], type_to_llvm_arg_type (ctx, sig->params [i - sig->hasthis]));
		}

		l = l->next;
	}

	// FIXME: Align call sites

	/*
	 * Emit the call
	 */

	/* emit_call () updates both the local builder and ctx->builder if it has to start a new bblock */
	lcall = emit_call (ctx, bb, &builder, callee, args, LLVMCountParamTypes (llvm_sig));

#ifdef LLVM_MONO_BRANCH
	/*
	 * Modify cconv and parameter attributes to pass rgctx/imt correctly.
	 */
#if defined(MONO_ARCH_IMT_REG) && defined(MONO_ARCH_RGCTX_REG)
	g_assert (MONO_ARCH_IMT_REG == MONO_ARCH_RGCTX_REG);
#endif
	/* The two can't be used together, so use only one LLVM calling conv to pass them */
	g_assert (!(call->rgctx_arg_reg && call->imt_arg_reg));
	if (!sig->pinvoke)
		LLVMSetInstructionCallConv (lcall, LLVMMono1CallConv);

	/* The attribute index is 1 + the parameter index */
	if (call->rgctx_arg_reg)
		LLVMAddInstrAttribute (lcall, 1 + sinfo.rgctx_arg_pindex, LLVMInRegAttribute);
	if (call->imt_arg_reg)
		LLVMAddInstrAttribute (lcall, 1 + sinfo.imt_arg_pindex, LLVMInRegAttribute);
#endif

	/* Add byval attributes if needed */
	for (i = 0; i < sig->param_count; ++i) {
		LLVMArgInfo *ainfo = call->cinfo ? &call->cinfo->args [i + sig->hasthis] : NULL;

		if (ainfo && ainfo->storage == LLVMArgVtypeByVal) {
			LLVMAddInstrAttribute (lcall, 1 + sinfo.pindexes [i], LLVMByValAttribute);
		}
	}

	/*
	 * Convert the result
	 */
	if (cinfo && cinfo->ret.storage == LLVMArgVtypeInReg) {
		LLVMValueRef regs [2];

		if (!addresses [ins->dreg])
			addresses [ins->dreg] = build_alloca (ctx, sig->ret);

		/* The call returns a struct, extract its members and store them to the vtype in memory */
		regs [0] = LLVMBuildExtractValue (builder, lcall, 0, "");
		/* regs [1] is left unset when there is no second part; emit_reg_to_vtype () skips LLVMArgNone parts */
		if (cinfo->ret.pair_storage [1] != LLVMArgNone)
			regs [1] = LLVMBuildExtractValue (builder, lcall, 1, "");
					
		emit_reg_to_vtype (ctx, builder, sig->ret, addresses [ins->dreg], &cinfo->ret, regs);
	} else if (sig->ret->type != MONO_TYPE_VOID && !vretaddr) {
		/* If the method returns an unsigned value, need to zext it */

		values [ins->dreg] = convert_full (ctx, lcall, llvm_type_to_stack_type (type_to_llvm_type (ctx, sig->ret)), type_is_unsigned (ctx, sig->ret));
	}

	*builder_ref = ctx->builder;

	/* NOTE(review): sinfo.pindexes is only freed on this success path, not when jumping to FAILURE -- confirm whether that leaks */
	g_free (sinfo.pindexes);
	
	return;
 FAILURE:
	return;
}

static void
process_bb (EmitContext *ctx, MonoBasicBlock *bb)
{
	MonoCompile *cfg = ctx->cfg;
	MonoMethodSignature *sig = ctx->sig;
	LLVMValueRef method = ctx->lmethod;
	LLVMValueRef *values = ctx->values;
	LLVMValueRef *addresses = ctx->addresses;
	int i;
	LLVMCallInfo *linfo = ctx->linfo;
	LLVMModuleRef module = ctx->module;
	BBInfo *bblocks = ctx->bblocks;
	MonoInst *ins;
	LLVMBasicBlockRef cbb;
	LLVMBuilderRef builder, starting_builder;
	gboolean has_terminator;
	LLVMValueRef v;
	LLVMValueRef lhs, rhs;
	int nins = 0;

	cbb = get_bb (ctx, bb);
	builder = create_builder (ctx);
	ctx->builder = builder;
	LLVMPositionBuilderAtEnd (builder, cbb);

	if (bb == cfg->bb_entry)
		emit_entry_bb (ctx, builder);
	CHECK_FAILURE (ctx);

	if (bb->flags & BB_EXCEPTION_HANDLER) {
		LLVMTypeRef i8ptr;
		LLVMValueRef personality;
		LLVMBasicBlockRef target_bb;
		MonoInst *exvar;
		static gint32 mapping_inited;
		static int ti_generator;
		char ti_name [128];
		MonoClass **ti;
		LLVMValueRef type_info;
		int clause_index;

		if (!bblocks [bb->block_num].invoke_target) {
			/*
			 * LLVM asserts if llvm.eh.selector is called from a bblock which
			 * doesn't have an invoke pointing at it.
			 * Update: LLVM no longer asserts, but some tests in exceptions.exe now fail.
			 */
			LLVM_FAILURE (ctx, "handler without invokes");
		}

		// <resultval> = landingpad <somety> personality <type> <pers_fn> <clause>+

		if (cfg->compile_aot) {
			/* Use a dummy personality function */
			personality = LLVMGetNamedFunction (module, "mono_aot_personality");
			g_assert (personality);
		} else {
			personality = LLVMGetNamedFunction (module, "mono_personality");
			if (InterlockedCompareExchange (&mapping_inited, 1, 0) == 0)
				LLVMAddGlobalMapping (ee, personality, mono_personality);
		}

		i8ptr = LLVMPointerType (LLVMInt8Type (), 0);

		clause_index = (mono_get_block_region_notry (cfg, bb->region) >> 8) - 1;

		/*
		 * Create the type info
		 */
		sprintf (ti_name, "type_info_%d", ti_generator);
		ti_generator ++;

		if (cfg->compile_aot) {
			/* decode_eh_frame () in aot-runtime.c will decode this */
			type_info = LLVMAddGlobal (module, LLVMInt32Type (), ti_name);
			LLVMSetInitializer (type_info, LLVMConstInt (LLVMInt32Type (), clause_index, FALSE));

			/*
			 * These symbols are not really used, the clause_index is embedded into the EH tables generated by DwarfMonoException in LLVM.
			 */
			LLVMSetLinkage (type_info, LLVMInternalLinkage);

			/* 
			 * Enabling this causes llc to crash:
			 * http://llvm.org/bugs/show_bug.cgi?id=6102
			 */
			//LLVM_FAILURE (ctx, "aot+clauses");
#ifdef TARGET_ARM
			// test_0_invalid_unbox_arrays () fails
			LLVM_FAILURE (ctx, "aot+clauses");
#endif
		} else {
			/*
			 * After the cfg mempool is freed, the type info will point to stale memory,
			 * but this is not a problem, since we decode it once in exception_cb during
			 * compilation.
			 */
			ti = mono_mempool_alloc (cfg->mempool, sizeof (gint32));
			*(gint32*)ti = clause_index;

			type_info = LLVMAddGlobal (module, i8ptr, ti_name);

			LLVMAddGlobalMapping (ee, type_info, ti);
		}

		{
			LLVMTypeRef members [2], ret_type;
			LLVMValueRef landing_pad;

			members [0] = i8ptr;
			members [1] = LLVMInt32Type ();
			ret_type = LLVMStructType (members, 2, FALSE);

			landing_pad = LLVMBuildLandingPad (builder, ret_type, personality, 1, "");
			LLVMAddClause (landing_pad, type_info);

			/* Store the exception into the exvar */
			if (bb->in_scount == 1) {
				g_assert (bb->in_scount == 1);
				exvar = bb->in_stack [0];

				// FIXME: This is shared with filter clauses ?
				g_assert (!values [exvar->dreg]);

				values [exvar->dreg] = LLVMBuildExtractValue (builder, landing_pad, 0, "ex_obj");
				emit_volatile_store (ctx, exvar->dreg);
			}
		}

		/* Start a new bblock which CALL_HANDLER can branch to */
		target_bb = bblocks [bb->block_num].call_handler_target_bb;
		if (target_bb) {
			LLVMBuildBr (builder, target_bb);

			ctx->builder = builder = create_builder (ctx);
			LLVMPositionBuilderAtEnd (ctx->builder, target_bb);

			ctx->bblocks [bb->block_num].end_bblock = target_bb;
		}
	}

	has_terminator = FALSE;
	starting_builder = builder;
	for (ins = bb->code; ins; ins = ins->next) {
		const char *spec = LLVM_INS_INFO (ins->opcode);
		char *dname = NULL;
		char dname_buf [128];

		nins ++;
		if (nins > 5000 && builder == starting_builder) {
			/* some steps in llc are non-linear in the size of basic blocks, see #5714 */
			LLVM_FAILURE (ctx, "basic block too long");
		}

		if (has_terminator)
			/* There could be instructions after a terminator, skip them */
			break;

		if (spec [MONO_INST_DEST] != ' ' && !MONO_IS_STORE_MEMBASE (ins)) {
			sprintf (dname_buf, "t%d", ins->dreg);
			dname = dname_buf;
		}

		if (spec [MONO_INST_SRC1] != ' ' && spec [MONO_INST_SRC1] != 'v') {
			MonoInst *var = get_vreg_to_inst (cfg, ins->sreg1);

			if (var && var->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT)) {
				lhs = emit_volatile_load (ctx, ins->sreg1);
			} else {
				/* It is ok for SETRET to have an uninitialized argument */
				if (!values [ins->sreg1] && ins->opcode != OP_SETRET)
					LLVM_FAILURE (ctx, "sreg1");
				lhs = values [ins->sreg1];
			}
		} else {
			lhs = NULL;
		}

		if (spec [MONO_INST_SRC2] != ' ' && spec [MONO_INST_SRC2] != ' ') {
			MonoInst *var = get_vreg_to_inst (cfg, ins->sreg2);
			if (var && var->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT)) {
				rhs = emit_volatile_load (ctx, ins->sreg2);
			} else {
				if (!values [ins->sreg2])
					LLVM_FAILURE (ctx, "sreg2");
				rhs = values [ins->sreg2];
			}
		} else {
			rhs = NULL;
		}

		//mono_print_ins (ins);
		switch (ins->opcode) {
		case OP_NOP:
		case OP_NOT_NULL:
		case OP_LIVERANGE_START:
		case OP_LIVERANGE_END:
			break;
		case OP_ICONST:
			values [ins->dreg] = LLVMConstInt (LLVMInt32Type (), ins->inst_c0, FALSE);
			break;
		case OP_I8CONST:
#if SIZEOF_VOID_P == 4
			values [ins->dreg] = LLVMConstInt (LLVMInt64Type (), GET_LONG_IMM (ins), FALSE);
#else
			values [ins->dreg] = LLVMConstInt (LLVMInt64Type (), (gint64)ins->inst_c0, FALSE);
#endif
			break;
		case OP_R8CONST:
			values [ins->dreg] = LLVMConstReal (LLVMDoubleType (), *(double*)ins->inst_p0);
			break;
		case OP_R4CONST:
			values [ins->dreg] = LLVMConstFPExt (LLVMConstReal (LLVMFloatType (), *(float*)ins->inst_p0), LLVMDoubleType ());
			break;
		case OP_BR:
			LLVMBuildBr (builder, get_bb (ctx, ins->inst_target_bb));
			has_terminator = TRUE;
			break;
		case OP_SWITCH: {
			int i;
			LLVMValueRef v;
			char bb_name [128];
			LLVMBasicBlockRef new_bb;
			LLVMBuilderRef new_builder;

			// The default branch is already handled
			// FIXME: Handle it here

			/* Start new bblock */
			sprintf (bb_name, "SWITCH_DEFAULT_BB%d", ctx->default_index ++);
			new_bb = LLVMAppendBasicBlock (ctx->lmethod, bb_name);

			lhs = convert (ctx, lhs, LLVMInt32Type ());
			v = LLVMBuildSwitch (builder, lhs, new_bb, GPOINTER_TO_UINT (ins->klass));
			for (i = 0; i < GPOINTER_TO_UINT (ins->klass); ++i) {
				MonoBasicBlock *target_bb = ins->inst_many_bb [i];

				LLVMAddCase (v, LLVMConstInt (LLVMInt32Type (), i, FALSE), get_bb (ctx, target_bb));
			}

			new_builder = create_builder (ctx);
			LLVMPositionBuilderAtEnd (new_builder, new_bb);
			LLVMBuildUnreachable (new_builder);

			has_terminator = TRUE;
			g_assert (!ins->next);
				
			break;
		}

		case OP_SETRET:
			if (linfo->ret.storage == LLVMArgVtypeInReg) {
				LLVMTypeRef ret_type = LLVMGetReturnType (LLVMGetElementType (LLVMTypeOf (method)));
				LLVMValueRef part1, retval;
				int size;

				size = get_vtype_size (sig->ret);

				g_assert (addresses [ins->sreg1]);

				g_assert (linfo->ret.pair_storage [0] == LLVMArgInIReg);
				g_assert (linfo->ret.pair_storage [1] == LLVMArgNone);
					
				part1 = convert (ctx, LLVMBuildLoad (builder, LLVMBuildBitCast (builder, addresses [ins->sreg1], LLVMPointerType (LLVMIntType (size * 8), 0), ""), ""), IntPtrType ());

				retval = LLVMBuildInsertValue (builder, LLVMGetUndef (ret_type), part1, 0, "");

				LLVMBuildRet (builder, retval);
				break;
			}

			if (linfo->ret.storage == LLVMArgVtypeRetAddr) {
				LLVMBuildRetVoid (builder);
				break;
			}

			if (!lhs || ctx->is_dead [ins->sreg1]) {
				/* 
				 * The method did not set its return value, probably because it
				 * ends with a throw.
				 */
				if (cfg->vret_addr)
					LLVMBuildRetVoid (builder);
				else
					LLVMBuildRet (builder, LLVMConstNull (type_to_llvm_type (ctx, sig->ret)));
			} else {
				LLVMBuildRet (builder, convert (ctx, lhs, type_to_llvm_type (ctx, sig->ret)));
			}
			has_terminator = TRUE;
			break;
		case OP_ICOMPARE:
		case OP_FCOMPARE:
		case OP_LCOMPARE:
		case OP_COMPARE:
		case OP_ICOMPARE_IMM:
		case OP_LCOMPARE_IMM:
		case OP_COMPARE_IMM: {
			CompRelation rel;
			LLVMValueRef cmp;

			if (ins->next->opcode == OP_NOP)
				break;

			if (ins->next->opcode == OP_BR)
				/* The comparison result is not needed */
				continue;

			rel = mono_opcode_to_cond (ins->next->opcode);

			if (ins->opcode == OP_ICOMPARE_IMM) {
				lhs = convert (ctx, lhs, LLVMInt32Type ());
				rhs = LLVMConstInt (LLVMInt32Type (), ins->inst_imm, FALSE);
			}
			if (ins->opcode == OP_LCOMPARE_IMM) {
				lhs = convert (ctx, lhs, LLVMInt64Type ());
				rhs = LLVMConstInt (LLVMInt64Type (), GET_LONG_IMM (ins), FALSE);
			}
			if (ins->opcode == OP_LCOMPARE) {
				lhs = convert (ctx, lhs, LLVMInt64Type ());
				rhs = convert (ctx, rhs, LLVMInt64Type ());
			}
			if (ins->opcode == OP_ICOMPARE) {
				lhs = convert (ctx, lhs, LLVMInt32Type ());
				rhs = convert (ctx, rhs, LLVMInt32Type ());
			}

			if (lhs && rhs) {
				if (LLVMGetTypeKind (LLVMTypeOf (lhs)) == LLVMPointerTypeKind)
					rhs = convert (ctx, rhs, LLVMTypeOf (lhs));
				else if (LLVMGetTypeKind (LLVMTypeOf (rhs)) == LLVMPointerTypeKind)
					lhs = convert (ctx, lhs, LLVMTypeOf (rhs));
			}

			/* We use COMPARE+SETcc/Bcc, llvm uses SETcc+br cond */
			if (ins->opcode == OP_FCOMPARE)
				cmp = LLVMBuildFCmp (builder, fpcond_to_llvm_cond [rel], convert (ctx, lhs, LLVMDoubleType ()), convert (ctx, rhs, LLVMDoubleType ()), "");
			else if (ins->opcode == OP_COMPARE_IMM)
				cmp = LLVMBuildICmp (builder, cond_to_llvm_cond [rel], convert (ctx, lhs, IntPtrType ()), LLVMConstInt (IntPtrType (), ins->inst_imm, FALSE), "");
			else if (ins->opcode == OP_LCOMPARE_IMM) {
				if (SIZEOF_REGISTER == 4 && COMPILE_LLVM (cfg))  {
					/* The immediate is encoded in two fields */
					guint64 l = ((guint64)(guint32)ins->inst_offset << 32) | ((guint32)ins->inst_imm);
					cmp = LLVMBuildICmp (builder, cond_to_llvm_cond [rel], convert (ctx, lhs, LLVMInt64Type ()), LLVMConstInt (LLVMInt64Type (), l, FALSE), "");
				} else {
					cmp = LLVMBuildICmp (builder, cond_to_llvm_cond [rel], convert (ctx, lhs, LLVMInt64Type ()), LLVMConstInt (LLVMInt64Type (), ins->inst_imm, FALSE), "");
				}
			}
			else if (ins->opcode == OP_COMPARE)
				cmp = LLVMBuildICmp (builder, cond_to_llvm_cond [rel], convert (ctx, lhs, IntPtrType ()), convert (ctx, rhs, IntPtrType ()), "");
			else
				cmp = LLVMBuildICmp (builder, cond_to_llvm_cond [rel], lhs, rhs, "");

			if (MONO_IS_COND_BRANCH_OP (ins->next)) {
				if (ins->next->inst_true_bb == ins->next->inst_false_bb) {
					/*
					 * If the target bb contains PHI instructions, LLVM requires
					 * two PHI entries for this bblock, while we only generate one.
					 * So convert this to an unconditional bblock. (bxc #171).
					 */
					LLVMBuildBr (builder, get_bb (ctx, ins->next->inst_true_bb));
				} else {
					LLVMBuildCondBr (builder, cmp, get_bb (ctx, ins->next->inst_true_bb), get_bb (ctx, ins->next->inst_false_bb));
				}
				has_terminator = TRUE;
			} else if (MONO_IS_SETCC (ins->next)) {
				sprintf (dname_buf, "t%d", ins->next->dreg);
				dname = dname_buf;
				values [ins->next->dreg] = LLVMBuildZExt (builder, cmp, LLVMInt32Type (), dname);

				/* Add stores for volatile variables */
				emit_volatile_store (ctx, ins->next->dreg);
			} else if (MONO_IS_COND_EXC (ins->next)) {
				emit_cond_system_exception (ctx, bb, ins->next->inst_p1, cmp);
				CHECK_FAILURE (ctx);
				builder = ctx->builder;
			} else {
				LLVM_FAILURE (ctx, "next");
			}

			ins = ins->next;
			break;
		}
		case OP_FCEQ:
		case OP_FCLT:
		case OP_FCLT_UN:
		case OP_FCGT:
		case OP_FCGT_UN: {
			CompRelation rel;
			LLVMValueRef cmp;

			rel = mono_opcode_to_cond (ins->opcode);

			cmp = LLVMBuildFCmp (builder, fpcond_to_llvm_cond [rel], convert (ctx, lhs, LLVMDoubleType ()), convert (ctx, rhs, LLVMDoubleType ()), "");
			values [ins->dreg] = LLVMBuildZExt (builder, cmp, LLVMInt32Type (), dname);
			break;
		}
		case OP_PHI:
		case OP_FPHI:
		case OP_VPHI:
		case OP_XPHI: {
			int i;
			gboolean empty = TRUE;

			/* Check that all input bblocks really branch to us */
			for (i = 0; i < bb->in_count; ++i) {
				if (bb->in_bb [i]->last_ins && bb->in_bb [i]->last_ins->opcode == OP_NOT_REACHED)
					ins->inst_phi_args [i + 1] = -1;
				else
					empty = FALSE;
			}

			if (empty) {
				/* LLVM doesn't like phi instructions with zero operands */
				ctx->is_dead [ins->dreg] = TRUE;
				break;
			}					

			/* Created earlier, insert it now */
			LLVMInsertIntoBuilder (builder, values [ins->dreg]);

			for (i = 0; i < ins->inst_phi_args [0]; i++) {
				int sreg1 = ins->inst_phi_args [i + 1];
				int count, j;

				/* 
				 * Count the number of times the incoming bblock branches to us,
				 * since llvm requires a separate entry for each.
				 */
				if (bb->in_bb [i]->last_ins && bb->in_bb [i]->last_ins->opcode == OP_SWITCH) {
					MonoInst *switch_ins = bb->in_bb [i]->last_ins;

					count = 0;
					for (j = 0; j < GPOINTER_TO_UINT (switch_ins->klass); ++j) {
						if (switch_ins->inst_many_bb [j] == bb)
							count ++;
					}
				} else {
					count = 1;
				}

				/* Remember for later */
				for (j = 0; j < count; ++j) {
					PhiNode *node = mono_mempool_alloc0 (ctx->mempool, sizeof (PhiNode));
					node->bb = bb;
					node->phi = ins;
					node->in_bb = bb->in_bb [i];
					node->sreg = sreg1;
					bblocks [bb->in_bb [i]->block_num].phi_nodes = g_slist_prepend_mempool (ctx->mempool, bblocks [bb->in_bb [i]->block_num].phi_nodes, node);
				}
			}
			break;
		}
		case OP_MOVE:
		case OP_LMOVE:
		case OP_XMOVE:
		case OP_SETFRET:
			g_assert (lhs);
			values [ins->dreg] = lhs;
			break;
		case OP_FMOVE: {
			MonoInst *var = get_vreg_to_inst (cfg, ins->dreg);
				
			g_assert (lhs);
			values [ins->dreg] = lhs;

			if (var && var->klass->byval_arg.type == MONO_TYPE_R4) {
				/* 
				 * This is added by the spilling pass in case of the JIT,
				 * but we have to do it ourselves.
				 */
				values [ins->dreg] = convert (ctx, values [ins->dreg], LLVMFloatType ());
			}
			break;
		}
		case OP_IADD:
		case OP_ISUB:
		case OP_IAND:
		case OP_IMUL:
		case OP_IDIV:
		case OP_IDIV_UN:
		case OP_IREM:
		case OP_IREM_UN:
		case OP_IOR:
		case OP_IXOR:
		case OP_ISHL:
		case OP_ISHR:
		case OP_ISHR_UN:
		case OP_FADD:
		case OP_FSUB:
		case OP_FMUL:
		case OP_FDIV:
		case OP_LADD:
		case OP_LSUB:
		case OP_LMUL:
		case OP_LDIV:
		case OP_LDIV_UN:
		case OP_LREM:
		case OP_LREM_UN:
		case OP_LAND:
		case OP_LOR:
		case OP_LXOR:
		case OP_LSHL:
		case OP_LSHR:
		case OP_LSHR_UN:
			lhs = convert (ctx, lhs, regtype_to_llvm_type (spec [MONO_INST_DEST]));
			rhs = convert (ctx, rhs, regtype_to_llvm_type (spec [MONO_INST_DEST]));

			switch (ins->opcode) {
			case OP_IADD:
			case OP_LADD:
				values [ins->dreg] = LLVMBuildAdd (builder, lhs, rhs, dname);
				break;
			case OP_ISUB:
			case OP_LSUB:
				values [ins->dreg] = LLVMBuildSub (builder, lhs, rhs, dname);
				break;
			case OP_IMUL:
			case OP_LMUL:
				values [ins->dreg] = LLVMBuildMul (builder, lhs, rhs, dname);
				break;
			case OP_IREM:
			case OP_LREM:
				values [ins->dreg] = LLVMBuildSRem (builder, lhs, rhs, dname);
				break;
			case OP_IREM_UN:
			case OP_LREM_UN:
				values [ins->dreg] = LLVMBuildURem (builder, lhs, rhs, dname);
				break;
			case OP_IDIV:
			case OP_LDIV:
				values [ins->dreg] = LLVMBuildSDiv (builder, lhs, rhs, dname);
				break;
			case OP_IDIV_UN:
			case OP_LDIV_UN:
				values [ins->dreg] = LLVMBuildUDiv (builder, lhs, rhs, dname);
				break;
			case OP_FDIV:
				values [ins->dreg] = LLVMBuildFDiv (builder, lhs, rhs, dname);
				break;
			case OP_IAND:
			case OP_LAND:
				values [ins->dreg] = LLVMBuildAnd (builder, lhs, rhs, dname);
				break;
			case OP_IOR:
			case OP_LOR:
				values [ins->dreg] = LLVMBuildOr (builder, lhs, rhs, dname);
				break;
			case OP_IXOR:
			case OP_LXOR:
				values [ins->dreg] = LLVMBuildXor (builder, lhs, rhs, dname);
				break;
			case OP_ISHL:
			case OP_LSHL:
				values [ins->dreg] = LLVMBuildShl (builder, lhs, rhs, dname);
				break;
			case OP_ISHR:
			case OP_LSHR:
				values [ins->dreg] = LLVMBuildAShr (builder, lhs, rhs, dname);
				break;
			case OP_ISHR_UN:
			case OP_LSHR_UN:
				values [ins->dreg] = LLVMBuildLShr (builder, lhs, rhs, dname);
				break;

			case OP_FADD:
				values [ins->dreg] = LLVMBuildFAdd (builder, lhs, rhs, dname);
				break;
			case OP_FSUB:
				values [ins->dreg] = LLVMBuildFSub (builder, lhs, rhs, dname);
				break;
			case OP_FMUL:
				values [ins->dreg] = LLVMBuildFMul (builder, lhs, rhs, dname);
				break;

			default:
				g_assert_not_reached ();
			}
			break;
		case OP_IADD_IMM:
		case OP_ISUB_IMM:
		case OP_IMUL_IMM:
		case OP_IREM_IMM:
		case OP_IREM_UN_IMM:
		case OP_IDIV_IMM:
		case OP_IDIV_UN_IMM:
		case OP_IAND_IMM:
		case OP_IOR_IMM:
		case OP_IXOR_IMM:
		case OP_ISHL_IMM:
		case OP_ISHR_IMM:
		case OP_ISHR_UN_IMM:
		case OP_LADD_IMM:
		case OP_LSUB_IMM:
		case OP_LREM_IMM:
		case OP_LAND_IMM:
		case OP_LOR_IMM:
		case OP_LXOR_IMM:
		case OP_LSHL_IMM:
		case OP_LSHR_IMM:
		case OP_LSHR_UN_IMM:
		case OP_ADD_IMM:
		case OP_AND_IMM:
		case OP_MUL_IMM:
		case OP_SHL_IMM:
		case OP_SHR_IMM: {
			LLVMValueRef imm;

			if (spec [MONO_INST_SRC1] == 'l') {
				imm = LLVMConstInt (LLVMInt64Type (), GET_LONG_IMM (ins), FALSE);
			} else {
				imm = LLVMConstInt (LLVMInt32Type (), ins->inst_imm, FALSE);
			}

#if SIZEOF_VOID_P == 4
			if (ins->opcode == OP_LSHL_IMM || ins->opcode == OP_LSHR_IMM || ins->opcode == OP_LSHR_UN_IMM)
				imm = LLVMConstInt (LLVMInt32Type (), ins->inst_imm, FALSE);
#endif

			if (LLVMGetTypeKind (LLVMTypeOf (lhs)) == LLVMPointerTypeKind)
				lhs = convert (ctx, lhs, IntPtrType ());
			imm = convert (ctx, imm, LLVMTypeOf (lhs));
			switch (ins->opcode) {
			case OP_IADD_IMM:
			case OP_LADD_IMM:
			case OP_ADD_IMM:
				values [ins->dreg] = LLVMBuildAdd (builder, lhs, imm, dname);
				break;
			case OP_ISUB_IMM:
			case OP_LSUB_IMM:
				values [ins->dreg] = LLVMBuildSub (builder, lhs, imm, dname);
				break;
			case OP_IMUL_IMM:
			case OP_MUL_IMM:
				values [ins->dreg] = LLVMBuildMul (builder, lhs, imm, dname);
				break;
			case OP_IDIV_IMM:
			case OP_LDIV_IMM:
				values [ins->dreg] = LLVMBuildSDiv (builder, lhs, imm, dname);
				break;
			case OP_IDIV_UN_IMM:
			case OP_LDIV_UN_IMM:
				values [ins->dreg] = LLVMBuildUDiv (builder, lhs, imm, dname);
				break;
			case OP_IREM_IMM:
			case OP_LREM_IMM:
				values [ins->dreg] = LLVMBuildSRem (builder, lhs, imm, dname);
				break;
			case OP_IREM_UN_IMM:
				values [ins->dreg] = LLVMBuildURem (builder, lhs, imm, dname);
				break;
			case OP_IAND_IMM:
			case OP_LAND_IMM:
			case OP_AND_IMM:
				values [ins->dreg] = LLVMBuildAnd (builder, lhs, imm, dname);
				break;
			case OP_IOR_IMM:
			case OP_LOR_IMM:
				values [ins->dreg] = LLVMBuildOr (builder, lhs, imm, dname);
				break;
			case OP_IXOR_IMM:
			case OP_LXOR_IMM:
				values [ins->dreg] = LLVMBuildXor (builder, lhs, imm, dname);
				break;
			case OP_ISHL_IMM:
			case OP_LSHL_IMM:
			case OP_SHL_IMM:
				values [ins->dreg] = LLVMBuildShl (builder, lhs, imm, dname);
				break;
			case OP_ISHR_IMM:
			case OP_LSHR_IMM:
			case OP_SHR_IMM:
				values [ins->dreg] = LLVMBuildAShr (builder, lhs, imm, dname);
				break;
			case OP_ISHR_UN_IMM:
				/* This is used to implement conv.u4, so the lhs could be an i8 */
				lhs = convert (ctx, lhs, LLVMInt32Type ());
				imm = convert (ctx, imm, LLVMInt32Type ());
				values [ins->dreg] = LLVMBuildLShr (builder, lhs, imm, dname);
				break;
			case OP_LSHR_UN_IMM:
				values [ins->dreg] = LLVMBuildLShr (builder, lhs, imm, dname);
				break;
			default:
				g_assert_not_reached ();
			}
			break;
		}
		case OP_INEG:
			values [ins->dreg] = LLVMBuildSub (builder, LLVMConstInt (LLVMInt32Type (), 0, FALSE), convert (ctx, lhs, LLVMInt32Type ()), dname);
			break;
		case OP_LNEG:
			values [ins->dreg] = LLVMBuildSub (builder, LLVMConstInt (LLVMInt64Type (), 0, FALSE), lhs, dname);
			break;
		case OP_FNEG:
			lhs = convert (ctx, lhs, LLVMDoubleType ());
			values [ins->dreg] = LLVMBuildFSub (builder, LLVMConstReal (LLVMDoubleType (), 0.0), lhs, dname);
			break;
		case OP_INOT: {
			guint32 v = 0xffffffff;
			values [ins->dreg] = LLVMBuildXor (builder, LLVMConstInt (LLVMInt32Type (), v, FALSE), convert (ctx, lhs, LLVMInt32Type ()), dname);
			break;
		}
		case OP_LNOT: {
			guint64 v = 0xffffffffffffffffLL;
			values [ins->dreg] = LLVMBuildXor (builder, LLVMConstInt (LLVMInt64Type (), v, FALSE), lhs, dname);
			break;
		}
#if defined(TARGET_X86) || defined(TARGET_AMD64)
		case OP_X86_LEA: {
			LLVMValueRef v1, v2;

			v1 = LLVMBuildMul (builder, convert (ctx, rhs, IntPtrType ()), LLVMConstInt (IntPtrType (), (1 << ins->backend.shift_amount), FALSE), "");
			v2 = LLVMBuildAdd (builder, convert (ctx, lhs, IntPtrType ()), v1, "");
			values [ins->dreg] = LLVMBuildAdd (builder, v2, LLVMConstInt (IntPtrType (), ins->inst_imm, FALSE), dname);
			break;
		}
#endif

		case OP_ICONV_TO_I1:
		case OP_ICONV_TO_I2:
		case OP_ICONV_TO_I4:
		case OP_ICONV_TO_U1:
		case OP_ICONV_TO_U2:
		case OP_ICONV_TO_U4:
		case OP_LCONV_TO_I1:
		case OP_LCONV_TO_I2:
		case OP_LCONV_TO_U1:
		case OP_LCONV_TO_U2:
		case OP_LCONV_TO_U4: {
			gboolean sign;

			sign = (ins->opcode == OP_ICONV_TO_I1) || (ins->opcode == OP_ICONV_TO_I2) || (ins->opcode == OP_ICONV_TO_I4) || (ins->opcode == OP_LCONV_TO_I1) || (ins->opcode == OP_LCONV_TO_I2);

			/* Have to do two casts since our vregs have type int */
			v = LLVMBuildTrunc (builder, lhs, op_to_llvm_type (ins->opcode), "");
			if (sign)
				values [ins->dreg] = LLVMBuildSExt (builder, v, LLVMInt32Type (), dname);
			else
				values [ins->dreg] = LLVMBuildZExt (builder, v, LLVMInt32Type (), dname);
			break;
		}
		case OP_ICONV_TO_I8:
			values [ins->dreg] = LLVMBuildSExt (builder, lhs, LLVMInt64Type (), dname);
			break;
		case OP_ICONV_TO_U8:
			values [ins->dreg] = LLVMBuildZExt (builder, lhs, LLVMInt64Type (), dname);
			break;
		case OP_FCONV_TO_I4:
			values [ins->dreg] = LLVMBuildFPToSI (builder, lhs, LLVMInt32Type (), dname);
			break;
		case OP_FCONV_TO_I1:
			values [ins->dreg] = LLVMBuildSExt (builder, LLVMBuildFPToSI (builder, lhs, LLVMInt8Type (), dname), LLVMInt32Type (), "");
			break;
		case OP_FCONV_TO_U1:
			values [ins->dreg] = LLVMBuildZExt (builder, LLVMBuildFPToUI (builder, lhs, LLVMInt8Type (), dname), LLVMInt32Type (), "");
			break;
		case OP_FCONV_TO_I2:
			values [ins->dreg] = LLVMBuildSExt (builder, LLVMBuildFPToSI (builder, lhs, LLVMInt16Type (), dname), LLVMInt32Type (), "");
			break;
		case OP_FCONV_TO_U2:
			values [ins->dreg] = LLVMBuildZExt (builder, LLVMBuildFPToUI (builder, lhs, LLVMInt16Type (), dname), LLVMInt32Type (), "");
			break;
		case OP_FCONV_TO_I8:
			values [ins->dreg] = LLVMBuildFPToSI (builder, lhs, LLVMInt64Type (), dname);
			break;
		case OP_FCONV_TO_I:
			values [ins->dreg] = LLVMBuildFPToSI (builder, lhs, IntPtrType (), dname);
			break;
		case OP_ICONV_TO_R8:
		case OP_LCONV_TO_R8:
			values [ins->dreg] = LLVMBuildSIToFP (builder, lhs, LLVMDoubleType (), dname);
			break;
		case OP_LCONV_TO_R_UN:
			values [ins->dreg] = LLVMBuildUIToFP (builder, lhs, LLVMDoubleType (), dname);
			break;
#if SIZEOF_VOID_P == 4
		case OP_LCONV_TO_U:
#endif
		case OP_LCONV_TO_I4:
			values [ins->dreg] = LLVMBuildTrunc (builder, lhs, LLVMInt32Type (), dname);
			break;
		case OP_ICONV_TO_R4:
		case OP_LCONV_TO_R4:
			v = LLVMBuildSIToFP (builder, lhs, LLVMFloatType (), "");
			values [ins->dreg] = LLVMBuildFPExt (builder, v, LLVMDoubleType (), dname);
			break;
		case OP_FCONV_TO_R4:
			v = LLVMBuildFPTrunc (builder, lhs, LLVMFloatType (), "");
			values [ins->dreg] = LLVMBuildFPExt (builder, v, LLVMDoubleType (), dname);
			break;
		case OP_SEXT_I4:
			values [ins->dreg] = LLVMBuildSExt (builder, lhs, LLVMInt64Type (), dname);
			break;
		case OP_ZEXT_I4:
			values [ins->dreg] = LLVMBuildZExt (builder, lhs, LLVMInt64Type (), dname);
			break;
		case OP_TRUNC_I4:
			values [ins->dreg] = LLVMBuildTrunc (builder, lhs, LLVMInt32Type (), dname);
			break;
		case OP_LOCALLOC_IMM: {
			LLVMValueRef v;

			guint32 size = ins->inst_imm;
			size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);

			v = mono_llvm_build_alloca (builder, LLVMInt8Type (), LLVMConstInt (LLVMInt32Type (), size, FALSE), MONO_ARCH_FRAME_ALIGNMENT, "");

			if (ins->flags & MONO_INST_INIT) {
				LLVMValueRef args [5];

				args [0] = v;
				args [1] = LLVMConstInt (LLVMInt8Type (), 0, FALSE);
				args [2] = LLVMConstInt (LLVMInt32Type (), size, FALSE);
				args [3] = LLVMConstInt (LLVMInt32Type (), MONO_ARCH_FRAME_ALIGNMENT, FALSE);
				args [4] = LLVMConstInt (LLVMInt1Type (), 0, FALSE);
				LLVMBuildCall (builder, LLVMGetNamedFunction (module, memset_func_name), args, memset_param_count, "");
			}

			values [ins->dreg] = v;
			break;
		}
		case OP_LOCALLOC: {
			LLVMValueRef v, size;
				
			size = LLVMBuildAnd (builder, LLVMBuildAdd (builder, convert (ctx, lhs, LLVMInt32Type ()), LLVMConstInt (LLVMInt32Type (), MONO_ARCH_FRAME_ALIGNMENT - 1, FALSE), ""), LLVMConstInt (LLVMInt32Type (), ~ (MONO_ARCH_FRAME_ALIGNMENT - 1), FALSE), "");

			v = mono_llvm_build_alloca (builder, LLVMInt8Type (), size, MONO_ARCH_FRAME_ALIGNMENT, "");

			if (ins->flags & MONO_INST_INIT) {
				LLVMValueRef args [5];

				args [0] = v;
				args [1] = LLVMConstInt (LLVMInt8Type (), 0, FALSE);
				args [2] = size;
				args [3] = LLVMConstInt (LLVMInt32Type (), MONO_ARCH_FRAME_ALIGNMENT, FALSE);
				args [4] = LLVMConstInt (LLVMInt1Type (), 0, FALSE);
				LLVMBuildCall (builder, LLVMGetNamedFunction (module, memset_func_name), args, memset_param_count, "");
			}
			values [ins->dreg] = v;
			break;
		}

		case OP_LOADI1_MEMBASE:
		case OP_LOADU1_MEMBASE:
		case OP_LOADI2_MEMBASE:
		case OP_LOADU2_MEMBASE:
		case OP_LOADI4_MEMBASE:
		case OP_LOADU4_MEMBASE:
		case OP_LOADI8_MEMBASE:
		case OP_LOADR4_MEMBASE:
		case OP_LOADR8_MEMBASE:
		case OP_LOAD_MEMBASE:
		case OP_LOADI8_MEM:
		case OP_LOADU1_MEM:
		case OP_LOADU2_MEM:
		case OP_LOADI4_MEM:
		case OP_LOADU4_MEM:
		case OP_LOAD_MEM: {
			int size = 8;
			LLVMValueRef base, index, addr;
			LLVMTypeRef t;
			gboolean sext = FALSE, zext = FALSE;
			gboolean is_volatile = (ins->flags & MONO_INST_FAULT);

			t = load_store_to_llvm_type (ins->opcode, &size, &sext, &zext);

			if (sext || zext)
				dname = (char*)"";

			if ((ins->opcode == OP_LOADI8_MEM) || (ins->opcode == OP_LOAD_MEM) || (ins->opcode == OP_LOADI4_MEM) || (ins->opcode == OP_LOADU4_MEM) || (ins->opcode == OP_LOADU1_MEM) || (ins->opcode == OP_LOADU2_MEM)) {
				addr = LLVMConstInt (IntPtrType (), ins->inst_imm, FALSE);
			} else {
				/* _MEMBASE */
				base = lhs;

				if (ins->inst_offset == 0) {
					addr = base;
				} else if (ins->inst_offset % size != 0) {
					/* Unaligned load */
					index = LLVMConstInt (LLVMInt32Type (), ins->inst_offset, FALSE);
					addr = LLVMBuildGEP (builder, convert (ctx, base, LLVMPointerType (LLVMInt8Type (), 0)), &index, 1, "");
				} else {
					index = LLVMConstInt (LLVMInt32Type (), ins->inst_offset / size, FALSE);
					addr = LLVMBuildGEP (builder, convert (ctx, base, LLVMPointerType (t, 0)), &index, 1, "");
				}
			}

			addr = convert (ctx, addr, LLVMPointerType (t, 0));

			values [ins->dreg] = emit_load (ctx, bb, &builder, size, addr, dname, is_volatile);

			if (!is_volatile && (ins->flags & MONO_INST_CONSTANT_LOAD)) {
				/*
				 * These will signal LLVM that these loads do not alias any stores, and
				 * they can't fail, allowing them to be hoisted out of loops.
				 */
				set_metadata_flag (values [ins->dreg], "mono.noalias");
				set_metadata_flag (values [ins->dreg], "mono.nofail.load");
			}

			if (sext)
				values [ins->dreg] = LLVMBuildSExt (builder, values [ins->dreg], LLVMInt32Type (), dname);
			else if (zext)
				values [ins->dreg] = LLVMBuildZExt (builder, values [ins->dreg], LLVMInt32Type (), dname);
			else if (ins->opcode == OP_LOADR4_MEMBASE)
				values [ins->dreg] = LLVMBuildFPExt (builder, values [ins->dreg], LLVMDoubleType (), dname);
			break;
		}
				
		case OP_STOREI1_MEMBASE_REG:
		case OP_STOREI2_MEMBASE_REG:
		case OP_STOREI4_MEMBASE_REG:
		case OP_STOREI8_MEMBASE_REG:
		case OP_STORER4_MEMBASE_REG:
		case OP_STORER8_MEMBASE_REG:
		case OP_STORE_MEMBASE_REG: {
			int size = 8;
			LLVMValueRef index, addr;
			LLVMTypeRef t;
			gboolean sext = FALSE, zext = FALSE;
			gboolean is_volatile = (ins->flags & MONO_INST_FAULT);

			if (!values [ins->inst_destbasereg])
				LLVM_FAILURE (ctx, "inst_destbasereg");

			t = load_store_to_llvm_type (ins->opcode, &size, &sext, &zext);

			if (ins->inst_offset % size != 0) {
				/* Unaligned store */
				index = LLVMConstInt (LLVMInt32Type (), ins->inst_offset, FALSE);
				addr = LLVMBuildGEP (builder, convert (ctx, values [ins->inst_destbasereg], LLVMPointerType (LLVMInt8Type (), 0)), &index, 1, "");
			} else {
				index = LLVMConstInt (LLVMInt32Type (), ins->inst_offset / size, FALSE);				
				addr = LLVMBuildGEP (builder, convert (ctx, values [ins->inst_destbasereg], LLVMPointerType (t, 0)), &index, 1, "");
			}
			emit_store (ctx, bb, &builder, size, convert (ctx, values [ins->sreg1], t), convert (ctx, addr, LLVMPointerType (t, 0)), is_volatile);
			break;
		}

		case OP_STOREI1_MEMBASE_IMM:
		case OP_STOREI2_MEMBASE_IMM:
		case OP_STOREI4_MEMBASE_IMM:
		case OP_STOREI8_MEMBASE_IMM:
		case OP_STORE_MEMBASE_IMM: {
			int size = 8;
			LLVMValueRef index, addr;
			LLVMTypeRef t;
			gboolean sext = FALSE, zext = FALSE;
			gboolean is_volatile = (ins->flags & MONO_INST_FAULT);

			t = load_store_to_llvm_type (ins->opcode, &size, &sext, &zext);

			if (ins->inst_offset % size != 0) {
				/* Unaligned store */
				index = LLVMConstInt (LLVMInt32Type (), ins->inst_offset, FALSE);
				addr = LLVMBuildGEP (builder, convert (ctx, values [ins->inst_destbasereg], LLVMPointerType (LLVMInt8Type (), 0)), &index, 1, "");
			} else {
				index = LLVMConstInt (LLVMInt32Type (), ins->inst_offset / size, FALSE);				
				addr = LLVMBuildGEP (builder, convert (ctx, values [ins->inst_destbasereg], LLVMPointerType (t, 0)), &index, 1, "");
			}
			emit_store (ctx, bb, &builder, size, convert (ctx, LLVMConstInt (IntPtrType (), ins->inst_imm, FALSE), t), addr, is_volatile);
			break;
		}

		case OP_CHECK_THIS:
			emit_load (ctx, bb, &builder, sizeof (gpointer), convert (ctx, lhs, LLVMPointerType (IntPtrType (), 0)), "", TRUE);
			break;
		case OP_OUTARG_VTRETADDR:
			break;
		case OP_VOIDCALL:
		case OP_CALL:
		case OP_LCALL:
		case OP_FCALL:
		case OP_VCALL:
		case OP_VOIDCALL_MEMBASE:
		case OP_CALL_MEMBASE:
		case OP_LCALL_MEMBASE:
		case OP_FCALL_MEMBASE:
		case OP_VCALL_MEMBASE:
		case OP_VOIDCALL_REG:
		case OP_CALL_REG:
		case OP_LCALL_REG:
		case OP_FCALL_REG:
		case OP_VCALL_REG: {
			process_call (ctx, bb, &builder, ins);
			CHECK_FAILURE (ctx);
			break;
		}
		case OP_AOTCONST: {
			guint32 got_offset;
			LLVMValueRef indexes [2];
			MonoJumpInfo *ji;
			LLVMValueRef got_entry_addr;

			/* 
			 * FIXME: Can't allocate from the cfg mempool since that is freed if
			 * the LLVM compile fails.
			 */
			ji = g_new0 (MonoJumpInfo, 1);
			ji->type = (MonoJumpInfoType)ins->inst_i1;
			ji->data.target = ins->inst_p0;

			ji = mono_aot_patch_info_dup (ji);

			ji->next = cfg->patch_info;
			cfg->patch_info = ji;
				   
			//mono_add_patch_info (cfg, 0, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
			got_offset = mono_aot_get_got_offset (cfg->patch_info);
 
			indexes [0] = LLVMConstInt (LLVMInt32Type (), 0, FALSE);
			indexes [1] = LLVMConstInt (LLVMInt32Type (), (gssize)got_offset, FALSE);
			got_entry_addr = LLVMBuildGEP (builder, ctx->lmodule->got_var, indexes, 2, "");

			// FIXME: This doesn't work right now, because it must be
			// paired with an invariant.end, and even then, its only in effect
			// inside its basic block
#if 0
			{
				LLVMValueRef args [3];
				LLVMValueRef ptr, val;

				ptr = LLVMBuildBitCast (builder, got_entry_addr, LLVMPointerType (LLVMInt8Type (), 0), "ptr");

				args [0] = LLVMConstInt (LLVMInt64Type (), sizeof (gpointer), FALSE);
				args [1] = ptr;
				val = LLVMBuildCall (builder, LLVMGetNamedFunction (module, "llvm.invariant.start"), args, 2, "");
			}
#endif

			values [ins->dreg] = LLVMBuildLoad (builder, got_entry_addr, dname);
			break;
		}
		case OP_NOT_REACHED:
			LLVMBuildUnreachable (builder);
			has_terminator = TRUE;
			g_assert (bb->block_num < cfg->max_block_num);
			ctx->unreachable [bb->block_num] = TRUE;
			/* Might have instructions after this */
			while (ins->next) {
				MonoInst *next = ins->next;
				/* 
				 * FIXME: If later code uses the regs defined by these instructions,
				 * compilation will fail.
				 */
				MONO_DELETE_INS (bb, next);
			}				
			break;
		case OP_LDADDR: {
			MonoInst *var = ins->inst_p0;

			values [ins->dreg] = addresses [var->dreg];
			break;
		}
		case OP_SIN: {
			LLVMValueRef args [1];

			args [0] = convert (ctx, lhs, LLVMDoubleType ());
			values [ins->dreg] = LLVMBuildCall (builder, LLVMGetNamedFunction (module, "llvm.sin.f64"), args, 1, dname);
			break;
		}
		case OP_COS: {
			LLVMValueRef args [1];

			args [0] = convert (ctx, lhs, LLVMDoubleType ());
			values [ins->dreg] = LLVMBuildCall (builder, LLVMGetNamedFunction (module, "llvm.cos.f64"), args, 1, dname);
			break;
		}
		case OP_SQRT: {
			LLVMValueRef args [1];

#if 0
			/* This no longer seems to happen */
			/*
			 * LLVM optimizes sqrt(nan) into undefined in
			 * lib/Analysis/ConstantFolding.cpp
			 * Also, sqrt(NegativeInfinity) is optimized into 0.
			 */
			LLVM_FAILURE (ctx, "sqrt");
#endif
			args [0] = convert (ctx, lhs, LLVMDoubleType ());
			values [ins->dreg] = LLVMBuildCall (builder, LLVMGetNamedFunction (module, "llvm.sqrt.f64"), args, 1, dname);
			break;
		}
		case OP_ABS: {
			LLVMValueRef args [1];

			args [0] = convert (ctx, lhs, LLVMDoubleType ());
			values [ins->dreg] = LLVMBuildCall (builder, LLVMGetNamedFunction (module, "fabs"), args, 1, dname);
			break;
		}

		case OP_IMIN:
		case OP_LMIN:
		case OP_IMAX:
		case OP_LMAX:
		case OP_IMIN_UN:
		case OP_LMIN_UN:
		case OP_IMAX_UN:
		case OP_LMAX_UN: {
			LLVMValueRef v;

			lhs = convert (ctx, lhs, regtype_to_llvm_type (spec [MONO_INST_DEST]));
			rhs = convert (ctx, rhs, regtype_to_llvm_type (spec [MONO_INST_DEST]));

			switch (ins->opcode) {
			case OP_IMIN:
			case OP_LMIN:
				v = LLVMBuildICmp (builder, LLVMIntSLE, lhs, rhs, "");
				break;
			case OP_IMAX:
			case OP_LMAX:
				v = LLVMBuildICmp (builder, LLVMIntSGE, lhs, rhs, "");
				break;
			case OP_IMIN_UN:
			case OP_LMIN_UN:
				v = LLVMBuildICmp (builder, LLVMIntULE, lhs, rhs, "");
				break;
			case OP_IMAX_UN:
			case OP_LMAX_UN:
				v = LLVMBuildICmp (builder, LLVMIntUGE, lhs, rhs, "");
				break;
			default:
				g_assert_not_reached ();
				break;
			}
			values [ins->dreg] = LLVMBuildSelect (builder, v, lhs, rhs, dname);
			break;
		}
		case OP_ATOMIC_EXCHANGE_I4: {
			LLVMValueRef args [2];

			g_assert (ins->inst_offset == 0);

			args [0] = convert (ctx, lhs, LLVMPointerType (LLVMInt32Type (), 0));
			args [1] = rhs;

			values [ins->dreg] = mono_llvm_build_atomic_rmw (builder, LLVM_ATOMICRMW_OP_XCHG, args [0], args [1]);
			break;
		}
		case OP_ATOMIC_EXCHANGE_I8: {
			LLVMValueRef args [2];

			g_assert (ins->inst_offset == 0);

			args [0] = convert (ctx, lhs, LLVMPointerType (LLVMInt64Type (), 0));
			args [1] = convert (ctx, rhs, LLVMInt64Type ());
			values [ins->dreg] = mono_llvm_build_atomic_rmw (builder, LLVM_ATOMICRMW_OP_XCHG, args [0], args [1]);
			break;
		}
		case OP_ATOMIC_ADD_NEW_I4: {
			LLVMValueRef args [2];

			g_assert (ins->inst_offset == 0);

			args [0] = convert (ctx, lhs, LLVMPointerType (LLVMInt32Type (), 0));
			args [1] = rhs;
			values [ins->dreg] = LLVMBuildAdd (builder, mono_llvm_build_atomic_rmw (builder, LLVM_ATOMICRMW_OP_ADD, args [0], args [1]), args [1], dname);
			break;
		}
		case OP_ATOMIC_ADD_NEW_I8: {
			LLVMValueRef args [2];

			g_assert (ins->inst_offset == 0);

			args [0] = convert (ctx, lhs, LLVMPointerType (LLVMInt64Type (), 0));
			args [1] = convert (ctx, rhs, LLVMInt64Type ());
			values [ins->dreg] = LLVMBuildAdd (builder, mono_llvm_build_atomic_rmw (builder, LLVM_ATOMICRMW_OP_ADD, args [0], args [1]), args [1], dname);
			break;
		}
		case OP_ATOMIC_CAS_I4:
		case OP_ATOMIC_CAS_I8: {
			LLVMValueRef args [3];
			LLVMTypeRef t;
				
			if (ins->opcode == OP_ATOMIC_CAS_I4) {
				t = LLVMInt32Type ();
			} else {
				t = LLVMInt64Type ();
			}

			args [0] = convert (ctx, lhs, LLVMPointerType (t, 0));
			/* comparand */
			args [1] = convert (ctx, values [ins->sreg3], t);
			/* new value */
			args [2] = convert (ctx, values [ins->sreg2], t);
			values [ins->dreg] = mono_llvm_build_cmpxchg (builder, args [0], args [1], args [2]);
			break;
		}
		case OP_MEMORY_BARRIER: {
			mono_llvm_build_fence (builder);
			break;
		}
		case OP_RELAXED_NOP: {
#if defined(TARGET_AMD64) || defined(TARGET_X86)
			emit_call (ctx, bb, &builder, LLVMGetNamedFunction (ctx->module, "llvm.x86.sse2.pause"), NULL, 0);
			break;
#else
			break;
#endif
		}
		case OP_TLS_GET: {
#if (defined(TARGET_AMD64) || defined(TARGET_X86)) && defined(__linux__)
#ifdef TARGET_AMD64
			// 257 == FS segment register
			LLVMTypeRef ptrtype = LLVMPointerType (IntPtrType (), 257);
#else
			// 256 == GS segment register
			LLVMTypeRef ptrtype = LLVMPointerType (IntPtrType (), 256);
#endif

			// FIXME: XEN
			values [ins->dreg] = LLVMBuildLoad (builder, LLVMBuildIntToPtr (builder, LLVMConstInt (IntPtrType (), ins->inst_offset, TRUE), ptrtype, ""), "");
#else
			LLVM_FAILURE (ctx, "opcode tls-get");
#endif

			break;
		}

			/*
			 * Overflow opcodes.
			 */
		case OP_IADD_OVF:
		case OP_IADD_OVF_UN:
		case OP_ISUB_OVF:
		case OP_ISUB_OVF_UN:
		case OP_IMUL_OVF:
		case OP_IMUL_OVF_UN:
#if SIZEOF_VOID_P == 8
		case OP_LADD_OVF:
		case OP_LADD_OVF_UN:
		case OP_LSUB_OVF:
		case OP_LSUB_OVF_UN:
		case OP_LMUL_OVF:
		case OP_LMUL_OVF_UN:
#endif
			{
				LLVMValueRef args [2], val, ovf, func;

				args [0] = convert (ctx, lhs, op_to_llvm_type (ins->opcode));
				args [1] = convert (ctx, rhs, op_to_llvm_type (ins->opcode));
				func = LLVMGetNamedFunction (module, ovf_op_to_intrins (ins->opcode));
				g_assert (func);
				val = LLVMBuildCall (builder, func, args, 2, "");
				values [ins->dreg] = LLVMBuildExtractValue (builder, val, 0, dname);
				ovf = LLVMBuildExtractValue (builder, val, 1, "");
				emit_cond_system_exception (ctx, bb, "OverflowException", ovf);
				CHECK_FAILURE (ctx);
				builder = ctx->builder;
				break;
			}

			/* 
			 * Valuetypes.
			 *   We currently model them using arrays. Promotion to local vregs is 
			 * disabled for them in mono_handle_global_vregs () in the LLVM case, 
			 * so we always have an entry in cfg->varinfo for them.
			 * FIXME: Is this needed ?
			 */
		case OP_VZERO: {
			MonoClass *klass = ins->klass;
			LLVMValueRef args [5];

			if (!klass) {
				// FIXME:
				LLVM_FAILURE (ctx, "!klass");
				break;
			}

			if (!addresses [ins->dreg])
				addresses [ins->dreg] = build_alloca (ctx, &klass->byval_arg);
			args [0] = LLVMBuildBitCast (builder, addresses [ins->dreg], LLVMPointerType (LLVMInt8Type (), 0), "");
			args [1] = LLVMConstInt (LLVMInt8Type (), 0, FALSE);
			args [2] = LLVMConstInt (LLVMInt32Type (), mono_class_value_size (klass, NULL), FALSE);
			// FIXME: Alignment
			args [3] = LLVMConstInt (LLVMInt32Type (), 0, FALSE);
			args [4] = LLVMConstInt (LLVMInt1Type (), 0, FALSE);
			LLVMBuildCall (builder, LLVMGetNamedFunction (module, memset_func_name), args, memset_param_count, "");
			break;
		}

		case OP_STOREV_MEMBASE:
		case OP_LOADV_MEMBASE:
		case OP_VMOVE: {
			MonoClass *klass = ins->klass;
			LLVMValueRef src = NULL, dst, args [5];
			gboolean done = FALSE;

			if (!klass) {
				// FIXME:
				LLVM_FAILURE (ctx, "!klass");
				break;
			}

			if (mini_is_gsharedvt_klass (cfg, klass)) {
				// FIXME:
				LLVM_FAILURE (ctx, "gsharedvt");
				break;
			}

			switch (ins->opcode) {
			case OP_STOREV_MEMBASE:
				if (cfg->gen_write_barriers && klass->has_references && ins->inst_destbasereg != cfg->frame_reg) {
					/* FIXME: Emit write barriers like in mini_emit_stobj () */
					LLVM_FAILURE (ctx, "storev_membase + write barriers");
					break;
				}
				if (!addresses [ins->sreg1]) {
					/* SIMD */
					g_assert (values [ins->sreg1]);
					dst = convert (ctx, LLVMBuildAdd (builder, convert (ctx, values [ins->inst_destbasereg], IntPtrType ()), LLVMConstInt (IntPtrType (), ins->inst_offset, FALSE), ""), LLVMPointerType (type_to_llvm_type (ctx, &klass->byval_arg), 0));
					LLVMBuildStore (builder, values [ins->sreg1], dst);
					done = TRUE;
				} else {
					src = LLVMBuildBitCast (builder, addresses [ins->sreg1], LLVMPointerType (LLVMInt8Type (), 0), "");
					dst = convert (ctx, LLVMBuildAdd (builder, convert (ctx, values [ins->inst_destbasereg], IntPtrType ()), LLVMConstInt (IntPtrType (), ins->inst_offset, FALSE), ""), LLVMPointerType (LLVMInt8Type (), 0));
				}
				break;
			case OP_LOADV_MEMBASE:
				if (!addresses [ins->dreg])
					addresses [ins->dreg] = build_alloca (ctx, &klass->byval_arg);
				src = convert (ctx, LLVMBuildAdd (builder, convert (ctx, values [ins->inst_basereg], IntPtrType ()), LLVMConstInt (IntPtrType (), ins->inst_offset, FALSE), ""), LLVMPointerType (LLVMInt8Type (), 0));
				dst = LLVMBuildBitCast (builder, addresses [ins->dreg], LLVMPointerType (LLVMInt8Type (), 0), "");
				break;
			case OP_VMOVE:
				if (!addresses [ins->sreg1])
					addresses [ins->sreg1] = build_alloca (ctx, &klass->byval_arg);
				if (!addresses [ins->dreg])
					addresses [ins->dreg] = build_alloca (ctx, &klass->byval_arg);
				src = LLVMBuildBitCast (builder, addresses [ins->sreg1], LLVMPointerType (LLVMInt8Type (), 0), "");
				dst = LLVMBuildBitCast (builder, addresses [ins->dreg], LLVMPointerType (LLVMInt8Type (), 0), "");
				break;
			default:
				g_assert_not_reached ();
			}
			CHECK_FAILURE (ctx);

			if (done)
				break;

			args [0] = dst;
			args [1] = src;
			args [2] = LLVMConstInt (LLVMInt32Type (), mono_class_value_size (klass, NULL), FALSE);
			args [3] = LLVMConstInt (LLVMInt32Type (), 0, FALSE);
			// FIXME: Alignment
			args [3] = LLVMConstInt (LLVMInt32Type (), 0, FALSE);
			args [4] = LLVMConstInt (LLVMInt1Type (), 0, FALSE);
			LLVMBuildCall (builder, LLVMGetNamedFunction (module, memcpy_func_name), args, memcpy_param_count, "");
			break;
		}
		case OP_LLVM_OUTARG_VT:
			if (!addresses [ins->sreg1]) {
				addresses [ins->sreg1] = build_alloca (ctx, &ins->klass->byval_arg);
				g_assert (values [ins->sreg1]);
				LLVMBuildStore (builder, values [ins->sreg1], addresses [ins->sreg1]);
			}
			addresses [ins->dreg] = addresses [ins->sreg1];
			break;

			/* 
			 * SIMD
			 */
#if defined(TARGET_X86) || defined(TARGET_AMD64)
		case OP_XZERO: {
			values [ins->dreg] = LLVMConstNull (type_to_llvm_type (ctx, &ins->klass->byval_arg));
			break;
		}
		case OP_LOADX_MEMBASE: {
			LLVMTypeRef t = type_to_llvm_type (ctx, &ins->klass->byval_arg);
			LLVMValueRef src;

			src = convert (ctx, LLVMBuildAdd (builder, convert (ctx, values [ins->inst_basereg], IntPtrType ()), LLVMConstInt (IntPtrType (), ins->inst_offset, FALSE), ""), LLVMPointerType (t, 0));
			values [ins->dreg] = mono_llvm_build_aligned_load (builder, src, "", FALSE, 1);
			break;
		}
		case OP_STOREX_MEMBASE: {
			LLVMTypeRef t = LLVMTypeOf (values [ins->sreg1]);
			LLVMValueRef dest;

			dest = convert (ctx, LLVMBuildAdd (builder, convert (ctx, values [ins->inst_destbasereg], IntPtrType ()), LLVMConstInt (IntPtrType (), ins->inst_offset, FALSE), ""), LLVMPointerType (t, 0));
			mono_llvm_build_aligned_store (builder, values [ins->sreg1], dest, FALSE, 1);
			break;
		}
		case OP_PADDB:
		case OP_PADDW:
		case OP_PADDD:
		case OP_PADDQ:
			values [ins->dreg] = LLVMBuildAdd (builder, lhs, rhs, "");
			break;
		case OP_ADDPD:
		case OP_ADDPS:
			values [ins->dreg] = LLVMBuildFAdd (builder, lhs, rhs, "");
			break;
		case OP_PSUBB:
		case OP_PSUBW:
		case OP_PSUBD:
		case OP_PSUBQ:
			values [ins->dreg] = LLVMBuildSub (builder, lhs, rhs, "");
			break;
		case OP_SUBPD:
		case OP_SUBPS:
			values [ins->dreg] = LLVMBuildFSub (builder, lhs, rhs, "");
			break;
		case OP_MULPD:
		case OP_MULPS:
			values [ins->dreg] = LLVMBuildFMul (builder, lhs, rhs, "");
			break;
		case OP_DIVPD:
		case OP_DIVPS:
			values [ins->dreg] = LLVMBuildFDiv (builder, lhs, rhs, "");
			break;
		case OP_PAND:
			values [ins->dreg] = LLVMBuildAnd (builder, lhs, rhs, "");
			break;
		case OP_POR:
			values [ins->dreg] = LLVMBuildOr (builder, lhs, rhs, "");
			break;
		case OP_PXOR:
			values [ins->dreg] = LLVMBuildXor (builder, lhs, rhs, "");
			break;
		case OP_PMULW:
		case OP_PMULD:
			values [ins->dreg] = LLVMBuildMul (builder, lhs, rhs, "");
			break;
		case OP_ANDPS:
		case OP_ANDNPS:
		case OP_ORPS:
		case OP_XORPS:
		case OP_ANDPD:
		case OP_ANDNPD:
		case OP_ORPD:
		case OP_XORPD: {
			LLVMTypeRef t, rt;
			LLVMValueRef v = NULL;

			switch (ins->opcode) {
			case OP_ANDPS:
			case OP_ANDNPS:
			case OP_ORPS:
			case OP_XORPS:
				t = LLVMVectorType (LLVMInt32Type (), 4);
				rt = LLVMVectorType (LLVMFloatType (), 4);
				break;
			case OP_ANDPD:
			case OP_ANDNPD:
			case OP_ORPD:
			case OP_XORPD:
				t = LLVMVectorType (LLVMInt64Type (), 2);
				rt = LLVMVectorType (LLVMDoubleType (), 2);
				break;
			default:
				t = LLVMInt32Type ();
				rt = LLVMInt32Type ();
				g_assert_not_reached ();
			}

			lhs = LLVMBuildBitCast (builder, lhs, t, "");
			rhs = LLVMBuildBitCast (builder, rhs, t, "");
			switch (ins->opcode) {
			case OP_ANDPS:
			case OP_ANDPD:
				v = LLVMBuildAnd (builder, lhs, rhs, "");
				break;
			case OP_ORPS:
			case OP_ORPD:
				v = LLVMBuildOr (builder, lhs, rhs, "");
				break;
			case OP_XORPS:
			case OP_XORPD:
				v = LLVMBuildXor (builder, lhs, rhs, "");
				break;
			case OP_ANDNPS:
			case OP_ANDNPD:
				v = LLVMBuildAnd (builder, rhs, LLVMBuildNot (builder, lhs, ""), "");
				break;
			}
			values [ins->dreg] = LLVMBuildBitCast (builder, v, rt, "");
			break;
		}
		case OP_MINPD:
		case OP_MINPS:
		case OP_MAXPD:
		case OP_MAXPS:
		case OP_ADDSUBPD:
		case OP_ADDSUBPS:
		case OP_PMIND_UN:
		case OP_PMINW_UN:
		case OP_PMINB_UN:
		case OP_PMINW:
		case OP_PMAXD_UN:
		case OP_PMAXW_UN:
		case OP_PMAXB_UN:
		case OP_HADDPD:
		case OP_HADDPS:
		case OP_HSUBPD:
		case OP_HSUBPS:
		case OP_PADDB_SAT:
		case OP_PADDW_SAT:
		case OP_PSUBB_SAT:
		case OP_PSUBW_SAT:
		case OP_PADDB_SAT_UN:
		case OP_PADDW_SAT_UN:
		case OP_PSUBB_SAT_UN:
		case OP_PSUBW_SAT_UN:
		case OP_PAVGB_UN:
		case OP_PAVGW_UN:
		case OP_PACKW:
		case OP_PACKD:
		case OP_PACKW_UN:
		case OP_PACKD_UN:
		case OP_PMULW_HIGH:
		case OP_PMULW_HIGH_UN: {
			LLVMValueRef args [2];

			args [0] = lhs;
			args [1] = rhs;

			values [ins->dreg] = LLVMBuildCall (builder, LLVMGetNamedFunction (module, simd_op_to_intrins (ins->opcode)), args, 2, dname);
			break;
		}
		case OP_PCMPEQB:
		case OP_PCMPEQW:
		case OP_PCMPEQD:
		case OP_PCMPEQQ: {
			values [ins->dreg] = LLVMBuildSExt (builder, LLVMBuildICmp (builder, LLVMIntEQ, lhs, rhs, ""), LLVMTypeOf (lhs), "");
			break;
		}
		case OP_PCMPGTB: {
			values [ins->dreg] = LLVMBuildSExt (builder, LLVMBuildICmp (builder, LLVMIntSGT, lhs, rhs, ""), LLVMTypeOf (lhs), "");
			break;
		}
		case OP_EXTRACT_R8:
		case OP_EXTRACT_I8:
		case OP_EXTRACT_I4:
		case OP_EXTRACT_I2:
		case OP_EXTRACT_U2:
		case OP_EXTRACTX_U2:
		case OP_EXTRACT_I1:
		case OP_EXTRACT_U1: {
			LLVMTypeRef t;
			gboolean zext = FALSE;

			t = simd_op_to_llvm_type (ins->opcode);

			switch (ins->opcode) {
			case OP_EXTRACT_R8:
			case OP_EXTRACT_I8:
			case OP_EXTRACT_I4:
			case OP_EXTRACT_I2:
			case OP_EXTRACT_I1:
				break;
			case OP_EXTRACT_U2:
			case OP_EXTRACTX_U2:
			case OP_EXTRACT_U1:
				zext = TRUE;
				break;
			default:
				t = LLVMInt32Type ();
				g_assert_not_reached ();
			}

			lhs = LLVMBuildBitCast (builder, lhs, t, "");
			values [ins->dreg] = LLVMBuildExtractElement (builder, lhs, LLVMConstInt (LLVMInt32Type (), ins->inst_c0, FALSE), "");
			if (zext)
				values [ins->dreg] = LLVMBuildZExt (builder, values [ins->dreg], LLVMInt32Type (), "");
			break;
		}

		case OP_EXPAND_I1:
		case OP_EXPAND_I2:
		case OP_EXPAND_I4:
		case OP_EXPAND_I8:
		case OP_EXPAND_R4:
		case OP_EXPAND_R8: {
			LLVMTypeRef t = simd_op_to_llvm_type (ins->opcode);
			LLVMValueRef mask [16], v;

			for (i = 0; i < 16; ++i)
				mask [i] = LLVMConstInt (LLVMInt32Type (), 0, FALSE);

			v = convert (ctx, values [ins->sreg1], LLVMGetElementType (t));

			values [ins->dreg] = LLVMBuildInsertElement (builder, LLVMConstNull (t), v, LLVMConstInt (LLVMInt32Type (), 0, FALSE), "");
			values [ins->dreg] = LLVMBuildShuffleVector (builder, values [ins->dreg], LLVMGetUndef (t), LLVMConstVector (mask, LLVMGetVectorSize (t)), "");
			break;
		}

		case OP_INSERT_I1:
			values [ins->dreg] = LLVMBuildInsertElement (builder, values [ins->sreg1], convert (ctx, values [ins->sreg2], LLVMInt8Type ()), LLVMConstInt (LLVMInt32Type (), ins->inst_c0, FALSE), dname);
			break;
		case OP_INSERT_I2:
			values [ins->dreg] = LLVMBuildInsertElement (builder, values [ins->sreg1], convert (ctx, values [ins->sreg2], LLVMInt16Type ()), LLVMConstInt (LLVMInt32Type (), ins->inst_c0, FALSE), dname);
			break;
		case OP_INSERT_I4:
			values [ins->dreg] = LLVMBuildInsertElement (builder, values [ins->sreg1], convert (ctx, values [ins->sreg2], LLVMInt32Type ()), LLVMConstInt (LLVMInt32Type (), ins->inst_c0, FALSE), dname);
			break;
		case OP_INSERT_I8:
			values [ins->dreg] = LLVMBuildInsertElement (builder, values [ins->sreg1], convert (ctx, values [ins->sreg2], LLVMInt64Type ()), LLVMConstInt (LLVMInt32Type (), ins->inst_c0, FALSE), dname);
			break;
		case OP_INSERT_R4:
			values [ins->dreg] = LLVMBuildInsertElement (builder, values [ins->sreg1], convert (ctx, values [ins->sreg2], LLVMFloatType ()), LLVMConstInt (LLVMInt32Type (), ins->inst_c0, FALSE), dname);
			break;
		case OP_INSERT_R8:
			values [ins->dreg] = LLVMBuildInsertElement (builder, values [ins->sreg1], convert (ctx, values [ins->sreg2], LLVMDoubleType ()), LLVMConstInt (LLVMInt32Type (), ins->inst_c0, FALSE), dname);
			break;

		case OP_CVTDQ2PD:
		case OP_CVTDQ2PS:
		case OP_CVTPD2DQ:
		case OP_CVTPS2DQ:
		case OP_CVTPD2PS:
		case OP_CVTPS2PD:
		case OP_CVTTPD2DQ:
		case OP_CVTTPS2DQ:
		case OP_EXTRACT_MASK:
		case OP_SQRTPS:
		case OP_SQRTPD:
		case OP_RSQRTPS:
		case OP_RCPPS: {
			LLVMValueRef v;

			v = convert (ctx, values [ins->sreg1], simd_op_to_llvm_type (ins->opcode));

			values [ins->dreg] = LLVMBuildCall (builder, LLVMGetNamedFunction (module, simd_op_to_intrins (ins->opcode)), &v, 1, dname);
			break;
		}

		case OP_ICONV_TO_R8_RAW:
			/* Same as OP_ICONV_TO_R8 */
			values [ins->dreg] = convert (ctx, LLVMBuildBitCast (builder, lhs, LLVMFloatType (), ""), LLVMDoubleType ());
			break;

		case OP_COMPPS:
		case OP_COMPPD: {
			LLVMValueRef args [3];

			args [0] = lhs;
			args [1] = rhs;
			args [2] = LLVMConstInt (LLVMInt8Type (), ins->inst_c0, FALSE);

			values [ins->dreg] = LLVMBuildCall (builder, LLVMGetNamedFunction (module, simd_op_to_intrins (ins->opcode)), args, 3, dname);
			break;
		}

		case OP_ICONV_TO_X:
			/* This is only used for implementing shifts by non-immediate */
			values [ins->dreg] = lhs;
			break;

		case OP_PSHRW:
		case OP_PSHRD:
		case OP_PSHRQ:
		case OP_PSARW:
		case OP_PSARD:
		case OP_PSHLW:
		case OP_PSHLD:
		case OP_PSHLQ: {
			LLVMValueRef args [3];

			args [0] = lhs;
			args [1] = LLVMConstInt (LLVMInt32Type (), ins->inst_imm, FALSE);

			values [ins->dreg] = LLVMBuildCall (builder, LLVMGetNamedFunction (module, simd_op_to_intrins (ins->opcode)), args, 2, dname);
			break;
		}

		case OP_PSHRW_REG:
		case OP_PSHRD_REG:
		case OP_PSHRQ_REG:
		case OP_PSARW_REG:
		case OP_PSARD_REG:
		case OP_PSHLW_REG:
		case OP_PSHLD_REG:
		case OP_PSHLQ_REG: {
			LLVMValueRef args [3];

			args [0] = lhs;
			args [1] = values [ins->sreg2];

			values [ins->dreg] = LLVMBuildCall (builder, LLVMGetNamedFunction (module, simd_op_to_intrins (ins->opcode)), args, 2, dname);
			break;
		}

		case OP_SHUFPS:
		case OP_SHUFPD:
		case OP_PSHUFLED:
		case OP_PSHUFLEW_LOW:
		case OP_PSHUFLEW_HIGH: {
			int mask [16];
			LLVMValueRef v1 = NULL, v2 = NULL, mask_values [16];
			int i, mask_size = 0;
			int imask = ins->inst_c0;
	
			/* Convert the x86 shuffle mask to LLVM's */
			switch (ins->opcode) {
			case OP_SHUFPS:
				mask_size = 4;
				mask [0] = ((imask >> 0) & 3);
				mask [1] = ((imask >> 2) & 3);
				mask [2] = ((imask >> 4) & 3) + 4;
				mask [3] = ((imask >> 6) & 3) + 4;
				v1 = values [ins->sreg1];
				v2 = values [ins->sreg2];
				break;
			case OP_SHUFPD:
				mask_size = 2;
				mask [0] = ((imask >> 0) & 1);
				mask [1] = ((imask >> 1) & 1) + 2;
				v1 = values [ins->sreg1];
				v2 = values [ins->sreg2];
				break;
			case OP_PSHUFLEW_LOW:
				mask_size = 8;
				mask [0] = ((imask >> 0) & 3);
				mask [1] = ((imask >> 2) & 3);
				mask [2] = ((imask >> 4) & 3);
				mask [3] = ((imask >> 6) & 3);
				mask [4] = 4 + 0;
				mask [5] = 4 + 1;
				mask [6] = 4 + 2;
				mask [7] = 4 + 3;
				v1 = values [ins->sreg1];
				v2 = LLVMGetUndef (LLVMTypeOf (v1));
				break;
			case OP_PSHUFLEW_HIGH:
				mask_size = 8;
				mask [0] = 0;
				mask [1] = 1;
				mask [2] = 2;
				mask [3] = 3;
				mask [4] = 4 + ((imask >> 0) & 3);
				mask [5] = 4 + ((imask >> 2) & 3);
				mask [6] = 4 + ((imask >> 4) & 3);
				mask [7] = 4 + ((imask >> 6) & 3);
				v1 = values [ins->sreg1];
				v2 = LLVMGetUndef (LLVMTypeOf (v1));
				break;
			case OP_PSHUFLED:
				mask_size = 4;
				mask [0] = ((imask >> 0) & 3);
				mask [1] = ((imask >> 2) & 3);
				mask [2] = ((imask >> 4) & 3);
				mask [3] = ((imask >> 6) & 3);
				v1 = values [ins->sreg1];
				v2 = LLVMGetUndef (LLVMTypeOf (v1));
				break;
			default:
				g_assert_not_reached ();
			}
			for (i = 0; i < mask_size; ++i)
				mask_values [i] = LLVMConstInt (LLVMInt32Type (), mask [i], FALSE);

			values [ins->dreg] =
				LLVMBuildShuffleVector (builder, v1, v2,
										LLVMConstVector (mask_values, mask_size), dname);
			break;
		}

		case OP_UNPACK_LOWB:
		case OP_UNPACK_LOWW:
		case OP_UNPACK_LOWD:
		case OP_UNPACK_LOWQ:
		case OP_UNPACK_LOWPS:
		case OP_UNPACK_LOWPD:
		case OP_UNPACK_HIGHB:
		case OP_UNPACK_HIGHW:
		case OP_UNPACK_HIGHD:
		case OP_UNPACK_HIGHQ:
		case OP_UNPACK_HIGHPS:
		case OP_UNPACK_HIGHPD: {
			int mask [16];
			LLVMValueRef mask_values [16];
			int i, mask_size = 0;
			gboolean low = FALSE;

			switch (ins->opcode) {
			case OP_UNPACK_LOWB:
				mask_size = 16;
				low = TRUE;
				break;
			case OP_UNPACK_LOWW:
				mask_size = 8;
				low = TRUE;
				break;
			case OP_UNPACK_LOWD:
			case OP_UNPACK_LOWPS:
				mask_size = 4;
				low = TRUE;
				break;
			case OP_UNPACK_LOWQ:
			case OP_UNPACK_LOWPD:
				mask_size = 2;
				low = TRUE;
				break;
			case OP_UNPACK_HIGHB:
				mask_size = 16;
				break;
			case OP_UNPACK_HIGHW:
				mask_size = 8;
				break;
			case OP_UNPACK_HIGHD:
			case OP_UNPACK_HIGHPS:
				mask_size = 4;
				break;
			case OP_UNPACK_HIGHQ:
			case OP_UNPACK_HIGHPD:
				mask_size = 2;
				break;
			default:
				g_assert_not_reached ();
			}

			if (low) {
				for (i = 0; i < (mask_size / 2); ++i) {
					mask [(i * 2)] = i;
					mask [(i * 2) + 1] = mask_size + i;
				}
			} else {
				for (i = 0; i < (mask_size / 2); ++i) {
					mask [(i * 2)] = (mask_size / 2) + i;
					mask [(i * 2) + 1] = mask_size + (mask_size / 2) + i;
				}
			}

			for (i = 0; i < mask_size; ++i)
				mask_values [i] = LLVMConstInt (LLVMInt32Type (), mask [i], FALSE);
			
			values [ins->dreg] =
				LLVMBuildShuffleVector (builder, values [ins->sreg1], values [ins->sreg2],
										LLVMConstVector (mask_values, mask_size), dname);
			break;
		}

		case OP_DUPPD: {
			LLVMTypeRef t = simd_op_to_llvm_type (ins->opcode);
			LLVMValueRef v, val;

			v = LLVMBuildExtractElement (builder, lhs, LLVMConstInt (LLVMInt32Type (), 0, FALSE), "");
			val = LLVMConstNull (t);
			val = LLVMBuildInsertElement (builder, val, v, LLVMConstInt (LLVMInt32Type (), 0, FALSE), "");
			val = LLVMBuildInsertElement (builder, val, v, LLVMConstInt (LLVMInt32Type (), 1, FALSE), dname);

			values [ins->dreg] = val;
			break;
		}
		case OP_DUPPS_LOW:
		case OP_DUPPS_HIGH: {
			LLVMTypeRef t = simd_op_to_llvm_type (ins->opcode);
			LLVMValueRef v1, v2, val;
			

			if (ins->opcode == OP_DUPPS_LOW) {
				v1 = LLVMBuildExtractElement (builder, lhs, LLVMConstInt (LLVMInt32Type (), 0, FALSE), "");
				v2 = LLVMBuildExtractElement (builder, lhs, LLVMConstInt (LLVMInt32Type (), 2, FALSE), "");
			} else {
				v1 = LLVMBuildExtractElement (builder, lhs, LLVMConstInt (LLVMInt32Type (), 1, FALSE), "");
				v2 = LLVMBuildExtractElement (builder, lhs, LLVMConstInt (LLVMInt32Type (), 3, FALSE), "");
			}
			val = LLVMConstNull (t);
			val = LLVMBuildInsertElement (builder, val, v1, LLVMConstInt (LLVMInt32Type (), 0, FALSE), "");
			val = LLVMBuildInsertElement (builder, val, v1, LLVMConstInt (LLVMInt32Type (), 1, FALSE), "");
			val = LLVMBuildInsertElement (builder, val, v2, LLVMConstInt (LLVMInt32Type (), 2, FALSE), "");
			val = LLVMBuildInsertElement (builder, val, v2, LLVMConstInt (LLVMInt32Type (), 3, FALSE), "");
			
			values [ins->dreg] = val;
			break;
		}

#endif /* SIMD */

		case OP_DUMMY_USE:
			break;

			/*
			 * EXCEPTION HANDLING
			 */
		case OP_IMPLICIT_EXCEPTION:
			/* This marks a place where an implicit exception can happen */
			if (bb->region != -1)
				LLVM_FAILURE (ctx, "implicit-exception");
			break;
		case OP_THROW:
		case OP_RETHROW: {
			MonoMethodSignature *throw_sig;
			LLVMValueRef callee, arg;
			gboolean rethrow = (ins->opcode == OP_RETHROW);
			const char *icall_name;
				
			callee = rethrow ? ctx->lmodule->rethrow : ctx->lmodule->throw;
			icall_name = rethrow ? "mono_arch_rethrow_exception" : "mono_arch_throw_exception";

			if (!callee) {
				throw_sig = mono_metadata_signature_alloc (mono_get_corlib (), 1);
				throw_sig->ret = &mono_get_void_class ()->byval_arg;
				throw_sig->params [0] = &mono_get_object_class ()->byval_arg;
				if (cfg->compile_aot) {
					callee = get_plt_entry (ctx, sig_to_llvm_sig (ctx, throw_sig), MONO_PATCH_INFO_INTERNAL_METHOD, icall_name);
				} else {
					callee = LLVMAddFunction (module, icall_name, sig_to_llvm_sig (ctx, throw_sig));

#ifdef TARGET_X86
					/* 
					 * LLVM doesn't push the exception argument, so we need a different
					 * trampoline.
					 */
					LLVMAddGlobalMapping (ee, callee, resolve_patch (cfg, MONO_PATCH_INFO_INTERNAL_METHOD, rethrow ? "llvm_rethrow_exception_trampoline" : "llvm_throw_exception_trampoline"));
#else
					LLVMAddGlobalMapping (ee, callee, resolve_patch (cfg, MONO_PATCH_INFO_INTERNAL_METHOD, icall_name));
#endif
				}

				mono_memory_barrier ();
				if (rethrow)
					ctx->lmodule->rethrow = callee;
				else
					ctx->lmodule->throw = callee;
			}
			arg = convert (ctx, lhs, type_to_llvm_type (ctx, &mono_get_object_class ()->byval_arg));
			emit_call (ctx, bb, &builder, callee, &arg, 1);
			break;
		}
		case OP_CALL_HANDLER: {
			/* 
			 * We don't 'call' handlers, but instead simply branch to them.
			 * The code generated by ENDFINALLY will branch back to us.
			 */
			LLVMBasicBlockRef noex_bb;
			GSList *bb_list;
			BBInfo *info = &bblocks [ins->inst_target_bb->block_num];

			bb_list = info->call_handler_return_bbs;

			/* 
			 * Set the indicator variable for the finally clause.
			 */
			lhs = info->finally_ind;
			g_assert (lhs);
			LLVMBuildStore (builder, LLVMConstInt (LLVMInt32Type (), g_slist_length (bb_list) + 1, FALSE), lhs);
				
			/* Branch to the finally clause */
			LLVMBuildBr (builder, info->call_handler_target_bb);

			noex_bb = gen_bb (ctx, "CALL_HANDLER_CONT_BB");
			info->call_handler_return_bbs = g_slist_append_mempool (cfg->mempool, info->call_handler_return_bbs, noex_bb);

			builder = ctx->builder = create_builder (ctx);
			LLVMPositionBuilderAtEnd (ctx->builder, noex_bb);

			bblocks [bb->block_num].end_bblock = noex_bb;
			break;
		}
		case OP_START_HANDLER: {
			break;
		}
		case OP_ENDFINALLY: {
			LLVMBasicBlockRef resume_bb;
			MonoBasicBlock *handler_bb;
			LLVMValueRef val, switch_ins, callee;
			GSList *bb_list;
			BBInfo *info;

			handler_bb = g_hash_table_lookup (ctx->region_to_handler, GUINT_TO_POINTER (mono_get_block_region_notry (cfg, bb->region)));
			g_assert (handler_bb);
			info = &bblocks [handler_bb->block_num];
			lhs = info->finally_ind;
			g_assert (lhs);

			bb_list = info->call_handler_return_bbs;

			resume_bb = gen_bb (ctx, "ENDFINALLY_RESUME_BB");

			/* Load the finally variable */
			val = LLVMBuildLoad (builder, lhs, "");

			/* Reset the variable */
			LLVMBuildStore (builder, LLVMConstInt (LLVMInt32Type (), 0, FALSE), lhs);

			/* Branch to either resume_bb, or to the bblocks in bb_list */
			switch_ins = LLVMBuildSwitch (builder, val, resume_bb, g_slist_length (bb_list));
			/* 
			 * The other targets are added at the end to handle OP_CALL_HANDLER
			 * opcodes processed later.
			 */
			info->endfinally_switch_ins_list = g_slist_append_mempool (cfg->mempool, info->endfinally_switch_ins_list, switch_ins);

			builder = ctx->builder = create_builder (ctx);
			LLVMPositionBuilderAtEnd (ctx->builder, resume_bb);

			if (ctx->cfg->compile_aot) {
				callee = get_plt_entry (ctx, LLVMFunctionType (LLVMVoidType (), NULL, 0, FALSE), MONO_PATCH_INFO_INTERNAL_METHOD, "llvm_resume_unwind_trampoline");
			} else {
				callee = LLVMGetNamedFunction (module, "llvm_resume_unwind_trampoline");
			}
			LLVMBuildCall (builder, callee, NULL, 0, "");

			LLVMBuildUnreachable (builder);
			has_terminator = TRUE;
			break;
		}
		default: {
			char reason [128];

			sprintf (reason, "opcode %s", mono_inst_name (ins->opcode));
			LLVM_FAILURE (ctx, reason);
			break;
		}
		}

		/* Convert the value to the type required by phi nodes */
		if (spec [MONO_INST_DEST] != ' ' && !MONO_IS_STORE_MEMBASE (ins) && ctx->vreg_types [ins->dreg]) {
			if (!values [ins->dreg])
				/* vtypes */
				values [ins->dreg] = addresses [ins->dreg];
			else
				values [ins->dreg] = convert (ctx, values [ins->dreg], ctx->vreg_types [ins->dreg]);
		}

		/* Add stores for volatile variables */
		if (spec [MONO_INST_DEST] != ' ' && spec [MONO_INST_DEST] != 'v' && !MONO_IS_STORE_MEMBASE (ins))
			emit_volatile_store (ctx, ins->dreg);
	}

	if (!has_terminator && bb->next_bb && (bb == cfg->bb_entry || bb->in_count > 0))
		LLVMBuildBr (builder, get_bb (ctx, bb->next_bb));

	if (bb == cfg->bb_exit && sig->ret->type == MONO_TYPE_VOID)
		LLVMBuildRetVoid (builder);

	if (bb == cfg->bb_entry)
		ctx->last_alloca = LLVMGetLastInstruction (get_bb (ctx, cfg->bb_entry));

	return;

 FAILURE:
	return;
}

/*
 * mono_llvm_check_method_supported:
 *
 *   Do some quick checks to decide whether cfg->method can be compiled by LLVM, to avoid
 * compiling a method twice.
 *
 *   If an unsupported feature is found, cfg->disable_llvm is set to TRUE and
 * cfg->exception_message is set to a newly allocated (g_strdup) string naming the
 * reason. When several checks fail, the reason of the last failing check is the one
 * reported. If nothing unsupported is found, CFG is left untouched.
 *
 *   The reason is duplicated only once, after all checks have run, so no intermediate
 * strings are leaked when more than one check fails. Any message already stored in
 * cfg->exception_message by the caller is overwritten, not freed.
 */
void
mono_llvm_check_method_supported (MonoCompile *cfg)
{
	MonoMethodHeader *header = cfg->header;
	const char *reason = NULL;
	int i;

	if (cfg->method->save_lmf)
		reason = "lmf";

#if 1
	/* Start at 1: each clause is compared against the one preceding it */
	for (i = 1; i < header->num_clauses; ++i) {
		MonoExceptionClause *clause = &header->clauses [i];
		MonoExceptionClause *prev = &header->clauses [i - 1];

		if (clause->try_offset <= prev->handler_offset + prev->handler_len) {
			/*
			 * FIXME: Some tests still fail with nested clauses.
			 */
			reason = "nested clauses";
			/* One hit is enough, further hits would report the same reason */
			break;
		}
	}
#endif

	/* FIXME: */
	if (cfg->method->dynamic)
		reason = "dynamic.";

	if (reason) {
		cfg->exception_message = g_strdup (reason);
		cfg->disable_llvm = TRUE;
	}
}

/*
 * mono_llvm_emit_method:
 *
 *   Emit LLVM IL from the mono IL, and compile it to native code using LLVM.
 */
void
mono_llvm_emit_method (MonoCompile *cfg)
{
	EmitContext *ctx;
	MonoMethodSignature *sig;
	MonoBasicBlock *bb;
	LLVMTypeRef method_type;
	LLVMValueRef method = NULL;
	char *method_name;
	LLVMValueRef *values;
	int i, max_block_num, bb_index;
	gboolean last = FALSE;
	GPtrArray *phi_values;
	LLVMCallInfo *linfo;
	GSList *l;
	LLVMModuleRef module;
	BBInfo *bblocks;
	GPtrArray *bblock_list;
	MonoMethodHeader *header;
	MonoExceptionClause *clause;
	LLVMSigInfo sinfo;
	char **names;

	/* The code below might acquire the loader lock, so use it for global locking */
	mono_loader_lock ();

	/* Used to communicate with the callbacks */
	mono_native_tls_set_value (current_cfg_tls_id, cfg);

	ctx = g_new0 (EmitContext, 1);
	ctx->cfg = cfg;
	ctx->mempool = cfg->mempool;

	/*
	 * This maps vregs to the LLVM instruction defining them
	 */
	values = g_new0 (LLVMValueRef, cfg->next_vreg);
	/*
	 * This maps vregs for volatile variables to the LLVM instruction defining their
	 * address.
	 */
	ctx->addresses = g_new0 (LLVMValueRef, cfg->next_vreg);
	ctx->vreg_types = g_new0 (LLVMTypeRef, cfg->next_vreg);
	ctx->vreg_cli_types = g_new0 (MonoType*, cfg->next_vreg);
	phi_values = g_ptr_array_new ();
	/* 
	 * This signals whenever the vreg was defined by a phi node with no input vars
	 * (i.e. all its input bblocks end with NOT_REACHABLE).
	 */
	ctx->is_dead = g_new0 (gboolean, cfg->next_vreg);
	/* Whenever the bblock is unreachable */
	ctx->unreachable = g_new0 (gboolean, cfg->max_block_num);

	bblock_list = g_ptr_array_new ();

	ctx->values = values;
	ctx->region_to_handler = g_hash_table_new (NULL, NULL);
 
	if (cfg->compile_aot) {
		ctx->lmodule = &aot_module;
		method_name = mono_aot_get_method_name (cfg);
		cfg->llvm_method_name = g_strdup (method_name);
	} else {
		init_jit_module ();
		ctx->lmodule = &jit_module;
		method_name = mono_method_full_name (cfg->method, TRUE);
	}
	
	module = ctx->module = ctx->lmodule->module;

#if 1
	{
		static int count = 0;
		count ++;

		if (getenv ("LLVM_COUNT")) {
			if (count == atoi (getenv ("LLVM_COUNT"))) {
				printf ("LAST: %s\n", mono_method_full_name (cfg->method, TRUE));
				fflush (stdout);
				last = TRUE;
			}
			if (count > atoi (getenv ("LLVM_COUNT")))
				LLVM_FAILURE (ctx, "");
		}
	}
#endif

	sig = mono_method_signature (cfg->method);
	ctx->sig = sig;

	linfo = mono_arch_get_llvm_call_info (cfg, sig);
	ctx->linfo = linfo;
	CHECK_FAILURE (ctx);

	if (cfg->rgctx_var)
		linfo->rgctx_arg = TRUE;
	method_type = sig_to_llvm_sig_full (ctx, sig, linfo, &sinfo);
	CHECK_FAILURE (ctx);

	/* 
	 * This maps parameter indexes in the original signature to the indexes in
	 * the LLVM signature.
	 */
	ctx->pindexes = sinfo.pindexes;

	method = LLVMAddFunction (module, method_name, method_type);
	ctx->lmethod = method;

#ifdef LLVM_MONO_BRANCH
	LLVMSetFunctionCallConv (method, LLVMMono1CallConv);
#endif
	LLVMSetLinkage (method, LLVMPrivateLinkage);

	LLVMAddFunctionAttr (method, LLVMUWTable);

	if (cfg->compile_aot) {
		LLVMSetLinkage (method, LLVMInternalLinkage);
		LLVMSetVisibility (method, LLVMHiddenVisibility);
	} else {
		LLVMSetLinkage (method, LLVMPrivateLinkage);
	}

	if (cfg->method->save_lmf)
		LLVM_FAILURE (ctx, "lmf");

	if (sig->pinvoke && cfg->method->wrapper_type != MONO_WRAPPER_RUNTIME_INVOKE)
		LLVM_FAILURE (ctx, "pinvoke signature");

	header = cfg->header;
	for (i = 0; i < header->num_clauses; ++i) {
		clause = &header->clauses [i];
		if (clause->flags != MONO_EXCEPTION_CLAUSE_FINALLY && clause->flags != MONO_EXCEPTION_CLAUSE_NONE)
			LLVM_FAILURE (ctx, "non-finally/catch clause.");
	}

	if (linfo->rgctx_arg) {
		ctx->rgctx_arg = LLVMGetParam (method, sinfo.rgctx_arg_pindex);
		/*
		 * We mark the rgctx parameter with the inreg attribute, which is mapped to
		 * MONO_ARCH_RGCTX_REG in the Mono calling convention in llvm, i.e.
		 * CC_X86_64_Mono in X86CallingConv.td.
		 */
		LLVMAddAttribute (ctx->rgctx_arg, LLVMInRegAttribute);
		LLVMSetValueName (ctx->rgctx_arg, "rgctx");
	}
	if (cfg->vret_addr) {
		values [cfg->vret_addr->dreg] = LLVMGetParam (method, sinfo.vret_arg_pindex);
		LLVMSetValueName (values [cfg->vret_addr->dreg], "vret");
	}
	if (sig->hasthis) {
		values [cfg->args [0]->dreg] = LLVMGetParam (method, sinfo.this_arg_pindex);
		LLVMSetValueName (values [cfg->args [0]->dreg], "this");
	}

	names = g_new (char *, sig->param_count);
	mono_method_get_param_names (cfg->method, (const char **) names);

	for (i = 0; i < sig->param_count; ++i) {
		char *name;

		values [cfg->args [i + sig->hasthis]->dreg] = LLVMGetParam (method, sinfo.pindexes [i]);
		if (names [i] && names [i][0] != '\0')
			name = g_strdup_printf ("arg_%s", names [i]);
		else
			name = g_strdup_printf ("arg_%d", i);
		LLVMSetValueName (values [cfg->args [i + sig->hasthis]->dreg], name);
		g_free (name);
		if (linfo->args [i + sig->hasthis].storage == LLVMArgVtypeByVal)
			LLVMAddAttribute (LLVMGetParam (method, sinfo.pindexes [i]), LLVMByValAttribute);
	}
	g_free (names);

	max_block_num = 0;
	for (bb = cfg->bb_entry; bb; bb = bb->next_bb)
		max_block_num = MAX (max_block_num, bb->block_num);
	ctx->bblocks = bblocks = g_new0 (BBInfo, max_block_num + 1);

	/* Add branches between non-consecutive bblocks */
	for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
		if (bb->last_ins && MONO_IS_COND_BRANCH_OP (bb->last_ins) &&
			bb->next_bb != bb->last_ins->inst_false_bb) {
			
			MonoInst *inst = mono_mempool_alloc0 (cfg->mempool, sizeof (MonoInst));
			inst->opcode = OP_BR;
			inst->inst_target_bb = bb->last_ins->inst_false_bb;
			mono_bblock_add_inst (bb, inst);
		}
	}

	/*
	 * The INDIRECT flag added by OP_LDADDR inhibits optimizations, even if the LDADDR
	 * was later optimized away, so clear these flags, and add them back for the still
	 * present OP_LDADDR instructions.
	 */
	for (i = 0; i < cfg->next_vreg; ++i) {
		MonoInst *ins;

		ins = get_vreg_to_inst (cfg, i);
		if (ins && ins != cfg->rgctx_var)
			ins->flags &= ~MONO_INST_INDIRECT;
	}

	/*
	 * Make a first pass over the code to precreate PHI nodes/set INDIRECT flags.
	 */
	for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
		MonoInst *ins;
		LLVMBuilderRef builder;
		char *dname;
		char dname_buf[128];

		builder = create_builder (ctx);

		for (ins = bb->code; ins; ins = ins->next) {
			switch (ins->opcode) {
			case OP_PHI:
			case OP_FPHI:
			case OP_VPHI:
			case OP_XPHI: {
				LLVMTypeRef phi_type = llvm_type_to_stack_type (type_to_llvm_type (ctx, &ins->klass->byval_arg));

				CHECK_FAILURE (ctx);

				if (ins->opcode == OP_VPHI) {
					/* Treat valuetype PHI nodes as operating on the address itself */
					g_assert (ins->klass);
					phi_type = LLVMPointerType (type_to_llvm_type (ctx, &ins->klass->byval_arg), 0);
				}

				/* 
				 * Have to precreate these, as they can be referenced by
				 * earlier instructions.
				 */
				sprintf (dname_buf, "t%d", ins->dreg);
				dname = dname_buf;
				values [ins->dreg] = LLVMBuildPhi (builder, phi_type, dname);

				if (ins->opcode == OP_VPHI)
					ctx->addresses [ins->dreg] = values [ins->dreg];

				g_ptr_array_add (phi_values, values [ins->dreg]);

				/* 
				 * Set the expected type of the incoming arguments since these have
				 * to have the same type.
				 */
				for (i = 0; i < ins->inst_phi_args [0]; i++) {
					int sreg1 = ins->inst_phi_args [i + 1];
					
					if (sreg1 != -1)
						ctx->vreg_types [sreg1] = phi_type;
				}
				break;
				}
			case OP_LDADDR:
				((MonoInst*)ins->inst_p0)->flags |= MONO_INST_INDIRECT;
				break;
			default:
				break;
			}
		}
	}

	/* 
	 * Create an ordering for bblocks, use the depth first order first, then
	 * put the exception handling bblocks last.
	 */
	for (bb_index = 0; bb_index < cfg->num_bblocks; ++bb_index) {
		bb = cfg->bblocks [bb_index];
		if (!(bb->region != -1 && !MONO_BBLOCK_IS_IN_REGION (bb, MONO_REGION_TRY))) {
			g_ptr_array_add (bblock_list, bb);
			bblocks [bb->block_num].added = TRUE;
		}
	}

	for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
		if (!bblocks [bb->block_num].added)
			g_ptr_array_add (bblock_list, bb);
	}

	/*
	 * Second pass: generate code.
	 */
	for (bb_index = 0; bb_index < bblock_list->len; ++bb_index) {
		bb = g_ptr_array_index (bblock_list, bb_index);

		if (!(bb == cfg->bb_entry || bb->in_count > 0))
			continue;

		process_bb (ctx, bb);
		CHECK_FAILURE (ctx);
	}

	/* Add incoming phi values */
	for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
		GSList *l, *ins_list;

		ins_list = bblocks [bb->block_num].phi_nodes;

		for (l = ins_list; l; l = l->next) {
			PhiNode *node = l->data;
			MonoInst *phi = node->phi;
			int sreg1 = node->sreg;
			LLVMBasicBlockRef in_bb;

			if (sreg1 == -1)
				continue;

			in_bb = get_end_bb (ctx, node->in_bb);

			if (ctx->unreachable [node->in_bb->block_num])
				continue;

			if (!values [sreg1])
				/* Can happen with values in EH clauses */
				LLVM_FAILURE (ctx, "incoming phi sreg1");

			if (phi->opcode == OP_VPHI) {
				g_assert (LLVMTypeOf (ctx->addresses [sreg1]) == LLVMTypeOf (values [phi->dreg]));
				LLVMAddIncoming (values [phi->dreg], &ctx->addresses [sreg1], &in_bb, 1);
			} else {
				g_assert (LLVMTypeOf (values [sreg1]) == LLVMTypeOf (values [phi->dreg]));
				LLVMAddIncoming (values [phi->dreg], &values [sreg1], &in_bb, 1);
			}
		}
	}

	/* Create the SWITCH statements for ENDFINALLY instructions */
	for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
		BBInfo *info = &bblocks [bb->block_num];
		GSList *l;
		for (l = info->endfinally_switch_ins_list; l; l = l->next) {
			LLVMValueRef switch_ins = l->data;
			GSList *bb_list = info->call_handler_return_bbs;

			for (i = 0; i < g_slist_length (bb_list); ++i)
				LLVMAddCase (switch_ins, LLVMConstInt (LLVMInt32Type (), i + 1, FALSE), g_slist_nth (bb_list, i)->data);
		}
	}

	if (cfg->verbose_level > 1)
		mono_llvm_dump_value (method);

	mark_as_used (module, method);

	if (cfg->compile_aot) {
		/* Don't generate native code, keep the LLVM IR */
		if (cfg->compile_aot && cfg->verbose_level)
			printf ("%s emitted as %s\n", mono_method_full_name (cfg->method, TRUE), method_name);

		//LLVMVerifyFunction(method, 0);
	} else {
		mono_llvm_optimize_method (method);

		if (cfg->verbose_level > 1)
			mono_llvm_dump_value (method);

		cfg->native_code = LLVMGetPointerToGlobal (ee, method);

		/* Set by emit_cb */
		g_assert (cfg->code_len);

		/* FIXME: Free the LLVM IL for the function */
	}

	goto CLEANUP;

 FAILURE:

	if (method) {
		/* Need to add unused phi nodes as they can be referenced by other values */
		LLVMBasicBlockRef phi_bb = LLVMAppendBasicBlock (method, "PHI_BB");
		LLVMBuilderRef builder;

		builder = create_builder (ctx);
		LLVMPositionBuilderAtEnd (builder, phi_bb);

		for (i = 0; i < phi_values->len; ++i) {
			LLVMValueRef v = g_ptr_array_index (phi_values, i);
			if (LLVMGetInstructionParent (v) == NULL)
				LLVMInsertIntoBuilder (builder, v);
		}
		
		LLVMDeleteFunction (method);
	}

 CLEANUP:
	g_free (values);
	g_free (ctx->addresses);
	g_free (ctx->vreg_types);
	g_free (ctx->vreg_cli_types);
	g_free (ctx->pindexes);
	g_free (ctx->is_dead);
	g_free (ctx->unreachable);
	g_ptr_array_free (phi_values, TRUE);
	g_free (ctx->bblocks);
	g_hash_table_destroy (ctx->region_to_handler);
	g_free (method_name);
	g_ptr_array_free (bblock_list, TRUE);

	for (l = ctx->builders; l; l = l->next) {
		LLVMBuilderRef builder = l->data;
		LLVMDisposeBuilder (builder);
	}

	g_free (ctx);

	mono_native_tls_set_value (current_cfg_tls_id, NULL);

	mono_loader_unlock ();
}

/*
 * mono_llvm_emit_call:
 *
 *   Same as mono_arch_emit_call () for LLVM.
 *
 *   The LLVM call info for the signature of CALL is computed and stored into
 * call->cinfo, then one move/outarg instruction is emitted into cfg->cbb for each
 * argument (including 'this'), and its dreg is registered as an outgoing argument
 * of CALL.
 *   If the call can't be handled (vararg calling convention or an unsupported
 * argument storage kind), cfg->disable_llvm is set, cfg->exception_message is set
 * to a newly allocated description, and the function returns without emitting
 * anything further.
 */
void
mono_llvm_emit_call (MonoCompile *cfg, MonoCallInst *call)
{
	MonoInst *in;
	MonoMethodSignature *sig;
	int i, n;
	LLVMArgInfo *ainfo;

	sig = call->signature;
	n = sig->param_count + sig->hasthis;

	/* This is computed even if LLVM has already been disabled for this method */
	call->cinfo = mono_arch_get_llvm_call_info (cfg, sig);

	if (cfg->disable_llvm)
		return;

	if (sig->call_convention == MONO_CALL_VARARG) {
		cfg->exception_message = g_strdup ("varargs");
		cfg->disable_llvm = TRUE;
		/*
		 * Nothing is emitted once LLVM is disabled, so return here instead of
		 * allocating vregs/instructions which would never be added, and possibly
		 * overwriting (leaking) the message set above.
		 */
		return;
	}

	for (i = 0; i < n; ++i) {
		MonoInst *ins;

		ainfo = call->cinfo->args + i;

		in = call->args [i];

		/* Simply remember the arguments */
		switch (ainfo->storage) {
		case LLVMArgInIReg:
		case LLVMArgInFPReg: {
			/* 'this' has no entry in sig->params, treat it as a native int */
			MonoType *t = (sig->hasthis && i == 0) ? &mono_get_intptr_class ()->byval_arg : sig->params [i - sig->hasthis];

			if (!t->byref && (t->type == MONO_TYPE_R8 || t->type == MONO_TYPE_R4)) {
				MONO_INST_NEW (cfg, ins, OP_FMOVE);
				ins->dreg = mono_alloc_freg (cfg);
			} else {
				MONO_INST_NEW (cfg, ins, OP_MOVE);
				ins->dreg = mono_alloc_ireg (cfg);
			}
			ins->sreg1 = in->dreg;
			break;
		}
		case LLVMArgVtypeByVal:
		case LLVMArgVtypeInReg:
			MONO_INST_NEW (cfg, ins, OP_LLVM_OUTARG_VT);
			ins->dreg = mono_alloc_ireg (cfg);
			ins->sreg1 = in->dreg;
			ins->klass = mono_class_from_mono_type (sig->params [i - sig->hasthis]);
			break;
		default:
			/* Unsupported storage kind, fall back to the normal JIT */
			cfg->exception_message = g_strdup ("ainfo->storage");
			cfg->disable_llvm = TRUE;
			return;
		}

		MONO_ADD_INS (cfg->cbb, ins);
		mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, 0, FALSE);
	}
}

/*
 * alloc_cb:
 *
 *   Callback passed to mono_llvm_create_ee (). Reserves SIZE bytes of code memory,
 * using the domain of the MonoCompile stored in the current_cfg_tls_id TLS slot if
 * there is one, and the current domain otherwise. FUNCTION is not used.
 */
static unsigned char*
alloc_cb (LLVMValueRef function, int size)
{
	MonoCompile *cur_cfg = mono_native_tls_get_value (current_cfg_tls_id);

	// FIXME: dynamic
	return mono_domain_code_reserve (cur_cfg ? cur_cfg->domain : mono_domain_get (), size);
}

/*
 * emitted_cb:
 *
 *   Callback passed to mono_llvm_create_ee (). Records the size in bytes of the
 * code range [START, END) as the code_len of the MonoCompile stored in the
 * current_cfg_tls_id TLS slot, which must be set. FUNCTION is not used.
 */
static void
emitted_cb (LLVMValueRef function, void *start, void *end)
{
	MonoCompile *cur_cfg = mono_native_tls_get_value (current_cfg_tls_id);
	guint8 *code_start = start;
	guint8 *code_end = end;

	g_assert (cur_cfg);

	cur_cfg->code_len = code_end - code_start;
}

static void
exception_cb (void *data)
{
	MonoCompile *cfg;
	MonoJitExceptionInfo *ei;
	guint32 ei_len, i, j, nested_len, nindex;
	gpointer *type_info;
	int this_reg, this_offset;

	cfg = mono_native_tls_get_value (current_cfg_tls_id);
	g_assert (cfg);

	/*
	 * data points to a DWARF FDE structure, convert it to our unwind format and
	 * save it.
	 * An alternative would be to save it directly, and modify our unwinder to work
	 * with it.
	 */
	cfg->encoded_unwind_ops = mono_unwind_decode_fde ((guint8*)data, &cfg->encoded_unwind_ops_len, NULL, &ei, &ei_len, &type_info, &this_reg, &this_offset);

	/* Count nested clauses */
	nested_len = 0;
	for (i = 0; i < ei_len; ++i) {
		for (j = 0; j < ei_len; ++j) {
			gint32 cindex1 = *(gint32*)type_info [i];
			MonoExceptionClause *clause1 = &cfg->header->clauses [cindex1];
			gint32 cindex2 = *(gint32*)type_info [j];
			MonoExceptionClause *clause2 = &cfg->header->clauses [cindex2];

			if (cindex1 != cindex2 && clause1->try_offset >= clause2->try_offset && clause1->handler_offset <= clause2->handler_offset) {
				nested_len ++;
			}
		}
	}

	cfg->llvm_ex_info = mono_mempool_alloc0 (cfg->mempool, (ei_len + nested_len) * sizeof (MonoJitExceptionInfo));
	cfg->llvm_ex_info_len = ei_len + nested_len;
	memcpy (cfg->llvm_ex_info, ei, ei_len * sizeof (MonoJitExceptionInfo));
	/* Fill the rest of the information from the type info */
	for (i = 0; i < ei_len; ++i) {
		gint32 clause_index = *(gint32*)type_info [i];
		MonoExceptionClause *clause = &cfg->header->clauses [clause_index];

		cfg->llvm_ex_info [i].flags = clause->flags;
		cfg->llvm_ex_info [i].data.catch_class = clause->data.catch_class;
	}

	/*
	 * For nested clauses, the LLVM produced exception info associates the try interval with
	 * the innermost handler, while mono expects it to be associated with all nesting clauses.
	 */
	/* FIXME: These should be order with the normal clauses */
	nindex = ei_len;
	for (i = 0; i < ei_len; ++i) {
		for (j = 0; j < ei_len; ++j) {
			gint32 cindex1 = *(gint32*)type_info [i];
			MonoExceptionClause *clause1 = &cfg->header->clauses [cindex1];
			gint32 cindex2 = *(gint32*)type_info [j];
			MonoExceptionClause *clause2 = &cfg->header->clauses [cindex2];

			if (cindex1 != cindex2 && clause1->try_offset >= clause2->try_offset && clause1->handler_offset <= clause2->handler_offset) {
				/* 
				 * The try interval comes from the nested clause, everything else from the
				 * nesting clause.
				 */
				memcpy (&cfg->llvm_ex_info [nindex], &cfg->llvm_ex_info [j], sizeof (MonoJitExceptionInfo));
				cfg->llvm_ex_info [nindex].try_start = cfg->llvm_ex_info [i].try_start;
				cfg->llvm_ex_info [nindex].try_end = cfg->llvm_ex_info [i].try_end;
				nindex ++;
			}
		}
	}
	g_assert (nindex == ei_len + nested_len);
	cfg->llvm_this_reg = this_reg;
	cfg->llvm_this_offset = this_offset;

	/* type_info [i] is cfg mempool allocated, no need to free it */

	g_free (ei);
	g_free (type_info);
}

/*
 * dlsym_cb:
 *
 *   Callback passed to mono_llvm_create_ee (). Looks up the symbol NAME and stores
 * its address into SYMBOL. "__bzero" is resolved directly to bzero, everything
 * else is looked up through a handle obtained from mono_dl_open (NULL, ...).
 * Returns NULL on success, otherwise the error string returned by
 * mono_dl_symbol ().
 */
static char*
dlsym_cb (const char *name, void **symbol)
{
	MonoDl *current;
	char *err;

	err = NULL;
	if (!strcmp (name, "__bzero")) {
		*symbol = (void*)bzero;
	} else {
		current = mono_dl_open (NULL, 0, NULL);
		g_assert (current);

		err = mono_dl_symbol (current, name, symbol);

		/* The handle is only needed for the lookup, don't leak one per call */
		mono_dl_close (current);
	}
#ifdef MONO_ARCH_HAVE_CREATE_LLVM_NATIVE_THUNK
	/* Return the address of a thunk created for the symbol instead of the symbol itself */
	*symbol = (char*)mono_arch_create_llvm_native_thunk (mono_domain_get (), (guint8*)(*symbol));
#endif
	return err;
}

/*
 * AddFunc:
 *
 *   Add a declaration of a non-vararg function called NAME to MODULE, with return
 * type RET_TYPE and the NPARAMS parameter types in PARAM_TYPES.
 */
static inline void
AddFunc (LLVMModuleRef module, const char *name, LLVMTypeRef ret_type, LLVMTypeRef *param_types, int nparams)
{
	LLVMTypeRef func_type = LLVMFunctionType (ret_type, param_types, nparams, FALSE);

	LLVMAddFunction (module, name, func_type);
}

/*
 * AddFunc2:
 *
 *   Same as AddFunc (), for functions taking exactly two parameters of types
 * PARAM_TYPE1 and PARAM_TYPE2.
 */
static inline void
AddFunc2 (LLVMModuleRef module, const char *name, LLVMTypeRef ret_type, LLVMTypeRef param_type1, LLVMTypeRef param_type2)
{
	LLVMTypeRef two_params [] = { param_type1, param_type2 };

	AddFunc (module, name, ret_type, two_params, 2);
}

/*
 * add_intrinsics:
 *
 *   Add declarations of a fixed set of LLVM intrinsics and helper functions to
 * MODULE. As a side effect, this sets the memset_func_name/memset_param_count and
 * memcpy_func_name/memcpy_param_count globals to describe the memset/memcpy
 * intrinsics declared here.
 */
static void
add_intrinsics (LLVMModuleRef module)
{
	/* Emit declarations of intrinsics */
	/*
	 * It would be nicer to emit only the intrinsics actually used, but LLVM's Module
	 * type doesn't seem to do any locking.
	 */
	{
		LLVMTypeRef memset_params [] = { LLVMPointerType (LLVMInt8Type (), 0), LLVMInt8Type (), LLVMInt32Type (), LLVMInt32Type (), LLVMInt1Type () };

		memset_param_count = 5;
		memset_func_name = "llvm.memset.p0i8.i32";

		AddFunc (module, memset_func_name, LLVMVoidType (), memset_params, memset_param_count);
	}

	{
		LLVMTypeRef memcpy_params [] = { LLVMPointerType (LLVMInt8Type (), 0), LLVMPointerType (LLVMInt8Type (), 0), LLVMInt32Type (), LLVMInt32Type (), LLVMInt1Type () };

		memcpy_param_count = 5;
		memcpy_func_name = "llvm.memcpy.p0i8.p0i8.i32";

		AddFunc (module, memcpy_func_name, LLVMVoidType (), memcpy_params, memcpy_param_count);
	}

	{
		LLVMTypeRef params [] = { LLVMDoubleType () };

		AddFunc (module, "llvm.sin.f64", LLVMDoubleType (), params, 1);
		AddFunc (module, "llvm.cos.f64", LLVMDoubleType (), params, 1);
		AddFunc (module, "llvm.sqrt.f64", LLVMDoubleType (), params, 1);

		/* This isn't an intrinsic, instead llvm seems to special case it by name */
		AddFunc (module, "fabs", LLVMDoubleType (), params, 1);
	}

	/* Overflow checking arithmetic, these return a { result, overflow flag } pair */
	{
		LLVMTypeRef ovf_res_i32 [] = { LLVMInt32Type (), LLVMInt1Type () };
		LLVMTypeRef ovf_params_i32 [] = { LLVMInt32Type (), LLVMInt32Type () };
		LLVMTypeRef ovf_ret_i32 = LLVMStructType (ovf_res_i32, 2, FALSE);

		AddFunc (module, "llvm.sadd.with.overflow.i32", ovf_ret_i32, ovf_params_i32, 2);
		AddFunc (module, "llvm.uadd.with.overflow.i32", ovf_ret_i32, ovf_params_i32, 2);
		AddFunc (module, "llvm.ssub.with.overflow.i32", ovf_ret_i32, ovf_params_i32, 2);
		AddFunc (module, "llvm.usub.with.overflow.i32", ovf_ret_i32, ovf_params_i32, 2);
		AddFunc (module, "llvm.smul.with.overflow.i32", ovf_ret_i32, ovf_params_i32, 2);
		AddFunc (module, "llvm.umul.with.overflow.i32", ovf_ret_i32, ovf_params_i32, 2);
	}

	{
		LLVMTypeRef ovf_res_i64 [] = { LLVMInt64Type (), LLVMInt1Type () };
		LLVMTypeRef ovf_params_i64 [] = { LLVMInt64Type (), LLVMInt64Type () };
		LLVMTypeRef ovf_ret_i64 = LLVMStructType (ovf_res_i64, 2, FALSE);

		AddFunc (module, "llvm.sadd.with.overflow.i64", ovf_ret_i64, ovf_params_i64, 2);
		AddFunc (module, "llvm.uadd.with.overflow.i64", ovf_ret_i64, ovf_params_i64, 2);
		AddFunc (module, "llvm.ssub.with.overflow.i64", ovf_ret_i64, ovf_params_i64, 2);
		AddFunc (module, "llvm.usub.with.overflow.i64", ovf_ret_i64, ovf_params_i64, 2);
		AddFunc (module, "llvm.smul.with.overflow.i64", ovf_ret_i64, ovf_params_i64, 2);
		AddFunc (module, "llvm.umul.with.overflow.i64", ovf_ret_i64, ovf_params_i64, 2);
	}

	{
		LLVMTypeRef struct_ptr = LLVMPointerType (LLVMStructType (NULL, 0, FALSE), 0);
		LLVMTypeRef invariant_start_params [] = { LLVMInt64Type (), LLVMPointerType (LLVMInt8Type (), 0) };
		LLVMTypeRef invariant_end_params [] = { struct_ptr, LLVMInt64Type (), LLVMPointerType (LLVMInt8Type (), 0) };

		AddFunc (module, "llvm.invariant.start", struct_ptr, invariant_start_params, 2);

		AddFunc (module, "llvm.invariant.end", LLVMVoidType (), invariant_end_params, 3);
	}

	/* EH intrinsics, both are declared as taking no arguments and returning void */
	AddFunc (module, "mono_personality", LLVMVoidType (), NULL, 0);

	AddFunc (module, "llvm_resume_unwind_trampoline", LLVMVoidType (), NULL, 0);

	/* SSE intrinsics */
	{
		LLVMTypeRef ret_type, arg_types [16];

		/* Binary ops */
		ret_type = type_to_simd_type (MONO_TYPE_I4);
		arg_types [0] = ret_type;
		arg_types [1] = ret_type;
		AddFunc (module, "llvm.x86.sse41.pminud", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse41.pmaxud", ret_type, arg_types, 2);

		ret_type = type_to_simd_type (MONO_TYPE_I2);
		arg_types [0] = ret_type;
		arg_types [1] = ret_type;
		AddFunc (module, "llvm.x86.sse41.pminuw", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.pmins.w", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse41.pmaxuw", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.padds.w", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.psubs.w", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.paddus.w", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.psubus.w", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.pavg.w", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.pmulh.w", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.pmulhu.w", ret_type, arg_types, 2);

		ret_type = type_to_simd_type (MONO_TYPE_I1);
		arg_types [0] = ret_type;
		arg_types [1] = ret_type;
		AddFunc (module, "llvm.x86.sse2.pminu.b", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.pmaxu.b", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.padds.b", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.psubs.b", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.paddus.b", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.psubus.b", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.pavg.b", ret_type, arg_types, 2);

		ret_type = type_to_simd_type (MONO_TYPE_R8);
		arg_types [0] = ret_type;
		arg_types [1] = ret_type;
		AddFunc (module, "llvm.x86.sse2.min.pd", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.max.pd", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse3.hadd.pd", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse3.hsub.pd", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse3.addsub.pd", ret_type, arg_types, 2);

		ret_type = type_to_simd_type (MONO_TYPE_R4);
		arg_types [0] = ret_type;
		arg_types [1] = ret_type;
		AddFunc (module, "llvm.x86.sse.min.ps", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse.max.ps", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse3.hadd.ps", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse3.hsub.ps", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse3.addsub.ps", ret_type, arg_types, 2);

		/* pack */
		ret_type = type_to_simd_type (MONO_TYPE_I1);
		arg_types [0] = type_to_simd_type (MONO_TYPE_I2);
		arg_types [1] = type_to_simd_type (MONO_TYPE_I2);
		AddFunc (module, "llvm.x86.sse2.packsswb.128", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.packuswb.128", ret_type, arg_types, 2);
		ret_type = type_to_simd_type (MONO_TYPE_I2);
		arg_types [0] = type_to_simd_type (MONO_TYPE_I4);
		arg_types [1] = type_to_simd_type (MONO_TYPE_I4);
		AddFunc (module, "llvm.x86.sse2.packssdw.128", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse41.packusdw", ret_type, arg_types, 2);

		/* cmp pd/ps */
		ret_type = type_to_simd_type (MONO_TYPE_R8);
		arg_types [0] = ret_type;
		arg_types [1] = ret_type;
		arg_types [2] = LLVMInt8Type ();
		AddFunc (module, "llvm.x86.sse2.cmp.pd", ret_type, arg_types, 3);
		ret_type = type_to_simd_type (MONO_TYPE_R4);
		arg_types [0] = ret_type;
		arg_types [1] = ret_type;
		arg_types [2] = LLVMInt8Type ();
		AddFunc (module, "llvm.x86.sse.cmp.ps", ret_type, arg_types, 3);

		/* Conversion ops */
		ret_type = type_to_simd_type (MONO_TYPE_R8);
		arg_types [0] = type_to_simd_type (MONO_TYPE_I4);
		AddFunc (module, "llvm.x86.sse2.cvtdq2pd", ret_type, arg_types, 1);
		ret_type = type_to_simd_type (MONO_TYPE_R4);
		arg_types [0] = type_to_simd_type (MONO_TYPE_I4);
		AddFunc (module, "llvm.x86.sse2.cvtdq2ps", ret_type, arg_types, 1);
		ret_type = type_to_simd_type (MONO_TYPE_I4);
		arg_types [0] = type_to_simd_type (MONO_TYPE_R8);
		AddFunc (module, "llvm.x86.sse2.cvtpd2dq", ret_type, arg_types, 1);
		ret_type = type_to_simd_type (MONO_TYPE_I4);
		arg_types [0] = type_to_simd_type (MONO_TYPE_R4);
		AddFunc (module, "llvm.x86.sse2.cvtps2dq", ret_type, arg_types, 1);
		ret_type = type_to_simd_type (MONO_TYPE_R4);
		arg_types [0] = type_to_simd_type (MONO_TYPE_R8);
		AddFunc (module, "llvm.x86.sse2.cvtpd2ps", ret_type, arg_types, 1);
		ret_type = type_to_simd_type (MONO_TYPE_R8);
		arg_types [0] = type_to_simd_type (MONO_TYPE_R4);
		AddFunc (module, "llvm.x86.sse2.cvtps2pd", ret_type, arg_types, 1);

		ret_type = type_to_simd_type (MONO_TYPE_I4);
		arg_types [0] = type_to_simd_type (MONO_TYPE_R8);
		AddFunc (module, "llvm.x86.sse2.cvttpd2dq", ret_type, arg_types, 1);
		ret_type = type_to_simd_type (MONO_TYPE_I4);
		arg_types [0] = type_to_simd_type (MONO_TYPE_R4);
		AddFunc (module, "llvm.x86.sse2.cvttps2dq", ret_type, arg_types, 1);

		/* Unary ops */
		ret_type = type_to_simd_type (MONO_TYPE_R8);
		arg_types [0] = ret_type;
		AddFunc (module, "llvm.x86.sse2.sqrt.pd", ret_type, arg_types, 1);
		ret_type = type_to_simd_type (MONO_TYPE_R4);
		arg_types [0] = ret_type;
		AddFunc (module, "llvm.x86.sse.sqrt.ps", ret_type, arg_types, 1);
		ret_type = type_to_simd_type (MONO_TYPE_R4);
		arg_types [0] = ret_type;
		AddFunc (module, "llvm.x86.sse.rsqrt.ps", ret_type, arg_types, 1);
		ret_type = type_to_simd_type (MONO_TYPE_R4);
		arg_types [0] = ret_type;
		AddFunc (module, "llvm.x86.sse.rcp.ps", ret_type, arg_types, 1);

		/* shifts */
		ret_type = type_to_simd_type (MONO_TYPE_I2);
		arg_types [0] = ret_type;
		arg_types [1] = LLVMInt32Type ();
		AddFunc (module, "llvm.x86.sse2.psrli.w", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.psrai.w", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.pslli.w", ret_type, arg_types, 2);
		ret_type = type_to_simd_type (MONO_TYPE_I4);
		arg_types [0] = ret_type;
		arg_types [1] = LLVMInt32Type ();
		AddFunc (module, "llvm.x86.sse2.psrli.d", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.psrai.d", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.pslli.d", ret_type, arg_types, 2);
		ret_type = type_to_simd_type (MONO_TYPE_I8);
		arg_types [0] = ret_type;
		arg_types [1] = LLVMInt32Type ();
		AddFunc (module, "llvm.x86.sse2.psrli.q", ret_type, arg_types, 2);
		AddFunc (module, "llvm.x86.sse2.pslli.q", ret_type, arg_types, 2);

		/* pmovmskb */
		ret_type = LLVMInt32Type ();
		arg_types [0] = type_to_simd_type (MONO_TYPE_I1);
		AddFunc (module, "llvm.x86.sse2.pmovmskb.128", ret_type, arg_types, 1);
	}

	AddFunc (module, "llvm.x86.sse2.pause", LLVMVoidType (), NULL, 0);

	/* Load/Store intrinsics, one pair for each of the 8/16/32/64 bit integer types */
	{
		LLVMTypeRef arg_types [5];
		int i;
		char name [128];

		for (i = 1; i <= 8; i *= 2) {
			arg_types [0] = LLVMPointerType (LLVMIntType (i * 8), 0);
			arg_types [1] = LLVMInt32Type ();
			arg_types [2] = LLVMInt1Type ();
			snprintf (name, sizeof (name), "llvm.mono.load.i%d.p0i%d", i * 8, i * 8);
			AddFunc (module, name, LLVMIntType (i * 8), arg_types, 3);

			arg_types [0] = LLVMIntType (i * 8);
			arg_types [1] = LLVMPointerType (LLVMIntType (i * 8), 0);
			arg_types [2] = LLVMInt32Type ();
			arg_types [3] = LLVMInt1Type ();
			snprintf (name, sizeof (name), "llvm.mono.store.i%d.p0i%d", i * 8, i * 8);
			AddFunc (module, name, LLVMVoidType (), arg_types, 4);
		}
	}
}

/*
 * mono_llvm_init:
 *
 *   Allocate the TLS slot identified by current_cfg_tls_id, which the LLVM
 * callbacks in this file (alloc_cb, emitted_cb, exception_cb) read to obtain the
 * current MonoCompile. No destructor is registered for the slot.
 */
void
mono_llvm_init (void)
{
	mono_native_tls_alloc (&current_cfg_tls_id, NULL);
}

/*
 * init_jit_module:
 *
 *   Lazily create the LLVM module used in JIT mode together with its execution
 * engine. The jit_module_inited flag is tested once without the loader lock, and
 * again after taking it, so the initialization below runs at most once.
 */
static void
init_jit_module (void)
{
	MonoJitICallInfo *resume_info;

	/* Unlocked check first, it is repeated under the loader lock below */
	if (jit_module_inited)
		return;

	mono_loader_lock ();

	if (!jit_module_inited) {
		jit_module.module = LLVMModuleCreateWithName ("mono");

		/* The callbacks are defined earlier in this file */
		ee = mono_llvm_create_ee (LLVMCreateModuleProviderForExistingModule (jit_module.module), alloc_cb, emitted_cb, exception_cb, dlsym_cb);

		add_intrinsics (jit_module.module);

		jit_module.llvm_types = g_hash_table_new (NULL, NULL);

		/* Map the declaration added by add_intrinsics () to the address of the icall */
		resume_info = mono_find_jit_icall_by_name ("llvm_resume_unwind_trampoline");
		g_assert (resume_info);
		LLVMAddGlobalMapping (ee, LLVMGetNamedFunction (jit_module.module, "llvm_resume_unwind_trampoline"), (void*)resume_info->func);

		jit_module_inited = TRUE;
	}

	mono_loader_unlock ();
}

void
mono_llvm_cleanup (void)
{
	if (ee)
		mono_llvm_dispose_ee (ee);

	if (jit_module.llvm_types)
		g_hash_table_destroy (jit_module.llvm_types);

	if (aot_module.module)
		LLVMDisposeModule (aot_module.module);

	LLVMContextDispose (LLVMGetGlobalContext ());
}

/*
 * mono_llvm_create_aot_module:
 *
 *   Create the LLVM module used in AOT mode, releasing the previous one if it
 * exists. GOT_SYMBOL is the name given to the real got variable by
 * mono_llvm_emit_aot_module (); the pointer is stored as is, the string is not
 * copied.
 */
void
mono_llvm_create_aot_module (const char *got_symbol)
{
	/* Delete previous module */
	if (aot_module.plt_entries)
		g_hash_table_destroy (aot_module.plt_entries);
	/* The memset () below would otherwise leak the old type table */
	if (aot_module.llvm_types)
		g_hash_table_destroy (aot_module.llvm_types);
	if (aot_module.module)
		LLVMDisposeModule (aot_module.module);

	memset (&aot_module, 0, sizeof (aot_module));

	aot_module.module = LLVMModuleCreateWithName ("aot");
	aot_module.got_symbol = got_symbol;

	add_intrinsics (aot_module.module);

	/* Add GOT */
	/*
	 * We couldn't compute the type of the LLVM global representing the got because
	 * its size is only known after all the methods have been emitted. So create
	 * a dummy variable, and replace all uses of it with the real got variable when
	 * its size is known in mono_llvm_emit_aot_module ().
	 */
	{
		LLVMTypeRef got_type = LLVMArrayType (IntPtrType (), 0);

		aot_module.got_var = LLVMAddGlobal (aot_module.module, got_type, "mono_dummy_got");
		LLVMSetInitializer (aot_module.got_var, LLVMConstNull (got_type));
	}

	/* Add a dummy personality function, whose body is a single 'ret void' */
	{
		LLVMBasicBlockRef lbb;
		LLVMBuilderRef lbuilder;
		LLVMValueRef personality;

		personality = LLVMAddFunction (aot_module.module, "mono_aot_personality", LLVMFunctionType (LLVMVoidType (), NULL, 0, FALSE));
		LLVMSetLinkage (personality, LLVMPrivateLinkage);
		lbb = LLVMAppendBasicBlock (personality, "BB0");
		lbuilder = LLVMCreateBuilder ();
		LLVMPositionBuilderAtEnd (lbuilder, lbb);
		LLVMBuildRetVoid (lbuilder);
		/* The emitted instruction belongs to the bblock, the builder can be freed */
		LLVMDisposeBuilder (lbuilder);
	}

	aot_module.llvm_types = g_hash_table_new (NULL, NULL);
	aot_module.plt_entries = g_hash_table_new (g_str_hash, g_str_equal);
}

/*
 * mono_llvm_emit_aot_module:
 *
 *   Emit the aot module into the LLVM bitcode file FILENAME. GOT_SIZE is the number
 * of pointer sized entries of the real got variable, which replaces the dummy one
 * created by mono_llvm_create_aot_module (). A failure to write the file is
 * reported on stderr.
 */
void
mono_llvm_emit_aot_module (const char *filename, int got_size)
{
	LLVMTypeRef got_type;
	LLVMValueRef real_got;

	/* 
	 * Create the real got variable and replace all uses of the dummy variable with
	 * the real one.
	 */
	got_type = LLVMArrayType (IntPtrType (), got_size);
	real_got = LLVMAddGlobal (aot_module.module, got_type, aot_module.got_symbol);
	LLVMSetInitializer (real_got, LLVMConstNull (got_type));
	LLVMSetLinkage (real_got, LLVMInternalLinkage);

	mono_llvm_replace_uses_of (aot_module.got_var, real_got);

	mark_as_used (aot_module.module, real_got);

	/* Delete the dummy got so it doesn't become a global */
	LLVMDeleteGlobal (aot_module.got_var);

#if 0
	{
		char *verifier_err;

		if (LLVMVerifyModule (aot_module.module, LLVMReturnStatusAction, &verifier_err)) {
			g_assert_not_reached ();
		}
	}
#endif

	/* LLVMWriteBitcodeToFile () returns 0 on success, don't fail silently */
	if (LLVMWriteBitcodeToFile (aot_module.module, filename) != 0)
		fprintf (stderr, "Unable to write LLVM bitcode file '%s'.\n", filename);
}

/*
  DESIGN:
  - Emit LLVM IR from the mono IR using the LLVM C API.
  - The original arch specific code remains, so we can fall back to it if we run
    into something we can't handle.
*/

/*  
  A partial list of issues:
  - Handling of opcodes which can throw exceptions.

      In the mono JIT, these are implemented using code like this:
	  method:
      <compare>
	  throw_pos:
	  b<cond> ex_label
	  <rest of code>
      ex_label:
	  push throw_pos - method
	  call <exception trampoline>

	  The problematic part is push throw_pos - method, which cannot be represented
      in the LLVM IR, since it does not support label values.
	  -> this can be implemented in AOT mode using inline asm + labels, but cannot
	  be implemented in JIT mode ?
	  -> a possible but slower implementation would use the normal exception 
      throwing code but it would need to control the placement of the throw code
      (it needs to be exactly after the compare+branch).
	  -> perhaps add a PC offset intrinsic ?

  - efficient implementation of .ovf opcodes.

	  These are currently implemented as:
	  <ins which sets the condition codes>
	  b<cond> ex_label

	  Some overflow opcodes are now supported by LLVM SVN.

  - exception handling, unwinding.
    - SSA is disabled for methods with exception handlers    
	- How to obtain unwind info for LLVM compiled methods ?
	  -> this is now solved by converting the unwind info generated by LLVM
	     into our format.
	- LLVM uses the c++ exception handling framework, while we use our home grown
      code, and couldn't use the c++ one:
      - it's not supported under VC++ and other exotic platforms.
	  - it might be impossible to support filter clauses with it.

  - trampolines.
  
    The trampolines need a predictable call sequence, since they need to disasm
    the calling code to obtain register numbers / offsets.

    LLVM currently generates this code in non-JIT mode:
	   mov    -0x98(%rax),%eax
	   callq  *%rax
    Here, the vtable pointer is lost. 
    -> solution: use one vtable trampoline per class.

  - passing/receiving the IMT pointer/RGCTX.
    -> solution: pass them as normal arguments ?

  - argument passing.
  
	  LLVM does not allow the specification of argument registers etc. This means
      that all calls are made according to the platform ABI.

  - passing/receiving vtypes.

      Vtypes passed/received in registers are handled by the front end by using
	  a signature with scalar arguments, and loading the parts of the vtype into those
	  arguments.

	  Vtypes passed on the stack are handled using the 'byval' attribute.

  - ldaddr.

    Supported through alloca, we need to emit the load/store code.

  - types.

    The mono JIT uses pointer sized iregs/double fregs, while LLVM uses precisely
    typed registers, so we have to keep track of the precise LLVM type of each vreg.
    This is made easier because the IR is already in SSA form.
    An additional problem is that our IR is not consistent with types, i.e. i32/i64
	types are frequently used incorrectly.
*/

/*
  AOT SUPPORT:
  Emit LLVM bitcode into a .bc file, compile it using llc into a .s file, then
  append the AOT data structures to that file. For methods which cannot be
  handled by LLVM, the normal JIT compiled versions are used.
*/

/* FIXME: Normalize some aspects of the mono IR to allow easier translation, like:
 *   - each bblock should end with a branch
 *   - setting the return value, making cfg->ret non-volatile
 * - avoid some transformations in the JIT which make it harder for us to generate
 *   code.
 * - use pointer types to help optimizations.
 */
