FFTS Windows build fixes

This commit is contained in:
Kearwood Gilbert
2017-06-11 19:06:35 -07:00
parent 7c09f57eb9
commit bef32ab528
6 changed files with 26 additions and 7 deletions

View File

@@ -40,10 +40,11 @@
#include "ffts_static.h" #include "ffts_static.h"
#else #else
#include "codegen.h" #include "codegen.h"
#include <sys/mman.h>
#endif #endif
#include <errno.h> #include <errno.h>
#include <sys/mman.h>
#include <string.h> #include <string.h>
#include <limits.h> /* for PAGESIZE */ #include <limits.h> /* for PAGESIZE */
@@ -85,6 +86,7 @@ void ffts_free_1d(ffts_plan_t *p) {
//free(p->transforms); //free(p->transforms);
if(p->transforms) free(p->transforms); if(p->transforms) free(p->transforms);
#ifndef DYNAMIC_DISABLED
if(p->transform_base) { if(p->transform_base) {
if (mprotect(p->transform_base, p->transform_size, PROT_READ | PROT_WRITE)) { if (mprotect(p->transform_base, p->transform_size, PROT_READ | PROT_WRITE)) {
perror("Couldn't mprotect"); perror("Couldn't mprotect");
@@ -93,6 +95,7 @@ void ffts_free_1d(ffts_plan_t *p) {
munmap(p->transform_base, p->transform_size); munmap(p->transform_base, p->transform_size);
//free(p->transform_base); //free(p->transform_base);
} }
#endif
free(p); free(p);
} }

View File

@@ -55,7 +55,13 @@
#define PI 3.1415926535897932384626433832795028841971693993751058209 #define PI 3.1415926535897932384626433832795028841971693993751058209
static const __attribute__ ((aligned(64))) float w_data[16] = {
#if defined(_WIN32) || defined(_WIN64)
static const __declspec(align(64)) float w_data[16] =
#else
static const __attribute__ ((aligned(64))) float w_data[16] =
#endif
{
0.70710678118654757273731092936941, 0.70710678118654746171500846685376, 0.70710678118654757273731092936941, 0.70710678118654746171500846685376,
-0.70710678118654757273731092936941, -0.70710678118654746171500846685376, -0.70710678118654757273731092936941, -0.70710678118654746171500846685376,
1.0f, 0.70710678118654757273731092936941f, 1.0f, 0.70710678118654757273731092936941f,

View File

@@ -33,6 +33,10 @@
#include "ffts_nd.h" #include "ffts_nd.h"
#if defined(_WIN32) || defined(_WIN64)
#include "emmintrin.h"
#endif
#ifdef HAVE_NEON #ifdef HAVE_NEON
#include "neon.h" #include "neon.h"
#endif #endif
@@ -163,7 +167,7 @@ void ffts_transpose(uint64_t *in, uint64_t *out, int w, int h, uint64_t *buf) {
} }
#else #else
#ifdef HAVE_SSE #ifdef HAVE_SSE
uint64_t tmp[TSIZE*TSIZE] __attribute__((aligned(64))); __ALIGN64 uint64_t tmp[TSIZE*TSIZE];
int tx, ty; int tx, ty;
int x, y; int x, y;
int tw = w / TSIZE; int tw = w / TSIZE;

View File

@@ -151,7 +151,7 @@ ffts_plan_t *ffts_init_nd_real(int rank, size_t *Ns, int sign) {
bufsize = 2 * (Ns[0] * ((vol / Ns[0]) / 2 + 1) + vol); bufsize = 2 * (Ns[0] * ((vol / Ns[0]) / 2 + 1) + vol);
} }
p->buf = valloc(sizeof(float) * bufsize); p->buf = (void *)valloc(sizeof(float) * bufsize);
for(i=0;i<rank;i++) { for(i=0;i<rank;i++) {
p->Ms[i] = vol / p->Ns[i]; p->Ms[i] = vol / p->Ns[i];

View File

@@ -78,7 +78,7 @@
const data_t *din = (const data_t *)in; const data_t *din = (const data_t *)in;
data_t *dout = (data_t *)out; data_t *dout = (data_t *)out;
V r0_1, r2_3, r4_5, r6_7; V r0_1, r2_3, r4_5, r6_7;
float *LUT8 = p->ws + p->ws_is[0]; float *LUT8 = (float *)((size_t *)p->ws + p->ws_is[0]);
L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
@@ -90,7 +90,7 @@
const data_t *din = (const data_t *)in; const data_t *din = (const data_t *)in;
data_t *dout = (data_t *)out; data_t *dout = (data_t *)out;
V r0_1, r2_3, r4_5, r6_7; V r0_1, r2_3, r4_5, r6_7;
float *LUT8 = p->ws + p->ws_is[0]; float *LUT8 = (float *)((size_t *)p->ws + p->ws_is[0]);
L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);

View File

@@ -35,9 +35,15 @@
#ifndef __TYPES_H__ #ifndef __TYPES_H__
#define __TYPES_H__ #define __TYPES_H__
#if defined(_WIN32) || defined(_WIN64)
#define __INLINE static __forceinline
#define __ALIGN64 __declspec(align(64))
#else
#define __ALIGN64 __attribute__((aligned(64)))
#define __INLINE static inline __attribute__((always_inline)) #define __INLINE static inline __attribute__((always_inline))
#endif
#if defined(complex) #if defined(complex) && !defined(_WIN32) && !defined(_WIN64)
typedef complex float cdata_t; typedef complex float cdata_t;
#else #else
typedef float cdata_t[2]; typedef float cdata_t[2];