/*
 *	file.c
 *
 *	file system operations
 */

#include "pg_migrator.h"
#include <sys/types.h>
#include <fcntl.h>


#ifdef EDB_NATIVE_LANG
#include <fcntl.h>
#endif

#include <sys/stat.h>
#ifdef WIN32
#include <windows.h>
#endif

#include "dynloader.h"
#include "storage/bufpage.h"

/*
 * Directory separator character for the platform being built:
 * '/' everywhere except WIN32 builds, which use a backslash.
 */
#ifndef WIN32
char		pathSeparator = '/';
#else
char		pathSeparator = '\\';
#endif


/* recursive directory copy; currently compiled out */
#ifdef NOT_USED
static int	copy_dir(const char *from, const char *to, bool force);
#endif
/* per-relation-file transfer primitives; both return NULL on success or an error message */
static const char *copyAndUpdateFile(migratorContext *ctx,
				  pageCnvCtx *pageConverter, const char *src,
				  const char *dst, bool force);
static const char *linkAndUpdateFile(migratorContext *ctx,
				pageCnvCtx *pageConverter, const char *src, const char *dst);
/* page-layout detection and converter plugin loading */
#ifdef PAGE_CONVERSION
static const char *getPageVersion(migratorContext *ctx,
			   uint16 *version, const char *pathName);
static pageCnvCtx *loadConverterPlugin(migratorContext *ctx,
					uint16 newPageVersion, uint16 oldPageVersion);
#endif
/* directory scanning: selector callback and portable scandir() wrapper */
static int	dir_matching_filenames(const struct dirent *scan_ent);
static int pg_scandir(migratorContext *ctx, const char *dirname,
		   struct dirent ***namelist, int (*selector) (const struct dirent *),
		   int (*cmp) (const void *, const void *));
/* hard-link creation through the Win32 API */
#ifdef WIN32
static int win32_pghardlink(const char *src, const char *dst);
#endif

/* home-grown scandir() replacement, built only for sun and WIN32 */
#if defined(sun) || defined(WIN32)
static int pg_scandir_internal(migratorContext *ctx, const char *dirname,
			   struct dirent ***namelist,
			   int (*selector) (const struct dirent *));
#endif


/*
 * transfer_related_files()
 *
 *	Copies/links every file in map->old_file (the old relation directory)
 *	whose name starts with "<old relfilenode><separator>".  Each such file is
 *	transferred into map->new_file under the new relfilenode, keeping the
 *	original suffix (separator included).
 *
 *	A failed directory scan is reported at PG_FATAL level; no entry of the
 *	result list is touched in that case.
 */
static void
transfer_related_files(migratorContext *ctx, pageCnvCtx *pageConverter,
					   const FileNameMap *map, char separator)
{
	char		old_file[MAXPGPATH];
	char		new_file[MAXPGPATH];
	struct dirent **namelist = NULL;
	int			numFiles;

	/* dir_matching_filenames() takes its prefix from this shared buffer */
	snprintf(scandir_file_pattern, sizeof(scandir_file_pattern), "%u%c",
			 map->old, separator);
	numFiles = pg_scandir(ctx, map->old_file, &namelist,
						  dir_matching_filenames, NULL);

	if (numFiles < 0)
	{
		/*
		 * A negative count means the scan itself failed; namelist is not
		 * usable, so do not walk or free it.
		 */
		pg_log(ctx, PG_FATAL, "could not scan directory \"%s\": %s\n",
			   map->old_file, strerror(errno));
		return;
	}

	while (numFiles-- > 0)
	{
		const char *name = namelist[numFiles]->d_name;

		/*
		 * The selector only accepts names beginning with the pattern, which
		 * ends in the separator, so strchr() always finds it here.
		 */
		snprintf(old_file, sizeof(old_file), "%s/%s", map->old_file, name);
		snprintf(new_file, sizeof(new_file), "%s/%u%s", map->new_file,
				 map->new, strchr(name, separator));

		unlink(new_file);
		transfer_relfile(ctx, pageConverter, old_file, new_file,
						 map->old_nspname, map->old_relname,
						 map->new_nspname, map->new_relname);

		pg_free(namelist[numFiles]);
	}

	pg_free(namelist);
}


/*
 * transfer_single_new_db()
 *
 *	Transfers (copies or links, as decided by transfer_relfile()) every
 *	relation described by the "maps" array, which holds "size" entries.
 *
 *	For each mapping the base relation file is transferred first.  When both
 *	clusters are version 8.4 or later, companion files named
 *	"<relfilenode>_*" (fsm and vm files) follow.  Finally any additional
 *	segments named "<relfilenode>.N" are transferred.  Any pre-existing
 *	target file is unlinked before the transfer.
 */
void
transfer_single_new_db(migratorContext *ctx, pageCnvCtx *pageConverter,
					   FileNameMap *maps, int size)
{
	int			mapnum;

	for (mapnum = 0; mapnum < size; mapnum++)
	{
		char		old_file[MAXPGPATH];
		char		new_file[MAXPGPATH];

		snprintf(old_file, sizeof(old_file), "%s/%u", maps[mapnum].old_file, maps[mapnum].old);
		snprintf(new_file, sizeof(new_file), "%s/%u", maps[mapnum].new_file, maps[mapnum].new);

		/* Copying files might take some time, so give feedback. */
		pg_log(ctx, PG_REPORT, OVERWRITE_MESSAGE, old_file);

		/*
		 * Copy/link the relation file to the new cluster
		 */
		unlink(new_file);
		transfer_relfile(ctx, pageConverter, old_file, new_file,
						 maps[mapnum].old_nspname, maps[mapnum].old_relname,
						 maps[mapnum].new_nspname, maps[mapnum].new_relname);

		/*
		 * Now copy/link any fsm and vm files, if they exist
		 */
		if (GET_MAJOR_VERSION(ctx->old.pg_version) >= 804 &&
			GET_MAJOR_VERSION(ctx->new.pg_version) >= 804)
			transfer_related_files(ctx, pageConverter, &maps[mapnum], '_');

		/*
		 * Now copy/link any related segments as well. Remember, PG breaks
		 * large files into 1GB segments, the first segment has no extension,
		 * subsequent segments are named relfilenode.1, relfilenode.2,
		 * relfilenode.3, ...  'fsm' and 'vm' files use underscores so are
		 * not matched by this pass.
		 */
		transfer_related_files(ctx, pageConverter, &maps[mapnum], '.');
	}
}


/*
 * transfer_relfile()
 *
 *	Moves one relation file from the old cluster into the new cluster, by
 *	copying when ctx->transfer_mode is TRANSFER_MODE_COPY and by linking
 *	otherwise.  Failures are reported through pg_log() at PG_FATAL level.
 *	The namespace and relation names are used only in that error report.
 */
void
transfer_relfile(migratorContext *ctx, pageCnvCtx *pageConverter, const char *oldfile,
		const char *newfile, const char *oldnspname, const char *oldrelname,
		const char *newnspname, const char *newrelname)
{
	const char *errmsg;

	/* a link cannot rewrite pages, so a page converter rules out link mode */
	if ((pageConverter != NULL) && (ctx->transfer_mode == TRANSFER_MODE_LINK))
		pg_log(ctx, PG_FATAL, "this migration requires page-by-page conversion, "
			   "you must use copy-mode instead of link-mode\n");

	if (ctx->transfer_mode != TRANSFER_MODE_COPY)
	{
		pg_log(ctx, PG_INFO, "linking %s to %s\n", newfile, oldfile);

		errmsg = linkAndUpdateFile(ctx, pageConverter, oldfile, newfile);
		if (errmsg != NULL)
			pg_log(ctx, PG_FATAL,
				   "error while creating link from %s.%s(%s) to %s.%s(%s): %s\n",
				   oldnspname, oldrelname, oldfile,
				   newnspname, newrelname, newfile, errmsg);
		return;
	}

	pg_log(ctx, PG_INFO, "copying %s to %s\n", oldfile, newfile);

	errmsg = copyAndUpdateFile(ctx, pageConverter, oldfile, newfile, true);
	if (errmsg != NULL)
		pg_log(ctx, PG_FATAL, "error while copying %s.%s(%s) to %s.%s(%s): %s\n",
			   oldnspname, oldrelname, oldfile,
			   newnspname, newrelname, newfile, errmsg);
}


/*
 *	copy_file()
 *
 *	Copies the contents of srcfile into dstfile in chunks of COPY_BUF_SIZE
 *	bytes.  The destination is created if it does not exist; when "force"
 *	is false an already existing destination is an error (O_EXCL).
 *
 *	Returns 1 on success and -1 on failure.  On failure errno describes the
 *	error that caused the failure; it is preserved across the cleanup calls
 *	so that callers can safely report strerror(errno).  NULL arguments yield
 *	EINVAL, and a short write that did not set errno is reported as ENOSPC.
 *
 *	NOTE: the destination is opened without O_TRUNC, so forcing a copy over a
 *	longer existing file leaves the old tail in place.
 */
int
copy_file(const char *srcfile, const char *dstfile, bool force)
{

#define COPY_BUF_SIZE (50 * BLCKSZ)

	int			src_fd = -1;
	int			dest_fd = -1;
	char	   *buffer = NULL;
	int			save_errno = 0;
	int			result = -1;

	if ((srcfile == NULL) || (dstfile == NULL))
	{
		errno = EINVAL;
		return -1;
	}

	if ((src_fd = open(srcfile, O_RDONLY, 0)) < 0)
		return -1;

	if ((dest_fd = open(dstfile, O_RDWR | O_CREAT | (force ? 0 : O_EXCL), S_IRUSR | S_IWUSR)) < 0)
	{
		save_errno = errno;
		goto cleanup;
	}

	buffer = (char *) malloc(COPY_BUF_SIZE);

	if (buffer == NULL)
	{
		save_errno = ENOMEM;
		goto cleanup;
	}

	/* perform data copying i.e read src source, write to destination */
	while (true)
	{
		ssize_t		nbytes = read(src_fd, buffer, COPY_BUF_SIZE);

		if (nbytes < 0)
		{
			save_errno = errno;
			goto cleanup;
		}

		if (nbytes == 0)
			break;

		errno = 0;

		if (write(dest_fd, buffer, nbytes) != nbytes)
		{
			/* if write didn't set errno, assume problem is no disk space */
			save_errno = (errno != 0) ? errno : ENOSPC;
			goto cleanup;
		}
	}

	result = 1;

cleanup:
	free(buffer);

	/* descriptor 0 is a valid descriptor, so test against -1, not 0 */
	if (src_fd >= 0)
		close(src_fd);

	if (dest_fd >= 0)
		close(dest_fd);

	/* close()/free() may have overwritten errno; restore the real cause */
	if (result < 0)
		errno = save_errno;

	return result;
}


#ifdef NOT_USED
/*
 * copy_dir()
 *
 *	Copies either a directory or a single file.  If "src" names a directory
 *	it is created at "dst" (an already existing directory is accepted) and
 *	its entries are copied one by one; if opening "src" as a directory
 *	fails with ENOTDIR it is copied as a plain file instead.
 *
 *	Regular files are always copied with overwrite enabled.  Subdirectories
 *	are descended into only when "force" is true.  Entries that are neither
 *	directories nor regular files are skipped.
 *
 *	Returns 1 on success, -1 on the first failure (including a failure in a
 *	nested directory).  The directory handle is released on every path.
 */
static int
copy_dir(const char *src, const char *dst, bool force)
{
	DIR		   *srcdir;
	struct dirent *de;
	struct stat fst;
	int			result = 1;
	int			save_errno;

	if ((src == NULL) || (dst == NULL))
		return -1;

	/*
	 * Try to open the source directory - if it turns out not to be a
	 * directory, assume that it's a file and copy that instead.
	 */
	if ((srcdir = opendir(src)) == NULL)
	{
		if (errno == ENOTDIR)
			return copy_file(src, dst, true);
		return -1;
	}

	/* an already existing target directory is not an error */
	if (mkdir(dst, S_IRUSR | S_IWUSR | S_IXUSR) != 0 && errno != EEXIST)
	{
		save_errno = errno;
		closedir(srcdir);
		errno = save_errno;
		return -1;
	}

	while ((de = readdir(srcdir)) != NULL)
	{
		char		src_file[MAX_STRING];
		char		dest_file[MAX_STRING];

		if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
			continue;

		snprintf(src_file, sizeof(src_file), "%s/%s", src, de->d_name);
		snprintf(dest_file, sizeof(dest_file), "%s/%s", dst, de->d_name);

		if (stat(src_file, &fst) < 0)
		{
			result = -1;
			break;
		}

		/*
		 * Use the S_IS*() macros: testing the S_IFDIR bit alone would also
		 * match other file types whose type code shares that bit.
		 */
		if (S_ISDIR(fst.st_mode))
		{
			/* recurse to handle subdirectories */
			if (force && copy_dir(src_file, dest_file, true) == -1)
			{
				result = -1;
				break;
			}
		}
		else if (S_ISREG(fst.st_mode))
		{
			if (copy_file(src_file, dest_file, true) == -1)
			{
				result = -1;
				break;
			}
		}
	}

	/* keep the errno of the failing call visible to the caller */
	save_errno = errno;
	closedir(srcdir);
	errno = save_errno;

	return result;
}
#endif


/*
 * copyAndUpdateFile()
 *
 *	Copies a relation file from src to dst.  If pageConverter is NULL the
 *	file is copied verbatim with pg_copy_file(), honoring "force".  If
 *	pageConverter is non-NULL the file is converted, either in one step by
 *	the plugin's convertFile function or page by page (BLCKSZ bytes at a
 *	time) through its convertPage function.
 *
 *	NOTE: the page-by-page path always creates dst with O_EXCL; "force" is
 *	not consulted there.
 *
 *	Returns NULL on success, otherwise a message describing the failure.
 */
static const char *
copyAndUpdateFile(migratorContext *ctx, pageCnvCtx *pageConverter,
				  const char *src, const char *dst, bool force)
{
	if (pageConverter == NULL)
	{
		if (pg_copy_file(src, dst, force) == -1)
		{
#ifdef WIN32
			_dosmaperr(GetLastError());
#endif
			return strerror(errno);
		}
		else
			return NULL;
	}
	else
	{
		/*
		 * We have a pageConverter object - that implies that the
		 * PageLayoutVersion differs between the two clusters so we have to
		 * perform a page-by-page conversion.
		 *
		 * If the pageConverter can convert the entire file at once, invoke
		 * that plugin function, otherwise, read each page in the relation
		 * file and call the convertPage plugin function.
		 */

#ifdef PAGE_CONVERSION
		if (pageConverter->convertFile)
			return pageConverter->convertFile(pageConverter->pluginData,
												dst, src);
		else
#endif
		{
			int			src_fd;
			int			dstfd;
			char		buf[BLCKSZ];
			ssize_t		bytesRead;
			const char *msg = NULL;

#ifdef PAGE_CONVERSION
			/* the symbol lookup may have found neither entry point */
			if (pageConverter->convertPage == NULL)
				return "page converter plugin provides no conversion function";
#endif

			if ((src_fd = open(src, O_RDONLY, 0)) < 0)
				return "can't open source file";

			if ((dstfd = open(dst, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR)) < 0)
			{
				/* don't leak the source descriptor */
				close(src_fd);
				return "can't create destination file";
			}

			while ((bytesRead = read(src_fd, buf, BLCKSZ)) == BLCKSZ)
			{
#ifdef PAGE_CONVERSION
				/* the page is converted in place: buf is both input and output */
				if ((msg = pageConverter->convertPage(pageConverter->pluginData, buf, buf)) != NULL)
					break;
#endif
				if (write(dstfd, buf, BLCKSZ) != BLCKSZ)
				{
					msg = "can't write new page to destination";
					break;
				}
			}

			close(src_fd);
			close(dstfd);

			if (msg)
				return msg;
			else if (bytesRead < 0)
				return "can't read source file";	/* read() itself failed */
			else if (bytesRead != 0)
				return "found partial page in source file";
			else
				return NULL;
		}
	}
}


/*
 * linkAndUpdateFile()
 *
 *	Links dst to the existing relation file src by calling pg_link_file().
 *	(check_hard_link() in this file probes the same call and describes the
 *	result as a hard link.)  This is how a true in-place update is done: if
 *	the on-disk format of the new cluster is bit-for-bit compatible with the
 *	old one, each relation can be linked instead of copied.
 *
 *	Linking cannot transform data, so a non-NULL pageConverter is rejected.
 *
 *	Returns NULL on success, otherwise a message describing the failure.
 */
static const char *
linkAndUpdateFile(migratorContext *ctx, pageCnvCtx *pageConverter,
				  const char *src, const char *dst)
{
	if (pageConverter != NULL)
		return "Can't in-place update this cluster, page-by-page conversion is required";

	if (pg_link_file(src, dst) != -1)
		return NULL;

	/* linking failed: report the reason */
#ifdef WIN32
	_dosmaperr(GetLastError());
#endif
	return strerror(errno);
}

/*
 * pg_scandir()
 *
 *	Portable front end for scandir().  On sun and WIN32 builds the work is
 *	delegated to pg_scandir_internal(), which takes no comparison function,
 *	so "cmp" is ignored there.  Everywhere else the C library's scandir() is
 *	called.
 *
 *	For the library call we try to guess which libc's need const, and which
 *	don't.  The net goal is to suppress a compiler warning due to a prototype
 *	mismatch of const usage.  Ideally we would do this via autoconf, but
 *	Postgres's autoconf doesn't test for this and it is overkill to add
 *	autoconf just for this.  scandir() is from BSD 4.3, which had the third
 *	argument as non-const.  Linux and other C libraries have updated it to
 *	use a const.
 *	http://unix.derkeiler.com/Mailing-Lists/FreeBSD/questions/2005-12/msg00214.html
 *
 *	Returns whatever the underlying scan returns.
 */
static int
pg_scandir(migratorContext *ctx, const char *dirname,
		   struct dirent ***namelist, int (*selector) (const struct dirent *),
		   int (*cmp) (const void *, const void *))
{
	int			nentries;

#if defined(sun) || defined(WIN32)
	nentries = pg_scandir_internal(ctx, dirname, namelist, selector);
#elif defined(freebsd) || defined(bsdi) || defined(darwin) || defined(openbsd)
	/* no const */
	nentries = scandir(dirname, namelist, (int (*) (struct dirent *))selector, cmp);
#else
	/* use const */
	nentries = scandir(dirname, namelist, selector, cmp);
#endif

	return nentries;
}


#if defined(sun) || defined(WIN32)
/*
 * pg_scandir_internal()
 *
 * We'll provide our own scandir function for sun, since it is not
 * part of the standard system library.
 *
 * Returns count of files that meet the selection criteria coded in
 * the function pointed to by selector.  Creates an array of pointers
 * to dirent structures.  Address of array returned in namelist; it stays
 * NULL when nothing matched.  Both the array and each entry are obtained
 * from malloc/realloc.
 *
 * On allocation failure everything collected so far is released, the
 * directory is closed, *namelist is left NULL and -1 is returned.
 *
 * Note that the number of dirent structures needed is dynamically
 * allocated using realloc.  Realloc can be inefficient if invoked a
 * large number of times.  Its use in pg_migrator is to find filesystem
 * filenames that have extended beyond the initial segment (file.1,
 * .2, etc.) and should therefore be invoked a small number of times.
 */
static int
pg_scandir_internal(migratorContext *ctx, const char *dirname,
		  struct dirent ***namelist, int (*selector) (const struct dirent *))
{
	DIR		   *dirdesc;
	struct dirent *direntry;
	struct dirent **list = NULL;
	int			count = 0;

	*namelist = NULL;

	if ((dirdesc = opendir(dirname)) == NULL)
	{
		/* "%m" is a glibc extension, so format the error text explicitly */
		pg_log(ctx, PG_FATAL, "Could not open directory \"%s\": %s\n",
			   dirname, strerror(errno));
		return -1;
	}

	while ((direntry = readdir(dirdesc)) != NULL)
	{
		struct dirent **grown;
		struct dirent *copy;
		size_t		entrysize;

		/* Invoke the selector function to see if the direntry matches */
		if (!(*selector) (direntry))
			continue;

		/* grow through a temporary so the old array survives a failure */
		grown = (struct dirent **) realloc((void *) list,
							(size_t) (count + 1) * sizeof(struct dirent *));
		if (grown == NULL)
			goto out_of_memory;
		list = grown;

		/* copy only the fixed header plus the bytes the name really uses */
		entrysize = sizeof(struct dirent) - sizeof(direntry->d_name) +
			strlen(direntry->d_name) +1;

		copy = (struct dirent *) malloc(entrysize);
		if (copy == NULL)
			goto out_of_memory;

		memcpy(copy, direntry, entrysize);
		list[count++] = copy;
	}

	closedir(dirdesc);

	*namelist = list;
	return count;

out_of_memory:
	while (count > 0)
		free(list[--count]);
	free(list);
	closedir(dirdesc);
	errno = ENOMEM;
	return -1;
}
#endif


/*
 *	dir_matching_filenames
 *
 *	Directory-scan selector: accepts an entry (returns 1) when its name
 *	begins with the current contents of scandir_file_pattern, and rejects it
 *	(returns 0) otherwise.
 */
static int
dir_matching_filenames(const struct dirent *scan_ent)
{
	/* prefix comparison only: whatever follows the pattern varies per file */
	size_t		prefix_len = strlen(scandir_file_pattern);

	return (strncmp(scandir_file_pattern, scan_ent->d_name, prefix_len) == 0) ? 1 : 0;
}


#ifdef PAGE_CONVERSION
/*
 * setupPageConverter()
 *
 *	Reads the PageLayoutVersion of the old and of the new cluster from the
 *	file global/<pg_database_oid> under each cluster's data directory.  When
 *	the two versions match, no conversion is needed and *result is set to
 *	NULL.  When they differ, a converter plugin is loaded and *result is
 *	set to the pageCnvCtx describing it.
 *
 *	Returns NULL on success.  On failure an error message (a null-terminated
 *	string) is returned and *result is left untouched.
 */
const char *
setupPageConverter(migratorContext *ctx, pageCnvCtx **result)
{
	char		old_relpath[MAXPGPATH];
	char		new_relpath[MAXPGPATH];
	uint16		old_layout;
	uint16		new_layout;
	const char *errmsg;
	pageCnvCtx *plugin;

	snprintf(old_relpath, sizeof(old_relpath), "%s/global/%u", ctx->old.pgdata,
			 ctx->old.pg_database_oid);
	snprintf(new_relpath, sizeof(new_relpath), "%s/global/%u", ctx->new.pgdata,
			 ctx->new.pg_database_oid);

	errmsg = getPageVersion(ctx, &old_layout, old_relpath);
	if (errmsg != NULL)
		return errmsg;

	errmsg = getPageVersion(ctx, &new_layout, new_relpath);
	if (errmsg != NULL)
		return errmsg;

	/* identical layouts: nothing to convert */
	if (old_layout == new_layout)
	{
		*result = NULL;
		return NULL;
	}

	/*
	 * The layouts differ, so look for a plugin that knows how to turn pages
	 * of the old layout into pages of the new layout.
	 */
	plugin = loadConverterPlugin(ctx, new_layout, old_layout);
	if (plugin == NULL)
		return "can't find plugin to convert from old page layout to new page layout";

	*result = plugin;
	return NULL;
}


/*
 * getPageVersion()
 *
 *	Retrieves the PageLayoutVersion for the given relation by reading one
 *	page header from the start of the file at pathName.
 *
 *	Returns NULL on success (and stores the PageLayoutVersion at *version),
 *	if an error occurs, this function returns an error message (in the form
 *	of a null-terminated string) and leaves *version untouched.  The file
 *	descriptor is closed on every path.
 */
static const char *
getPageVersion(migratorContext *ctx, uint16 *version, const char *pathName)
{
	int			relfd;
	PageHeaderData page;
	ssize_t		bytesRead;

	if ((relfd = open(pathName, O_RDONLY, 0)) < 0)
		return "can't open relation";

	bytesRead = read(relfd, &page, sizeof(page));

	/* the header is in memory (or the read failed); either way we are done */
	close(relfd);

	if (bytesRead != (ssize_t) sizeof(page))
		return "can't read page header";

	*version = PageGetPageLayoutVersion(&page);

	return NULL;
}


/*
 * loadConverterPlugin()
 *
 *	Loads the page-converter plugin library for the given pair of page
 *	layout versions and collects pointers to the functions it exports
 *	("init", "convertFile", "convertPage" and "fini").  The library name is
 *	derived from the two versions: converting layout 3 to layout 4, for
 *	example, looks for ./plugins/convertLayout_3_to_4 followed by DLSUFFIX.
 *
 *	Returns a newly allocated pageCnvCtx holding the function pointers, or
 *	NULL if the library could not be opened.  If the plugin exports an
 *	initializer it is invoked before returning.
 */
static pageCnvCtx *
loadConverterPlugin(migratorContext *ctx, uint16 newPageVersion, uint16 oldPageVersion)
{
	char		pluginName[MAXPGPATH];
	void	   *plugin;
	pageCnvCtx *converter;

	/*
	 * FIXME: we are searching for plugins relative to the current directory,
	 * we should really search relative to our own executable instead.
	 */
	snprintf(pluginName, sizeof(pluginName), "./plugins/convertLayout_%d_to_%d%s",
			 oldPageVersion, newPageVersion, DLSUFFIX);

	plugin = pg_dlopen(pluginName);
	if (plugin == NULL)
		return NULL;

	converter = (pageCnvCtx *) pg_malloc(ctx, sizeof(*converter));

	converter->old.PageVersion = oldPageVersion;
	converter->new.PageVersion = newPageVersion;

	/* any of these lookups may come back NULL if the symbol is not exported */
	converter->startup = (pluginStartup) pg_dlsym(plugin, "init");
	converter->convertFile = (pluginConvertFile) pg_dlsym(plugin, "convertFile");
	converter->convertPage = (pluginConvertPage) pg_dlsym(plugin, "convertPage");
	converter->shutdown = (pluginShutdown) pg_dlsym(plugin, "fini");
	converter->pluginData = NULL;

	/* give the plugin a chance to set itself up, if it asked for one */
	if (converter->startup)
		converter->startup(MIGRATOR_API_VERSION, &converter->pluginVersion,
						   newPageVersion, oldPageVersion, &converter->pluginData);

	return converter;
}
#endif


/*
 * check_hard_link()
 *
 *	Verifies that pg_link_file() can link a file of the old data directory
 *	into the new data directory, by linking the old cluster's PG_VERSION to
 *	a scratch name (PG_VERSION.linktest) in the new cluster.  A failure is
 *	reported at PG_FATAL level.  The scratch link is removed afterwards.
 */
void
check_hard_link(migratorContext *ctx)
{
	char		probe_target[MAXPGPATH];
	char		probe_link[MAXPGPATH];

	snprintf(probe_target, sizeof(probe_target), "%s/PG_VERSION", ctx->old.pgdata);
	snprintf(probe_link, sizeof(probe_link), "%s/PG_VERSION.linktest", ctx->new.pgdata);

	/* drop any leftover scratch link; it is fine if there is none */
	unlink(probe_link);

	if (pg_link_file(probe_target, probe_link) != -1)
	{
		unlink(probe_link);
		return;
	}

#ifdef WIN32
	_dosmaperr(GetLastError());
#endif
	pg_log(ctx, PG_FATAL,
		"Could not create hard link between old and new data directories:  %s\n"
		"In link mode the old and new data directories must be on the same file system volume.\n",
		strerror(errno));

	unlink(probe_link);
}

#ifdef WIN32
/*
 * win32_pghardlink()
 *
 *	Creates dst as a hard link to the existing file src through the Win32
 *	API.  Returns 0 on success.  On failure the Windows error code is passed
 *	to _dosmaperr() and -1 is returned.
 */
static int
win32_pghardlink(const char *src, const char *dst)
{
	/*
	 *	CreateHardLinkA takes the new link name first and returns zero for
	 *	failure
	 *	http://msdn.microsoft.com/en-us/library/aa363860(VS.85).aspx
	 */
	if (CreateHardLinkA(dst, src, NULL) != 0)
		return 0;

	_dosmaperr(GetLastError());
	return -1;
}
#endif
