aboutsummaryrefslogtreecommitdiffstats
path: root/src/trFilelist.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/trFilelist.c')
-rw-r--r--src/trFilelist.c449
1 files changed, 449 insertions, 0 deletions
diff --git a/src/trFilelist.c b/src/trFilelist.c
new file mode 100644
index 0000000..8f65796
--- /dev/null
+++ b/src/trFilelist.c
@@ -0,0 +1,449 @@
+/*
+ Copyright (C) 2008 Renaissance Technologies Corp.
+ main developer: HP Wei <hp@rentec.com>
+ Copyright (C) 2006 Renaissance Technologies Corp.
+ main developer: HP Wei <hp@rentec.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING.
+ If not, write to the Free Software Foundation,
+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+/*** This code parses the synclist file generated by rsync in dry-run mode.
+ The minimum options for rsync are: -avW --dry-run --delete
+ e.g.
+ /usr/local/bin/rsync --rsync-path=/usr/local/bin/rsync
+ -avW --dry-run --delete
+ /src/path/ dest_machine:/target/path/ > output 2>&1
+A typical output of rsync may look like this:
+Client is very old version of rsync, upgrade recommended.
+building file list ... done
+xyz -> ./sub1/xyz
+file1
+fn
+fn1
+j
+sub1/hardlink_to_file1
+sub1/path/testfile
+sub1/xyz
+sent 337 bytes read 44 bytes 254.00 bytes/sec
+total size is 320751046 speedup is 841866.26
+
+This code does the following four things.
+(1) skip the lines before 'done' and after 'wrote'
+(2) output all directories and file_path
+ e.g
+ for an entry: sub1/path/testfile, the output is
+ sub1
+ sub1/path
+ sub1/path/testfile
+(3) xyz -> ./sub1/xyz
+ the output is
+ xyz
+(4) If file1 and sub1/hardlink are hardlinked
+ the output is
+ file1
+ sub1/hardlink file1
+
+For the example output above, the output of this code is:
+
+xyz
+file1
+fn
+fn1
+j
+sub1
+sub1/hardlink_to_file1 file1
+sub1/path
+sub1/path/testfile
+sub1/xyz
+
+***/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <limits.h> /* to define PATH_MAX */
+#include <sys/stat.h>
+
+#define TRUE 1
+#define FALSE 0
+
+/* Growable array of string pointers.
+ * str is the start of the slot array, endp points one past the last used
+ * slot, and capacity is the number of slots allocated; the element count
+ * is therefore endp - str. */
+struct string_list {
+ int capacity; /* number of char* slots allocated behind str */
+ char ** endp; /* one past the last stored string */
+ char ** str; /* first slot of the array */
+};
+
+/* Initialise *str_ptr as an empty list with room for n strings.
+ *
+ * n below 1 is raised to 1: a zero capacity would never grow by doubling,
+ * so the first append would write past the allocation.
+ * Prints a message and exits with -1 if the slot array cannot be allocated.
+ */
+void init_string_list(struct string_list * str_ptr, int n)
+{
+  if (n < 1) n = 1;
+
+  str_ptr->str = malloc((size_t)n * sizeof *str_ptr->str);
+  if (str_ptr->str == NULL) {
+    fprintf(stderr, "Out of memory\n");
+    exit(-1);
+  }
+  str_ptr->capacity = n;
+  str_ptr->endp = str_ptr->str; /* empty: end coincides with start */
+}
+
+/* Double the capacity of *slp, keeping the stored pointers and their order.
+ *
+ * A capacity of 0 grows to 1 so that the list always gains at least one
+ * slot. Prints a message and exits with -1 if the new capacity would not
+ * fit in an int or if memory cannot be obtained.
+ */
+void grow_string_list(struct string_list * slp)
+{
+  size_t used = (size_t)(slp->endp - slp->str);
+  int new_capacity;
+  char ** newp;
+
+  if (slp->capacity > INT_MAX / 2) {
+    fprintf(stderr, "String list too large\n");
+    exit(-1);
+  }
+  new_capacity = (slp->capacity > 0) ? 2 * slp->capacity : 1;
+
+  /* realloc preserves the existing slots, so no manual copy is needed */
+  newp = realloc(slp->str, (size_t)new_capacity * sizeof *newp);
+  if (newp == NULL) {
+    fprintf(stderr, "Out of memory\n");
+    exit(-1);
+  }
+
+  slp->str = newp;
+  slp->endp = newp + used;
+  slp->capacity = new_capacity;
+}
+
+/* Append a private copy of str to the end of *slp, growing the list first
+ * when every slot is in use.
+ *
+ * The copy is made with strdup(); the list keeps the pointer.
+ * Prints a message and exits with -1 if the copy cannot be allocated, so
+ * the list never holds a NULL entry for the search helpers to trip over.
+ */
+void append_string_list(char * str, struct string_list * slp)
+{
+  char * copy = strdup(str);
+
+  if (copy == NULL) {
+    fprintf(stderr, "Out of memory\n");
+    exit(-1);
+  }
+  if (slp->endp - slp->str == slp->capacity) grow_string_list(slp);
+  *slp->endp = copy;
+  (slp->endp)++;
+}
+
+/* Linear search for an exact match of str in *slp.
+ * Returns the index of the first equal entry, or -1 when there is none. */
+int find_string(char * str, struct string_list * slp)
+{
+  char ** cur;
+
+  for (cur = slp->str; cur < slp->endp; ++cur) {
+    if (!strcmp(*cur, str)) {
+      return (int)(cur - slp->str);
+    }
+  }
+  return -1;
+}
+
+/* Look for a list entry that is a prefix of str.
+ * Returns the index of the first such entry, or -1 when there is none.
+ * NOTE(review): the comparison is purely textual, so an entry "db" also
+ * matches "db2/file" -- confirm whether a '/' boundary check is wanted. */
+int has_sub_string(char * str, struct string_list *slp)
+{
+  char ** cur = slp->str;
+
+  while (cur < slp->endp) {
+    const char * entry = *cur;
+    if (strncmp(str, entry, strlen(entry)) == 0) {
+      return (int)(cur - slp->str);
+    }
+    ++cur;
+  }
+  return -1;
+}
+
+/* Look for a list entry that begins with str (str is a prefix of the entry).
+ * Returns the index of the first such entry, or -1 when there is none. */
+int has_newdir(char *str, struct string_list *slp)
+{
+  char ** cur = slp->str;
+
+  while (cur < slp->endp) {
+    if (strncmp(*cur, str, strlen(str)) == 0) {
+      return (int)(cur - slp->str);
+    }
+    ++cur;
+  }
+  return -1;
+}
+
+/* Growable array of unsigned ints.
+ * d is the start of the array, endp points one past the last used element,
+ * and capacity is the number of elements allocated; the element count is
+ * therefore endp - d. */
+struct uint_list {
+ int capacity; /* number of elements allocated behind d */
+ unsigned int * endp; /* one past the last stored value */
+ unsigned int * d; /* first element of the array */
+};
+
+/* Initialise *uil_ptr as an empty list with room for n values.
+ *
+ * n below 1 is raised to 1: a zero capacity would never grow by doubling,
+ * so the first append would write past the allocation.
+ * Prints a message and exits with -1 if the array cannot be allocated.
+ */
+void init_uint_list(struct uint_list * uil_ptr, int n)
+{
+  if (n < 1) n = 1;
+
+  uil_ptr->d = malloc((size_t)n * sizeof *uil_ptr->d);
+  if (uil_ptr->d == NULL) {
+    fprintf(stderr, "Out of memory\n");
+    exit(-1);
+  }
+  uil_ptr->capacity = n;
+  uil_ptr->endp = uil_ptr->d; /* empty: end coincides with start */
+}
+
+/* Double the capacity of *uilp, keeping the stored values and their order.
+ *
+ * A capacity of 0 grows to 1 so that the list always gains at least one
+ * element. Prints a message and exits with -1 if the new capacity would not
+ * fit in an int or if memory cannot be obtained.
+ */
+void grow_uint_list(struct uint_list * uilp)
+{
+  size_t used = (size_t)(uilp->endp - uilp->d);
+  int new_capacity;
+  unsigned int * newp;
+
+  if (uilp->capacity > INT_MAX / 2) {
+    fprintf(stderr, "Integer list too large\n");
+    exit(-1);
+  }
+  new_capacity = (uilp->capacity > 0) ? 2 * uilp->capacity : 1;
+
+  /* realloc preserves the existing values, so no manual copy is needed */
+  newp = realloc(uilp->d, (size_t)new_capacity * sizeof *newp);
+  if (newp == NULL) {
+    fprintf(stderr, "Out of memory\n");
+    exit(-1);
+  }
+
+  uilp->d = newp;
+  uilp->endp = newp + used;
+  uilp->capacity = new_capacity;
+}
+
+/* Store data in the next free element of *uilp, enlarging the array
+ * beforehand when every allocated element is already in use. */
+void append_uint_list(unsigned int data, struct uint_list * uilp)
+{
+  if (uilp->d + uilp->capacity == uilp->endp) {
+    grow_uint_list(uilp);
+  }
+  *(uilp->endp++) = data;
+}
+/* Linear search for data in *uilp.
+ * Returns the index of the first equal element, or -1 when there is none. */
+int find_unit(unsigned int data, struct uint_list * uilp)
+{
+  unsigned int * cur;
+
+  for (cur = uilp->d; cur < uilp->endp; ++cur) {
+    if (*cur == data) {
+      return (int)(cur - uilp->d);
+    }
+  }
+  return -1;
+}
+
+/* Program-wide lists, initialised in main().
+ * file_list and ino_list are appended to together in main(), so the index
+ * returned by a lookup in ino_list selects the matching path in file_list.
+ * "(a)" and "(b)" refer to the scenarios in the comment above
+ * get_dir_softlinks(). */
+struct string_list file_list;
+struct uint_list ino_list;
+struct string_list dir_list;
+struct string_list softlink_list; /* for (a) */
+struct string_list newdir_list; /* for (b) */
+
+/* Trim str in place: remove trailing spaces and newlines, then leading
+ * spaces. Other whitespace (tabs, '\r') is left alone.
+ *
+ * Safe for empty and all-blank strings: the trailing scan is bounded by the
+ * string length, so it never reads or writes before str[0].
+ */
+void strip(char * str)
+{
+  size_t len = strlen(str);
+  size_t lead = 0;
+
+  /* remove trailing \n and spaces */
+  while (len > 0 && (str[len - 1] == ' ' || str[len - 1] == '\n')) {
+    str[--len] = '\0';
+  }
+
+  /* remove leading spaces; memmove because source and target overlap */
+  while (str[lead] == ' ') ++lead;
+  if (lead > 0) {
+    memmove(str, str + lead, len - lead + 1); /* +1 carries the '\0' */
+  }
+}
+
+/* Currently a no-op: the function returns immediately and ignores str.
+ * The commented-out code below is the disabled implementation of step (2)
+ * from the file header; it would print each parent directory of str that is
+ * not yet in dir_list and then record it there. */
+void output_subs(char * str)
+{
+ return; /*************************** testing ***************/
+ /* to do (2) indicated in the above */
+ /********
+ char * pc;
+ char subs[PATH_MAX];
+ pc = strstr(str, "/");
+ if (!pc) return;
+
+ while (pc) {
+ strncpy(subs, str, pc-str);
+ subs[pc-str] = '\0';
+ if (find_string(subs, &dir_list)<0) {
+ printf("%s\n", subs);
+ append_string_list(subs, &dir_list);
+ }
+ pc = strstr(pc+1, "/");
+ }
+ ************/
+}
+
+/*** (a)
+ get those softlinks that points to a directory
+ this is to deal with the following scenario
+ previous structure
+ dir_path (a directory)
+ db (a directory)
+
+ newly updated structure on master
+ dir_path -> db
+ db
+
+ rsync --dry-run generates
+ dir_path -> db [a link is done on target]
+ deleting dir_path/sub/filename1 [wrong file gets removed ]
+ deleting dir_path/sub/filename2...
+
+ file_operations.c does this when dir_path -> db is due
+ delete dir_path (rm -rf)
+ make the softlink
+ But then the following delete will have undesired deletion.
+
+ ------------------------------------------------------------
+
+ (b)
+ t0 name -> xyz name -> xyz (target)
+ t1 name/ name -> xyz
+
+ rsync generates
+ name/ update_directory() won't have effect
+ name/f1 delivered to wrong place
+ name/f2
+ deleting name too late
+ ** the deletion should be done before not after.
+ For now, I will fail this code for this situation.
+
+***/
+/* First pass over the rsync dry-run listing in `filename`.
+ *
+ * For every non-blank line:
+ *   - "name -> target": if basedir/name resolves through stat() to a
+ *     directory, name is appended to softlink_list (scenario (a) above);
+ *   - otherwise, a line that contains no space and ends in '/' is appended
+ *     to newdir_list (scenario (b) above).
+ * Softlink entries that stat() cannot resolve, or whose full path does not
+ * fit in PATH_MAX, are skipped.
+ * Prints a message and exits with -1 if the listing cannot be opened.
+ */
+void get_dir_softlinks(char *filename, char * basedir) {
+  FILE * fd;
+  char line[PATH_MAX];
+  struct stat st;
+
+  if ((fd = fopen(filename, "r")) == NULL) {
+    fprintf(stderr, "Cannot open file -- %s \n", filename);
+    exit(-1);
+  }
+
+  while (fgets(line, PATH_MAX, fd) != NULL) { /* for each line in the file */
+    char *pc;
+    char fn[PATH_MAX];
+    int len;
+
+    strip(line);
+    if (line[0] == '\0') continue; /* skip blank line */
+
+    /* the softlink case is indicated by -> */
+    pc = strstr(line, " -> ");
+    if (pc) { /* it is a softlink */
+      *pc = '\0';
+
+      /* bounded formatting: basedir plus the entry may exceed PATH_MAX */
+      len = snprintf(fn, sizeof fn, "%s/%s", basedir, line);
+      if (len < 0 || (size_t)len >= sizeof fn) {
+        fprintf(stderr, "Path too long, skipped -- %s/%s\n", basedir, line);
+        continue;
+      }
+
+      /* stat() follows the link, so this tests the link target */
+      if (stat(fn, &st) < 0) continue; /* We skip this bad entry - no longer exist */
+
+      if (S_ISDIR(st.st_mode)) {
+        append_string_list(line, &softlink_list);
+      }
+    } else if (strstr(line, " ") == NULL) {
+      /* not a softlink: a line without ' ' and with trailing '/' is a directory */
+      if (line[strlen(line) - 1] == '/') {
+        append_string_list(line, &newdir_list);
+      }
+    }
+  }
+
+  fclose(fd);
+}
+
+
+/* Entry point: trFilelist synclist_filename basedir
+ *
+ * Reads the rsync dry-run listing twice. get_dir_softlinks() first collects
+ * directory softlinks and new-directory entries; the loop below then writes
+ * one line per entry to stdout:
+ *   - "deleting <path>" for a deletion that is neither under a recorded
+ *     directory softlink nor a prefix of a new-directory entry; a deletion
+ *     under a directory softlink is dropped silently, and a clash with a
+ *     new-directory entry is fatal;
+ *   - "<name>" for a "name -> target" softlink line;
+ *   - "<file> <target>" for a "file => target" hardlink line (rsync -H);
+ *   - for a path with link count > 1: "<path>" the first time its inode is
+ *     seen, "<path> <first_path>" afterwards;
+ *   - "<path>" for any other line that lstat() finds under basedir.
+ * Lines that do not name an existing path (e.g. rsync's header and trailer)
+ * are skipped, as are paths that do not fit in PATH_MAX.
+ *
+ * Returns 0 on success and -1 if the listing cannot be reopened; a usage
+ * error or the fatal clash exits with -1.
+ */
+int main(int argc, char * argv[])
+{
+  char * filename;
+  char * basedir;
+  FILE *fd;
+  char line[PATH_MAX];
+
+  if (argc < 3) {
+    fprintf(stderr, "Usage: trFilelist synclist_filename basedir\n");
+    exit(-1);
+  }
+
+  filename = argv[1];
+  basedir = argv[2];
+
+  init_string_list(&file_list, 10);
+  init_uint_list(&ino_list, 10);
+  init_string_list(&dir_list, 100);
+  init_string_list(&softlink_list, 10);
+  init_string_list(&newdir_list, 100);
+
+  get_dir_softlinks(filename, basedir);
+
+  if ((fd = fopen(filename, "r")) == NULL) {
+    fprintf(stderr, "Cannot open file -- %s \n", filename);
+    return -1;
+  }
+
+  while (fgets(line, PATH_MAX, fd) != NULL) { /* for each line in the file */
+    char *pc;
+    char fn[PATH_MAX];
+    struct stat st;
+    int newdir_flag;
+    int len;
+
+    strip(line);
+    if (strlen(line) == 0) continue; /* skip blank line */
+    if (strcmp(line, ".")==0) continue;
+    if (strcmp(line, "./")==0) continue;
+
+    /* first we look for deleting entry */
+    if (strncmp(line, "deleting ", 9)==0) {
+      /* deleting (directory) file_path */
+      char * p1, *p2, *pf;
+
+      /* p1 cannot be NULL here: the prefix test above matched a space */
+      p1 = strstr(line, " "); /* the first space */
+      p2 = strstr(p1+1, " "); /* old format: "deleting directory filepath" */
+      /* NOTE(review): a path that itself contains a space also takes the
+         p2 branch and loses its first word -- confirm this is acceptable */
+      pf = (p2) ? p2+1 : p1+1;
+
+      newdir_flag = has_newdir(pf, &newdir_list);
+
+      if ((has_sub_string(pf, &softlink_list)<0) && newdir_flag<0) {
+        /* see comments above get_dir_softlinks() */
+        printf("deleting %s\n", pf);
+      } else if (newdir_flag>=0) { /* temporary action */
+        fprintf(stderr, "CRITICAL ERROR: An old softlink has been changed to a directory!\n");
+        fprintf(stderr, " For now, we crash this code for human intervention\n");
+        fprintf(stderr, " line= %s\n", line);
+        exit(-1);
+      }
+
+      continue;
+    }
+
+    /* the softlink case is indicated by -> */
+    pc = strstr(line, " -> ");
+    if (pc) {
+      *pc = '\0';
+      output_subs(line);
+      printf("%s\n", line);
+      continue;
+    }
+
+    /* if rsync's -H is turned on, the output may contain
+       file => tar_hardlink_file (relative address)
+    */
+    pc = strstr(line, " => ");
+    if (pc) {
+      *pc = '\0';
+      output_subs(line);
+      printf("%s %s\n", line, pc+4);
+      continue;
+    }
+
+    /* the rest of the entries should be valid paths;
+       bounded formatting: basedir plus the entry may exceed PATH_MAX */
+    len = snprintf(fn, sizeof fn, "%s/%s", basedir, line);
+    if (len < 0 || (size_t)len >= sizeof fn) {
+      fprintf(stderr, "Path too long, skipped -- %s/%s\n", basedir, line);
+      continue;
+    }
+    if (lstat(fn, &st)<0) continue; /* We skip this bad entry -
+                                       (1) the header and tail lines
+                                       (2) perhaps the file no longer exists */
+
+    /* is this a hardlink? */
+    if (st.st_nlink > 1) {
+      int index;
+      output_subs(line);
+      /* NOTE(review): st_ino is narrowed to unsigned int, so two distinct
+         inodes could compare equal where ino_t is wider -- confirm */
+      if ((index = find_unit((unsigned int)st.st_ino, &ino_list))<0) {
+        append_uint_list((unsigned int)st.st_ino, &ino_list);
+        append_string_list(line, &file_list); /* relative path */
+        printf("%s\n", line);
+      } else {
+        printf("%s %s\n", line, file_list.str[index]);
+      }
+      continue;
+    }
+
+    /* all others */
+    output_subs(line);
+    printf("%s\n", line);
+  } /* end of one line */
+
+  fclose(fd);
+  return 0;
+}