/*
   Copyright (C)  2008 Renaissance Technologies Corp.
                  main developer: HP Wei
   Copyright (C)  2006 Renaissance Technologies Corp.
                  main developer: HP Wei

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/

/***
   This code parses the synclist file generated by rsync in dry-run mode.
   The minimum options for rsync are: -avW --dry-run --delete
   e.g.
     /usr/local/bin/rsync --rsync-path=/usr/local/bin/rsync
        -avW --dry-run --delete
        /src/path/ dest_machine:/target/path/ > output 2>&1

   A typical output of rsync may look like this:

     Client is very old version of rsync, upgrade recommended.
     building file list ... done
     xyz -> ./sub1/xyz
     file1
     fn
     fn1
     j
     sub1/hardlink_to_file1
     sub1/path/testfile
     sub1/xyz
     sent 337 bytes read 44 bytes 254.00 bytes/sec
     total size is 320751046 speedup is 841866.26

   This code does the following four things.
   (1) skip the lines before 'done' and after 'wrote'
       (in practice: any line that is not an existing path under basedir
       is dropped, because lstat() on it fails)
   (2) output all directories and file_path
       e.g. for an entry: sub1/path/testfile, the output is
         sub1
         sub1/path
         sub1/path/testfile
       NOTE: this step is currently disabled -- output_subs() returns
       without printing anything.
   (3) xyz -> ./sub1/xyz
       the output is
         xyz
   (4) If file1 and sub1/hardlink are hardlinked, the output is
         file1
         sub1/hardlink file1

   For the example output above, the output of this code is:
     xyz
     file1
     fn
     fn1
     j
     sub1
     sub1/hardlink_to_file1 file1
     sub1/path
     sub1/path/testfile
     sub1/xyz
***/

#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L /* lstat() and PATH_MAX under strict ISO C modes */
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <limits.h> /* to define PATH_MAX */
#include <sys/stat.h>

#ifndef PATH_MAX
#define PATH_MAX 4096 /* fallback for systems that do not define it */
#endif

#define TRUE 1
#define FALSE 0

/* ------------------------------------------------------------------ */
/* small private helpers                                               */
/* ------------------------------------------------------------------ */

/* realloc() that terminates the program instead of returning NULL.
   Passing ptr == NULL makes it behave like malloc(). */
static void *checked_realloc(void *ptr, size_t nbytes)
{
  void *p = realloc(ptr, nbytes);

  if (p == NULL) {
    fprintf(stderr, "Out of memory\n");
    exit(-1);
  }
  return p;
}

/* Return twice the given capacity (at least 1); terminates the program
   if doubling would overflow an int. */
static int doubled_capacity(int capacity)
{
  if (capacity < 1) {
    return 1;
  }
  if (capacity > INT_MAX / 2) {
    fprintf(stderr, "List too large\n");
    exit(-1);
  }
  return 2 * capacity;
}

/* Write "basedir/rel" into dst.  Returns 1 on success, 0 (after a
   warning on stderr) if the result does not fit into dst_size bytes. */
static int build_path(char *dst, size_t dst_size,
                      const char *basedir, const char *rel)
{
  int n = snprintf(dst, dst_size, "%s/%s", basedir, rel);

  if (n < 0 || (size_t)n >= dst_size) {
    fprintf(stderr, "Path too long, entry skipped -- %s/%s\n", basedir, rel);
    return 0;
  }
  return 1;
}

/* ------------------------------------------------------------------ */
/* growable list of strings                                            */
/* ------------------------------------------------------------------ */

struct string_list {
  int capacity;  /* number of slots allocated in str */
  char **endp;   /* one past the last used slot */
  char **str;    /* the slots; each used slot owns a heap copy */
};

/* Make *str_ptr an empty list with room for n strings (at least one). */
void init_string_list(struct string_list *str_ptr, int n)
{
  if (n < 1) {
    n = 1; /* a zero capacity could never grow by doubling */
  }
  str_ptr->str = checked_realloc(NULL, (size_t)n * sizeof *str_ptr->str);
  str_ptr->capacity = n;
  str_ptr->endp = str_ptr->str;
}

/* Double the capacity of the list, keeping its contents. */
void grow_string_list(struct string_list *slp)
{
  size_t used = (size_t)(slp->endp - slp->str);
  int new_capacity = doubled_capacity(slp->capacity);

  slp->str = checked_realloc(slp->str,
                             (size_t)new_capacity * sizeof *slp->str);
  slp->endp = slp->str + used;
  slp->capacity = new_capacity;
}

/* Append a private copy of str to the list, growing it when full. */
void append_string_list(const char *str, struct string_list *slp)
{
  size_t nbytes = strlen(str) + 1;
  char *copy;

  if (slp->endp - slp->str == slp->capacity) {
    grow_string_list(slp);
  }
  copy = checked_realloc(NULL, nbytes);
  memcpy(copy, str, nbytes);
  *slp->endp = copy;
  (slp->endp)++;
}

/*************** change to return index ****/
/* Return the index of the first entry equal to str, or -1 if none. */
int find_string(const char *str, struct string_list *slp)
{
  int i;
  int n = (int)(slp->endp - slp->str);

  for (i = 0; i < n; ++i) {
    if (strcmp((slp->str)[i], str) == 0) {
      return i;
    }
  }
  return -1;
}

/* Find if a string in the string-list is a leading sub-string of str.
   Returns the index of the first such entry, or -1 if none. */
int has_sub_string(const char *str, struct string_list *slp)
{
  int i;
  int n = (int)(slp->endp - slp->str);

  for (i = 0; i < n; ++i) {
    const char *entry = (slp->str)[i];

    if (strncmp(str, entry, strlen(entry)) == 0) {
      return i;
    }
  }
  return -1;
}

/* Find if str is a leading sub-string of one of the entries in slp.
   Returns the index of the first such entry, or -1 if none. */
int has_newdir(const char *str, struct string_list *slp)
{
  int i;
  int n = (int)(slp->endp - slp->str);
  size_t len = strlen(str); /* loop invariant */

  for (i = 0; i < n; ++i) {
    if (strncmp((slp->str)[i], str, len) == 0) {
      return i;
    }
  }
  return -1;
}

/* ------------------------------------------------------------------ */
/* growable list of unsigned ints                                      */
/* ------------------------------------------------------------------ */

struct uint_list {
  int capacity;        /* number of slots allocated in d */
  unsigned int *endp;  /* one past the last used slot */
  unsigned int *d;     /* the slots */
};

/* Make *uil_ptr an empty list with room for n values (at least one). */
void init_uint_list(struct uint_list *uil_ptr, int n)
{
  if (n < 1) {
    n = 1; /* a zero capacity could never grow by doubling */
  }
  uil_ptr->d = checked_realloc(NULL, (size_t)n * sizeof *uil_ptr->d);
  uil_ptr->capacity = n;
  uil_ptr->endp = uil_ptr->d;
}

/* Double the capacity of the list, keeping its contents. */
void grow_uint_list(struct uint_list *uilp)
{
  size_t used = (size_t)(uilp->endp - uilp->d);
  int new_capacity = doubled_capacity(uilp->capacity);

  uilp->d = checked_realloc(uilp->d,
                            (size_t)new_capacity * sizeof *uilp->d);
  uilp->endp = uilp->d + used;
  uilp->capacity = new_capacity;
}

/* Append data to the list, growing it when full. */
void append_uint_list(unsigned int data, struct uint_list *uilp)
{
  if (uilp->endp - uilp->d == uilp->capacity) {
    grow_uint_list(uilp);
  }
  *uilp->endp = data;
  (uilp->endp)++;
}

/*************** change to return index ****/
/* Return the index of the first entry equal to data, or -1 if none. */
int find_unit(unsigned int data, struct uint_list *uilp)
{
  int i;
  int n = (int)(uilp->endp - uilp->d);

  for (i = 0; i < n; ++i) {
    if ((uilp->d)[i] == data) {
      return i;
    }
  }
  return -1;
}

/* file_list[i] is the first path seen for inode ino_list[i] */
struct string_list file_list;
struct uint_list ino_list;
struct string_list dir_list;      /* only used by the disabled output_subs() code */
struct string_list softlink_list; /* for (a) */
struct string_list newdir_list;   /* for (b) */

/* Remove trailing '\n' and spaces, then leading spaces, in place.
   Safe on empty and all-blank strings (the result is ""). */
void strip(char *str)
{
  size_t len = strlen(str);
  size_t lead = 0;

  /* bounded by len so we never look in front of the buffer */
  while (len > 0 && (str[len - 1] == ' ' || str[len - 1] == '\n')) {
    str[--len] = '\0';
  }

  /* 20080317 remove leading spaces */
  while (str[lead] == ' ') {
    ++lead;
  }
  if (lead > 0) {
    memmove(str, str + lead, len - lead + 1); /* +1 moves the '\0' too */
  }
}

/* Meant to do (2) of the file header: print every parent directory of
   str that has not been printed yet.  Currently disabled ("testing"):
   the function returns without doing anything.  The disabled
   implementation is kept below for reference. */
void output_subs(char *str)
{
  (void)str;
#if 0
  char * pc;
  char subs[PATH_MAX];

  pc = strstr(str, "/");
  if (!pc) return;

  while (pc) {
    strncpy(subs, str, pc-str);
    subs[pc-str] = '\0';
    if (find_string(subs, &dir_list)<0) {
      printf("%s\n", subs);
      append_string_list(subs, &dir_list);
    }
    pc = strstr(pc+1, "/");
  }
#endif
}

/***
   (a)
   get those softlinks that point to a directory
   this is to deal with the following scenario
     previous structure
       dir_path (a directory)
       db (a directory)
     newly updated structure on master
       dir_path -> db
       db
     rsync --dry-run generates
       dir_path -> db                    [a link is done on target]
       deleting dir_path/sub/filename1   [wrong file gets removed ]
       deleting dir_path/sub/filename2...

     file_operations.c does this when dir_path -> db is due
       delete dir_path (rm -rf)
       make the softlink
     But then the following delete will have undesired deletion.
   ------------------------------------------------------------
   (b)
     t0
       name -> xyz        name -> xyz (target)
     t1
       name/              name -> xyz
     rsync generates
       name/              update_directory() won't have effect
       name/f1            delivered to wrong place
       name/f2
       deleting name      too late
     ** the deletion should be done before not after.
     For now, I will fail this code for this situation.

   First pass over the synclist file.
     filename  the synclist file to read; the program exits if it
               cannot be opened
     basedir   directory the relative paths in the file are resolved
               against
   Fills softlink_list with the names of softlinks that resolve to a
   directory under basedir, and newdir_list with the lines that contain
   no space and end in '/'.
***/
void get_dir_softlinks(char *filename, char *basedir)
{
  FILE *fd;
  char line[PATH_MAX];

  if ((fd = fopen(filename, "r")) == NULL) {
    fprintf(stderr, "Cannot open file -- %s \n", filename);
    exit(-1);
  }

  /* for each line in the file */
  while (fgets(line, sizeof line, fd) != NULL) {
    char *arrow;

    strip(line);
    if (line[0] == '\0') {
      continue; /* skip blank line */
    }

    /* the softlink case is indicated by -> */
    arrow = strstr(line, " -> ");
    if (arrow) {
      char fn[PATH_MAX];
      struct stat st;

      *arrow = '\0'; /* keep only the link name */

      if (!build_path(fn, sizeof fn, basedir, line)) {
        continue;
      }
      /* stat() follows the link: is the link-target a directory? */
      if (stat(fn, &st) < 0) {
        continue; /* We skip this bad entry - no longer exist */
      }
      if (S_ISDIR(st.st_mode)) {
        append_string_list(line, &softlink_list);
      }
    } else if (strchr(line, ' ') == NULL) {
      /* not a softlink --> a line without ' ' and with trailing '/'
         is taken to be a directory */
      if (line[strlen(line) - 1] == '/') { /* line is non-empty here */
        append_string_list(line, &newdir_list);
      }
    }
  }

  fclose(fd);
}

/* Usage: trFilelist synclist_filename basedir
   Reads the synclist file twice (first get_dir_softlinks(), then the
   loop below) and prints the translated list on stdout.
   Returns 0 on success; exits/returns -1 on usage or I/O errors and on
   the "softlink changed to a directory" situation described in (b). */
int main(int argc, char *argv[])
{
  char *filename;
  char *basedir;
  FILE *fd;
  char line[PATH_MAX];

  if (argc < 3) {
    fprintf(stderr, "Usage: trFilelist synclist_filename basedir\n");
    exit(-1);
  }
  filename = argv[1];
  basedir = argv[2];

  init_string_list(&file_list, 10);
  init_uint_list(&ino_list, 10);
  init_string_list(&dir_list, 100);
  init_string_list(&softlink_list, 10);
  init_string_list(&newdir_list, 100);

  get_dir_softlinks(filename, basedir);

  if ((fd = fopen(filename, "r")) == NULL) {
    fprintf(stderr, "Cannot open file -- %s \n", filename);
    return -1;
  }

  /* for each line in the file */
  while (fgets(line, sizeof line, fd) != NULL) {
    char *pc;
    char fn[PATH_MAX];
    struct stat st;

    strip(line);
    if (line[0] == '\0') continue; /* skip blank line */
    if (strcmp(line, ".") == 0) continue;
    if (strcmp(line, "./") == 0) continue;

    /* first we look for deleting entry */
    if (strncmp(line, "deleting ", 9) == 0) {
      /* deleting (directory) file_path
         The path is everything after "deleting ", so that paths which
         contain spaces are kept whole.  The old (pre 20070912) form
         "deleting directory filepath" is still recognized. */
      char *pf = line + 9;
      int newdir_flag;

      if (strncmp(pf, "directory ", 10) == 0) {
        pf += 10;
      }

      newdir_flag = has_newdir(pf, &newdir_list);
      if (newdir_flag < 0 && has_sub_string(pf, &softlink_list) < 0) {
        /* see comments above get_dir_softlinks() */
        printf("deleting %s\n", pf);
      } else if (newdir_flag >= 0) {
        /* temporary action */
        /*** we can simply skip this block later. 20070912 ***/
        fprintf(stderr, "CRITICAL ERROR: An old softlink has been changed to a directory!\n");
        fprintf(stderr, " For now, we crash this code for human intervention\n");
        fprintf(stderr, " line= %s\n", line);
        exit(-1);
      }
      /* otherwise the path lies below a softlink-to-directory: the
         deletion is dropped on purpose, see (a) */
      continue;
    }

    /* the softlink case is indicated by -> */
    pc = strstr(line, " -> ");
    if (pc) {
      *pc = '\0';
      output_subs(line);
      printf("%s\n", line);
      continue;
    }

    /* if rsync's -H is turned on, the output may contain
       file => tar_hardlink_file (relative address) */
    pc = strstr(line, " => ");
    if (pc) {
      *pc = '\0';
      output_subs(line);
      printf("%s %s\n", line, pc + 4);
      continue;
    }

    /* the rest of the entries should be valid paths */
    if (!build_path(fn, sizeof fn, basedir, line)) {
      continue;
    }
    if (lstat(fn, &st) < 0) {
      continue; /* We skip this bad entry -
                   (1) the header and tail lines
                   (2) perhaps the file no longer exists */
    }

    /* is this a hardlink? */
    if (st.st_nlink > 1) {
      /* NOTE(review): st_ino is narrowed to unsigned int and st_dev is
         not compared, so with wide inode numbers or several filesystems
         two unrelated files could be reported as hardlinks -- confirm
         whether the inode list needs widening. */
      int index;

      output_subs(line);
      index = find_unit((unsigned int)st.st_ino, &ino_list);
      if (index < 0) {
        /* first path seen for this inode: remember it */
        append_uint_list((unsigned int)st.st_ino, &ino_list);
        append_string_list(line, &file_list); /* relative path */
        printf("%s\n", line);
      } else {
        printf("%s %s\n", line, file_list.str[index]);
      }
      continue;
    }

    /* all others */
    output_subs(line);
    printf("%s\n", line);
  } /* end of one line */

  fclose(fd);

  /* a truncated list would silently drop entries downstream */
  if (fflush(stdout) != 0 || ferror(stdout)) {
    fprintf(stderr, "Error writing output\n");
    return -1;
  }
  return 0;
}