diff options
Diffstat (limited to 'trFilelist.c')
| -rw-r--r-- | trFilelist.c | 449 |
1 files changed, 0 insertions, 449 deletions
diff --git a/trFilelist.c b/trFilelist.c deleted file mode 100644 index 8f65796..0000000 --- a/trFilelist.c +++ /dev/null @@ -1,449 +0,0 @@ -/* - Copyright (C) 2008 Renaissance Technologies Corp. - main developer: HP Wei <hp@rentec.com> - Copyright (C) 2006 Renaissance Technologies Corp. - main developer: HP Wei <hp@rentec.com> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. - If not, write to the Free Software Foundation, - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -*/ - -/***this code parse the synclist file generated by rsync in dry-run mode. - The minimum options for rsync is : -avW --dry-run --delete - e.g. - /usr/local/bin/rsync --rsync-path=/usr/local/bin/rsync - -avW --dry-run --delete - /src/path/ dest_machine:/target/path/ > output 2>&1 -A typical output of rsync may look like this: -Client is very old version of rsync, upgrade recommended. -building file list ... done -xyz -> ./sub1/xyz -file1 -fn -fn1 -j -sub1/hardlink_to_file1 -sub1/path/testfile -sub1/xyz -sent 337 bytes read 44 bytes 254.00 bytes/sec -total size is 320751046 speedup is 841866.26 - -This code does the following three things. 
/*
 * Processing applied to the rsync dry-run listing described above:
 *
 * (1) skip the lines before 'done' and after 'wrote'
 * (2) output all directories and file_path
 *     e.g. for an entry: sub1/path/testfile, the output is
 *         sub1
 *         sub1/path
 *         sub1/path/testfile
 *     NOTE: this expansion is implemented by output_subs(), which
 *     currently returns immediately, so the parent directories are
 *     not emitted at the moment.
 * (3) xyz -> ./sub1/xyz
 *     the output is
 *         xyz
 * (4) If file1 and sub1/hardlink are hardlinked
 *     the output is
 *         file1
 *         sub1/hardlink file1
 *
 * For the example output above, the output of this code
 * (as designed, with step (2) active) is:
 *
 *     xyz
 *     file1
 *     fn
 *     fn1
 *     j
 *     sub1
 *     sub1/hardlink_to_file1 file1
 *     sub1/path
 *     sub1/path/testfile
 *     sub1/xyz
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h> /* to define PATH_MAX */
#include <sys/stat.h>

#define TRUE 1
#define FALSE 0

/*
 * Report an allocation failure and terminate.  None of the list helpers
 * below can do anything useful without memory, and the program already
 * uses exit(-1) for its other fatal errors.
 */
static void list_out_of_memory(void)
{
    fprintf(stderr, "trFilelist: out of memory\n");
    exit(-1);
}

/*
 * Double a list capacity, treating 0 as "not allocated yet".
 * Terminates the program if the doubled value would overflow an int.
 */
static int doubled_capacity(int capacity)
{
    if (capacity < 1) {
        return 1;
    }
    if (capacity > INT_MAX / 2) {
        list_out_of_memory();
    }
    return 2 * capacity;
}

/*
 * Return non-zero when `prefix` names `path` itself or one of its parent
 * directories: `prefix` must match the start of `path` AND end on a path
 * component boundary.  A plain character-prefix test is not enough,
 * because "dir_path" must not match "dir_path2/file".
 * An empty `prefix` never matches.
 */
static int is_path_prefix(const char *prefix, const char *path)
{
    size_t n = strlen(prefix);

    if (n == 0 || strncmp(path, prefix, n) != 0) {
        return 0;
    }
    return prefix[n - 1] == '/' || path[n] == '/' || path[n] == '\0';
}

/* A growable array of heap-allocated strings. */
struct string_list {
    int capacity;   /* number of slots allocated in str */
    char **endp;    /* one past the last used slot */
    char **str;     /* the slots; each string is a private copy */
};

/*
 * Prepare an empty list with room for n strings.
 * A request for fewer than one slot is rounded up to one so that the
 * list can always grow by doubling.
 */
void init_string_list(struct string_list *str_ptr, int n)
{
    if (n < 1) {
        n = 1;
    }
    str_ptr->str = malloc((size_t)n * sizeof *str_ptr->str);
    if (str_ptr->str == NULL) {
        list_out_of_memory();
    }
    str_ptr->capacity = n;
    str_ptr->endp = str_ptr->str;
}

/* Double the capacity of the list, keeping its current contents. */
void grow_string_list(struct string_list *slp)
{
    size_t used = (size_t)(slp->endp - slp->str);
    int new_capacity = doubled_capacity(slp->capacity);
    char **grown = realloc(slp->str, (size_t)new_capacity * sizeof *grown);

    if (grown == NULL) {
        list_out_of_memory();
    }
    slp->str = grown;
    slp->endp = grown + used;
    slp->capacity = new_capacity;
}

/* Append a private copy of str to the list, growing it when it is full. */
void append_string_list(const char *str, struct string_list *slp)
{
    size_t size = strlen(str) + 1;
    char *copy;

    if (slp->endp - slp->str == slp->capacity) {
        grow_string_list(slp);
    }
    copy = malloc(size);
    if (copy == NULL) {
        list_out_of_memory();
    }
    memcpy(copy, str, size);
    *slp->endp = copy;
    (slp->endp)++;
}

/*
 * Look for an entry equal to str.
 * Returns the index of the first match, or -1 when there is none.
 */
int find_string(const char *str, const struct string_list *slp)
{
    int n = (int)(slp->endp - slp->str);
    int i;

    for (i = 0; i < n; ++i) {
        if (strcmp(slp->str[i], str) == 0) {
            return i;
        }
    }
    return -1;
}

/*
 * Look for a list entry that is str itself or a parent directory of str
 * (the entry is a leading part of str, ending on a '/' boundary).
 * Returns the index of the first such entry, or -1 when there is none.
 */
int has_sub_string(const char *str, const struct string_list *slp)
{
    int n = (int)(slp->endp - slp->str);
    int i;

    for (i = 0; i < n; ++i) {
        if (is_path_prefix(slp->str[i], str)) {
            return i;
        }
    }
    return -1;
}

/*
 * Look for a list entry that is str itself or lies below str
 * (str is a leading part of the entry, ending on a '/' boundary).
 * Returns the index of the first such entry, or -1 when there is none.
 */
int has_newdir(const char *str, const struct string_list *slp)
{
    int n = (int)(slp->endp - slp->str);
    int i;

    for (i = 0; i < n; ++i) {
        if (is_path_prefix(str, slp->str[i])) {
            return i;
        }
    }
    return -1;
}

/* A growable array of unsigned integers. */
struct uint_list {
    int capacity;        /* number of slots allocated in d */
    unsigned int *endp;  /* one past the last used slot */
    unsigned int *d;     /* the slots */
};

/*
 * Prepare an empty list with room for n values.
 * A request for fewer than one slot is rounded up to one.
 */
void init_uint_list(struct uint_list *uil_ptr, int n)
{
    if (n < 1) {
        n = 1;
    }
    uil_ptr->d = malloc((size_t)n * sizeof *uil_ptr->d);
    if (uil_ptr->d == NULL) {
        list_out_of_memory();
    }
    uil_ptr->capacity = n;
    uil_ptr->endp = uil_ptr->d;
}

/* Double the capacity of the list, keeping its current contents. */
void grow_uint_list(struct uint_list *uilp)
{
    size_t used = (size_t)(uilp->endp - uilp->d);
    int new_capacity = doubled_capacity(uilp->capacity);
    unsigned int *grown = realloc(uilp->d, (size_t)new_capacity * sizeof *grown);

    if (grown == NULL) {
        list_out_of_memory();
    }
    uilp->d = grown;
    uilp->endp = grown + used;
    uilp->capacity = new_capacity;
}

/* Append data to the list, growing it when it is full. */
void append_uint_list(unsigned int data, struct uint_list *uilp)
{
    if (uilp->endp - uilp->d == uilp->capacity) {
        grow_uint_list(uilp);
    }
    *uilp->endp = data;
    (uilp->endp)++;
}

/*
 * Look for data in the list.
 * Returns the index of the first match, or -1 when there is none.
 */
int find_unit(unsigned int data, const struct uint_list *uilp)
{
    int n = (int)(uilp->endp - uilp->d);
    int i;

    for (i = 0; i < n; ++i) {
        if (uilp->d[i] == data) {
            return i;
        }
    }
    return -1;
}

struct string_list file_list;
struct uint_list ino_list;
struct string_list dir_list;
struct string_list softlink_list; /* for (a) */
struct string_list newdir_list;   /* for (b) */

/*
 * Remove trailing newlines and spaces, then leading spaces, in place.
 * Safe for an empty string and for a string made only of blanks and
 * newlines: the scan never moves in front of str[0].
 */
void strip(char *str)
{
    size_t len = strlen(str);
    size_t lead = 0;

    while (len > 0 && (str[len - 1] == ' ' || str[len - 1] == '\n')) {
        str[--len] = '\0';
    }

    /* 20080317 remove leading spaces */
    while (str[lead] == ' ') {
        ++lead;
    }
    if (lead > 0) {
        /* the regions overlap, so memmove; +1 carries the terminator */
        memmove(str, str + lead, len - lead + 1);
    }
}

/*
 * Intended to print every parent directory of str once (step (2) of the
 * file-level description), remembering the ones already printed in
 * dir_list.  The function is currently switched off: it returns at once
 * and prints nothing.  The disabled implementation is kept for reference.
 */
void output_subs(char *str)
{
    (void)str; /* unused while the expansion is disabled */
    return;    /*************************** testing ***************/
    /********
    char * pc;
    char subs[PATH_MAX];
    pc = strstr(str, "/");
    if (!pc) return;

    while (pc) {
      strncpy(subs, str, pc-str);
      subs[pc-str] = '\0';
      if (find_string(subs, &dir_list)<0) {
        printf("%s\n", subs);
        append_string_list(subs, &dir_list);
      }
      pc = strstr(pc+1, "/");
    }
    ************/
}

/*** (a)
   get those softlinks that point to a directory
   this is to deal with the following scenario
     previous structure
        dir_path (a directory)
        db (a directory)

     newly updated structure on master
        dir_path -> db
        db

     rsync --dry-run generates
        dir_path -> db                    [a link is done on target]
        deleting dir_path/sub/filename1   [wrong file gets removed ]
        deleting dir_path/sub/filename2...

     file_operations.c does this when dir_path -> db is due
        delete dir_path (rm -rf)
        make the softlink
     But then the following delete will have undesired deletion.

   ------------------------------------------------------------

   (b)
     t0   name -> xyz        name -> xyz (target)
     t1   name/              name -> xyz

     rsync generates
        name/            update_directory() won't have effect
        name/f1          delivered to wrong place
        name/f2
        deleting name    too late
     ** the deletion should be done before not after.
     For now, I will fail this code for this situation.
***/
- -***/ -void get_dir_softlinks(char *filename, char * basedir) { - FILE * fd; - char line[PATH_MAX]; - struct stat st; - - if ((fd = fopen(filename, "r")) == NULL) { - fprintf(stderr, "Cannot open file -- %s \n", filename); - exit(-1); - } - - while (1) { /* for each line in the file */ - char *pc; - char fn[PATH_MAX]; - - if (fgets(line, PATH_MAX, fd)==NULL) break; - strip(line); - if (strlen(line) == 0) continue; /* skip blank line */ - - /* the softlink case is indicated by -> */ - pc= strstr(line, " -> "); - if (pc) { /* it is a softlink */ - *pc = '\0'; - /* check if it is a directory */ - sprintf(fn, "%s/%s", basedir, line); - - /* check if the link-target is a directory */ - if (stat(fn, &st)<0) continue; /* We skip this bad entry - no longer exist */ - - if (S_ISDIR(st.st_mode)) { - append_string_list(line, &softlink_list); - } - } else { /* not a softlink --> find if it is a directory */ - /* find a line without ' ' and with trailing '/' */ - pc = strstr(line, " "); /* the first space */ - if (!pc) { - char * plast = &line[0] + strlen(line) - 1; - if (*plast == '/') { - append_string_list(line, &newdir_list); - } - } - } - } - - fclose(fd); -} - - -int main(int argc, char * argv[]) -{ - char * filename; - char * basedir; - FILE *fd; - char line[PATH_MAX]; - - if (argc < 3) { - fprintf(stderr, "Usage: trFilelist synclist_filename basedir\n"); - exit(-1); - } - - filename = argv[1]; - basedir = argv[2]; - - init_string_list(&file_list, 10); - init_uint_list(&ino_list, 10); - init_string_list(&dir_list, 100); - init_string_list(&softlink_list, 10); - init_string_list(&newdir_list, 100); - - get_dir_softlinks(filename, basedir); - - if ((fd = fopen(filename, "r")) == NULL) { - fprintf(stderr, "Cannot open file -- %s \n", filename); - return -1; - } - - while (1) { /* for each line in the file */ - char *pc; - char fn[PATH_MAX]; - struct stat st; - int newdir_flag; - - if (fgets(line, PATH_MAX, fd)==NULL) break; - strip(line); - if (strlen(line) == 0) 
continue; /* skip blank line */ - if (strcmp(line, ".")==0) continue; - if (strcmp(line, "./")==0) continue; - - /* first we look for deleting entry */ - if (strncmp(line, "deleting ", 9)==0) { - /* deleting (directory) file_path */ - char * p1, *p2, *pf; - - p1 = strstr(line, " "); /* the first space */ - p2 = strstr(p1+1, " "); /* deleting directory filepath * 20070912 this is old */ - pf = (p2) ? p2+1 : p1+1;/* it's always p1+1 */ - - newdir_flag = has_newdir(pf, &newdir_list); - - if ((has_sub_string(pf, &softlink_list)<0) && newdir_flag<0) { - /* see comments above get_dir_softlinks() */ - printf("deleting %s\n", pf); - } else if (newdir_flag>=0) { /* temporary action */ - /*** we can simply skip this block later. 20070912 ***/ - /***/ - fprintf(stderr, "CRITICAL ERROR: An old softlink has been changed to a directory!\n"); - fprintf(stderr, " For now, we crash this code for human intervention\n"); - fprintf(stderr, " line= %s\n", line); - exit(-1); - /***/ - } - - continue; - } - - /* the softlink case is indicated by -> */ - pc= strstr(line, " -> "); - if (pc) { - *pc = '\0'; - output_subs(line); - printf("%s\n", line); - continue; - } - - /* if rsync's -H is turned on, the output may contain - file => tar_hardlink_file (relative address) - */ - pc= strstr(line, " => "); - if (pc) { - *pc = '\0'; - output_subs(line); - printf("%s %s\n", line, pc+4); - continue; - } - - /* the rest of the entries should be valid paths */ - sprintf(fn, "%s/%s", basedir, line); - if (lstat(fn, &st)<0) continue; /* We skip this bad entry - - (1) the header and tail lines - (2) perhaps the file no longer exists */ - - /* is this a hardlink? 
*/ - if (st.st_nlink > 1) { - int index; - output_subs(line); - if ((index = find_unit((unsigned int)st.st_ino, &ino_list))<0) { - append_uint_list((unsigned int)st.st_ino, &ino_list); - append_string_list(line, &file_list); /* relative path */ - printf("%s\n", line); - } else { - printf("%s %s\n", line, file_list.str[index]); - } - continue; - } - - /* all others */ - output_subs(line); - printf("%s\n", line); - } /* end of one line */ - - fclose(fd); - return 0; -} |
