/*
   Copyright (C) 2008 Renaissance Technologies Corp.
            main developer: HP Wei
   Copyright (C) 2006 Renaissance Technologies Corp.
            main developer: HP Wei

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/

/*
   Provenance: src/trFilelist.c as added by commit
   ded46d4768498c7d27fedcc438fe80a59ad63d0c
   (Guillaume Horel, Wed, 4 Nov 2015 13:55:26 -0500,
   "move code into a src directory").
*/

/***
  This program parses the synclist file generated by rsync in dry-run mode.
  The minimum options for rsync are: -avW --dry-run --delete
  e.g.
     /usr/local/bin/rsync --rsync-path=/usr/local/bin/rsync
          -avW --dry-run --delete
          /src/path/ dest_machine:/target/path/ > output 2>&1

  A typical output of rsync may look like this:

     Client is very old version of rsync, upgrade recommended.
     building file list ... done
     xyz -> ./sub1/xyz
     file1
     fn
     fn1
     j
     sub1/hardlink_to_file1
     sub1/path/testfile
     sub1/xyz
     sent 337 bytes  read 44 bytes  254.00 bytes/sec
     total size is 320751046  speedup is 841866.26

  This code does the following four things.
  (1) Drop the header and trailer lines (everything up to 'done' and the
      'sent ...' / 'total size ...' summary).  This is not done by pattern
      matching: such lines simply do not name an existing path under
      basedir, so lstat() fails and they are skipped.
  (2) Output all directories and file paths,
      e.g. for an entry sub1/path/testfile the output is
           sub1
           sub1/path
           sub1/path/testfile
      (The parent-directory part is currently disabled, see output_subs().)
  (3) For a softlink entry
           xyz -> ./sub1/xyz
      the output is
           xyz
  (4) If file1 and sub1/hardlink are hardlinked, the output is
           file1
           sub1/hardlink file1

  For the example above, the intended output of this code is:

     xyz
     file1
     fn
     fn1
     j
     sub1
     sub1/hardlink_to_file1 file1
     sub1/path
     sub1/path/testfile
     sub1/xyz

  Usage: trFilelist synclist_filename basedir
***/

#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L /* strdup(), lstat(), PATH_MAX in strict ISO C mode */
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <limits.h> /* to define PATH_MAX */
#include <sys/types.h>
#include <sys/stat.h>

#ifndef PATH_MAX
#define PATH_MAX 4096 /* fallback for platforms that do not define it */
#endif

#define TRUE  1
#define FALSE 0

/* Report allocation failure and terminate; the lists below cannot recover. */
static void die_out_of_memory(void)
{
  fprintf(stderr, "trFilelist: out of memory\n");
  exit(-1);
}

/*
  Build "dir/name" in dst.
  Returns 1 on success, 0 if the result did not fit in dst_size bytes
  (dst is then truncated and must not be used as a path).
*/
static int join_path(char *dst, size_t dst_size, const char *dir, const char *name)
{
  int n = snprintf(dst, dst_size, "%s/%s", dir, name);
  return n >= 0 && (size_t)n < dst_size;
}

/* ------------------------------------------------------------------ */
/* Growable list of heap-allocated strings.                            */
/*   str      : start of the pointer array                             */
/*   endp     : one past the last used slot                            */
/*   capacity : number of allocated slots                              */
/* ------------------------------------------------------------------ */
struct string_list {
  int capacity;
  char ** endp;
  char ** str;
};

/* Initialise an empty list with room for n entries (at least one). */
void init_string_list(struct string_list * str_ptr, int n)
{
  /* A zero capacity could never grow by doubling, so clamp it. */
  if (n < 1) n = 1;

  str_ptr->str = malloc((size_t)n * sizeof *str_ptr->str);
  if (str_ptr->str == NULL) die_out_of_memory();
  str_ptr->capacity = n;
  str_ptr->endp = str_ptr->str;
}

/* Double the capacity of the list, keeping the stored pointers. */
void grow_string_list(struct string_list * slp)
{
  size_t used = (size_t)(slp->endp - slp->str);
  int new_capacity;
  char ** newp;

  if (slp->capacity > INT_MAX / 2) die_out_of_memory();
  new_capacity = (slp->capacity > 0) ? 2 * slp->capacity : 1;

  /* Keep the old pointer until realloc is known to have succeeded. */
  newp = realloc(slp->str, (size_t)new_capacity * sizeof *newp);
  if (newp == NULL) die_out_of_memory();

  slp->str = newp;
  slp->endp = newp + used;
  slp->capacity = new_capacity;
}

/* Append a private copy of str to the list, growing it when full. */
void append_string_list(const char * str, struct string_list * slp)
{
  char * copy;

  if (slp->endp - slp->str == slp->capacity) grow_string_list(slp);

  copy = strdup(str);
  if (copy == NULL) die_out_of_memory();

  *slp->endp = copy;
  (slp->endp)++;
}

/* Return the index of the first entry equal to str, or -1 if none. */
int find_string(const char * str, struct string_list * slp)
{
  int i;
  int n = (int)(slp->endp - slp->str);

  for (i = 0; i < n; ++i) {
    if (strcmp((slp->str)[i], str) == 0) return i;
  }
  return -1;
}

/*
  Return the index of the first list entry that is a leading part
  (prefix) of str, or -1 if none.

  NOTE(review): this is a plain character-prefix test, so the entry
  "dir_path" also matches "dir_path2/file".  If only paths *below* the
  entry are meant, a path-component boundary check is needed -- confirm
  the intended semantics before tightening.
*/
int has_sub_string(const char * str, struct string_list *slp)
{
  int i;
  int n = (int)(slp->endp - slp->str);

  for (i = 0; i < n; ++i) {
    const char * entry = (slp->str)[i];
    if (strncmp(str, entry, strlen(entry)) == 0) return i;
  }
  return -1;
}

/*
  Return the index of the first list entry that begins with str,
  or -1 if none.

  NOTE(review): also a plain character-prefix test, so "foo" matches the
  entry "foobar/".  See the matching note on has_sub_string().
*/
int has_newdir(const char *str, struct string_list *slp)
{
  int i;
  int n = (int)(slp->endp - slp->str);
  size_t len = strlen(str);

  for (i = 0; i < n; ++i) {
    if (strncmp((slp->str)[i], str, len) == 0) return i;
  }
  return -1;
}

/* ------------------------------------------------------------------ */
/* Growable list of unsigned ints (same layout idea as string_list).   */
/* ------------------------------------------------------------------ */
struct uint_list {
  int capacity;
  unsigned int * endp;
  unsigned int * d;
};

/* Initialise an empty list with room for n entries (at least one). */
void init_uint_list(struct uint_list * uil_ptr, int n)
{
  if (n < 1) n = 1;

  uil_ptr->d = malloc((size_t)n * sizeof *uil_ptr->d);
  if (uil_ptr->d == NULL) die_out_of_memory();
  uil_ptr->capacity = n;
  uil_ptr->endp = uil_ptr->d;
}

/* Double the capacity of the list, keeping the stored values. */
void grow_uint_list(struct uint_list * uilp)
{
  size_t used = (size_t)(uilp->endp - uilp->d);
  int new_capacity;
  unsigned int * newp;

  if (uilp->capacity > INT_MAX / 2) die_out_of_memory();
  new_capacity = (uilp->capacity > 0) ? 2 * uilp->capacity : 1;

  newp = realloc(uilp->d, (size_t)new_capacity * sizeof *newp);
  if (newp == NULL) die_out_of_memory();

  uilp->d = newp;
  uilp->endp = newp + used;
  uilp->capacity = new_capacity;
}

/* Append data to the list, growing it when full. */
void append_uint_list(unsigned int data, struct uint_list * uilp)
{
  if (uilp->endp - uilp->d == uilp->capacity) grow_uint_list(uilp);
  *uilp->endp = data;
  (uilp->endp)++;
}

/* Return the index of the first entry equal to data, or -1 if none. */
int find_unit(unsigned int data, struct uint_list * uilp)
{
  int i;
  int n = (int)(uilp->endp - uilp->d);

  for (i = 0; i < n; ++i) {
    if ((uilp->d)[i] == data) return i;
  }
  return -1;
}

struct string_list file_list;      /* first path seen for each hardlinked inode */
struct uint_list   ino_list;       /* inode numbers, index-aligned with file_list */
struct string_list dir_list;       /* only used by the disabled output_subs() */
struct string_list softlink_list;  /* for (a): softlinks that point to a directory */
struct string_list newdir_list;    /* for (b): entries listed with a trailing '/' */

/*
  Remove trailing spaces and newlines, then leading spaces, in place.
  Safe on an empty or all-blank string (which becomes "").
*/
void strip(char * str)
{
  size_t len = strlen(str);
  size_t lead = 0;

  /* Bounded by len so an empty or all-blank line never reads str[-1]. */
  while (len > 0 && (str[len - 1] == ' ' || str[len - 1] == '\n')) {
    str[--len] = '\0';
  }

  /* 20080317 remove leading spaces */
  while (str[lead] == ' ') ++lead;
  if (lead > 0) {
    memmove(str, str + lead, len - lead + 1); /* +1 moves the terminator too */
  }
}

/*
  Intended to do (2) in the file header: print every parent directory of
  str that has not been printed yet and remember it in dir_list.
  That behaviour is switched off ("testing" in the original source), so
  this function currently does nothing.
*/
void output_subs(char * str)
{
  (void)str;
  return;
}

/*** (a)
     Get those softlinks that point to a directory.
     This is to deal with the following scenario:
       previous structure
         dir_path (a directory)
         db       (a directory)

       newly updated structure on master
         dir_path -> db
         db

     rsync --dry-run generates
         dir_path -> db                      [a link is done on target]
         deleting dir_path/sub/filename1     [wrong file gets removed ]
         deleting dir_path/sub/filename2...

     file_operations.c does this when dir_path -> db is due:
         delete dir_path (rm -rf)
         make the softlink
     But then the following delete would cause undesired deletion.

     ------------------------------------------------------------

     (b)
          (master)              (target)
     t0   name -> xyz           name -> xyz
     t1   name/                 name -> xyz

     rsync generates
          name/           update_directory() won't have effect
          name/f1         delivered to wrong place
          name/f2
          deleting name   too late
     ** the deletion should be done before, not after.
     For now, this code fails deliberately in this situation.

     This first pass over the synclist fills
       softlink_list : "X -> Y" entries where basedir/X resolves to a directory
       newdir_list   : entries without any space that end in '/'
***/
void get_dir_softlinks(char *filename, char * basedir)
{
  FILE * fd;
  char line[PATH_MAX]; /* longer input lines are split by fgets() */

  if ((fd = fopen(filename, "r")) == NULL) {
    fprintf(stderr, "Cannot open file -- %s \n", filename);
    exit(-1);
  }

  while (fgets(line, sizeof line, fd) != NULL) { /* for each line in the file */
    char * arrow;

    strip(line);
    if (line[0] == '\0') continue; /* skip blank line */

    /* the softlink case is indicated by -> */
    arrow = strstr(line, " -> ");
    if (arrow) {
      char fn[PATH_MAX];
      struct stat st;

      *arrow = '\0'; /* keep only the link name */

      if (!join_path(fn, sizeof fn, basedir, line)) {
        fprintf(stderr, "Path too long, skipped -- %s/%s\n", basedir, line);
        continue;
      }

      /* stat() follows the link: is the link-target a directory? */
      if (stat(fn, &st) < 0) continue; /* skip this bad entry - no longer exists */

      if (S_ISDIR(st.st_mode)) {
        append_string_list(line, &softlink_list);
      }
    } else if (strchr(line, ' ') == NULL) {
      /* not a softlink: a line without ' ' and with trailing '/' is a directory */
      if (line[strlen(line) - 1] == '/') { /* line is non-empty here */
        append_string_list(line, &newdir_list);
      }
    }
  }

  fclose(fd);
}


/*
  Entry point: trFilelist synclist_filename basedir

  Reads the rsync dry-run listing twice (first get_dir_softlinks(), then
  the translation pass below) and writes the translated list to stdout.
  Returns 0 on success; exits with a non-zero status on usage error,
  unreadable synclist, or the "softlink became a directory" situation (b).
*/
int main(int argc, char * argv[])
{
  char * filename;
  char * basedir;
  FILE * fd;
  char line[PATH_MAX];
  static const char deleting_tag[] = "deleting ";
  const size_t deleting_len = sizeof deleting_tag - 1;

  if (argc < 3) {
    fprintf(stderr, "Usage: trFilelist synclist_filename basedir\n");
    exit(-1);
  }

  filename = argv[1];
  basedir = argv[2];

  init_string_list(&file_list, 10);
  init_uint_list(&ino_list, 10);
  init_string_list(&dir_list, 100);
  init_string_list(&softlink_list, 10);
  init_string_list(&newdir_list, 100);

  get_dir_softlinks(filename, basedir);

  if ((fd = fopen(filename, "r")) == NULL) {
    fprintf(stderr, "Cannot open file -- %s \n", filename);
    return -1;
  }

  while (fgets(line, sizeof line, fd) != NULL) { /* for each line in the file */
    char * pc;
    char fn[PATH_MAX];
    struct stat st;

    strip(line);
    if (line[0] == '\0') continue; /* skip blank line */
    if (strcmp(line, ".") == 0) continue;
    if (strcmp(line, "./") == 0) continue;

    /* first we look for a deleting entry */
    if (strncmp(line, deleting_tag, deleting_len) == 0) {
      /* The path is everything after "deleting "; it may itself contain
         spaces.  (The pre-2007 "deleting directory <path>" form is no
         longer special-cased.) */
      char * pf = line + deleting_len;
      int newdir_flag = has_newdir(pf, &newdir_list);

      if (newdir_flag >= 0) { /* temporary action, situation (b) */
        /*** we can simply skip this block later. 20070912 ***/
        fprintf(stderr, "CRITICAL ERROR: An old softlink has been changed to a directory!\n");
        fprintf(stderr, "                For now, we crash this code for human intervention\n");
        fprintf(stderr, "                line= %s\n", line);
        exit(-1);
      }

      /* Deletions at or below a directory-softlink are suppressed,
         see comment (a) above get_dir_softlinks(). */
      if (has_sub_string(pf, &softlink_list) < 0) {
        printf("deleting %s\n", pf);
      }
      continue;
    }

    /* the softlink case is indicated by -> */
    pc = strstr(line, " -> ");
    if (pc) {
      *pc = '\0';
      output_subs(line);
      printf("%s\n", line);
      continue;
    }

    /* if rsync's -H is turned on, the output may contain
          file => tar_hardlink_file (relative address)
    */
    pc = strstr(line, " => ");
    if (pc) {
      *pc = '\0';
      output_subs(line);
      printf("%s %s\n", line, pc + 4);
      continue;
    }

    /* the rest of the entries should be valid paths */
    if (!join_path(fn, sizeof fn, basedir, line)) {
      fprintf(stderr, "Path too long, skipped -- %s/%s\n", basedir, line);
      continue;
    }
    if (lstat(fn, &st) < 0) continue; /* We skip this bad entry -
                                         (1) the header and tail lines
                                         (2) perhaps the file no longer exists */

    /* Is this a hardlink?  Directories always have nlink > 1 and cannot
       be hardlinked, so they are excluded.
       NOTE(review): the inode is truncated to unsigned int and st_dev is
       ignored, so distinct files could collide on large or multiple
       filesystems -- confirm whether that matters for the deployment. */
    if (st.st_nlink > 1 && !S_ISDIR(st.st_mode)) {
      int index = find_unit((unsigned int)st.st_ino, &ino_list);

      output_subs(line);
      if (index < 0) {
        /* first sighting: remember inode and path at the same index */
        append_uint_list((unsigned int)st.st_ino, &ino_list);
        append_string_list(line, &file_list); /* relative path */
        printf("%s\n", line);
      } else {
        printf("%s %s\n", line, file_list.str[index]);
      }
      continue;
    }

    /* all others */
    output_subs(line);
    printf("%s\n", line);
  } /* end of one line */

  fclose(fd);
  return 0;
}