This is a simple script that splits a text file into a given number of chunks, on line boundaries.
Example input file with 4 lines (input.txt):
this is line 1
this is line 2
this is line 3
this is line 4
./chunkfile.sh -f input.txt -c 4 -j 2 -p output_
or
cat input.txt | ./chunkfile.sh -c 4 -j 2 -p output_ -e txt
Output will be:
output_01.txt
output_02.txt
output_03.txt
output_04.txt
With a single line of text in each file.
#!/bin/bash
#===============================================================================
#
#          FILE:  chunkfile.sh
#
#         USAGE:  ./chunkfile.sh [options] [--]
#
#   DESCRIPTION:  Split a text file (or stdin) into a fixed number of chunks,
#                 always cutting on line boundaries.
#
#       OPTIONS:  see usage()
#  REQUIREMENTS:  bash, mktemp, cat, awk, tail, head
#          BUGS:  ---
#         NOTES:  Output files are named <prefix><zero-padded index><ext> and
#                 are written relative to the current directory.
#        AUTHOR:  Kirk Roybal (DBA), kroybal@kalkomey.com
#       COMPANY:  Kalkomey
#       VERSION:  1.0
#       CREATED:  12/30/2011 09:45:42 AM CST
#      REVISION:  ---
#===============================================================================

ScriptVersion="1.0"

#===  FUNCTION  ================================================================
#          NAME:  usage
#   DESCRIPTION:  Display usage information on stdout.
#===============================================================================
function usage ()
{
  # ${0##*/} strips any leading directory, for relative and absolute paths.
  cat <<EOT

Usage :  ${0##*/} [options] [--]

Options:
-c|chunks   #      Number of chunks
-d|debug           Display bash debugging info
-e|ext      exten  Output file extension
-f|file     name   File to chunk
-h|help            Display this message
-j|just     pad    Padding zeros for output files
-l|log             Display logging information
-p|prefix   pre    Prefix of new file names
-v|version         Display script version

Note: using multiple logging options increases the log level
(i.e.) ${0##*/} -lll -f input.txt -c 3 -j 2 -p output_
Log level 3.
Output:
output_01.txt
output_02.txt
output_03.txt

EOT
}    # ----------  end of function usage  ----------

#===  FUNCTION  ================================================================
#          NAME:  die
#   DESCRIPTION:  Print a message on stderr and exit with status 1.
#===============================================================================
function die ()
{
  printf '%s\n' "$*" >&2
  exit 1
}    # ----------  end of function die  ----------

#-----------------------------------------------------------------------
#  Handle command line arguments
#-----------------------------------------------------------------------
# Start from known values so nothing leaks in from the environment.
padding=2
logging=0
chunks=""
filename=""
prefix=""
ext=""
ext_set=0     # 1 once -e was given; an explicit -e always beats the file's own

while getopts ":c:de:f:hj:lp:v" opt; do
  case "$opt" in
    c) chunks=$OPTARG ;;
    d) set -x ;;
    e) ext=".$OPTARG"; ext_set=1 ;;
    f) filename=$OPTARG ;;
    h) usage; exit 0 ;;
    j) padding=$OPTARG ;;
    # The level is counted for compatibility; nothing below reads it.
    l) logging=$((logging + 1)) ;;
    p) prefix=$OPTARG ;;
    v) echo "$0 -- Version $ScriptVersion"; exit 0 ;;
    # With a leading ':' in the optstring, getopts reports a missing
    # argument as ':' with the option letter in OPTARG.
    :)
      printf '\n  Option requires an argument : -%s\n\n' "$OPTARG" >&2
      usage >&2
      exit 1
      ;;
    \?)
      printf '\n  Option does not exist : %s\n\n' "$OPTARG" >&2
      usage >&2
      exit 1
      ;;
  esac    # --- end of case ---
done
shift $((OPTIND - 1))

if [[ -z $filename && -z $prefix ]]; then
  cat >&2 <<EOT

You must specify a filename or a prefix for output.
(i.e.) ${0##*/} -f input.txt -c 3 -j 2
or
(i.e.) cat input.txt | ${0##*/} -c 3 -j 2 -p output

EOT
  usage >&2
  exit 1
fi

if [[ -n $filename ]]; then
  # Derive defaults from the file's base name only, so a directory part such
  # as "./" or "dir.d/" can never end up inside the extension or the prefix.
  base=${filename##*/}
  stem=${base%%.*}
  if [[ -n $stem && $base == *.* ]]; then
    file_ext=".${base#*.}"      # everything after the first dot: a.tar.gz -> .tar.gz
  else
    stem=$base                  # no dot, or a dot-file: no extension to inherit
    file_ext=""
  fi
  (( ext_set )) || ext=$file_ext
  [[ -n $prefix ]] || prefix="${stem}_"
else
  # take input from stdin
  filename="-"
fi

if [[ -z $chunks ]]; then
  usage >&2
  exit 1
fi

# Reject anything that would break the arithmetic below (0 divides by zero).
[[ $chunks =~ ^[0-9]+$ ]] || die "Number of chunks must be a positive integer: $chunks"
chunks=$((10#$chunks))          # force base 10 so "08" is not read as octal
(( chunks >= 1 )) || die "Number of chunks must be a positive integer: $chunks"

[[ $padding =~ ^[0-9]+$ ]] || die "Padding must be a non-negative integer: $padding"
padding=$((10#$padding))

if [[ $filename != "-" && ! -r $filename ]]; then
  die "Cannot read input file: $filename"
fi

#-----------------------------------------------------------------------
#  Buffer the input so it can be read once per chunk (stdin is not seekable)
#-----------------------------------------------------------------------
guts=$(mktemp) || die "Cannot create temporary file"
trap 'rm -f -- "$guts"' EXIT    # clean up on every exit path

if [[ $filename == "-" ]]; then
  cat > "$guts" || die "Cannot read standard input"
else
  cat -- "$filename" > "$guts" || die "Cannot read input file: $filename"
fi

# awk's NR also counts a final line that has no trailing newline; "wc -l"
# would miss it and that line would be dropped from the last chunk.
filesize=$(awk 'END { print NR }' "$guts")

# Lines per chunk, rounded up so that chunks * chunksize >= filesize.
chunksize=$(( (filesize + chunks - 1) / chunks ))

#-----------------------------------------------------------------------
#  Write the chunks
#-----------------------------------------------------------------------
for (( i = 0; i < chunks; i++ )); do
  printf -v seq "%0${padding}d" "$((i + 1))"
  dest="${prefix}${seq}${ext}"
  # Skip the lines owned by earlier chunks, then keep at most chunksize lines.
  # Trailing chunks may be empty when there are fewer lines than chunks.
  tail -n "+$(( i * chunksize + 1 ))" "$guts" | head -n "$chunksize" > "$dest"
done