Subjectively

dd if=/dev/random | kirk > blog

Subjectively header image 2

Split a text file into chunks by line count

January 4th, 2012 · No Comments · Linux

This is a simple splitter for text files by line count.

Input File with 4 lines: input.txt
this is line 1
this is line 2
this is line 3
this is line 4

./chunkfile.sh -f input.txt -c 4 -j 2 -p output_
or
cat input.txt | ./chunkfile.sh -c 4 -j 2 -p output_ -e txt

Output will be:
output_01.txt
output_02.txt
output_03.txt
output_04.txt

Each output file contains a single line of the input text.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/bin/bash
#===============================================================================
#
#          FILE:  chunkfile.sh
# 
#         USAGE:  ./chunkfile.sh 
# 
#   DESCRIPTION:  split a text file by lines
# 
#       OPTIONS:  ---
#  REQUIREMENTS:  ---
#          BUGS:  ---
#         NOTES:  ---
#        AUTHOR:  Kirk Roybal (DBA), kroybal@kalkomey.com
#       COMPANY:  Kalkomey
#       VERSION:  1.0
#       CREATED:  12/30/2011 09:45:42 AM CST
#      REVISION:  ---
#===============================================================================
 
 
# Version string printed by the -v option.
ScriptVersion="1.0"
 
#===  FUNCTION  ================================================================
#         NAME:  usage
#  DESCRIPTION:  Display usage information on stdout.
#                The program name shown is the last path component of $0, so
#                it reads the same whether the script was started with an
#                absolute path, a relative one (./chunkfile.sh) or via PATH.
#                The worked example passes "-p output_" because the prefix is
#                placed directly in front of the sequence number; the
#                underscore has to be part of it to get output_01.txt.
#===============================================================================
function usage ()
{
  # ${0##*/} removes everything up to the last "/"; the former ${0##/*/}
  # only matched paths beginning with "/" and left "./name" untouched.
  # The terminator sits in column 0 so the here-document does not depend
  # on tab indentation surviving an editor.
  cat <<EOT
 
  Usage :  ${0##*/} [options] [--] 
 
  Options: 
  -c|chunks #   Number of chunks
  -d|debug      Display bash debugging info
  -e|ext  exten Output file extension
  -f|file name  File to chunk
  -h|help       Display this message
  -j|just pad   Padding zeros for output files
  -l|log        Display logging information
  -p|prefix pre Prefix of new file names
  -v|version    Display script version
 
  Note:  using multiple logging options increases the log level
  (i.e.) ${0##*/} -lll -f input.txt -c 3 -j 2 -p output_
  Log level 3.
 
  Output:
  output_01.txt
  output_02.txt
  output_03.txt
 
 
EOT
}    # ----------  end of function usage  ----------
 
#-----------------------------------------------------------------------
#  Handle command line arguments
#-----------------------------------------------------------------------

# Default width of the zero-padded sequence number in output file names.
padding=2

#===  FUNCTION  ================================================================
#         NAME:  parse_options
#  DESCRIPTION:  Parse the command line with getopts and store the result in
#                the globals chunks, ext, filename, padding, prefix and
#                logging.  OPTIND is left behind the last option so the
#                caller can shift the options away.
#                -h and -v print and exit 0; an unknown option or a missing
#                option argument prints a message plus usage and exits 1.
#    ARGUMENTS:  the command line words to parse (normally "$@")
#===============================================================================
function parse_options ()
{
  local opt base stem file_ext
  local ext_given=0    # set once -e was seen, so a later -f keeps that choice

  # getopts only ever yields the single option letter (or ":" / "?" in
  # silent mode), so the case arms match letters only.
  while getopts ":c:de:f:hj:lp:v" opt
  do
    case $opt in

      c )  chunks=$OPTARG;;

      d )  set -x;;

      e )  ext=".$OPTARG"
           ext_given=1
           ;;

      f )  filename=$OPTARG
           # Look at the last path component only, so a dot in a directory
           # name (./input.txt, /data/v1.2/input) is never mistaken for the
           # start of the extension.
           base=${filename##*/}
           stem=${base%%.*}
           if [[ $base == *.* && -n $stem ]]; then
             # Everything after the first dot: archive.tar.gz -> .tar.gz
             file_ext=".${base#*.}"
           else
             # No dot at all, or a dot file such as .bashrc: no extension.
             stem=$base
             file_ext=""
           fi
           (( ext_given )) || ext=$file_ext
           [[ -z $prefix ]] && prefix="${stem}_"
           ;;

      h )  usage; exit 0   ;;

      j )  padding=$OPTARG;;

      l )  logging=$(( ${logging:-0} + 1 ));;

      p )  prefix=$OPTARG;;

      v )  echo "$0 -- Version $ScriptVersion"; exit 0   ;;

      : )  printf '\n  Option requires an argument : %s\n\n' "$OPTARG" >&2
           usage >&2; exit 1   ;;

      \? ) printf '\n  Option does not exist : %s\n\n' "$OPTARG" >&2
           usage >&2; exit 1   ;;

    esac    # --- end of case ---
  done
}    # ----------  end of function parse_options  ----------

parse_options "$@"
shift $((OPTIND - 1))
 
#-----------------------------------------------------------------------
#  Validate the options before any file is touched
#-----------------------------------------------------------------------

# Output names start with a prefix.  -f derives one from the file name;
# when reading from stdin there is no name, so -p must be given.
if [[ -z $filename && -z $prefix ]]; then
  cat >&2 <<EOT
 
  You must specify a filename or a prefix for output.
  (i.e.) ${0##*/} -f input.txt -c 3 -j 2 
  or
  (i.e.) cat input.txt | ${0##*/} -c 3 -j 2 -p output_
 
EOT
  usage >&2
  exit 1
fi

#take input from stdin
[[ -z $filename ]] && filename="-"

if [[ -z $chunks ]]; then
  printf '\n  You must specify the number of chunks with -c.\n\n' >&2
  usage >&2
  exit 1
fi

# The chunk count is used as a divisor and loop bound, so anything but a
# positive whole number would end in an arithmetic error further down.
if [[ ! $chunks =~ ^[0-9]+$ ]] || (( 10#$chunks < 1 )); then
  printf '\n  Number of chunks must be a positive integer : %s\n\n' "$chunks" >&2
  usage >&2
  exit 1
fi
# Force base 10 so a value such as 08 is not read as invalid octal later.
chunks=$((10#$chunks))

# The padding width is inserted into a printf format; allow digits only.
if [[ ! $padding =~ ^[0-9]+$ ]]; then
  printf '\n  Padding must be a non-negative integer : %s\n\n' "$padding" >&2
  usage >&2
  exit 1
fi
 
#===  FUNCTION  ================================================================
#         NAME:  chunk_lines
#  DESCRIPTION:  Copy the input into a temporary file, count its lines and
#                write it out again as consecutive pieces of
#                ceil(lines / chunks) lines each.  Exactly <chunks> files are
#                created; trailing ones are empty when the input runs out.
#                A last line without a terminating newline is counted and
#                written like any other line.
#    ARGUMENTS:  $1  input file, or "-" to read stdin
#                $2  number of chunks (positive integer)
#                $3  prefix of the output file names
#                $4  width of the zero-padded sequence number (default 2)
#                $5  output extension including the dot (may be empty)
#      GLOBALS:  guts - path of the temporary copy (written)
#      RETURNS:  0 on success, 1 on invalid arguments or I/O failure
#===============================================================================
function chunk_lines ()
{
  local source=$1 count=$2 stem=$3 width=${4:-2} suffix=$5
  local filesize chunksize dest i
  local status=0

  # count is a divisor and width goes into a printf format: digits only.
  if [[ ! $count =~ ^[0-9]+$ ]] || (( 10#$count < 1 )); then
    printf 'Number of chunks must be a positive integer : %s\n' "$count" >&2
    return 1
  fi
  count=$((10#$count))
  if [[ ! $width =~ ^[0-9]+$ ]]; then
    printf 'Padding must be a non-negative integer : %s\n' "$width" >&2
    return 1
  fi

  # The copy is needed because stdin can only be read once, yet the line
  # count must be known before the first chunk is written.
  guts=$(mktemp) || {
    printf 'Cannot create a temporary file\n' >&2
    return 1
  }
  # Remove the copy even when the script is aborted half way through.
  trap 'rm -f -- "$guts"' EXIT

  if cat -- "$source" > "$guts"; then
    # awk counts records, so an unterminated last line is included
    # (wc -l counts newline characters and would miss it).
    filesize=$(awk 'END { print NR }' "$guts")
    chunksize=$(( (filesize + count - 1) / count ))

    for (( i = 0; i < count; i++ ))
    do
      printf -v dest "%s%0${width}d%s" "$stem" "$((i + 1))" "$suffix"
      tail -n +"$(( i * chunksize + 1 ))" "$guts" |
        head -n "$chunksize" > "$dest" || status=1
    done
  else
    printf 'Cannot read input : %s\n' "$source" >&2
    status=1
  fi

  rm -f -- "$guts"
  trap - EXIT
  return "$status"
}    # ----------  end of function chunk_lines  ----------

# Last command of the script: its status becomes the script's exit status.
chunk_lines "$filename" "$chunks" "$prefix" "$padding" "$ext"

Tags:

No Comments so far ↓

There are no comments yet...Kick things off by filling out the form below.

Leave a Comment

You must log in to post a comment.