Subjectively

dd if=/dev/random | kirk > blog

Subjectively header image 1

Split a text file into chunks by line count

January 4th, 2012 · Linux

This is a simple splitter for text files by line count.

Input File with 4 lines: input.txt
this is line 1
this is line 2
this is line 3
this is line 4

./chunkfile -f input.txt -c 4 -j 2 -p output
or
cat input.txt | ./chunkfile.sh -c 4 -j 2 -p output -e txt

Output will be:
Output:
output_01.txt
output_02.txt
output_03.txt
output_04.txt

With a single line of text in each file.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/bin/bash
#===============================================================================
#
#          FILE:  chunkfile.sh
# 
#         USAGE:  ./chunkfile.sh 
# 
#   DESCRIPTION:  split a text file by lines
# 
#       OPTIONS:  ---
#  REQUIREMENTS:  ---
#          BUGS:  ---
#         NOTES:  ---
#        AUTHOR:  Kirk Roybal (DBA), kroybal@kalkomey.com
#       COMPANY:  Kalkomey
#       VERSION:  1.0
#       CREATED:  12/30/2011 09:45:42 AM CST
#      REVISION:  ---
#===============================================================================
 
 
ScriptVersion="1.0"
 
#===  FUNCTION  ================================================================
#         NAME:  usage
#  DESCRIPTION:  Display usage information.
#===============================================================================
function usage ()
{
	cat <<- EOT
 
  Usage :  ${0##/*/} [options] [--] 
 
  Options: 
  -c|chunks #   Number of chunks
  -d|debug      Display bash debugging info
  -e|ext  exten Output file extension
  -f|file name  File to chunk
  -h|help       Display this message
  -j|just pad   Padding zeros for output files
  -l|log        Display logging information
  -p|prefix pre Prefix of new file names
  -v|version    Display script version
 
  Note:  using multiple logging options increases the log level
  (i.e.) ${0##/*/} -lll -f input.txt -c 3 -j 2 -p output
  Log level 3.
 
  Output:
  output_01.txt
  output_02.txt
  output_03.txt
 
 
	EOT
}    # ----------  end of function usage  ----------
 
#-----------------------------------------------------------------------
#  Handle command line arguments
#-----------------------------------------------------------------------
 
padding=2
 
while getopts ":c:de:f:hj:lp:v" opt
do
  case $opt in
 
    c|chunks   )  chunks=$OPTARG;;
 
    d|debug    )  set -x;;
 
    e|ext      )  ext=".$OPTARG";;
 
    f|file     )  filename=$OPTARG
                  ext=".${filename#*.}"
                  [[ -z $prefix ]] && prefix="$(basename ${filename} $ext)_"
                  ;;
 
    h|help     )  usage; exit 0   ;;
 
    j|just     ) padding=$OPTARG;;
 
    l|log      ) (( logging++ ));;
 
    p|prefix   ) prefix=$OPTARG;;
 
    v|version  )  echo "$0 -- Version $ScriptVersion"; exit 0   ;;
 
    \? )  echo -e "\n  Option does not exist : $OPTARG\n"
          usage; exit 1   ;;
 
  esac    # --- end of case ---
done
shift $(($OPTIND-1))
 
[[ -z $filename ]] && [[ -z $prefix ]] && {
	cat <<- EOT
 
  You must specify a filename or a prefix for output.
  (i.e.) ${0##/*/} -f input.txt -c 3 -j 2 
  or
  (i.e.) cat input.txt | ${0##/*/} -c 3 -j 2 -p output
 
	EOT
  usage
  exit 1
}
 
#take input from stdin
[[ -z $filename ]] && filename="-"
 
[[ -z $chunks ]] && {
  usage
  exit 1
}
 
guts=$(tempfile)
cat "$filename" > "$guts"
 
filesize=$(cat "$guts" | wc -l)
chunksize=$((filesize/chunks))
[[ $((filesize%chunks)) -gt 0 ]] && (( chunksize++))
 
for ((i=0;i<$chunks;i++))
do
  dest="${prefix}$(printf "%0${padding}i" $(($i + 1)))$ext"
  cat "$guts" | tail -n +$((($i*$chunksize)+1)) | 
    head -n $chunksize > "$dest"
done
 
[[ -f "$guts" ]] && rm "$guts"

→ No CommentsTags:

Bulk load csv data from bash into mysql with LOAD DATA LOCAL INFILE

December 13th, 2011 · Uncategorized

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/bin/bash
#===============================================================================
#
#          FILE:  csv2mysql.sh
# 
#         USAGE:  ./csv2mysql.sh 
# 
#   DESCRIPTION:  Import data into mysql from csv
#                     ignore flag fields along the way.
#       OPTIONS:  ---
#  REQUIREMENTS:  ---
#          BUGS:  ---
#         NOTES:  ---
#        AUTHOR:  Kirk Roybal (Operations Manager), kroybal@kalkomey.com
#       COMPANY:  Kalkomey
#       VERSION:  1.0
#       CREATED:  12/13/2011 15:31:50 CST
#      REVISION:  ---
#===============================================================================
 
host=192.168.101.125
user=root
inputfile=product1.csv
sep=,
table=kris_inventory_sims
database=
 
 
ScriptVersion="1.0"
 
#===  FUNCTION  ================================================================
#         NAME:  usage
#  DESCRIPTION:  Display usage information.
#===============================================================================
function usage ()
{
	cat <<- EOT
 
  Usage :  ${0##/*/} [options] [--] 
 
  Options: 
  -d|debug      Display bash debugging info
  -h|host name  Server Name or IP
  -i|input file Filename to load
  -l|log        Display logging information
  -s|sep char   1 Character field sep
  -t|table name Data Table to load
  -u|user name  Database username
  -v|version    Display script version
 
  Note:  using multiple logging options increases the log level
  (i.e.) ${0##/*/} -lll
  Log level 3.
 
  ${0##/*/} -h 192.168.101.125 -u root -d KE_IC -t kris_inventory_sims -i product1.csv
  ${0##/*/} -h 192.168.101.125 -u root -d KE_IC -t ocr_header -i ocr.csv
 
	EOT
}    # ----------  end of function usage  ----------
 
#-----------------------------------------------------------------------
#  Handle command line arguments
#-----------------------------------------------------------------------
 
while getopts ":dh:i:ls:t:u:v" opt
do
  case $opt in
 
    d|db       )  database="$OPTARG";;
 
    h|host     )  host="$OPTARG";;
 
    i|input    )  inputfile="$OPTARG";;
 
    l|log      )  (( logging++ ));  [[ $logging -gt 2 ]] && set -x;;
 
    s|sep      )  sep="$OPTARG{0:1}";;
 
    t|table    )  table="$OPTARG";;
 
    u|user     )  user="$OPTARG";;
 
    v|version  )  echo "$0 -- Version $ScriptVersion"; exit 0   ;;
 
    \? )  echo -e "\n  Option does not exist : $OPTARG\n"
          usage; exit 1   ;;
 
  esac    # --- end of case ---
done
shift $(($OPTIND-1))
 
#CREATE TABLE `ocr_header` (
#  `companyid` varchar(11) COLLATE utf8_unicode_ci DEFAULT NULL,
#  `id` int(11) NOT NULL AUTO_INCREMENT,
#  `table_id` int(11) NOT NULL DEFAULT '0',
#  `table_name` varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL,
#  `doc_type` varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL,
#  `keywords` varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL,
#  PRIMARY KEY (`id`)
#) ENGINE=MyISAM AUTO_INCREMENT=8 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
 
#get a field list
fields=$(mysql -h $host -u $user $database -e "SHOW CREATE TABLE $table;" | 
  gsed 's/\\n/\n/g' | grep '^[[:space:]]' | 
  sed -e 's/^[^[:alnum:]]*//' -e 's/`.*,/,/g' |
  sed -e '/^date_stamp,$/d' -e '/^id,$/d' -e '/ KEY /d' -e '/^user_id,$/d' -e '/^loaded_from,$/d' |
  sed -e '$s/,//g')
 
#note that id is intentionally missing
# and lack of comma on last line
#companyid,
#table_id,
#table_name,
#doc_type,
#keywords
 
#LOAD DATA LOCAL INFILE 'product1.csv'
#INTO TABLE ocr_header
#FIELDS TERMINATED BY ','
#OPTIONALLY ENCLOSED BY """"
#LINES TERMINATED BY '\n' 
#(
#companyid,
#table_id,
#table_name,
#doc_type,
#keywords
#)
 
nl='\\n'
sql="
LOAD DATA LOCAL INFILE '$inputfile'
INTO TABLE $table
FIELDS TERMINATED BY '$sep'
OPTIONALLY ENCLOSED BY \"\"\"\"
LINES TERMINATED BY '$nl'"
 
echo -e "$sql 
(
$fields
)" #| mysql -h $host -u $user $database --local-infile
 
sql="UPDATE $table
SET loaded_from = 'product1.csv',
user_id = 'klr'
WHERE loaded_from is null;"
echo -e "$sql" #|  mysql -h $host -u $user $database

→ No CommentsTags:

Generate unique, non-sequential keys

October 29th, 2011 · Uncategorized

This article demonstrates 3 different methods that I have found to create unique numbers that are non-sequential, do not overlap, and can be modified to produce a non-overlapping series in the future. They are *not* based on random numbers. The encoded series is completely deterministic. Coded in PHP. Hope this helps somebody who’s trying to come up with a numbering scheme that is forward supportable.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
<?php
 
/*
  GetHandleForItemIndex
    Parameters: Integer
 
  Given a series of integers, 
  output a different series that is non-contiguous,
    non-overlapping, and can be modified in the future
    not to overlap numbers that have been generated by
    the same input series.
 
  input 1,2,3,4,5,6,7,8,9,10
  output 8400,35400,62400,89400,116400,143400,170400,197400,224400,251400
 
  By changing the prime number constants, a completly different (and 
    non-overlapping) output series can be generated in the future.
 
  GetItemIndexForHandle
    Parameters: Integer 
 
  Translates the series back into simple decimal contiguous input.
 
*/
 
#const PRIME_NUMBER_A = '65521';
#const PRIME_NUMBER_B = '999721';
#const PRIME_NUMBER_C = '145709';
const PRIME_NUMBER_A = '2';
const PRIME_NUMBER_B = '3';
const PRIME_NUMBER_C = '5';
 
function GetHandleForItemIndex( $in )
{
 
//$in =( PRIME_NUMBER_A*( $in +PRIME_NUMBER_A ) );
$in =bcmul( PRIME_NUMBER_A, bcadd( $in ,PRIME_NUMBER_A ) );
$in =bcmul( PRIME_NUMBER_A, bcsub( $in ,PRIME_NUMBER_B ) );
$in =bcmul( PRIME_NUMBER_A, bcsub( $in ,PRIME_NUMBER_C ) );
$in =bcmul( PRIME_NUMBER_B, bcadd( $in ,PRIME_NUMBER_A ) );
$in =bcmul( PRIME_NUMBER_B, bcsub( $in ,PRIME_NUMBER_B ) );
$in =bcmul( PRIME_NUMBER_B, bcsub( $in ,PRIME_NUMBER_C ) );
$in =bcmul( PRIME_NUMBER_C, bcadd( $in ,PRIME_NUMBER_A ) );
$in =bcmul( PRIME_NUMBER_C, bcsub( $in ,PRIME_NUMBER_B ) );
$in =bcmul( PRIME_NUMBER_C, bcsub( $in ,PRIME_NUMBER_C ) );
return $in;
}
 
function GetItemIndexForHandle( $in )
{
$in = bcadd(bcdiv($in,PRIME_NUMBER_C),PRIME_NUMBER_C);
$in = bcadd(bcdiv($in,PRIME_NUMBER_C),PRIME_NUMBER_B);
$in = bcsub(bcdiv($in,PRIME_NUMBER_C),PRIME_NUMBER_A);
 
$in = bcadd(bcdiv($in,PRIME_NUMBER_B),PRIME_NUMBER_C);
$in = bcadd(bcdiv($in,PRIME_NUMBER_B),PRIME_NUMBER_B);
$in = bcsub(bcdiv($in,PRIME_NUMBER_B),PRIME_NUMBER_A);
 
$in = bcadd(bcdiv($in,PRIME_NUMBER_A),PRIME_NUMBER_C);
$in = bcadd(bcdiv($in,PRIME_NUMBER_A),PRIME_NUMBER_B);
$in = bcsub(bcdiv($in,PRIME_NUMBER_A),PRIME_NUMBER_A);
 
return $in;
 
}
 
?>
 
<HTML>
<HEAD>
<TITLE>Obfuscation Demo</TITLE>
</HEAD>
<BODY>
<TABLE>
<?
for ($i=1;$i<=10;$i++) {
 
  $h =GetHandleForItemIndex($i);
  printf ("<TR><TD>%s</TD><TD>%s</TD></TR>", GetItemIndexForHandle($h), $h) ;
  print "\n";
 
};
 
//print GetHandleForItemIndex('1000000000');
 
 
 
/*
  rot10enc
    Parameters: Integer
 
  Encodes an integer with a simple substitution cipher,
    and then reverses the order of magnitude of the digits.
    The least significant digit is used to reorder an input
    list into non-contiguous, 'unordered' output series.
 
  Input  0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
  Output 6,5,7,4,8,3,9,0,2,1,65,55,75,45,85,35,95
 
  Still somewhat easily predictable, but discourages use
    of the integer for anything other than an identifier.
 
  rot10dec
    Original series is easily retrieved.
 
*/
 
const ROT10 = '6574839021';
const ROT01 = '0123456789';
 
 
function rot10enc($i) {
 
  for ($j=0; $j<strlen($i);$j++) {
    $idx = substr($i,$j,1);
    $buffer .= substr(ROT10,$idx,1);
  }
 
  return strrev($buffer);
 
}
 
function rot10dec($i) {
  $i = strrev($i);
  for ($j=0;$j<strlen($i);$j++) {
    $idx = strpos(ROT10, substr($i,$j,1));
    $buffer .= $idx;
  }
 
  return $buffer;
 
}
 
 
for ($i=0;$i<=10;$i++) {
  $j = rot10enc($i);
  printf ('<TR><TD>%s</TD><TD>%s</TD></TR>', rot10dec($j), $j);
  print "\n";
 
}
 
 
/*
 
  Convert an input integer to any arbitrary base encoding.
    base30 is given as an example.  Note that 3,T,0,O 
    are intentionally missing to illustrate use of this function
    for creating serial numbers that are easily legible.
 
  Also, a base conversion to a nonsense encoding illustrates using
    this algorithm to create a non-contiguous, non-repeating series.
 
  This can be modified to produce non-overlapping series by changing
    either 1) the characters used in the encoding 2) the order of magnitude
    of the input series.
 
INPUT   0,1,2,3,4,5,6,7,8,9,10
OUTPUT  9,A,8,B,7,C,6,D,5,E,4
 
  Further obfuscation is also possible by reversing the order of the
    resulting series digits, so the least significant digit becomes the
    most significant sorting digit.
 
*/
 
 
 
 
const base30a = 'ABCDEFGHJKLMNPQRSUVWXYZ2456789';
const base30x = '9A8B7C6D5E4F2GZHYJXKWLVMUNSPQR';
 
const base62 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
 
const basex = base30x;
const baselen = 30; # 30/62

function dec2base($i) {
 
  while ($i >= strlen(basex)) {
    $base = $i;
    $magnitude = 0;
    while ($base >= baselen) {
      $magnitude++;
      $base /= baselen;
    }
    $idx = intval($base);
    $msd = substr(basex,$idx,1);
    $buffer .= $msd;
    $idx = pow(baselen,$magnitude) * $idx;
    $i -= $idx;
  }
 
  if (strlen($i) <> 0) $buffer .= substr(basex,$i,1);
 
 
  return $buffer;
 
}
 
function base2dec($i) {
 
  while (strlen($i) > 0) {
    $magnitude = strlen($i)-1; #order of magnitude of digit
    $msd = substr($i,0,1);  #the most significant digit itself
    $idx = strpos(basex,$msd) ; #decimal position that digit appears in keyfield
    #add the decimal value of the digit multiplied by it's order of magnitude
    $buffer += pow(baselen,$magnitude) * $idx;  
    $i = substr($i,1); #move on to the next digit
  }
 
  return $buffer;
 
}
 
 
for ($i=0; $i<=10; $i++) {
  $blah = dec2base($i);
  printf ("<TR><TD>%s</TD><TD>%s</TD></TR>\n",$blah, base2dec($blah));
 
}
 
?> 
 
</TABLE>
</BODY>
</HTML>

→ No CommentsTags: