Subjectively

dd if=/dev/random | kirk > blog

Subjectively header image 2

csv2pg — Load a comma separated values file into postgresql

January 22nd, 2010 · No Comments · Linux, OS X

Link to the download is here

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/bin/bash
#===============================================================================
#
#          FILE:  csv2pg
# 
#         USAGE:  ./csv2pg [-d|--debug] [-v|--version] -c[|--csv] filename [-s|--schema schemaname]
#                 [-b|--database database] [-u|--user username] -t[|--table] tablename [-o|--ouput]
# 
#   DESCRIPTION:  load a csv file into a prepared table
# 
#       OPTIONS:  ---
#  REQUIREMENTS:  ---
#          BUGS:  ---
#         NOTES:  This script can be used in conjunction with csvheader2createtable
#                 to create a table with ETL tracking fields and load the data.
#                 (e.g.
#                   for file in *.csv
#                   do
#                     csvheader2createtable -s myschema $file | psql
#                     csv2pg -s myschema -t ${file//.csv} $file
#                   done
#                 )
#                 Assuming the naming convention for the files was sane...
#        AUTHOR:  Kirk Roybal (DBA), kirk.roybal@javelindirect.com
#       COMPANY:  Javelin
#       VERSION:  1.0
#       CREATED:  12/21/2009 14:32:37 CST
#      REVISION:  ---
#===============================================================================
 
function usage () {
cat <<EOS
  $0 [--help] [-d|--debug] [-v|--version] -c[|--csv] filename [-s|--schema schemaname]
     [-b|--database database] [-u|--user username] -t[|--table] tablename [-o|--ouput]
EOS
}
 
[[ ${#*} -eq 0 ]] && {
  usage
  exit
}
 
version="0.1"
for arg 
do
  delim=""
  case "$arg" in
    #translate --gnu-long-options to -g (short options) 
    --help) args="${args}-z ";;
    --csv) args="${args}-c ";;
    --host) args="${args}-h ";;
    --database) args="${args}-b ";;
    --user) args="${args}-u ";;
    --schema) args="${args}-s ";;
    --output) args="${args}-o ";;
    --table) args="${args}-t ";;
    --debug) args="${args}-d ";;
    --version) args="${args}-v ";;
  #pass through anything else
    *) leftchar=$(printf %1.1s "$arg")
      if [[ ! "$leftchar" == "-" ]]
      then
        delim="\""
      fi
      args="${args}${delim}${arg}${delim} "
      ;;
  esac
done
 
#Reset the positional parameters to the short options
eval set -- $args
 
schema=public
host=db1
database=warehouse
user=warehouse
 
while getopts ":b:c:dh:os:t:u:vz" option 2>/dev/null
do
  case $option in
    o) output=true;;
    h) host="${OPTARG}";;
    b) database="${OPTARG}";;
    u) user="${OPTARG}";;
    d) set -x ; debug=true ;;
    v) echo "$(basename $0) $version"; exit;;
    c) csvfile="${OPTARG}";;
    s) schema="${OPTARG}";;
    t) table="${OPTARG}";;
    z) usage; exit;;
    *) echo $OPTARG is an unrecognized option; usage; exit;;
  esac
done
 
[[ -z "$csvfile" ]] && {
  echo "must have csv file"
  exit 1
}
[[ -f "$csvfile" ]] || {
  echo "csv file not found"
  exit 1
}
 
[[ -z "$table" ]] && {
  echo "table name is required"
  exit 1
}
 
sql="SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS 
      WHERE table_name = '$table' and table_schema='$schema'
      AND column_name NOT IN ('id','date_stamp')
      ORDER BY ordinal_position;"
fields=$(psql -h $host -U $user $database -qtAc "$sql" | tr '\n' ', ' | sed -e 's/, "$//' -e 's/^/"/' -e 's/,/", "/g' | sed 's/loaded_from.*/loaded_from"/')
[[ "$fields" == "" ]] && {
  echo "table $table does not exist"
  exit 1
}
 
trailing_comma=$(cat "$csvfile" | head -n 1 | sed 's/[[:space:]]*$//g')
trailing_comma=${trailing_comma:${#trailing_comma}-1:1}
[[ "$trailing_comma" == "," ]] && sed -i '' 's/,[[:space:]]*$//' "$csvfile"
 
sql="COPY $schema.$table($fields) FROM STDIN WITH CSV HEADER;"
#code page conversion, append control data, insert
[[ -z "$output" ]] || echo "iconv -f ibm850 -t utf8 \"$csvfile\" | 
  tr -d '\\000' | tr '\\r' '\\n' | 
  sed -e '/^[[:space:]]*\$/d' -e '/^,*$/d' -e \"s/\\$/,klr,$csvfile/\" | 
  psql -h $host -U $user $database -c \"$sql\""
 
iconv -f ibm850 -t utf8 "$csvfile" | tr -d '\000' | tr '\r' '\n' | 
  sed -e '/^[[:space:]]*$/d' -e '/^,*$/d' -e "s/\$/,klr,$csvfile/" | 
  psql -h $host -U $user $database -c "$sql"

Tags:

No Comments so far ↓

There are no comments yet...Kick things off by filling out the form below.

Leave a Comment

You must log in to post a comment.