commit: eb5ac234342db46c881d8e69644d3292b5eabb54
parent: a5143df3035ca34990ab5e800087bd1586551b1f
Author: Eugen Rochko <eugen@zeonfederated.com>
Date: Fri, 6 Oct 2017 03:42:21 +0200
Clean up code style of Mastodon::TimestampId module (#5232)
* Clean up code style of Mastodon::TimestampId module
* Update brakeman config
Diffstat:
3 files changed, 125 insertions(+), 120 deletions(-)
diff --git a/config/brakeman.ignore b/config/brakeman.ignore
@@ -58,6 +58,26 @@
"note": ""
},
{
+ "warning_type": "SQL Injection",
+ "warning_code": 0,
+ "fingerprint": "34efc76883080f8b1110a30c34ec4f903946ee56651aae46c62477f45d4fc412",
+ "check_name": "SQL",
+ "message": "Possible SQL injection",
+ "file": "lib/mastodon/timestamp_ids.rb",
+ "line": 63,
+ "link": "http://brakemanscanner.org/docs/warning_types/sql_injection/",
+ "code": "connection.execute(\" CREATE OR REPLACE FUNCTION timestamp_id(table_name text)\\n RETURNS bigint AS\\n $$\\n DECLARE\\n time_part bigint;\\n sequence_base bigint;\\n tail bigint;\\n BEGIN\\n time_part := (\\n -- Get the time in milliseconds\\n ((date_part('epoch', now()) * 1000))::bigint\\n -- And shift it over two bytes\\n << 16);\\n\\n sequence_base := (\\n 'x' ||\\n -- Take the first two bytes (four hex characters)\\n substr(\\n -- Of the MD5 hash of the data we documented\\n md5(table_name ||\\n '#{SecureRandom.hex(16)}' ||\\n time_part::text\\n ),\\n 1, 4\\n )\\n -- And turn it into a bigint\\n )::bit(16)::bigint;\\n\\n -- Finally, add our sequence number to our base, and chop\\n -- it to the last two bytes\\n tail := (\\n (sequence_base + nextval(table_name || '_id_seq'))\\n & 65535);\\n\\n -- Return the time part and the sequence part. OR appears\\n -- faster here than addition, but they're equivalent:\\n -- time_part has no trailing two bytes, and tail is only\\n -- the last two bytes.\\n RETURN time_part | tail;\\n END\\n $$ LANGUAGE plpgsql VOLATILE;\\n\")",
+ "render_path": null,
+ "location": {
+ "type": "method",
+ "class": "Mastodon::TimestampIds",
+ "method": "define_timestamp_id"
+ },
+ "user_input": "SecureRandom.hex(16)",
+ "confidence": "Medium",
+ "note": ""
+ },
+ {
"warning_type": "Dynamic Render Path",
"warning_code": 15,
"fingerprint": "3b0a20b08aef13cf8cf865384fae0cfd3324d8200a83262bf4abbc8091b5fec5",
@@ -211,26 +231,6 @@
"note": ""
},
{
- "warning_type": "SQL Injection",
- "warning_code": 0,
- "fingerprint": "cd440d9d0bcb76225f4142030cec0bdec6ad119c537c108c9d514bf87bc34d29",
- "check_name": "SQL",
- "message": "Possible SQL injection",
- "file": "lib/mastodon/timestamp_ids.rb",
- "line": 69,
- "link": "http://brakemanscanner.org/docs/warning_types/sql_injection/",
- "code": "ActiveRecord::Base.connection.execute(\" CREATE OR REPLACE FUNCTION timestamp_id(table_name text)\\n RETURNS bigint AS\\n $$\\n DECLARE\\n time_part bigint;\\n sequence_base bigint;\\n tail bigint;\\n BEGIN\\n -- Our ID will be composed of the following:\\n -- 6 bytes (48 bits) of millisecond-level timestamp\\n -- 2 bytes (16 bits) of sequence data\\n\\n -- The 'sequence data' is intended to be unique within a\\n -- given millisecond, yet obscure the 'serial number' of\\n -- this row.\\n\\n -- To do this, we hash the following data:\\n -- * Table name (if provided, skipped if not)\\n -- * Secret salt (should not be guessable)\\n -- * Timestamp (again, millisecond-level granularity)\\n\\n -- We then take the first two bytes of that value, and add\\n -- the lowest two bytes of the table ID sequence number\\n -- (`table_name`_id_seq). This means that even if we insert\\n -- two rows at the same millisecond, they will have\\n -- distinct 'sequence data' portions.\\n\\n -- If this happens, and an attacker can see both such IDs,\\n -- they can determine which of the two entries was inserted\\n -- first, but not the total number of entries in the table\\n -- (even mod 2**16).\\n\\n -- The table name is included in the hash to ensure that\\n -- different tables derive separate sequence bases so rows\\n -- inserted in the same millisecond in different tables do\\n -- not reveal the table ID sequence number for one another.\\n\\n -- The secret salt is included in the hash to ensure that\\n -- external users cannot derive the sequence base given the\\n -- timestamp and table name, which would allow them to\\n -- compute the table ID sequence number.\\n\\n time_part := (\\n -- Get the time in milliseconds\\n ((date_part('epoch', now()) * 1000))::bigint\\n -- And shift it over two bytes\\n << 16);\\n\\n sequence_base := (\\n 'x' ||\\n -- Take the first two bytes (four hex characters)\\n substr(\\n -- Of the MD5 hash of the data we documented\\n md5(table_name ||\\n '#{SecureRandom.hex(16)}' ||\\n time_part::text\\n ),\\n 1, 4\\n )\\n -- And turn it into a bigint\\n )::bit(16)::bigint;\\n\\n -- Finally, add our sequence number to our base, and chop\\n -- it to the last two bytes\\n tail := (\\n (sequence_base + nextval(table_name || '_id_seq'))\\n & 65535);\\n\\n -- Return the time part and the sequence part. OR appears\\n -- faster here than addition, but they're equivalent:\\n -- time_part has no trailing two bytes, and tail is only\\n -- the last two bytes.\\n RETURN time_part | tail;\\n END\\n $$ LANGUAGE plpgsql VOLATILE;\\n\")",
- "render_path": null,
- "location": {
- "type": "method",
- "class": "Mastodon::TimestampIds",
- "method": "s(:self).define_timestamp_id"
- },
- "user_input": "SecureRandom.hex(16)",
- "confidence": "Medium",
- "note": ""
- },
- {
"warning_type": "Cross-Site Scripting",
"warning_code": 4,
"fingerprint": "e04aafe1e06cf8317fb6ac0a7f35783e45aa1274272ee6eaf28d39adfdad489b",
@@ -269,6 +269,6 @@
"note": ""
}
],
- "updated": "2017-10-05 20:06:40 +0200",
+ "updated": "2017-10-06 03:27:46 +0200",
"brakeman_version": "4.0.1"
}
diff --git a/lib/mastodon/timestamp_ids.rb b/lib/mastodon/timestamp_ids.rb
@@ -1,120 +1,111 @@
# frozen_string_literal: true
-module Mastodon
- module TimestampIds
- def self.define_timestamp_id
- conn = ActiveRecord::Base.connection
-
- # Make sure we don't already have a `timestamp_id` function.
- unless conn.execute(<<~SQL).values.first.first
- SELECT EXISTS(
- SELECT * FROM pg_proc WHERE proname = 'timestamp_id'
- );
+module Mastodon::TimestampIds
+ DEFAULT_REGEX = /timestamp_id\('(?<seq_prefix>\w+)'/
+
+ class << self
+ # Our ID will be composed of the following:
+ # 6 bytes (48 bits) of millisecond-level timestamp
+ # 2 bytes (16 bits) of sequence data
+ #
+ # The 'sequence data' is intended to be unique within a
+ # given millisecond, yet obscure the 'serial number' of
+ # this row.
+ #
+ # To do this, we hash the following data:
+ # * Table name (if provided, skipped if not)
+ # * Secret salt (should not be guessable)
+ # * Timestamp (again, millisecond-level granularity)
+ #
+ # We then take the first two bytes of that value, and add
+ # the lowest two bytes of the table ID sequence number
+ # (`table_name`_id_seq). This means that even if we insert
+ # two rows at the same millisecond, they will have
+ # distinct 'sequence data' portions.
+ #
+ # If this happens, and an attacker can see both such IDs,
+ # they can determine which of the two entries was inserted
+ # first, but not the total number of entries in the table
+ # (even mod 2**16).
+ #
+ # The table name is included in the hash to ensure that
+ # different tables derive separate sequence bases so rows
+ # inserted in the same millisecond in different tables do
+ # not reveal the table ID sequence number for one another.
+ #
+ # The secret salt is included in the hash to ensure that
+ # external users cannot derive the sequence base given the
+ # timestamp and table name, which would allow them to
+ # compute the table ID sequence number.
+ def define_timestamp_id
+ return if already_defined?
+
+ connection.execute(<<~SQL)
+ CREATE OR REPLACE FUNCTION timestamp_id(table_name text)
+ RETURNS bigint AS
+ $$
+ DECLARE
+ time_part bigint;
+ sequence_base bigint;
+ tail bigint;
+ BEGIN
+ time_part := (
+ -- Get the time in milliseconds
+ ((date_part('epoch', now()) * 1000))::bigint
+ -- And shift it over two bytes
+ << 16);
+
+ sequence_base := (
+ 'x' ||
+ -- Take the first two bytes (four hex characters)
+ substr(
+ -- Of the MD5 hash of the data we documented
+ md5(table_name ||
+ '#{SecureRandom.hex(16)}' ||
+ time_part::text
+ ),
+ 1, 4
+ )
+ -- And turn it into a bigint
+ )::bit(16)::bigint;
+
+ -- Finally, add our sequence number to our base, and chop
+ -- it to the last two bytes
+ tail := (
+ (sequence_base + nextval(table_name || '_id_seq'))
+ & 65535);
+
+ -- Return the time part and the sequence part. OR appears
+ -- faster here than addition, but they're equivalent:
+ -- time_part has no trailing two bytes, and tail is only
+ -- the last two bytes.
+ RETURN time_part | tail;
+ END
+ $$ LANGUAGE plpgsql VOLATILE;
SQL
- # The function doesn't exist, so we'll define it.
- conn.execute(<<~SQL)
- CREATE OR REPLACE FUNCTION timestamp_id(table_name text)
- RETURNS bigint AS
- $$
- DECLARE
- time_part bigint;
- sequence_base bigint;
- tail bigint;
- BEGIN
- -- Our ID will be composed of the following:
- -- 6 bytes (48 bits) of millisecond-level timestamp
- -- 2 bytes (16 bits) of sequence data
-
- -- The 'sequence data' is intended to be unique within a
- -- given millisecond, yet obscure the 'serial number' of
- -- this row.
-
- -- To do this, we hash the following data:
- -- * Table name (if provided, skipped if not)
- -- * Secret salt (should not be guessable)
- -- * Timestamp (again, millisecond-level granularity)
-
- -- We then take the first two bytes of that value, and add
- -- the lowest two bytes of the table ID sequence number
- -- (`table_name`_id_seq). This means that even if we insert
- -- two rows at the same millisecond, they will have
- -- distinct 'sequence data' portions.
-
- -- If this happens, and an attacker can see both such IDs,
- -- they can determine which of the two entries was inserted
- -- first, but not the total number of entries in the table
- -- (even mod 2**16).
-
- -- The table name is included in the hash to ensure that
- -- different tables derive separate sequence bases so rows
- -- inserted in the same millisecond in different tables do
- -- not reveal the table ID sequence number for one another.
-
- -- The secret salt is included in the hash to ensure that
- -- external users cannot derive the sequence base given the
- -- timestamp and table name, which would allow them to
- -- compute the table ID sequence number.
-
- time_part := (
- -- Get the time in milliseconds
- ((date_part('epoch', now()) * 1000))::bigint
- -- And shift it over two bytes
- << 16);
-
- sequence_base := (
- 'x' ||
- -- Take the first two bytes (four hex characters)
- substr(
- -- Of the MD5 hash of the data we documented
- md5(table_name ||
- '#{SecureRandom.hex(16)}' ||
- time_part::text
- ),
- 1, 4
- )
- -- And turn it into a bigint
- )::bit(16)::bigint;
-
- -- Finally, add our sequence number to our base, and chop
- -- it to the last two bytes
- tail := (
- (sequence_base + nextval(table_name || '_id_seq'))
- & 65535);
-
- -- Return the time part and the sequence part. OR appears
- -- faster here than addition, but they're equivalent:
- -- time_part has no trailing two bytes, and tail is only
- -- the last two bytes.
- RETURN time_part | tail;
- END
- $$ LANGUAGE plpgsql VOLATILE;
- SQL
- end
end
- def self.ensure_id_sequences_exist
- conn = ActiveRecord::Base.connection
-
+ def ensure_id_sequences_exist
# Find tables using timestamp IDs.
- default_regex = /timestamp_id\('(?<seq_prefix>\w+)'/
- conn.tables.each do |table|
+ connection.tables.each do |table|
# We're only concerned with "id" columns.
- next unless (id_col = conn.columns(table).find { |col| col.name == 'id' })
+ next unless (id_col = connection.columns(table).find { |col| col.name == 'id' })
# And only those that are using timestamp_id.
- next unless (data = default_regex.match(id_col.default_function))
+ next unless (data = DEFAULT_REGEX.match(id_col.default_function))
seq_name = data[:seq_prefix] + '_id_seq'
+
# If we were on Postgres 9.5+, we could do CREATE SEQUENCE IF
# NOT EXISTS, but we can't depend on that. Instead, catch the
# possible exception and ignore it.
# Note that seq_name isn't a column name, but it's a
# relation, like a column, and follows the same quoting rules
# in Postgres.
- conn.execute(<<~SQL)
+ connection.execute(<<~SQL)
DO $$
BEGIN
- CREATE SEQUENCE #{conn.quote_column_name(seq_name)};
+ CREATE SEQUENCE #{connection.quote_column_name(seq_name)};
EXCEPTION WHEN duplicate_table THEN
-- Do nothing, we have the sequence already.
END
@@ -122,5 +113,19 @@ module Mastodon
SQL
end
end
+
+ private
+
+ def already_defined?
+ connection.execute(<<~SQL).values.first.first
+ SELECT EXISTS(
+ SELECT * FROM pg_proc WHERE proname = 'timestamp_id'
+ );
+ SQL
+ end
+
+ def connection
+ ActiveRecord::Base.connection
+ end
end
end
diff --git a/lib/tasks/db.rake b/lib/tasks/db.rake
@@ -20,10 +20,10 @@ def each_schema_load_environment
if Rails.env == 'development'
test_conf = ActiveRecord::Base.configurations['test']
+
if test_conf['database']&.present?
ActiveRecord::Base.establish_connection(:test)
yield
-
ActiveRecord::Base.establish_connection(Rails.env.to_sym)
end
end