#!/usr/bin/env ruby
# coding: utf-8

# Lint script for internal events.
#
# Walks the Rust sources under ./src and ./lib, collects every internal event
# (struct members, emitted metrics, emitted logs, and number of uses), and then
# reports:
#   * log messages in src/ that are not capitalized or lack a trailing period,
#   * events that violate the naming / metric / log rules encoded in
#     `Event#valid_with_handle?`,
#   * events whose signatures are duplicates of one another.
#
# Prints "<n> error(s)" and exits with status 1 when any error was found.

require 'find'

# These members/tags are common to multiple events
BYTE_SIZE_COUNT = ['byte_size', 'count']

# SUFFIX => [MESSAGE, COUNTERS, ADDITIONAL_TAGS]
EVENT_CLASSES = {
  'BytesReceived' => [
    'Bytes received.', ['received_bytes'], ['byte_size', 'protocol']
  ],
  'EventsReceived' => [
    'Events received.', ['received_events', 'received_event_bytes'], ['count', 'byte_size']
  ],
  'EventsSent' => [
    'Events sent.', ['sent_events', 'sent_event_bytes'], ['count', 'byte_size']
  ],
  'BytesSent' => [
    'Bytes sent.', ['sent_bytes'], ['byte_size', 'protocol']
  ],
}

METRIC_NAME_EVENTS_DROPPED = 'component_discarded_events_total'
METRIC_NAME_ERROR = 'component_errors_total'

# Appends `item` to the array stored under `key` in `hash`, creating the array
# when the key is not present yet. Returns the (updated) array.
def hash_array_add(hash, key, item)
  arr = hash.fetch(key, [])
  arr.append(item)
  hash[key] = arr
end

# Returns a truthy value when `name` is either a double-quoted string literal
# or a path ending in an upper-case constant name (e.g. `error_type::FOO_BAR`).
def is_constant?(name)
  # The constant part must allow more than one character, otherwise only
  # single-letter constant names would ever match.
  (name.start_with?('"') && name.end_with?('"')) || name.match?(/^(.+::)[A-Z0-9_]+$/)
end

# Returns the 1-based line number at which the first occurrence of `needle`
# starts inside `haystack`, or nil when `needle` does not occur.
def find_line_number(haystack, needle)
  idx = haystack.index(needle)
  return nil if idx.nil?

  # Count the newlines before the match rather than the number of (partial)
  # lines, so that a match starting at column 0 is attributed to its own line
  # instead of the preceding one.
  haystack[0, idx].count("\n") + 1
end

# A class to hold error reports and common functionality
class Event
  attr_accessor :path, :skip_dropped_events, :uses, :skip_duplicate_check, :skip_validity_check,
                :impl_internal_event, :impl_register_event, :impl_event_handle
  attr_reader :name, :reports, :logs
  attr_writer :members

  def initialize(name)
    @path = nil
    @skip_duplicate_check = false
    @skip_validity_check = false
    @skip_dropped_events = false
    @emits_component_events_dropped = false
    @name = name
    @reports = []
    @members = {}
    @counters = {}
    @metrics = {}
    @logs = []
    @uses = 0
    @impl_internal_event = false
    @impl_register_event = false
    @impl_event_handle = false
  end

  # Records a metric of the given type and name with its tags (a Hash of
  # tag name => tag value source text). Counters are additionally indexed by
  # name alone in @counters.
  def add_metric(type, name, tags)
    @metrics["#{type}:#{name}"] = tags
    @counters[name] = tags if type == 'counter'
  end

  # Scan for counter names and tags
  def scan_metrics(block)
    # Only the `m` flag is used: in Ruby `m` already makes `.` match newlines,
    # whereas `s` selects the Windows-31J regexp encoding and can raise
    # Encoding::CompatibilityError on non-ASCII UTF-8 input.
    block.scan(/ (counter|gauge|histogram)!\((?:\n\s+)?"([^"]+)",?(.+?)\)[;\n]/m) do |type, name, tags|
      tags = tags.scan(/"([^"]+)" => (.+?)(?:,|$)/).to_h
      add_metric(type, name, tags)
    end
  end

  # Scan the registered event macro block
  def scan_registered_event(event_fields, handle_fields, data_type, emit_block)
    @members = event_fields.scan(/^ *([a-z0-9_]+): *(.+?),$/m)
                           .map { |member, type| [member, type] }

    handle_fields.scan(/^ *([a-z0-9_]+): *(.+?) *= *(.+?),$/m) do |_name, _type, assignment|
      scan_component_dropped_events(assignment)

      # This is a _slightly_ different regex than the one in `scan_metrics`,
      # couldn't figure a way to unify them.
      # NOTE(review): the pattern requires a space before the macro name, as in
      # the original; confirm this matches how handle fields are written.
      metric = assignment.match(/ (counter|gauge|histogram)!\((?:\n\s+)?"([^"]+)"(,.+)?\)/m)
      next if metric.nil?

      # `String#match` hands back a single MatchData; the capture groups have
      # to be pulled out explicitly (block parameters are not destructured).
      metric_type, metric_name, raw_tags = metric.captures
      tags = (raw_tags || '').scan(/"([^"]+)" => (.+?)(?:,|$)/).to_h
      add_metric(metric_type, metric_name, tags)
    end

    scan_logs(emit_block)
  end

  # Records one log call: its level, message text, and parameter names.
  def add_log(type, message, parameters)
    @logs.append([type, message, parameters])
  end

  # Scan for log outputs and their parameters
  def scan_logs(block)
    log_pattern = /
      (trace|debug|info|warn|error)! # The log type
      \(\s*(?:message\s*=\s*)? # Skip any leading "message =" bit
      (?:"([^({)][^("]+)"|([^,]+)) # The log message text
      ([^;]*?) # Match the parameter list
      \)(?:;|\n\s*}) # Normally would end with simply ");", but some are missing the semicolon
    /mx

    block.scan(log_pattern) do |type, raw_message, var_message, parameters|
      parameters = parameters.scan(/([a-z0-9_]+) *= .|[?%]([a-z0-9_.]+)/)
                             .map { |assignment, simple| assignment || simple }

      message = raw_message.nil? ? var_message : raw_message
      add_log(type, message, parameters)
    end
  end

  # Scan for the emission of ComponentEventsDropped.
  def scan_component_dropped_events(block)
    if block.match?(/(emit|register)!\(\s*ComponentEventsDropped\b/)
      @emits_component_events_dropped = true
    end
  end

  # The event signature is used to check for duplicates and is
  # composed from the member names and their types, the metric types,
  # names, and their tags, and the log messages and parameters. If no
  # metrics and no logs are defined for the event, the signature is
  # `nil` to skip duplicate checking.
  def signature
    return nil if @metrics.empty? && @logs.empty?

    members = @members.map { |name, type| "#{name}:#{type}" }.sort.join(':')
    metrics = @metrics.map { |name, value| "#{name}(#{value.keys.sort.join(',')})" }
                      .sort.join(';')
    logs = @logs.sort.join(';')
    "#{members}[#{logs}][#{metrics}]"
  end

  # Validates the event using its own logs. Returns true when no reports
  # have been recorded.
  def valid?
    valid_with_handle?(self)
  end

  # Validates this event, taking the log output from `handle` (another Event,
  # or this one). Appends a report for every violated rule and returns true
  # when the report list is empty afterwards.
  def valid_with_handle?(handle)
    append('Event has no uses.') if @uses.zero?

    check_event_class_requirements(handle)

    # Any error-level log marks the event as a candidate Error event (not
    # "exactly one", which would let events with several error logs escape).
    has_error_logs = handle.logs.any? { |type, _, _| type == 'error' }
    is_events_dropped_event =
      @name.end_with?('EventsDropped') || @counters.include?(METRIC_NAME_EVENTS_DROPPED)

    # Validate Error events
    if (has_error_logs && !is_events_dropped_event) || @name.end_with?('Error')
      check_error_event(handle)
    end

    # TODO remove @skip_dropped_events check logic after DroppedEvents audit is complete
    # (https://github.com/vectordotdev/vector/issues/13995)
    # Validate EventsDropped events
    check_events_dropped_event(handle) if is_events_dropped_event && !@skip_dropped_events

    check_counter_tag_constants

    @reports.empty?
  end

  # Reports when none of this event's logs uses one of the given levels.
  def log_level_one_of(levels)
    if @logs.none? { |type, _, _| levels.include?(type) }
      append("This event MUST log with one of these levels: #{levels}.")
    end
  end

  # Reports when none of this event's logs uses the given level.
  def log_level_exactly(level)
    log_level_one_of([level])
  end

  # Adds a report line to this event.
  def append(report)
    @reports.append(report)
  end

  private

  # Applies the EVENT_CLASSES rules (required log message, log parameters,
  # and counters) when the event name carries one of the known suffixes.
  def check_event_class_requirements(handle)
    EVENT_CLASSES.each do |suffix, (required_message, counters, additional_tags)|
      next unless @name.end_with?(suffix)

      handle.logs.each do |type, message, parameters|
        append('Log type MUST be "trace!".') if type != 'trace'
        if message != required_message
          append("Log message MUST be \"#{required_message}\" (is \"#{message}\").")
        end
        additional_tags.each do |tag_name|
          append("Log MUST contain tag \"#{tag_name}\"") unless parameters.include?(tag_name)
        end
      end

      counters.each do |counter|
        counters_must_include_exclude_tags("component_#{counter}_total",
                                           additional_tags - BYTE_SIZE_COUNT)
      end
    end
  end

  # Rules for Error events: name suffix, an error-level log, the error counter
  # with its required tags, and agreement between log parameters and counter tags.
  def check_error_event(handle)
    # Name check
    append('Error events MUST be named "___Error".') unless @name.end_with?('Error')

    # Outputs an error log
    handle.log_level_exactly('error')

    # Metric check
    counters_must_include_exclude_tags(METRIC_NAME_ERROR, ['error_type', 'stage'])

    # nil when the counter is missing; that case has already been reported by
    # the metric check above, so the tag comparison below is skipped.
    error_counter_tags = @counters[METRIC_NAME_ERROR]

    # Make sure Error events contain the required parameters
    handle.logs.each do |type, _message, parameters|
      next unless type == 'error'

      ['error_type', 'stage'].each do |parameter|
        unless parameters.include?(parameter)
          append("Error log for Error event MUST include parameter \"#{parameter}\".")
        end
      end

      next if error_counter_tags.nil?

      ['error_code', 'error_type', 'stage'].each do |parameter|
        if parameters.include?(parameter) && !error_counter_tags.include?(parameter)
          append("Counter \"#{METRIC_NAME_ERROR}\" must include \"#{parameter}\" to match error log.")
        end
      end
    end
  end

  # Rules for EventsDropped events.
  def check_events_dropped_event(handle)
    # Don't run the checks on event structs which themselves emit ComponentEventsDropped,
    # as the ComponentEventsDropped event is already checked.
    # Instead, verify that component_discarded_events_total is not being over-incremented.
    if @emits_component_events_dropped
      if @counters.include?(METRIC_NAME_EVENTS_DROPPED)
        append("Event emitting ComponentEventsDropped should not also increment counter `#{METRIC_NAME_EVENTS_DROPPED}`")
      end
      return
    end

    # Name check
    append('EventsDropped events MUST be named "___EventsDropped".') unless @name.end_with?('EventsDropped')

    # Outputs an error log or debug log. Which level is dependent on the value of the param `intentional`, however
    # because implementation can involve passing in the value of the `intentional` bool at compile time, we would need to
    # scan all the source code for places that emit this event to determine that.
    handle.log_level_one_of(['error', 'debug'])

    # Metric check
    counters_must_include_exclude_tags(METRIC_NAME_EVENTS_DROPPED, ['intentional'], ['reason', 'count'])

    # nil when the counter is missing (the event may qualify by name alone);
    # the metric check above has already reported that.
    dropped_counter_tags = @counters[METRIC_NAME_EVENTS_DROPPED]

    # Make sure EventsDropped events contain the required parameters
    handle.logs.each do |type, _message, parameters|
      next unless type == 'error'

      ['count', 'intentional', 'reason'].each do |parameter|
        unless parameters.include?(parameter)
          append("Error log for EventsDropped event MUST include parameter \"#{parameter}\".")
        end
      end

      next if dropped_counter_tags.nil?

      ['intentional'].each do |parameter|
        if parameters.include?(parameter) && !dropped_counter_tags.include?(parameter)
          append("Counter \"#{METRIC_NAME_EVENTS_DROPPED}\" must include \"#{parameter}\" to match error log.")
        end
      end
    end
  end

  # Make sure the `stage` and `error_type` tags of the error / discarded-events
  # counters are given as `error_stage::` / `error_type::` constants.
  def check_counter_tag_constants
    @counters.each do |name, tags|
      # Only component_errors_total and component_discarded_events_total metrics are considered
      next unless ['component_errors_total', 'component_discarded_events_total'].include?(name)

      tags.each do |tag, value|
        if tag == 'stage'
          unless value.start_with?('error_stage::')
            append("Counter \"#{name}\" tag \"#{tag}\" value must be an \"error_stage\" constant.")
          end
        elsif tag == 'error_type'
          unless value.start_with?('error_type::')
            append("Counter \"#{name}\" tag \"#{tag}\" value must be an \"error_type\" constant.")
          end
        end
      end
    end
  end

  # Reports when counter `name` is not incremented by this event, when it lacks
  # one of `required_tags`, or when it carries one of `exclude_tags`.
  def counters_must_include_exclude_tags(name, required_tags, exclude_tags = [])
    unless @counters.include?(name)
      append("This event MUST increment counter \"#{name}\".")
      return
    end

    tags = @counters[name]
    required_tags.each do |tag|
      append("Counter \"#{name}\" MUST include tag \"#{tag}\".") unless tags.include?(tag)
    end
    exclude_tags.each do |tag|
      append("Counter \"#{name}\" MUST NOT include tag \"#{tag}\".") if tags.include?(tag)
    end
  end
end

# Every event seen so far, keyed by name; entries are created on first access.
$all_events = Hash.new { |hash, key| hash[key] = Event.new(key) }

error_count = 0

# Scan sources and build internal structures
Find.find('./src', './lib') do |path|
  path = path.delete_prefix('./')

  next unless path.end_with?('.rs')

  text = File.read(path)

  # Count every emit/register call site per event name.
  text.scan(/\b(?:emit!?|register!?)\((?:[a-z][a-z0-9_:]+)?([A-Z][A-Za-z0-9]+)/) do |event_name,|
    $all_events[event_name].uses += 1
  end

  # Check log message texts for correct formatting.
  if path.start_with?('src/')
    reports = []

    # Try to find all general usage of the various `tracing` macros.
    tracing_call = /(
      (trace|debug|info|warn|error)!\( # Log type.
      ([^;]*?) # All parameters to the macro.
      \)(?:;|\n\s*}) # Handles usages that lack a trailing semicolon.
    )/mx

    text.scan(tracing_call) do |full, type, params|
      # Extract each parameter to the macros, which involves handling structured fields and
      # string literals. We parse them further below so that we can iterate through them to try
      # and determine what the actual log message is, depending on if it's set by using the
      # `message` field, or implicitly with a string literal.
      #
      # We also have some special handling in there for `tracing`-specific "target" and "parent"
      # settings which influence how the event is handled when being processed by a subscriber,
      # which we don't care about _here_ but need to account for in our pattern to parse things.
      params = params.scan(/("(?:[^"\\]++|\\.)*+"|(?:target|parent):\s*[^,]+|(\w+\s*=\s*(?:"(?:[^"\\]++|\\.)*+"|[%?]?[^,]+))|[%?][^,]+)/)
                     .map do |param|
        if /^\".*\"$/.match?(param[0].strip)
          { "type" => "litstr", "value" => param[0] }
        elsif param[0].include?("=")
          parts = param[0].split('=', 2).map(&:strip)
          { "type" => "named_field", "field" => parts[0], "value" => parts[1] }
        else
          { "type" => "field", "field" => param[0] }
        end
      end

      # See if we found a message field.
      message_param = params.find do |param|
        # Use the first string literal parameter.
        param["type"] == "litstr" ||
          # Or the first named field called `message` that has a value that is a string literal.
          (param["type"] == "named_field" && param["field"] == "message" && /^\".*\"$/.match?(param["value"]))
      end

      # We further scrutinize the message field, if we believe we found one. This lets us avoid
      # scenarios where variable interpolation is being used, since we can't reasonably detect if
      # an interpolated variable at the beginning or end of the message is capitalized or has a
      # trailing period, respectively.
      has_message = !message_param.nil?
      message = has_message ? message_param["value"].gsub(/^"|"$/, '') : nil

      is_capitalized = !has_message ||
                       (message[0] == "{" || !message.match?(/^[a-zA-Z]/) || message.match?(/^[[:upper:]]/))
      has_trailing_period = !has_message || (message[-1, 1] == "}" || message.match?(/\.$/))

      match_reports = []
      match_reports.append('Message must start with a capital.') unless is_capitalized
      match_reports.append('Message must end with a period.') unless has_trailing_period

      unless match_reports.empty?
        line_no = find_line_number(text, full)
        match_reports.each { |report| reports.push(" #{report} (`#{type}` call on #{path}:#{line_no})") }
      end
    end

    unless reports.empty?
      reports.each { |report| puts report }
      error_count += reports.length
    end
  end

  # TODO remove @skip_dropped_events check logic after DroppedEvents audit is complete
  # (https://github.com/vectordotdev/vector/issues/13995)
  skip_dropped_events = text.match?(/## skip check-dropped-events ##/i)

  if path.start_with?('src/internal_events/') || path.start_with?('lib/vector-common/src/internal_event/')
    # Scan internal event structs for member names
    text.scan(/[\n ]struct (\S+?)(?:<.+?>)?(?: {\n(.+?)\n\s*}|;)\n/m) do |struct_name, members|
      event = $all_events[struct_name]
      event.path = path
      event.skip_dropped_events = skip_dropped_events
      if members
        members = members.scan(/ ([A-Za-z0-9_]+): +(.+?),/).map { |member, type| [member, type] }
        event.members = members.to_h
      end
    end

    # Scan internal event implementation blocks for logs and metrics
    text.scan(/^(\s*)impl(?:<.+?>)? (InternalEvent|RegisterInternalEvent|InternalEventHandle) for ([A-Za-z0-9_]+)(?:<.+?>)? {\n(.+?)\n\1}$/m) do |_space, trait, event_name, block|
      event = $all_events[event_name]
      event.path = path
      event.skip_duplicate_check = block.match?(/## skip check-duplicate-events ##/i)
      event.skip_validity_check = block.match?(/## skip check-validity-events ##/i)

      case trait
      when 'InternalEvent'
        # Look-aside internal events that defer their implementation to a registered event.
        unless block.include?('register(')
          event.impl_internal_event = true
          event.scan_metrics(block)
          event.scan_logs(block)
          event.scan_component_dropped_events(block)
        end
      when 'RegisterInternalEvent'
        # This is just a dummy name and will cause spurious errors, but it will at least surface
        # the issue of using the macro.
        event.impl_register_event = event_name
        event.append("Do not implement RegisterInternalEvent manually. Use the registered_event! macro instead.")
      when 'InternalEventHandle'
        event.impl_event_handle = true
        event.scan_logs(block)
      end
    end
  end

  # Scan for the `registered_event` macro
  text.scan(/^(crate::|vector_common::|)registered_event! *[({]\n *([A-Za-z0-9_]+) *({(.*?)})? *=> *{(.+?)}$.*^ *fn emit\(\&self, [a-z0-9_]+: ([A-Za-z0-9_]+)\) {$(.+?)}\n(\);|\})$/m) do |_, event_name, _, event_fields, handle_fields, data_type, emit_block, _|
    event = $all_events[event_name]
    event.path = path
    event.scan_registered_event(event_fields || "", handle_fields, data_type, emit_block)
  end
end

# Event names grouped by signature; a group with more than one name is a duplicate.
$duplicates = Hash.new { |hash, key| hash[key] = [] }

$all_events.each do |name, event|
  # Check for duplicated signatures
  if !event.skip_duplicate_check && (event.impl_internal_event || event.impl_event_handle)
    signature = event.signature
    $duplicates[signature].append(name) if signature
  end

  # Check events for validity
  next if event.skip_validity_check

  if event.impl_internal_event
    unless event.valid?
      puts "#{event.path}: Errors in event #{event.name}:"
      event.reports.each { |report| puts " #{report}" }
      error_count += 1
    end
  elsif event.impl_register_event
    handle = $all_events[event.impl_register_event]
    if handle
      unless event.valid_with_handle?(handle)
        puts "#{event.path}: Errors in event #{event.name}:"
        event.reports.each { |report| puts " #{report}" }
        error_count += 1
      end
    else
      puts "Registered event #{event.name} references nonexistent handle #{event.impl_register_event}"
      error_count += 1
      next
    end
  end
end

$duplicates.each do |_signature, dupes|
  if dupes.length > 1
    puts "Duplicate events detected: #{dupes.join(', ')}"
    error_count += 1
  end
end

puts "#{error_count} error(s)"
exit 1 if error_count > 0