test_url_validity.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. #!/usr/bin/env python
  2. # Copyright (c) 2017, Open Source Robotics Foundation
  3. # All rights reserved.
  4. #
  5. # Redistribution and use in source and binary forms, with or without
  6. # modification, are permitted provided that the following conditions are met:
  7. #
  8. # * Redistributions of source code must retain the above copyright
  9. # notice, this list of conditions and the following disclaimer.
  10. # * Redistributions in binary form must reproduce the above copyright
  11. # notice, this list of conditions and the following disclaimer in the
  12. # documentation and/or other materials provided with the distribution.
  13. # * Neither the name of the Willow Garage, Inc. nor the names of its
  14. # contributors may be used to endorse or promote products derived from
  15. # this software without specific prior written permission.
  16. #
  17. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27. # POSSIBILITY OF SUCH DAMAGE.
  28. from __future__ import print_function
  29. from . import hook_permissions
  30. from io import StringIO
  31. import os
  32. import re
  33. import shutil
  34. import subprocess
  35. import sys
  36. import tempfile
  37. import unittest
  38. try:
  39. from urllib.parse import urlparse
  40. except ImportError:
  41. from urlparse import urlparse
  42. import rosdistro
  43. from scripts import eol_distro_names
  44. import unidiff
  45. import yaml
  46. from yaml.composer import Composer
  47. from yaml.constructor import Constructor
  48. from .fold_block import Fold
  49. # for commented debugging code below
  50. # import pprint
# Name of the git remote CI is expected to configure pointing at the
# canonical rosdistro repository; used as the diff baseline when present.
UPSTREAM_NAME = 'unittest_upstream_comparision'
# Branch on that remote to diff the working tree against.
DIFF_BRANCH = 'master'
# Canonical repository URL the upstream remote must match exactly.
DIFF_REPO = 'https://github.com/ros/rosdistro.git'
# Target files to skip during validation (currently none).
TARGET_FILE_BLACKLIST = []
  55. def get_all_distribution_filenames(url=None):
  56. if not url:
  57. url = rosdistro.get_index_url()
  58. distribution_filenames = []
  59. i = rosdistro.get_index(url)
  60. for d in i.distributions.values():
  61. for f in d['distribution']:
  62. dpath = os.path.abspath(urlparse(f).path)
  63. distribution_filenames.append(dpath)
  64. return distribution_filenames
  65. def get_eol_distribution_filenames(url=None):
  66. if not url:
  67. url = rosdistro.get_index_url()
  68. distribution_filenames = []
  69. i = rosdistro.get_index(url)
  70. for d_name, d in i.distributions.items():
  71. if d_name in eol_distro_names:
  72. for f in d['distribution']:
  73. dpath = os.path.abspath(urlparse(f).path)
  74. distribution_filenames.append(dpath)
  75. return distribution_filenames
  76. def detect_lines(diffstr):
  77. """Take a diff string and return a dict of
  78. files with line numbers changed"""
  79. resultant_lines = {}
  80. # diffstr is already decoded
  81. io = StringIO(diffstr)
  82. udiff = unidiff.PatchSet(io)
  83. for file in udiff:
  84. target_lines = []
  85. # if file.path in TARGET_FILES:
  86. for hunk in file:
  87. target_lines += range(hunk.target_start,
  88. hunk.target_start + hunk.target_length)
  89. resultant_lines[file.path] = target_lines
  90. return resultant_lines
  91. def check_git_remote_exists(url, version, tags_valid=False, commits_valid=False):
  92. """ Check if the remote exists and has the branch version.
  93. If tags_valid is True query tags as well as branches """
  94. # Check for tags first as they take priority.
  95. # From Cloudbees Support:
  96. # >the way git plugin handles this conflict, a tag/sha1 is always preferred to branch as this is the way most user use an existing job to trigger a release build.
  97. # Catching the corner case to #20286
  98. tag_match = False
  99. cmd = ('git ls-remote %s refs/tags/*' % url).split()
  100. try:
  101. tag_list = subprocess.check_output(cmd).decode('utf-8')
  102. except subprocess.CalledProcessError as ex:
  103. return (False, 'subprocess call %s failed: %s' % (cmd, ex))
  104. tags = [t for _, t in (l.split(None, 1) for l in tag_list.splitlines())]
  105. if 'refs/tags/%s' % version in tags:
  106. tag_match = True
  107. if tag_match:
  108. if tags_valid:
  109. return (True, '')
  110. else:
  111. error_str = 'Tags are not valid, but a tag %s was found. ' % version
  112. error_str += 'Re: https://github.com/ros/rosdistro/pull/20286'
  113. return (False, error_str)
  114. branch_match = False
  115. # check for branch name
  116. cmd = ('git ls-remote %s refs/heads/*' % url).split()
  117. commit_match = False
  118. # Only try to match a full length git commit id as this is an expensive operation
  119. if re.match('[0-9a-f]{40}', version):
  120. try:
  121. tmpdir = tempfile.mkdtemp()
  122. subprocess.check_call('git clone %s %s/git-repo' % (url, tmpdir), shell=True)
  123. # When a commit id is not found it results in a non-zero exit and the message
  124. # 'error: malformed object name...'.
  125. subprocess.check_call('git -C %s/git-repo branch -r --contains %s' % (tmpdir, version), shell=True)
  126. commit_match = True
  127. except:
  128. pass #return (False, 'No commit found matching %s' % version)
  129. finally:
  130. shutil.rmtree(tmpdir)
  131. if commit_match:
  132. if commits_valid:
  133. return (True, '')
  134. else:
  135. error_str = 'Commits are not valid, but a commit %s was found. ' % version
  136. error_str += 'Re: https://github.com/ros/rosdistro/pull/20286'
  137. return (False, error_str)
  138. # Commits take priority only check for the branch after checking for tags and commits first
  139. try:
  140. branch_list = subprocess.check_output(cmd).decode('utf-8')
  141. except subprocess.CalledProcessError as ex:
  142. return (False, 'subprocess call %s failed: %s' % (cmd, ex))
  143. if not version:
  144. # If the above passed assume the default exists
  145. return (True, '')
  146. if 'refs/heads/%s' % version in branch_list:
  147. return (True, '')
  148. return (False, 'No branch found matching %s' % version)
  149. def check_source_repo_entry_for_errors(source, tags_valid=False, commits_valid=False):
  150. errors = []
  151. if source['type'] != 'git':
  152. print('Cannot verify remote of type[%s] from line [%s] skipping.'
  153. % (source['type'], source['__line__']))
  154. return None
  155. version = source['version'] if source['version'] else None
  156. (remote_exists, error_reason) = check_git_remote_exists(source['url'], version, tags_valid, commits_valid)
  157. if not remote_exists:
  158. errors.append(
  159. 'Could not validate repository with url %s and version %s from'
  160. ' entry at line %s. Error reason: %s'
  161. % (source['url'], version, source['__line__'], error_reason))
  162. test_pr = source['test_pull_requests'] if 'test_pull_requests' in source else None
  163. if test_pr:
  164. parsedurl = urlparse(source['url'])
  165. if 'github.com' in parsedurl.netloc:
  166. user = os.path.dirname(parsedurl.path).lstrip('/')
  167. repo, _ = os.path.splitext(os.path.basename(parsedurl.path))
  168. hook_errors = []
  169. rosghprb_token = os.getenv('ROSGHPRB_TOKEN', None)
  170. if not rosghprb_token:
  171. print('No ROSGHPRB_TOKEN set, continuing without checking hooks')
  172. else:
  173. hooks_valid = hook_permissions.check_hooks_on_repo(user, repo, hook_errors, hook_user='ros-pull-request-builder', callback_url='http://build.ros.org/ghprbhook/', token=rosghprb_token)
  174. if not hooks_valid:
  175. errors += hook_errors
  176. else:
  177. errors.append('Pull Request builds only supported on GitHub right now. Cannot do pull request against %s' % parsedurl.netloc)
  178. if errors:
  179. return(" ".join(errors))
  180. return None
  181. def check_repo_for_errors(repo):
  182. errors = []
  183. if 'source' in repo:
  184. source = repo['source']
  185. test_prs = source['test_pull_requests'] if 'test_pull_requests' in source else None
  186. test_commits = source['test_commits'] if 'test_commits' in source else None
  187. # Allow tags in source entries if test_commits and test_pull_requests are both explicitly false.
  188. tags_and_commits_valid = True if test_prs is False and test_commits is False else False
  189. source_errors = check_source_repo_entry_for_errors(repo['source'], tags_and_commits_valid, tags_and_commits_valid)
  190. if source_errors:
  191. errors.append('Could not validate source entry for repo %s with error [[[%s]]]' %
  192. (repo['repo'], source_errors))
  193. if 'doc' in repo:
  194. source_errors = check_source_repo_entry_for_errors(repo['doc'], tags_valid=True, commits_valid=True)
  195. if source_errors:
  196. errors.append('Could not validate doc entry for repo %s with error [[[%s]]]' %
  197. (repo['repo'], source_errors))
  198. return errors
  199. def detect_post_eol_release(n, repo, lines):
  200. errors = []
  201. if 'release' in repo:
  202. release_element = repo['release']
  203. start_line = release_element['__line__']
  204. end_line = start_line
  205. if 'tags' not in release_element:
  206. print('Missing tags element in release section skipping')
  207. return []
  208. # There are 3 lines beyond the tags line. The tag contents as well as
  209. # the url and version number
  210. end_line = release_element['tags']['__line__'] + 3
  211. matching_lines = [l for l in lines if l >= start_line and l <= end_line]
  212. if matching_lines:
  213. errors.append('There is a change to a release section of an EOLed '
  214. 'distribution. Lines: %s' % matching_lines)
  215. if 'doc' in repo:
  216. doc_element = repo['doc']
  217. start_line = doc_element['__line__']
  218. end_line = start_line + 3
  219. # There are 3 lines beyond the tags line. The tag contents as well as
  220. # the url and version number
  221. matching_lines = [l for l in lines if l >= start_line and l <= end_line]
  222. if matching_lines:
  223. errors.append('There is a change to a doc section of an EOLed '
  224. 'distribution. Lines: %s' % matching_lines)
  225. return errors
  226. def load_yaml_with_lines(filename):
  227. d = open(filename).read()
  228. loader = yaml.Loader(d)
  229. def compose_node(parent, index):
  230. # the line number where the previous token has ended (plus empty lines)
  231. line = loader.line
  232. node = Composer.compose_node(loader, parent, index)
  233. node.__line__ = line + 1
  234. return node
  235. construct_mapping = loader.construct_mapping
  236. def custom_construct_mapping(node, deep=False):
  237. mapping = construct_mapping(node, deep=deep)
  238. mapping['__line__'] = node.__line__
  239. return mapping
  240. loader.compose_node = compose_node
  241. loader.construct_mapping = custom_construct_mapping
  242. data = loader.get_single_data()
  243. return data
  244. def isolate_yaml_snippets_from_line_numbers(yaml_dict, line_numbers):
  245. changed_repos = {}
  246. for dl in line_numbers:
  247. match = None
  248. for name, values in yaml_dict.items():
  249. if name == '__line__':
  250. continue
  251. if not isinstance(values, dict):
  252. print("not a dict %s %s" % (name, values))
  253. continue
  254. # print("comparing to repo %s values %s" % (name, values))
  255. if values['__line__'] <= dl:
  256. if match and match['__line__'] > values['__line__']:
  257. continue
  258. match = values
  259. match['repo'] = name
  260. if match:
  261. changed_repos[match['repo']] = match
  262. return changed_repos
def main():
    """Diff the checkout against upstream rosdistro and validate every
    changed repository entry.

    Returns the list of detected error strings (empty means success); all
    errors are also printed to stderr.
    """
    detected_errors = []
    # See if UPSTREAM_NAME remote is available and use it as it's expected to be setup by CI
    # Otherwise fall back to origin/master
    try:
        cmd = ('git config --get remote.%s.url' % UPSTREAM_NAME).split()
        try:
            remote_url = subprocess.check_output(cmd).decode('utf-8').strip()
            # Remote exists
            # Check url
            if remote_url != DIFF_REPO:
                detected_errors.append('%s remote url [%s] is different than %s' % (UPSTREAM_NAME, remote_url, DIFF_REPO))
                return detected_errors
            target_branch = '%s/%s' % (UPSTREAM_NAME, DIFF_BRANCH)
        except subprocess.CalledProcessError:
            # No remote so fall back to origin/master
            print('WARNING: No remote %s detected, falling back to origin master. Make sure it is up to date.' % UPSTREAM_NAME)
            target_branch = 'origin/master'
        # --unified=0 limits the diff to changed lines only, so detect_lines
        # maps them directly to file line numbers.
        cmd = ('git diff --unified=0 %s' % target_branch).split()
        diff = subprocess.check_output(cmd).decode('utf-8')
    except subprocess.CalledProcessError as ex:
        detected_errors.append('%s' % ex)
        return detected_errors
    # print("output", diff)
    diffed_lines = detect_lines(diff)
    # print("Diff lines %s" % diffed_lines)
    for path, lines in diffed_lines.items():
        # The local checkout's index.yaml decides which changed files are
        # distribution files worth validating.
        directory = os.path.join(os.path.dirname(__file__), '..')
        url = 'file://%s/index.yaml' % directory
        path = os.path.abspath(path)
        if path not in get_all_distribution_filenames(url):
            # print("not verifying diff of file %s" % path)
            continue
        with Fold():
            print("verifying diff of file '%s'" % path)
            is_eol_distro = path in get_eol_distribution_filenames(url)
            data = load_yaml_with_lines(path)
            repos = data['repositories']
            if not repos:
                continue
            changed_repos = isolate_yaml_snippets_from_line_numbers(repos, lines)
            # print("In file: %s Changed repos are:" % path)
            # pprint.pprint(changed_repos)
            for n, r in changed_repos.items():
                errors = check_repo_for_errors(r)
                detected_errors.extend(["In file '''%s''': " % path + e
                                        for e in errors])
                if is_eol_distro:
                    # EOLed distributions must not receive release/doc changes.
                    errors = detect_post_eol_release(n, r, lines)
                    detected_errors.extend(["In file '''%s''': " % path + e
                                            for e in errors])
    for e in detected_errors:
        print("ERROR: %s" % e, file=sys.stderr)
    return detected_errors
  317. class TestUrlValidity(unittest.TestCase):
  318. def test_function(self):
  319. detected_errors = main()
  320. self.assertFalse(detected_errors)
  321. if __name__ == "__main__":
  322. detected_errors = main()
  323. if not detected_errors:
  324. sys.exit(0)
  325. sys.exit(1)