patchstream.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517
  1. # Copyright (c) 2011 The Chromium OS Authors.
  2. #
  3. # SPDX-License-Identifier: GPL-2.0+
  4. #
  5. import math
  6. import os
  7. import re
  8. import shutil
  9. import tempfile
  10. import command
  11. import commit
  12. import gitutil
  13. from series import Series
  14. # Tags that we detect and remove
  15. re_remove = re.compile('^BUG=|^TEST=|^BRANCH=|^Change-Id:|^Review URL:'
  16. '|Reviewed-on:|Commit-\w*:')
  17. # Lines which are allowed after a TEST= line
  18. re_allowed_after_test = re.compile('^Signed-off-by:')
  19. # Signoffs
  20. re_signoff = re.compile('^Signed-off-by: *(.*)')
  21. # The start of the cover letter
  22. re_cover = re.compile('^Cover-letter:')
  23. # A cover letter Cc
  24. re_cover_cc = re.compile('^Cover-letter-cc: *(.*)')
  25. # Patch series tag
  26. re_series_tag = re.compile('^Series-([a-z-]*): *(.*)')
  27. # Commit series tag
  28. re_commit_tag = re.compile('^Commit-([a-z-]*): *(.*)')
  29. # Commit tags that we want to collect and keep
  30. re_tag = re.compile('^(Tested-by|Acked-by|Reviewed-by|Patch-cc): (.*)')
  31. # The start of a new commit in the git log
  32. re_commit = re.compile('^commit ([0-9a-f]*)$')
  33. # We detect these since checkpatch doesn't always do it
  34. re_space_before_tab = re.compile('^[+].* \t')
  35. # States we can be in - can we use range() and still have comments?
  36. STATE_MSG_HEADER = 0 # Still in the message header
  37. STATE_PATCH_SUBJECT = 1 # In patch subject (first line of log for a commit)
  38. STATE_PATCH_HEADER = 2 # In patch header (after the subject)
  39. STATE_DIFFS = 3 # In the diff part (past --- line)
  40. class PatchStream:
  41. """Class for detecting/injecting tags in a patch or series of patches
  42. We support processing the output of 'git log' to read out the tags we
  43. are interested in. We can also process a patch file in order to remove
  44. unwanted tags or inject additional ones. These correspond to the two
  45. phases of processing.
  46. """
  47. def __init__(self, series, name=None, is_log=False):
  48. self.skip_blank = False # True to skip a single blank line
  49. self.found_test = False # Found a TEST= line
  50. self.lines_after_test = 0 # MNumber of lines found after TEST=
  51. self.warn = [] # List of warnings we have collected
  52. self.linenum = 1 # Output line number we are up to
  53. self.in_section = None # Name of start...END section we are in
  54. self.notes = [] # Series notes
  55. self.section = [] # The current section...END section
  56. self.series = series # Info about the patch series
  57. self.is_log = is_log # True if indent like git log
  58. self.in_change = 0 # Non-zero if we are in a change list
  59. self.blank_count = 0 # Number of blank lines stored up
  60. self.state = STATE_MSG_HEADER # What state are we in?
  61. self.signoff = [] # Contents of signoff line
  62. self.commit = None # Current commit
  63. def AddToSeries(self, line, name, value):
  64. """Add a new Series-xxx tag.
  65. When a Series-xxx tag is detected, we come here to record it, if we
  66. are scanning a 'git log'.
  67. Args:
  68. line: Source line containing tag (useful for debug/error messages)
  69. name: Tag name (part after 'Series-')
  70. value: Tag value (part after 'Series-xxx: ')
  71. """
  72. if name == 'notes':
  73. self.in_section = name
  74. self.skip_blank = False
  75. if self.is_log:
  76. self.series.AddTag(self.commit, line, name, value)
  77. def AddToCommit(self, line, name, value):
  78. """Add a new Commit-xxx tag.
  79. When a Commit-xxx tag is detected, we come here to record it.
  80. Args:
  81. line: Source line containing tag (useful for debug/error messages)
  82. name: Tag name (part after 'Commit-')
  83. value: Tag value (part after 'Commit-xxx: ')
  84. """
  85. if name == 'notes':
  86. self.in_section = 'commit-' + name
  87. self.skip_blank = False
  88. def CloseCommit(self):
  89. """Save the current commit into our commit list, and reset our state"""
  90. if self.commit and self.is_log:
  91. self.series.AddCommit(self.commit)
  92. self.commit = None
  93. # If 'END' is missing in a 'Cover-letter' section, and that section
  94. # happens to show up at the very end of the commit message, this is
  95. # the chance for us to fix it up.
  96. if self.in_section == 'cover' and self.is_log:
  97. self.series.cover = self.section
  98. self.in_section = None
  99. self.skip_blank = True
  100. self.section = []
  101. def ProcessLine(self, line):
  102. """Process a single line of a patch file or commit log
  103. This process a line and returns a list of lines to output. The list
  104. may be empty or may contain multiple output lines.
  105. This is where all the complicated logic is located. The class's
  106. state is used to move between different states and detect things
  107. properly.
  108. We can be in one of two modes:
  109. self.is_log == True: This is 'git log' mode, where most output is
  110. indented by 4 characters and we are scanning for tags
  111. self.is_log == False: This is 'patch' mode, where we already have
  112. all the tags, and are processing patches to remove junk we
  113. don't want, and add things we think are required.
  114. Args:
  115. line: text line to process
  116. Returns:
  117. list of output lines, or [] if nothing should be output
  118. """
  119. # Initially we have no output. Prepare the input line string
  120. out = []
  121. line = line.rstrip('\n')
  122. commit_match = re_commit.match(line) if self.is_log else None
  123. if self.is_log:
  124. if line[:4] == ' ':
  125. line = line[4:]
  126. # Handle state transition and skipping blank lines
  127. series_tag_match = re_series_tag.match(line)
  128. commit_tag_match = re_commit_tag.match(line)
  129. cover_match = re_cover.match(line)
  130. cover_cc_match = re_cover_cc.match(line)
  131. signoff_match = re_signoff.match(line)
  132. tag_match = None
  133. if self.state == STATE_PATCH_HEADER:
  134. tag_match = re_tag.match(line)
  135. is_blank = not line.strip()
  136. if is_blank:
  137. if (self.state == STATE_MSG_HEADER
  138. or self.state == STATE_PATCH_SUBJECT):
  139. self.state += 1
  140. # We don't have a subject in the text stream of patch files
  141. # It has its own line with a Subject: tag
  142. if not self.is_log and self.state == STATE_PATCH_SUBJECT:
  143. self.state += 1
  144. elif commit_match:
  145. self.state = STATE_MSG_HEADER
  146. # If a tag is detected, but we are already in a section,
  147. # this means 'END' is missing for that section, fix it up.
  148. if series_tag_match or commit_tag_match or \
  149. cover_match or cover_cc_match or signoff_match:
  150. if self.in_section:
  151. self.warn.append("Missing 'END' in section '%s'" % self.in_section)
  152. if self.in_section == 'cover':
  153. self.series.cover = self.section
  154. elif self.in_section == 'notes':
  155. if self.is_log:
  156. self.series.notes += self.section
  157. elif self.in_section == 'commit-notes':
  158. if self.is_log:
  159. self.commit.notes += self.section
  160. else:
  161. self.warn.append("Unknown section '%s'" % self.in_section)
  162. self.in_section = None
  163. self.skip_blank = True
  164. self.section = []
  165. # If we are in a section, keep collecting lines until we see END
  166. if self.in_section:
  167. if line == 'END':
  168. if self.in_section == 'cover':
  169. self.series.cover = self.section
  170. elif self.in_section == 'notes':
  171. if self.is_log:
  172. self.series.notes += self.section
  173. elif self.in_section == 'commit-notes':
  174. if self.is_log:
  175. self.commit.notes += self.section
  176. else:
  177. self.warn.append("Unknown section '%s'" % self.in_section)
  178. self.in_section = None
  179. self.skip_blank = True
  180. self.section = []
  181. else:
  182. self.section.append(line)
  183. # Detect the commit subject
  184. elif not is_blank and self.state == STATE_PATCH_SUBJECT:
  185. self.commit.subject = line
  186. # Detect the tags we want to remove, and skip blank lines
  187. elif re_remove.match(line) and not commit_tag_match:
  188. self.skip_blank = True
  189. # TEST= should be the last thing in the commit, so remove
  190. # everything after it
  191. if line.startswith('TEST='):
  192. self.found_test = True
  193. elif self.skip_blank and is_blank:
  194. self.skip_blank = False
  195. # Detect the start of a cover letter section
  196. elif cover_match:
  197. self.in_section = 'cover'
  198. self.skip_blank = False
  199. elif cover_cc_match:
  200. value = cover_cc_match.group(1)
  201. self.AddToSeries(line, 'cover-cc', value)
  202. # If we are in a change list, key collected lines until a blank one
  203. elif self.in_change:
  204. if is_blank:
  205. # Blank line ends this change list
  206. self.in_change = 0
  207. elif line == '---':
  208. self.in_change = 0
  209. out = self.ProcessLine(line)
  210. else:
  211. if self.is_log:
  212. self.series.AddChange(self.in_change, self.commit, line)
  213. self.skip_blank = False
  214. # Detect Series-xxx tags
  215. elif series_tag_match:
  216. name = series_tag_match.group(1)
  217. value = series_tag_match.group(2)
  218. if name == 'changes':
  219. # value is the version number: e.g. 1, or 2
  220. try:
  221. value = int(value)
  222. except ValueError as str:
  223. raise ValueError("%s: Cannot decode version info '%s'" %
  224. (self.commit.hash, line))
  225. self.in_change = int(value)
  226. else:
  227. self.AddToSeries(line, name, value)
  228. self.skip_blank = True
  229. # Detect Commit-xxx tags
  230. elif commit_tag_match:
  231. name = commit_tag_match.group(1)
  232. value = commit_tag_match.group(2)
  233. if name == 'notes':
  234. self.AddToCommit(line, name, value)
  235. self.skip_blank = True
  236. # Detect the start of a new commit
  237. elif commit_match:
  238. self.CloseCommit()
  239. self.commit = commit.Commit(commit_match.group(1))
  240. # Detect tags in the commit message
  241. elif tag_match:
  242. # Remove Tested-by self, since few will take much notice
  243. if (tag_match.group(1) == 'Tested-by' and
  244. tag_match.group(2).find(os.getenv('USER') + '@') != -1):
  245. self.warn.append("Ignoring %s" % line)
  246. elif tag_match.group(1) == 'Patch-cc':
  247. self.commit.AddCc(tag_match.group(2).split(','))
  248. else:
  249. out = [line]
  250. # Suppress duplicate signoffs
  251. elif signoff_match:
  252. if (self.is_log or not self.commit or
  253. self.commit.CheckDuplicateSignoff(signoff_match.group(1))):
  254. out = [line]
  255. # Well that means this is an ordinary line
  256. else:
  257. pos = 1
  258. # Look for ugly ASCII characters
  259. for ch in line:
  260. # TODO: Would be nicer to report source filename and line
  261. if ord(ch) > 0x80:
  262. self.warn.append("Line %d/%d ('%s') has funny ascii char" %
  263. (self.linenum, pos, line))
  264. pos += 1
  265. # Look for space before tab
  266. m = re_space_before_tab.match(line)
  267. if m:
  268. self.warn.append('Line %d/%d has space before tab' %
  269. (self.linenum, m.start()))
  270. # OK, we have a valid non-blank line
  271. out = [line]
  272. self.linenum += 1
  273. self.skip_blank = False
  274. if self.state == STATE_DIFFS:
  275. pass
  276. # If this is the start of the diffs section, emit our tags and
  277. # change log
  278. elif line == '---':
  279. self.state = STATE_DIFFS
  280. # Output the tags (signeoff first), then change list
  281. out = []
  282. log = self.series.MakeChangeLog(self.commit)
  283. out += [line]
  284. if self.commit:
  285. out += self.commit.notes
  286. out += [''] + log
  287. elif self.found_test:
  288. if not re_allowed_after_test.match(line):
  289. self.lines_after_test += 1
  290. return out
  291. def Finalize(self):
  292. """Close out processing of this patch stream"""
  293. self.CloseCommit()
  294. if self.lines_after_test:
  295. self.warn.append('Found %d lines after TEST=' %
  296. self.lines_after_test)
  297. def ProcessStream(self, infd, outfd):
  298. """Copy a stream from infd to outfd, filtering out unwanting things.
  299. This is used to process patch files one at a time.
  300. Args:
  301. infd: Input stream file object
  302. outfd: Output stream file object
  303. """
  304. # Extract the filename from each diff, for nice warnings
  305. fname = None
  306. last_fname = None
  307. re_fname = re.compile('diff --git a/(.*) b/.*')
  308. while True:
  309. line = infd.readline()
  310. if not line:
  311. break
  312. out = self.ProcessLine(line)
  313. # Try to detect blank lines at EOF
  314. for line in out:
  315. match = re_fname.match(line)
  316. if match:
  317. last_fname = fname
  318. fname = match.group(1)
  319. if line == '+':
  320. self.blank_count += 1
  321. else:
  322. if self.blank_count and (line == '-- ' or match):
  323. self.warn.append("Found possible blank line(s) at "
  324. "end of file '%s'" % last_fname)
  325. outfd.write('+\n' * self.blank_count)
  326. outfd.write(line + '\n')
  327. self.blank_count = 0
  328. self.Finalize()
  329. def GetMetaDataForList(commit_range, git_dir=None, count=None,
  330. series = None, allow_overwrite=False):
  331. """Reads out patch series metadata from the commits
  332. This does a 'git log' on the relevant commits and pulls out the tags we
  333. are interested in.
  334. Args:
  335. commit_range: Range of commits to count (e.g. 'HEAD..base')
  336. git_dir: Path to git repositiory (None to use default)
  337. count: Number of commits to list, or None for no limit
  338. series: Series object to add information into. By default a new series
  339. is started.
  340. allow_overwrite: Allow tags to overwrite an existing tag
  341. Returns:
  342. A Series object containing information about the commits.
  343. """
  344. if not series:
  345. series = Series()
  346. series.allow_overwrite = allow_overwrite
  347. params = gitutil.LogCmd(commit_range, reverse=True, count=count,
  348. git_dir=git_dir)
  349. stdout = command.RunPipe([params], capture=True).stdout
  350. ps = PatchStream(series, is_log=True)
  351. for line in stdout.splitlines():
  352. ps.ProcessLine(line)
  353. ps.Finalize()
  354. return series
  355. def GetMetaData(start, count):
  356. """Reads out patch series metadata from the commits
  357. This does a 'git log' on the relevant commits and pulls out the tags we
  358. are interested in.
  359. Args:
  360. start: Commit to start from: 0=HEAD, 1=next one, etc.
  361. count: Number of commits to list
  362. """
  363. return GetMetaDataForList('HEAD~%d' % start, None, count)
  364. def FixPatch(backup_dir, fname, series, commit):
  365. """Fix up a patch file, by adding/removing as required.
  366. We remove our tags from the patch file, insert changes lists, etc.
  367. The patch file is processed in place, and overwritten.
  368. A backup file is put into backup_dir (if not None).
  369. Args:
  370. fname: Filename to patch file to process
  371. series: Series information about this patch set
  372. commit: Commit object for this patch file
  373. Return:
  374. A list of errors, or [] if all ok.
  375. """
  376. handle, tmpname = tempfile.mkstemp()
  377. outfd = os.fdopen(handle, 'w')
  378. infd = open(fname, 'r')
  379. ps = PatchStream(series)
  380. ps.commit = commit
  381. ps.ProcessStream(infd, outfd)
  382. infd.close()
  383. outfd.close()
  384. # Create a backup file if required
  385. if backup_dir:
  386. shutil.copy(fname, os.path.join(backup_dir, os.path.basename(fname)))
  387. shutil.move(tmpname, fname)
  388. return ps.warn
  389. def FixPatches(series, fnames):
  390. """Fix up a list of patches identified by filenames
  391. The patch files are processed in place, and overwritten.
  392. Args:
  393. series: The series object
  394. fnames: List of patch files to process
  395. """
  396. # Current workflow creates patches, so we shouldn't need a backup
  397. backup_dir = None #tempfile.mkdtemp('clean-patch')
  398. count = 0
  399. for fname in fnames:
  400. commit = series.commits[count]
  401. commit.patch = fname
  402. result = FixPatch(backup_dir, fname, series, commit)
  403. if result:
  404. print '%d warnings for %s:' % (len(result), fname)
  405. for warn in result:
  406. print '\t', warn
  407. print
  408. count += 1
  409. print 'Cleaned %d patches' % count
  410. return series
  411. def InsertCoverLetter(fname, series, count):
  412. """Inserts a cover letter with the required info into patch 0
  413. Args:
  414. fname: Input / output filename of the cover letter file
  415. series: Series object
  416. count: Number of patches in the series
  417. """
  418. fd = open(fname, 'r')
  419. lines = fd.readlines()
  420. fd.close()
  421. fd = open(fname, 'w')
  422. text = series.cover
  423. prefix = series.GetPatchPrefix()
  424. for line in lines:
  425. if line.startswith('Subject:'):
  426. # if more than 10 or 100 patches, it should say 00/xx, 000/xxx, etc
  427. zero_repeat = int(math.log10(count)) + 1
  428. zero = '0' * zero_repeat
  429. line = 'Subject: [%s %s/%d] %s\n' % (prefix, zero, count, text[0])
  430. # Insert our cover letter
  431. elif line.startswith('*** BLURB HERE ***'):
  432. # First the blurb test
  433. line = '\n'.join(text[1:]) + '\n'
  434. if series.get('notes'):
  435. line += '\n'.join(series.notes) + '\n'
  436. # Now the change list
  437. out = series.MakeChangeLog(None)
  438. line += '\n' + '\n'.join(out)
  439. fd.write(line)
  440. fd.close()