GitLookup.java
/*
* Copyright (C) 2008-2024 Mycila (mathieu.carbou@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.mycila.maven.plugin.license.git;
import org.eclipse.jgit.api.Git;
import org.eclipse.jgit.api.Status;
import org.eclipse.jgit.api.errors.GitAPIException;
import org.eclipse.jgit.diff.DiffConfig;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.PersonIdent;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.FollowFilter;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevSort;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.revwalk.filter.MaxCountRevFilter;
import org.eclipse.jgit.storage.file.FileRepositoryBuilder;
import org.eclipse.jgit.treewalk.filter.AndTreeFilter;
import org.eclipse.jgit.treewalk.filter.PathFilter;
import org.eclipse.jgit.treewalk.filter.TreeFilter;
import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.TimeZone;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static java.util.Objects.requireNonNull;
/**
 * A jGit library wrapper to query the git history of a file: the year of its last change, the year
 * of its creation, and the name and e-mail of the author of its first commit.
 * <p>
 * An instance holds an open {@link Repository}; call {@link #close()} (or use try-with-resources)
 * to release it.
 */
public class GitLookup implements AutoCloseable {

  public static final TimeZone DEFAULT_ZONE = TimeZone.getTimeZone("GMT");

  public static final String MAX_COMMITS_LOOKUP_KEY = "license.git.maxCommitsLookup";
  // keep for compatibility
  private static final String COPYRIGHT_LAST_YEAR_MAX_COMMITS_LOOKUP_KEY = "license.git.copyrightLastYearMaxCommitsLookup";
  public static final String COPYRIGHT_LAST_YEAR_SOURCE_KEY = "license.git.copyrightLastYearSource";
  public static final String COPYRIGHT_LAST_YEAR_TIME_ZONE_KEY = "license.git.copyrightLastYearTimeZone";
  public static final String COMMITS_TO_IGNORE_KEY = "license.git.commitsToIgnore";

  /**
   * Which date of a commit is used to derive a year.
   */
  public enum DateSource {
    AUTHOR, COMMITER
  }

  private final int checkCommitsCount;
  private final DateSource dateSource;
  private final GitPathResolver pathResolver;
  private final Repository repository;
  private final TimeZone timeZone;
  private final boolean shallow;
  private final Set<ObjectId> commitsToIgnore;

  /**
   * Creates a {@link GitLookup} for the repository containing {@code file}, configured from the
   * given properties:
   * <ul>
   * <li>{@link #COPYRIGHT_LAST_YEAR_SOURCE_KEY}: name of a {@link DateSource} (case-insensitive),
   * defaults to {@link DateSource#AUTHOR}</li>
   * <li>{@link #MAX_COMMITS_LOOKUP_KEY} (or the legacy
   * {@code license.git.copyrightLastYearMaxCommitsLookup}): maximum number of commits to inspect,
   * defaults to {@link Integer#MAX_VALUE}</li>
   * <li>{@link #COMMITS_TO_IGNORE_KEY}: comma-separated commit ids to skip, defaults to none</li>
   * <li>{@link #COPYRIGHT_LAST_YEAR_TIME_ZONE_KEY}: a {@link TimeZone} id, defaults to
   * {@link #DEFAULT_ZONE}</li>
   * </ul>
   *
   * @param file  the file to lookup in git
   * @param props the properties used for license plugin
   * @return the git lookup
   */
  public static GitLookup create(File file, Map<String, String> props) {
    final DateSource dateSource = Optional.ofNullable(props.get(COPYRIGHT_LAST_YEAR_SOURCE_KEY))
        .map(String::trim)
        // Locale.ROOT: the default locale must not influence enum name matching
        // (e.g. a Turkish locale upper-cases 'i' to a dotted capital I).
        .map(value -> value.toUpperCase(Locale.ROOT))
        .map(DateSource::valueOf)
        .orElse(DateSource.AUTHOR);

    final int checkCommitsCount = Stream.of(
            MAX_COMMITS_LOOKUP_KEY,
            COPYRIGHT_LAST_YEAR_MAX_COMMITS_LOOKUP_KEY) // Backwards compatibility
        .map(props::get)
        .filter(Objects::nonNull)
        .map(String::trim)
        .map(Integer::parseInt)
        .findFirst()
        .orElse(Integer.MAX_VALUE);

    final Set<ObjectId> commitsToIgnore = Stream.of(COMMITS_TO_IGNORE_KEY)
        .map(props::get)
        .filter(Objects::nonNull)
        .flatMap(s -> Stream.of(s.split(",")))
        .map(String::trim)
        .filter(s -> !s.isEmpty())
        .map(ObjectId::fromString)
        .collect(Collectors.toSet());

    final TimeZone timeZone = Optional.ofNullable(props.get(COPYRIGHT_LAST_YEAR_TIME_ZONE_KEY))
        .map(String::trim)
        .map(TimeZone::getTimeZone)
        .orElse(DEFAULT_ZONE);

    return new GitLookup(file, dateSource, timeZone, checkCommitsCount, commitsToIgnore);
  }

  /**
   * Creates a new {@link GitLookup} for a repository that is detected from the supplied {@code
   * anyFile}.
   * <p>
   * Note on time zones: {@code timeZone} is used to compute the current year and to convert the
   * commit time when {@code dateSource} is {@link DateSource#COMMITER}. When {@code dateSource} is
   * {@link DateSource#AUTHOR}, the year of a commit is computed in the time zone recorded in the
   * author identity of that commit.
   *
   * @param anyFile           - any path from the working tree of the git repository to consider in
   *                          all subsequent calls to {@link #getYearOfLastChange(File)}
   * @param dateSource        where to read the commit dates from - committer date or author date
   * @param timeZone          the time zone, see the note above; must not be {@code null}
   * @param checkCommitsCount the number of historical commits, per file, to check
   * @param commitsToIgnore   the commits to ignore while inspecting the history for {@code anyFile}
   * @throws UncheckedIOException if the repository cannot be opened or read
   */
  private GitLookup(File anyFile, DateSource dateSource, TimeZone timeZone, int checkCommitsCount, Set<ObjectId> commitsToIgnore) {
    requireNonNull(anyFile);
    requireNonNull(dateSource);
    requireNonNull(timeZone);
    requireNonNull(commitsToIgnore);

    final Repository repo;
    try {
      repo = new FileRepositoryBuilder().findGitDir(anyFile).build();
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }

    final boolean isShallow;
    final GitPathResolver resolver;
    try {
      /* A workaround for https://bugs.eclipse.org/bugs/show_bug.cgi?id=457961 */
      // Also contains contents of .git/shallow and can detect shallow repo
      // the line below reads and caches the entries in the FileObjectDatabase of the repository to
      // avoid concurrent modifications during RevWalk
      // Closing the repository will close the FileObjectDatabase.
      // Here the newReader() is a WindowCursor which delegates the getShallowCommits() to the FileObjectDatabase.
      try (ObjectReader objectReader = repo.getObjectDatabase().newReader()) {
        isShallow = !objectReader.getShallowCommits().isEmpty();
      }
      resolver = new GitPathResolver(repo.getWorkTree().getAbsolutePath());
    } catch (IOException e) {
      // Do not leak the already opened repository when initialization fails.
      repo.close();
      throw new UncheckedIOException(e);
    } catch (RuntimeException e) {
      repo.close();
      throw e;
    }

    this.repository = repo;
    this.shallow = isShallow;
    this.pathResolver = resolver;
    this.dateSource = dateSource;
    this.timeZone = timeZone;
    this.checkCommitsCount = checkCommitsCount;
    this.commitsToIgnore = commitsToIgnore;
  }

  /**
   * Returns the year of the last change of the given {@code file} based on the history of the present git branch. The
   * year is taken either from the committer date or from the author identity depending on how {@link #dateSource} was
   * initialized.
   * <p>
   * If the git status of the file is not clean, the current year is returned. Commits listed in
   * the commits-to-ignore set are skipped.
   * <p>
   * See also the note on time zones in {@link #GitLookup(File, DateSource, TimeZone, int, Set)}.
   *
   * @param file for which the year should be retrieved
   * @return year of last modification of the file, or {@code 0} if no eligible commit was found
   * @throws IOException     if unable to read the file
   * @throws GitAPIException if unable to process the git history
   */
  int getYearOfLastChange(File file) throws GitAPIException, IOException {
    String repoRelativePath = pathResolver.relativize(file);

    if (isFileModifiedOrUnstaged(repoRelativePath)) {
      return getCurrentYear();
    }

    int commitYear = 0;
    // try-with-resources releases the walk even if the iteration fails
    try (RevWalk walk = getGitRevWalk(repoRelativePath, false)) {
      for (RevCommit commit : walk) {
        if (commitsToIgnore.contains(commit.getId())) {
          continue;
        }
        commitYear = Math.max(commitYear, getYearFromCommit(commit));
      }
    }
    return commitYear;
  }

  /**
   * Returns the year of creation for the given {@code file} based on the history of the present git branch. The
   * year is taken either from the committer date or from the author identity depending on how {@link #dateSource} was
   * initialized.
   *
   * @param file for which the year should be retrieved
   * @return year of the first commit found for the file, or the current year if there is none
   * @throws IOException if unable to read the git history
   */
  int getYearOfCreation(File file) throws IOException {
    int commitYear = fromCreationCommit(file, this::getYearFromCommit, 0);
    // If we couldn't find a creation year from Git assume newly created file
    return commitYear == 0 ? getCurrentYear() : commitYear;
  }

  /**
   * Returns the author name of the first commit found for the given {@code file}.
   *
   * @param file for which the author should be retrieved
   * @return the author name, or an empty string if no commit was found
   * @throws IOException if unable to read the git history
   */
  String getAuthorNameOfCreation(File file) throws IOException {
    return fromCreationCommit(file, commit -> commit.getAuthorIdent().getName(), "");
  }

  /**
   * Returns the author e-mail address of the first commit found for the given {@code file}.
   *
   * @param file for which the author should be retrieved
   * @return the author e-mail address, or an empty string if no commit was found
   * @throws IOException if unable to read the git history
   */
  String getAuthorEmailOfCreation(File file) throws IOException {
    return fromCreationCommit(file, commit -> commit.getAuthorIdent().getEmailAddress(), "");
  }

  /**
   * @return {@code true} if the repository had shallow commits when this lookup was created
   */
  boolean isShallowRepository() {
    return this.shallow;
  }

  /**
   * Applies {@code extractor} to the first commit of an oldest-first walk over the history of
   * {@code file}, or returns {@code fallback} if the walk yields no commit.
   */
  private <T> T fromCreationCommit(File file, Function<RevCommit, T> extractor, T fallback) throws IOException {
    String repoRelativePath = pathResolver.relativize(file);
    try (RevWalk walk = getGitRevWalk(repoRelativePath, true)) {
      Iterator<RevCommit> iterator = walk.iterator();
      // Extract while the walk is still open.
      return iterator.hasNext() ? extractor.apply(iterator.next()) : fallback;
    }
  }

  /**
   * Tells whether git reports a non-clean status for the given repository-relative path.
   */
  private boolean isFileModifiedOrUnstaged(String repoRelativePath) throws GitAPIException {
    try (Git git = new Git(repository)) {
      return !git.status().addPath(repoRelativePath).call().isClean();
    }
  }

  /**
   * Builds a walk over the commits reachable from HEAD that changed the given path, limited to
   * {@link #checkCommitsCount} commits. The caller owns the returned walk and must close it.
   */
  private RevWalk getGitRevWalk(String repoRelativePath, boolean oldestCommitsFirst) throws IOException {
    DiffConfig diffConfig = repository.getConfig().get(DiffConfig.KEY);

    RevWalk walk = new RevWalk(repository);
    try {
      ObjectId head = repository.resolve(Constants.HEAD);
      // HEAD cannot be resolved in a repository without commits: leave the walk without a start
      // point so that callers simply see no history.
      if (head != null) {
        walk.markStart(walk.parseCommit(head));
      }
      walk.setTreeFilter(AndTreeFilter.create(Arrays.asList(
          PathFilter.create(repoRelativePath),
          FollowFilter.create(repoRelativePath, diffConfig), // Allows us to follow files as they move or are renamed
          TreeFilter.ANY_DIFF)
      ));
      walk.setRevFilter(MaxCountRevFilter.create(checkCommitsCount));
      walk.setRetainBody(false);
      if (oldestCommitsFirst) {
        walk.sort(RevSort.REVERSE);
      }
      return walk;
    } catch (IOException | RuntimeException e) {
      // The walk never reaches the caller, so release it here.
      walk.close();
      throw e;
    }
  }

  private int getCurrentYear() {
    return toYear(System.currentTimeMillis(), timeZone);
  }

  /**
   * Computes the year of the given commit according to {@link #dateSource}: the commit time in the
   * configured time zone, or the author date in the author's own time zone.
   */
  private int getYearFromCommit(RevCommit commit) {
    switch (dateSource) {
      case COMMITER:
        int epochSeconds = commit.getCommitTime();
        // widen to long before converting seconds to milliseconds
        return toYear(epochSeconds * 1000L, timeZone);
      case AUTHOR:
        PersonIdent id = commit.getAuthorIdent();
        Date date = id.getWhen();
        return toYear(date.getTime(), id.getTimeZone());
      default:
        throw new IllegalStateException("Unexpected " + DateSource.class.getName() + " " + dateSource);
    }
  }

  private static int toYear(long epochMilliseconds, TimeZone timeZone) {
    Calendar result = Calendar.getInstance(timeZone);
    result.setTimeInMillis(epochMilliseconds);
    return result.get(Calendar.YEAR);
  }

  /**
   * Closes the underlying repository.
   */
  @Override
  public void close() {
    repository.close();
  }
}