github api: How to efficiently find the number of commits for a repository? - github

I want to find the number of commits done to specific github projects, and within them to specific files. I checked the github api docs but only found an API for actually returning all commits. This would be very inefficient since I have to do multiple api calls for paging thru all commits.
Anyone has a better idea?

Update May 2013: see "File CRUD and repository statistics now available in the API"
You now can Get the last year of commit activity data
GET /repos/:owner/:repo/stats/commit_activity
Returns the last year of commit activity grouped by week. The days array is a group of commits per day, starting on Sunday.
Not completely what you are looking for, but closer.
Original answer (April 2010)
No, the current API doesn't support a 'log --all' for listing all commits from all branches.
The only alternative is presented in "Github API: Retrieve all commits for all branches for a repo", and list through all pages of all commits, branch after branch.
This seems so cumbersome that another alternative would actually be to clone the GitHub repo and run git commands on that local clone!
(mainly git shortlog)
Note: you can also checkout that python script created by Arcsector.

With GraphQL API v4, you can get total commit count per branch with totalCount for each branch:
# For the repository google/gson, list up to 100 refs under refs/heads/ and,
# for each ref whose target is a Commit, report the size of its history.
{
repository(owner: "google", name: "gson") {
name
refs(first: 100, refPrefix: "refs/heads/") {
edges {
node {
name
target {
... on Commit {
id
# first: 0 requests no history nodes; only totalCount is selected.
history(first: 0) {
totalCount
}
}
}
}
}
}
}
}
Test it in the explorer

0penBrain found a clever way to obtain this information and detailed it in the following Gist : https://gist.github.com/0penBrain/7be59a48aba778c955d992aa69e524c5
Here's the relevant snippet with curl :
curl -I -k "https://api.github.com/repos/:owner/:repo/commits?per_page=1" | sed -n '/^[Ll]ink:/ s/.*"next".*page=\([0-9]*\).*"last".*/\1/p'
The trick is to enable a 1 commit per page pagination, as can be seen in the query-string.
Then the focus must shift from the response JSON body to the HTTP headers where the following entry can be found :
link: <https://api.github.com/repositories/27193779/commits?per_page=1&page=2>; rel="next", <https://api.github.com/repositories/27193779/commits?per_page=1&page=37949>; rel="last"
The sed expression is then in charge of extracting the 37949 number (in this example)

Pure JS Implementation
const base_url = 'https://api.github.com';
/**
 * Issue a blocking (synchronous) GET request.
 *
 * @param {string} theUrl - Address to fetch.
 * @param {boolean} [return_headers] - When truthy, hand back the whole
 *   XMLHttpRequest object so the caller can read response headers.
 * @returns {XMLHttpRequest|string} The request object when `return_headers`
 *   is truthy, otherwise the response body text.
 */
function httpGet(theUrl, return_headers) {
  const request = new XMLHttpRequest();
  // Third argument `false` makes the call synchronous.
  request.open("GET", theUrl, false);
  request.send(null);
  return return_headers ? request : request.responseText;
}
/**
 * Count the commits between the repository's first commit and `sha`.
 *
 * Looks up the first commit via get_first_commit(), asks the compare endpoint
 * for `<first>...<sha>`, and adds one to the reported `total_commits`.
 * The result is also written to the console.
 *
 * @param {string} owner - Repository owner.
 * @param {string} repo - Repository name.
 * @param {string} sha - Branch name or commit to compare against.
 * @returns {number} `total_commits` from the comparison, plus one.
 */
function get_all_commits_count(owner, repo, sha) {
  const root_sha = get_first_commit(owner, repo);
  const compare_url = `${base_url}/repos/${owner}/${repo}/compare/${root_sha}...${sha}`;
  const comparison = JSON.parse(httpGet(compare_url));
  const commit_count = comparison['total_commits'] + 1;
  console.log('Commit Count: ', commit_count);
  return commit_count;
}
/**
 * Find the SHA of the last commit listed on the last page of the
 * repository's commit listing.
 *
 * The first page of `/repos/:owner/:repo/commits` is requested. If its `Link`
 * header advertises a `rel="last"` page, that page is fetched; otherwise the
 * first page is used as is. The final entry of the chosen page is returned.
 *
 * @param {string} owner - Repository owner.
 * @param {string} repo - Repository name.
 * @returns {string} SHA of the final entry on the last page.
 * @throws {Error} When the chosen page is not a non-empty array of commits.
 */
function get_first_commit(owner, repo) {
  const url = base_url + '/repos/' + owner + '/' + repo + '/commits';
  const req = httpGet(url, true);

  let commits_json = req.responseText;
  const link_header = req.getResponseHeader('Link');
  if (link_header) {
    // Pick the entry by its rel value instead of by position, so the lookup
    // does not depend on the order of the links inside the header.
    const last_link = /<([^>]+)>\s*;[^,]*\brel="?last"?/.exec(link_header);
    if (last_link) {
      commits_json = httpGet(last_link[1]);
    }
  }

  const commits = JSON.parse(commits_json);
  if (!Array.isArray(commits) || commits.length === 0) {
    throw new Error('no commits found for ' + owner + '/' + repo);
  }
  return commits[commits.length - 1]['sha'];
}
// Example run: count the commits for getredash/redash up to `master`.
let [owner, repo, sha] = ['getredash', 'redash', 'master'];
get_all_commits_count(owner, repo, sha);
Credits - https://gist.github.com/yershalom/a7c08f9441d1aadb13777bce4c7cdc3b

Related

Add new Files and Folders to Azure Git Repository with Azure DevOps REST API

How to Add new Files and Folders to Azure Git Repository with Azure DevOps REST API?
I want to add some static files to my repository using Azure DevOps REST APIs.
https://learn.microsoft.com/en-us/rest/api/azure/devops/git/repositories?view=azure-devops-rest-5.1
Is there an option available via the REST API?
Or is there any other automated way, either through CI/CD or through C#?
I found the answer, we can use the Git Push REST API uri
https://learn.microsoft.com/en-us/rest/api/azure/devops/git/pushes/create?view=azure-devops-rest-5.1
Follow below steps in your C# code
call GetRef REST https://dev.azure.com/{0}/{1}/_apis/git/repositories/{2}/refs{3}
this should return the object of your repository branch which you can use to push your changes
Next, call Push REST API to create folder or file into your repository
https://dev.azure.com/{0}/{1}/_apis/git/repositories/{2}/pushes{3}
// Build the list of changes that will make up the single commit being pushed.
var changes = new List<ChangeToAdd>();

// Add files
// somejsonfile.json: read from disk and added as raw text.
// The verbatim string prefix is '@'.
var jsonContent = File.ReadAllText(@"./static-files/somejsonfile.json");
ChangeToAdd changeJson = new ChangeToAdd()
{
    changeType = "add",
    item = new ItemBase() { path = string.Concat(path, "/[your-folder-name]/somejsonfile.json") },
    newContent = new Newcontent()
    {
        contentType = "rawtext",
        content = jsonContent
    }
};
changes.Add(changeJson);

// One commit carrying all of the changes above.
CommitToAdd commit = new CommitToAdd();
commit.comment = "commit from code";
commit.changes = changes.ToArray();
var content = new List<CommitToAdd>() { commit };

// Push payload: the ref updates (refs comes from the earlier GetRef call) plus the commits.
var request = new
{
    refUpdates = refs,
    commits = content
};

// Basic-auth value: empty user name and the personal access token, Base64 encoded.
var personalaccesstoken = _configuration["azure-devOps-configuration-token"];
var authorization = Convert.ToBase64String(System.Text.ASCIIEncoding.ASCII.GetBytes(string.Format("{0}:{1}", "", personalaccesstoken)));
_logger.LogInformation($"[HTTP REQUEST] make a http call with uri: {uri} ");

// Make the HTTP client call to
// https://dev.azure.com/{organizationName}/{projectName}/_apis/git/repositories/{repositoryId}/pushes{?api-version}
// NOTE(review): `data` is presumably the serialized `request` object built above; it is defined outside this snippet, so confirm.
var result = _httpClient.SendHttpWebRequest(uri, method, data, authorization);

Need to download ALL audit logs from github with lots of history

I need to download all the audit logs from my organization in GitHub.
The problem is: I have dozens of repositories and 2 years of commit history, so it is a lot of data, and to request it manually would be impossible.
Does anyone know of a tool or a method to retrieve all the information in the GitHub audit log? Or at least "per repository"?
Thanks.
If you want to download audit log data, you can download using the v4 GraphQL API, which provides a way to access audit log entries. This information is not available with the v3 REST API.
If you want to retrieve just the commit history, which is different, then the easiest way to do that is to clone the repositories. Reading every commit through the API is inefficient and you'll likely hit the rate limit pretty quickly. You can, however, use the API to discover which repositories you have and script it.
You can use GraphQL API provided by github. You can pull all audit logs from your organisation using a python script. I was recently working on it.
Also, using the GraphQL API you can pull 100 log entries at once, so we have to use a cursor to page through the results until we reach the end of the log.
Refer to this link if you want to learn about cursors.
https://graphql.org/learn/pagination/
This is the source code,
import requests
import json
import pandas as pd
from datetime import datetime
import time
# Authorization header sent with every GraphQL request.
headers = {"Authorization": "token YOUR PERSONAL TOKEN"}
# Enterprise slug, already wrapped in double quotes because it is spliced
# verbatim into the query text below.
enterprise = '"ENTERPRISE"'

# ---------------------------------------------------------------------------
# Step 1: collect the names of all organizations in the enterprise,
# following the cursor page by page (100 per request).
# ---------------------------------------------------------------------------
organizations = []
after = ''  # becomes 'after: "<cursor>"' once the first page has been read
while True:
    getOrgantionsListQuery = """
query {
enterprise(slug: """+ enterprise + """) {
...enterpriseFragment
}
}
fragment enterpriseFragment on Enterprise {
... on Enterprise{
name
organizations(first: 100, """ + after +"""){
edges{
node{
name
... on Organization{
name
}
}
cursor
}
pageInfo {
endCursor
hasNextPage
hasPreviousPage
}
}
}
}
"""
    result = requests.post('https://api.github.com/graphql',
                           json={'query': getOrgantionsListQuery},
                           headers=headers)
    enterpriseData = json.loads(result.text)
    if 'errors' in enterpriseData:
        # Report the first error type and stop paging.
        print(enterprise+ " " + enterpriseData['errors'][0]['type'])
        break
    enterpriseAudit = enterpriseData['data']['enterprise']['organizations']
    for org in enterpriseAudit['edges']:
        organizations.append(org['node']['name'])
    if not enterpriseAudit['pageInfo']['hasNextPage']:
        break
    # Continue after the cursor of the last edge on this page.
    after = 'after: "' + str(enterpriseAudit['edges'][-1]['cursor']) + '"'
    time.sleep(1)  # pause one second between pages

# ---------------------------------------------------------------------------
# Step 2: for every organization, page through its audit log and keep
# every returned edge.
# ---------------------------------------------------------------------------
response = []
for org in organizations:
    after = ''
    org = '"' + org + '"'  # quote the login for splicing into the query
    while True:
        getAuditLogQuery = """
query {
organization(login: """+ org + """) {
auditLog(first: 100, """ + after +""") {
edges {
node {
... on RepositoryAuditEntryData {
repository {
name
}
}
... on OrganizationAuditEntryData {
organizationResourcePath
organizationName
organizationUrl
}
... on TeamAuditEntryData {
teamName
}
... on TopicAuditEntryData {
topicName
}
... on OauthApplicationAuditEntryData {
oauthApplicationName
}
... on EnterpriseAuditEntryData {
enterpriseResourcePath
enterpriseUrl
enterpriseSlug
}
... on AuditEntry {
actorResourcePath
action
actorIp
actorLogin
operationType
createdAt
actorLocation {
countryCode
country
regionCode
region
city
}
#User 'Action' was performed on
userLogin
userResourcePath
userUrl
}
}
cursor
}
pageInfo {
endCursor
hasNextPage
hasPreviousPage
}
}
}
}
"""
        result = requests.post('https://api.github.com/graphql',
                               json={'query': getAuditLogQuery},
                               headers=headers)
        organizationData = json.loads(result.text)
        if 'errors' in organizationData:
            # Report the first error type and move on to the next organization.
            print(org + " " + organizationData['errors'][0]['type'])
            break
        auditLog = organizationData['data']['organization']['auditLog']
        print(org + " " + str(len(auditLog['edges'])))
        for log in auditLog['edges']:
            response.append(log)
        if not auditLog['pageInfo']['hasNextPage']:
            break
        # Continue after the cursor of the last edge on this page.
        after = 'after: "' + str(auditLog['edges'][-1]['cursor']) + '"'
        time.sleep(1)  # pause one second between pages

# ---------------------------------------------------------------------------
# Step 3: dump everything that was collected into one JSON file whose name
# ends with the current timestamp.
# ---------------------------------------------------------------------------
df = pd.DataFrame(response)
df.to_json(r'/YOUR/PATH/TO/SAVE' + str(datetime.now()) +'.json')
Check if GitHub Audit Log Streaming can help in your case.
It just got out of beta (Jan. 2022):
Audit log streaming is generally available
GitHub audit log streaming is now out of beta and generally available.
Your experience using audit log streaming will not change, but we expanded the number of options you have for where you can stream your audit and Git events:
Amazon S3
Azure Blob Storage
Azure Event Hubs
Google Cloud Storage
Splunk
Enterprise owners can set up their stream in minutes by navigating to their enterprise account settings under the Audit log tab and configuring the collection endpoint.

slack doesn't recognize github webhook payload format

I'm trying to create a slack app that uses incoming webhooks. I want my github repository to post to slack whenever the wiki is updated. I believe I've set up the webhook on github just fine, because I can see that it is attempting a delivery whenever I update the wiki. However, there's always the error, "no_text". I think this error means slack is expecting an item named "text," but the payload from github provides none. I verified this by trying two curl commands from the command prompt (I'm on windows):
curl -X POST -H "Content-type: application/json" --data "{\"text\":\"Hello, World!\"}" [MY_WEBHOOK_URL]
curl -X POST -H "Content-type: application/json" --data "{\"foobar\":\"Hello, World!\"}" [MY_WEBHOOK_URL]
This first one works as expected; the message "Hello, World!" gets posted to the slack channel I wanted, and I got back the "ok" message from curl. The second one did not work; the message was not posted, and I got back the message "no_text" from curl.
I can think of two possible solutions to this problem:
Change the format of the payload coming from github to include an item called "text" and other properties slack actually recognizes.
Get slack to recognize the format the payload is already in, perhaps by telling it to post the contents of a property other than "text."
I don't know how to accomplish either of these, or if they're even possible. Or perhaps there's another solution I haven't thought of?
Note: I already tried to use the github slack app, but couldn't figure out how to get it to post updates to the wiki. (See my other question if you'd like: slack github integration doesn't find wiki repository)
I'm actually looking to do the same thing as you right now. Because the github and slack hooks are fundamentally different, you will need to have something in the middle to process the github webhooks into a Slack message to be posted via an incoming webhook.
You're going to need to do a couple different things (in no particular order):
Set up Github to send out hooks for the specific events you wish to be notified of.
Configure a middle man (I am currently using AWS SNS and Lambda)
Set up slack for the webhook.
For the github webhooks, you will need to leverage the more powerful github API to create the hook. You could do this with curl, but that's kind of a pain so I am using a JS script to take care of it. You will need to npm install github bluebird in the same directory before running something like this:
// Creates a webhook on a GitHub repository through the `github` npm client.
// The hook is named "amazonsns" and is configured with AWS credentials, an SNS
// region and an SNS topic.
var GitHubApi = require("github");
var github = new GitHubApi({
    // optional
    debug: true,
    protocol: "https",
    host: "api.github.com", // should be api.github.com for GitHub
    pathPrefix: "", // for some GHEs; none for GitHub
    headers: {
        "user-agent": "ocelotsloth-conf" // GitHub is happy with a unique user agent
    },
    Promise: require('bluebird'),
    followRedirects: false, // default: true; there's currently an issue with non-get redirects, so allow ability to disable follow-redirects
    timeout: 5000
});
// user token
github.authenticate({
    type: "token",
    token: "GITHUB_TOKEN_HERE",
});
// https://mikedeboer.github.io/node-github/#api-repos-createHook
github.repos.createHook({
    owner: "ocelotsloth",
    repo: "lib-ical",
    name: "amazonsns",
    // Uncomment the event names the hook should fire for.
    events: [
        //"commit_comment",
        //"create",
        //"delete",
        //"gollum",
        //"issue_comment",
        "issues"
        //"label",
        //"milestone",
        //"pull_request",
        //"pull_request_review",
        //"pull_request_review_comment",
        //"push",
        //"release"
    ],
    config: {
        aws_key: "AWS_KEY",
        aws_secret: "AWS_SECRET",
        sns_region: "us-east-1",
        sns_topic: "SNS_TOPIC_ARN"
    },
}, function(err, res) {
    // Surface failures instead of silently printing an undefined response.
    if (err) {
        console.error("Failed to create hook:", err);
        return;
    }
    console.log(JSON.stringify(res, null, '\t'));
});
I remember following a blog post a while ago about setting up the SNS topic to work properly, but I don't remember exactly where it is anymore. Some googling should help. Also, you should be able to set up your own server for github to send these to and avoid having to set up AWS at all if you want to avoid the complexity. See https://mikedeboer.github.io/node-github/#api-repos-createHook for specific instructions on that method. Once the hook has been created, any later change has to go through editHook, so either get it right the first time or edit it afterwards. To edit, you just need to change the method call to editHook and add the hook's id to the call as well.
Something important to see, you can define all of the different Events that you want github to send to you. For all of these, along with their formats, look at https://developer.github.com/v3/activity/events/types/.
To actually post these events to slack, I have a lambda script that currently looks like this (I literally just started writing this today, and haven't implemented more than just posting issue events, but it should do well as a starting point). For this script, you will need to npm install identify-github-event slack-webhook and have your incoming webhook set up as well.
var identifyGithubEvent = require('identify-github-event');
var SlackWebhook = require('slack-webhook')
// slack's link syntax
/**
 * Format a hyperlink in Slack's `<url|label>` syntax.
 *
 * @param {string} url - Link target.
 * @param {string} txt - Visible label.
 * @returns {string} The Slack-formatted link.
 */
function link(url, txt) {
  return `<${url}|${txt}>`;
}
exports.handler = function(event, context) {
// 1. extract GitHub event from SNS message
var ghEvent = JSON.parse(event.Records[0].Sns.Message);
var eventType, eventName, numb;
console.log(ghEvent);
var ghEventType = identifyGithubEvent(ghEvent);
if (!ghEventType) {
return;
}
var text = "Event! " + ghEventType;
if (ghEventType === 'IssueCommentEvent') {
var who = link(ghEvent.comment.user.html_url, ghEvent.comment.user.login);
var what = link(ghEvent.issue.html_url, "Issue " + ghEvent.issue.number + ": \"" + ghEvent.issue.title + "\"");
text = who + " commented on " + what;
}
else if (ghEventType === 'IssuesEvent') {
var who = link(ghEvent.sender.html_url, ghEvent.sender.login);
var action = ghEvent.action;
var issueNumber = ghEvent.issue.number;
var issueName = link(ghEvent.issue.html_url, ghEvent.issue.title + "\"");
if (action === "opened" | action === "closed") {
text = {
attachments: [{
"fallback": who + " opened Issue" + issueNumber + ": " + issueName,
"color": "#36a64f",
"pretext": "New issue " + action + ":",
"author_name": ghEvent.sender.login,
"author_link": ghEvent.sender.html_url,
"thumb_url": ghEvent.sender.avatar_url,
"title": "#" + issueNumber + ": " + ghEvent.issue.title,
"title_link": ghEvent.issue.html_url,
"text": ghEvent.issue.body,
"fields": [
{
"title": "Status",
"value": ghEvent.issue.state,
"short": true
},
{
"title": "Labels",
"value": ghEvent.issue.labels.map(label => label.name).join("\n"),
"short": true
}
],
"footer": "lib-ical",
"footer_icon": "https://platform.slack-edge.com/img/default_application_icon.png",
"mrkdwn_in": ["text"]
}]
};
} else return;
}
// 'commit_comment':
// 'create':
// 'delete':
// 'issues':
// 'label':
// 'member':
// 'milestone':
// 'pull_request':
// 'pull_request_review':
// 'pull_request_review_comment':
// 'push':
// 'release':
var slack = new SlackWebhook('https://hooks.slack.com/services/SLACK-WEBHOOK-URL', {
defaults: {
username: 'GitHub -- user/project',
channel: '#CHANNEL-NAME',
icon_emoji: ':github:'
}
})
slack.send(text);
};
It's far from perfect, but it gives a really nice result:
For that specific example it's an issue close, but currently that script will also work on open. The script also does limited markdown processing, so if the issue contains any source blocks, it will be rendered properly inside of slack.
I hope this helps you with your approach, feel free to ask me to elaborate on anything else.

Github API: Get number of contributions on a day by day basis

I want to GET from the Github API, the number of contributions per day. I'm making a webapp that compares the number of github contributions to the number of Dota 2 matches I play.
This picture should explain things more clearly.
http://i.stack.imgur.com/cZ1XK.png
I have scoured the Github API and the internet looking for a simple solution and some of the answers I've seen weren't what I was looking for. This Github API get last year of commit activity blurb is the closest I've gotten to finding a solution, but using it would involve making the API call for ALL repos in my account and concatenating the data into one JSON. If there are no solutions to this I would like to know it so I can abandon ship.
Thanks!
You can use the svg calendar data with the url :
https://github.com/users/USER/contributions?to=2016-12-25
You can set the to query param to your target day and then parse the svg result to get the last data in the output calendar.
For the web integration part, you can use a proxy like urlreq. An example :
// Fetch the contribution calendar that ends on `day` for `user` (through the
// urlreq proxy) and print the count stored on the calendar's last cell.
const user = 'bertrandmartel';
const day = "2016-12-25";
fetch('https://urlreq.appspot.com/req?method=GET&url=https%3A%2F%2Fgithub.com%2Fusers%2F' + user + '%2Fcontributions%3Fto%3D' + day)
    .then(function(response) {
        return response.text();
    })
    .then(function(text) {
        // Declared locally: an undeclared assignment would create a global,
        // and throws a ReferenceError in strict or module code.
        const xmlDoc = new DOMParser().parseFromString(text, 'text/xml');
        const nodes = xmlDoc.getElementsByTagName('rect');
        if (nodes.length === 0) {
            throw new Error('no <rect> cells found in the contributions response');
        }
        // The last <rect> is the cell for the most recent day in the calendar.
        const dayContributions = nodes[nodes.length - 1].getAttribute('data-count');
        console.log('contributions count for ' + day + ' : ' + dayContributions);
    })
    .catch(function(error) {
        console.log('Request failed', error);
    });

Github-api get commits

Can someone explain me or give some tutorial to get all the commit details for a particular in github. I saw this and I didn't understand how to use that.
You can check my article about this:
http://ondrek.me/articles/how-to-parse-github-API-in-nodejs/
This is NodeJs way
(for client-side JS, replace require("https") with a client-side AJAX JSON request; the code is very similar)
Quick sample link for repo MDOWN of user ONDREK
https://api.github.com/repos/ondrek/mdown/git/refs/heads/
How to make a request to Github API
var options = {
user : 'ondrek',
repo : 'favicon-blog',
branch : 'master'
};
/**
 * GET a path on api.github.com and pass the parsed JSON body to `callback`.
 *
 * @param {string} customPath - Request path, starting with "/".
 * @param {function(*): void} callback - Called once with the parsed JSON body
 *   after the response has been fully received.
 */
function getHttpRequestJson(customPath, callback){
    require('https').request({
        hostname: 'api.github.com',
        path: customPath,
        method: 'GET',
        // The GitHub API rejects requests that carry no User-Agent header.
        headers: { 'User-Agent': 'node-github-api-sample' }
    }, function(res){
        var data = '';
        // Decode as UTF-8 in the stream so a multi-byte character split
        // across two chunks is not corrupted by the string concatenation.
        res.setEncoding('utf8');
        res.on('data', function (chunk) {
            data += chunk;
        });
        res.on('end', function () {
            callback(JSON.parse(data));
        });
    }).end();
}
How to get user details json from Github API
(inject previous code)
// Look up the branch ref, then continue with the object URL it reports.
var url = `/repos/${options.user}/${options.repo}/git/refs/heads/${options.branch}`;
getHttpRequestJson(url, function(refJson){
    getLastCommit(refJson.object.url);
});
How to get last commit json from Github API
(inject previous code)
/**
 * Fetch the commit at `url` and continue with the URL of its tree.
 *
 * `auth` is appended to the URL; it is expected to be defined elsewhere.
 *
 * @param {string} url - Commit URL path.
 */
function getLastCommit(url){
    const onCommit = (commitJson) => {
        getTree(commitJson.tree.url);
    };
    getHttpRequestJson(url + auth, onCommit);
}
How to get tree of last commit from Github API
(inject previous code)
/**
 * Fetch the tree at `url` and pass its entry list on to getOnlyPages().
 *
 * `auth` is appended to the URL; it is expected to be defined elsewhere.
 *
 * @param {string} url - Tree URL path.
 */
function getTree(url){
    const onTree = (treeJson) => {
        getOnlyPages(treeJson.tree);
    };
    getHttpRequestJson(url + auth, onTree);
}
How to get specific folder of last commit from Github API
(inject previous code)
/**
 * Call getArticles() for every tree entry whose path is exactly 'blog'.
 *
 * @param {Array<{path: string, url: string}>} treeArr - Tree entries.
 */
function getOnlyPages(treeArr){
    treeArr.forEach((entry) => {
        if (entry.path !== 'blog') {
            return;
        }
        getArticles(entry.url);
    });
}
/**
 * Fetch the tree at `url` and pass its entries to parseMarkdownArticles().
 *
 * `auth` is appended to the URL; it is expected to be defined elsewhere.
 *
 * @param {string} url - Tree URL path of the folder to read.
 */
function getArticles(url){
    const onFolderTree = (folderJson) => {
        parseMarkdownArticles(folderJson.tree);
    };
    getHttpRequestJson(url + auth, onFolderTree);
}