Skip to content

Commit

Permalink
Merge pull request #14 from mehdigolzadeh/mapping_onlypredict
Browse files Browse the repository at this point in the history
Added new features.
Version updated to 1.0.0
  • Loading branch information
mehdigolzadeh authored Nov 11, 2020
2 parents d55eb4d + 6911e4d commit 3f571c0
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 11 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ _The default start-date is 6 months before the current date.

`--verbose` **Produce verbose output**
> Example: $ bodegha repo_owner/repo_name --verbose --key <token>
_The default value is false; if you don't pass this parameter, the output will contain only the accounts and their types._

`--min-comments MIN_COMMENTS` **Minimum number of pull request and issue comments that are required to analyze an account**
Expand Down Expand Up @@ -87,6 +87,11 @@ _This group of parameters is the type of output, e.g., if you pass --json you wi

> Example: $ bodegha repo_owner/repo_name --exclude mehdigolzadeh alexandredecan tommens --key <token>
## As of version 1.0.0
`--only-predicted` **Only list accounts for which a prediction is available**
> Example: $ bodegha repo_owner/repo_name --only-predicted

## Examples of BoDeGHa output (for illustration purposes only)
```
$ bodegha request/request --key <my token> --start-date 01-01-2017 --verbose
Expand Down
57 changes: 48 additions & 9 deletions bodegha.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def myrunner(function, ret, *args, **kwargs):
return ret[0]


def progress(repository, accounts, exclude, date, verbose, min_comments, max_comments, apikey, output_type):
def progress(repository, accounts, exclude, date, verbose, min_comments, max_comments, apikey, output_type, only_predicted):
download_progress = tqdm(
total=25, desc='Downloading comments', smoothing=.1,
bar_format='{desc}: {percentage:3.0f}%|{bar}', leave=False)
Expand All @@ -383,10 +383,10 @@ def progress(repository, accounts, exclude, date, verbose, min_comments, max_com
.sort_values('created_at', ascending=False)
.groupby('author').head(100)
)
if exclude != []:
if len(exclude) > 0:
df = df[~df["author"].isin(exclude)]

if accounts != []:
if len(accounts) > 0:
df = df[lambda x: x['author'].isin(accounts)]

if(len(df) < 1):
Expand Down Expand Up @@ -429,11 +429,45 @@ def progress(repository, accounts, exclude, date, verbose, min_comments, max_com
prediction_progress.set_description(tasks[1])
result = run_function_in_thread(
prediction_progress, predict, 25, args=(model, df_clusters))

result = result.sort_values(['prediction', 'account']).assign(patterns= lambda x: x['patterns'].astype('Int64'))

if only_predicted == True:
result = result.append(
(
comments[lambda x: ~x['author'].isin(result['account'])][['author','body']]
.groupby('author', as_index=False)
.count()
.assign(
emptycomments=np.nan,
patterns=np.nan,
dispersion=np.nan,
prediction="Unknown",
)
.rename(columns={'author':'account','body':'comments','emptycomments':'empty comments'})
),ignore_index=True,sort=True)

for identity in (set(accounts) - set(result['account'])):
result = result.append({
'account': identity,
'comments':np.nan,
'empty comments':np.nan,
'patterns':np.nan,
'dispersion':np.nan,
'prediction':"Not found",
},ignore_index=True,sort=True)

if verbose is False:
result = result[['account', 'prediction']]
result = result.set_index('account').sort_values(['prediction', 'account'])
result = result.set_index('account')[['prediction']]
else:
result = (
result
.set_index('account')
[['comments', 'empty comments', 'patterns', 'dispersion','prediction']]
)

prediction_progress.close()


if output_type == 'json':
return (result.reset_index().to_json(orient='records'))
Expand All @@ -450,11 +484,12 @@ def arg_parser():
parser.add_argument(
'--accounts', metavar='ACCOUNT', required=False, default=list(), type=str, nargs='*',
help='User login of one or more accounts. Example: \
--accounts mehdigolzadeh alexandredecan tommens')
--accounts mehdijuliani melgibson tomgucci')
parser.add_argument(
'--exclude', metavar='ACCOUNT', required=False, default=list(), type=str, nargs='*',
help='List of accounts to be excluded in the analysis. Example: \
--exclude mehdigolzadeh alexandredecan tommens')
--exclude mehdijuliani melgibson tomgucci')

parser.add_argument(
'--start-date', type=str, required=False,
default=None, help='Starting date of comments to be considered')
Expand All @@ -470,7 +505,10 @@ def arg_parser():
parser.add_argument(
'--key', metavar='APIKEY', required=True, type=str, default='',
help='GitHub APIv4 key to download comments from GitHub GraphQL API')

parser.add_argument(
'--only-predicted', action="store_false", required=False, default=True,
help='Only list accounts that the prediction is available.')

group2 = parser.add_mutually_exclusive_group()
group2.add_argument('--text', action='store_true', help='Print results as text.')
group2.add_argument('--csv', action='store_true', help='Print results as csv.')
Expand Down Expand Up @@ -521,7 +559,8 @@ def cli():
min_comments,
max_comments,
apikey,
output_type
output_type,
args.only_predicted,
))
except BodeghaError as e:
sys.exit(e)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


__package__ = 'bodegha'
__version__ = '0.2.3'
__version__ = '1.0.0'
__licence__ = 'LGPL3'
__maintainer__ = 'Mehdi Golzadeh'
__email__ = '[email protected]'
Expand Down

0 comments on commit 3f571c0

Please sign in to comment.