diff --git a/app/Main.hs b/app/Main.hs index b454bf1..8e960c9 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -11,10 +11,8 @@ import Protolude ( Eq, Generic, IO, - Int, Integer, Maybe (..), - Proxy (Proxy), Show, Text, elem, @@ -80,7 +78,7 @@ import Network.URI (URI) import System.Environment (lookupEnv) import Text.RawString.QQ (r) -import Utils (emptyOwner, emptyRepo, mapMSequentially) +import Utils (RepoObject, mapMSequentially, repoObjectToRepo) -- | Replaces a variable in a string with a value var :: Text -> Text -> Text -> Text @@ -145,22 +143,6 @@ formatRepo extendedRepo = <> "\n" ) --- queryRepos :: Text --- queryRepos = --- [r| --- query reposQuery { --- repos( limit: 100 ) { --- rowid --- id --- name --- language --- url --- stars --- updated_utc --- } --- } --- |] - -- | Query @Link@ header with @rel=last@ from the request headers getLastUrl :: Response a -> Maybe URI getLastUrl req = do @@ -275,7 +257,7 @@ upsertRepoQuery utc extendedRepo = & var "language" (repo & GH.repoLanguage <&> GH.getLanguage & fromMaybe "") - & var "stargazers_count" (repo & GH.repoWatchersCount & show) + & var "stargazers_count" (repo & GH.repoStargazersCount & show) & var "open_issues_count" (repo & GH.repoOpenIssuesCount & show) & var "commits_count" (commitsCount & fromMaybe 0 & show) & var "created_utc" (getTimestamp GH.repoCreatedAt) @@ -325,7 +307,7 @@ insertRepoQuery utc extendedRepo = & var "language" (repo & GH.repoLanguage <&> GH.getLanguage & fromMaybe "") - & var "stargazers_count" (repo & GH.repoWatchersCount & show) + & var "stargazers_count" (repo & GH.repoStargazersCount & show) & var "open_issues_count" (repo & GH.repoOpenIssuesCount & show) & var "commits_count" (commitsCount & fromMaybe 0 & show) & var "created_utc" (getTimestamp GH.repoCreatedAt) @@ -400,23 +382,8 @@ loadAndSaveRepo saveStrategy owner name = do putText $ formatRepo extendedRepo saveRepoInAirsequel saveStrategy extendedRepo -data RepoObject = RepoObject - { owner :: Text - , name :: Text - , githubId :: Int - } - deriving (Show, Eq, Generic) - -instance FromJSON RepoObject where - parseJSON = withObject "RepoObject" $ \o -> do - ownerObj <- o .: "owner" - owner <- ownerObj .: "login" - name <- o .: "name" - githubId <- o .: "databaseId" - pure RepoObject{owner, name, githubId} - data GqlResponse = GqlResponse - { repos :: [RepoObject] + { repos :: [Repo] , errorsMb :: Maybe Value , nextCursorMb :: Maybe Text } @@ -428,12 +395,17 @@ instance FromJSON GqlResponse where errorsMb <- o .:? "errors" search <- data_ .: "search" edges <- search .: "edges" - repos <- edges & mapM (.: "node") + repos :: [RepoObject] <- edges & mapM (.: "node") pageInfo <- search .: "pageInfo" nextCursorMb <- pageInfo .:? "endCursor" - pure GqlResponse{repos, errorsMb, nextCursorMb} + pure + GqlResponse + { repos = repos <&> repoObjectToRepo + , errorsMb + , nextCursorMb + } execGqlQuery :: Text -> @@ -485,22 +457,15 @@ execGqlQuery apiEndpoint tokenMb query nextCursorMb initialRepos = do Just errors -> putErrText $ "GraphQL Errors:\n" <> show errors Nothing -> pure () - let repos :: [GH.Repo] = - gqlResponse.repos <&> \repoObj -> - emptyRepo - { GH.repoOwner = - emptyOwner - { GH.simpleOwnerLogin = - GH.mkOwnerName repoObj.owner - } - , GH.repoName = GH.mkRepoName repoObj.name - , GH.repoId = - GH.mkId - (Proxy :: Proxy GH.Repo) - repoObj.githubId - } - - commitsCounts <- mapMSequentially 1000 getNumberOfCommits repos + let + repos :: [GH.Repo] = gqlResponse.repos + -- Number must be quite high to avoid rate limiting + delayBetweenRequests = 20000 -- ms + commitsCounts <- + mapMSequentially + delayBetweenRequests + getNumberOfCommits + repos let extendedRepos = P.zipWith @@ -513,6 +478,12 @@ execGqlQuery apiEndpoint tokenMb query nextCursorMb initialRepos = do repos commitsCounts + putText + $ "⏳ Save " + <> show (P.length repos) + <> " repos to Airsequel …" + extendedRepos & mapM_ (saveRepoInAirsequel OverwriteRepo) + case gqlResponse.nextCursorMb of Nothing -> pure $ initialRepos <> extendedRepos Just nextCursor -> do @@ -531,7 +502,7 @@ getReposViaSearch githubToken searchQuery = do search( query: "<>", type: REPOSITORY, - first: 100 + first: 20 <> ) { edges { @@ -540,6 +511,16 @@ getReposViaSearch githubToken searchQuery = do owner { login } name databaseId + stargazerCount + createdAt + description + homepageUrl + name + issues (states: [OPEN]) { + totalCount + } + createdAt + updatedAt } } } @@ -565,10 +546,12 @@ main = do -- TODO: Add CLI flag to load and save a single repo -- loadAndSaveRepo OverwriteRepo "Airsequel" "SQLiteDAV" + -- TODO: Add CLI flag to choose between OverwriteRepo and AddRepo + repos <- getReposViaSearch githubTokenMb - "language:haskell stars:>500 sort:stars-desc" + "language:haskell stars:>200 sort:updated-desc" putText $ "Found " <> show (P.length repos) <> " repos:" repos @@ -581,8 +564,4 @@ main = do ) & mapM_ putText - putText $ "⏳ Save " <> show (P.length repos) <> " repos to Airsequel …" - -- TODO: Add CLI flag to choose between OverwriteRepo and AddRepo - repos & mapM_ (saveRepoInAirsequel OverwriteRepo) - pure () diff --git a/app/Utils.hs b/app/Utils.hs index de77973..6025aa9 100644 --- a/app/Utils.hs +++ b/app/Utils.hs @@ -1,20 +1,39 @@ -module Utils (emptyOwner, emptyRepo, mapMSequentially) +module Utils ( + emptyOwner, + emptyRepo, + mapMSequentially, + RepoObject (..), + repoObjectToRepo, +) where import Protolude ( Bool (False), + Eq, + Generic, IO, Int, - Maybe (Nothing), + Maybe (Just, Nothing), Proxy (Proxy), + Show, + Text, + fromMaybe, liftIO, mapM, + pure, ($), + (&), (*), (<*), + (>>=), ) import Control.Concurrent (threadDelay) +import Data.Aeson (FromJSON, withObject, (.:)) +import Data.Aeson.Types (parseJSON) +import Data.Text qualified as T +import Data.Time (UTCTime) +import Data.Time.Format.ISO8601 (iso8601Show) import GitHub.Data qualified as GH emptyOwner :: GH.SimpleOwner @@ -66,5 +85,53 @@ emptyRepo = mapMSequentially :: Int -> (a -> IO b) -> [a] -> IO [b] mapMSequentially delayInMs f xs = do - let delayM = liftIO $ threadDelay (delayInMs * 1000) -- Delay in milliseconds + let delayM = liftIO $ threadDelay (delayInMs * 1000) mapM (\x -> f x <* delayM) xs + +{- | To make loading data from GitHub GraphQL API easier +| we also have this simpler (in comparison to GH.Repo) data type +-} +data RepoObject = RepoObject + { owner :: Text + , name :: Text + , githubId :: Int + , stargazerCount :: Int + , description :: Maybe Text + , homepageUrl :: Maybe Text + , issuesCount :: Int + , createdAt :: UTCTime + , updatedAt :: UTCTime + } + deriving (Show, Eq, Generic) + +instance FromJSON RepoObject where + parseJSON = withObject "RepoObject" $ \o -> do + owner <- o .: "owner" >>= (.: "login") + name <- o .: "name" + githubId <- o .: "databaseId" + stargazerCount <- o .: "stargazerCount" + description <- o .: "description" + homepageUrl <- o .: "homepageUrl" + issuesCount <- o .: "issues" >>= (.: "totalCount") + createdAt <- o .: "createdAt" + updatedAt <- o .: "updatedAt" + + pure RepoObject{..} + +repoObjectToRepo :: RepoObject -> GH.Repo +repoObjectToRepo repoObj = + emptyRepo + { GH.repoOwner = + emptyOwner + { GH.simpleOwnerLogin = + GH.mkOwnerName repoObj.owner + } + , GH.repoName = GH.mkRepoName repoObj.name + , GH.repoId = GH.mkId (Proxy :: Proxy GH.Repo) repoObj.githubId + , GH.repoHomepage = repoObj.homepageUrl + , GH.repoDescription = repoObj.description + , GH.repoStargazersCount = repoObj.stargazerCount + , GH.repoOpenIssuesCount = repoObj.issuesCount + , GH.repoCreatedAt = Just repoObj.createdAt + , GH.repoUpdatedAt = Just repoObj.updatedAt + } diff --git a/package.yaml b/package.yaml index ac584a6..5bf53a7 100644 --- a/package.yaml +++ b/package.yaml @@ -29,6 +29,7 @@ default-extensions: - NoImplicitPrelude - OverloadedRecordDot - OverloadedStrings + - RecordWildCards ghc-options: - -Wall diff --git a/repos-uploader.cabal b/repos-uploader.cabal index 0fc5446..dae8c80 100644 --- a/repos-uploader.cabal +++ b/repos-uploader.cabal @@ -4,7 +4,7 @@ cabal-version: 2.2 -- -- see: https://github.com/sol/hpack -- --- hash: d8c7ea98d1b7d2a39392e05c7048b9f8f54bf3b52a01a408ef7b0a12540cb7ad +-- hash: 490342bfbe157db93933ef67d7545824abe9227ba1f9a84434f7fe37c1a70072 name: repos-uploader version: 0.0.0.0 @@ -35,6 +35,7 @@ library NoImplicitPrelude OverloadedRecordDot OverloadedStrings + RecordWildCards ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints -fno-warn-orphans build-depends: aeson @@ -64,6 +65,7 @@ executable repos-uploader NoImplicitPrelude OverloadedRecordDot OverloadedStrings + RecordWildCards ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints -fno-warn-orphans build-depends: aeson