-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
567 additions
and
0 deletions.
There are no files selected for viewing
280 changes: 280 additions & 0 deletions
280
graph/.ipynb_checkpoints/pagerank-python-checkpoint.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,280 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from pyspark.sql.functions import *\n", | ||
"from pyspark.sql.functions import pandas_udf, PandasUDFType\n", | ||
"from pyspark.sql.types import *\n", | ||
"from graphframes import *\n", | ||
"from pyspark.sql.functions import *" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"spark = SparkSession \\\n", | ||
" .builder \\\n", | ||
" .appName(\"PageRank\") \\\n", | ||
" .config(\"spark.executor.heartbeatInterval\", \"30000s\")\\\n", | ||
" .getOrCreate()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"\n", | ||
" <div>\n", | ||
" <p><b>SparkSession - hive</b></p>\n", | ||
" \n", | ||
" <div>\n", | ||
" <p><b>SparkContext</b></p>\n", | ||
"\n", | ||
" <p><a href=\"http://abhays-mbp-2:4040\">Spark UI</a></p>\n", | ||
"\n", | ||
" <dl>\n", | ||
" <dt>Version</dt>\n", | ||
" <dd><code>v2.4.3</code></dd>\n", | ||
" <dt>Master</dt>\n", | ||
" <dd><code>local[*]</code></dd>\n", | ||
" <dt>AppName</dt>\n", | ||
" <dd><code>PySparkShell</code></dd>\n", | ||
" </dl>\n", | ||
" </div>\n", | ||
" \n", | ||
" </div>\n", | ||
" " | ||
], | ||
"text/plain": [ | ||
"<pyspark.sql.session.SparkSession at 0x116220f60>" | ||
] | ||
}, | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"spark" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"pages = spark.read.format(\"csv\").load(\"../data/w3data/pagerank/*pages.csv\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"links = spark.read.format(\"csv\").load(\"../data/w3data/pagerank/*links.csv\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"links=links.select(col(\"_c0\").alias(\"src\"), (col(\"_c1\").alias(\"dst\")))\n", | ||
"pages=pages.select(col(\"_c0\").alias(\"id\"))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"+---------------------------------------------------------------------------------+\n", | ||
"|id |\n", | ||
"+---------------------------------------------------------------------------------+\n", | ||
"|https://w3.ibm.com/w3publisher/google-analytics/education-training/when-to-use-ga|\n", | ||
"|https://w3.ibm.com/w3publisher/gts-efc/events/initiatives/balance-sheet-reporting|\n", | ||
"|https://w3.ibm.com/w3publisher/patents/research-master-inventor |\n", | ||
"|https://w3.ibm.com/w3publisher/gts-next/gts-narrative |\n", | ||
"|https://w3.ibm.com/w3publisher/gbs-eu-pde/archive/consulting-fund-2 |\n", | ||
"+---------------------------------------------------------------------------------+\n", | ||
"only showing top 5 rows\n", | ||
"\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"pages.show(5, False)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"+---------------------------------------------------------------+-------------------------------------------------------------+\n", | ||
"|src |dst |\n", | ||
"+---------------------------------------------------------------+-------------------------------------------------------------+\n", | ||
"|https://w3.ibm.com/w3publisher/patents/research-master-inventor|https://w3.ibm.com/w3publisher/patents/why-we-patent |\n", | ||
"|https://w3.ibm.com/w3publisher/patents/research-master-inventor|https://w3.ibm.com/w3publisher/patents/awards |\n", | ||
"|https://w3.ibm.com/w3publisher/patents/research-master-inventor|https://w3.ibm.com/ |\n", | ||
"|https://w3.ibm.com/w3publisher/patents/research-master-inventor|http://w3.ibm.com/w3/info_terms_of_use.html |\n", | ||
"|https://w3.ibm.com/w3publisher/patents/research-master-inventor|https://w3.ibm.com/w3publisher/patents/tips-and-presentations|\n", | ||
"+---------------------------------------------------------------+-------------------------------------------------------------+\n", | ||
"only showing top 5 rows\n", | ||
"\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"links.show(5, False)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Create a graphframe from vertices and edges" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"w3graph = GraphFrame(pages, links)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Run a Pagerank algorithm" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 13, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"w3pagerank = w3graph.pageRank(resetProbability=0.15, maxIter=5)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 18, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"+-----------------------------------------------------------------------------------------------------+------------------+\n", | ||
"|id |pagerank |\n", | ||
"+-----------------------------------------------------------------------------------------------------+------------------+\n", | ||
"|https://w3.ibm.com/w3publisher |918.6350758663948 |\n", | ||
"|https://w3.ibm.com/w3publisher/w3-privacy-notice/ |917.9575882531376 |\n", | ||
"|https://w3.ibm.com/w3publisher/sales-and-delivery-101-mod-3/consult-to-operate |29.492137597197754|\n", | ||
"|https://w3.ibm.com/w3publisher/process-portal-benelux/process-portal-benelux/contact |28.53062710038013 |\n", | ||
"|https://w3.ibm.com/w3publisher/ |27.380086802454347|\n", | ||
"|https://w3.ibm.com/w3publisher/privacy-at-ibm/gdpr-awareness-course |26.689342455816792|\n", | ||
"|https://w3.ibm.com/w3publisher/undefined/ |24.013627161928223|\n", | ||
"|https://w3.ibm.com/w3publisher/undefined// |16.587042868259328|\n", | ||
"|https://w3.ibm.com/w3publisher/hpc2018 |16.22722095383144 |\n", | ||
"|https://w3.ibm.com/w3publisher/businessresourcegroups |15.559478172024198|\n", | ||
"|https://w3.ibm.com/w3publisher/pps-shortcuts/ |13.204535900388832|\n", | ||
"|https://w3.ibm.com/w3publisher/celebrations/ar-operations |12.726049892941768|\n", | ||
"|https://w3.ibm.com/w3publisher/blog |11.593287201869359|\n", | ||
"|https://w3.ibm.com/w3publisher/women-at-ibm |11.359712827645462|\n", | ||
"|https://w3.ibm.com/w3publisher/new-collar/new-collar/meet-the-apprentices |11.112107646371827|\n", | ||
"|https://w3.ibm.com/w3publisher/people-with-disabilities-gateway |10.843015784715224|\n", | ||
"|https://w3.ibm.com/w3publisher/order-management-education/2019/undefined/ |10.79000282342931 |\n", | ||
"|https://w3.ibm.com/w3publisher/tss-canada-quarterly-newsletter |10.704340508292939|\n", | ||
"|https://w3.ibm.com/w3publisher/sales-and-delivery-101-mod-4/iot-sensor-demo |10.669126926881551|\n", | ||
"|https://w3.ibm.com/w3publisher/uaab4lh |10.608933822202024|\n", | ||
"|https://w3.ibm.com/w3publisher/europe-gbs-red-hat-enablement |10.591033920619854|\n", | ||
"|https://w3.ibm.com/w3publisher/ai-ibm/use-cases/undefined/use-cases |10.555391423742247|\n", | ||
"|https://w3.ibm.com/w3publisher/sample-course/sales-opportunities%2Fiot-value-proposition |10.46516987243922 |\n", | ||
"|https://w3.ibm.com/w3publisher/internet-of-things-sales-delivery-101-m56/m6nextsteps |10.19311456800684 |\n", | ||
"|https://w3.ibm.com/w3publisher/ibm-cloud-bundles/undefined/ |10.033878023150816|\n", | ||
"|https://w3.ibm.com/w3publisher/fastcloud/undefined/ |10.033878023150816|\n", | ||
"|https://w3.ibm.com/w3publisher/pridegateway |9.879788844611001 |\n", | ||
"|https://w3.ibm.com/w3publisher/veterans-gateway |9.762175399658117 |\n", | ||
"|https://w3.ibm.com/w3publisher/hispanics-at-ibm-gateway |9.259024590999664 |\n", | ||
"|https://w3.ibm.com/w3publisher/asian-employee-gateway |9.20173532785606 |\n", | ||
"|https://w3.ibm.com/w3publisher/tap-education/introduction |9.07120765979852 |\n", | ||
"|https://w3.ibm.com/w3publisher/talent-acquisition-for-hr-professionals/cognitive-solutions |9.025255534937381 |\n", | ||
"|https://w3.ibm.com/w3publisher/native-americans-at-ibm-gateway |8.979308389889017 |\n", | ||
"|https://w3.ibm.com/w3publisher/tap-education/unit-1 |8.755475070378825 |\n", | ||
"|https://w3.ibm.com/w3publisher/meet-the-team |8.631114750692095 |\n", | ||
"|https://w3.ibm.com/w3publisher/sales-and-delivery-101-mod-4/ibm-watson-iot-platform |8.571226920933485 |\n", | ||
"|https://w3.ibm.com/w3publisher/zresilience-web/ |8.329998922120668 |\n", | ||
"|https://w3.ibm.com/w3publisher/think2019/sales-resources |8.26912511795766 |\n", | ||
"|https://w3.ibm.com/w3publisher/resources |8.262864092305383 |\n", | ||
"|https://w3.ibm.com/w3publisher/black-gateway |8.121392329423122 |\n", | ||
"|https://w3.ibm.com/w3publisher/pps-shortcuts/pps-contacts |8.107727069638473 |\n", | ||
"|https://w3.ibm.com/w3publisher/sibos-2019 |7.9565126202668655|\n", | ||
"|https://w3.ibm.com/w3publisher/inspire-me/inspire-me/events |7.877418776708795 |\n", | ||
"|https://w3.ibm.com/w3publisher/elc-mentorship/find-a-mentor/undefined/find-a-mentor/business-mentors |7.877418776708795 |\n", | ||
"|https://w3.ibm.com/w3publisher/byod/why-powerbi |7.688728592645118 |\n", | ||
"|https://w3.ibm.com/w3publisher/ai-ibm/use-cases |7.661772852064592 |\n", | ||
"|https://w3.ibm.com/w3publisher/gbs-l-k-spotlight-monthly-may-2018/gbs-l-k-spotlight-monthly-may-2018/|7.551928209198955 |\n", | ||
"|https://w3.ibm.com/w3publisher/gts-mea-take-5/take-5-form |7.542097794489055 |\n", | ||
"|https://w3.ibm.com/w3publisher/ces-2019/ginni-keynote |7.524301045618796 |\n", | ||
"|https://w3.ibm.com/w3publisher/gbs-l-k-spotlight-monthly-jul-2018/ |7.338303965098289 |\n", | ||
"+-----------------------------------------------------------------------------------------------------+------------------+\n", | ||
"only showing top 50 rows\n", | ||
"\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"w3pagerank.vertices.orderBy(\"pagerank\", ascending=False).show(50, False)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.