Skip to content

Commit

Permalink
i
Browse files Browse the repository at this point in the history
  • Loading branch information
lindsey98 committed Jan 26, 2024
2 parents d701822 + 84d9c8c commit d73d54c
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 91 deletions.
25 changes: 15 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
## Instructions
Requirements:
- CUDA 11
- Anaconda installed (see the official installation guide: https://docs.anaconda.com/free/anaconda/install/index.html)

1. Create a local clone of Phishpedia
```
Expand All @@ -72,7 +73,7 @@ git clone https://github.com/lindsey98/Phishpedia.git

2. Setup
```
cd Phishpedia
cd Phishpedia/
chmod +x ./setup.sh
./setup.sh
```
Expand All @@ -84,19 +85,23 @@ conda activate myenv
```

Run in Python to test a single website

```python
from phishpedia.phishpedia_main import test
import matplotlib.pyplot as plt
from phishpedia.phishpedia_config import load_config

url = open("phishpedia/datasets/test_sites/accounts.g.cdcde.com/info.txt").read().strip()
screenshot_path = "phishpedia/datasets/test_sites/accounts.g.cdcde.com/shot.png"
cfg_path = None # None means use default config.yaml
ELE_MODEL, SIAMESE_THRE, SIAMESE_MODEL, LOGO_FEATS, LOGO_FILES, DOMAIN_MAP_PATH = load_config(cfg_path)
ELE_MODEL, SIAMESE_THRE, SIAMESE_MODEL, LOGO_FEATS, LOGO_FILES, DOMAIN_MAP_PATH = load_config(None)

phish_category, pred_target, plotvis, siamese_conf, pred_boxes = test(url, screenshot_path,
ELE_MODEL, SIAMESE_THRE, SIAMESE_MODEL, LOGO_FEATS, LOGO_FILES, DOMAIN_MAP_PATH)
phish_category, pred_target, plotvis, siamese_conf, pred_boxes = test(url=url, screenshot_path=screenshot_path,
ELE_MODEL=ELE_MODEL,
SIAMESE_THRE=SIAMESE_THRE,
SIAMESE_MODEL=SIAMESE_MODEL,
LOGO_FEATS=LOGO_FEATS,
LOGO_FILES=LOGO_FILES,
DOMAIN_MAP_PATH=DOMAIN_MAP_PATH
)

print('Phishing (1) or Benign (0) ?', phish_category)
print('What is its targeted brand if it is a phishing ?', pred_target)
Expand All @@ -106,12 +111,12 @@ plt.imshow(plotvis[:, :, ::-1])
plt.title("Predicted screenshot with annotations")
plt.show()
```
Or run in the terminal to test a list of sites, copy run.py to your local machine and run

Or run in bash
```
python run.py --folder <folder you want to test e.g. phishpedia/datasets/test_sites> --results <where you want to save the results e.g. test.txt> --no_repeat
python run.py --folder <folder you want to test e.g. phishpedia/datasets/test_sites> --results <where you want to save the results e.g. test.txt>
```


## Miscellaneous
- In our paper, we also implement several phishing detection and identification baselines; see [here](https://github.com/lindsey98/PhishingBaseline).
- The logo targetlist described in our paper includes 181 brands; in this code repository we have further expanded it to include 277 brands.
Expand All @@ -131,4 +136,4 @@ If you find our work useful in your research, please consider citing our paper b
```

## Contacts
If you have any issues running our code, you can raise an issue or send an email to [email protected], [email protected], and [email protected]
If you have any issues running our code, you can raise an issue or send an email to [email protected], [email protected], and [email protected]
26 changes: 13 additions & 13 deletions phishpedia/phishpedia_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@
import yaml


def load_config(cfg_path: Union[str, None], reload_targetlist=False):
def load_config(cfg_path: Union[str, None]):

#################### '''Default''' ####################
if cfg_path is None:
with open(os.path.join(os.path.dirname(__file__), 'configs.yaml')) as file:
configs = yaml.load(file, Loader=yaml.FullLoader)
Expand All @@ -26,19 +25,20 @@ def load_config(cfg_path: Union[str, None], reload_targetlist=False):
SIAMESE_THRE = configs['SIAMESE_MODEL']['MATCH_THRE']

print('Load protected logo list')
if configs['SIAMESE_MODEL']['TARGETLIST_PATH'].endswith('.zip') \
and not os.path.isdir('{}'.format(configs['SIAMESE_MODEL']['TARGETLIST_PATH'].split('.zip')[0].replace('/', os.sep))):
subprocess.run('cd {} && unzip expand_targetlist.zip -d .'.format(os.path.dirname(configs['SIAMESE_MODEL']['TARGETLIST_PATH'])), shell=True)
# subprocess.run(
# "unzip {} -d {}/".format(configs['SIAMESE_MODEL']['TARGETLIST_PATH'].replace('/', os.sep),
# configs['SIAMESE_MODEL']['TARGETLIST_PATH'].split('.zip')[0].replace('/', os.sep)),
# shell=True,
# )
targetlist_zip_path = configs['SIAMESE_MODEL']['TARGETLIST_PATH']
targetlist_dir = os.path.dirname(targetlist_zip_path)
zip_file_name = os.path.basename(targetlist_zip_path)
targetlist_folder = zip_file_name.split('.zip')[0]
full_targetlist_folder_dir = os.path.join(targetlist_dir, targetlist_folder)

if targetlist_zip_path.endswith('.zip') and not os.path.isdir(full_targetlist_folder_dir.replace('/', os.sep)):
os.makedirs(full_targetlist_folder_dir, exist_ok=True)
subprocess.run(f'unzip -o "{targetlist_zip_path}" -d "{full_targetlist_folder_dir}"', shell=True)

SIAMESE_MODEL, LOGO_FEATS, LOGO_FILES = phishpedia_config(
num_classes=configs['SIAMESE_MODEL']['NUM_CLASSES'],
weights_path=configs['SIAMESE_MODEL']['WEIGHTS_PATH'].replace('/', os.sep),
targetlist_path=configs['SIAMESE_MODEL']['TARGETLIST_PATH'].replace('/', os.sep).split('.zip')[0])
num_classes=configs['SIAMESE_MODEL']['NUM_CLASSES'],
weights_path=configs['SIAMESE_MODEL']['WEIGHTS_PATH'].replace('/', os.sep),
targetlist_path=full_targetlist_folder_dir.replace('/', os.sep))
print('Finish loading protected logo list')

DOMAIN_MAP_PATH = configs['SIAMESE_MODEL']['DOMAIN_MAP_PATH'].replace('/', os.sep)
Expand Down
2 changes: 0 additions & 2 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
help='Input folder path to parse')
parser.add_argument('-r', "--results", default=date + '_pedia.txt',
help='Input results file name')
parser.add_argument('--repeat', action='store_true')
parser.add_argument('--no_repeat', action='store_true')

args = parser.parse_args()
print(args)
Expand Down
52 changes: 25 additions & 27 deletions setup.sh
Original file line number Diff line number Diff line change
@@ -1,44 +1,46 @@
#!/bin/bash

FILEDIR=$(pwd)

# Source the Conda configuration
CONDA_BASE=$(conda info --base)
# source "$CONDA_BASE/etc/profile.d/conda.sh"
ENV_NAME="myenv"
source "$CONDA_BASE/etc/profile.d/conda.sh"

# Check if the environment already exists
conda info --envs | grep -w "$ENV_NAME" > /dev/null
conda info --envs | grep -w "myenv" > /dev/null

if [ $? -eq 0 ]; then
echo "Activating Conda environment $ENV_NAME"
conda activate "$ENV_NAME"
echo "Activating Conda environment myenv"
conda activate myenv
else
echo "Creating and activating new Conda environment $ENV_NAME with Python 3.8"
conda create -n "$ENV_NAME" python=3.8
conda activate "$ENV_NAME"
conda create -n myenv python=3.8
conda activate myenv
fi

# Set Conda environment as an environment variable
# export MYENV=$(conda info --base)/envs/"$ENV_NAME"
pip install -r requirements.txt

# Get the CUDA and cuDNN versions, install pytorch, torchvision
conda run -n "$ENV_NAME" pip install -r requirements.txt
OS=$(uname -s)

# Install pytorch, torchvision, detectron2
if command -v nvcc &> /dev/null; then
conda run -n "$ENV_NAME" pip install torch==1.9.0 torchvision -f "https://download.pytorch.org/whl/cu111/torch_stable.html"
conda run -n "$ENV_NAME" python -m pip install detectron2 -f "https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.9/index.html"
if [[ "$OS" == "Darwin" ]]; then
echo "Installing PyTorch and torchvision for macOS."
pip install torch==1.9.0 torchvision==0.10.0 torchaudio==0.9.0
python -m pip install detectron2 -f "https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.9/index.html"
else
conda run -n "$ENV_NAME" pip install torch==1.9.0+cpu torchvision==0.10.0+cpu torchaudio==0.9.0 -f "https://download.pytorch.org/whl/torch_stable.html"
conda run -n "$ENV_NAME" python -m pip install detectron2 -f "https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.9/index.html"
# Check if NVIDIA GPU is available for Linux and Windows
if command -v nvcc &> /dev/null; then
echo "CUDA is detected, installing GPU-supported PyTorch and torchvision."
pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f "https://download.pytorch.org/whl/torch_stable.html"
python -m pip install detectron2 -f "https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.9/index.html"
else
echo "No CUDA detected, installing CPU-only PyTorch and torchvision."
pip install torch==1.9.0+cpu torchvision==0.10.0+cpu torchaudio==0.9.0 -f "https://download.pytorch.org/whl/torch_stable.html"
python -m pip install detectron2 -f "https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.9/index.html"
fi
fi

## Download models
conda run -n "$ENV_NAME" pip install -v .
package_location=$(conda run -n "$ENV_NAME" pip show phishpedia | grep Location | awk '{print $2}')
pip install -v .
package_location=$(pip show phishpedia | grep Location | awk '{print $2}')

if [ -z "Phishpedia" ]; then
if [ -z "$package_location" ]; then
echo "Package Phishpedia not found in the Conda environment myenv."
exit 1
else
Expand All @@ -50,14 +52,10 @@ else
gdown --id 1H0Q_DbdKPLFcZee8I14K62qV7TTy7xvS
gdown --id 1fr5ZxBKyDiNZ_1B6rRAfZbAHBBoUjZ7I
gdown --id 1qSdkSSoCYUkZMKs44Rup_1DPBxHnEKl1
# sudo apt-get update
# sudo apt-get install unzip
# unzip expand_targetlist.zip
fi

# Replace the placeholder in the YAML template

sed "s|CONDA_ENV_PATH_PLACEHOLDER|$package_location/phishpedia|g" "$FILEDIR/phishpedia/configs_template.yaml" > "$FILEDIR/phishpedia/configs.yaml"


echo "All packages installed successfully!"
39 changes: 0 additions & 39 deletions test.sh

This file was deleted.

0 comments on commit d73d54c

Please sign in to comment.