How to do semantic segmentation with Detectron2

I'm using Detectron2 to do instance segmentation as in the tutorial. Below is the code:
import os

from detectron2.config import CfgNode
import detectron2.data.transforms as T
from detectron2.data import DatasetMapper, build_detection_test_loader, build_detection_train_loader
from detectron2.engine import DefaultTrainer

# cfg is the config object built earlier, as in the tutorial
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

transform_list = [
    # T.Resize(shape=(200, 300)),
    T.RandomRotation(angle=90.0),
    # T.RandomContrast(intensity_min=0.75, intensity_max=1.25),
    # T.RandomBrightness(intensity_min=0.75, intensity_max=1.25),
    # T.RandomSaturation(intensity_min=0.75, intensity_max=1.25),
    # T.RandomLighting(scale=0.1),
    T.RandomFlip(),
    # T.RandomCrop(crop_type="absolute", crop_size=(180, 270))
]

# custom_mapper = get_custom_mapper(transform_list)
custom_mapper = DatasetMapper(
    cfg,
    is_train=True,
    augmentations=transform_list,
    use_instance_mask=True,
    instance_mask_format="bitmask",
)
class CustomTrainer(DefaultTrainer):
    @classmethod
    def build_test_loader(cls, cfg: CfgNode, dataset_name):
        return build_detection_test_loader(cfg, dataset_name, mapper=custom_mapper)

    @classmethod
    def build_train_loader(cls, cfg: CfgNode):
        return build_detection_train_loader(cfg, mapper=custom_mapper)
cfg.INPUT.MASK_FORMAT = 'bitmask'
cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS = False
trainer = CustomTrainer(cfg)
# trainer = DefaultTrainer(cfg)
# trainer.resume_or_load(resume=False)
# trainer.train()
However, in this case I don't care about instances; what I really want is semantic segmentation, but there is no tutorial or example for that, nor can I find a semantic segmentation model to start from. Misc/semantic_R_50_FPN_1x.yaml throws an error saying there is no pretrained model available.
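(For reference, this is roughly the kind of setup I would expect to work for that config: load the YAML that ships with the repo and fall back to the ImageNet-pretrained backbone, since no semantic segmentation checkpoint seems to be published. The weight URL is my assumption, not something from the tutorial:)

from detectron2 import model_zoo
from detectron2.config import get_cfg

cfg = get_cfg()
# the YAML exists in the detectron2 repo even though no trained checkpoint is published for it
cfg.merge_from_file(model_zoo.get_config_file("Misc/semantic_R_50_FPN_1x.yaml"))
# assumption: start from the ImageNet-pretrained ResNet-50 backbone and train the semantic head from scratch
cfg.MODEL.WEIGHTS = "detectron2://ImageNet/MSRA/R-50.pkl"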
So I'm trying to use SemSegEvaluator instead of the COCO evaluator, to get semantic segmentation metrics rather than instance metrics. Below is the code:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset, SemSegEvaluator
from detectron2.data import build_detection_test_loader
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.4
# evaluator = COCOEvaluator(val_dataset_name, output_dir=os.path.join(cfg.OUTPUT_DIR, 'val'), use_fast_impl=False, tasks=['segm'])
evaluator = SemSegEvaluator(val_dataset_name, output_dir=os.path.join(cfg.OUTPUT_DIR, 'val'))
val_loader = build_detection_test_loader(cfg, val_dataset_name)
eval_result = inference_on_dataset(predictor.model, val_loader, evaluator)
print(eval_result)
However, this is failing with the following error:
[12/20 16:29:02 d2.data.datasets.coco]: Loaded 50 imagesss abdul in COCO format from /content/gdrive/MyDrive/SolarDetection/datasets/train8//val/labels.json
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-10-61bd5aaec8ea> in <module>
3 cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.4
4 # evaluator = COCOEvaluator(val_dataset_name, output_dir=os.path.join(cfg.OUTPUT_DIR, 'val'), use_fast_impl=False, tasks=['segm'])
----> 5 evaluator = SemSegEvaluator(val_dataset_name, output_dir=os.path.join(cfg.OUTPUT_DIR, 'val'))
6 val_loader = build_detection_test_loader(cfg, val_dataset_name)
7 # ipdb.set_trace(context=6)
1 frames
/content/gdrive/MyDrive/repos/detectron2/detectron2/evaluation/sem_seg_evaluation.py in <dictcomp>(.0)
69
70 self.input_file_to_gt_file = {
---> 71 dataset_record["file_name"]: dataset_record["sem_seg_file_name"]
72 for dataset_record in DatasetCatalog.get(dataset_name)
73 }
KeyError: 'sem_seg_file_name'
Any idea or hint on how I can set up and use the SemSegEvaluator?
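(From the traceback, SemSegEvaluator looks up a sem_seg_file_name key on every record returned by DatasetCatalog.get, so I suspect the dataset has to be registered with per-pixel ground-truth files. A minimal sketch of what I assume such a registration would look like; the paths and class names below are made up:)

from detectron2.data import DatasetCatalog, MetadataCatalog

def get_sem_seg_dicts():
    # assumption: one grayscale PNG of per-pixel class ids for every input image
    return [
        {
            "file_name": "/path/to/images/0001.png",           # hypothetical input image
            "sem_seg_file_name": "/path/to/sem_seg/0001.png",  # hypothetical label map
            "height": 800,
            "width": 800,
        },
        # ... one dict per image
    ]

DatasetCatalog.register("my_val_sem_seg", get_sem_seg_dicts)
MetadataCatalog.get("my_val_sem_seg").set(
    stuff_classes=["background", "panel"],  # hypothetical class names
    ignore_label=255,
)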

Related

Unable to get repr for <class 'albumentations.core.composition.Compose'>

I am trying to run a code repository downloaded from GitHub, following its instructions, but I get the following error:
TypeError: __init__() missing 1 required positional argument: 'image_paths'
The error occurs at code line 63 (preprocessing=preprocessing). When I start the program in debug mode, it shows the following:
Unable to get repr for <class 'albumentations.core.composition.Compose'>
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import torch
from skimage import io
from utils import adjust_sar_contrast, compute_building_score, plot_images
sys.path.append('/home/salman/Downloads/SpaceNet_SAR_Buildings_Solutions-master/4-motokimura/tmp/work')
from spacenet6_model.configs.load_config import get_config_with_previous_experiment
from spacenet6_model.datasets import SpaceNet6TestDataset
from spacenet6_model.models import get_model
from spacenet6_model.transforms import get_augmentation, get_preprocess
# select previous experiment to load
exp_id = 14
exp_log_dir = "/home/salman/Downloads/SpaceNet_SAR_Buildings_Solutions-master/4-motokimura/tmp/logs" # None: use default
# select device to which the model is loaded
cuda = True
if cuda:
    device = 'cuda'
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
else:
    device = 'cpu'
    os.environ['CUDA_VISIBLE_DEVICES'] = ''
# overwrite default config with previous experiment
config = get_config_with_previous_experiment(exp_id=exp_id, exp_log_dir=exp_log_dir)
# overwrite additional hyper parameters
config.MODEL.DEVICE = device
config.WEIGHT_ROOT = "/home/salman/Downloads/SpaceNet_SAR_Buildings_Solutions-master/4-motokimura/tmp/weights/"
config.MODEL.WEIGHT = f"/home/salman/Downloads/SpaceNet_SAR_Buildings_Solutions-master/4-motokimura/tmp/weights/exp_{exp_id:04d}/model_best.pth"
config.INPUT.MEAN_STD_DIR = "/home/salman/Downloads/SpaceNet_SAR_Buildings_Solutions-master/4-motokimura/tmp/work/models/image_mean_std/"
config.INPUT.TEST_IMAGE_DIR = "/home/salman/data/SN6_buildings_AOI_11_Rotterdam_test_public/test_public/AOI_11_Rotterdam/SAR-Intensity"
config.INPUT.SAR_ORIENTATION="/home/salman/Downloads/SpaceNet_SAR_Buildings_Solutions-master/4-motokimura/tmp/work/static/SAR_orientations.txt"
config.TRAIN_VAL_SPLIT_DIR="/home/salman/Downloads/data/spacenet6/split"
config.PREDICTION_ROOT="/home/salman/Downloads/data/spacenet6/predictions"
config.POLY_CSV_ROOT="/home/salman/Downloads/data/spacenet6/polygons"
config.CHECKPOINT_ROOT="/home/salman/Downloads/data/spacenet6/ceckpoints"
config.POLY_OUTPUT_PATH="/home/salman/Downloads/data/spacenet6/val_polygons"
config.freeze()
print(config)
model = get_model(config)
model.eval();
from glob import glob
image_paths = glob(os.path.join(config.INPUT.TEST_IMAGE_DIR, "*.tif"))
#print(image_paths)
preprocessing = get_preprocess(config, is_test=True)
augmentation = get_augmentation(config, is_train=False)
test_dataset = SpaceNet6TestDataset(
    config,
    augmentation=augmentation,
    preprocessing=preprocessing
)
test_dataset_vis = SpaceNet6TestDataset(
    config,
    augmentation=augmentation,
    preprocessing=None
)
channel_footprint = config.INPUT.CLASSES.index('building_footprint')
channel_boundary = config.INPUT.CLASSES.index('building_boundary')
score_thresh = 0.5
alpha = 1.0
start_index = 900
N = 20
for i in range(start_index, start_index + N):
    image_vis = test_dataset_vis[i]['image']
    image = test_dataset[i]['image']
    x_tensor = image.unsqueeze(0).to(config.MODEL.DEVICE)
    pr_score = model.module.predict(x_tensor)
    pr_score = pr_score.squeeze().cpu().numpy()
    pr_score_building = compute_building_score(
        pr_score[channel_footprint],
        pr_score[channel_boundary],
        alpha=alpha
    )
    pr_mask = pr_score_building > score_thresh
    rotated = test_dataset[i]['rotated']
    if rotated:
        image_vis = np.flipud(np.fliplr(image_vis))
        pr_mask = np.flipud(np.fliplr(pr_mask))
    plot_images(
        SAR_intensity_0=(adjust_sar_contrast(image_vis[:, :, 0]), 'gray'),
        building_mask_pr=(pr_mask, 'viridis')
    )
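From the TypeError, SpaceNet6TestDataset.__init__ apparently expects the image paths as an argument, so I suspect the constructor calls need to look something like the sketch below (assuming the parameter name matches the error message):

test_dataset = SpaceNet6TestDataset(
    config,
    image_paths,  # assumption: the glob() results collected above
    augmentation=augmentation,
    preprocessing=preprocessing
)
test_dataset_vis = SpaceNet6TestDataset(
    config,
    image_paths,  # assumption: same list, unpreprocessed images for visualization
    augmentation=augmentation,
    preprocessing=None
)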
The function which this code calls is given below:
def get_spacenet6_preprocess(config, is_test):
    """
    """
    mean_path = os.path.join(
        config.INPUT.MEAN_STD_DIR,
        config.INPUT.IMAGE_TYPE,
        'mean.npy'
    )
    mean = np.load(mean_path)
    mean = mean[np.newaxis, np.newaxis, :]
    std_path = os.path.join(
        config.INPUT.MEAN_STD_DIR,
        config.INPUT.IMAGE_TYPE,
        'std.npy'
    )
    std = np.load(std_path)
    std = std[np.newaxis, np.newaxis, :]
    if is_test:
        to_tensor = albu.Lambda(
            image=functools.partial(_to_tensor)
        )
    else:
        to_tensor = albu.Lambda(
            image=functools.partial(_to_tensor),
            mask=functools.partial(_to_tensor)
        )
    preprocess = [
        albu.Lambda(
            image=functools.partial(
                _normalize_image,
                mean=mean,
                std=std
            )
        ),
        to_tensor,
    ]
    return albu.Compose(preprocess)

Python multiprocessing, can't pickle thread.lock (pymongo.Cursor)

First, let me assure you I have read all the relevant answers and they don't work for me.
I am using a multiprocessing Pool to parallelize my data creation. I am using MongoDB 5.0 and the pymongo client.
As you can see, I am initializing the Mongo client in the worker as suggested by the available answers, but I still get:
TypeError: cannot pickle '_thread.lock' object
Exception ignored in: <function CommandCursor.__del__ at 0x7f96f6fff160>
Is there a way I can use multiprocessing with pymongo.Cursor?
Any help will be appreciated.
This is the function that calls the Pool
def get_all_valid_events(
    event_criteria: str,
    all_listings: List[str],
    earnings: List[Dict[str, Any]],
    days_around_earnings=0,
    debug=False,
    poolsize=10,
    chunk_size=100,
    lookback=30,
    lookahead=0
):
    start = time.perf_counter()
    listings = Manager().list(all_listings.copy())
    valid_events = []
    if debug:
        for i in range(ceil(len(listings)/chunk_size)):
            valid_events += get_valid_event_dates_by_listing(event_criteria, listings[i*chunk_size:(i+1)*chunk_size], earnings, days_around_earnings, debug)
    else:
        payload = list()
        for i in range(ceil(len(listings)/chunk_size)):
            payload.append(
                [
                    event_criteria,
                    listings[i*chunk_size:(i+1)*chunk_size],
                    earnings,
                    days_around_earnings,
                    debug,
                    lookback,
                    lookahead
                ]
            )
        with ThreadPool(poolsize) as pool:
            valid_events = pool.starmap(get_valid_event_dates_by_listing, payload)
    print(f"getting all valid true events took {time.perf_counter() - start} sec")
    return valid_events
And this is the worker function:
def get_valid_event_dates_by_listing(
    event_criteria: str,
    listings: List[str],
    earnings_list,
    days_around_earnings=0,
    debug=False,
    lookback=30,
    lookahead=0
) -> List[Tuple[Tuple[str, datetime], int]]:
    # TODO: generalize event filter
    start = time.perf_counter()
    client = MongoClient()
    db = client['stock_signals']
    cursor_candles_by_listing = db.candles.find(
        {'listing': {'$in': listings}},
        {'_id': 0, 'listing': 1, 'date': 1, 'position': 1, 'PD_BBANDS_6_lower': 1, 'close': 1, 'PD_BBANDS_6_upper': 1}
    )
    candles = list(cursor_candles_by_listing)
    df = pd.DataFrame(candles).dropna()
    minimum_position_dict = dict(df.groupby('listing').min()['position'])  # We need the minimum position by listing to filter only events that have lookback
    # Filter only the dates that satisfy the criteria
    lte_previous_bb_6_lower = df['close'] <= df[f"{event_criteria}_lower"].shift()
    gte_previous_bb_6_upper = df['close'] >= df[f"{event_criteria}_upper"].shift()
    potential_true_events_df = df[lte_previous_bb_6_lower | gte_previous_bb_6_upper]
    potential_false_events_df = df.drop(potential_true_events_df.index)
    potential_true_event_dates = potential_true_events_df[['listing', 'date', 'position']].values
    actual_true_event_dates = earning_helpers.filter_event_dates_by_earnings_and_position(potential_true_event_dates, earnings_list, minimum_position_dict, days_around_earning=days_around_earnings, lookback=lookback)
    true_event_dates = [((event_date[0], event_date[1], event_date[2]), 1) for event_date in actual_true_event_dates]
    potential_false_event_dates = potential_false_events_df[['listing', 'date', 'position']].values
    actual_false_event_dates = _random_false_events_from_listing_df(potential_false_event_dates, len(actual_true_event_dates), earnings_list, minimum_position_dict, days_around_earnings, lookback)
    false_events_dates = [((event_date[0], event_date[1], event_date[2]), 0) for event_date in actual_false_event_dates]
    all_event_dates = true_event_dates + false_events_dates
    shuffle(all_event_dates)
    print(f"getting a true sequence for listing took {time.perf_counter() - start} sec")
    return all_event_dates
And this is my main
from utils import event_helpers, earning_helpers
from utils.queries import get_candle_listing
if __name__ == "__main__":
    all_listings = get_candle_listing.get_listings()
    earnigns = earning_helpers.get_all_earnings_dates()
    res = event_helpers.get_all_valid_events('PD_BBANDS_6', all_listings, earnigns, 2, chunk_size=100)
Full Stack Trace
File "test_multiprocess.py", line 8, in <module>
res = event_helpers.get_all_valid_events('PD_BBANDS_6', all_listings, earnigns, 2, chunk_size=100)
File "/media/data/projects/ml/signal_platform/utils/event_helpers.py", line 53, in get_all_valid_events
valid_events = pool.starmap(get_valid_event_dates_by_listing, payload)
File "/home/froy001/.asdf/installs/python/3.8.12/lib/python3.8/multiprocessing/pool.py", line 372, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/home/froy001/.asdf/installs/python/3.8.12/lib/python3.8/multiprocessing/pool.py", line 771, in get
raise self._value
File "/home/froy001/.asdf/installs/python/3.8.12/lib/python3.8/multiprocessing/pool.py", line 537, in _handle_tasks
put(task)
File "/home/froy001/.asdf/installs/python/3.8.12/lib/python3.8/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/home/froy001/.asdf/installs/python/3.8.12/lib/python3.8/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: cannot pickle '_thread.lock' object
Exception ignored in: <function CommandCursor.__del__ at 0x7f46e91e21f0>
Traceback (most recent call last):
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/command_cursor.py", line 68, in __del__
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/command_cursor.py", line 83, in __die
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/mongo_client.py", line 1696, in _cleanup_cursor
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/client_session.py", line 466, in _end_session
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/client_session.py", line 871, in in_transaction
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/client_session.py", line 362, in active
AttributeError: 'NoneType' object has no attribute 'STARTING'
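(A quick way to see which payload element is carrying the unpicklable lock is to try pickling each argument on its own before handing the batch to starmap; a sketch, using the payload list built in get_all_valid_events:)

import pickle

for idx, arg in enumerate(payload[0]):  # inspect the arguments of the first chunk
    try:
        pickle.dumps(arg)
    except TypeError as exc:
        print(f"argument {idx} ({type(arg)!r}) is not picklable: {exc}")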
Update: 01-23
I tried the multiprocess library (which uses dill instead of pickle), but it didn't help.

Using pathlib.Path with spark.read.parquet

Is it possible to use pathlib.Path objects with spark.read.parquet and other pyspark.sql.DataFrameReader methods?
It doesn't work by default:
>>> from pathlib import Path
>>> basedir = Path("/data")
>>> spark.read.parquet(basedir / "name.parquet")
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-5-cec8ced1bc5d> in <module>
----> 1 spark.read.parquet(basedir / "name.parquet")
<... a long traceback ...>
/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py in get_command_part(parameter, python_proxy_pool)
296 command_part += ";" + interface
297 else:
--> 298 command_part = REFERENCE_TYPE + parameter._get_object_id()
299
300 command_part += "\n"
AttributeError: 'PosixPath' object has no attribute '_get_object_id'
I tried to write a py4j type converter:
from py4j.java_gateway import JavaClass
from py4j.protocol import register_input_converter

class PathConverter(object):
    def can_convert(self, object):
        return isinstance(object, Path)

    def convert(self, object, gateway_client):
        JavaString = JavaClass("java.lang.String", gateway_client)
        return JavaString(str(object))

register_input_converter(PathConverter())
But it looks like I misunderstood some string-conversion concepts/specifics, because jvm.java.lang.String("string") in py4j returns a Python str object:
>>> spark.read.parquet(basedir / "name.parquet")
<... a long traceback ...>
/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py in __call__(self, *args)
1306
1307 for temp_arg in temp_args:
-> 1308 temp_arg._detach()
AttributeError: 'str' object has no attribute '_detach'
I have only one ugly solution for now:
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index fa3e829a88..7441a8ba8c 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -298,7 +298,7 @@ class DataFrameReader(OptionUtils):
modifiedAfter=modifiedAfter, datetimeRebaseMode=datetimeRebaseMode,
int96RebaseMode=int96RebaseMode)
- return self._df(self._jreader.parquet(_to_seq(self._spark._sc, paths)))
+ return self._df(self._jreader.parquet(_to_seq(self._spark._sc, paths, converter=str)))
def text(self, paths, wholetext=False, lineSep=None, pathGlobFilter=None,
recursiveFileLookup=None, modifiedBefore=None,
Also, looking through the readwriter.py source code, it feels safe enough to monkeypatch its version of _to_seq:
from functools import partial
from pathlib import PurePath

from pyspark.sql import readwriter

def converter(x):
    if isinstance(x, PurePath):
        return str(x)
    return x

readwriter._to_seq = partial(readwriter._to_seq, converter=converter)
Or maybe a more correct and complete workaround would be to monkeypatch the reader/writer methods directly:
from functools import wraps

@wraps(readwriter.DataFrameWriter.parquet)
def parquet(self, path, mode=None, partitionBy=None, compression=None):
    return parquet.__wrapped__(self, str(path), mode=mode,
                               partitionBy=partitionBy,
                               compression=compression)

readwriter.DataFrameWriter.parquet = parquet
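(The trivial per-call workaround, of course, is to stringify the path at each call site instead of patching anything:)

from pathlib import Path

basedir = Path("/data")
df = spark.read.parquet(str(basedir / "name.parquet"))  # plain str() conversion, no converter needed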

Pycuda test_driver.py raises Attribute Error

I'm trying to install PyCUDA on Linux Mint with a GeForce 960M and CUDA 8.0 installed. When I run the test_driver.py script, it outputs the following error:
============================= test session starts ==============================
platform linux2 -- Python 2.7.12, pytest-3.0.3, py-1.4.31, pluggy-0.4.0
rootdir: /home/milton/Downloads/pycuda-2016.1.2, inifile:
collected 28 items
test_driver.py ...................x.....F..
=================================== FAILURES ===================================
________________________ TestDriver.test_multi_context _________________________
args = (,), kwargs = {}
pycuda = <module 'pycuda' from '/home/milton/miniconda2/lib/python2.7/site-packages/pycuda-2016.1.2-py2.7-linux-x86_64.egg/pycuda/__init__.pyc'>
ctx = <pycuda._driver.Context object at 0x7f540e39d758>
clear_context_caches = <function clear_context_caches at 0x7f540ee26758>
collect =<built-in function collect>
def f(*args, **kwargs):
import pycuda.driver
# appears to be idempotent, i.e. no harm in calling it more than once
pycuda.driver.init()
ctx = make_default_context()
try:
assert isinstance(ctx.get_device().name(), str)
assert isinstance(ctx.get_device().compute_capability(), tuple)
assert isinstance(ctx.get_device().get_attributes(), dict)
inner_f(*args, **kwargs)
../../../miniconda2/lib/python2.7/site-packages/pycuda-2016.1.2-py2.7-linux-x86_64.egg/pycuda/tools.py:460:
self = <test_driver.TestDriver instance at 0x7f540c21fc20>
@mark_cuda_test
def test_multi_context(self):
if drv.get_version() < (2,0,0):
return
if drv.get_version() >= (2,2,0):
if drv.Context.get_device().compute_mode == drv.compute_mode.EXCLUSIVE:
E AttributeError: type object 'compute_mode' has no attribute 'EXCLUSIVE'
test_driver.py:638: AttributeError
================ 1 failed, 26 passed, 1 xfailed in 6.92 seconds ================
The PyCUDA driver's compute_mode only supports the following modes:
DEFAULT,
PROHIBITED,
EXCLUSIVE_PROCESS
so please change this:
if drv.Context.get_device().compute_mode == drv.compute_mode.EXCLUSIVE:
to
if drv.Context.get_device().compute_mode == drv.compute_mode.EXCLUSIVE_PROCESS:
in your test_driver.py file

Get ipython notebook filename in function

This code works fine in an IPython/Jupyter cell to get the notebook filename:
js = """var kernel = IPython.notebook.kernel;
var thename = window.document.getElementById("notebook_name").innerHTML;
var command = "theNotebook2 = " + "'"+thename+"'";
kernel.execute(command);"""
display(Javascript(js))
theNotebook2 + '.ipynb'
'techela.ipynb'
If I define a function in a cell and call it:
def get_filename():
    """Get the notebook filename."""
    js = """var kernel = IPython.notebook.kernel;
    var thename = window.document.getElementById("notebook_name").innerHTML;
    var command = "theNotebook2 = " + "'"+thename+"'";
    kernel.execute(command);"""
    display(Javascript(js))
    return theNotebook2 + '.ipynb'
get_filename()
'techela.ipynb'
It also seems to work fine and give me the filename.
However, if I put that function in a module, and import it, then it stops working.
from techela import get_filename
get_filename()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-17-42ee37d0d253> in <module>()
1 from techela import get_filename
----> 2 get_filename()
/Users/jkitchin/techela/ipynb/techela.py in get_filename()
11 kernel.execute(command);"""
12 display(Javascript(js))
---> 13 return theNotebook2 + '.ipynb'
14
15
NameError: name 'theNotebook2' is not defined
Any idea why this is failing?
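(One direction worth checking, sketched below: kernel.execute puts theNotebook2 into the notebook's interactive namespace, not into the imported module's globals, so reading it back via get_ipython().user_ns may behave differently. This is an assumption on my part, and the Javascript may still run too late for the same call to see the value.)

from IPython import get_ipython
from IPython.display import Javascript, display

def get_filename():
    """Sketch: read theNotebook2 from the kernel's user namespace instead of module globals."""
    js = """var kernel = IPython.notebook.kernel;
    var thename = window.document.getElementById("notebook_name").innerHTML;
    var command = "theNotebook2 = " + "'"+thename+"'";
    kernel.execute(command);"""
    display(Javascript(js))
    # assumption: the variable set by kernel.execute lands in the interactive (user) namespace
    return get_ipython().user_ns.get("theNotebook2", "") + ".ipynb"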