Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
inovisao
pynovisao
Commits
e29f2ef5
Commit
e29f2ef5
authored
Nov 18, 2017
by
Alexandre Cese
Browse files
mudancas com opcoes para salvar em csv e fazer histograma de superpixels
parent
eb119d36
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
461 additions
and
14 deletions
+461
-14
README.md
README.md
+7
-0
requeriments.txt
requeriments.txt
+1
-0
src/extraction/histoextraction.py
src/extraction/histoextraction.py
+393
-0
src/main.py
src/main.py
+8
-3
src/pynovisao.py
src/pynovisao.py
+52
-11
No files found.
README.md
View file @
e29f2ef5
...
...
@@ -125,6 +125,13 @@ Em uma das máquinas em que tentei instalar deu um erro que resolvi rodando o co
$ sudo apt-get build-dep python-matplotlib
$ sudo pip install cycler
```
### Como instalar o scikit-learn e pandas
```
sudo pip install -U scikit-learn
sudo pip install -U pandas
```
### Como instalar o tk/tk-dev
...
...
requeriments.txt
View file @
e29f2ef5
...
...
@@ -10,3 +10,4 @@ javabridge
python-weka-wrapper
cycler
cython
scikit-learn
src/extraction/histoextraction.py
0 → 100644
View file @
e29f2ef5
import
os
import
itertools
import
pandas
as
pd
from
sklearn.preprocessing
import
StandardScaler
from
interface.interface
import
InterfaceException
as
IException
from
util
import
File
,
TimeUtils
from
sklearn.cluster
import
MiniBatchKMeans
class histoextraction(object):
    """Feature-extraction helpers that save superpixel/image features to CSV
    or ARFF files and build Bag-of-Superpixels histograms via k-means.

    NOTE(review): the lowercase class name breaks the PascalCase convention
    but is kept because external callers instantiate it by this exact name.
    """

    def __init__(self):
        # Accumulator for extraction rows; kept for interface compatibility
        # (the extraction methods in this class build their own local lists).
        self.data = []
def extract_all_superpixels_csv(self, dataset, segmenter, extractors, overwrite=True):
    """Extract features from every superpixel of every image in *dataset* and save them to CSV.

    The first row written is a header (the extractor labels plus 'file' and
    'class'); each following row holds one superpixel's feature values, the
    image file name and the class (directory) name.

    :param str dataset: path of the dataset (one sub-directory per class)
    :param segmenter: Pynovisao segmenter providing run/get_list_segments/get_segment
    :param extractors: list of Pynovisao extractor classes
    :param bool overwrite: when False and the CSV already exists, return it untouched
    :return: path of the CSV file that was written
    :raises IException: if no extractor was selected
    """
    if len(extractors) == 0:
        raise IException("Please select at least one extractor")

    # BUG FIX: the original guard tested 'training_kmeans.pkl' although the
    # data is written to 'data.csv', so the overwrite check never matched the
    # real output. It also returned a (path, 0) tuple here but a bare path
    # below; callers expect a single value, so both paths now return one.
    output_file = File.make_path(dataset, "data.csv")
    if not overwrite and os.path.isfile(output_file):
        return output_file

    dirs = sorted(File.list_dirs(dataset))
    data = []

    # Runs the feature extraction for every superpixel of every class.
    for cl in dirs:
        items = sorted(os.listdir(File.make_path(dataset, cl)))
        print("Processing class %s - %d itens" % (cl, len(items)))

        for item in items:
            filepath = File.make_path(dataset, cl, item)
            print(filepath)
            image = File.open_image(filepath, rgb=False)

            segmenter.run(image)
            for idx in segmenter.get_list_segments():
                segment, size_segment, idx_segment, run_time = segmenter.get_segment(idx_segment=idx)

                if len(data) > 0:
                    # Header already emitted: keep only the values (index 2 of
                    # each extractor's (labels, types, values) result).
                    # BUG FIX: list(zip(...)) before indexing — zip objects are
                    # not subscriptable on Python 3.
                    values = list(itertools.chain.from_iterable(
                        list(zip(*[extractor().run(segment) for extractor in extractors]))[2]))
                else:
                    # First segment seen: build the header row from the labels.
                    labels, types, values = [list(itertools.chain.from_iterable(ret))
                                             for ret in zip(*[extractor().run(segment) for extractor in extractors])]
                    header = ["%s" % label for label in labels]
                    header.append("file")
                    header.append("class")
                    data.append(header)

                values.append(item)
                values.append(cl)
                data.append(values)

    dataframe = pd.DataFrame(data)
    dataframe.to_csv(output_file, header=False, index=False)
    print("Saving data to file")
    return output_file
def extract_all_csv(self, dataset, extractors, output_file=None, dirs=None, overwrite=True):
    """Extract features from all images of the dataset and save them to '<dataset>/data.csv'.

    The first CSV row is a header (extractor labels plus 'file' and 'class');
    each later row is one image's feature values, file name and class.

    :param str dataset: path of the dataset (one sub-directory per class)
    :param extractors: list of Pynovisao extractor classes
    :param str output_file: legacy parameter, kept for interface compatibility
    :param list dirs: optional subset of class directories to process
    :param bool overwrite: when False and the CSV already exists, return it untouched
    :return: tuple (csv_path, elapsed_seconds)
    :raises IException: if no extractor was selected, an image cannot be
        opened, or the dataset contains no images
    """
    if len(extractors) == 0:
        raise IException("Please select at least one extractor")

    # BUG FIX: the original built a '<name>.arff' path, used it for the
    # overwrite test and returned it, although the data is actually written
    # to 'data.csv'; the path of the file really produced is used instead.
    csv_file = File.make_path(dataset, "data.csv")
    if not overwrite and os.path.isfile(csv_file):
        return csv_file, 0

    start_time = TimeUtils.get_time()

    classes = sorted(File.list_dirs(dataset))
    dirs = classes if dirs is None else dirs
    data = []

    # Runs the feature extraction for all images in all classes.
    for cl in dirs:
        items = sorted(os.listdir(File.make_path(dataset, cl)))
        print("Processing class %s - %d itens" % (cl, len(items)))

        for item in items:
            if item.startswith('.'):  # skip hidden files
                continue

            try:
                filepath = File.make_path(dataset, cl, item)
                image = File.open_image(filepath, rgb=False)
            except Exception:
                # BUG FIX: narrowed from a bare 'except:' which also swallowed
                # SystemExit and KeyboardInterrupt.
                raise IException("Image %s is possibly corrupt" % filepath)

            if len(data) > 0:
                # BUG FIX: list(zip(...)) before indexing for Python 3 safety.
                values = list(itertools.chain.from_iterable(
                    list(zip(*[extractor().run(image) for extractor in extractors]))[2]))
            else:
                # First image seen: also build the header row from the labels.
                labels, types, values = [list(itertools.chain.from_iterable(ret))
                                         for ret in zip(*[extractor().run(image) for extractor in extractors])]
                header = ["%s" % label for label in labels]
                header.append("file")
                header.append("class")
                data.append(header)

            values.append(item)
            values.append(cl)
            data.append(values)

    if len(data) == 0:
        raise IException("There are no images in dataset: %s" % dataset)

    dataframe = pd.DataFrame(data)
    dataframe.to_csv(csv_file, header=False, index=False)
    print("Saving data to file")

    end_time = TimeUtils.get_time()
    return csv_file, (end_time - start_time)
def extract_all_superpixels_arff(self, dataset, segmenter, extractors, output_file=None, dirs=None, overwrite=True):
    """Extract features from every superpixel of every image and save them in ARFF format.

    :param dataset: path of the dataset (one sub-directory per class)
    :param segmenter: Pynovisao segmenter used to split each image into superpixels
    :param extractors: list of Pynovisao extractor classes
    :param output_file: base name of the output file (defaults to the dataset name)
    :param dirs: optional subset of class directories to process
    :param overwrite: when False and the ARFF file already exists, return it untouched
    :return: tuple (arff_path, elapsed_seconds)
    :raises IException: if no extractor was selected, an image cannot be
        opened, or the dataset contains no images
    """
    if len(extractors) == 0:
        raise IException("Please select at least one extractor")

    if output_file is None:
        output_file = File.get_filename(dataset)
    output_file = File.make_path(dataset, output_file + '.arff')

    # If the output file already exists and must not be overridden, return it.
    if not overwrite and os.path.isfile(output_file):
        return output_file, 0

    start_time = TimeUtils.get_time()

    classes = sorted(File.list_dirs(dataset))
    dirs = classes if dirs is None else dirs
    data = []

    # Feature extraction of every superpixel in all images of all classes.
    for cl in dirs:
        items = sorted(os.listdir(File.make_path(dataset, cl)))
        print("Processing class %s - %d itens" % (cl, len(items)))

        for item in items:
            if item.startswith('.'):  # skip hidden files
                continue

            try:
                filepath = File.make_path(dataset, cl, item)
                print(filepath)
                image = File.open_image(filepath, rgb=False)
            except Exception:
                # BUG FIX: narrowed from a bare 'except:' which also swallowed
                # SystemExit and KeyboardInterrupt.
                raise IException("Image %s is possibly corrupt" % filepath)

            segmenter.run(image)

            # Extract the attributes of each superpixel.
            for idx in segmenter.get_list_segments():
                segment, size_segment, idx_segment, run_time = segmenter.get_segment(idx_segment=idx)

                if len(data) > 0:
                    # BUG FIX: list(zip(...)) before indexing for Python 3 safety.
                    values = list(itertools.chain.from_iterable(
                        list(zip(*[extractor().run(segment) for extractor in extractors]))[2]))
                else:
                    # First segment seen: also capture the attribute labels and
                    # types required by the ARFF header.
                    labels, types, values = [list(itertools.chain.from_iterable(ret))
                                             for ret in zip(*[extractor().run(segment) for extractor in extractors])]

                # Unknown classes are mapped to the first known class.
                data.append(values + [cl if cl in classes else classes[0]])

    if len(data) == 0:
        raise IException("There are no images in dataset: %s" % dataset)

    # Save the output file in ARFF format.
    self._save_output(File.get_filename(dataset), classes, labels, types, data, output_file)

    end_time = TimeUtils.get_time()
    return output_file, (end_time - start_time)
def _save_output(self, relation, classes, labels, types, data, output_file):
    """Save output file in ARFF format.

    Parameters
    ----------
    relation : string
        Name of relation.
    classes : list of string
        List of classes names.
    labels : list of string
        List of attributes names.
    types : list of string
        List of attributes types.
    data : list of list of string
        List of instances.
    output_file : string
        Path to output file.
    """
    # BUG FIX: the file is now opened through a context manager (text mode)
    # so it is closed even when a write fails; the original used a bare
    # open(..., 'wb') followed by a manual close().
    with open(output_file, 'w') as arff:
        arff.write("%s %s\n\n" % ('@relation', relation))

        for label, t in zip(labels, types):
            arff.write("%s %s %s\n" % ('@attribute', label, t))

        arff.write("%s %s {%s}\n\n" % ('@attribute', 'classe', ', '.join(classes)))

        arff.write('@data\n\n')
        for instance in data:
            # One comma-separated line per instance.
            arff.write(",".join(map(str, instance)) + "\n")
################################## Histogram-building section ######################################

def get_classes_superpixels_from_k_means(self, k, X):
    """Cluster the superpixel feature matrix with MiniBatchKMeans.

    :param int k: number of clusters
    :param X: feature matrix, one row per superpixel
    :return: tuple (cluster label of each superpixel, fitted clusterer)
    """
    clusterer = MiniBatchKMeans(n_clusters=k)
    clusterer.fit(X)
    return clusterer.predict(X), clusterer
def get_histogramas_de_dados(self, X_train, classes_superpixels, k):
    """Build one Bag-of-Superpixels histogram per image.

    :param DataFrame X_train: superpixel rows, with at least 'file' and 'class' columns
    :param classes_superpixels: cluster label of each superpixel, indexable by
        the DataFrame index of X_train
    :param int k: number of k-means clusters (histogram bins)
    :return: list of histograms; each holds k bin counts followed by the
        image file name and its class
    """
    histogramas_de_dados = []

    # For each class present in X_train...
    for cl in set(X_train['class']):
        lista_cl = X_train[X_train['class'] == cl]  # rows of this class only
        # ...and for each image file of that class...
        for arq in set(lista_cl['file']):
            # k bins plus two extra slots for the file name and the class.
            h = [0] * (k + 2)
            tab = lista_cl[lista_cl['file'] == arq]  # rows of this image only
            # Count how many of the image's superpixels fall in each cluster.
            # (Iterates the index directly; the original used iterrows() and
            # discarded the row, and kept an unused 'total' variable.)
            for index in tab.index:
                h[classes_superpixels[index]] += 1
            h[k] = arq
            h[k + 1] = cl
            histogramas_de_dados.append(h)

    return histogramas_de_dados
def norm_data(self, X_csv):
    """Standardize the superpixel features (zero mean, unit variance).

    The last two columns ('file' and 'class') are excluded; scikit-learn's
    k-means works on the standardized numeric columns only.

    :param X_csv: raw feature table read from data.csv
    :return: standardized feature matrix
    """
    scaler_KM = StandardScaler()
    return scaler_KM.fit_transform(X_csv.iloc[:, 0:-2])
def make_histogram(self, dataset, csv, k):
    """Build per-image Bag-of-Superpixels histograms from '<dataset>/data.csv'.

    :param str dataset: path of the dataset directory
    :param bool csv: when True save the histograms as CSV, otherwise as ARFF
    :param int k: number of k-means clusters (histogram bins)
    :return: path of the file that was written
    """
    # BUG FIX: the original hard-coded ``k = 10`` here, silently ignoring the
    # caller-supplied ``k``; the parameter is now honoured (the existing
    # caller already passes k=10, so behaviour is unchanged for it).
    # NOTE: data.csv is written with header=False but its first data row IS
    # the label row, so read_csv's default header=0 yields the 'file' and
    # 'class' columns the helpers below rely on.
    data = pd.read_csv(File.make_path(dataset, 'data.csv'))
    data_norm = self.norm_data(data)

    # Cluster the superpixels and assign each one a cluster label.
    classes_superpixels, clusterer = self.get_classes_superpixels_from_k_means(k, data_norm)

    # Build the per-image histograms of cluster labels.
    histogramas_de_dados = self.get_histogramas_de_dados(data, classes_superpixels, k)

    if not csv:
        return self.save_histogram(dataset, histogramas_de_dados, k=k)

    hist = pd.DataFrame(histogramas_de_dados)
    output_file = File.make_path(dataset, "histograma.csv")
    hist.to_csv(output_file, header=False, index=False)
    print("Saving data to file")
    return output_file
def save_histogram(self, dataset, imagens_histograma, k):
    """Save the Bag-of-Superpixels histograms of all images in ARFF format.

    :param dataset: path of the dataset directory
    :param imagens_histograma: one histogram per image (k counts, file, class)
    :param k: number of bins in each histogram
    :return: training_file - path of the ARFF file written
    """
    training_file = File.make_path(dataset, "training_histograma_" + str(k) + ".arff")

    # Build the @data section: the k bin counts followed by the class; the
    # file-name slot (position len(inst)-2) is deliberately skipped.
    # BUG FIX: the original inner loop reused the outer enumerate index 'i',
    # shadowing it; the unused outer index has been removed entirely.
    lines = []
    for inst in imagens_histograma:
        row = [str(inst[i]) for i in range(0, len(inst) - 2)]
        row.append(str(inst[len(inst) - 1]))
        lines.append(",".join(row) + "\n")

    # BUG FIX: the original 'arff.close()' was placed after 'return', so it
    # never executed and the file was never closed; a context manager now
    # guarantees the data is flushed and the handle released.
    with open(training_file, 'w') as arff:
        arff.write("%s\n\n" % ('@relation histogram'))

        for i in range(0, len(imagens_histograma[0]) - 2):
            arff.write("%s %s %s\n" % ('@attribute', str(i), 'numeric'))

        arff.write("%s %s {%s}\n\n" % ('@attribute', 'classe',
                                       ', '.join(sorted(File.list_dirs(dataset)))))

        arff.write('@data\n\n')
        arff.writelines(lines)

    return training_file
\ No newline at end of file
src/main.py
View file @
e29f2ef5
...
...
@@ -72,10 +72,15 @@ if __name__ == "__main__":
tk
.
add_command
(
"Execute"
,
act
.
run_segmenter
,
'S'
)
tk
.
add_menu
(
"Feature Extraction"
)
tk
.
add_command
(
"Select extractors"
,
act
.
select_extractors
,
'
e
'
)
tk
.
add_command
(
"Select extractors"
,
act
.
select_extractors
,
'
E
'
)
tk
.
add_separator
()
tk
.
add_command
(
"Execute"
,
act
.
run_extractors
,
'F'
)
tk
.
add_command
(
"Execute images extraction"
,
act
.
run_extractors
,
'F'
)
tk
.
add_command
(
"Execute Superpixels extraction"
,
act
.
superpixel_extraction
,
'J'
)
tk
.
add_separator
()
tk
.
add_check_button
(
"Save in CSV"
,
act
.
toggle_CSV
,
default_state
=
False
)
tk
.
add_separator
()
tk
.
add_command
(
"Make Histogram(k=10) only from CSV"
,
act
.
make_histogram
,
'H'
)
tk
.
add_menu
(
"Classification"
)
tk
.
add_command
(
"Choose classifier"
,
act
.
select_classifier
)
tk
.
add_command
(
"Configure"
,
act
.
configure_classifier
)
...
...
src/pynovisao.py
View file @
e29f2ef5
...
...
@@ -25,6 +25,8 @@ from util.config import Config
from
util.file_utils
import
File
as
f
from
util.utils
import
TimeUtils
from
extraction.histoextraction
import
histoextraction
class
Act
(
object
):
"""Store all actions of Pynovisao."""
...
...
@@ -63,6 +65,8 @@ class Act(object):
self
.
_ground_truth
=
False
self
.
_gt_segments
=
None
self
.
csv
=
False
;
def
_init_dataset
(
self
,
directory
):
"""Initialize the directory of image dataset.
...
...
@@ -308,7 +312,16 @@ class Act(object):
"""
self
.
_dataset_generator
=
not
self
.
_dataset_generator
def toggle_CSV(self):
    """Toggle whether extraction results are saved as CSV instead of ARFF."""
    self.csv = not self.csv
def make_histogram(self):
    """Build the superpixel histogram (k=10) from the dataset's CSV and log the output path."""
    extractor = histoextraction()
    output_file = extractor.make_histogram(self.dataset, self.csv, k=10)
    self.tk.append_log("\nOutput file saved in %s", output_file)
def
select_segmenter
(
self
):
"""Open a dialog to choose the segmenter.
"""
...
...
@@ -403,18 +416,46 @@ class Act(object):
"""Perform a feature extraction on all images of dataset, using the current collection of extractors.
"""
self
.
tk
.
write_log
(
"Running extractors on all images in %s"
,
self
.
dataset
)
#Alexandre Cese
#O feature adicionado por mim nesta parte do programa foi dar a opcao de salvar as extracoes das imagens de um
#dataset em CSV
#Nao foi implementado a classificacao destes dados em CSV
if
self
.
csv
==
False
:
fextractor
=
FeatureExtractor
(
self
.
extractors
)
self
.
tk
.
append_log
(
"%s"
,
'
\n
'
.
join
([
extraction
.
_extractor_list
[
extractor
].
label
for
extractor
in
extraction
.
_extractor_list
if
extraction
.
_extractor_list
[
extractor
].
value
==
True
]))
output_file
,
run_time
=
fextractor
.
extract_all
(
self
.
dataset
,
"training"
)
self
.
tk
.
append_log
(
"
\n
Output file saved in %s"
,
output_file
)
self
.
tk
.
append_log
(
"Time elapsed: %0.3f seconds"
,
run_time
)
if
self
.
classifier
:
self
.
classifier
.
reset
()
else
:
h
=
histoextraction
()
self
.
tk
.
append_log
(
"%s"
,
'
\n
'
.
join
(
[
extraction
.
_extractor_list
[
extractor
].
label
for
extractor
in
extraction
.
_extractor_list
if
extraction
.
_extractor_list
[
extractor
].
value
==
True
]))
output_file
,
run_time
=
h
.
extract_all_csv
(
dataset
=
self
.
dataset
,
extractors
=
self
.
extractors
)
self
.
tk
.
append_log
(
"
\n
Output file saved in %s"
,
output_file
)
self
.
tk
.
append_log
(
"Time elapsed: %0.3f seconds"
,
run_time
)
if
self
.
classifier
:
self
.
classifier
.
reset
()
def superpixel_extraction(self):
    """Split every dataset image into superpixels and extract features from each one.

    With the CSV option off, writes an ARFF training file (with timing); with
    it on, writes the superpixel features to CSV and then runs the
    whole-image extraction to produce the 'training' file as well.
    """
    extractor_helper = histoextraction()

    if not self.csv:
        output_file, run_time = extractor_helper.extract_all_superpixels_arff(
            self.dataset, self.segmenter, self.extractors, output_file='training')
        self.tk.append_log("\nOutput file saved in %s", output_file)
        self.tk.append_log("Time elapsed: %0.3f seconds", run_time)
        if self.classifier:
            self.classifier.reset()
    else:
        output_file = extractor_helper.extract_all_superpixels_csv(
            self.dataset, self.segmenter, self.extractors)
        self.tk.append_log("\nOutput file saved in %s", output_file)
        if self.classifier:
            self.classifier.reset()

        # NOTE(review): the source rendering does not preserve indentation, so
        # it is unclear whether this whole-image extraction belonged to the
        # CSV branch or ran after the if/else; it is reproduced here inside
        # the CSV branch — confirm against the repository.
        fextractor = FeatureExtractor(self.extractors)
        self.tk.append_log("%s", '\n'.join(
            [extraction._extractor_list[extractor].label
             for extractor in extraction._extractor_list
             if extraction._extractor_list[extractor].value == True]))
        output_file, run_time = fextractor.extract_all(self.dataset, "training")
        self.tk.append_log("\nOutput file saved in %s", output_file)
        self.tk.append_log("Time elapsed: %0.3f seconds", run_time)
        if self.classifier:
            self.classifier.reset()
def
select_classifier
(
self
):
"""Open a dialog to select the classifier.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment