Docker layer info post v1.10 - hash

I used to check the sizes of layers in an image using the Docker history command, although now that shows "missing" instead of layer IDs due to the 1.10 migration to content hashes.
I now retrieve the hashes of all layers in an image through these commands:
docker pull ubuntu
ID=$(docker inspect -f {{.Id}} ubuntu)
sudo jq .rootfs.diff_ids /var/lib/docker/image/aufs/imagedb/content/$(echo $ID|tr ':' '/')
This returns a list of content hashes of all layers in the ubuntu image:
"diff_ids": [
"sha256:2a4049cf895d2384cb93d19f46f0d62560a48b2b202787edad2dc6e4b95a923a",
"sha256:01fbb4b5fa1b76ccdc289de098ea61925c7f8d3364159761720617b096f27bcc",
"sha256:d3492de15d7c87ea9db9ab123214d334f4bcb1e40846b77beebb4c37dd134a45",
"sha256:5f70bf18a086007016e948b04aed3b82103a36bea41755b6cddfaf10ace3c6ef"
],
In /var/lib/docker/image/aufs/layerdb/sha256/ I see information about each layer such as parent and size but I noticed that the diff_ids in this folder are not the same as the above output:
> ls /var/lib/docker/image/aufs/layerdb/sha256/
2088e4744016dbe95308d1920060f1fbc4a095ba5b9517d758745fc3986f2632
2a4049cf895d2384cb93d19f46f0d62560a48b2b202787edad2dc6e4b95a923a
8c63d05abe660a2f3f04d754de3ee3d927a17b3623a8e2be6d727e697f4b1e10
f747ac597de13b7f1ff918874f80bb83004232d7d6d4d45ad8890b58cdc79adc
I then tried inspecting another folder such as /var/lib/docker/aufs/layers:
> ls /var/lib/docker/aufs/layers#
58e7ed1f6d4ba047c9c714e66f10c014008ef4aa133d334198b8b1b7673f16e7
c4dd5a81188e36457624849aaeea74d98ef571390db75d4a03efb5bccb8c04e3
d31f918b7f59fcf768a9ae609141152cd5ae63943aac042429e3d2e04d472bcc
e576c6d41b96bd6a47233a6c6ec2f586021aa945aae6bd0e73ab9d4ad051a94e
As you can see these are 4 other content hashes again. Can someone tell me what the connection is between all these hashes and how I can find the size of each layer of the Ubuntu image? I'd like to be able to match each diff_id in the first output with a size but I don't know how all these diff_ids in different folders are related.
EDIT: I solved it like this - /var/lib/docker/image/aufs/layerdb/sha256/ also contains a file called "diff" which contains the diff_id corresponding to the output of the first command. I used this output to map the size to the correct diff_id.

I solved it like this: /var/lib/docker/image/aufs/layerdb/sha256/ also contains a file called "diff" which contains the diff_id corresponding to the output of the first command. I used this output to map the size to the correct diff_id.

Related

Getting HASH of individual files within folder uploaded to IPFS

When I upload a folder of .jpg files to IPFS, I get the HASH of that folder - which is cool.
But is each individual file in that folder also getting hashed?
And if so, how do I get the hash of each file?
I basically want to be able to upload a whole bunch of files - like 500 images - and do it all at once, or programmatically, and have the hash of each file be returned to me.
Any way to do this?
Yes! From the command line you get back the CIDs (the Content IDentifier, aka, IPFS hash) for each file added when you run ipfs add -r <path to directory>
$ ipfs add -r gifs
added QmfBAEYhJp9ZjGvv8utB3Yv8uuuxsDKjv9rurkHRsYU3ih gifs/martian-iron-man.gif
added QmRBHTH3p4W2xAzgLxvdh8VJvAmWBgchwCr9G98EprwetE gifs/needs-more-dogs.gif
added QmZbffnCcV598QxsUy7WphXCAMZJULZAzy94tuFZzbFcdK gifs/satisfied-with-your-care.gif
added QmTxnmk85ESr97j2xLNFeVZW2Kk9FquhdswofchF8iDGFg gifs/stone-of-triumph.gif
added QmcN71Qh56oSg2YXsEXuf8o6u5CrBXbyYYzgMyAkdkcxxK gifs/thanks-dog.gif
added QmTnuLaivKc1Aj8LBf2iWBHDXsmedip3zSPbQcGi6BFwTC gifs
the root CID for the directory is always the last item in the list.
You can limit the output of that command to just include the CIDs using the --quiet flag
⨎ ipfs add -r gifs --quiet
QmfBAEYhJp9ZjGvv8utB3Yv8uuuxsDKjv9rurkHRsYU3ih
QmRBHTH3p4W2xAzgLxvdh8VJvAmWBgchwCr9G98EprwetE
QmZbffnCcV598QxsUy7WphXCAMZJULZAzy94tuFZzbFcdK
QmTxnmk85ESr97j2xLNFeVZW2Kk9FquhdswofchF8iDGFg
QmcN71Qh56oSg2YXsEXuf8o6u5CrBXbyYYzgMyAkdkcxxK
QmTnuLaivKc1Aj8LBf2iWBHDXsmedip3zSPbQcGi6BFwTC
Or, if you know the CID for a directory, you can list out the files it contains and their individual CIDs with ipfs ls. Here I list out the contents of the gifs dir from the previous example
$ ipfs ls QmTnuLaivKc1Aj8LBf2iWBHDXsmedip3zSPbQcGi6BFwTC
QmfBAEYhJp9ZjGvv8utB3Yv8uuuxsDKjv9rurkHRsYU3ih 2252675 martian-iron-man.gif
QmRBHTH3p4W2xAzgLxvdh8VJvAmWBgchwCr9G98EprwetE 1233669 needs-more-dogs.gif
QmZbffnCcV598QxsUy7WphXCAMZJULZAzy94tuFZzbFcdK 1395067 satisfied-with-your-care.gif
QmTxnmk85ESr97j2xLNFeVZW2Kk9FquhdswofchF8iDGFg 1154617 stone-of-triumph.gif
QmcN71Qh56oSg2YXsEXuf8o6u5CrBXbyYYzgMyAkdkcxxK 2322454 thanks-dog.gif
You can it programatically with the core api in js-ipfs or go-ipfs. Here is an example of adding a files from the local file system in node.js using js-ipfs from the docs for ipfs.addAll(files) - https://github.com/ipfs/js-ipfs/blob/master/docs/core-api/FILES.md#importing-files-from-the-file-system
There is a super helpful video on how adding files to IPFS works over at https://www.youtube.com/watch?v=Z5zNPwMDYGg
And a walk through of js-ipfs here https://github.com/ipfs/js-ipfs/tree/master/examples/ipfs-101

How to change the metadata of all specific file of exist objects in Google Cloud Storage?

I have uploaded thousands of files to google storage, and i found out all the files miss content-type,so that my website cannot get it right.
i wonder if i can set some kind of policy like changing all the files content-type at the same time, for example, i have bunch of .html files inside the bucket
a/b/index.html
a/c/a.html
a/c/a/b.html
a/a.html
.
.
.
is that possible to set the content-type of all the .html files with one command in the different place?
You could do:
gsutil -m setmeta -h Content-Type:text/html gs://your-bucket/**.html
There's no a unique command to achieve the behavior you are looking for (one command to edit all the object's metadata) however, there's a command from gcloud to edit the metadata which you could use on a bash script to make a loop through all the objects inside the bucket.
1.- Option (1) is to use a the gcloud command "setmeta" on a bash script:
# kinda pseudo code here.
# get the list with all your object's names and iterate over the metadata edition command.
for OUTPUT in $(get_list_of_objects_names)
do
gsutil setmeta -h "[METADATA_KEY]:[METADATA_VALUE]" gs://[BUCKET_NAME]/[OBJECT_NAME]
# the "gs://[BUCKET_NAME]/[OBJECT_NAME]" would be your object name.
done
2.- You could also create a C++ script to achieve the same thing:
namespace gcs = google::cloud::storage;
using ::google::cloud::StatusOr;
[](gcs::Client client, std::string bucket_name, std::string object_name,
std::string key, std::string value) {
# you would need to find list all the objects, while on the loop, you can edit the metadata of the object.
for (auto&& object_metadata : client.ListObjects(bucket_name)) {
string bucket_name=object_metadata->bucket(), object_name=object_metadata->name();
StatusOr<gcs::ObjectMetadata> object_metadata =
client.GetObjectMetadata(bucket_name, object_name);
gcs::ObjectMetadata desired = *object_metadata;
desired.mutable_metadata().emplace(key, value);
StatusOr<gcs::ObjectMetadata> updated =
client.UpdateObject(bucket_name, object_name, desired,
gcs::Generation(object_metadata->generation()))
}
}

"failed to load any lstm-specific dictionaries for lang " tesseract 4.1

I tried to train the tesseract 4.1 using OCRD project but after training completed I copied the lang.traineddata but getting above error.
The tesseractWiki page is very confusing to understand asking to use combine_lang_model after making lstmf file. So Actually I have the lstmf file. I created these file by using tif/box pair.
Please help me for further step.
Related discussions:Failed to load any lstm-specific dictionaries for lang xxx
Suppose your training folder like this:
OCRD/makefile
OCRD/data/foo-ground-truth.
You could try as following steps:
Find the WORDLIST_FILE/NUMBERS_FILE/PUNC_FILE in the makefile, and change them to:
WORDLIST_FILE := data/$(MODEL_NAME).wordlist
NUMBERS_FILE := data/$(MODEL_NAME).numbers
PUNC_FILE := data/$(MODEL_NAME).punc
Suppose your base traineddata is eng.traineddata.
2.1 Download the .wordlist/.numbers/.punc files from the langdata_lstm.
2.2 Place them in OCRD/data
2.3 if the MODEL_NAME = foo, rename them as: foo.wordlist, foo.numbers, foo.punc
if you don't have the base traineddata, you could try this too. But if your base traineddata is afr, you should download the files from langdata_lstm/afr.
make training again
The cause of this error:
In OCRD, the default path of the above three files is $ (OUTPUT_DIR) = data / $ (MODEL_NAME), and all files in this path are automatically generated during the training process.
If the variable START_MODEL is not assigned, the makefile will not generate any related files under this path;
If the variable START_MODEL has been assigned, the foo.lstm-number-dawg、foo.lstm-punc-dawg、foo.lstm-word-dawg and so on will be produced in data / $ (MODEL_NAME). But they are not the right one. So there may be a bug in OCRD.

Impossible to merge osm.pbf file properly

I've recently started to work on a project with SRTM datas and I've extracted pbf file using phyghtmap.
To start I'm getting hgt files, converting them to tif using the following command : gdal_fillnodata.py data.hgt data.tif
Then I'm warping them with gdalwarp -co BIGTIFF=YES -co TILED=YES -co COMPRESS=LZW -co PREDICTOR=2 -t_srs "+proj=merc +ellps=sphere +R=6378137 +a=6378137 +units=m" -r bilinear -tr 90 90 data.tif warp-90.tif
And finally creating the pbf file with phyghtmap --max-nodes-per-tile=0 -s 10 -0 --pbf warp-90.tif
The results is a list of pbf files. They are perfectly fine when I load them into PostGIS with osm2pgsql. But I want to merge them to fasten the import.
I've tried all the major solutions:
osmium merge *.pbf -o merged.pbf
convert pbf to o5m then osmconvert64 *.o5m -o=merge.o5m then convert back to pbf
merging two by two with osmosis --read-pbf lon4.00_5.00lat44.00_45.00_local-source.pbf --read-pbf lon5.00_6.00lat44.00_45.00_local-source.osm.pbf --merge --write-pbf osmo_merge.osm.pbf
None of them worked and the result is only a very small fraction of the data merged in the result file.
Am I doing something wrong?
Note: If I load all pbf with --append it works, but it take ages for a very small portion of the world.
I found the issue. I wasn't setting the --start-node-id and --start-way-id in my script so all my pbf was using the same id range. Now I'm assigning unique ID and it work like a charm :)

Copy all files with given extension to output directory using CMake

I've seen that I can use this command in order to copy a directory using cmake:
file(COPY "myDir" DESTINATION "myDestination")
(from this post)
My problem is that I don't want to copy all of myDir, but only the .h files that are in there. I've tried with
file(COPY "myDir/*.h" DESTINATION "myDestination")
but I obtain the following error:
CMake Error at CMakeLists.txt:23 (file):
file COPY cannot find
"/full/path/to/myDIR/*.h".
How can I filter the files that I want to copy to a destination folder?
I've found the solution by myself:
file(GLOB MY_PUBLIC_HEADERS
"myDir/*.h"
)
file(COPY ${MY_PUBLIC_HEADERS} DESTINATION myDestination)
this also works for me:
install(DIRECTORY "myDir/"
DESTINATION "myDestination"
FILES_MATCHING PATTERN "*.h" )
The alternative approach provided by jepessen does not take into account the fact that sometimes the number of files to be copied is too high. I encountered the issue when doing such thing (more than 110 files)
Due to a limitation on Windows on the number of characters (2047 or 8191) in a single command line, this approach may randomly fail depending on the number of headers that are in the folder. More info here https://support.microsoft.com/en-gb/help/830473/command-prompt-cmd-exe-command-line-string-limitation
Here is my solution:
file(GLOB MY_HEADERS myDir/*.h)
foreach(CurrentHeaderFile IN LISTS MY_HEADERS)
add_custom_command(
TARGET MyTarget PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CurrentHeaderFile} ${myDestination}
COMMENT "Copying header: ${CurrentHeaderFile}")
endforeach()
This works like a charm on MacOS. However, if you have another target that depends on MyTarget and needs to use these headers, you may have some compile errors due to not found includes on Windows. Therefore you may want to prefer the following option that defines an intermediate target.
function (CopyFile ORIGINAL_TARGET FILE_PATH COPY_OUTPUT_DIRECTORY)
# Copy to the disk at build time so that when the header file changes, it is detected by the build system.
set(input ${FILE_PATH})
get_filename_component(file_name ${FILE_PATH} NAME)
set(output ${COPY_OUTPUT_DIRECTORY}/${file_name})
set(copyTarget ${ORIGINAL_TARGET}-${file_name})
add_custom_target(${copyTarget} DEPENDS ${output})
add_dependencies(${ORIGINAL_TARGET} ${copyTarget})
add_custom_command(
DEPENDS ${input}
OUTPUT ${output}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${input} ${output}
COMMENT "Copying file to ${output}."
)
endfunction ()
foreach(HeaderFile IN LISTS MY_HEADERS)
CopyFile(MyTarget ${HeaderFile} ${myDestination})
endforeach()
The downside indeed is that you end up with multiple target (one per copied file) but they should all end up together (alphabetically) since they start with the same prefix ORIGINAL_TARGET -> "MyTarget"